From 0714353324cd420c3604ec6f35468a0c0f5bbe10 Mon Sep 17 00:00:00 2001 From: Michael Smithhisler Date: Fri, 8 Nov 2024 12:49:38 -0500 Subject: [PATCH] fix: handle template re-renders on client restart (#24399) When multiple templates with api functions are included in a task, it's possible for consul-template to re-render templates as it creates watchers, overwriting render event data. This change uses event fields that do not get overwritten, and only executes the change mode for templates that were actually written to disk. --------- Co-authored-by: Tim Gross --- .changelog/24399.txt | 3 + .../taskrunner/template/template.go | 23 ++++--- .../taskrunner/template/template_test.go | 63 +++++++++++++++++++ 3 files changed, 81 insertions(+), 8 deletions(-) create mode 100644 .changelog/24399.txt diff --git a/.changelog/24399.txt b/.changelog/24399.txt new file mode 100644 index 000000000..43bad70ed --- /dev/null +++ b/.changelog/24399.txt @@ -0,0 +1,3 @@ +```release-note:bug +fix: handles consul template re-renders on client restart +``` diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index ca99b01a6..2a42cd1a4 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -277,6 +277,10 @@ func (tm *TaskTemplateManager) handleFirstRender() { <-eventTimer.C } + // dirtyEvents are events that actually rendered to disk and need to trigger + // their respective change_mode operation + dirtyEvents := map[string]*manager.RenderEvent{} + // outstandingEvent tracks whether there is an outstanding event that should // be fired. outstandingEvent := false @@ -308,22 +312,25 @@ WAIT: continue } - dirty := false for _, event := range events { // This template hasn't been rendered if event.LastWouldRender.IsZero() { continue WAIT } - if event.WouldRender && event.DidRender { - dirty = true + // If the template _actually_ rendered to disk, mark it + // dirty. We track events here so that onTemplateRendered + // doesn't go back to the runner's RenderedEvents and process + // events that don't make us dirty. + if !event.LastDidRender.IsZero() { + dirtyEvents[event.Template.ID()] = event } } // if there's a driver handle then the task is already running and // that changes how we want to behave on first render - if dirty && tm.config.Lifecycle.IsRunning() { + if len(dirtyEvents) > 0 && tm.config.Lifecycle.IsRunning() { handledRenders := make(map[string]time.Time, len(tm.config.Templates)) - tm.onTemplateRendered(handledRenders, time.Time{}) + tm.onTemplateRendered(handledRenders, time.Time{}, dirtyEvents) } break WAIT @@ -417,12 +424,13 @@ func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time SetFailsTask(). SetDisplayMessage(fmt.Sprintf("Template failed: %v", err))) case <-tm.runner.TemplateRenderedCh(): - tm.onTemplateRendered(handledRenders, allRenderedTime) + events := tm.runner.RenderEvents() + tm.onTemplateRendered(handledRenders, allRenderedTime, events) } } } -func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time) { +func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time, events map[string]*manager.RenderEvent) { var handling []string signals := make(map[string]struct{}) @@ -430,7 +438,6 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time restart := false var splay time.Duration - events := tm.runner.RenderEvents() for id, event := range events { // First time through diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index 4811e364a..90bcb4bb4 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -806,6 +806,69 @@ OUTER: } } +// Tests an edge case where a task has multiple templates and the client is restarted. +// In this case, consul-template may re-render and overwrite some fields in the first +// render event but we still want to make sure it causes a restart. +// We cannot control the order in which these templates are rendered, so this test will +// exhibit flakiness if this edge case is not properly handled. +func TestTaskTemplateManager_FirstRender_MultiSecret(t *testing.T) { + ci.Parallel(t) + clienttestutil.RequireVault(t) + + // Make a template that will render based on a key in Vault + vaultPath := "secret/data/restart" + key := "shouldRestart" + content := "shouldRestart" + embedded := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath, key) + file := "my.tmpl" + template := &structs.Template{ + EmbeddedTmpl: embedded, + DestPath: file, + ChangeMode: structs.TemplateChangeModeRestart, + } + + vaultPath2 := "secret/data/noop" + key2 := "noop" + content2 := "noop" + embedded2 := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath2, key2) + file2 := "my.tmpl2" + template2 := &structs.Template{ + EmbeddedTmpl: embedded2, + DestPath: file2, + ChangeMode: structs.TemplateChangeModeNoop, + } + + harness := newTestHarness(t, []*structs.Template{template, template2}, false, true) + + // Write the secret to Vault + logical := harness.vault.Client.Logical() + _, err := logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}}) + must.NoError(t, err) + _, err = logical.Write(vaultPath2, map[string]interface{}{"data": map[string]interface{}{key2: content2}}) + must.NoError(t, err) + + // simulate task is running already + harness.mockHooks.HasHandle = true + + harness.start(t) + defer harness.stop() + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + t.Fatal("Task unblock should have been called") + } + + select { + case <-harness.mockHooks.RestartCh: + case <-harness.mockHooks.SignalCh: + t.Fatal("should not have received signal", harness.mockHooks) + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + t.Fatal("should have restarted") + } +} + func TestTaskTemplateManager_Rerender_Noop(t *testing.T) { ci.Parallel(t) clienttestutil.RequireConsul(t)