diff --git a/.changelog/24399.txt b/.changelog/24399.txt
new file mode 100644
index 000000000..43bad70ed
--- /dev/null
+++ b/.changelog/24399.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+fix: handles consul template re-renders on client restart
+```
diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go
index ca99b01a6..2a42cd1a4 100644
--- a/client/allocrunner/taskrunner/template/template.go
+++ b/client/allocrunner/taskrunner/template/template.go
@@ -277,6 +277,10 @@ func (tm *TaskTemplateManager) handleFirstRender() {
 		<-eventTimer.C
 	}
 
+	// dirtyEvents are events that actually rendered to disk and need to trigger
+	// their respective change_mode operation
+	dirtyEvents := map[string]*manager.RenderEvent{}
+
 	// outstandingEvent tracks whether there is an outstanding event that should
 	// be fired.
 	outstandingEvent := false
@@ -308,22 +312,25 @@ WAIT:
 				continue
 			}
 
-			dirty := false
 			for _, event := range events {
 				// This template hasn't been rendered
 				if event.LastWouldRender.IsZero() {
 					continue WAIT
 				}
-				if event.WouldRender && event.DidRender {
-					dirty = true
+				// If the template _actually_ rendered to disk, mark it
+				// dirty. We track events here so that onTemplateRendered
+				// doesn't go back to the runner's RenderedEvents and process
+				// events that don't make us dirty.
+				if !event.LastDidRender.IsZero() {
+					dirtyEvents[event.Template.ID()] = event
 				}
 			}
 
 			// if there's a driver handle then the task is already running and
 			// that changes how we want to behave on first render
-			if dirty && tm.config.Lifecycle.IsRunning() {
+			if len(dirtyEvents) > 0 && tm.config.Lifecycle.IsRunning() {
 				handledRenders := make(map[string]time.Time, len(tm.config.Templates))
-				tm.onTemplateRendered(handledRenders, time.Time{})
+				tm.onTemplateRendered(handledRenders, time.Time{}, dirtyEvents)
 			}
 
 			break WAIT
@@ -417,12 +424,13 @@ func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time
 				SetFailsTask().
 				SetDisplayMessage(fmt.Sprintf("Template failed: %v", err)))
 		case <-tm.runner.TemplateRenderedCh():
-			tm.onTemplateRendered(handledRenders, allRenderedTime)
+			events := tm.runner.RenderEvents()
+			tm.onTemplateRendered(handledRenders, allRenderedTime, events)
 		}
 	}
 }
 
-func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time) {
+func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time, events map[string]*manager.RenderEvent) {
 
 	var handling []string
 	signals := make(map[string]struct{})
@@ -430,7 +438,6 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time
 	restart := false
 	var splay time.Duration
 
-	events := tm.runner.RenderEvents()
 	for id, event := range events {
 
 		// First time through
diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go
index 4811e364a..90bcb4bb4 100644
--- a/client/allocrunner/taskrunner/template/template_test.go
+++ b/client/allocrunner/taskrunner/template/template_test.go
@@ -806,6 +806,69 @@ OUTER:
 	}
 }
 
+// Tests an edge case where a task has multiple templates and the client is restarted.
+// In this case, consul-template may re-render and overwrite some fields in the first
+// render event but we still want to make sure it causes a restart.
+// We cannot control the order in which these templates are rendered, so this test will
+// exhibit flakiness if this edge case is not properly handled.
+func TestTaskTemplateManager_FirstRender_MultiSecret(t *testing.T) {
+	ci.Parallel(t)
+	clienttestutil.RequireVault(t)
+
+	// Make a template that will render based on a key in Vault
+	vaultPath := "secret/data/restart"
+	key := "shouldRestart"
+	content := "shouldRestart"
+	embedded := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath, key)
+	file := "my.tmpl"
+	template := &structs.Template{
+		EmbeddedTmpl: embedded,
+		DestPath:     file,
+		ChangeMode:   structs.TemplateChangeModeRestart,
+	}
+
+	vaultPath2 := "secret/data/noop"
+	key2 := "noop"
+	content2 := "noop"
+	embedded2 := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath2, key2)
+	file2 := "my.tmpl2"
+	template2 := &structs.Template{
+		EmbeddedTmpl: embedded2,
+		DestPath:     file2,
+		ChangeMode:   structs.TemplateChangeModeNoop,
+	}
+
+	harness := newTestHarness(t, []*structs.Template{template, template2}, false, true)
+
+	// Write the secret to Vault
+	logical := harness.vault.Client.Logical()
+	_, err := logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}})
+	must.NoError(t, err)
+	_, err = logical.Write(vaultPath2, map[string]interface{}{"data": map[string]interface{}{key2: content2}})
+	must.NoError(t, err)
+
+	// simulate task is running already
+	harness.mockHooks.HasHandle = true
+
+	harness.start(t)
+	defer harness.stop()
+
+	// Wait for the unblock
+	select {
+	case <-harness.mockHooks.UnblockCh:
+	case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second):
+		t.Fatal("Task unblock should have been called")
+	}
+
+	select {
+	case <-harness.mockHooks.RestartCh:
+	case <-harness.mockHooks.SignalCh:
+		t.Fatal("should not have received signal", harness.mockHooks)
+	case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second):
+		t.Fatal("should have restarted")
+	}
+}
+
 func TestTaskTemplateManager_Rerender_Noop(t *testing.T) {
 	ci.Parallel(t)
 	clienttestutil.RequireConsul(t)