From e0dbbb0950dcd5c5098ffc4bfe87c832e917d623 Mon Sep 17 00:00:00 2001 From: Drew Bailey <2614075+drewbailey@users.noreply.github.com> Date: Fri, 27 Sep 2019 15:03:21 -0400 Subject: [PATCH 1/2] Use joint context to cancel prestart hooks fixes https://github.com/hashicorp/nomad/issues/6382 The prestart hook for templates blocks while it resolves vault secrets. If the secret is not found it continues to retry. If a task is shutdown during this time, the prestart hook currently does not receive shutdownCtxCancel, causing it to hang. This PR joins the two contexts so either killCtx or shutdownCtx cancel and stop the task. --- .../taskrunner/task_runner_hooks.go | 6 +- .../taskrunner/task_runner_test.go | 66 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 25d1b59bc..374f29f42 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/LK4D4/joincontext" multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" @@ -192,8 +193,11 @@ func (tr *TaskRunner) prestart() error { } // Run the prestart hook + // use a joint context to allow any blocking pre-start hooks + // to be canceled by either killCtx or shutdownCtx + joinedCtx, _ := joincontext.Join(tr.killCtx, tr.shutdownCtx) var resp interfaces.TaskPrestartResponse - if err := pre.Prestart(tr.killCtx, &req, &resp); err != nil { + if err := pre.Prestart(joinedCtx, &req, &resp); err != nil { tr.emitHookError(err, name) return structs.WrapRecoverable(fmt.Sprintf("prestart hook %q failed: %v", name, err), err) } diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index 8a6ab8cf5..e1f49157d 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -1742,6 +1742,72 @@ func TestTaskRunner_Template_Artifact(t *testing.T) { require.NoErrorf(t, err, "%v not rendered", f2) } +// TestTaskRunner_Template_BlockingPreStart asserts that a template +// that fails to render in PreStart can gracefully be shutdown by +// either killCtx or shutdownCtx +func TestTaskRunner_Template_BlockingPreStart(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Templates = []*structs.Template{ + { + EmbeddedTmpl: `{{ with secret "foo/secret" }}{{ .Data.certificate }}{{ end }}`, + DestPath: "local/test", + ChangeMode: structs.TemplateChangeModeNoop, + }, + } + + task.Vault = &structs.Vault{Policies: []string{"default"}} + + conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name) + defer cleanup() + + tr, err := NewTaskRunner(conf) + require.NoError(t, err) + go tr.Run() + + testutil.WaitForResult(func() (bool, error) { + ts := tr.TaskState() + + if len(ts.Events) == 0 { + return false, fmt.Errorf("no events yet") + } + + foundMissingVaultSecretEvent := false + for _, e := range ts.Events { + if e.Type == "Template" && strings.Contains(e.DisplayMessage, "vault.read(foo/secret)") { + foundMissingVaultSecretEvent = true + } + } + + if !foundMissingVaultSecretEvent { + return false, fmt.Errorf("no missing vault secret template event yet: %#v", ts.Events) + } + + return true, nil + }, func(err error) { + require.NoError(t, err) + }) + + shutdown := func() <-chan bool { + finished := make(chan bool) + go func() { + tr.Shutdown() + finished <- true + }() + + return finished + } + + select { + case <-shutdown(): + // it shut down like it should have + case <-time.After(10 * time.Second): + require.Fail(t, "timeout shutting down task") + } +} + // TestTaskRunner_Template_NewVaultToken asserts that a new vault token is // created when rendering template and that it is revoked on alloc completion func TestTaskRunner_Template_NewVaultToken(t *testing.T) { From 12be12020ed2ca937681a556d52e121fcb5cb436 Mon Sep 17 00:00:00 2001 From: Drew Bailey <2614075+drewbailey@users.noreply.github.com> Date: Fri, 27 Sep 2019 15:53:30 -0400 Subject: [PATCH 2/2] simplify logic to check for vault read event defer shutdown to cleanup after failed run Co-Authored-By: Michael Schurter update comment to include ctx note for shutdown --- client/allocrunner/interfaces/task_lifecycle.go | 2 +- client/allocrunner/taskrunner/task_runner_test.go | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/client/allocrunner/interfaces/task_lifecycle.go b/client/allocrunner/interfaces/task_lifecycle.go index 680c0d9c2..ee99a507b 100644 --- a/client/allocrunner/interfaces/task_lifecycle.go +++ b/client/allocrunner/interfaces/task_lifecycle.go @@ -89,7 +89,7 @@ type TaskPrestartHook interface { // Prestart is called before the task is started including after every // restart. Prestart is not called if the allocation is terminal. // - // The context is cancelled if the task is killed. + // The context is cancelled if the task is killed or shutdown. Prestart(context.Context, *TaskPrestartRequest, *TaskPrestartResponse) error } diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index e1f49157d..25124c3fd 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -1766,6 +1766,7 @@ func TestTaskRunner_Template_BlockingPreStart(t *testing.T) { tr, err := NewTaskRunner(conf) require.NoError(t, err) go tr.Run() + defer tr.Shutdown() testutil.WaitForResult(func() (bool, error) { ts := tr.TaskState() @@ -1774,18 +1775,14 @@ func TestTaskRunner_Template_BlockingPreStart(t *testing.T) { return false, fmt.Errorf("no events yet") } - foundMissingVaultSecretEvent := false for _, e := range ts.Events { if e.Type == "Template" && strings.Contains(e.DisplayMessage, "vault.read(foo/secret)") { - foundMissingVaultSecretEvent = true + return true, nil } } - if !foundMissingVaultSecretEvent { - return false, fmt.Errorf("no missing vault secret template event yet: %#v", ts.Events) - } + return false, fmt.Errorf("no missing vault secret template event yet: %#v", ts.Events) - return true, nil }, func(err error) { require.NoError(t, err) })