From d30e34261e30a96f84d3715f3aa02c2b49e5c5cb Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Fri, 27 Jan 2023 09:59:31 -0600 Subject: [PATCH] client: always run alloc cleanup hooks on final update (#15855) * client: run alloc pre-kill hooks on last pass despite no live tasks This PR fixes a bug where alloc pre-kill hooks were not run in the edge case where there are no live tasks remaining, but it is also the final update to process for the (terminal) allocation. We need to run cleanup hooks here, otherwise they will not run until the allocation gets garbage collected (i.e. via Destroy()), possibly at a distant time in the future. Fixes #15477 * client: do not run ar cleanup hooks if client is shutting down --- .changelog/15477.txt | 3 ++ client/allocrunner/alloc_runner.go | 12 +++++- client/allocrunner/alloc_runner_hooks.go | 1 + client/allocrunner/alloc_runner_test.go | 43 ++++++++++++++++++++ client/allocrunner/alloc_runner_unix_test.go | 1 - 5 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 .changelog/15477.txt diff --git a/.changelog/15477.txt b/.changelog/15477.txt new file mode 100644 index 000000000..4696f4813 --- /dev/null +++ b/.changelog/15477.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where allocation cleanup hooks would not run +``` diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 1d50c2595..8a58fa801 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -125,7 +125,7 @@ type allocRunner struct { allocDir *allocdir.AllocDir // runnerHooks are alloc runner lifecycle hooks that should be run on state - // transistions. + // transitions. runnerHooks []interfaces.RunnerHook // hookState is the output of allocrunner hooks @@ -546,7 +546,9 @@ func (ar *allocRunner) handleTaskStateUpdates() { } } + // kill remaining live tasks if len(liveRunners) > 0 { + // if all live runners are sidecars - kill alloc onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners) if killEvent == nil && onlySidecarsRemaining { @@ -586,6 +588,14 @@ func (ar *allocRunner) handleTaskStateUpdates() { } } } else { + // there are no live runners left + + // run AR pre-kill hooks if this alloc is done, but not if it's because + // the agent is shutting down. + if !ar.isShuttingDown() && done { + ar.preKillHooks() + } + // If there are no live runners left kill all non-poststop task // runners to unblock them from the alloc restart loop. for _, tr := range ar.tasks { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index c2d69ca0e..8b39851fb 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -329,6 +329,7 @@ func (ar *allocRunner) destroy() error { func (ar *allocRunner) preKillHooks() { for _, hook := range ar.runnerHooks { pre, ok := hook.(interfaces.RunnerPreKillHook) + if !ok { continue } diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 90b94657b..48084099a 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "os" "path/filepath" + "sync/atomic" "testing" "time" @@ -23,6 +24,8 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" "github.com/stretchr/testify/require" ) @@ -2398,3 +2401,43 @@ func TestHasSidecarTasks(t *testing.T) { }) } } + +type allocPreKillHook struct { + ran atomic.Bool +} + +func (*allocPreKillHook) Name() string { return "test_prekill" } + +func (h *allocPreKillHook) PreKill() { + h.ran.Store(true) +} + +func TestAllocRunner_PreKill_RunOnDone(t *testing.T) { + ci.Parallel(t) + + alloc := mock.Alloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{"run_for": "2ms"} + alloc.DesiredStatus = "stop" + + conf, cleanup := testAllocRunnerConfig(t, alloc.Copy()) + t.Cleanup(cleanup) + + ar, err := NewAllocRunner(conf) + must.NoError(t, err) + + // set our custom prekill hook + hook := new(allocPreKillHook) + ar.runnerHooks = append(ar.runnerHooks, hook) + + go ar.Run() + defer destroy(ar) + + // wait for completion or timeout + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(hook.ran.Load), + wait.Timeout(5*time.Second), + wait.Gap(500*time.Millisecond), + )) +} diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go index 085956910..0c2492da2 100644 --- a/client/allocrunner/alloc_runner_unix_test.go +++ b/client/allocrunner/alloc_runner_unix_test.go @@ -1,5 +1,4 @@ //go:build !windows -// +build !windows package allocrunner