client: always run alloc cleanup hooks on final update (#15855)

* client: run alloc pre-kill hooks on last pass despite no live tasks This PR fixes a bug where alloc pre-kill hooks were not run in the edge case where there are no live tasks remaining, but it is also the final update to process for the (terminal) allocation. We need to run cleanup hooks here, otherwise they will not run until the allocation gets garbage collected (i.e. via Destroy()), possibly at a distant time in the future. Fixes #15477 * client: do not run ar cleanup hooks if client is shutting down
2026-01-01 16:05:42 +03:00 · 2023-01-27 09:59:31 -06:00
parent b668e74a4a
commit d30e34261e
5 changed files with 58 additions and 2 deletions
--- a/.changelog/15477.txt
+++ b/.changelog/15477.txt
@@ -0,0 +1,3 @@
 ```release-note:bug
 client: Fixed a bug where allocation cleanup hooks would not run
 ```
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -125,7 +125,7 @@ type allocRunner struct {
 	allocDir *allocdir.AllocDir
 	// runnerHooks are alloc runner lifecycle hooks that should be run on state
-	// transistions.
+	// transitions.
 	runnerHooks []interfaces.RunnerHook
 	// hookState is the output of allocrunner hooks
@@ -546,7 +546,9 @@ func (ar *allocRunner) handleTaskStateUpdates() {
 			}
 		}
 		// kill remaining live tasks
 		if len(liveRunners) > 0 {
 			// if all live runners are sidecars - kill alloc
 			onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners)
 			if killEvent == nil && onlySidecarsRemaining {
@@ -586,6 +588,14 @@ func (ar *allocRunner) handleTaskStateUpdates() {
 				}
 			}
 		} else {
 			// there are no live runners left
 			// run AR pre-kill hooks if this alloc is done, but not if it's because
 			// the agent is shutting down.
 			if !ar.isShuttingDown() && done {
 				ar.preKillHooks()
 			}
 			// If there are no live runners left kill all non-poststop task
 			// runners to unblock them from the alloc restart loop.
 			for _, tr := range ar.tasks {
--- a/client/allocrunner/alloc_runner_hooks.go
+++ b/client/allocrunner/alloc_runner_hooks.go
@@ -329,6 +329,7 @@ func (ar *allocRunner) destroy() error {
 func (ar *allocRunner) preKillHooks() {
 	for _, hook := range ar.runnerHooks {
 		pre, ok := hook.(interfaces.RunnerPreKillHook)
 		if !ok {
 			continue
 		}
--- a/client/allocrunner/alloc_runner_test.go
+++ b/client/allocrunner/alloc_runner_test.go
@@ -6,6 +6,7 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"sync/atomic"
 	"testing"
 	"time"
@@ -23,6 +24,8 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/testutil"
 	"github.com/shoenig/test/must"
 	"github.com/shoenig/test/wait"
 	"github.com/stretchr/testify/require"
 )
@@ -2398,3 +2401,43 @@ func TestHasSidecarTasks(t *testing.T) {
 		})
 	}
 }
 type allocPreKillHook struct {
 	ran atomic.Bool
 }
 func (*allocPreKillHook) Name() string { return "test_prekill" }
 func (h *allocPreKillHook) PreKill() {
 	h.ran.Store(true)
 }
 func TestAllocRunner_PreKill_RunOnDone(t *testing.T) {
 	ci.Parallel(t)
 	alloc := mock.Alloc()
 	task := alloc.Job.TaskGroups[0].Tasks[0]
 	task.Driver = "mock_driver"
 	task.Config = map[string]interface{}{"run_for": "2ms"}
 	alloc.DesiredStatus = "stop"
 	conf, cleanup := testAllocRunnerConfig(t, alloc.Copy())
 	t.Cleanup(cleanup)
 	ar, err := NewAllocRunner(conf)
 	must.NoError(t, err)
 	// set our custom prekill hook
 	hook := new(allocPreKillHook)
 	ar.runnerHooks = append(ar.runnerHooks, hook)
 	go ar.Run()
 	defer destroy(ar)
 	// wait for completion or timeout
 	must.Wait(t, wait.InitialSuccess(
 		wait.BoolFunc(hook.ran.Load),
 		wait.Timeout(5*time.Second),
 		wait.Gap(500*time.Millisecond),
 	))
 }
--- a/client/allocrunner/alloc_runner_unix_test.go
+++ b/client/allocrunner/alloc_runner_unix_test.go
@@ -1,5 +1,4 @@
 //go:build !windows
 // +build !windows
 package allocrunner