client: always run alloc cleanup hooks on final update (#15855)

* client: run alloc pre-kill hooks on last pass despite no live tasks

This PR fixes a bug where alloc pre-kill hooks were not run in the
edge case where there are no live tasks remaining, but it is also
the final update to process for the (terminal) allocation. We need
to run cleanup hooks here, otherwise they will not run until the
allocation gets garbage collected (i.e. via Destroy()), possibly
at a distant time in the future.

Fixes #15477

* client: do not run alloc runner (ar) cleanup hooks if client is shutting down
This commit is contained in:
Seth Hoenig
2023-01-27 09:59:31 -06:00
committed by GitHub
parent b668e74a4a
commit d30e34261e
5 changed files with 58 additions and 2 deletions

3
.changelog/15477.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
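client: Fixed a bug where allocation cleanup hooks would not run until the allocation was garbage collected if no live tasks remained on the final update of a terminal allocation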
```

View File

@@ -125,7 +125,7 @@ type allocRunner struct {
allocDir *allocdir.AllocDir allocDir *allocdir.AllocDir
// runnerHooks are alloc runner lifecycle hooks that should be run on state // runnerHooks are alloc runner lifecycle hooks that should be run on state
// transistions. // transitions.
runnerHooks []interfaces.RunnerHook runnerHooks []interfaces.RunnerHook
// hookState is the output of allocrunner hooks // hookState is the output of allocrunner hooks
@@ -546,7 +546,9 @@ func (ar *allocRunner) handleTaskStateUpdates() {
} }
} }
// kill remaining live tasks
if len(liveRunners) > 0 { if len(liveRunners) > 0 {
// if all live runners are sidecars - kill alloc // if all live runners are sidecars - kill alloc
onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners) onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners)
if killEvent == nil && onlySidecarsRemaining { if killEvent == nil && onlySidecarsRemaining {
@@ -586,6 +588,14 @@ func (ar *allocRunner) handleTaskStateUpdates() {
} }
} }
} else { } else {
// there are no live runners left
// run AR pre-kill hooks if this alloc is done, but not if it's because
// the agent is shutting down.
if !ar.isShuttingDown() && done {
ar.preKillHooks()
}
// If there are no live runners left kill all non-poststop task // If there are no live runners left kill all non-poststop task
// runners to unblock them from the alloc restart loop. // runners to unblock them from the alloc restart loop.
for _, tr := range ar.tasks { for _, tr := range ar.tasks {

View File

@@ -329,6 +329,7 @@ func (ar *allocRunner) destroy() error {
func (ar *allocRunner) preKillHooks() { func (ar *allocRunner) preKillHooks() {
for _, hook := range ar.runnerHooks { for _, hook := range ar.runnerHooks {
pre, ok := hook.(interfaces.RunnerPreKillHook) pre, ok := hook.(interfaces.RunnerPreKillHook)
if !ok { if !ok {
continue continue
} }

View File

@@ -6,6 +6,7 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"sync/atomic"
"testing" "testing"
"time" "time"
@@ -23,6 +24,8 @@ import (
"github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil" "github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@@ -2398,3 +2401,43 @@ func TestHasSidecarTasks(t *testing.T) {
}) })
} }
} }
// allocPreKillHook is a minimal hook used by tests to observe whether its
// PreKill method has been invoked.
type allocPreKillHook struct {
	// ran becomes true once PreKill has been called. It is an atomic value so
	// it can be polled while other goroutines may be invoking the hook.
	ran atomic.Bool
}

// Name reports the fixed identifier of this test hook.
func (h *allocPreKillHook) Name() string {
	const hookName = "test_prekill"
	return hookName
}

// PreKill records that the hook has been executed.
func (h *allocPreKillHook) PreKill() {
	h.ran.Store(true)
}
// TestAllocRunner_PreKill_RunOnDone starts an alloc runner for an allocation
// whose desired status is already "stop" and whose first task is a short-lived
// mock_driver task, then asserts that a custom pre-kill hook appended to the
// runner's hooks is executed within the timeout.
func TestAllocRunner_PreKill_RunOnDone(t *testing.T) {
	ci.Parallel(t)

	// Allocation that should be stopped, with the first task of the first
	// task group configured to run on the mock driver for 2ms.
	alloc := mock.Alloc()
	alloc.DesiredStatus = "stop"
	mockTask := alloc.Job.TaskGroups[0].Tasks[0]
	mockTask.Driver = "mock_driver"
	mockTask.Config = map[string]any{"run_for": "2ms"}

	// The runner is built from a copy of the allocation.
	cfg, cleanupFn := testAllocRunnerConfig(t, alloc.Copy())
	t.Cleanup(cleanupFn)

	runner, err := NewAllocRunner(cfg)
	must.NoError(t, err)

	// Register the observing pre-kill hook after the runner's existing hooks.
	preKill := &allocPreKillHook{}
	runner.runnerHooks = append(runner.runnerHooks, preKill)

	go runner.Run()
	defer destroy(runner)

	// Poll every 500ms, for at most 5s, until the hook reports it has run.
	hookRan := wait.InitialSuccess(
		wait.BoolFunc(preKill.ran.Load),
		wait.Timeout(5*time.Second),
		wait.Gap(500*time.Millisecond),
	)
	must.Wait(t, hookRan)
}

View File

@@ -1,5 +1,4 @@
//go:build !windows //go:build !windows
// +build !windows
package allocrunner package allocrunner