scheduler: monitor deployments correctly (#26605)

Corrects two minor bugs that prevented proper deployment monitoring for system
jobs: the new deployment field of the system scheduler object was never
populated, and the allocrunner health check hook was guarded not to run on
system jobs.
Author:       Piotr Kazmierczak
Committed by: GitHub
Date:         2025-08-25 15:29:13 +02:00
Parent:       3d373c9a6a
Commit:       7c4faf9227
3 changed files with 23 additions and 13 deletions


@@ -86,7 +86,9 @@ func newAllocHealthWatcherHook(
 	// Neither deployments nor migrations care about the health of
 	// non-service jobs so never watch their health
-	if alloc.Job.Type != structs.JobTypeService {
+	switch alloc.Job.Type {
+	case structs.JobTypeService, structs.JobTypeSystem:
+	default:
 		return noopAllocHealthWatcherHook{}
 	}
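
The rewritten guard leans on Go switch semantics: a matched case with an empty body does not fall through, so service and system allocations continue past the switch to the real health watcher, while every other job type lands in default and gets the noop hook. A minimal, self-contained sketch of that control flow (toy constants and string results stand in for Nomad's real types):

package main

import "fmt"

const (
	jobTypeService = "service"
	jobTypeSystem  = "system"
	jobTypeBatch   = "batch"
)

// pickHook mirrors the guard above: service and system jobs skip the early
// return, while every other job type hits default and gets the noop value.
func pickHook(jobType string) string {
	switch jobType {
	case jobTypeService, jobTypeSystem:
		// empty body: no fall-through in Go, execution continues after the switch
	default:
		return "noopAllocHealthWatcherHook"
	}
	return "allocHealthWatcherHook"
}

func main() {
	fmt.Println(pickHook(jobTypeService)) // allocHealthWatcherHook
	fmt.Println(pickHook(jobTypeSystem))  // allocHealthWatcherHook
	fmt.Println(pickHook(jobTypeBatch))   // noopAllocHealthWatcherHook
}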


@@ -19,6 +19,7 @@ import (
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/shoenig/test/must"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
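
The only import change is the shoenig/test/must package; the rewritten test below uses it for its new assertions while the existing testify imports remain for the rest of the file. A throwaway sketch of that assertion style, outside the Nomad tree:

package demo

import (
	"testing"

	"github.com/shoenig/test/must"
)

// TestMustStyle shows the helpers used in the rewritten test: must.* assertions
// stop the test on failure, much like testify's require.
func TestMustStyle(t *testing.T) {
	ok := 2+2 == 4
	must.True(t, ok)
	must.False(t, !ok)
}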
@@ -396,26 +397,28 @@ func TestHealthHook_SetHealth_unhealthy(t *testing.T) {
 	require.NoError(h.Postrun())
 }
 
-// TestHealthHook_SystemNoop asserts that system jobs return the noop tracker.
-func TestHealthHook_SystemNoop(t *testing.T) {
+// TestHealthHook_System asserts that system jobs trigger hooks just like service jobs.
+func TestHealthHook_System(t *testing.T) {
 	ci.Parallel(t)
 
 	alloc := mock.SystemAlloc()
 	h := newAllocHealthWatcherHook(testlog.HCLogger(t), alloc.Copy(), nil, nil, nil, nil)
 
 	// Assert that it's the noop impl
 	_, ok := h.(noopAllocHealthWatcherHook)
-	require.True(t, ok)
+	must.False(t, ok)
 
-	// Assert the noop impl does not implement any hooks
+	_, ok = h.(*allocHealthWatcherHook)
+	must.True(t, ok)
+	// Assert the other hooks are implemented, too
 	_, ok = h.(interfaces.RunnerPrerunHook)
-	require.False(t, ok)
+	must.True(t, ok)
 	_, ok = h.(interfaces.RunnerUpdateHook)
-	require.False(t, ok)
+	must.True(t, ok)
 	_, ok = h.(interfaces.RunnerPostrunHook)
-	require.False(t, ok)
+	must.True(t, ok)
 	_, ok = h.(interfaces.ShutdownHook)
-	require.False(t, ok)
+	must.True(t, ok)
 }
 
 // TestHealthHook_BatchNoop asserts that batch jobs return the noop tracker.
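
The test relies on Go type assertions to check which lifecycle interfaces the returned hook satisfies; with system jobs no longer short-circuited to the noop value, every assertion now expects the concrete watcher. A self-contained illustration of that checking pattern (toy interfaces and types, not Nomad's):

package main

import "fmt"

// Toy stand-ins for the runner hook interfaces the test asserts against.
type RunnerPrerunHook interface{ Prerun() error }
type RunnerPostrunHook interface{ Postrun() error }

// healthHook plays the role of *allocHealthWatcherHook: a concrete type
// implementing the lifecycle hooks.
type healthHook struct{}

func (h *healthHook) Prerun() error  { return nil }
func (h *healthHook) Postrun() error { return nil }

func main() {
	var h any = &healthHook{}

	// Same pattern as the test: a type assertion against an interface reports
	// whether the value's dynamic type implements that hook.
	_, isPrerun := h.(RunnerPrerunHook)
	_, isPostrun := h.(RunnerPostrunHook)
	fmt.Println(isPrerun, isPostrun) // true true
}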


@@ -102,7 +102,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) {
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy)
 		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil,
 			s.failedTGAllocs, s.planAnnotations, structs.EvalStatusFailed, desc,
-			s.queuedAllocs, "")
+			s.queuedAllocs, s.deployment.GetID())
 	}
 
 	limit := maxSystemScheduleAttempts
@@ -116,7 +116,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) {
 		if statusErr, ok := err.(*SetStatusError); ok {
 			return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil,
 				s.failedTGAllocs, s.planAnnotations, statusErr.EvalStatus, err.Error(),
-				s.queuedAllocs, "")
+				s.queuedAllocs, s.deployment.GetID())
 		}
 		return err
 	}
@@ -124,7 +124,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) {
 	// Update the status to complete
 	return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil,
 		s.failedTGAllocs, s.planAnnotations, structs.EvalStatusComplete, "",
-		s.queuedAllocs, "")
+		s.queuedAllocs, s.deployment.GetID())
 }
 
 // process is wrapped in retryMax to iteratively run the handler until we have no
@@ -286,6 +286,11 @@ func (s *SystemScheduler) computeJobAllocs() error {
 	s.plan.Deployment = nr.DeploymentCurrent
 	s.plan.DeploymentUpdates = nr.DeploymentUpdates
 
+	// Update the stored deployment
+	if nr.DeploymentCurrent != nil {
+		s.deployment = nr.DeploymentCurrent
+	}
+
 	// Add all the allocs to stop
 	for _, e := range r.Stop {
 		s.plan.AppendStoppedAlloc(e.Alloc, sstructs.StatusAllocNotNeeded, "", "")
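
The Process changes call s.deployment.GetID() unconditionally, including on evaluations where computeJobAllocs has not yet stored a deployment; that only works if GetID is a nil-safe getter on a pointer receiver, which the guarded assignment above suggests is the convention here (s.deployment stays nil until a deployment exists). A minimal sketch of that pattern with a toy type, not the real nomad/structs definition:

package main

import "fmt"

// Deployment is a toy stand-in for the struct in nomad/structs.
type Deployment struct {
	ID string
}

// GetID sketches the assumed nil-safe getter: called on a nil *Deployment it
// returns "", so the scheduler can hand the result to setStatus before any
// deployment has been created.
func (d *Deployment) GetID() string {
	if d == nil {
		return ""
	}
	return d.ID
}

func main() {
	var current *Deployment             // no deployment yet
	fmt.Printf("%q\n", current.GetID()) // ""

	current = &Deployment{ID: "9f3b2c1d"}
	fmt.Printf("%q\n", current.GetID()) // "9f3b2c1d"
}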