From 5208ad4c2c738d15496ac7d5391946234d2ea8d5 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Thu, 24 Apr 2025 09:24:28 -0400
Subject: [PATCH] scheduler: allow canaries to be migrated on node drain
 (#25726)

When a node is drained that has canaries that are not yet healthy, the
canaries may not be properly migrated and the deployment will halt. This
happens only if there are more than `migrate.max_parallel` canaries on the
node and the canaries are not yet healthy (e.g. they have a long
`update.min_healthy_time`).

In this circumstance, the first batch of canaries is correctly marked for
migration by the drainer. But the reconciler then counts these migrated
canaries against the total number of expected canaries and no longer
progresses the deployment. Because an insufficient number of allocations
have reported they're healthy, the deployment cannot be promoted.

When the reconciler looks for canaries to cancel, it leaves in the list any
canaries that are already terminal (because there shouldn't be any work to
do). But this ends up skipping the creation of a new canary to replace
terminal canaries that have been marked for migration. Add a conditional
for this case that removes the canary from the list of active canaries so
it can be replaced.

Ref: https://hashicorp.atlassian.net/browse/NMD-560
Fixes: https://github.com/hashicorp/nomad/issues/17842
---
 .changelog/25726.txt            |   3 +
 scheduler/generic_sched_test.go | 127 ++++++++++++++++++++++++++++++--
 scheduler/reconcile_test.go     |  21 +++---
 scheduler/reconcile_util.go     |   8 ++
 4 files changed, 142 insertions(+), 17 deletions(-)
 create mode 100644 .changelog/25726.txt

diff --git a/.changelog/25726.txt b/.changelog/25726.txt
new file mode 100644
index 000000000..702a44f98
--- /dev/null
+++ b/.changelog/25726.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+scheduler: Fixed a bug where draining a node with canaries could result in a stuck deployment
+```
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 0942613ba..50979cc5a 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -2024,7 +2024,7 @@ func TestServiceSched_JobModify(t *testing.T) {
 	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
 	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -2417,7 +2417,7 @@ func TestServiceSched_JobModify_Datacenters(t *testing.T) {
 	job2.Datacenters = []string{"dc1", "dc2"}
 	require.NoError(h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -2490,7 +2490,7 @@ func TestServiceSched_JobModify_IncrCount_NodeLimit(t *testing.T) {
 	job2.TaskGroups[0].Count = 3
 	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -2601,7 +2601,7 @@ func TestServiceSched_JobModify_CountZero(t *testing.T) {
 	job2.TaskGroups[0].Count = 0
 	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -2699,7 +2699,7 @@ func TestServiceSched_JobModify_Rolling(t *testing.T) {
 	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
 	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -2930,7 +2930,7 @@ func TestServiceSched_JobModify_Canaries(t *testing.T) {
 	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
 	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -3073,7 +3073,7 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
 	}
 	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -3714,7 +3714,7 @@ func TestServiceSched_NodeDown(t *testing.T) {
 	allocs := []*structs.Allocation{alloc}
 	must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
 
-	// Create a mock evaluation to deal with drain
+	// Create a mock evaluation
 	eval := &structs.Evaluation{
 		Namespace:   structs.DefaultNamespace,
 		ID:          uuid.Generate(),
@@ -4173,6 +4173,117 @@ func TestServiceSched_NodeDrain_Down(t *testing.T) {
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
 
+func TestServiceSched_NodeDrain_Canaries(t *testing.T) {
+	ci.Parallel(t)
+	h := NewHarness(t)
+
+	n1 := mock.Node()
+	n2 := mock.DrainNode()
+	must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), n1))
+	must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), n2))
+
+	job := mock.Job()
+	job.TaskGroups[0].Count = 2
+	must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
+
+	// Previous-version allocations on the healthy node
+	var allocs []*structs.Allocation
+	for i := 0; i < 2; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = n1.ID
+		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
+		allocs = append(allocs, alloc)
+		t.Logf("prev alloc=%q", alloc.ID)
+	}
+
+	// Canaries on the draining node, already stopped by the drainer
+	job = job.Copy()
+	job.Meta["owner"] = "changed"
+	job.Version++
+	var canaries []string
+	for i := 0; i < 2; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = n2.ID
+		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
+		alloc.DesiredStatus = structs.AllocDesiredStatusStop
+		alloc.ClientStatus = structs.AllocClientStatusComplete
+		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: pointer.Of(false),
+			Canary:  true,
+		}
+		alloc.DesiredTransition = structs.DesiredTransition{
+			Migrate: pointer.Of(true),
+		}
+		allocs = append(allocs, alloc)
+		canaries = append(canaries, alloc.ID)
+		t.Logf("stopped canary alloc=%q", alloc.ID)
+	}
+
+	// First replacement canary placed by a previous drainer eval
+	alloc := mock.Alloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = n2.ID
+	alloc.Name = "my-job.web[0]"
+	alloc.ClientStatus = structs.AllocClientStatusRunning
+	alloc.PreviousAllocation = canaries[0]
+	alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
+		Healthy: pointer.Of(false),
+		Canary:  true,
+	}
+	allocs = append(allocs, alloc)
+	canaries = append(canaries, alloc.ID)
+	t.Logf("new canary alloc=%q", alloc.ID)
+
+	must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
+	must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
+
+	deployment := mock.Deployment()
+	deployment.JobID = job.ID
+	deployment.JobVersion = job.Version
+	deployment.JobCreateIndex = job.CreateIndex
+	deployment.JobSpecModifyIndex = job.JobModifyIndex
+	deployment.TaskGroups["web"] = &structs.DeploymentState{
+		AutoRevert:      false,
+		AutoPromote:     false,
+		Promoted:        false,
+		PlacedCanaries:  canaries,
+		DesiredCanaries: 2,
+		DesiredTotal:    2,
+		PlacedAllocs:    3,
+		HealthyAllocs:   0,
+		UnhealthyAllocs: 0,
+	}
+	must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment))
+
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      n2.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
+		h.NextIndex(), []*structs.Evaluation{eval}))
+
+	must.NoError(t, h.Process(NewServiceScheduler, eval))
+	must.Len(t, 1, h.Plans)
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+	must.MapLen(t, 0, h.Plans[0].NodeAllocation)
+	must.MapLen(t, 1, h.Plans[0].NodeUpdate)
+	must.Len(t, 2, h.Plans[0].NodeUpdate[n2.ID])
+
+	for _, alloc := range h.Plans[0].NodeUpdate[n2.ID] {
+		must.SliceContains(t, canaries, alloc.ID)
+	}
+}
+
 func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) {
 	ci.Parallel(t)
 
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index 13062768b..f73e2af6e 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -3226,6 +3226,8 @@ func TestReconciler_DrainNode_Canary(t *testing.T) {
 		allocs = append(allocs, alloc)
 	}
 
+	n := mock.DrainNode()
+
 	// Create two canaries for the new job
 	handled := make(map[string]allocUpdateType)
 	for i := 0; i < 2; i++ {
@@ -3233,7 +3235,7 @@ func TestReconciler_DrainNode_Canary(t *testing.T) {
 		canary := mock.Alloc()
 		canary.Job = job
 		canary.JobID = job.ID
-		canary.NodeID = uuid.Generate()
+		canary.NodeID = n.ID
 		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
 		canary.TaskGroup = job.TaskGroups[0].Name
 		canary.DeploymentID = d.ID
@@ -3244,8 +3246,9 @@ func TestReconciler_DrainNode_Canary(t *testing.T) {
 
 	// Build a map of tainted nodes that contains the last canary
 	tainted := make(map[string]*structs.Node, 1)
-	n := mock.DrainNode()
-	n.ID = allocs[11].NodeID
+
+	// This is what the drainer sets for draining allocations
+	allocs[10].DesiredTransition.Migrate = pointer.Of(true)
 	allocs[11].DesiredTransition.Migrate = pointer.Of(true)
 	tainted[n.ID] = n
 
@@ -3258,18 +3261,18 @@ func TestReconciler_DrainNode_Canary(t *testing.T) {
 	assertResults(t, r, &resultExpectation{
 		createDeployment:  nil,
 		deploymentUpdates: nil,
-		place:             1,
+		place:             2,
 		inplace:           0,
-		stop:              1,
+		stop:              2,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
-				Canary: 1,
-				Ignore: 11,
+				Canary: 2,
+				Ignore: 10,
 			},
 		},
 	})
-	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
-	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
+	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
+	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
 }
 
 // Tests the reconciler handles migrating a canary correctly on a lost node
diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index 88e44692d..c6e8398e2 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -307,6 +307,14 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			continue
 		}
 
+		// Terminal canaries that have been marked for migration need to be
+		// migrated, otherwise we block deployments from progressing by
+		// counting them as running canaries.
+		if alloc.DeploymentStatus.IsCanary() && alloc.DesiredTransition.ShouldMigrate() {
+			migrate[alloc.ID] = alloc
+			continue
+		}
+
 		// Terminal allocs, if not reconnect, are always untainted as they
 		// should never be migrated.
 		untainted[alloc.ID] = alloc
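
Reviewer note (not part of the commit): below is a minimal jobspec sketch of
the conditions that trigger this bug, for reproduction. All names and values
are illustrative assumptions, not taken from the patch. The group runs more
canaries than `migrate.max_parallel` allows the drainer to move in one batch,
and `update.min_healthy_time` is long enough that the node is drained while
the canaries are still unhealthy.

```hcl
# Illustrative reproduction sketch; job, group, task names and the
# image are placeholders.
job "my-job" {
  group "web" {
    count = 2

    update {
      canary           = 2     # both canaries can land on the node being drained
      max_parallel     = 2
      min_healthy_time = "10m" # canaries stay unhealthy long enough for the drain
    }

    migrate {
      max_parallel = 1 # the drainer migrates only one allocation per batch
    }

    task "web" {
      driver = "docker"

      config {
        image = "example/app:v2"
      }
    }
  }
}
```

With this shape, draining the node that holds both unhealthy canaries
migrates only the first batch; before this patch the reconciler counted the
stopped canaries against the expected canary total and never placed their
replacements, leaving the deployment stuck.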