diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index bc105ac1e..e6f3a4709 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -6266,6 +6266,15 @@ func (a *AllocDeploymentStatus) IsUnhealthy() bool {
 	return a.Healthy != nil && !*a.Healthy
 }
 
+// IsCanary returns if the allocation is marked as a canary
+func (a *AllocDeploymentStatus) IsCanary() bool {
+	if a == nil {
+		return false
+	}
+
+	return a.Canary
+}
+
 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
 	if a == nil {
 		return nil
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index d5a75beae..0618b5c72 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -3113,67 +3113,92 @@ func TestServiceSched_Reschedule_PruneEvents(t *testing.T) {
 }
 
-// Tests that deployments with failed allocs don't result in placements
-func TestDeployment_FailedAllocs_NoReschedule(t *testing.T) {
-	h := NewHarness(t)
-	require := require.New(t)
-	// Create some nodes
-	var nodes []*structs.Node
-	for i := 0; i < 10; i++ {
-		node := mock.Node()
-		nodes = append(nodes, node)
-		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
+// Tests that deployments with failed allocs result in placements as long as the
+// deployment is running.
+func TestDeployment_FailedAllocs_Reschedule(t *testing.T) {
+	for _, failedDeployment := range []bool{false, true} {
+		t.Run(fmt.Sprintf("Failed Deployment: %v", failedDeployment), func(t *testing.T) {
+			h := NewHarness(t)
+			require := require.New(t)
+			// Create some nodes
+			var nodes []*structs.Node
+			for i := 0; i < 10; i++ {
+				node := mock.Node()
+				nodes = append(nodes, node)
+				noErr(t, h.State.UpsertNode(h.NextIndex(), node))
+			}
+
+			// Generate a fake job with allocations and a reschedule policy.
+			job := mock.Job()
+			job.TaskGroups[0].Count = 2
+			job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
+				Attempts: 1,
+				Interval: 15 * time.Minute,
+			}
+			jobIndex := h.NextIndex()
+			require.Nil(h.State.UpsertJob(jobIndex, job))
+
+			deployment := mock.Deployment()
+			deployment.JobID = job.ID
+			deployment.JobCreateIndex = jobIndex
+			deployment.JobVersion = job.Version
+			if failedDeployment {
+				deployment.Status = structs.DeploymentStatusFailed
+			}
+
+			require.Nil(h.State.UpsertDeployment(h.NextIndex(), deployment))
+
+			var allocs []*structs.Allocation
+			for i := 0; i < 2; i++ {
+				alloc := mock.Alloc()
+				alloc.Job = job
+				alloc.JobID = job.ID
+				alloc.NodeID = nodes[i].ID
+				alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
+				alloc.DeploymentID = deployment.ID
+				allocs = append(allocs, alloc)
+			}
+			// Mark one of the allocations as failed in the past
+			allocs[1].ClientStatus = structs.AllocClientStatusFailed
+			allocs[1].TaskStates = map[string]*structs.TaskState{"web": {State: "start",
+				StartedAt:  time.Now().Add(-12 * time.Hour),
+				FinishedAt: time.Now().Add(-10 * time.Hour)}}
+			allocs[1].DesiredTransition.Reschedule = helper.BoolToPtr(true)
+
+			require.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs))
+
+			// Create a mock evaluation
+			eval := &structs.Evaluation{
+				Namespace:   structs.DefaultNamespace,
+				ID:          uuid.Generate(),
+				Priority:    50,
+				TriggeredBy: structs.EvalTriggerNodeUpdate,
+				JobID:       job.ID,
+				Status:      structs.EvalStatusPending,
+			}
+			require.Nil(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
+
+			// Process the evaluation
+			require.Nil(h.Process(NewServiceScheduler, eval))
+
+			if failedDeployment {
+				// Verify no plan created
+				require.Len(h.Plans, 0)
+			} else {
+				require.Len(h.Plans, 1)
+				plan := h.Plans[0]
+
+				// Ensure the plan allocated
+				var planned []*structs.Allocation
+				for _, allocList := range plan.NodeAllocation {
+					planned = append(planned, allocList...)
+				}
+				if len(planned) != 1 {
+					t.Fatalf("bad: %#v", plan)
+				}
+			}
+		})
 	}
-
-	// Generate a fake job with allocations and a reschedule policy.
-	job := mock.Job()
-	job.TaskGroups[0].Count = 2
-	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
-		Attempts: 1,
-		Interval: 15 * time.Minute,
-	}
-	jobIndex := h.NextIndex()
-	require.Nil(h.State.UpsertJob(jobIndex, job))
-
-	deployment := mock.Deployment()
-	deployment.JobID = job.ID
-	deployment.JobCreateIndex = jobIndex
-	deployment.JobVersion = job.Version
-
-	require.Nil(h.State.UpsertDeployment(h.NextIndex(), deployment))
-
-	var allocs []*structs.Allocation
-	for i := 0; i < 2; i++ {
-		alloc := mock.Alloc()
-		alloc.Job = job
-		alloc.JobID = job.ID
-		alloc.NodeID = nodes[i].ID
-		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
-		alloc.DeploymentID = deployment.ID
-		allocs = append(allocs, alloc)
-	}
-	// Mark one of the allocations as failed
-	allocs[1].ClientStatus = structs.AllocClientStatusFailed
-
-	require.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs))
-
-	// Create a mock evaluation
-	eval := &structs.Evaluation{
-		Namespace:   structs.DefaultNamespace,
-		ID:          uuid.Generate(),
-		Priority:    50,
-		TriggeredBy: structs.EvalTriggerNodeUpdate,
-		JobID:       job.ID,
-		Status:      structs.EvalStatusPending,
-	}
-	require.Nil(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
-
-	// Process the evaluation
-	require.Nil(h.Process(NewServiceScheduler, eval))
-
-	// Verify no plan created
-	require.Equal(0, len(h.Plans))
-
 }
 
 func TestBatchSched_Run_CompleteAlloc(t *testing.T) {
diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index 1ab45b10d..6be75b0d9 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -334,6 +334,8 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	all, ignore := a.filterOldTerminalAllocs(all)
 	desiredChanges.Ignore += uint64(len(ignore))
 
+	// canaries is the set of canaries for the current deployment and all is all
+	// allocs including the canaries
 	canaries, all := a.handleGroupCanaries(all, desiredChanges)
 
 	// Determine what set of allocations are on tainted nodes
@@ -357,14 +359,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	desiredChanges.Stop += uint64(len(stop))
 	untainted = untainted.difference(stop)
 
-	// Having stopped un-needed allocations, append the canaries to the existing
-	// set of untainted because they are promoted. This will cause them to be
-	// treated like non-canaries
-	if !canaryState {
-		untainted = untainted.union(canaries)
-		nameIndex.Set(canaries)
-	}
-
 	// Do inplace upgrades where possible and capture the set of upgrades that
 	// need to be done destructively.
 	ignore, inplace, destructive := a.computeUpdates(tg, untainted)
@@ -374,6 +368,12 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 		dstate.DesiredTotal += len(destructive) + len(inplace)
 	}
 
+	// Remove the canaries now that we have handled rescheduling so that we do
+	// not consider them when making placement decisions.
+	if canaryState {
+		untainted = untainted.difference(canaries)
+	}
+
 	// The fact that we have destructive updates and have less canaries than is
 	// desired means we need to create canaries
 	numDestructive := len(destructive)
@@ -382,7 +382,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	requireCanary := numDestructive != 0 && strategy != nil && len(canaries) < strategy.Canary && !canariesPromoted
 	if requireCanary && !a.deploymentPaused && !a.deploymentFailed {
 		number := strategy.Canary - len(canaries)
-		//number = helper.IntMin(numDestructive, number)
 		desiredChanges.Canary += uint64(number)
 		if !existingDeployment {
 			dstate.DesiredCanaries = strategy.Canary
@@ -422,16 +421,29 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 
 		min := helper.IntMin(len(place), limit)
 		limit -= min
-	} else if !deploymentPlaceReady && len(lost) != 0 {
-		// We are in a situation where we shouldn't be placing more than we need
-		// to but we have lost allocations. It is a very weird user experience
-		// if you have a node go down and Nomad doesn't replace the allocations
-		// because the deployment is paused/failed so we only place to recover
-		// the lost allocations.
-		allowed := helper.IntMin(len(lost), len(place))
-		desiredChanges.Place += uint64(allowed)
-		for _, p := range place[:allowed] {
-			a.result.place = append(a.result.place, p)
+	} else if !deploymentPlaceReady {
+		// We do not want to place additional allocations but in the case we
+		// have lost allocations or allocations that require rescheduling now,
+		// we do so regardless to avoid odd user experiences.
+		if len(lost) != 0 {
+			allowed := helper.IntMin(len(lost), len(place))
+			desiredChanges.Place += uint64(allowed)
+			for _, p := range place[:allowed] {
+				a.result.place = append(a.result.place, p)
+			}
+		}
+
+		// Handle rescheduling of failed allocations even if the deployment is
+		// failed. We do not reschedule if the allocation is part of the failed
+		// deployment.
+		if now := len(rescheduleNow); now != 0 {
+			for _, p := range place {
+				prev := p.PreviousAllocation()
+				if p.IsRescheduling() && !(a.deploymentFailed && prev != nil && a.deployment.ID == prev.DeploymentID) {
+					a.result.place = append(a.result.place, p)
+					desiredChanges.Place++
+				}
+			}
 		}
 	}
 
@@ -508,13 +520,14 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 
 	// deploymentComplete is whether the deployment is complete which largely
 	// means that no placements were made or desired to be made
-	deploymentComplete := len(destructive)+len(inplace)+len(place)+len(migrate) == 0 && !requireCanary
+	deploymentComplete := len(destructive)+len(inplace)+len(place)+len(migrate)+len(rescheduleNow)+len(rescheduleLater) == 0 && !requireCanary
 
 	// Final check to see if the deployment is complete is to ensure everything
 	// is healthy
 	if deploymentComplete && a.deployment != nil {
 		dstate := a.deployment.TaskGroups[group]
-		if dstate.DesiredTotal != dstate.HealthyAllocs {
+		if dstate.HealthyAllocs < helper.IntMax(dstate.DesiredTotal, dstate.DesiredCanaries) || // Make sure we have enough healthy allocs
+			(dstate.DesiredCanaries > 0 && !dstate.Promoted) { // Make sure we are promoted if we have canaries
 			deploymentComplete = false
 		}
 	}
@@ -646,26 +659,24 @@ func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, dest
 
 func (a *allocReconciler) computePlacements(group *structs.TaskGroup,
 	nameIndex *allocNameIndex, untainted, migrate allocSet, reschedule allocSet) []allocPlaceResult {
 
-	// Hot path the nothing to do case
-	existing := len(untainted) + len(migrate)
-	if existing >= group.Count {
-		return nil
-	}
-	var place []allocPlaceResult
 	// Add rescheduled placement results
-	// Any allocations being rescheduled will remain at DesiredStatusRun ClientStatusFailed
+	var place []allocPlaceResult
 	for _, alloc := range reschedule {
 		place = append(place, allocPlaceResult{
 			name:          alloc.Name,
 			taskGroup:     group,
 			previousAlloc: alloc,
 			reschedule:    true,
+			canary:        alloc.DeploymentStatus.IsCanary(),
 		})
-		existing += 1
-		if existing == group.Count {
-			break
-		}
 	}
+
+	// Hot path the nothing to do case
+	existing := len(untainted) + len(migrate) + len(reschedule)
+	if existing >= group.Count {
+		return place
+	}
+
 	// Add remaining placement results
 	if existing < group.Count {
 		for _, name := range nameIndex.Next(uint(group.Count - existing)) {
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index bf6aca5e8..4564b6c3f 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -1935,6 +1935,130 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
 	tgName := job.TaskGroups[0].Name
 	now := time.Now()
 
+	// Set up reschedule policy and update stanza
+	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
+		Delay:         5 * time.Second,
+		DelayFunction: "constant",
+		MaxDelay:      1 * time.Hour,
+		Unlimited:     true,
+	}
+	job.TaskGroups[0].Update = canaryUpdate
+
+	job2 := job.Copy()
+	job2.Version++
+
+	d := structs.NewDeployment(job2)
+	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
+	s := &structs.DeploymentState{
+		DesiredCanaries: 2,
+		DesiredTotal:    5,
+	}
+	d.TaskGroups[job.TaskGroups[0].Name] = s
+
+	// Create 5 existing allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 5; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+	}
+
+	// Create 2 healthy canary allocations
+	for i := 0; i < 2; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+		alloc.DeploymentID = d.ID
+		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Canary:  true,
+			Healthy: helper.BoolToPtr(false),
+		}
+		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
+		allocs = append(allocs, alloc)
+	}
+
+	// Mark the canaries as failed
+	allocs[5].ClientStatus = structs.AllocClientStatusFailed
+	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
+
+	// Mark one of them as already rescheduled once
+	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
+		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
+			PrevAllocID: uuid.Generate(),
+			PrevNodeID:  uuid.Generate(),
+		},
+	}}
+
+	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
+		StartedAt:  now.Add(-1 * time.Hour),
+		FinishedAt: now.Add(-10 * time.Second)}}
+	allocs[6].ClientStatus = structs.AllocClientStatusFailed
+	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
+
+	// Create 4 unhealthy canary allocations that have already been replaced
+	for i := 0; i < 4; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
+		alloc.ClientStatus = structs.AllocClientStatusFailed
+		alloc.DeploymentID = d.ID
+		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Canary:  true,
+			Healthy: helper.BoolToPtr(false),
+		}
+		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
+		allocs = append(allocs, alloc)
+	}
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
+	reconciler.now = now
+	r := reconciler.Compute()
+
+	// Verify that no follow up evals were created
+	evals := r.desiredFollowupEvals[tgName]
+	require.Nil(evals)
+
+	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		place:             2,
+		inplace:           0,
+		stop:              0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:  2,
+				Ignore: 9,
+			},
+		},
+	})
+
+	// Rescheduled allocs should have previous allocs
+	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
+	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
+	assertPlacementsAreRescheduled(t, 2, r.place)
+}
+
+// Tests rescheduling failed canary service allocations when one has reached its
+// reschedule limit
+func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
+	require := require.New(t)
+
+	// Set desired 5
+	job := mock.Job()
+	job.TaskGroups[0].Count = 5
+	tgName := job.TaskGroups[0].Name
+	now := time.Now()
+
 	// Set up reschedule policy and update stanza
 	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
 		Attempts: 1,
@@ -1969,7 +2093,7 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
 		alloc.ClientStatus = structs.AllocClientStatusRunning
 	}
 
-	// Create 2 canary allocations
+	// Create 2 healthy canary allocations
 	for i := 0; i < 2; i++ {
 		alloc := mock.Alloc()
 		alloc.Job = job
@@ -1988,20 +2112,41 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
 	// Mark the canaries as failed
 	allocs[5].ClientStatus = structs.AllocClientStatusFailed
+	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
 
 	// Mark one of them as already rescheduled once
 	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
-		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
+		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
 			PrevAllocID: uuid.Generate(),
 			PrevNodeID:  uuid.Generate(),
 		},
 	}}
+
 	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
 		StartedAt:  now.Add(-1 * time.Hour),
 		FinishedAt: now.Add(-10 * time.Second)}}
 	allocs[6].ClientStatus = structs.AllocClientStatusFailed
+	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
+
+	// Create 4 unhealthy canary allocations that have already been replaced
+	for i := 0; i < 4; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
+		alloc.ClientStatus = structs.AllocClientStatusFailed
+		alloc.DeploymentID = d.ID
+		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Canary:  true,
+			Healthy: helper.BoolToPtr(false),
+		}
+		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
+		allocs = append(allocs, alloc)
+	}
 
 	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
+	reconciler.now = now
 	r := reconciler.Compute()
 
 	// Verify that no follow up evals were created
@@ -2012,13 +2157,13 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
 	assertResults(t, r, &resultExpectation{
 		createDeployment:  nil,
 		deploymentUpdates: nil,
-		place:             2,
+		place:             1,
 		inplace:           0,
 		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
-				Place:  2,
-				Ignore: 5,
+				Place:  1,
+				Ignore: 10,
 			},
 		},
 	})
@@ -4144,6 +4289,7 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
 		alloc.NodeID = uuid.Generate()
 		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
 		alloc.TaskGroup = job.TaskGroups[0].Name
+		alloc.DeploymentID = d.ID
 		allocs = append(allocs, alloc)
 	}
 
diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index 2d8464f3b..264fcb1f0 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -323,10 +323,6 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
 func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID string, d *structs.Deployment) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
 	// If the allocation is part of an ongoing active deployment, we only allow it to reschedule
 	// if it has been marked eligible
-	if alloc.DeploymentID != "" && d != nil && alloc.DeploymentID == d.ID && d.Active() && !alloc.DesiredTransition.ShouldReschedule() {
-		return
-	}
-
 	if d != nil && alloc.DeploymentID == d.ID && d.Active() && !alloc.DesiredTransition.ShouldReschedule() {
 		return
 	}