From c62c246ad97c938d8e99ff0ffac7639c24251851 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Thu, 6 Jun 2019 15:04:32 -0400
Subject: [PATCH 1/3] Stop allocs that are to be rescheduled

Currently, when an alloc fails and is rescheduled, the alloc's desired
status remains "run" and the Nomad client may not free its resources.
Here, we ensure that an alloc is marked as stopped when it is
rescheduled.

Notice the Desired Status and Description before and after this change:

Before:

```
mars-2:nomad notnoop$ nomad alloc status 02aba49e
ID                   = 02aba49e
Eval ID              = bb9ed1d2
Name                 = example-reschedule.nodes[0]
Node ID              = 5853d547
Node Name            = mars-2.local
Job ID               = example-reschedule
Job Version          = 0
Client Status        = failed
Client Description   = Failed tasks
Desired Status       = run
Desired Description  =
Created              = 10s ago
Modified             = 5s ago
Replacement Alloc ID = d6bf872b

Task "payload" is "dead"
Task Resources
CPU        Memory          Disk     Addresses
0/100 MHz  24 MiB/300 MiB  300 MiB

Task Events:
Started At     = 2019-06-06T21:12:45Z
Finished At    = 2019-06-06T21:12:50Z
Total Restarts = 0
Last Restart   = N/A

Recent Events:
Time                       Type            Description
2019-06-06T17:12:50-04:00  Not Restarting  Policy allows no restarts
2019-06-06T17:12:50-04:00  Terminated      Exit Code: 1
2019-06-06T17:12:45-04:00  Started         Task started by client
2019-06-06T17:12:45-04:00  Task Setup      Building Task Directory
2019-06-06T17:12:45-04:00  Received        Task received by client
```

After:

```
ID                   = 5001ccd1
Eval ID              = 53507a02
Name                 = example-reschedule.nodes[0]
Node ID              = a3b04364
Node Name            = mars-2.local
Job ID               = example-reschedule
Job Version          = 0
Client Status        = failed
Client Description   = Failed tasks
Desired Status       = stop
Desired Description  = alloc was rescheduled because it failed
Created              = 13s ago
Modified             = 3s ago
Replacement Alloc ID = 7ba7ac20

Task "payload" is "dead"
Task Resources
CPU         Memory          Disk     Addresses
21/100 MHz  24 MiB/300 MiB  300 MiB

Task Events:
Started At     = 2019-06-06T21:22:50Z
Finished At    = 2019-06-06T21:22:55Z
Total Restarts = 0
Last Restart   = N/A

Recent Events:
Time                       Type            Description
2019-06-06T17:22:55-04:00  Not Restarting  Policy allows no restarts
2019-06-06T17:22:55-04:00  Terminated      Exit Code: 1
2019-06-06T17:22:50-04:00  Started         Task started by client
2019-06-06T17:22:50-04:00  Task Setup      Building Task Directory
2019-06-06T17:22:50-04:00  Received        Task received by client
```
---
 scheduler/generic_sched.go      |  3 +++
 scheduler/generic_sched_test.go |  6 +++++-
 scheduler/reconcile.go          | 16 ++++++++++++++
 scheduler/reconcile_test.go     | 37 +++++++++++++++++++++++----------
 scheduler/reconcile_util.go     |  2 +-
 5 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 376a826ee..9dacb0d73 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -39,6 +39,9 @@ const (
 	// node is tainted.
 	allocNodeTainted = "alloc not needed as node is tainted"
 
+	// allocRescheduled is the status used when an allocation failed and was rescheduled
+	allocRescheduled = "alloc was rescheduled because it failed"
+
 	// blockedEvalMaxPlanDesc is the description used for blocked evals that are
 	// a result of hitting the max number of plan attempts
 	blockedEvalMaxPlanDesc = "created due to placement conflicts"
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 8a9acc9bb..dee6a5228 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -2441,6 +2441,8 @@ func TestServiceSched_NodeDown(t *testing.T) {
 	allocs[9].DesiredStatus = structs.AllocDesiredStatusRun
 	allocs[9].ClientStatus = structs.AllocClientStatusComplete
 
+	toBeRescheduled := map[string]bool{allocs[8].ID: true}
+
 	// Mark some allocs as running
 	for i := 0; i < 4; i++ {
 		out := allocs[i]
@@ -2483,7 +2485,7 @@ func TestServiceSched_NodeDown(t *testing.T) {
 	plan := h.Plans[0]
 
 	// Test the scheduler marked all non-terminal allocations as lost
-	require.Len(t, plan.NodeUpdate[node.ID], len(toBeMigrated)+len(toBeLost))
+	require.Len(t, plan.NodeUpdate[node.ID], len(toBeMigrated)+len(toBeLost)+len(toBeRescheduled))
 
 	for _, out := range plan.NodeUpdate[node.ID] {
 		t.Run("alloc "+out.ID, func(t *testing.T) {
@@ -2494,6 +2496,8 @@ func TestServiceSched_NodeDown(t *testing.T) {
 				require.NotEqual(t, structs.AllocClientStatusLost, out.ClientStatus)
 			} else if toBeLost[out.ID] {
 				require.Equal(t, structs.AllocClientStatusLost, out.ClientStatus)
+			} else if toBeRescheduled[out.ID] {
+				require.Equal(t, structs.AllocClientStatusFailed, out.ClientStatus)
 			} else {
 				require.Fail(t, "unexpected alloc update")
 			}
diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index 65262ff93..6e069224d 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -128,6 +128,8 @@ type delayedRescheduleInfo struct {
 	// allocID is the ID of the allocation eligible to be rescheduled
 	allocID string
 
+	alloc *structs.Allocation
+
 	// rescheduleTime is the time to use in the delayed evaluation
 	rescheduleTime time.Time
 }
@@ -354,6 +356,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	// Create batched follow up evaluations for allocations that are
 	// reschedulable later and mark the allocations for in place updating
 	a.handleDelayedReschedules(rescheduleLater, all, tg.Name)
+	desiredChanges.Stop += uint64(len(rescheduleLater))
 
 	// Create a structure for choosing names. Seed with the taken names which is
 	// the union of untainted and migrating nodes (includes canaries)
@@ -425,6 +428,8 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	for _, p := range place {
 		a.result.place = append(a.result.place, p)
 	}
+	a.markStop(rescheduleNow, "", allocRescheduled)
+	desiredChanges.Stop += uint64(len(rescheduleNow))
 
 	min := helper.IntMin(len(place), limit)
 	limit -= min
@@ -449,6 +454,12 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 		if p.IsRescheduling() && !(a.deploymentFailed && prev != nil && a.deployment.ID == prev.DeploymentID) {
 			a.result.place = append(a.result.place, p)
 			desiredChanges.Place++
+
+			a.result.stop = append(a.result.stop, allocStopResult{
+				alloc:             prev,
+				statusDescription: allocRescheduled,
+			})
+			desiredChanges.Stop++
 		}
 	}
 }
@@ -850,6 +861,11 @@ func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRes
 	evals = append(evals, eval)
 
 	for _, allocReschedInfo := range rescheduleLater {
+		a.result.stop = append(a.result.stop, allocStopResult{
+			alloc:             allocReschedInfo.alloc,
+			statusDescription: allocRescheduled,
+		})
+
 		if allocReschedInfo.rescheduleTime.Sub(nextReschedTime) < batchedFailedAllocWindowSize {
 			allocIDToFollowupEvalID[allocReschedInfo.allocID] = eval.ID
 		} else {
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index 72e77677a..fc7336c06 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -1320,12 +1320,13 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) {
 		place:            0,
 		inplace:          0,
 		attributeUpdates: 1,
-		stop:             0,
+		stop:             1,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:         0,
 				InPlaceUpdate: 0,
 				Ignore:        4,
+				Stop:          1,
 			},
 		},
 	})
@@ -1402,12 +1403,13 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
 		place:            0,
 		inplace:          0,
 		attributeUpdates: 7,
-		stop:             0,
+		stop:             7,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:         0,
 				InPlaceUpdate: 0,
 				Ignore:        10,
+				Stop:          7,
 			},
 		},
 	})
@@ -1489,11 +1491,12 @@ func TestReconciler_RescheduleNow_Batch(t *testing.T) {
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		place:             1,
+		stop:              1,
 		inplace:           0,
-		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  1,
+				Stop:   1,
 				Ignore: 3,
 			},
 		},
 	})
@@ -1565,12 +1568,13 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) {
 		place:            1,
 		inplace:          0,
 		attributeUpdates: 1,
-		stop:             0,
+		stop:             1,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:         1,
 				InPlaceUpdate: 0,
 				Ignore:        4,
+				Stop:          1,
 			},
 		},
 	})
@@ -1763,11 +1767,12 @@ func TestReconciler_RescheduleNow_Service(t *testing.T) {
 		deploymentUpdates: nil,
 		place:             2,
 		inplace:           0,
-		stop:              0,
+		stop:              1,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  2,
 				Ignore: 3,
+				Stop:   1,
 			},
 		},
 	})
@@ -1841,10 +1846,11 @@ func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
 		deploymentUpdates: nil,
 		place:             1,
 		inplace:           0,
-		stop:              0,
+		stop:              1,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  1,
+				Stop:   1,
 				Ignore: 4,
 			},
 		},
 	})
@@ -1920,11 +1926,12 @@ func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		place:             1,
+		stop:              1,
 		inplace:           0,
-		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  1,
+				Stop:   1,
 				Ignore: 4,
 			},
 		},
 	})
@@ -2027,11 +2034,12 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		place:             2,
+		stop:              2,
 		inplace:           0,
-		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  2,
+				Stop:   2,
 				Ignore: 5,
 			},
 		},
 	})
@@ -2150,11 +2158,12 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		place:             2,
+		stop:              2,
 		inplace:           0,
-		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  2,
+				Stop:   2,
 				Ignore: 9,
 			},
 		},
 	})
@@ -2276,11 +2285,12 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		place:             1,
+		stop:              1,
 		inplace:           0,
-		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  1,
+				Stop:   1,
 				Ignore: 10,
 			},
 		},
 	})
@@ -4440,11 +4450,13 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
 	// Assert that no rescheduled placements were created
 	assertResults(t, r, &resultExpectation{
 		place:             5,
+		stop:              5,
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  5,
+				Stop:   5,
 				Ignore: 5,
 			},
 		},
@@ -4585,11 +4597,13 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T
 	// Assert that rescheduled placements were created
 	assertResults(t, r, &resultExpectation{
 		place:             10,
+		stop:              10,
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  10,
+				Stop:   10,
 				Ignore: 0,
 			},
 		},
@@ -4653,11 +4667,12 @@ func TestReconciler_ForceReschedule_Service(t *testing.T) {
 		createDeployment:  nil,
 		deploymentUpdates: nil,
 		place:             1,
+		stop:              1,
 		inplace:           0,
-		stop:              0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:  1,
+				Stop:   1,
 				Ignore: 4,
 			},
 		},
diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index 87fa4f936..ec6c26473 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -261,7 +261,7 @@ func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID str
 		if !eligibleNow {
 			untainted[alloc.ID] = alloc
 			if eligibleLater {
-				rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime})
+				rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, alloc, rescheduleTime})
 			}
 		} else {
 			rescheduleNow[alloc.ID] = alloc

From 34a66835db20eea7d5558c340f3f7b7a0d2b6ca7 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Thu, 13 Jun 2019 09:37:18 -0400
Subject: [PATCH 2/3] Don't stop rescheduleLater allocations

When an alloc is due to be rescheduled later (rescheduleLater), it goes
through the reconciler twice: once to be ignored and given a follow-up
eval, and again when the follow-up eval is processed, where it appears
as rescheduleNow. Here, we ignore such allocs on the first pass and only
mark them as stopped on the second pass, rather than stopping them twice.
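
For readers skimming the diffs, here is a minimal, self-contained sketch of
the intended reconciler behavior after these two patches. It is illustrative
only: the alloc and result types and the reconcile function below are
simplified stand-ins for this example, not Nomad's actual scheduler API.

```go
package main

import "fmt"

// Simplified stand-ins for the scheduler's allocation and result types.
type alloc struct {
	ID              string
	rescheduleNow   bool // failed and eligible to be rescheduled immediately
	rescheduleLater bool // failed and eligible to be rescheduled after a delay
}

type result struct {
	stop   []string // allocs whose desired status becomes "stop"
	ignore []string // allocs left untouched until their follow-up eval fires
}

// reconcile sketches the behavior after patches 1 and 2: allocs rescheduled
// now are marked stopped (with the "alloc was rescheduled because it failed"
// description), while allocs rescheduled later are only ignored here; they
// are stopped once their follow-up eval is processed and they show up as
// "reschedule now".
func reconcile(allocs []alloc) result {
	var r result
	for _, a := range allocs {
		switch {
		case a.rescheduleNow:
			r.stop = append(r.stop, a.ID)
		case a.rescheduleLater:
			r.ignore = append(r.ignore, a.ID)
		}
	}
	return r
}

func main() {
	r := reconcile([]alloc{
		{ID: "failed-now", rescheduleNow: true},
		{ID: "failed-later", rescheduleLater: true},
	})
	fmt.Println("stop:", r.stop, "ignore:", r.ignore)
}
```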
---
 scheduler/reconcile.go      |  6 ------
 scheduler/reconcile_test.go | 12 ++++++------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index 6e069224d..d3f20fe81 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -356,7 +356,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	// Create batched follow up evaluations for allocations that are
 	// reschedulable later and mark the allocations for in place updating
 	a.handleDelayedReschedules(rescheduleLater, all, tg.Name)
-	desiredChanges.Stop += uint64(len(rescheduleLater))
 
 	// Create a structure for choosing names. Seed with the taken names which is
 	// the union of untainted and migrating nodes (includes canaries)
@@ -861,11 +860,6 @@ func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRes
 	evals = append(evals, eval)
 
 	for _, allocReschedInfo := range rescheduleLater {
-		a.result.stop = append(a.result.stop, allocStopResult{
-			alloc:             allocReschedInfo.alloc,
-			statusDescription: allocRescheduled,
-		})
-
 		if allocReschedInfo.rescheduleTime.Sub(nextReschedTime) < batchedFailedAllocWindowSize {
 			allocIDToFollowupEvalID[allocReschedInfo.allocID] = eval.ID
 		} else {
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index fc7336c06..e12b52bd6 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -1320,13 +1320,13 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) {
 		place:            0,
 		inplace:          0,
 		attributeUpdates: 1,
-		stop:             1,
+		stop:             0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:         0,
 				InPlaceUpdate: 0,
 				Ignore:        4,
-				Stop:          1,
+				Stop:          0,
 			},
 		},
 	})
@@ -1403,13 +1403,13 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
 		place:            0,
 		inplace:          0,
 		attributeUpdates: 7,
-		stop:             7,
+		stop:             0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:         0,
 				InPlaceUpdate: 0,
 				Ignore:        10,
-				Stop:          7,
+				Stop:          0,
 			},
 		},
 	})
@@ -1568,13 +1568,13 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) {
 		place:            1,
 		inplace:          0,
 		attributeUpdates: 1,
-		stop:             1,
+		stop:             0,
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
 				Place:         1,
 				InPlaceUpdate: 0,
 				Ignore:        4,
-				Stop:          1,
+				Stop:          0,
 			},
 		},
 	})

From 25b44b18db53af1f95fa4f5ee02107a091a19356 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Thu, 13 Jun 2019 16:41:19 -0400
Subject: [PATCH 3/3] Test no-reschedule behavior for service/batch jobs

---
 scheduler/reconcile_test.go | 164 ++++++++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index e12b52bd6..132104afa 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -4683,3 +4683,167 @@ func TestReconciler_ForceReschedule_Service(t *testing.T) {
 	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
 	assertPlacementsAreRescheduled(t, 1, r.place)
 }
+
+// Tests behavior of service failure with rescheduling policy preventing rescheduling:
+// new allocs should be placed to satisfy the job count, and current allocations are
+// left unmodified
+func TestReconciler_RescheduleNot_Service(t *testing.T) {
+	require := require.New(t)
+
+	// Set desired 5
+	job := mock.Job()
+	job.TaskGroups[0].Count = 5
+	tgName := job.TaskGroups[0].Name
+	now := time.Now()
+
+	// Set up reschedule policy and update stanza
+	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
+		Attempts:      0,
+		Interval:      24 * time.Hour,
+		Delay:         5 * time.Second,
+		DelayFunction: "",
+		MaxDelay:      1 * time.Hour,
+		Unlimited:     false,
+	}
+	job.TaskGroups[0].Update = noCanaryUpdate
+
+	// Create 5 existing allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 5; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+	}
+
+	// Mark two as failed
+	allocs[0].ClientStatus = structs.AllocClientStatusFailed
+
+	// Mark one of them as already rescheduled once
+	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
+		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
+			PrevAllocID: uuid.Generate(),
+			PrevNodeID:  uuid.Generate(),
+		},
+	}}
+	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
+		StartedAt:  now.Add(-1 * time.Hour),
+		FinishedAt: now.Add(-10 * time.Second)}}
+	allocs[1].ClientStatus = structs.AllocClientStatusFailed
+
+	// Mark one as desired state stop
+	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
+
+	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
+	r := reconciler.Compute()
+
+	// Verify that no follow up evals were created
+	evals := r.desiredFollowupEvals[tgName]
+	require.Nil(evals)
+
+	// no rescheduling, ignore all 4 allocs
+	// but place one to substitute allocs[4] that was stopped explicitly
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		place:             1,
+		inplace:           0,
+		stop:              0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:  1,
+				Ignore: 4,
+				Stop:   0,
+			},
+		},
+	})
+
+	// none of the placement should have preallocs or rescheduled
+	assertPlaceResultsHavePreviousAllocs(t, 0, r.place)
+	assertPlacementsAreRescheduled(t, 0, r.place)
+}
+
+// Tests behavior of batch failure with rescheduling policy preventing rescheduling:
+// current allocations are left unmodified and no follow up
+func TestReconciler_RescheduleNot_Batch(t *testing.T) {
+	require := require.New(t)
+	// Set desired 4
+	job := mock.Job()
+	job.TaskGroups[0].Count = 4
+	now := time.Now()
+	// Set up reschedule policy
+	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
+		Attempts:      0,
+		Interval:      24 * time.Hour,
+		Delay:         5 * time.Second,
+		DelayFunction: "constant",
+	}
+	tgName := job.TaskGroups[0].Name
+	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
+	var allocs []*structs.Allocation
+	for i := 0; i < 6; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+	}
+	// Mark 3 as failed with restart tracking info
+	allocs[0].ClientStatus = structs.AllocClientStatusFailed
+	allocs[0].NextAllocation = allocs[1].ID
+	allocs[1].ClientStatus = structs.AllocClientStatusFailed
+	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
+		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
+			PrevAllocID: allocs[0].ID,
+			PrevNodeID:  uuid.Generate(),
+		},
+	}}
+	allocs[1].NextAllocation = allocs[2].ID
+	allocs[2].ClientStatus = structs.AllocClientStatusFailed
+	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
+		StartedAt:  now.Add(-1 * time.Hour),
+		FinishedAt: now.Add(-5 * time.Second)}}
+	allocs[2].FollowupEvalID = uuid.Generate()
+	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
+		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
+			PrevAllocID: allocs[0].ID,
+			PrevNodeID:  uuid.Generate(),
+		},
+		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
+			PrevAllocID: allocs[1].ID,
+			PrevNodeID:  uuid.Generate(),
+		},
+	}}
+	// Mark one as complete
+	allocs[5].ClientStatus = structs.AllocClientStatusComplete
+
+	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
+	reconciler.now = now
+	r := reconciler.Compute()
+
+	// Verify that no follow up evals were created
+	evals := r.desiredFollowupEvals[tgName]
+	require.Nil(evals)
+
+	// No reschedule attempts were made and all allocs are untouched
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		place:             0,
+		stop:              0,
+		inplace:           0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:  0,
+				Stop:   0,
+				Ignore: 4,
+			},
+		},
+	})
+
+}