From fc50ab930fbe2d8925790cd199a987307c62c3a4 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 29 Mar 2018 13:28:37 -0500 Subject: [PATCH] Refactored for readability, pair programmed with @dadgar --- scheduler/reconcile_test.go | 3 +- scheduler/reconcile_util.go | 115 +++++++++++++++++++++++------------- 2 files changed, 76 insertions(+), 42 deletions(-) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index e82a7b13a..8a24646af 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -3873,7 +3873,6 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { new.DesiredStatus = structs.AllocDesiredStatusStop new.ClientStatus = structs.AllocClientStatusFailed allocs = append(allocs, new) - t.Logf("Canary %q", new.ID) } reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil) @@ -3898,7 +3897,7 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { job.TaskGroups[0].Name: { Stop: 0, InPlaceUpdate: 0, - Ignore: 6, + Ignore: 3, }, }, }) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 89bb739cd..33428d0a3 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -231,68 +231,103 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi } // filterByRescheduleable filters the allocation set to return the set of allocations that are either -// terminal or running, and a set of allocations that must be rescheduled now. Allocations that can be rescheduled -// at a future time are also returned so that we can create follow up evaluations for them +// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled +// at a future time are also returned so that we can create follow up evaluations for them. Allocs are +// skipped or considered untainted according to logic defined in shouldFilter method. func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) { untainted = make(map[string]*structs.Allocation) rescheduleNow = make(map[string]*structs.Allocation) now := time.Now() for _, alloc := range a { - var isUntainted, eligibleNow, eligibleLater bool + var eligibleNow, eligibleLater bool var rescheduleTime time.Time - if isBatch { - // Allocs from batch jobs should be filtered when the desired status - // is terminal and the client did not finish or when the client - // status is failed so that they will be replaced. If they are - // complete but not failed, they shouldn't be replaced. - switch alloc.DesiredStatus { - case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: - if alloc.RanSuccessfully() { - untainted[alloc.ID] = alloc - } - continue - default: - } - if alloc.NextAllocation == "" { - // Ignore allocs that have already been rescheduled - isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, true) - } - } else { - // Ignore allocs that have already been rescheduled - if alloc.NextAllocation == "" { - isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, false) - } + + // Ignore allocs that have already been rescheduled + if alloc.NextAllocation != "" { + continue } + + isUntainted, skip := shouldFilter(alloc, isBatch) if isUntainted { untainted[alloc.ID] = alloc } - if eligibleNow { + if isUntainted || skip { + continue + } + + // Only failed allocs with desired state run get to this point + // If the failed alloc is not eligible for rescheduling now we add it to the untainted set + eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now) + if !eligibleNow { + untainted[alloc.ID] = alloc + if eligibleLater { + rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime}) + } + } else { rescheduleNow[alloc.ID] = alloc - } else if eligibleLater { - rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime}) } } return } +/* shouldFilter returns whether the alloc should be skipped or considered untainted +Filtering logic for batch jobs: + If complete, and ran successfully - don't skip, set untainted + If desired state is stop, skip + +Filtering logic for service jobs + If desired state is stop/evict - skip + If client status is complete/lost - skip +*/ +func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, skip bool) { + // Allocs from batch jobs should be filtered when the desired status + // is terminal and the client did not finish or when the client + // status is failed so that they will be replaced. If they are + // complete but not failed, they shouldn't be replaced. + if isBatch { + switch alloc.DesiredStatus { + case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: + if alloc.RanSuccessfully() { + return true, false + } + return false, true + default: + } + + switch alloc.ClientStatus { + case structs.AllocClientStatusFailed: + default: + return true, false + } + return false, false + } + + // Handle service jobs + switch alloc.DesiredStatus { + case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: + return false, true + default: + } + + switch alloc.ClientStatus { + case structs.AllocClientStatusComplete, structs.AllocClientStatusLost: + return false, true + default: + } + return false, false +} + // updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation // should be rescheduled now, later or left in the untainted set -func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { - shouldFilter := false - if !batch { - // For service type jobs we filter terminal allocs - // except for those with ClientStatusFailed - those are checked for reschedulability - shouldFilter = alloc.DesiredStatus == structs.AllocDesiredStatusStop || (alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed) - } +func updateByReschedulable(alloc *structs.Allocation, now time.Time) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { rescheduleTime, eligible := alloc.NextRescheduleTime() if eligible && now.After(rescheduleTime) { rescheduleNow = true - } else if !shouldFilter { - untainted = true - if eligible && alloc.FollowupEvalID == "" { - rescheduleLater = true - } + return + } + if eligible && alloc.FollowupEvalID == "" { + rescheduleLater = true } return }