diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 994c782c6..603292b4b 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -42,6 +42,10 @@ const ( // blockedEvalFailedPlacements is the description used for blocked evals // that are a result of failing to place all allocations. blockedEvalFailedPlacements = "created to place remaining allocations" + + // maxPastRescheduleEvents is the maximum number of past reschedule event + // that we track when unlimited rescheduling is enabled + maxPastRescheduleEvents = 5 ) // SetStatusError is used to set the status of the evaluation to the given error @@ -206,6 +210,7 @@ func (s *GenericScheduler) process() (bool, error) { numTaskGroups = len(s.job.TaskGroups) } s.queuedAllocs = make(map[string]int, numTaskGroups) + s.followUpEvals = nil // Create a plan s.plan = s.eval.MakePlan(s.job) @@ -486,9 +491,10 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul // If the new allocation is replacing an older allocation then we // set the record the older allocation id so that they are chained if prevAllocation != nil { + now := time.Now() alloc.PreviousAllocation = prevAllocation.ID if missing.IsRescheduling() { - updateRescheduleTracker(alloc, prevAllocation, tg.ReschedulePolicy, time.Now()) + updateRescheduleTracker(alloc, prevAllocation, tg.ReschedulePolicy, now) } } @@ -544,8 +550,6 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs return selectOptions } -const max_past_reschedule_events = 5 - // updateRescheduleTracker carries over previous restart attempts and adds the most recent restart func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation, reschedPolicy *structs.ReschedulePolicy, now time.Time) { var rescheduleEvents []*structs.RescheduleEvent @@ -554,10 +558,10 @@ func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation if reschedPolicy != nil { interval = reschedPolicy.Interval } - // if attempts is set copy all events in the interval range + // If attempts is set copy all events in the interval range if reschedPolicy.Attempts > 0 { for _, reschedEvent := range prev.RescheduleTracker.Events { - timeDiff := time.Now().UTC().UnixNano() - reschedEvent.RescheduleTime + timeDiff := now.UnixNano() - reschedEvent.RescheduleTime // Only copy over events that are within restart interval // This keeps the list of events small in cases where there's a long chain of old restart events if interval > 0 && timeDiff <= interval.Nanoseconds() { @@ -565,16 +569,14 @@ func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation } } } else { - // for unlimited restarts, only copy the last n - copied := 0 - for i := len(prev.RescheduleTracker.Events) - 1; i >= 0 && copied < max_past_reschedule_events; i-- { + // Only copy the last n if unlimited is set + start := 0 + if len(prev.RescheduleTracker.Events) > maxPastRescheduleEvents { + start = len(prev.RescheduleTracker.Events) - maxPastRescheduleEvents + } + for i := start; i < len(prev.RescheduleTracker.Events); i++ { reschedEvent := prev.RescheduleTracker.Events[i] rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy()) - copied++ - } - // reverse it to get the correct order - for left, right := 0, len(rescheduleEvents)-1; left < right; left, right = left+1, right-1 { - rescheduleEvents[left], rescheduleEvents[right] = rescheduleEvents[right], rescheduleEvents[left] } } } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index ee7a775bc..be0d964e3 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -4390,7 +4390,7 @@ func Test_updateRescheduleTracker(t *testing.T) { Delay: 5 * time.Second, }, { - RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + RescheduleTime: t1.Add(-70 * time.Minute).UnixNano(), PrevAllocID: prevAlloc.ID, PrevNodeID: prevAlloc.NodeID, Delay: 10 * time.Second, diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 5eedc91be..0fc0b2a83 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -12,6 +12,12 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +const ( + // batchedFailedAllocWindowSize is the window size used + // to batch up failed allocations before creating an eval + batchedFailedAllocWindowSize = 5 * time.Second +) + // allocUpdateType takes an existing allocation and a new job definition and // returns whether the allocation can ignore the change, requires a destructive // update, or can be inplace updated. If it can be inplace updated, an updated @@ -290,8 +296,6 @@ func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescript } } -const batchedFailedAllocWindowSize = 5 * time.Second - // computeGroup reconciles state for a particular task group. It returns whether // the deployment it is for is complete with regards to the task group. func (a *allocReconciler) computeGroup(group string, all allocSet) bool { @@ -343,7 +347,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy) // Create batched follow up evaluations for allocations that are reschedulable later - rescheduleLaterAllocs := make(map[string]*structs.Allocation) + var rescheduleLaterAllocs map[string]*structs.Allocation if len(rescheduleLater) > 0 { rescheduleLaterAllocs = a.handleDelayedReschedules(rescheduleLater, all, tg.Name) } @@ -831,6 +835,9 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted, re return } + +// handleDelayedReschedules creates batched followup evaluations with the WaitUntil field set +// for allocations that are eligible to be rescheduled later func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) allocSet { // Sort by time sort.Slice(rescheduleLater, func(i, j int) bool {