diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 604347fa5..94b3746b6 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -1515,6 +1515,55 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) { r.inplaceUpdate[0].EvalID = evals[0].ID } +// Tests that a service job with one alloc whose client status is complete gets exactly one replacement placement while the other four allocs are ignored +func TestReconciler_Service_ClientStatusComplete(t *testing.T) { + // Set the desired task group count to 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + + // Set up reschedule policy: 1 attempt per 24h interval, 15s delay, 1h max delay + delayDur := 15 * time.Second + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour} + + // Create 5 existing allocations, each with client status running and desired status run + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + } + + // Mark the last alloc (index 4) as client status complete, leaving its desired status as run + allocs[4].ClientStatus = structs.AllocClientStatusComplete + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + // Should create one new placement for the alloc that was marked complete, with no stops and no in-place updates + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + InPlaceUpdate: 0, + Ignore: 4, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) + +} + // Tests rescheduling failed service allocations with desired state stop func TestReconciler_RescheduleNow_Service(t *testing.T) { require := require.New(t) diff --git a/scheduler/reconcile_util.go 
b/scheduler/reconcile_util.go index a7b0b8141..d571a8b4e 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -279,18 +279,18 @@ func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow // updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation // should be rescheduled now, later or left in the untainted set func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { - shouldAllow := true + shouldFilter := false if !batch { - // For service type jobs we ignore allocs whose desired state is stop/evict - // everything else is either rescheduleable or untainted - shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict + // For service type jobs we filter terminal allocs + // except for those with ClientStatusFailed - those are checked for reschedulability + shouldFilter = alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed } rescheduleTime, eligible := alloc.NextRescheduleTime() // We consider a time difference of less than 5 seconds to be eligible // because we collapse allocations that failed within 5 seconds into a single evaluation if eligible && now.After(rescheduleTime) { rescheduleNow = true - } else if shouldAllow { + } else if !shouldFilter { untainted = true if eligible && alloc.FollowupEvalID == "" { rescheduleLater = true