Fix edge case in reconciler where service job allocations with ClientStatusComplete were not replaced

This commit is contained in:
Preetha Appan
2018-03-23 18:41:00 -05:00
parent 9f74c6a378
commit f401044600
2 changed files with 54 additions and 5 deletions

View File

@@ -1515,6 +1515,55 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) {
r.inplaceUpdate[0].EvalID = evals[0].ID
}
// TestReconciler_Service_ClientStatusComplete checks that, for a service job,
// an allocation whose client status is complete results in exactly one new
// placement carrying the same name index, while the remaining running
// allocations are ignored.
func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
	const (
		desiredCount = 5
		completedIdx = desiredCount - 1
	)

	// Job asking for desiredCount instances, with a reschedule policy attached.
	job := mock.Job()
	job.TaskGroups[0].Count = desiredCount
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts: 1,
		Interval: 24 * time.Hour,
		Delay:    15 * time.Second,
		MaxDelay: 1 * time.Hour,
	}
	groupName := job.TaskGroups[0].Name

	// One running allocation per desired instance, each on its own node.
	allocs := make([]*structs.Allocation, 0, desiredCount)
	for idx := 0; idx < desiredCount; idx++ {
		a := mock.Alloc()
		a.Job = job
		a.JobID = job.ID
		a.NodeID = uuid.Generate()
		a.Name = structs.AllocName(job.ID, groupName, uint(idx))
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DesiredStatus = structs.AllocDesiredStatusRun
		allocs = append(allocs, a)
	}

	// Flip the last allocation to client status complete; its desired status
	// is left as run.
	allocs[completedIdx].ClientStatus = structs.AllocClientStatusComplete

	r := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil).Compute()

	// Expect a single placement replacing the completed allocation and
	// nothing else touched.
	expected := &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			groupName: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	}
	assertResults(t, r, expected)

	// The new placement must reuse the name index of the completed allocation.
	assertNamesHaveIndexes(t, intRange(completedIdx, completedIdx), placeResultsToNames(r.place))
}
// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_RescheduleNow_Service(t *testing.T) {
require := require.New(t)

View File

@@ -279,18 +279,18 @@ func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow
// updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation
// should be rescheduled now, later or left in the untainted set
func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
shouldAllow := true
shouldFilter := false
if !batch {
// For service type jobs we ignore allocs whose desired state is stop/evict
// everything else is either rescheduleable or untainted
shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict
// For service type jobs we filter terminal allocs
// except for those with ClientStatusFailed - those are checked for reschedulability
shouldFilter = alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed
}
rescheduleTime, eligible := alloc.NextRescheduleTime()
// We consider a time difference of less than 5 seconds to be eligible
// because we collapse allocations that failed within 5 seconds into a single evaluation
if eligible && now.After(rescheduleTime) {
rescheduleNow = true
} else if shouldAllow {
} else if !shouldFilter {
untainted = true
if eligible && alloc.FollowupEvalID == "" {
rescheduleLater = true