Fix edge case in reconciler where service jobs with ClientStatusComplete were not replaced
@@ -1515,6 +1515,55 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) {
 	r.inplaceUpdate[0].EvalID = evals[0].ID
 }
 
+// Tests service allocations with client status complete
+func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
+	// Set desired 5
+	job := mock.Job()
+	job.TaskGroups[0].Count = 5
+
+	// Set up reschedule policy
+	delayDur := 15 * time.Second
+	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}
+
+	// Create 5 existing allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 5; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+		alloc.DesiredStatus = structs.AllocDesiredStatusRun
+	}
+
+	// Mark one as client status complete
+	allocs[4].ClientStatus = structs.AllocClientStatusComplete
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil)
+	r := reconciler.Compute()
+
+	// Should place a new placement for the alloc that was marked complete
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		place:             1,
+		inplace:           0,
+		stop:              0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:         1,
+				InPlaceUpdate: 0,
+				Ignore:        4,
+			},
+		},
+	})
+
+	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
+}
+
 // Tests rescheduling failed service allocations with desired state stop
 func TestReconciler_RescheduleNow_Service(t *testing.T) {
 	require := require.New(t)
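A note on the final assertion in the new test: assertNamesHaveIndexes checks that the single replacement reuses the name index of the completed allocation. A minimal standalone sketch, assuming alloc names follow the "jobID.group[index]" pattern that structs.AllocName produces; the allocName helper below is a hypothetical stand-in for illustration, not Nomad's API:

    // Minimal sketch - not Nomad code. Assumes alloc names follow the
    // "jobID.group[index]" convention produced by structs.AllocName.
    package main

    import "fmt"

    // allocName is a hypothetical stand-in for structs.AllocName.
    func allocName(jobID, group string, index uint) string {
    	return fmt.Sprintf("%s.%s[%d]", jobID, group, index)
    }

    func main() {
    	// allocs[4] is the one marked complete, so the one new placement
    	// should carry index 4 - which is what intRange(4, 4) asserts.
    	fmt.Println(allocName("example-job", "web", 4)) // example-job.web[4]
    }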
@@ -279,18 +279,18 @@ func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow
 // updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation
 // should be rescheduled now, later or left in the untainted set
 func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
-	shouldAllow := true
+	shouldFilter := false
 	if !batch {
-		// For service type jobs we ignore allocs whose desired state is stop/evict
-		// everything else is either rescheduleable or untainted
-		shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict
+		// For service type jobs we filter terminal allocs
+		// except for those with ClientStatusFailed - those are checked for reschedulability
+		shouldFilter = alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed
 	}
 	rescheduleTime, eligible := alloc.NextRescheduleTime()
 	// We consider a time difference of less than 5 seconds to be eligible
 	// because we collapse allocations that failed within 5 seconds into a single evaluation
 	if eligible && now.After(rescheduleTime) {
 		rescheduleNow = true
-	} else if shouldAllow {
+	} else if !shouldFilter {
 		untainted = true
 		if eligible && alloc.FollowupEvalID == "" {
 			rescheduleLater = true
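To see why this hunk fixes the replacement bug, here is a minimal standalone sketch contrasting the old shouldAllow predicate with the new shouldFilter predicate for a service allocation whose client status is complete but whose desired status is still run. It uses plain strings in place of Nomad's structs and simplifies TerminalStatus to client status only:

    // Minimal sketch - simplified from the diff above, not Nomad code.
    package main

    import "fmt"

    func main() {
    	desired, client := "run", "complete"

    	// Old logic: desired status is "run", not stop/evict, so the alloc
    	// stayed in the untainted set and no replacement was placed (the bug).
    	shouldAllow := desired != "stop" && desired != "evict"
    	fmt.Println("old untainted:", shouldAllow) // true -> not replaced

    	// New logic: the alloc is terminal but did not fail, so it is
    	// filtered from the untainted set and the reconciler places a
    	// replacement for it.
    	terminal := client == "complete" || client == "failed"
    	shouldFilter := terminal && client != "failed"
    	fmt.Println("new untainted:", !shouldFilter) // false -> replaced
    }

Failed allocations are deliberately excluded from the filter so they still flow through NextRescheduleTime and the reschedule-now/later branches.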