diff --git a/e2e/rescheduling/input/norescheduling.hcl b/e2e/rescheduling/input/norescheduling.hcl
index 63ca9a0df..940548b74 100644
--- a/e2e/rescheduling/input/norescheduling.hcl
+++ b/e2e/rescheduling/input/norescheduling.hcl
@@ -1,30 +1,35 @@
 job "test1" {
   datacenters = ["dc1"]
-  type = "service"
+  type        = "service"
 
   group "t1" {
     count = 3
+
     task "t1" {
       driver = "raw_exec"
+
       config {
-        command = "bash"
-        args = ["-c", "lol 5000"]
+        command = "bash"
+        args    = ["-c", "lol 5000"]
       }
     }
+
     update {
-      max_parallel = 1
-      min_healthy_time = "10s"
-      auto_revert = false
+      max_parallel     = 1
+      min_healthy_time = "10s"
+      auto_revert      = false
     }
+
     restart {
-      attempts = 0
-      delay = "0s"
-      mode = "fail"
-    }
-    reschedule {
       attempts = 0
-      interval = "5m"
+      delay    = "0s"
+      mode     = "fail"
+    }
+
+    reschedule {
+      attempts  = 0
+      interval  = "5m"
       unlimited = false
-    }
+    }
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/input/reschedule_success.hcl b/e2e/rescheduling/input/reschedule_success.hcl
index 3ac127e10..76b3561c5 100644
--- a/e2e/rescheduling/input/reschedule_success.hcl
+++ b/e2e/rescheduling/input/reschedule_success.hcl
@@ -1,25 +1,29 @@
 job "test3" {
   datacenters = ["dc1"]
-  type = "service"
+  type        = "service"
 
   group "t3" {
     count = 3
+
     task "t3" {
       driver = "raw_exec"
+
       config {
-        command = "bash"
-        args = ["-c", "a=`cksum <<< \"${NOMAD_ALLOC_ID}\"| cut -d ' ' -f1`; if ! (( a % 2 )); then sleep 5000; else exit -1; fi"]
+        command = "bash"
+        args    = ["-c", "a=`cksum <<< \"${NOMAD_ALLOC_ID}\"| cut -d ' ' -f1`; if ! (( a % 2 )); then sleep 5000; else exit -1; fi"]
       }
     }
+
     restart {
-      attempts = 0
-      delay = "0s"
-      mode = "fail"
+      attempts = 0
+      delay    = "0s"
+      mode     = "fail"
     }
+
     reschedule {
-      attempts = 2
-      interval = "5m"
+      attempts  = 2
+      interval  = "5m"
       unlimited = false
-    }
+    }
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/input/rescheduling_canary.hcl b/e2e/rescheduling/input/rescheduling_canary.hcl
index 9979b3d7c..1a848ba75 100644
--- a/e2e/rescheduling/input/rescheduling_canary.hcl
+++ b/e2e/rescheduling/input/rescheduling_canary.hcl
@@ -1,31 +1,37 @@
 job "test5" {
   datacenters = ["dc1"]
-  type = "service"
+  type        = "service"
 
   group "t5" {
     count = 3
+
     task "t5" {
       driver = "raw_exec"
+
       config {
-        command = "bash"
-        args = ["-c", "sleep 5000"]
+        command = "bash"
+        args    = ["-c", "sleep 5000"]
      }
    }
+
     update {
-      max_parallel = 1
-      canary = 1
-      min_healthy_time = "1s"
-      auto_revert = false
+      max_parallel     = 1
+      canary           = 1
+      min_healthy_time = "1s"
+      auto_revert      = false
     }
+
     restart {
-      attempts = 0
-      delay = "0s"
-      mode = "fail"
+      attempts = 0
+      delay    = "0s"
+      mode     = "fail"
     }
+
     reschedule {
-      attempts = 3
-      interval = "5m"
+      attempts  = 3
+      interval  = "5m"
+      delay     = "5s"
       unlimited = false
-    }
+    }
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/input/rescheduling_canary_autorevert.hcl b/e2e/rescheduling/input/rescheduling_canary_autorevert.hcl
new file mode 100644
index 000000000..cef2e5a37
--- /dev/null
+++ b/e2e/rescheduling/input/rescheduling_canary_autorevert.hcl
@@ -0,0 +1,34 @@
+job "test" {
+  datacenters = ["dc1"]
+  type        = "service"
+
+  group "t1" {
+    count = 3
+
+    task "t1" {
+      driver = "raw_exec"
+
+      config {
+        command = "bash"
+        args    = ["-c", "sleep 5000"]
+      }
+    }
+
+    update {
+      canary           = 3
+      max_parallel     = 1
+      min_healthy_time = "1s"
+      healthy_deadline = "1m"
+      auto_revert      = true
+    }
+
+    restart {
+      attempts = 0
+      mode     = "fail"
+    }
+
+    reschedule {
+      unlimited = "true"
+    }
+  }
+}
diff --git a/e2e/rescheduling/input/rescheduling_default.hcl b/e2e/rescheduling/input/rescheduling_default.hcl
index 56a829d7a..6f1f45c62 100644
--- a/e2e/rescheduling/input/rescheduling_default.hcl
+++ b/e2e/rescheduling/input/rescheduling_default.hcl
@@ -1,21 +1,23 @@
 job "test" {
   datacenters = ["dc1"]
-  type = "service"
+  type        = "service"
 
   group "t" {
     count = 3
+
     task "t" {
       driver = "raw_exec"
+
       config {
         command = "bash"
         args    = ["-c", "lol 5000"]
       }
     }
+
     restart {
       attempts = 0
       delay    = "0s"
       mode     = "fail"
     }
-
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/input/rescheduling_fail.hcl b/e2e/rescheduling/input/rescheduling_fail.hcl
index 02ea9c132..117069e5a 100644
--- a/e2e/rescheduling/input/rescheduling_fail.hcl
+++ b/e2e/rescheduling/input/rescheduling_fail.hcl
@@ -1,25 +1,29 @@
 job "test2" {
   datacenters = ["dc1"]
-  type = "service"
+  type        = "service"
 
   group "t2" {
     count = 3
+
     task "t2" {
       driver = "raw_exec"
+
       config {
-        command = "bash"
-        args = ["-c", "lol 5000"]
+        command = "bash"
+        args    = ["-c", "lol 5000"]
      }
    }
+
     restart {
-      attempts = 0
-      delay = "0s"
-      mode = "fail"
+      attempts = 0
+      delay    = "0s"
+      mode     = "fail"
     }
+
     reschedule {
-      attempts = 2
-      interval = "5m"
+      attempts  = 2
+      interval  = "5m"
       unlimited = false
-    }
+    }
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/input/rescheduling_maxp.hcl b/e2e/rescheduling/input/rescheduling_maxp.hcl
new file mode 100644
index 000000000..2ab26b80f
--- /dev/null
+++ b/e2e/rescheduling/input/rescheduling_maxp.hcl
@@ -0,0 +1,35 @@
+job "demo2" {
+  datacenters = ["dc1"]
+  type        = "service"
+
+  group "t2" {
+    count = 3
+
+    task "t2" {
+      driver = "raw_exec"
+
+      config {
+        command = "bash"
+        args    = ["-c", "sleep 5000"]
+      }
+    }
+
+    update {
+      max_parallel     = 1
+      min_healthy_time = "5s"
+      healthy_deadline = "10m"
+      auto_revert      = false
+    }
+
+    restart {
+      attempts = 0
+      mode     = "fail"
+    }
+
+    reschedule {
+      unlimited = "true"
+
+      # attempts = 0
+    }
+  }
+}
diff --git a/e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl b/e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl
new file mode 100644
index 000000000..39056ae89
--- /dev/null
+++ b/e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl
@@ -0,0 +1,35 @@
+job "demo3" {
+  datacenters = ["dc1"]
+  type        = "service"
+
+  group "t2" {
+    count = 3
+
+    task "t2" {
+      driver = "raw_exec"
+
+      config {
+        command = "bash"
+        args    = ["-c", "sleep 5000"]
+      }
+    }
+
+    update {
+      max_parallel     = 1
+      min_healthy_time = "5s"
+      healthy_deadline = "10m"
+      auto_revert      = true
+    }
+
+    restart {
+      attempts = 0
+      mode     = "fail"
+    }
+
+    reschedule {
+      unlimited = "true"
+
+      # attempts = 0
+    }
+  }
+}
diff --git a/e2e/rescheduling/input/rescheduling_system.hcl b/e2e/rescheduling/input/rescheduling_system.hcl
index 91f95fbd5..4291e860b 100644
--- a/e2e/rescheduling/input/rescheduling_system.hcl
+++ b/e2e/rescheduling/input/rescheduling_system.hcl
@@ -1,20 +1,23 @@
 job "test" {
   datacenters = ["dc1"]
-  type = "system"
+  type        = "system"
 
   group "t" {
     count = 1
+
     task "t" {
       driver = "raw_exec"
+
       config {
         command = "bash"
         args    = ["-c", "lol 5000"]
       }
     }
+
     restart {
       attempts = 0
       delay    = "0s"
       mode     = "fail"
     }
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/input/rescheduling_update.hcl b/e2e/rescheduling/input/rescheduling_update.hcl
index 844fa8718..d4ecd6481 100644
--- a/e2e/rescheduling/input/rescheduling_update.hcl
+++ b/e2e/rescheduling/input/rescheduling_update.hcl
@@ -1,30 +1,35 @@
 job "test4" {
   datacenters = ["dc1"]
-  type = "service"
+  type        = "service"
 
   group "t4" {
     count = 3
+
     task "t4" {
       driver = "raw_exec"
+
       config {
-        command = "bash"
-        args = ["-c", "sleep 5000"]
+        command = "bash"
+        args    = ["-c", "sleep 5000"]
       }
     }
+
     update {
-      max_parallel = 1
-      min_healthy_time = "10s"
-      auto_revert = false
+      max_parallel     = 1
+      min_healthy_time = "10s"
+      auto_revert      = false
     }
+
     restart {
-      attempts = 0
-      delay = "0s"
-      mode = "fail"
+      attempts = 0
+      delay    = "0s"
+      mode     = "fail"
     }
+
     reschedule {
-      attempts = 3
-      interval = "5m"
+      attempts  = 3
+      interval  = "5m"
       unlimited = false
-    }
+    }
   }
-}
\ No newline at end of file
+}
diff --git a/e2e/rescheduling/server_side_restarts_test.go b/e2e/rescheduling/server_side_restarts_test.go
index 96bfc1155..322636cb1 100644
--- a/e2e/rescheduling/server_side_restarts_test.go
+++ b/e2e/rescheduling/server_side_restarts_test.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/helper/uuid"
+	"github.com/hashicorp/nomad/nomad/structs"
 )
 
 var _ = Describe("Server Side Restart Tests", func() {
@@ -43,12 +44,27 @@
 			Expect(err).ShouldNot(HaveOccurred())
 			var ret []string
 			for _, a := range allocs {
-				if a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0 {
+				if (a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0) || a.FollowupEvalID != "" {
 					ret = append(ret, a.ClientStatus)
 				}
 			}
 			return ret
 		}
+
+		// deploymentStatus is a helper function that returns deployment status of all deployments
+		// sorted by time
+		deploymentStatus = func() []string {
+			deploys, _, err := jobs.Deployments(*job.ID, nil)
+			Expect(err).ShouldNot(HaveOccurred())
+			var ret []string
+			sort.Slice(deploys, func(i, j int) bool {
+				return deploys[i].CreateIndex < deploys[j].CreateIndex
+			})
+			for _, d := range deploys {
+				ret = append(ret, d.Status)
+			}
+			return ret
+		}
 	)
 
 	BeforeSuite(func() {
@@ -167,21 +183,62 @@
 			BeforeEach(func() {
 				specFile = "input/rescheduling_canary.hcl"
 			})
-			It("Should have all running allocs", func() {
+			It("Should have running allocs and successful deployment", func() {
 				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
 					ConsistOf([]string{"running", "running", "running"}))
+
+				time.Sleep(2 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
+				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
+					ContainElement(structs.DeploymentStatusSuccessful))
 			})
+
 			Context("Updating job to make allocs fail", func() {
 				It("Should have no rescheduled allocs", func() {
 					job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
 					_, _, err := jobs.Register(job, nil)
 					Expect(err).ShouldNot(HaveOccurred())
 					Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())
+
+					// Verify new deployment and its status
+					time.Sleep(3 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
+					Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
+						ContainElement(structs.DeploymentStatusFailed))
 				})
 			})
 		})
 
+		Context("Reschedule with canary and auto revert ", func() {
+			BeforeEach(func() {
+				specFile = "input/rescheduling_canary_autorevert.hcl"
+			})
+			It("Should have running allocs and successful deployment", func() {
+				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
+					ConsistOf([]string{"running", "running", "running"}))
+
+				time.Sleep(4 * time.Second)
+				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
+					ContainElement(structs.DeploymentStatusSuccessful))
+
+				// Make an update that causes the job to fail
+				job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
+				_, _, err := jobs.Register(job, nil)
+				Expect(err).ShouldNot(HaveOccurred())
+				Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())
+
+				// Wait for the revert
+				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
+					ConsistOf([]string{"failed", "failed", "failed", "running", "running", "running"}))
+
+				// Verify new deployment and its status
+				// There should be one successful, one failed, and one more successful (after revert)
+				time.Sleep(5 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
+				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
+					ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful))
+			})
+
+		})
+
 	})
 })
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 60f7e0fc6..8f7c9e31f 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -5757,7 +5757,7 @@ func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
 func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
 	failTime := a.LastEventTime()
 	reschedulePolicy := a.ReschedulePolicy()
-	if a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
+	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
 		return time.Time{}, false
 	}
 
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index f7e404378..8a24646af 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -15,6 +15,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/kr/pretty"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -317,6 +318,8 @@ type resultExpectation struct {
 
 func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
 	t.Helper()
+
+	assert := assert.New(t)
 	if exp.createDeployment != nil && r.deployment == nil {
 		t.Fatalf("Expect a created deployment got none")
 	} else if exp.createDeployment == nil && r.deployment != nil {
@@ -330,39 +333,13 @@
 		}
 	}
 
-	if !reflect.DeepEqual(r.deploymentUpdates, exp.deploymentUpdates) {
-		t.Fatalf("Unexpected deploymentUpdates: %v", pretty.Diff(r.deploymentUpdates, exp.deploymentUpdates))
-	}
-	if l := len(r.place); l != exp.place {
-		t.Fatalf("Expected %d placements; got %d", exp.place, l)
-	}
-	if l := len(r.destructiveUpdate); l != exp.destructive {
-		t.Fatalf("Expected %d destructive; got %d", exp.destructive, l)
-	}
-	if l := len(r.inplaceUpdate); l != exp.inplace {
-		t.Fatalf("Expected %d inplaceUpdate; got %d", exp.inplace, l)
-	}
-	if l := len(r.attributeUpdates); l != exp.attributeUpdates {
-		t.Fatalf("Expected %d attribute updates; got %d", exp.attributeUpdates, l)
-	}
-	if l := len(r.stop); l != exp.stop {
-		t.Fatalf("Expected %d stops; got %d", exp.stop, l)
-	}
-	if l := len(r.desiredTGUpdates); l != len(exp.desiredTGUpdates) {
-		t.Fatalf("Expected %d task group desired tg updates annotations; got %d", len(exp.desiredTGUpdates), l)
-	}
-
-	// Check the desired updates happened
-	for group, desired := range exp.desiredTGUpdates {
-		act, ok := r.desiredTGUpdates[group]
-		if !ok {
-			t.Fatalf("Expected desired updates for group %q", group)
-		}
-
-		if !reflect.DeepEqual(act, desired) {
-			t.Fatalf("Unexpected annotations for group %q: %v", group, pretty.Diff(act, desired))
-		}
-	}
+	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
+	assert.Len(r.place, exp.place, "Expected Placements")
+	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
+	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
+	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
+	assert.Len(r.stop, exp.stop, "Expected Stops")
+	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
 }
 
 // Tests the reconciler properly handles placements for a job that has no
@@ -1606,6 +1583,65 @@
 }
 
+// Tests service job placement with desired stop and client status complete
+func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
+	// Set desired 5
+	job := mock.Job()
+	job.TaskGroups[0].Count = 5
+
+	// Set up reschedule policy
+	delayDur := 15 * time.Second
+	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
+		Attempts: 1,
+		Interval: 24 * time.Hour,
+		Delay:    delayDur,
+		MaxDelay: 1 * time.Hour,
+	}
+
+	// Create 5 existing allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 5; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+		alloc.DesiredStatus = structs.AllocDesiredStatusRun
+	}
+
+	// Mark one as failed but with desired status stop
+	// Should not trigger rescheduling logic but should trigger a placement
+	allocs[4].ClientStatus = structs.AllocClientStatusFailed
+	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil)
+	r := reconciler.Compute()
+
+	// Should place a new placement for the alloc that was marked stopped
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		place:             1,
+		inplace:           0,
+		stop:              0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:         1,
+				InPlaceUpdate: 0,
+				Ignore:        4,
+			},
+		},
+	})
+
+	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
+
+	// Should not have any follow up evals created
+	require := require.New(t)
+	require.Equal(0, len(r.desiredFollowupEvals))
+}
+
 // Tests rescheduling failed service allocations with desired state stop
 func TestReconciler_RescheduleNow_Service(t *testing.T) {
 	require := require.New(t)
@@ -3772,3 +3808,97 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
 		},
 	})
 }
+
+// Test that a failed deployment cancels non-promoted canaries
+func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
+	// Create a job
+	job := mock.Job()
+	job.TaskGroups[0].Count = 3
+	job.TaskGroups[0].Update = &structs.UpdateStrategy{
+		Canary:          3,
+		MaxParallel:     2,
+		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
+		MinHealthyTime:  10 * time.Second,
+		HealthyDeadline: 10 * time.Minute,
+		Stagger:         31 * time.Second,
+	}
+
+	// Create v1 of the job
+	jobv1 := job.Copy()
+	jobv1.Version = 1
+	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
+
+	// Create v2 of the job
+	jobv2 := job.Copy()
+	jobv2.Version = 2
+	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
+
+	// Create an existing deployment for v2 whose canaries have not been promoted
+	d := structs.NewDeployment(jobv2)
+	state := &structs.DeploymentState{
+		Promoted:     false,
+		DesiredTotal: 3,
+		PlacedAllocs: 3,
+	}
+	d.TaskGroups[job.TaskGroups[0].Name] = state
+
+	// Create the original allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 3; i++ {
+		new := mock.Alloc()
+		new.Job = jobv2
+		new.JobID = job.ID
+		new.NodeID = uuid.Generate()
+		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		new.TaskGroup = job.TaskGroups[0].Name
+		new.DeploymentID = d.ID
+		new.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: helper.BoolToPtr(true),
+		}
+		new.ClientStatus = structs.AllocClientStatusRunning
+		allocs = append(allocs, new)
+
+	}
+	for i := 0; i < 3; i++ {
+		new := mock.Alloc()
+		new.Job = jobv1
+		new.JobID = jobv1.ID
+		new.NodeID = uuid.Generate()
+		new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
+		new.TaskGroup = job.TaskGroups[0].Name
+		new.DeploymentID = uuid.Generate()
+		new.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: helper.BoolToPtr(false),
+		}
+		new.DesiredStatus = structs.AllocDesiredStatusStop
+		new.ClientStatus = structs.AllocClientStatusFailed
+		allocs = append(allocs, new)
+	}
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil)
+	r := reconciler.Compute()
+
+	updates := []*structs.DeploymentStatusUpdate{
+		{
+			DeploymentID:      d.ID,
+			Status:            structs.DeploymentStatusSuccessful,
+			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
+		},
+	}
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: updates,
+		place:             0,
+		inplace:           0,
+		stop:              0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Stop:          0,
+				InPlaceUpdate: 0,
+				Ignore:        3,
+			},
+		},
+	})
+}
diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index d571a8b4e..a71adea23 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -231,70 +231,104 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi
 }
 
 // filterByRescheduleable filters the allocation set to return the set of allocations that are either
-// terminal or running, and a set of allocations that must be rescheduled now. Allocations that can be rescheduled
-// at a future time are also returned so that we can create follow up evaluations for them
+// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled
+// at a future time are also returned so that we can create follow up evaluations for them. Allocs are
+// skipped or considered untainted according to logic defined in the shouldFilter method.
 func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
 	untainted = make(map[string]*structs.Allocation)
 	rescheduleNow = make(map[string]*structs.Allocation)
 
 	now := time.Now()
 	for _, alloc := range a {
-		var isUntainted, eligibleNow, eligibleLater bool
+		var eligibleNow, eligibleLater bool
 		var rescheduleTime time.Time
-		if isBatch {
-			// Allocs from batch jobs should be filtered when the desired status
-			// is terminal and the client did not finish or when the client
-			// status is failed so that they will be replaced. If they are
-			// complete but not failed, they shouldn't be replaced.
-			switch alloc.DesiredStatus {
-			case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
-				if alloc.RanSuccessfully() {
-					untainted[alloc.ID] = alloc
-				}
-				continue
-			default:
-			}
-			if alloc.NextAllocation == "" {
-				// Ignore allocs that have already been rescheduled
-				isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, true)
-			}
-		} else {
-			// Ignore allocs that have already been rescheduled
-			if alloc.NextAllocation == "" {
-				isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, false)
-			}
+
+		// Ignore allocs that have already been rescheduled
+		if alloc.NextAllocation != "" {
+			continue
 		}
+
+		isUntainted, ignore := shouldFilter(alloc, isBatch)
 		if isUntainted {
 			untainted[alloc.ID] = alloc
 		}
-		if eligibleNow {
+		if isUntainted || ignore {
+			continue
+		}
+
+		// Only failed allocs with desired state run get to this point
+		// If the failed alloc is not eligible for rescheduling now, we add it to the untainted set
+		eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now)
+		if !eligibleNow {
+			untainted[alloc.ID] = alloc
+			if eligibleLater {
+				rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime})
+			}
+		} else {
 			rescheduleNow[alloc.ID] = alloc
-		} else if eligibleLater {
-			rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime})
 		}
 	}
 	return
 }
 
+// shouldFilter returns whether the alloc should be ignored or considered untainted.
+// Ignored allocs are filtered out.
+// Untainted allocs count against the desired total.
+// Filtering logic for batch jobs:
+// If complete and ran successfully - untainted
+// If desired state is stop - ignore
+//
+// Filtering logic for service jobs:
+// If desired state is stop/evict - ignore
+// If client status is complete/lost - ignore
+func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bool) {
+	// Allocs from batch jobs should be filtered when the desired status
+	// is terminal and the client did not finish or when the client
+	// status is failed so that they will be replaced. If they are
+	// complete but not failed, they shouldn't be replaced.
+	if isBatch {
+		switch alloc.DesiredStatus {
+		case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
+			if alloc.RanSuccessfully() {
+				return true, false
+			}
+			return false, true
+		default:
+		}
+
+		switch alloc.ClientStatus {
+		case structs.AllocClientStatusFailed:
+		default:
+			return true, false
+		}
+		return false, false
+	}
+
+	// Handle service jobs
+	switch alloc.DesiredStatus {
+	case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
+		return false, true
+	default:
+	}
+
+	switch alloc.ClientStatus {
+	case structs.AllocClientStatusComplete, structs.AllocClientStatusLost:
+		return false, true
+	default:
+	}
+	return false, false
+}
+
 // updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation
 // should be rescheduled now, later or left in the untainted set
-func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
-	shouldFilter := false
-	if !batch {
-		// For service type jobs we filter terminal allocs
-		// except for those with ClientStatusFailed - those are checked for reschedulability
-		shouldFilter = alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed
-	}
+func updateByReschedulable(alloc *structs.Allocation, now time.Time) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
 	rescheduleTime, eligible := alloc.NextRescheduleTime()
-	// We consider a time difference of less than 5 seconds to be eligible
-	// because we collapse allocations that failed within 5 seconds into a single evaluation
 	if eligible && now.After(rescheduleTime) {
 		rescheduleNow = true
-	} else if !shouldFilter {
-		untainted = true
-		if eligible && alloc.FollowupEvalID == "" {
-			rescheduleLater = true
-		}
+		return
+	}
+	if eligible && alloc.FollowupEvalID == "" {
+		rescheduleLater = true
 	}
 	return
 }