Merge pull request #4079 from hashicorp/b-filter-desiredstop

Filter desired status stop allocs correctly
Preetha
2018-03-29 15:36:22 -05:00
committed by GitHub
14 changed files with 498 additions and 144 deletions
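
To make the intent of the change concrete, here is a minimal standalone sketch (not part of this PR) that exercises the new guard in Allocation.NextRescheduleTime using the repo's mock package; the alloc values are hypothetical.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// A failed allocation that the scheduler has already decided to stop
	// (for example, a replaced canary) should not be picked up again by the
	// reschedule logic.
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusFailed
	alloc.DesiredStatus = structs.AllocDesiredStatusStop

	// With the desired-status guard added in this PR, no reschedule time is returned.
	_, eligible := alloc.NextRescheduleTime()
	fmt.Println(eligible) // false
}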

View File

@@ -1,30 +1,35 @@
job "test1" {
datacenters = ["dc1"]
type = "service"
type = "service"
group "t1" {
count = 3
task "t1" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "lol 5000"]
command = "bash"
args = ["-c", "lol 5000"]
}
}
update {
max_parallel = 1
min_healthy_time = "10s"
auto_revert = false
max_parallel = 1
min_healthy_time = "10s"
auto_revert = false
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
}
reschedule {
attempts = 0
interval = "5m"
delay = "0s"
mode = "fail"
}
reschedule {
attempts = 0
interval = "5m"
unlimited = false
}
}
}
}
}

View File

@@ -1,25 +1,29 @@
job "test3" {
datacenters = ["dc1"]
type = "service"
type = "service"
group "t3" {
count = 3
task "t3" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "a=`cksum <<< \"${NOMAD_ALLOC_ID}\"| cut -d ' ' -f1`; if ! (( a % 2 )); then sleep 5000; else exit -1; fi"]
command = "bash"
args = ["-c", "a=`cksum <<< \"${NOMAD_ALLOC_ID}\"| cut -d ' ' -f1`; if ! (( a % 2 )); then sleep 5000; else exit -1; fi"]
}
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
attempts = 0
delay = "0s"
mode = "fail"
}
reschedule {
attempts = 2
interval = "5m"
attempts = 2
interval = "5m"
unlimited = false
}
}
}
}
}

View File

@@ -1,31 +1,37 @@
job "test5" {
datacenters = ["dc1"]
type = "service"
type = "service"
group "t5" {
count = 3
task "t5" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "sleep 5000"]
command = "bash"
args = ["-c", "sleep 5000"]
}
}
update {
max_parallel = 1
canary = 1
min_healthy_time = "1s"
auto_revert = false
max_parallel = 1
canary = 1
min_healthy_time = "1s"
auto_revert = false
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
attempts = 0
delay = "0s"
mode = "fail"
}
reschedule {
attempts = 3
interval = "5m"
attempts = 3
interval = "5m"
delay = "5s"
unlimited = false
}
}
}
}
}

View File

@@ -0,0 +1,34 @@
job "test" {
datacenters = ["dc1"]
type = "service"
group "t1" {
count = 3
task "t1" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "sleep 5000"]
}
}
update {
canary = 3
max_parallel = 1
min_healthy_time = "1s"
healthy_deadline = "1m"
auto_revert = true
}
restart {
attempts = 0
mode = "fail"
}
reschedule {
unlimited = "true"
}
}
}

View File

@@ -1,21 +1,23 @@
job "test" {
datacenters = ["dc1"]
type = "service"
type = "service"
group "t" {
count = 3
task "t" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "lol 5000"]
}
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
}
}
}
}

View File

@@ -1,25 +1,29 @@
job "test2" {
datacenters = ["dc1"]
type = "service"
type = "service"
group "t2" {
count = 3
task "t2" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "lol 5000"]
command = "bash"
args = ["-c", "lol 5000"]
}
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
attempts = 0
delay = "0s"
mode = "fail"
}
reschedule {
attempts = 2
interval = "5m"
attempts = 2
interval = "5m"
unlimited = false
}
}
}
}
}

View File

@@ -0,0 +1,35 @@
job "demo2" {
datacenters = ["dc1"]
type = "service"
group "t2" {
count = 3
task "t2" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "sleep 5000"]
}
}
update {
max_parallel = 1
min_healthy_time = "5s"
healthy_deadline = "10m"
auto_revert = false
}
restart {
attempts = 0
mode = "fail"
}
reschedule {
unlimited = "true"
# attempts = 0
}
}
}

View File

@@ -0,0 +1,35 @@
job "demo3" {
datacenters = ["dc1"]
type = "service"
group "t2" {
count = 3
task "t2" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "ssleep 5000"]
}
}
update {
max_parallel = 1
min_healthy_time = "5s"
healthy_deadline = "10m"
auto_revert = true
}
restart {
attempts = 0
mode = "fail"
}
reschedule {
unlimited = "true"
# attempts = 0
}
}
}

View File

@@ -1,20 +1,23 @@
job "test" {
datacenters = ["dc1"]
type = "system"
type = "system"
group "t" {
count = 1
task "t" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "lol 5000"]
}
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
}
}
}
}

View File

@@ -1,30 +1,35 @@
job "test4" {
datacenters = ["dc1"]
type = "service"
type = "service"
group "t4" {
count = 3
task "t4" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "sleep 5000"]
command = "bash"
args = ["-c", "sleep 5000"]
}
}
update {
max_parallel = 1
min_healthy_time = "10s"
auto_revert = false
max_parallel = 1
min_healthy_time = "10s"
auto_revert = false
}
restart {
attempts = 0
delay = "0s"
mode = "fail"
attempts = 0
delay = "0s"
mode = "fail"
}
reschedule {
attempts = 3
interval = "5m"
attempts = 3
interval = "5m"
unlimited = false
}
}
}
}
}

View File

@@ -12,6 +12,7 @@ import (
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
)
var _ = Describe("Server Side Restart Tests", func() {
@@ -43,12 +44,27 @@ var _ = Describe("Server Side Restart Tests", func() {
			Expect(err).ShouldNot(HaveOccurred())
			var ret []string
			for _, a := range allocs {
				if (a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0) || a.FollowupEvalID != "" {
					ret = append(ret, a.ClientStatus)
				}
			}
			return ret
		}

		// deploymentStatus is a helper function that returns deployment status of all deployments
		// sorted by time
		deploymentStatus = func() []string {
			deploys, _, err := jobs.Deployments(*job.ID, nil)
			Expect(err).ShouldNot(HaveOccurred())
			var ret []string
			sort.Slice(deploys, func(i, j int) bool {
				return deploys[i].CreateIndex < deploys[j].CreateIndex
			})
			for _, d := range deploys {
				ret = append(ret, d.Status)
			}
			return ret
		}
	)

	BeforeSuite(func() {
@@ -167,21 +183,62 @@ var _ = Describe("Server Side Restart Tests", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_canary.hcl"
			})

			It("Should have running allocs and successful deployment", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))

				time.Sleep(2 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ContainElement(structs.DeploymentStatusSuccessful))
			})

			Context("Updating job to make allocs fail", func() {
				It("Should have no rescheduled allocs", func() {
					job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
					_, _, err := jobs.Register(job, nil)
					Expect(err).ShouldNot(HaveOccurred())
					Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())

					// Verify new deployment and its status
					time.Sleep(3 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
					Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
						ContainElement(structs.DeploymentStatusFailed))
				})
			})
		})

		Context("Reschedule with canary and auto revert ", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_canary_autorevert.hcl"
			})

			It("Should have running allocs and successful deployment", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))

				time.Sleep(4 * time.Second)
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ContainElement(structs.DeploymentStatusSuccessful))

				// Make an update that causes the job to fail
				job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
				_, _, err := jobs.Register(job, nil)
				Expect(err).ShouldNot(HaveOccurred())
				Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())

				// Wait for the revert
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"failed", "failed", "failed", "running", "running", "running"}))

				// Verify new deployment and its status
				// There should be one successful, one failed, and one more successful (after revert)
				time.Sleep(5 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful))
			})
		})
	})
})

View File

@@ -5757,7 +5757,7 @@ func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
	failTime := a.LastEventTime()
	reschedulePolicy := a.ReschedulePolicy()
	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
		return time.Time{}, false
	}

View File

@@ -15,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -317,6 +318,8 @@ type resultExpectation struct {
func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Fatalf("Expect a created deployment got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
@@ -330,39 +333,13 @@ func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}
// Tests the reconciler properly handles placements for a job that has no
@@ -1606,6 +1583,65 @@ func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
}

// Tests service job placement with desired stop and client status complete
func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts: 1,
		Interval: 24 * time.Hour,
		Delay:    delayDur,
		MaxDelay: 1 * time.Hour,
	}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DesiredStatus = structs.AllocDesiredStatusRun
	}

	// Mark one as failed but with desired status stop
	// Should not trigger rescheduling logic but should trigger a placement
	allocs[4].ClientStatus = structs.AllocClientStatusFailed
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil)
	r := reconciler.Compute()

	// Should place a new placement for the alloc that was marked stopped
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))

	// Should not have any follow up evals created
	require := require.New(t)
	require.Equal(0, len(r.desiredFollowupEvals))
}

// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_RescheduleNow_Service(t *testing.T) {
	require := require.New(t)
@@ -3772,3 +3808,97 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
		},
	})
}

// Test that a failed deployment cancels non-promoted canaries
func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
	// Create a job
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Update = &structs.UpdateStrategy{
		Canary:          3,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	// Create v1 of the job
	jobv1 := job.Copy()
	jobv1.Version = 1
	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}

	// Create v2 of the job
	jobv2 := job.Copy()
	jobv2.Version = 2
	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}

	// Create an existing deployment for v2 whose canaries have not been promoted
	d := structs.NewDeployment(jobv2)
	state := &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 3,
		PlacedAllocs: 3,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = state

	// Create the healthy canary allocs on the new job version
	var allocs []*structs.Allocation
	for i := 0; i < 3; i++ {
		new := mock.Alloc()
		new.Job = jobv2
		new.JobID = job.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = d.ID
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		new.ClientStatus = structs.AllocClientStatusRunning
		allocs = append(allocs, new)
	}

	// Create the older v1 allocs that have failed and been marked for stopping
	for i := 0; i < 3; i++ {
		new := mock.Alloc()
		new.Job = jobv1
		new.JobID = jobv1.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = uuid.Generate()
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
		new.DesiredStatus = structs.AllocDesiredStatusStop
		new.ClientStatus = structs.AllocClientStatusFailed
		allocs = append(allocs, new)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil)
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          0,
				InPlaceUpdate: 0,
				Ignore:        3,
			},
		},
	})
}

View File

@@ -231,70 +231,104 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi
}

// filterByRescheduleable filters the allocation set to return the set of allocations that are either
// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled
// at a future time are also returned so that we can create follow up evaluations for them. Allocs are
// skipped or considered untainted according to logic defined in shouldFilter method.
func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
	untainted = make(map[string]*structs.Allocation)
	rescheduleNow = make(map[string]*structs.Allocation)

	now := time.Now()
	for _, alloc := range a {
		var eligibleNow, eligibleLater bool
		var rescheduleTime time.Time

		// Ignore allocs that have already been rescheduled
		if alloc.NextAllocation != "" {
			continue
		}

		isUntainted, ignore := shouldFilter(alloc, isBatch)
		if isUntainted {
			untainted[alloc.ID] = alloc
		}
		if isUntainted || ignore {
			continue
		}

		// Only failed allocs with desired state run get to this point
		// If the failed alloc is not eligible for rescheduling now we add it to the untainted set
		eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now)
		if !eligibleNow {
			untainted[alloc.ID] = alloc
			if eligibleLater {
				rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime})
			}
		} else {
			rescheduleNow[alloc.ID] = alloc
		}
	}
	return
}

// shouldFilter returns whether the alloc should be ignored or considered untainted
// Ignored allocs are filtered out.
// Untainted allocs count against the desired total.
// Filtering logic for batch jobs:
// If complete, and ran successfully - untainted
// If desired state is stop - ignore
//
// Filtering logic for service jobs:
// If desired state is stop/evict - ignore
// If client status is complete/lost - ignore
func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bool) {
	// Allocs from batch jobs should be filtered when the desired status
	// is terminal and the client did not finish or when the client
	// status is failed so that they will be replaced. If they are
	// complete but not failed, they shouldn't be replaced.
	if isBatch {
		switch alloc.DesiredStatus {
		case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
			if alloc.RanSuccessfully() {
				return true, false
			}
			return false, true
		default:
		}

		switch alloc.ClientStatus {
		case structs.AllocClientStatusFailed:
		default:
			return true, false
		}
		return false, false
	}

	// Handle service jobs
	switch alloc.DesiredStatus {
	case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
		return false, true
	default:
	}

	switch alloc.ClientStatus {
	case structs.AllocClientStatusComplete, structs.AllocClientStatusLost:
		return false, true
	default:
	}

	return false, false
}

// updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation
// should be rescheduled now, later or left in the untainted set
func updateByReschedulable(alloc *structs.Allocation, now time.Time) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
	rescheduleTime, eligible := alloc.NextRescheduleTime()

	// We consider a time difference of less than 5 seconds to be eligible
	// because we collapse allocations that failed within 5 seconds into a single evaluation
	if eligible && now.After(rescheduleTime) {
		rescheduleNow = true
		return
	}
	if eligible && alloc.FollowupEvalID == "" {
		rescheduleLater = true
	}
	return
}
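
As a rough illustration of the classification above, the following standalone sketch restates the service-job branch of shouldFilter (which is unexported, so this is an illustrative re-statement rather than the scheduler's own code); plain status strings stand in for the structs constants.

package main

import "fmt"

// verdict mirrors the (untainted, ignore) pair returned by shouldFilter.
type verdict struct{ untainted, ignore bool }

// classifyService restates the service-job rules documented above: allocs with
// desired status stop/evict, or client status complete/lost, are ignored;
// everything else (notably failed allocs that should still be running) falls
// through to the reschedule checks.
func classifyService(desiredStatus, clientStatus string) verdict {
	switch desiredStatus {
	case "stop", "evict":
		return verdict{ignore: true}
	}
	switch clientStatus {
	case "complete", "lost":
		return verdict{ignore: true}
	}
	return verdict{}
}

func main() {
	// A failed alloc that was deliberately stopped (e.g. a replaced canary) is
	// ignored rather than handed to the reschedule logic, which is the behavior this PR fixes.
	fmt.Printf("%+v\n", classifyService("stop", "failed")) // {untainted:false ignore:true}

	// A failed alloc with desired status run is neither untainted nor ignored, so
	// filterByRescheduleable goes on to call updateByReschedulable for it.
	fmt.Printf("%+v\n", classifyService("run", "failed")) // {untainted:false ignore:false}
}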