Merge pull request #1205 from hashicorp/b-drain-restarting-jobs

Do not restart successful batch jobs when the node is tainted (removed/drained)
2026-01-04 09:25:46 +03:00 · 2016-05-24 18:18:27 -07:00
parent ecd0e1ee6e d5fa76ec21
commit a113f3173d
5 changed files with 118 additions and 5 deletions
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -1904,6 +1904,21 @@ func (ts *TaskState) Failed() bool {
 	}
 }

+// Successful returns whether a task finished successfully.
+func (ts *TaskState) Successful() bool {
+	l := len(ts.Events)
+	if ts.State != TaskStateDead || l == 0 {
+		return false
+	}
+
+	e := ts.Events[l-1]
+	if e.Type != TaskTerminated {
+		return false
+	}
+
+	return e.ExitCode == 0
+}
+
 const (
 	// TaskDriveFailure indicates that the task could not be started due to a
 	// failure in the driver.
@@ -2336,6 +2351,23 @@ func (a *Allocation) TerminalStatus() bool {
 	}
 }

+// RanSuccessfully returns whether the client has ran the allocation and all
+// tasks finished successfully
+func (a *Allocation) RanSuccessfully() bool {
+	// Handle the case the client hasn't started the allocation.
+	if len(a.TaskStates) == 0 {
+		return false
+	}
+
+	// Check to see if all the tasks finised successfully in the allocation
+	allSuccess := true
+	for _, state := range a.TaskStates {
+		allSuccess = allSuccess && state.Successful()
+	}
+
+	return allSuccess
+}
+
 // Stub returns a list stub for the allocation
 func (a *Allocation) Stub() *AllocListStub {
 	return &AllocListStub{
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -235,12 +235,12 @@ func (s *GenericScheduler) filterCompleteAllocs(allocs []*structs.Allocation) []
 	filter := func(a *structs.Allocation) bool {
 		if s.batch {
 			// Allocs from batch jobs should be filtered when the desired status
-			// is terminal or when the client status is failed so that they will
-			// be replaced. If they are complete but not failed, they shouldn't
-			// be replaced.
+			// is terminal and the client did not finish or when the client
+			// status is failed so that they will be replaced. If they are
+			// complete but not failed, they shouldn't be replaced.
 			switch a.DesiredStatus {
 			case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict, structs.AllocDesiredStatusFailed:
-				return true
+				return !a.RanSuccessfully()
 			default:
 			}

--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -1316,3 +1316,71 @@ func TestBatchSched_Run_FailedAlloc(t *testing.T) {

 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
+
+func TestBatchSched_ReRun_SuccessfullyFinishedAlloc(t *testing.T) {
+	h := NewHarness(t)
+
+	// Create two nodes, one that is drained and has a successfully finished
+	// alloc and a fresh undrained one
+	node := mock.Node()
+	node.Drain = true
+	node2 := mock.Node()
+	noErr(t, h.State.UpsertNode(h.NextIndex(), node))
+	noErr(t, h.State.UpsertNode(h.NextIndex(), node2))
+
+	// Create a job
+	job := mock.Job()
+	job.Type = structs.JobTypeBatch
+	job.TaskGroups[0].Count = 1
+	noErr(t, h.State.UpsertJob(h.NextIndex(), job))
+
+	// Create a successful alloc
+	alloc := mock.Alloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = node.ID
+	alloc.Name = "my-job.web[0]"
+	alloc.ClientStatus = structs.AllocClientStatusComplete
+	alloc.TaskStates = map[string]*structs.TaskState{
+		"web": &structs.TaskState{
+			State: structs.TaskStateDead,
+			Events: []*structs.TaskEvent{
+				{
+					Type:     structs.TaskTerminated,
+					ExitCode: 0,
+				},
+			},
+		},
+	}
+	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))
+
+	// Create a mock evaluation to rerun the job
+	eval := &structs.Evaluation{
+		ID:          structs.GenerateUUID(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+	}
+
+	// Process the evaluation
+	err := h.Process(NewBatchScheduler, eval)
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Ensure no plan
+	if len(h.Plans) != 0 {
+		t.Fatalf("bad: %#v", h.Plans)
+	}
+
+	// Lookup the allocations by JobID
+	out, err := h.State.AllocsByJob(job.ID)
+	noErr(t, err)
+
+	// Ensure no replacement alloc was placed.
+	if len(out) != 1 {
+		t.Fatalf("bad: %#v", out)
+	}
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
--- a/scheduler/util.go
+++ b/scheduler/util.go
@@ -81,8 +81,17 @@ func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
 			continue
 		}

-		// If we are on a tainted node, we must migrate
+		// If we are on a tainted node, we must migrate if we are a service or
+		// if the batch allocation did not finish
 		if taintedNodes[exist.NodeID] {
+			// If the job is batch and finished succesfully, the fact that the
+			// node is tainted does not mean it should be migrated as the work
+			// was already succesfully finished. However for service/system
+			// jobs, tasks should never complete. The check of batch type,
+			// defends against client bugs.
+			if exist.Job.Type == structs.JobTypeBatch && exist.RanSuccessfully() {
+				goto IGNORE
+			}
 			result.migrate = append(result.migrate, allocTuple{
 				Name:      name,
 				TaskGroup: tg,
@@ -102,6 +111,7 @@ func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
 		}

 		// Everything is up-to-date
+	IGNORE:
 		result.ignore = append(result.ignore, allocTuple{
 			Name:      name,
 			TaskGroup: tg,
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -74,6 +74,7 @@ func TestDiffAllocs(t *testing.T) {
 			ID:     structs.GenerateUUID(),
 			NodeID: "zip",
 			Name:   "my-job.web[10]",
+			Job:    oldJob,
 		},

 		// Migrate the 3rd
@@ -81,6 +82,7 @@ func TestDiffAllocs(t *testing.T) {
 			ID:     structs.GenerateUUID(),
 			NodeID: "dead",
 			Name:   "my-job.web[2]",
+			Job:    oldJob,
 		},
 	}

@@ -155,6 +157,7 @@ func TestDiffSystemAllocs(t *testing.T) {
 			ID:     structs.GenerateUUID(),
 			NodeID: "dead",
 			Name:   "my-job.web[0]",
+			Job:    oldJob,
 		},
 	}