From a466f97cbafce2c720c1732b49d3301848a1ad1d Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 23 Feb 2018 16:45:57 -0800 Subject: [PATCH] scheduler: migrate non-terminal migrating allocs filterByTainted node should always migrate non-terminal migrating allocs --- scheduler/reconcile_util.go | 36 ++++++------ scheduler/reconcile_util_test.go | 99 ++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 18 deletions(-) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 5527aecb4..a7b0b8141 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -199,33 +199,33 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi migrate = make(map[string]*structs.Allocation) lost = make(map[string]*structs.Allocation) for _, alloc := range a { + // Terminal allocs are always untainted as they should never be migrated + if alloc.TerminalStatus() { + untainted[alloc.ID] = alloc + continue + } + + // Non-terminal allocs that should migrate should always migrate + if alloc.DesiredTransition.ShouldMigrate() { + migrate[alloc.ID] = alloc + continue + } + n, ok := nodes[alloc.NodeID] if !ok { + // Node is untainted so alloc is untainted untainted[alloc.ID] = alloc continue } - // If the job is batch and finished successfully, the fact that the - // node is tainted does not mean it should be migrated or marked as - // lost as the work was already successfully finished. However for - // service/system jobs, tasks should never complete. The check of - // batch type, defends against client bugs. - if alloc.Job.Type == structs.JobTypeBatch && alloc.RanSuccessfully() { - untainted[alloc.ID] = alloc + // Allocs on GC'd (nil) or lost nodes are Lost + if n == nil || n.TerminalStatus() { + lost[alloc.ID] = alloc continue } - if !alloc.TerminalStatus() { - if n == nil || n.TerminalStatus() { - lost[alloc.ID] = alloc - } else if alloc.DesiredTransition.ShouldMigrate() { - migrate[alloc.ID] = alloc - } else { - untainted[alloc.ID] = alloc - } - } else { - untainted[alloc.ID] = alloc - } + // All other allocs are untainted + untainted[alloc.ID] = alloc } return } diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 3b45a55ed..6d85dfb81 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -3,7 +3,9 @@ package scheduler import ( "testing" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/require" ) // Test that we properly create the bitmap even when the alloc set includes an @@ -29,3 +31,100 @@ func TestBitmapFrom(t *testing.T) { t.Fatalf("got %d; want %d", act, exp) } } + +func TestAllocSet_filterByTainted(t *testing.T) { + require := require.New(t) + + nodes := map[string]*structs.Node{ + "draining": &structs.Node{ + ID: "draining", + Drain: true, + }, + "lost": &structs.Node{ + ID: "lost", + Status: structs.NodeStatusDown, + }, + "nil": nil, + "normal": &structs.Node{ + ID: "normal", + Status: structs.NodeStatusReady, + }, + } + + batchJob := &structs.Job{ + Type: structs.JobTypeBatch, + } + + allocs := allocSet{ + // Non-terminal alloc with migrate=true should migrate on a draining node + "migrating1": { + ID: "migrating1", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{helper.BoolToPtr(true)}, + Job: batchJob, + NodeID: "draining", + }, + // Non-terminal alloc with migrate=true should migrate on an unknown node + "migrating2": { + ID: "migrating2", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{helper.BoolToPtr(true)}, + Job: batchJob, + NodeID: "nil", + }, + "untainted1": { + ID: "untainted1", + ClientStatus: structs.AllocClientStatusRunning, + Job: batchJob, + NodeID: "normal", + }, + // Terminal allocs are always untainted + "untainted2": { + ID: "untainted2", + ClientStatus: structs.AllocClientStatusComplete, + Job: batchJob, + NodeID: "normal", + }, + // Terminal allocs are always untainted, even on draining nodes + "untainted3": { + ID: "untainted3", + ClientStatus: structs.AllocClientStatusComplete, + Job: batchJob, + NodeID: "draining", + }, + // Terminal allocs are always untainted, even on lost nodes + "untainted4": { + ID: "untainted4", + ClientStatus: structs.AllocClientStatusComplete, + Job: batchJob, + NodeID: "lost", + }, + // Non-terminal allocs on lost nodes are lost + "lost1": { + ID: "lost1", + ClientStatus: structs.AllocClientStatusPending, + Job: batchJob, + NodeID: "lost", + }, + // Non-terminal allocs on lost nodes are lost + "lost2": { + ID: "lost2", + ClientStatus: structs.AllocClientStatusRunning, + Job: batchJob, + NodeID: "lost", + }, + } + + untainted, migrate, lost := allocs.filterByTainted(nodes) + require.Len(untainted, 4) + require.Contains(untainted, "untainted1") + require.Contains(untainted, "untainted2") + require.Contains(untainted, "untainted3") + require.Contains(untainted, "untainted4") + require.Len(migrate, 2) + require.Contains(migrate, "migrating1") + require.Contains(migrate, "migrating2") + require.Len(lost, 2) + require.Contains(lost, "lost1") + require.Contains(lost, "lost2") +}