mirror of
https://github.com/kemko/nomad.git
synced 2026-01-07 19:05:42 +03:00
scheduler: migrate non-terminal migrating allocs
filterByTainted node should always migrate non-terminal migrating allocs
This commit is contained in:
@@ -199,33 +199,33 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi
|
||||
migrate = make(map[string]*structs.Allocation)
|
||||
lost = make(map[string]*structs.Allocation)
|
||||
for _, alloc := range a {
|
||||
// Terminal allocs are always untainted as they should never be migrated
|
||||
if alloc.TerminalStatus() {
|
||||
untainted[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
|
||||
// Non-terminal allocs that should migrate should always migrate
|
||||
if alloc.DesiredTransition.ShouldMigrate() {
|
||||
migrate[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
|
||||
n, ok := nodes[alloc.NodeID]
|
||||
if !ok {
|
||||
// Node is untainted so alloc is untainted
|
||||
untainted[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
|
||||
// If the job is batch and finished successfully, the fact that the
|
||||
// node is tainted does not mean it should be migrated or marked as
|
||||
// lost as the work was already successfully finished. However for
|
||||
// service/system jobs, tasks should never complete. The check of
|
||||
// batch type, defends against client bugs.
|
||||
if alloc.Job.Type == structs.JobTypeBatch && alloc.RanSuccessfully() {
|
||||
untainted[alloc.ID] = alloc
|
||||
// Allocs on GC'd (nil) or lost nodes are Lost
|
||||
if n == nil || n.TerminalStatus() {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
|
||||
if !alloc.TerminalStatus() {
|
||||
if n == nil || n.TerminalStatus() {
|
||||
lost[alloc.ID] = alloc
|
||||
} else if alloc.DesiredTransition.ShouldMigrate() {
|
||||
migrate[alloc.ID] = alloc
|
||||
} else {
|
||||
untainted[alloc.ID] = alloc
|
||||
}
|
||||
} else {
|
||||
untainted[alloc.ID] = alloc
|
||||
}
|
||||
// All other allocs are untainted
|
||||
untainted[alloc.ID] = alloc
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -3,7 +3,9 @@ package scheduler
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// Test that we properly create the bitmap even when the alloc set includes an
|
||||
@@ -29,3 +31,100 @@ func TestBitmapFrom(t *testing.T) {
|
||||
t.Fatalf("got %d; want %d", act, exp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllocSet_filterByTainted(t *testing.T) {
|
||||
require := require.New(t)
|
||||
|
||||
nodes := map[string]*structs.Node{
|
||||
"draining": &structs.Node{
|
||||
ID: "draining",
|
||||
Drain: true,
|
||||
},
|
||||
"lost": &structs.Node{
|
||||
ID: "lost",
|
||||
Status: structs.NodeStatusDown,
|
||||
},
|
||||
"nil": nil,
|
||||
"normal": &structs.Node{
|
||||
ID: "normal",
|
||||
Status: structs.NodeStatusReady,
|
||||
},
|
||||
}
|
||||
|
||||
batchJob := &structs.Job{
|
||||
Type: structs.JobTypeBatch,
|
||||
}
|
||||
|
||||
allocs := allocSet{
|
||||
// Non-terminal alloc with migrate=true should migrate on a draining node
|
||||
"migrating1": {
|
||||
ID: "migrating1",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredTransition: structs.DesiredTransition{helper.BoolToPtr(true)},
|
||||
Job: batchJob,
|
||||
NodeID: "draining",
|
||||
},
|
||||
// Non-terminal alloc with migrate=true should migrate on an unknown node
|
||||
"migrating2": {
|
||||
ID: "migrating2",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredTransition: structs.DesiredTransition{helper.BoolToPtr(true)},
|
||||
Job: batchJob,
|
||||
NodeID: "nil",
|
||||
},
|
||||
"untainted1": {
|
||||
ID: "untainted1",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: batchJob,
|
||||
NodeID: "normal",
|
||||
},
|
||||
// Terminal allocs are always untainted
|
||||
"untainted2": {
|
||||
ID: "untainted2",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
Job: batchJob,
|
||||
NodeID: "normal",
|
||||
},
|
||||
// Terminal allocs are always untainted, even on draining nodes
|
||||
"untainted3": {
|
||||
ID: "untainted3",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
Job: batchJob,
|
||||
NodeID: "draining",
|
||||
},
|
||||
// Terminal allocs are always untainted, even on lost nodes
|
||||
"untainted4": {
|
||||
ID: "untainted4",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
Job: batchJob,
|
||||
NodeID: "lost",
|
||||
},
|
||||
// Non-terminal allocs on lost nodes are lost
|
||||
"lost1": {
|
||||
ID: "lost1",
|
||||
ClientStatus: structs.AllocClientStatusPending,
|
||||
Job: batchJob,
|
||||
NodeID: "lost",
|
||||
},
|
||||
// Non-terminal allocs on lost nodes are lost
|
||||
"lost2": {
|
||||
ID: "lost2",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: batchJob,
|
||||
NodeID: "lost",
|
||||
},
|
||||
}
|
||||
|
||||
untainted, migrate, lost := allocs.filterByTainted(nodes)
|
||||
require.Len(untainted, 4)
|
||||
require.Contains(untainted, "untainted1")
|
||||
require.Contains(untainted, "untainted2")
|
||||
require.Contains(untainted, "untainted3")
|
||||
require.Contains(untainted, "untainted4")
|
||||
require.Len(migrate, 2)
|
||||
require.Contains(migrate, "migrating1")
|
||||
require.Contains(migrate, "migrating2")
|
||||
require.Len(lost, 2)
|
||||
require.Contains(lost, "lost1")
|
||||
require.Contains(lost, "lost2")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user