Fix bug in which dead nodes weren't being properly handled by system scheduler

This commit is contained in:
Alex Dadgar
2016-09-17 11:28:02 -07:00
parent e9ca2d40cb
commit 926f5ee004
3 changed files with 115 additions and 23 deletions

View File

@@ -1228,3 +1228,90 @@ func TestSystemSched_ChainedAlloc(t *testing.T) {
t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs))
}
}
func TestSystemSched_PlanWithDrainedNode(t *testing.T) {
h := NewHarness(t)
// Register two nodes with two different classes
node := mock.Node()
node.NodeClass = "green"
node.Drain = true
node.ComputeClass()
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
node2 := mock.Node()
node2.NodeClass = "blue"
node2.ComputeClass()
noErr(t, h.State.UpsertNode(h.NextIndex(), node2))
// Create a Job with two task groups, each constrianed on node class
job := mock.SystemJob()
tg1 := job.TaskGroups[0]
tg1.Constraints = append(tg1.Constraints,
&structs.Constraint{
LTarget: "${node.class}",
RTarget: "green",
Operand: "==",
})
tg2 := tg1.Copy()
tg2.Name = "web2"
tg2.Constraints[0].RTarget = "blue"
job.TaskGroups = append(job.TaskGroups, tg2)
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create an allocation on each node
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
alloc.TaskGroup = "web"
alloc2 := mock.Alloc()
alloc2.Job = job
alloc2.JobID = job.ID
alloc2.NodeID = node2.ID
alloc2.Name = "my-job.web2[0]"
alloc2.TaskGroup = "web2"
noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc, alloc2}))
// Create a mock evaluation to deal with drain
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
NodeID: node.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan evicted the alloc on the failed node
planned := plan.NodeUpdate[node.ID]
if len(planned) != 1 {
t.Fatalf("bad: %#v", plan)
}
// Ensure the plan didn't place
if len(plan.NodeAllocation) != 0 {
t.Fatalf("bad: %#v", plan)
}
// Ensure the allocations is stopped
if planned[0].DesiredStatus != structs.AllocDesiredStatusStop {
t.Fatalf("bad: %#v", planned[0])
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

View File

@@ -190,15 +190,20 @@ func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[
for nodeID, allocs := range nodeAllocs {
diff := diffAllocs(job, taintedNodes, required, allocs, terminalAllocs)
// Mark the alloc as being for a specific node.
for i := range diff.place {
alloc := &diff.place[i]
// If the node is tainted there should be no placements made
if _, ok := taintedNodes[nodeID]; ok {
diff.place = nil
} else {
// Mark the alloc as being for a specific node.
for i := range diff.place {
alloc := &diff.place[i]
// If the new allocation isn't annotated with a previous allocation
// or if the previous allocation isn't from the same node then we
// annotate the allocTuple with a new Allocation
if alloc.Alloc == nil || alloc.Alloc.NodeID != nodeID {
alloc.Alloc = &structs.Allocation{NodeID: nodeID}
// If the new allocation isn't annotated with a previous allocation
// or if the previous allocation isn't from the same node then we
// annotate the allocTuple with a new Allocation
if alloc.Alloc == nil || alloc.Alloc.NodeID != nodeID {
alloc.Alloc = &structs.Allocation{NodeID: nodeID}
}
}
}

View File

@@ -175,15 +175,6 @@ func TestDiffAllocs(t *testing.T) {
func TestDiffSystemAllocs(t *testing.T) {
job := mock.SystemJob()
// Create three alive nodes.
nodes := []*structs.Node{{ID: "foo"}, {ID: "bar"}, {ID: "baz"},
{ID: "pipe"}}
// The "old" job has a previous modify index
oldJob := new(structs.Job)
*oldJob = *job
oldJob.JobModifyIndex -= 1
drainNode := mock.Node()
drainNode.Drain = true
@@ -191,10 +182,19 @@ func TestDiffSystemAllocs(t *testing.T) {
deadNode.Status = structs.NodeStatusDown
tainted := map[string]*structs.Node{
"dead": deadNode,
"drainNode": drainNode,
deadNode.ID: deadNode,
drainNode.ID: drainNode,
}
// Create three alive nodes.
nodes := []*structs.Node{{ID: "foo"}, {ID: "bar"}, {ID: "baz"},
{ID: "pipe"}, {ID: drainNode.ID}, {ID: deadNode.ID}}
// The "old" job has a previous modify index
oldJob := new(structs.Job)
*oldJob = *job
oldJob.JobModifyIndex -= 1
allocs := []*structs.Allocation{
// Update allocation on baz
&structs.Allocation{
@@ -215,14 +215,14 @@ func TestDiffSystemAllocs(t *testing.T) {
// Stop allocation on draining node.
&structs.Allocation{
ID: structs.GenerateUUID(),
NodeID: "drainNode",
NodeID: drainNode.ID,
Name: "my-job.web[0]",
Job: oldJob,
},
// Mark as lost on a dead node
&structs.Allocation{
ID: structs.GenerateUUID(),
NodeID: "dead",
NodeID: deadNode.ID,
Name: "my-job.web[0]",
Job: oldJob,
},
@@ -272,8 +272,8 @@ func TestDiffSystemAllocs(t *testing.T) {
}
// We should place 1
if len(place) != 2 {
t.Fatalf("bad: %#v", place)
if l := len(place); l != 2 {
t.Fatalf("bad: %#v", l)
}
// Ensure that the allocations which are replacements of terminal allocs are