mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
In #8099 we fixed a bug where garbage collecting a job with `disconnect.stop_on_client_after` would spawn recursive delayed evals. But when applied to disconnected allocs with `replace=true`, the fix prevents us from emitting a blocked eval if there's no room for the replacement. Update the guard on creating blocked evals so that rather than checking for `IsZero` that we check for being later than the `WaitUntil`. This separates this guard from the logic guarding the creation of delayed evals so that we can potentially create both when needed. Ref: https://github.com/hashicorp/nomad/pull/8099/files#r435198418
7703 lines
230 KiB
Go
7703 lines
230 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package scheduler
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
"slices"
|
|
"sort"
|
|
"testing"
|
|
"time"
|
|
|
|
memdb "github.com/hashicorp/go-memdb"
|
|
"github.com/hashicorp/nomad/ci"
|
|
"github.com/hashicorp/nomad/helper"
|
|
"github.com/hashicorp/nomad/helper/pointer"
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/scheduler/reconciler"
|
|
sstructs "github.com/hashicorp/nomad/scheduler/structs"
|
|
"github.com/hashicorp/nomad/scheduler/tests"
|
|
"github.com/shoenig/test"
|
|
"github.com/shoenig/test/must"
|
|
)
|
|
|
|
func TestServiceSched_JobRegister(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for range 10 {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations but the eval does
|
|
if plan.Annotations != nil {
|
|
t.Fatalf("expected no annotations")
|
|
}
|
|
must.SliceNotEmpty(t, h.Evals)
|
|
must.Eq(t, 10, h.Evals[0].PlanAnnotations.DesiredTGUpdates["web"].Place)
|
|
|
|
// Ensure the eval has no spawned blocked eval
|
|
if len(h.CreateEvals) != 0 {
|
|
t.Errorf("bad: %#v", h.CreateEvals)
|
|
if h.Evals[0].BlockedEval != "" {
|
|
t.Fatalf("bad: %#v", h.Evals[0])
|
|
}
|
|
t.FailNow()
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Ensure allocations have unique names derived from Job.ID
|
|
allocNames := helper.ConvertSlice(out,
|
|
func(alloc *structs.Allocation) string { return alloc.Name })
|
|
expectAllocNames := []string{}
|
|
for i := 0; i < 10; i++ {
|
|
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i))
|
|
}
|
|
must.SliceContainsAll(t, expectAllocNames, allocNames)
|
|
|
|
// Ensure different ports were used.
|
|
used := make(map[int]map[string]struct{})
|
|
for _, alloc := range out {
|
|
for _, port := range alloc.AllocatedResources.Shared.Ports {
|
|
nodeMap, ok := used[port.Value]
|
|
if !ok {
|
|
nodeMap = make(map[string]struct{})
|
|
used[port.Value] = nodeMap
|
|
}
|
|
if _, ok := nodeMap[alloc.NodeID]; ok {
|
|
t.Fatalf("Port collision on node %q %v", alloc.NodeID, port.Value)
|
|
}
|
|
nodeMap[alloc.NodeID] = struct{}{}
|
|
}
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_StickyAllocs(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.TaskGroups[0].EphemeralDisk.Sticky = true
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
if err := h.Process(NewServiceScheduler, eval); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
plan := h.Plans[0]
|
|
planned := make(map[string]*structs.Allocation)
|
|
for _, allocList := range plan.NodeAllocation {
|
|
for _, alloc := range allocList {
|
|
planned[alloc.ID] = alloc
|
|
}
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Update the job to force a rolling upgrade
|
|
updated := job.Copy()
|
|
updated.TaskGroups[0].Tasks[0].Resources.CPU += 10
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, updated))
|
|
|
|
// Create a mock evaluation to handle the update
|
|
eval = &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
h1 := tests.NewHarnessWithState(t, h.State)
|
|
if err := h1.Process(NewServiceScheduler, eval); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure we have created only one new allocation
|
|
// Ensure a single plan
|
|
if len(h1.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h1.Plans)
|
|
}
|
|
plan = h1.Plans[0]
|
|
var newPlanned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
newPlanned = append(newPlanned, allocList...)
|
|
}
|
|
if len(newPlanned) != 10 {
|
|
t.Fatalf("bad plan: %#v", plan)
|
|
}
|
|
// Ensure that the new allocations were placed on the same node as the older
|
|
// ones
|
|
for _, new := range newPlanned {
|
|
if new.PreviousAllocation == "" {
|
|
t.Fatalf("new alloc %q doesn't have a previous allocation", new.ID)
|
|
}
|
|
|
|
old, ok := planned[new.PreviousAllocation]
|
|
if !ok {
|
|
t.Fatalf("new alloc %q previous allocation doesn't match any prior placed alloc (%q)", new.ID, new.PreviousAllocation)
|
|
}
|
|
if new.NodeID != old.NodeID {
|
|
t.Fatalf("new alloc and old alloc node doesn't match; got %q; want %q", new.NodeID, old.NodeID)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_StickyHostVolumes(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
nodes := []*structs.Node{
|
|
mock.Node(),
|
|
mock.Node(),
|
|
}
|
|
|
|
hostVolCapsReadWrite := []*structs.HostVolumeCapability{
|
|
{
|
|
AttachmentMode: structs.HostVolumeAttachmentModeFilesystem,
|
|
AccessMode: structs.HostVolumeAccessModeSingleNodeReader,
|
|
},
|
|
{
|
|
AttachmentMode: structs.HostVolumeAttachmentModeFilesystem,
|
|
AccessMode: structs.HostVolumeAccessModeSingleNodeWriter,
|
|
},
|
|
}
|
|
|
|
dhv := &structs.HostVolume{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Name: "foo",
|
|
NodeID: nodes[1].ID,
|
|
RequestedCapabilities: hostVolCapsReadWrite,
|
|
State: structs.HostVolumeStateReady,
|
|
}
|
|
|
|
nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{}
|
|
nodes[1].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"foo": {ID: dhv.ID}}
|
|
|
|
for _, node := range nodes {
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, 1000, node))
|
|
}
|
|
must.NoError(t, h.State.UpsertHostVolume(1000, dhv))
|
|
|
|
stickyRequest := map[string]*structs.VolumeRequest{
|
|
"foo": {
|
|
Type: "host",
|
|
Source: "foo",
|
|
Sticky: true,
|
|
AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter,
|
|
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
|
|
},
|
|
}
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Volumes = stickyRequest
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure the plan allocated
|
|
plan := h.Plans[0]
|
|
planned := make(map[string]*structs.Allocation)
|
|
for _, allocList := range plan.NodeAllocation {
|
|
for _, alloc := range allocList {
|
|
planned[alloc.ID] = alloc
|
|
}
|
|
}
|
|
must.MapLen(t, 10, planned)
|
|
|
|
// Ensure that the allocations got the host volume ID added
|
|
for _, p := range planned {
|
|
must.Eq(t, p.PreviousAllocation, "")
|
|
}
|
|
|
|
// Update the job to force a rolling upgrade
|
|
updated := job.Copy()
|
|
updated.TaskGroups[0].Tasks[0].Resources.CPU += 10
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, updated))
|
|
|
|
// Create a mock evaluation to handle the update
|
|
eval = &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure we have created only one new allocation
|
|
must.SliceLen(t, 2, h.Plans)
|
|
plan = h.Plans[0]
|
|
var newPlanned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
newPlanned = append(newPlanned, allocList...)
|
|
}
|
|
must.SliceLen(t, 10, newPlanned)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job with count 2 and disk as 60GB so that only one allocation
|
|
// can fit
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].EphemeralDisk.SizeMB = 88 * 1024
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
if plan.Annotations != nil {
|
|
t.Fatalf("expected no annotations")
|
|
}
|
|
|
|
// Ensure the eval has a blocked eval
|
|
if len(h.CreateEvals) != 1 {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
if h.CreateEvals[0].TriggeredBy != structs.EvalTriggerQueuedAllocs {
|
|
t.Fatalf("bad: %#v", h.CreateEvals[0])
|
|
}
|
|
|
|
// Ensure the plan allocated only one allocation
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 1 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure only one allocation was placed
|
|
if len(out) != 1 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_DistinctHosts(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job that uses distinct host and has count 1 higher than what is
|
|
// possible.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 11
|
|
job.Constraints = append(job.Constraints, &structs.Constraint{Operand: structs.ConstraintDistinctHosts})
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the eval has spawned blocked eval
|
|
if len(h.CreateEvals) != 1 {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
// Ensure the plan failed to alloc
|
|
outEval := h.Evals[0]
|
|
if len(outEval.FailedTGAllocs) != 1 {
|
|
t.Fatalf("bad: %+v", outEval)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Ensure different node was used per.
|
|
used := make(map[string]struct{})
|
|
for _, alloc := range out {
|
|
if _, ok := used[alloc.NodeID]; ok {
|
|
t.Fatalf("Node collision %v", alloc.NodeID)
|
|
}
|
|
used[alloc.NodeID] = struct{}{}
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_DistinctProperty(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
rack := "rack2"
|
|
if i < 5 {
|
|
rack = "rack1"
|
|
}
|
|
node.Meta["rack"] = rack
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job that uses distinct property and has count higher than what is
|
|
// possible.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 8
|
|
job.Constraints = append(job.Constraints,
|
|
&structs.Constraint{
|
|
Operand: structs.ConstraintDistinctProperty,
|
|
LTarget: "${meta.rack}",
|
|
RTarget: "2",
|
|
})
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
if plan.Annotations != nil {
|
|
t.Fatalf("expected no annotations")
|
|
}
|
|
|
|
// Ensure the eval has spawned blocked eval
|
|
if len(h.CreateEvals) != 1 {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
// Ensure the plan failed to alloc
|
|
outEval := h.Evals[0]
|
|
if len(outEval.FailedTGAllocs) != 1 {
|
|
t.Fatalf("bad: %+v", outEval)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 4 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 4 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Ensure each node was only used twice
|
|
used := make(map[string]uint64)
|
|
for _, alloc := range out {
|
|
if count, _ := used[alloc.NodeID]; count > 2 {
|
|
t.Fatalf("Node %v used too much: %d", alloc.NodeID, count)
|
|
}
|
|
used[alloc.NodeID]++
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_DistinctProperty_TaskGroup(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 2; i++ {
|
|
node := mock.Node()
|
|
node.Meta["ssd"] = "true"
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job that uses distinct property only on one task group.
|
|
job := mock.Job()
|
|
job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
|
|
job.TaskGroups[0].Count = 1
|
|
job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints,
|
|
&structs.Constraint{
|
|
Operand: structs.ConstraintDistinctProperty,
|
|
LTarget: "${meta.ssd}",
|
|
})
|
|
|
|
job.TaskGroups[1].Name = "tg2"
|
|
job.TaskGroups[1].Count = 2
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
if plan.Annotations != nil {
|
|
t.Fatalf("expected no annotations")
|
|
}
|
|
|
|
// Ensure the eval hasn't spawned blocked eval
|
|
if len(h.CreateEvals) != 0 {
|
|
t.Fatalf("bad: %#v", h.CreateEvals[0])
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 3 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 3 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_DistinctProperty_TaskGroup_Incr(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a job that uses distinct property over the node-id
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 3
|
|
job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints,
|
|
&structs.Constraint{
|
|
Operand: structs.ConstraintDistinctProperty,
|
|
LTarget: "${node.unique.id}",
|
|
})
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 6; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create some allocations
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 3; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the count
|
|
job2 := job.Copy()
|
|
job2.TaskGroups[0].Count = 6
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
must.Nil(t, plan.Annotations)
|
|
|
|
// Ensure the eval hasn't spawned blocked eval
|
|
must.Len(t, 0, h.CreateEvals)
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Len(t, 6, planned)
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
must.Len(t, 6, out)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// Test job registration with spread configured
|
|
func TestServiceSched_Spread(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
start := uint8(100)
|
|
step := uint8(10)
|
|
|
|
for i := 0; i < 10; i++ {
|
|
name := fmt.Sprintf("%d%% in dc1", start)
|
|
t.Run(name, func(t *testing.T) {
|
|
h := tests.NewHarness(t)
|
|
remaining := uint8(100 - start)
|
|
// Create a job that uses spread over data center
|
|
job := mock.Job()
|
|
job.Datacenters = []string{"dc*"}
|
|
job.TaskGroups[0].Count = 10
|
|
job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads,
|
|
&structs.Spread{
|
|
Attribute: "${node.datacenter}",
|
|
Weight: 100,
|
|
SpreadTarget: []*structs.SpreadTarget{
|
|
{
|
|
Value: "dc1",
|
|
Percent: start,
|
|
},
|
|
{
|
|
Value: "dc2",
|
|
Percent: remaining,
|
|
},
|
|
},
|
|
})
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
// Create some nodes, half in dc2
|
|
var nodes []*structs.Node
|
|
nodeMap := make(map[string]*structs.Node)
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
if i%2 == 0 {
|
|
node.Datacenter = "dc2"
|
|
}
|
|
// setting a narrow range makes it more likely for this test to
|
|
// hit bugs in NetworkIndex
|
|
node.NodeResources.MinDynamicPort = 20000
|
|
node.NodeResources.MaxDynamicPort = 20005
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
nodeMap[node.ID] = node
|
|
}
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
must.Nil(t, plan.Annotations)
|
|
|
|
// Ensure the eval hasn't spawned blocked eval
|
|
must.Len(t, 0, h.CreateEvals)
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
dcAllocsMap := make(map[string]int)
|
|
for nodeId, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
dc := nodeMap[nodeId].Datacenter
|
|
c := dcAllocsMap[dc]
|
|
c += len(allocList)
|
|
dcAllocsMap[dc] = c
|
|
}
|
|
must.Len(t, 10, planned)
|
|
|
|
expectedCounts := make(map[string]int)
|
|
expectedCounts["dc1"] = 10 - i
|
|
if i > 0 {
|
|
expectedCounts["dc2"] = i
|
|
}
|
|
must.Eq(t, expectedCounts, dcAllocsMap)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
})
|
|
start = start - step
|
|
}
|
|
}
|
|
|
|
// TestServiceSched_JobRegister_Datacenter_Downgrade tests the case where an
|
|
// allocation fails during a deployment with canaries, an the job changes its
|
|
// datacenter. The replacement for the failed alloc should be placed in the
|
|
// datacenter of the original job.
|
|
func TestServiceSched_JobRegister_Datacenter_Downgrade(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create 5 nodes in each datacenter.
|
|
// Use two loops so nodes are separated by datacenter.
|
|
nodes := []*structs.Node{}
|
|
for i := 0; i < 5; i++ {
|
|
node := mock.Node()
|
|
node.Name = fmt.Sprintf("node-dc1-%d", i)
|
|
node.Datacenter = "dc1"
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
for i := 0; i < 5; i++ {
|
|
node := mock.Node()
|
|
node.Name = fmt.Sprintf("node-dc2-%d", i)
|
|
node.Datacenter = "dc2"
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create first version of the test job running in dc1.
|
|
job1 := mock.Job()
|
|
job1.Version = 1
|
|
job1.Datacenters = []string{"dc1"}
|
|
job1.Status = structs.JobStatusRunning
|
|
job1.TaskGroups[0].Count = 3
|
|
job1.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
Stagger: time.Duration(30 * time.Second),
|
|
MaxParallel: 1,
|
|
HealthCheck: "checks",
|
|
MinHealthyTime: time.Duration(30 * time.Second),
|
|
HealthyDeadline: time.Duration(9 * time.Minute),
|
|
ProgressDeadline: time.Duration(10 * time.Minute),
|
|
AutoRevert: true,
|
|
Canary: 1,
|
|
}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job1))
|
|
|
|
// Create allocs for this job version with one being a canary and another
|
|
// marked as failed.
|
|
allocs := []*structs.Allocation{}
|
|
for i := 0; i < 3; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job1
|
|
alloc.JobID = job1.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
Timestamp: time.Now(),
|
|
Canary: false,
|
|
ModifyIndex: h.NextIndex(),
|
|
}
|
|
if i == 0 {
|
|
alloc.DeploymentStatus.Canary = true
|
|
}
|
|
if i == 1 {
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update job to place it in dc2.
|
|
job2 := job1.Copy()
|
|
job2.Version = 2
|
|
job2.Datacenters = []string{"dc2"}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
eval := &structs.Evaluation{
|
|
Namespace: job2.Namespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job2.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job2.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
processErr := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, processErr, must.Sprint("failed to process eval"))
|
|
must.Len(t, 1, h.Plans)
|
|
|
|
// Verify the plan places the new allocation in dc2 and the replacement
|
|
// for the failed allocation from the previous job version in dc1.
|
|
for nodeID, allocs := range h.Plans[0].NodeAllocation {
|
|
var node *structs.Node
|
|
for _, n := range nodes {
|
|
if n.ID == nodeID {
|
|
node = n
|
|
break
|
|
}
|
|
}
|
|
|
|
must.Len(t, 1, allocs)
|
|
alloc := allocs[0]
|
|
must.SliceContains(t, alloc.Job.Datacenters, node.Datacenter, must.Sprintf(
|
|
"alloc for job in datacenter %q placed in %q",
|
|
alloc.Job.Datacenters,
|
|
node.Datacenter,
|
|
))
|
|
}
|
|
}
|
|
|
|
// TestServiceSched_JobRegister_NodePool_Downgrade tests the case where an
|
|
// allocation fails during a deployment with canaries, where the job changes
|
|
// node pool. The failed alloc should be placed in the node pool of the
|
|
// original job.
|
|
func TestServiceSched_JobRegister_NodePool_Downgrade(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Set global scheduler configuration.
|
|
h.State.SchedulerSetConfig(h.NextIndex(), &structs.SchedulerConfiguration{
|
|
SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
})
|
|
|
|
// Create test node pools with different scheduler algorithms.
|
|
poolBinpack := mock.NodePool()
|
|
poolBinpack.Name = "pool-binpack"
|
|
poolBinpack.SchedulerConfiguration = &structs.NodePoolSchedulerConfiguration{
|
|
SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
}
|
|
|
|
poolSpread := mock.NodePool()
|
|
poolSpread.Name = "pool-spread"
|
|
poolSpread.SchedulerConfiguration = &structs.NodePoolSchedulerConfiguration{
|
|
SchedulerAlgorithm: structs.SchedulerAlgorithmSpread,
|
|
}
|
|
|
|
nodePools := []*structs.NodePool{
|
|
poolBinpack,
|
|
poolSpread,
|
|
}
|
|
h.State.UpsertNodePools(structs.MsgTypeTestSetup, h.NextIndex(), nodePools)
|
|
|
|
// Create 5 nodes in each node pool.
|
|
// Use two loops so nodes are separated by node pool.
|
|
nodes := []*structs.Node{}
|
|
for i := 0; i < 5; i++ {
|
|
node := mock.Node()
|
|
node.Name = fmt.Sprintf("node-binpack-%d", i)
|
|
node.NodePool = poolBinpack.Name
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
for i := 0; i < 5; i++ {
|
|
node := mock.Node()
|
|
node.Name = fmt.Sprintf("node-spread-%d", i)
|
|
node.NodePool = poolSpread.Name
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create first version of the test job running in the binpack node pool.
|
|
job1 := mock.Job()
|
|
job1.Version = 1
|
|
job1.NodePool = poolBinpack.Name
|
|
job1.Status = structs.JobStatusRunning
|
|
job1.TaskGroups[0].Count = 3
|
|
job1.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
Stagger: time.Duration(30 * time.Second),
|
|
MaxParallel: 1,
|
|
HealthCheck: "checks",
|
|
MinHealthyTime: time.Duration(30 * time.Second),
|
|
HealthyDeadline: time.Duration(9 * time.Minute),
|
|
ProgressDeadline: time.Duration(10 * time.Minute),
|
|
AutoRevert: true,
|
|
Canary: 1,
|
|
}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job1))
|
|
|
|
// Create allocs for this job version with one being a canary and another
|
|
// marked as failed.
|
|
allocs := []*structs.Allocation{}
|
|
for i := 0; i < 3; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job1
|
|
alloc.JobID = job1.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
Timestamp: time.Now(),
|
|
Canary: false,
|
|
ModifyIndex: h.NextIndex(),
|
|
}
|
|
if i == 0 {
|
|
alloc.DeploymentStatus.Canary = true
|
|
}
|
|
if i == 1 {
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update job to place it in the spread node pool.
|
|
job2 := job1.Copy()
|
|
job2.Version = 2
|
|
job2.NodePool = poolSpread.Name
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
eval := &structs.Evaluation{
|
|
Namespace: job2.Namespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job2.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job2.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
processErr := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, processErr, must.Sprint("failed to process eval"))
|
|
must.SliceLen(t, 1, h.Plans)
|
|
|
|
// Verify the plan places the new allocation in the spread node pool and
|
|
// the replacement failure from the previous version in the binpack pool.
|
|
for nodeID, allocs := range h.Plans[0].NodeAllocation {
|
|
var node *structs.Node
|
|
for _, n := range nodes {
|
|
if n.ID == nodeID {
|
|
node = n
|
|
break
|
|
}
|
|
}
|
|
|
|
must.Len(t, 1, allocs)
|
|
alloc := allocs[0]
|
|
must.Eq(t, alloc.Job.NodePool, node.NodePool, must.Sprintf(
|
|
"alloc for job in node pool %q placed in node in node pool %q",
|
|
alloc.Job.NodePool,
|
|
node.NodePool,
|
|
))
|
|
}
|
|
}
|
|
|
|
// Test job registration with even spread across dc
|
|
func TestServiceSched_EvenSpread(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
// Create a job that uses even spread over data center
|
|
job := mock.Job()
|
|
job.Datacenters = []string{"dc1", "dc2"}
|
|
job.TaskGroups[0].Count = 10
|
|
job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads,
|
|
&structs.Spread{
|
|
Attribute: "${node.datacenter}",
|
|
Weight: 100,
|
|
})
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
// Create some nodes, half in dc2
|
|
var nodes []*structs.Node
|
|
nodeMap := make(map[string]*structs.Node)
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
if i%2 == 0 {
|
|
node.Datacenter = "dc2"
|
|
}
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
nodeMap[node.ID] = node
|
|
}
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
must.Nil(t, plan.Annotations)
|
|
|
|
// Ensure the eval hasn't spawned blocked eval
|
|
must.Len(t, 0, h.CreateEvals)
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
dcAllocsMap := make(map[string]int)
|
|
for nodeId, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
dc := nodeMap[nodeId].Datacenter
|
|
c := dcAllocsMap[dc]
|
|
c += len(allocList)
|
|
dcAllocsMap[dc] = c
|
|
}
|
|
must.Len(t, 10, planned)
|
|
|
|
// Expect even split allocs across datacenter
|
|
expectedCounts := make(map[string]int)
|
|
expectedCounts["dc1"] = 5
|
|
expectedCounts["dc2"] = 5
|
|
|
|
must.Eq(t, expectedCounts, dcAllocsMap)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_Annotate(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
AnnotatePlan: true,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Ensure the plan had annotations.
|
|
if plan.Annotations == nil {
|
|
t.Fatalf("expected annotations")
|
|
}
|
|
|
|
desiredTGs := plan.Annotations.DesiredTGUpdates
|
|
if l := len(desiredTGs); l != 1 {
|
|
t.Fatalf("incorrect number of task groups; got %v; want %v", l, 1)
|
|
}
|
|
|
|
desiredChanges, ok := desiredTGs["web"]
|
|
if !ok {
|
|
t.Fatalf("expected task group web to have desired changes")
|
|
}
|
|
|
|
expected := &structs.DesiredUpdates{Place: 10}
|
|
must.Eq(t, expected, desiredChanges)
|
|
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_CountZero(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job and set the task group count to zero.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 0
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure there was no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure no allocations placed
|
|
if len(out) != 0 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create NO nodes
|
|
// Create a job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Ensure there is a follow up eval.
|
|
if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
if len(h.Evals) != 1 {
|
|
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
|
|
}
|
|
outEval := h.Evals[0]
|
|
|
|
// Ensure the eval has its spawned blocked eval
|
|
if outEval.BlockedEval != h.CreateEvals[0].ID {
|
|
t.Fatalf("bad: %#v", outEval)
|
|
}
|
|
|
|
// Ensure the plan failed to alloc
|
|
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
|
|
t.Fatalf("bad: %#v", outEval)
|
|
}
|
|
|
|
metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
|
|
if !ok {
|
|
t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
|
|
}
|
|
|
|
// Check the coalesced failures
|
|
if metrics.CoalescedFailures != 9 {
|
|
t.Fatalf("bad: %#v", metrics)
|
|
}
|
|
|
|
_, ok = metrics.NodesAvailable["dc1"]
|
|
must.False(t, ok, must.Sprintf(
|
|
"expected NodesAvailable metric to be unpopulated when there are no nodes"))
|
|
|
|
must.Zero(t, metrics.NodesInPool, must.Sprint(
|
|
"expected NodesInPool metric to be unpopulated when there are no nodes"))
|
|
|
|
// Check queued allocations
|
|
queued := outEval.QueuedAllocations["web"]
|
|
if queued != 10 {
|
|
t.Fatalf("expected queued: %v, actual: %v", 10, queued)
|
|
}
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_CreateBlockedEval(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a full node
|
|
node := mock.Node()
|
|
node.ReservedResources = &structs.NodeReservedResources{
|
|
Cpu: structs.NodeReservedCpuResources{
|
|
CpuShares: node.NodeResources.Cpu.CpuShares,
|
|
},
|
|
}
|
|
node.ComputeClass()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create an ineligible node
|
|
node2 := mock.Node()
|
|
node2.Attributes["kernel.name"] = "windows"
|
|
node2.ComputeClass()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
|
|
|
|
// Create a jobs
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Ensure the plan has created a follow up eval.
|
|
if len(h.CreateEvals) != 1 {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
created := h.CreateEvals[0]
|
|
if created.Status != structs.EvalStatusBlocked {
|
|
t.Fatalf("bad: %#v", created)
|
|
}
|
|
|
|
classes := created.ClassEligibility
|
|
if len(classes) != 2 || !classes[node.ComputedClass] || classes[node2.ComputedClass] {
|
|
t.Fatalf("bad: %#v", classes)
|
|
}
|
|
|
|
if created.EscapedComputedClass {
|
|
t.Fatalf("bad: %#v", created)
|
|
}
|
|
|
|
// Ensure there is a follow up eval.
|
|
if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
if len(h.Evals) != 1 {
|
|
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
|
|
}
|
|
outEval := h.Evals[0]
|
|
|
|
// Ensure the plan failed to alloc
|
|
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
|
|
t.Fatalf("bad: %#v", outEval)
|
|
}
|
|
|
|
metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
|
|
if !ok {
|
|
t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
|
|
}
|
|
|
|
// Check the coalesced failures
|
|
if metrics.CoalescedFailures != 9 {
|
|
t.Fatalf("bad: %#v", metrics)
|
|
}
|
|
|
|
// Check the available nodes
|
|
if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 2 {
|
|
t.Fatalf("bad: %#v", metrics)
|
|
}
|
|
|
|
must.Eq(t, 2, metrics.NodesInPool, must.Sprint("expected NodesInPool metric to be set"))
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create one node
|
|
node := mock.Node()
|
|
node.NodeClass = "class_0"
|
|
must.NoError(t, node.ComputeClass())
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job that constrains on a node class
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].Constraints = append(job.Constraints,
|
|
&structs.Constraint{
|
|
LTarget: "${node.class}",
|
|
RTarget: "class_0",
|
|
Operand: "=",
|
|
},
|
|
)
|
|
tg2 := job.TaskGroups[0].Copy()
|
|
tg2.Name = "web2"
|
|
tg2.Constraints[1].RTarget = "class_1"
|
|
job.TaskGroups = append(job.TaskGroups, tg2)
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 2 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure two allocations placed
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
if len(out) != 2 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
if len(h.Evals) != 1 {
|
|
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
|
|
}
|
|
outEval := h.Evals[0]
|
|
|
|
// Ensure the eval has its spawned blocked eval
|
|
if outEval.BlockedEval != h.CreateEvals[0].ID {
|
|
t.Fatalf("bad: %#v", outEval)
|
|
}
|
|
|
|
// Ensure the plan failed to alloc one tg
|
|
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
|
|
t.Fatalf("bad: %#v", outEval)
|
|
}
|
|
|
|
metrics, ok := outEval.FailedTGAllocs[tg2.Name]
|
|
if !ok {
|
|
t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
|
|
}
|
|
|
|
// Check the coalesced failures
|
|
if metrics.CoalescedFailures != tg2.Count-1 {
|
|
t.Fatalf("bad: %#v", metrics)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobRegister_SchedulerAlgorithm(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
// Test node pools.
|
|
poolNoSchedConfig := mock.NodePool()
|
|
poolNoSchedConfig.SchedulerConfiguration = nil
|
|
|
|
poolBinpack := mock.NodePool()
|
|
poolBinpack.SchedulerConfiguration = &structs.NodePoolSchedulerConfiguration{
|
|
SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
}
|
|
|
|
poolSpread := mock.NodePool()
|
|
poolSpread.SchedulerConfiguration = &structs.NodePoolSchedulerConfiguration{
|
|
SchedulerAlgorithm: structs.SchedulerAlgorithmSpread,
|
|
}
|
|
|
|
testCases := []struct {
|
|
name string
|
|
nodePool string
|
|
schedulerAlgorithm structs.SchedulerAlgorithm
|
|
expectedAlgorithm structs.SchedulerAlgorithm
|
|
}{
|
|
{
|
|
name: "global binpack",
|
|
nodePool: poolNoSchedConfig.Name,
|
|
schedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
expectedAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
},
|
|
{
|
|
name: "global spread",
|
|
nodePool: poolNoSchedConfig.Name,
|
|
schedulerAlgorithm: structs.SchedulerAlgorithmSpread,
|
|
expectedAlgorithm: structs.SchedulerAlgorithmSpread,
|
|
},
|
|
{
|
|
name: "node pool binpack overrides global config",
|
|
nodePool: poolBinpack.Name,
|
|
schedulerAlgorithm: structs.SchedulerAlgorithmSpread,
|
|
expectedAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
},
|
|
{
|
|
name: "node pool spread overrides global config",
|
|
nodePool: poolSpread.Name,
|
|
schedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
|
|
expectedAlgorithm: structs.SchedulerAlgorithmSpread,
|
|
},
|
|
}
|
|
|
|
jobTypes := []string{
|
|
"batch",
|
|
"service",
|
|
}
|
|
|
|
for _, jobType := range jobTypes {
|
|
for _, tc := range testCases {
|
|
t.Run(fmt.Sprintf("%s/%s", jobType, tc.name), func(t *testing.T) {
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create node pools.
|
|
nodePools := []*structs.NodePool{
|
|
poolNoSchedConfig,
|
|
poolBinpack,
|
|
poolSpread,
|
|
}
|
|
h.State.UpsertNodePools(structs.MsgTypeTestSetup, h.NextIndex(), nodePools)
|
|
|
|
// Create two test nodes. Use two to prevent flakiness due to
|
|
// the scheduler shuffling nodes.
|
|
for i := 0; i < 2; i++ {
|
|
node := mock.Node()
|
|
node.NodePool = tc.nodePool
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Set global scheduler configuration.
|
|
h.State.SchedulerSetConfig(h.NextIndex(), &structs.SchedulerConfiguration{
|
|
SchedulerAlgorithm: tc.schedulerAlgorithm,
|
|
})
|
|
|
|
// Create test job.
|
|
var job *structs.Job
|
|
switch jobType {
|
|
case "batch":
|
|
job = mock.BatchJob()
|
|
case "service":
|
|
job = mock.Job()
|
|
}
|
|
job.TaskGroups[0].Count = 1
|
|
job.NodePool = tc.nodePool
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Register an existing job.
|
|
existingJob := mock.Job()
|
|
existingJob.TaskGroups[0].Count = 1
|
|
existingJob.NodePool = tc.nodePool
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, existingJob))
|
|
|
|
// Process eval for existing job to place an existing alloc.
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: existingJob.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: existingJob.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
var sched sstructs.Factory
|
|
switch jobType {
|
|
case "batch":
|
|
sched = NewBatchScheduler
|
|
case "service":
|
|
sched = NewServiceScheduler
|
|
}
|
|
err := h.Process(sched, eval)
|
|
must.NoError(t, err)
|
|
|
|
must.Len(t, 1, h.Plans)
|
|
allocs, err := h.State.AllocsByJob(nil, existingJob.Namespace, existingJob.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, allocs)
|
|
|
|
// Process eval for test job.
|
|
eval = &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
err = h.Process(sched, eval)
|
|
must.NoError(t, err)
|
|
|
|
must.Len(t, 2, h.Plans)
|
|
allocs, err = h.State.AllocsByJob(nil, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, allocs)
|
|
|
|
// Expect new alloc to be either in the empty node or in the
|
|
// node with the existing alloc depending on the expected
|
|
// scheduler algorithm.
|
|
var expectedAllocCount int
|
|
switch tc.expectedAlgorithm {
|
|
case structs.SchedulerAlgorithmSpread:
|
|
expectedAllocCount = 1
|
|
case structs.SchedulerAlgorithmBinpack:
|
|
expectedAllocCount = 2
|
|
}
|
|
|
|
alloc := allocs[0]
|
|
nodeAllocs, err := h.State.AllocsByNode(nil, alloc.NodeID)
|
|
must.NoError(t, err)
|
|
must.Len(t, expectedAllocCount, nodeAllocs)
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
// This test just ensures the scheduler handles the eval type to avoid
|
|
// regressions.
|
|
func TestServiceSched_EvaluateMaxPlanEval(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a job and set the task group count to zero.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 0
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock blocked evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Status: structs.EvalStatusBlocked,
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerMaxPlans,
|
|
JobID: job.ID,
|
|
}
|
|
|
|
// Insert it into the state store
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure there was no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_Plan_Partial_Progress(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node of limited resources
|
|
legacyCpuResources4000, processorResources4000 := tests.CpuResources(4000)
|
|
node := mock.Node()
|
|
node.NodeResources.Processors = processorResources4000
|
|
node.NodeResources.Cpu = legacyCpuResources4000
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job with a high resource ask so that all the allocations can't
|
|
// be placed on a single node.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 3
|
|
job.TaskGroups[0].Tasks[0].Resources.CPU = 3600
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure a single plan
|
|
must.SliceLen(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
must.Nil(t, plan.Annotations)
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.SliceLen(t, 1, planned)
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure only one allocations placed
|
|
must.SliceLen(t, 1, out)
|
|
|
|
// Ensure 2 queued
|
|
queued := h.Evals[0].QueuedAllocations["web"]
|
|
must.Eq(t, 2, queued, must.Sprintf("exp: 2, got: %#v", h.Evals[0].QueuedAllocations))
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_EvaluateBlockedEval(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock blocked evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Status: structs.EvalStatusBlocked,
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
}
|
|
|
|
// Insert it into the state store
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure there was no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Ensure that the eval was reblocked
|
|
if len(h.ReblockEvals) != 1 {
|
|
t.Fatalf("bad: %#v", h.ReblockEvals)
|
|
}
|
|
if h.ReblockEvals[0].ID != eval.ID {
|
|
t.Fatalf("expect same eval to be reblocked; got %q; want %q", h.ReblockEvals[0].ID, eval.ID)
|
|
}
|
|
|
|
// Ensure the eval status was not updated
|
|
if len(h.Evals) != 0 {
|
|
t.Fatalf("Existing eval should not have status set")
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_EvaluateBlockedEval_Finished(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job and set the task group count to zero.
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock blocked evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Status: structs.EvalStatusBlocked,
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
}
|
|
|
|
// Insert it into the state store
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
if plan.Annotations != nil {
|
|
t.Fatalf("expected no annotations")
|
|
}
|
|
|
|
// Ensure the eval has no spawned blocked eval
|
|
if len(h.Evals) != 1 {
|
|
t.Errorf("bad: %#v", h.Evals)
|
|
if h.Evals[0].BlockedEval != "" {
|
|
t.Fatalf("bad: %#v", h.Evals[0])
|
|
}
|
|
t.FailNow()
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Ensure the eval was not reblocked
|
|
if len(h.ReblockEvals) != 0 {
|
|
t.Fatalf("Existing eval should not have been reblocked as it placed all allocations")
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Ensure queued allocations is zero
|
|
queued := h.Evals[0].QueuedAllocations["web"]
|
|
if queued != 0 {
|
|
t.Fatalf("expected queued: %v, actual: %v", 0, queued)
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_JobModify(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Add a few terminal status allocations, these should be ignored
|
|
var terminal []*structs.Allocation
|
|
for i := 0; i < 5; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed // #10446
|
|
terminal = append(terminal, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), terminal))
|
|
|
|
// Update the job
|
|
job2 := mock.Job()
|
|
job2.ID = job.ID
|
|
|
|
// Update the task, such that it cannot be done in-place
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted all allocs
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
if len(update) != len(allocs) {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_ExistingDuplicateAllocIndex(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
testHarness := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
mockJob := mock.Job()
|
|
must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
|
|
|
|
// Generate some allocations which will represent our pre-existing
|
|
// allocations. These have aggressive duplicate names.
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = mockJob
|
|
alloc.JobID = mockJob.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
|
|
if i%2 == 0 {
|
|
alloc.Name = "my-job.web[0]"
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
|
|
|
|
// Generate a job modification which will force a destructive update.
|
|
mockJob2 := mock.Job()
|
|
mockJob2.ID = mockJob.ID
|
|
mockJob2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
|
|
|
|
// Create a mock evaluation which represents work to reconcile the job
|
|
// update.
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: mockJob2.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation and ensure we get a single plan as a result.
|
|
must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 1, testHarness.Plans)
|
|
|
|
// Iterate and track the node allocations to ensure we have the correct
|
|
// amount, and that there a now no duplicate names.
|
|
totalNodeAllocations := 0
|
|
allocIndexNames := make(map[string]int)
|
|
|
|
for _, planNodeAlloc := range testHarness.Plans[0].NodeAllocation {
|
|
for _, nodeAlloc := range planNodeAlloc {
|
|
totalNodeAllocations++
|
|
allocIndexNames[nodeAlloc.Name]++
|
|
|
|
if val, ok := allocIndexNames[nodeAlloc.Name]; ok && val > 1 {
|
|
t.Fatalf("found duplicate alloc name %q found", nodeAlloc.Name)
|
|
}
|
|
}
|
|
}
|
|
must.Eq(t, 10, totalNodeAllocations)
|
|
|
|
testHarness.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_ProposedDuplicateAllocIndex(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
testHarness := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a job which includes a canary update strategy.
|
|
mockJob := mock.MinJob()
|
|
mockJob.TaskGroups[0].Count = 3
|
|
mockJob.Update = structs.UpdateStrategy{
|
|
Canary: 1,
|
|
MaxParallel: 3,
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
|
|
|
|
// Generate some allocations which will represent our pre-existing
|
|
// allocations.
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 3; i++ {
|
|
alloc := mock.MinAlloc()
|
|
alloc.Namespace = structs.DefaultNamespace
|
|
alloc.Job = mockJob
|
|
alloc.JobID = mockJob.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = structs.AllocName(mockJob.ID, mockJob.TaskGroups[0].Name, uint(i))
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
|
|
|
|
// Generate a job modification which will force a destructive update as
|
|
// well as a scaling.
|
|
mockJob2 := mockJob.Copy()
|
|
mockJob2.Version++
|
|
mockJob2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
mockJob2.TaskGroups[0].Count++
|
|
must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
|
|
|
|
nextRaftIndex := testHarness.NextIndex()
|
|
deploymentID := uuid.Generate()
|
|
|
|
// Upsert a canary into state, this represents the first stage of the
|
|
// deployment process and jumps us to the point where duplicate allocation
|
|
// indexes could be produced.
|
|
canaryAlloc := mock.MinAlloc()
|
|
canaryAlloc.Namespace = structs.DefaultNamespace
|
|
canaryAlloc.Job = mockJob2
|
|
canaryAlloc.JobID = mockJob2.ID
|
|
canaryAlloc.NodeID = nodes[1].ID
|
|
canaryAlloc.Name = structs.AllocName(mockJob2.ID, mockJob2.TaskGroups[0].Name, uint(0))
|
|
canaryAlloc.DeploymentID = deploymentID
|
|
canaryAlloc.ClientStatus = structs.AllocClientStatusRunning
|
|
must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, nextRaftIndex, []*structs.Allocation{
|
|
canaryAlloc,
|
|
}))
|
|
|
|
// Craft our deployment object which represents the post-canary state. This
|
|
// unblocks the rest of the deployment process, where we replace the old
|
|
// job version allocations.
|
|
canaryDeployment := structs.Deployment{
|
|
ID: deploymentID,
|
|
Namespace: mockJob2.Namespace,
|
|
JobID: mockJob2.ID,
|
|
JobVersion: mockJob2.Version,
|
|
TaskGroups: map[string]*structs.DeploymentState{
|
|
mockJob2.TaskGroups[0].Name: {
|
|
Promoted: true,
|
|
DesiredTotal: 4,
|
|
HealthyAllocs: 1,
|
|
PlacedAllocs: 1,
|
|
PlacedCanaries: []string{canaryAlloc.ID},
|
|
},
|
|
},
|
|
Status: structs.DeploymentStatusRunning,
|
|
StatusDescription: structs.DeploymentStatusDescriptionRunning,
|
|
EvalPriority: 50,
|
|
JobCreateIndex: mockJob2.CreateIndex,
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertDeployment(nextRaftIndex, &canaryDeployment))
|
|
|
|
// Create a mock evaluation which represents work to reconcile the job
|
|
// update.
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: mockJob2.ID,
|
|
Status: structs.EvalStatusPending,
|
|
DeploymentID: deploymentID,
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation and ensure we get a single plan as a result.
|
|
must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 1, testHarness.Plans)
|
|
|
|
// Iterate and track the node allocations to ensure we have the correct
|
|
// amount, and that there a now no duplicate names. Before the duplicate
|
|
// allocation name fix, this section of testing would fail.
|
|
totalNodeAllocations := 0
|
|
allocIndexNames := map[string]int{canaryAlloc.Name: 1}
|
|
|
|
for _, planNodeAlloc := range testHarness.Plans[0].NodeAllocation {
|
|
for _, nodeAlloc := range planNodeAlloc {
|
|
totalNodeAllocations++
|
|
allocIndexNames[nodeAlloc.Name]++
|
|
|
|
if val, ok := allocIndexNames[nodeAlloc.Name]; ok && val > 1 {
|
|
t.Fatalf("found duplicate alloc name %q found", nodeAlloc.Name)
|
|
}
|
|
}
|
|
}
|
|
must.Eq(t, 3, totalNodeAllocations)
|
|
|
|
// Ensure the correct number of destructive node updates.
|
|
totalNodeUpdates := 0
|
|
|
|
for _, planNodeUpdate := range testHarness.Plans[0].NodeUpdate {
|
|
totalNodeUpdates += len(planNodeUpdate)
|
|
}
|
|
must.Eq(t, 3, totalNodeUpdates)
|
|
|
|
testHarness.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_ExistingDuplicateAllocIndexNonDestructive(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
testHarness := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
mockJob := mock.MinJob()
|
|
mockJob.TaskGroups[0].Count = 10
|
|
must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
|
|
|
|
// Generate some allocations which will represent our pre-existing
|
|
// allocations. These have aggressive duplicate names.
|
|
var (
|
|
allocs []*structs.Allocation
|
|
allocIDs []string
|
|
)
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.MinAlloc()
|
|
alloc.Namespace = structs.DefaultNamespace
|
|
alloc.Job = mockJob
|
|
alloc.JobID = mockJob.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
|
|
if i%2 == 0 {
|
|
alloc.Name = "my-job.web[0]"
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
allocIDs = append(allocIDs, alloc.ID)
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
|
|
|
|
// Generate a job modification which will be an in-place update.
|
|
mockJob2 := mockJob.Copy()
|
|
mockJob2.ID = mockJob.ID
|
|
mockJob2.Update.MaxParallel = 2
|
|
must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
|
|
|
|
// Create a mock evaluation which represents work to reconcile the job
|
|
// update.
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: mockJob2.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation and ensure we get a single plan as a result.
|
|
must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 1, testHarness.Plans)
|
|
|
|
// Ensure the plan did not want to perform any destructive updates.
|
|
var nodeUpdateCount int
|
|
|
|
for _, nodeUpdateAllocs := range testHarness.Plans[0].NodeUpdate {
|
|
nodeUpdateCount += len(nodeUpdateAllocs)
|
|
}
|
|
must.Zero(t, nodeUpdateCount)
|
|
|
|
// Ensure the plan updated the existing allocs by checking the count, the
|
|
// job object, and the allocation IDs.
|
|
var (
|
|
nodeAllocationCount int
|
|
nodeAllocationIDs []string
|
|
)
|
|
|
|
for _, nodeAllocs := range testHarness.Plans[0].NodeAllocation {
|
|
nodeAllocationCount += len(nodeAllocs)
|
|
|
|
for _, nodeAlloc := range nodeAllocs {
|
|
must.Eq(t, mockJob2, nodeAlloc.Job)
|
|
nodeAllocationIDs = append(nodeAllocationIDs, nodeAlloc.ID)
|
|
}
|
|
}
|
|
must.Eq(t, 10, nodeAllocationCount)
|
|
must.SliceContainsAll(t, allocIDs, nodeAllocationIDs)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_Datacenters(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes in 3 DCs
|
|
var nodes []*structs.Node
|
|
for i := 1; i < 4; i++ {
|
|
node := mock.Node()
|
|
node.Datacenter = fmt.Sprintf("dc%d", i)
|
|
nodes = append(nodes, node)
|
|
h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 3
|
|
job.Datacenters = []string{"dc1", "dc2", "dc3"}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 3; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job to 2 DCs
|
|
job2 := job.Copy()
|
|
job2.TaskGroups[0].Count = 4
|
|
job2.Datacenters = []string{"dc1", "dc2"}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Ensure a single plan
|
|
must.SliceLen(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
must.MapLen(t, 1, plan.NodeUpdate) // alloc in DC3 gets destructive update
|
|
must.SliceLen(t, 1, plan.NodeUpdate[nodes[2].ID])
|
|
must.Eq(t, allocs[2].ID, plan.NodeUpdate[nodes[2].ID][0].ID)
|
|
|
|
must.MapLen(t, 2, plan.NodeAllocation) // only 2 eligible nodes
|
|
placed := map[string]*structs.Allocation{}
|
|
for node, placedAllocs := range plan.NodeAllocation {
|
|
must.True(t,
|
|
slices.Contains([]string{nodes[0].ID, nodes[1].ID}, node),
|
|
must.Sprint("allocation placed on ineligible node"),
|
|
)
|
|
for _, alloc := range placedAllocs {
|
|
placed[alloc.ID] = alloc
|
|
}
|
|
}
|
|
must.MapLen(t, 4, placed)
|
|
must.Eq(t, nodes[0].ID, placed[allocs[0].ID].NodeID, must.Sprint("alloc should not have moved"))
|
|
must.Eq(t, nodes[1].ID, placed[allocs[1].ID].NodeID, must.Sprint("alloc should not have moved"))
|
|
}
|
|
|
|
// Have a single node and submit a job. Increment the count such that all fit
|
|
// on the node but the node doesn't have enough resources to fit the new count +
|
|
// 1. This tests that we properly discount the resources of existing allocs.
|
|
func TestServiceSched_JobModify_IncrCount_NodeLimit(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create one node
|
|
node := mock.Node()
|
|
node.NodeResources.Cpu.CpuShares = 1000
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with one allocation
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Tasks[0].Resources.CPU = 256
|
|
job2 := job.Copy()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.AllocatedResources.Tasks["web"].Cpu.CpuShares = 256
|
|
allocs = append(allocs, alloc)
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job to count 3
|
|
job2.TaskGroups[0].Count = 3
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan didn't evicted the alloc
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
if len(update) != 0 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 3 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure the plan had no failures
|
|
if len(h.Evals) != 1 {
|
|
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
|
|
}
|
|
outEval := h.Evals[0]
|
|
if outEval == nil || len(outEval.FailedTGAllocs) != 0 {
|
|
t.Fatalf("bad: %#v", outEval)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
if len(out) != 3 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_CountZero(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
var nodes []*structs.Node
|
|
for range 10 {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// allocations w/ DesiredStatus=run that we expect to stop
|
|
var allocs []*structs.Allocation
|
|
for i := range 10 {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i))
|
|
if i%2 == 0 {
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Add a few server-terminal status allocations, these should be ignored
|
|
var terminal []*structs.Allocation
|
|
for i := range 5 {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i))
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
|
terminal = append(terminal, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), terminal))
|
|
|
|
// Update the job to be count zero
|
|
job2 := mock.Job()
|
|
job2.ID = job.ID
|
|
job2.TaskGroups[0].Count = 0
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted all non-server-terminal allocs
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
must.Eq(t, len(allocs), len(update), must.Sprintf("expected all stopped: %#v", plan))
|
|
|
|
// Ensure the plan didn't place any allocations
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Len(t, 0, planned, must.Sprintf("expected no placements: %#v", plan))
|
|
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
must.Len(t, 0, out, must.Sprintf("expected no non-terminal allocs: %#v", out))
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_Rolling(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job
|
|
job2 := mock.Job()
|
|
job2.ID = job.ID
|
|
desiredUpdates := 4
|
|
job2.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
MaxParallel: desiredUpdates,
|
|
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
|
|
MinHealthyTime: 10 * time.Second,
|
|
HealthyDeadline: 10 * time.Minute,
|
|
}
|
|
|
|
// Update the task, such that it cannot be done in-place
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted only MaxParallel
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
if len(update) != desiredUpdates {
|
|
t.Fatalf("bad: got %d; want %d: %#v", len(update), desiredUpdates, plan)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != desiredUpdates {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Check that the deployment id is attached to the eval
|
|
if h.Evals[0].DeploymentID == "" {
|
|
t.Fatalf("Eval not annotated with deployment id")
|
|
}
|
|
|
|
// Ensure a deployment was created
|
|
if plan.Deployment == nil {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
dstate, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
|
|
if !ok {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
if dstate.DesiredTotal != 10 && dstate.DesiredCanaries != 0 {
|
|
t.Fatalf("bad: %#v", dstate)
|
|
}
|
|
}
|
|
|
|
// This tests that the old allocation is stopped before placing.
|
|
// It is critical to test that the updated job attempts to place more
|
|
// allocations as this allows us to assert that destructive changes are done
|
|
// first.
|
|
func TestServiceSched_JobModify_Rolling_FullNode(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node and clear the reserved resources
|
|
node := mock.Node()
|
|
node.ReservedResources = nil
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a resource ask that is the same as the resources available on the
|
|
// node
|
|
cpu := node.NodeResources.Cpu.CpuShares
|
|
mem := node.NodeResources.Memory.MemoryMB
|
|
|
|
request := &structs.Resources{
|
|
CPU: int(cpu),
|
|
MemoryMB: int(mem),
|
|
}
|
|
allocated := &structs.AllocatedResources{
|
|
Tasks: map[string]*structs.AllocatedTaskResources{
|
|
"web": {
|
|
Cpu: structs.AllocatedCpuResources{
|
|
CpuShares: cpu,
|
|
},
|
|
Memory: structs.AllocatedMemoryResources{
|
|
MemoryMB: mem,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
// Generate a fake job with one alloc that consumes the whole node
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 1
|
|
job.TaskGroups[0].Tasks[0].Resources = request
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
alloc := mock.Alloc()
|
|
alloc.AllocatedResources = allocated
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Update the job to place more versions of the task group, drop the count
|
|
// and force destructive updates
|
|
job2 := job.Copy()
|
|
job2.TaskGroups[0].Count = 5
|
|
job2.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
MaxParallel: 5,
|
|
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
|
|
MinHealthyTime: 10 * time.Second,
|
|
HealthyDeadline: 10 * time.Minute,
|
|
}
|
|
job2.TaskGroups[0].Tasks[0].Resources = mock.Job().TaskGroups[0].Tasks[0].Resources
|
|
|
|
// Update the task, such that it cannot be done in-place
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted only MaxParallel
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
if len(update) != 1 {
|
|
t.Fatalf("bad: got %d; want %d: %#v", len(update), 1, plan)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 5 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Check that the deployment id is attached to the eval
|
|
if h.Evals[0].DeploymentID == "" {
|
|
t.Fatalf("Eval not annotated with deployment id")
|
|
}
|
|
|
|
// Ensure a deployment was created
|
|
if plan.Deployment == nil {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
dstate, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
|
|
if !ok {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
if dstate.DesiredTotal != 5 || dstate.DesiredCanaries != 0 {
|
|
t.Fatalf("bad: %#v", dstate)
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_JobModify_Canaries(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job
|
|
job2 := mock.Job()
|
|
job2.ID = job.ID
|
|
desiredUpdates := 2
|
|
job2.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
MaxParallel: desiredUpdates,
|
|
Canary: desiredUpdates,
|
|
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
|
|
MinHealthyTime: 10 * time.Second,
|
|
HealthyDeadline: 10 * time.Minute,
|
|
}
|
|
|
|
// Update the task, such that it cannot be done in-place
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted nothing
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
if len(update) != 0 {
|
|
t.Fatalf("bad: got %d; want %d: %#v", len(update), 0, plan)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != desiredUpdates {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
for _, canary := range planned {
|
|
if canary.DeploymentStatus == nil || !canary.DeploymentStatus.Canary {
|
|
t.Fatalf("expected canary field to be set on canary alloc %q", canary.ID)
|
|
}
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Check that the deployment id is attached to the eval
|
|
if h.Evals[0].DeploymentID == "" {
|
|
t.Fatalf("Eval not annotated with deployment id")
|
|
}
|
|
|
|
// Ensure a deployment was created
|
|
if plan.Deployment == nil {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure local state was not altered in scheduler
|
|
staleDState, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
|
|
must.True(t, ok)
|
|
|
|
must.Eq(t, 0, len(staleDState.PlacedCanaries))
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
// Grab the latest state
|
|
deploy, err := h.State.DeploymentByID(ws, plan.Deployment.ID)
|
|
must.NoError(t, err)
|
|
|
|
state, ok := deploy.TaskGroups[job.TaskGroups[0].Name]
|
|
must.True(t, ok)
|
|
|
|
must.Eq(t, 10, state.DesiredTotal)
|
|
must.Eq(t, desiredUpdates, state.DesiredCanaries)
|
|
|
|
// Assert the canaries were added to the placed list
|
|
must.Eq(t, desiredUpdates, len(state.PlacedCanaries))
|
|
}
|
|
|
|
func TestServiceSched_JobModify_InPlace(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and create an older deployment
|
|
job := mock.Job()
|
|
d := mock.Deployment()
|
|
d.JobID = job.ID
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))
|
|
|
|
taskName := job.TaskGroups[0].Tasks[0].Name
|
|
|
|
adr := structs.AllocatedDeviceResource{
|
|
Type: "gpu",
|
|
Vendor: "nvidia",
|
|
Name: "1080ti",
|
|
DeviceIDs: []string{uuid.Generate()},
|
|
}
|
|
|
|
asr := structs.AllocatedSharedResources{
|
|
Ports: structs.AllocatedPorts{{Label: "http"}},
|
|
Networks: structs.Networks{{Mode: "bridge"}},
|
|
}
|
|
|
|
// Create allocs that are part of the old deployment
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.AllocForNode(nodes[i])
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DeploymentID = d.ID
|
|
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
|
|
alloc.AllocatedResources.Tasks[taskName].Devices = []*structs.AllocatedDeviceResource{&adr}
|
|
alloc.AllocatedResources.Shared = asr
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job
|
|
job2 := mock.Job()
|
|
job2.ID = job.ID
|
|
desiredUpdates := 4
|
|
job2.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
MaxParallel: desiredUpdates,
|
|
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
|
|
MinHealthyTime: 10 * time.Second,
|
|
HealthyDeadline: 10 * time.Minute,
|
|
}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan did not evict any allocs
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
if len(update) != 0 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure the plan updated the existing allocs
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
for _, p := range planned {
|
|
if p.Job != job2 {
|
|
t.Fatalf("should update job")
|
|
}
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
must.Len(t, 10, out)
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
// Verify the allocated networks and devices did not change
|
|
rp := structs.Port{Label: "admin", Value: 5000}
|
|
for _, alloc := range out {
|
|
// Verify Shared Allocared Resources Persisted
|
|
must.Eq(t, asr.Ports, alloc.AllocatedResources.Shared.Ports)
|
|
must.Eq(t, asr.Networks, alloc.AllocatedResources.Shared.Networks)
|
|
|
|
for _, resources := range alloc.AllocatedResources.Tasks {
|
|
must.Eq(t, rp, resources.Networks[0].ReservedPorts[0])
|
|
if len(resources.Devices) == 0 || reflect.DeepEqual(resources.Devices[0], adr) {
|
|
t.Fatalf("bad devices has changed: %#v", alloc)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Verify the deployment id was changed and health cleared
|
|
for _, alloc := range out {
|
|
if alloc.DeploymentID == d.ID {
|
|
t.Fatalf("bad: deployment id not cleared")
|
|
} else if alloc.DeploymentStatus != nil {
|
|
t.Fatalf("bad: deployment status not cleared")
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestServiceSched_JobModify_InPlace08 asserts that inplace updates of
|
|
// allocations created with Nomad 0.8 do not cause panics.
|
|
//
|
|
// COMPAT(0.11) - While we do not guarantee that upgrades from 0.8 -> 0.10
|
|
// (skipping 0.9) are safe, we do want to avoid panics in the scheduler which
|
|
// cause unrecoverable server outages with no chance of recovery.
|
|
//
|
|
// Safe to remove in 0.11.0 as no one should ever be trying to upgrade from 0.8
|
|
// to 0.11!
|
|
func TestServiceSched_JobModify_InPlace08(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with 0.8 allocations
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create 0.8 alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job.Copy()
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.AllocatedResources = nil // 0.8 didn't have this
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Update the job inplace
|
|
job2 := job.Copy()
|
|
|
|
job2.TaskGroups[0].Tasks[0].Services[0].Tags[0] = "newtag"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure a single plan
|
|
must.SliceLen(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan did not evict any allocs
|
|
var update []*structs.Allocation
|
|
for _, updateList := range plan.NodeUpdate {
|
|
update = append(update, updateList...)
|
|
}
|
|
must.SliceLen(t, 0, update)
|
|
|
|
// Ensure the plan updated the existing alloc
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.SliceLen(t, 1, planned)
|
|
for _, p := range planned {
|
|
must.Eq(t, job2, p.Job)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
must.SliceLen(t, 1, out)
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
newAlloc := out[0]
|
|
|
|
// Verify AllocatedResources was set
|
|
must.NotNil(t, newAlloc.AllocatedResources)
|
|
}
|
|
|
|
func TestServiceSched_JobModify_DistinctProperty(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
node.Meta["rack"] = fmt.Sprintf("rack%d", i)
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job that uses distinct property and has count higher than what is
|
|
// possible.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 11
|
|
job.Constraints = append(job.Constraints,
|
|
&structs.Constraint{
|
|
Operand: structs.ConstraintDistinctProperty,
|
|
LTarget: "${meta.rack}",
|
|
})
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
oldJob := job.Copy()
|
|
oldJob.JobModifyIndex -= 1
|
|
oldJob.TaskGroups[0].Count = 4
|
|
|
|
// Place 4 of 10
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 4; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = oldJob
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
if plan.Annotations != nil {
|
|
t.Fatalf("expected no annotations")
|
|
}
|
|
|
|
// Ensure the eval hasn't spawned blocked eval
|
|
if len(h.CreateEvals) != 1 {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
|
|
// Ensure the plan failed to alloc
|
|
outEval := h.Evals[0]
|
|
if len(outEval.FailedTGAllocs) != 1 {
|
|
t.Fatalf("bad: %+v", outEval)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", planned)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Ensure different node was used per.
|
|
used := make(map[string]struct{})
|
|
for _, alloc := range out {
|
|
if _, ok := used[alloc.NodeID]; ok {
|
|
t.Fatalf("Node collision %v", alloc.NodeID)
|
|
}
|
|
used[alloc.NodeID] = struct{}{}
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// TestServiceSched_JobModify_NodeReschedulePenalty ensures that
|
|
// a failing allocation gets rescheduled with a penalty to the old
|
|
// node, but an updated job doesn't apply the penalty.
|
|
func TestServiceSched_JobModify_NodeReschedulePenalty(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: 1,
|
|
Interval: 15 * time.Minute,
|
|
Delay: 5 * time.Second,
|
|
MaxDelay: 1 * time.Minute,
|
|
DelayFunction: "constant",
|
|
}
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
// Mark one of the allocations as failed
|
|
allocs[1].ClientStatus = structs.AllocClientStatusFailed
|
|
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now.Add(-10 * time.Second)}}
|
|
failedAlloc := allocs[1]
|
|
failedAllocID := failedAlloc.ID
|
|
successAllocID := allocs[0].ID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create and process a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure we have one plan
|
|
must.Eq(t, 1, len(h.Plans))
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Verify that one new allocation got created with its restart tracker info
|
|
must.Eq(t, 3, len(out))
|
|
var newAlloc *structs.Allocation
|
|
for _, alloc := range out {
|
|
if alloc.ID != successAllocID && alloc.ID != failedAllocID {
|
|
newAlloc = alloc
|
|
}
|
|
}
|
|
must.Eq(t, failedAllocID, newAlloc.PreviousAllocation)
|
|
must.Eq(t, 1, len(newAlloc.RescheduleTracker.Events))
|
|
must.Eq(t, failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID)
|
|
|
|
// Verify that the node-reschedule penalty was applied to the new alloc
|
|
for _, scoreMeta := range newAlloc.Metrics.ScoreMetaData {
|
|
if scoreMeta.NodeID == failedAlloc.NodeID {
|
|
must.Eq(t, -1.0, scoreMeta.Scores["node-reschedule-penalty"], must.Sprintf(
|
|
"eval to replace failed alloc missing node-reshedule-penalty: %v",
|
|
scoreMeta.Scores,
|
|
))
|
|
}
|
|
}
|
|
|
|
// Update the job, such that it cannot be done in-place
|
|
job2 := job.Copy()
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create and process a mock evaluation
|
|
eval = &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Lookup the new allocations by JobID
|
|
out, err = h.State.AllocsByJob(ws, job.Namespace, job2.ID, false)
|
|
must.NoError(t, err)
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
must.Eq(t, 2, len(out))
|
|
|
|
// No new allocs have node-reschedule-penalty
|
|
for _, alloc := range out {
|
|
must.Nil(t, alloc.RescheduleTracker)
|
|
must.NotNil(t, alloc.Metrics)
|
|
for _, scoreMeta := range alloc.Metrics.ScoreMetaData {
|
|
if scoreMeta.NodeID != failedAlloc.NodeID {
|
|
must.Eq(t, 0.0, scoreMeta.Scores["node-reschedule-penalty"], must.Sprintf(
|
|
"eval for updated job should not include node-reshedule-penalty: %v",
|
|
scoreMeta.Scores,
|
|
))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_JobDeregister_Purged(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
for _, alloc := range allocs {
|
|
h.State.UpsertJobSummary(h.NextIndex(), mock.JobSummary(alloc.JobID))
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation to deregister the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobDeregister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted all nodes
|
|
if len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"]) != len(allocs) {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure that the job field on the allocation is still populated
|
|
for _, alloc := range out {
|
|
if alloc.Job == nil {
|
|
t.Fatalf("bad: %#v", alloc)
|
|
}
|
|
}
|
|
|
|
// Ensure no remaining allocations
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
if len(out) != 0 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_JobDeregister_Stopped(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
job.Stop = true
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a summary where the queued allocs are set as we want to assert
|
|
// they get zeroed out.
|
|
summary := mock.JobSummary(job.ID)
|
|
web := summary.Summary["web"]
|
|
web.Queued = 2
|
|
must.NoError(t, h.State.UpsertJobSummary(h.NextIndex(), summary))
|
|
|
|
// Create a mock evaluation to deregister the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobDeregister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure a single plan
|
|
must.SliceLen(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted all nodes
|
|
must.SliceLen(t, len(allocs), plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"])
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure that the job field on the allocation is still populated
|
|
for _, alloc := range out {
|
|
must.NotNil(t, alloc.Job)
|
|
}
|
|
|
|
// Ensure no remaining allocations
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
must.SliceLen(t, 0, out)
|
|
|
|
// Assert the job summary is cleared out
|
|
sout, err := h.State.JobSummaryByID(ws, job.Namespace, job.ID)
|
|
must.NoError(t, err)
|
|
must.NotNil(t, sout)
|
|
must.MapContainsKey(t, sout.Summary, "web")
|
|
webOut := sout.Summary["web"]
|
|
must.Zero(t, webOut.Queued)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_NodeDown(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
cases := []struct {
|
|
name string
|
|
desired string
|
|
client string
|
|
migrate bool
|
|
reschedule bool
|
|
terminal bool
|
|
lost bool
|
|
}{
|
|
{
|
|
name: "should stop is running should be lost",
|
|
desired: structs.AllocDesiredStatusStop,
|
|
client: structs.AllocClientStatusRunning,
|
|
lost: true,
|
|
},
|
|
{
|
|
name: "should run is pending should be migrate",
|
|
desired: structs.AllocDesiredStatusRun,
|
|
client: structs.AllocClientStatusPending,
|
|
migrate: true,
|
|
},
|
|
{
|
|
name: "should run is running should be migrate",
|
|
desired: structs.AllocDesiredStatusRun,
|
|
client: structs.AllocClientStatusRunning,
|
|
migrate: true,
|
|
},
|
|
{
|
|
name: "should run is lost should be terminal",
|
|
desired: structs.AllocDesiredStatusRun,
|
|
client: structs.AllocClientStatusLost,
|
|
terminal: true,
|
|
},
|
|
{
|
|
name: "should run is complete should be terminal",
|
|
desired: structs.AllocDesiredStatusRun,
|
|
client: structs.AllocClientStatusComplete,
|
|
terminal: true,
|
|
},
|
|
{
|
|
name: "should run is failed should be rescheduled",
|
|
desired: structs.AllocDesiredStatusRun,
|
|
client: structs.AllocClientStatusFailed,
|
|
reschedule: true,
|
|
},
|
|
{
|
|
name: "should evict is running should be lost",
|
|
desired: structs.AllocDesiredStatusEvict,
|
|
client: structs.AllocClientStatusRunning,
|
|
lost: true,
|
|
},
|
|
}
|
|
|
|
for i, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
h := tests.NewHarness(t)
|
|
|
|
// Register a node
|
|
node := mock.Node()
|
|
node.Status = structs.NodeStatusDown
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
|
|
alloc.DesiredStatus = tc.desired
|
|
alloc.ClientStatus = tc.client
|
|
|
|
// Mark for migration if necessary
|
|
alloc.DesiredTransition.Migrate = pointer.Of(tc.migrate)
|
|
|
|
allocs := []*structs.Allocation{alloc}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
if tc.terminal {
|
|
must.Len(t, 0, h.Plans, must.Sprint("expected no plan"))
|
|
} else {
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected plan"))
|
|
|
|
plan := h.Plans[0]
|
|
out := plan.NodeUpdate[node.ID]
|
|
must.Len(t, 1, out)
|
|
|
|
outAlloc := out[0]
|
|
if tc.migrate {
|
|
must.NotEq(t, structs.AllocClientStatusLost, outAlloc.ClientStatus)
|
|
} else if tc.reschedule {
|
|
must.Eq(t, structs.AllocClientStatusFailed, outAlloc.ClientStatus)
|
|
} else if tc.lost {
|
|
must.Eq(t, structs.AllocClientStatusLost, outAlloc.ClientStatus)
|
|
} else {
|
|
t.Fatal("unexpected alloc update")
|
|
}
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_StopOnClientAfter(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
cases := []struct {
|
|
name string
|
|
jobSpecFn func(*structs.Job)
|
|
previousStopWhen time.Time
|
|
expectBlockedEval bool
|
|
expectUpdate bool
|
|
expectedAllocStates int
|
|
}{
|
|
{
|
|
name: "no StopOnClientAfter reschedule now",
|
|
jobSpecFn: func(job *structs.Job) {
|
|
job.TaskGroups[0].Count = 1
|
|
job.TaskGroups[0].Disconnect = &structs.DisconnectStrategy{
|
|
StopOnClientAfter: nil,
|
|
}
|
|
},
|
|
expectBlockedEval: true,
|
|
expectedAllocStates: 1,
|
|
},
|
|
{
|
|
name: "StopOnClientAfter reschedule now",
|
|
jobSpecFn: func(job *structs.Job) {
|
|
job.TaskGroups[0].Count = 1
|
|
job.TaskGroups[0].Disconnect = &structs.DisconnectStrategy{
|
|
StopOnClientAfter: pointer.Of(1 * time.Second),
|
|
}
|
|
},
|
|
previousStopWhen: time.Now().UTC().Add(-10 * time.Second),
|
|
expectBlockedEval: true,
|
|
expectedAllocStates: 2,
|
|
},
|
|
{
|
|
name: "StopOnClientAfter reschedule later",
|
|
jobSpecFn: func(job *structs.Job) {
|
|
job.TaskGroups[0].Count = 1
|
|
job.TaskGroups[0].Disconnect = &structs.DisconnectStrategy{
|
|
StopOnClientAfter: pointer.Of(1 * time.Second),
|
|
}
|
|
},
|
|
expectBlockedEval: false,
|
|
expectUpdate: true,
|
|
expectedAllocStates: 1,
|
|
},
|
|
}
|
|
|
|
for i, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
h := tests.NewHarness(t)
|
|
|
|
// Node, which is down
|
|
node := mock.Node()
|
|
node.Status = structs.NodeStatusDown
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
job := mock.Job()
|
|
|
|
tc.jobSpecFn(job)
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Alloc for the running group
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
if !tc.previousStopWhen.IsZero() {
|
|
alloc.AllocStates = []*structs.AllocState{{
|
|
Field: structs.AllocStateFieldClientStatus,
|
|
Value: structs.AllocClientStatusLost,
|
|
Time: tc.previousStopWhen,
|
|
}}
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(
|
|
structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to deal with node going down
|
|
evals := []*structs.Evaluation{{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}}
|
|
eval := evals[0]
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), evals))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
must.Eq(t, h.Evals[0].Status, structs.EvalStatusComplete)
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected a plan"))
|
|
|
|
// One followup eval created, either delayed or blocked
|
|
must.Len(t, 1, h.CreateEvals)
|
|
followupEval := h.CreateEvals[0]
|
|
must.Eq(t, eval.ID, followupEval.PreviousEval)
|
|
|
|
// Either way, no new alloc was created
|
|
allocs, err := h.State.AllocsByJob(nil, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, allocs)
|
|
must.Eq(t, alloc.ID, allocs[0].ID)
|
|
alloc = allocs[0]
|
|
|
|
// Allocations have been transitioned to lost
|
|
must.Eq(t, structs.AllocDesiredStatusStop, alloc.DesiredStatus)
|
|
must.Eq(t, structs.AllocClientStatusLost, alloc.ClientStatus)
|
|
|
|
// 1 if rescheduled, 2 for rescheduled later
|
|
test.Len(t, tc.expectedAllocStates, alloc.AllocStates)
|
|
|
|
if tc.expectBlockedEval {
|
|
must.Eq(t, structs.EvalStatusBlocked, followupEval.Status)
|
|
|
|
} else {
|
|
must.Eq(t, structs.EvalStatusPending, followupEval.Status)
|
|
must.NotEq(t, time.Time{}, followupEval.WaitUntil)
|
|
|
|
if tc.expectUpdate {
|
|
must.Len(t, 1, h.Plans[0].NodeUpdate[node.ID])
|
|
must.Eq(t, structs.AllocClientStatusLost,
|
|
h.Plans[0].NodeUpdate[node.ID][0].ClientStatus)
|
|
must.MapLen(t, 0, h.Plans[0].NodeAllocation)
|
|
} else {
|
|
must.Len(t, 0, h.Plans[0].NodeUpdate[node.ID])
|
|
must.MapLen(t, 1, h.Plans[0].NodeAllocation)
|
|
}
|
|
}
|
|
|
|
// Register a new node, leave it up, process the followup eval
|
|
node = mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(),
|
|
[]*structs.Evaluation{followupEval}))
|
|
must.NoError(t, h.Process(NewServiceScheduler, followupEval))
|
|
|
|
allocs, err = h.State.AllocsByJob(nil, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 2, allocs)
|
|
|
|
alloc2 := allocs[0]
|
|
if alloc2.ID == alloc.ID {
|
|
alloc2 = allocs[1]
|
|
}
|
|
|
|
must.Eq(t, structs.AllocClientStatusPending, alloc2.ClientStatus)
|
|
must.Eq(t, structs.AllocDesiredStatusRun, alloc2.DesiredStatus)
|
|
must.Eq(t, node.ID, alloc2.NodeID)
|
|
|
|
// No more follow-up evals
|
|
must.SliceEmpty(t, h.ReblockEvals)
|
|
must.Len(t, 1, h.CreateEvals)
|
|
must.Eq(t, h.CreateEvals[0].ID, followupEval.ID)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_NodeUpdate(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Register a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Mark some allocs as running
|
|
ws := memdb.NewWatchSet()
|
|
for i := 0; i < 4; i++ {
|
|
out, _ := h.State.AllocByID(ws, allocs[i].ID)
|
|
out.ClientStatus = structs.AllocClientStatusRunning
|
|
must.NoError(t, h.State.UpdateAllocsFromClient(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{out}))
|
|
}
|
|
|
|
// Create a mock evaluation which won't trigger any new placements
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 {
|
|
t.Fatalf("bad queued allocations: %v", h.Evals[0].QueuedAllocations)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_NodeDrain(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Register a draining node
|
|
node := mock.DrainNode()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DesiredTransition.Migrate = pointer.Of(true)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation to deal with drain
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted all allocs
|
|
if len(plan.NodeUpdate[node.ID]) != len(allocs) {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 10 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
out, _ = structs.FilterTerminalAllocs(out)
|
|
if len(out) != 10 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_NodeDrain_Down(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Register a draining node
|
|
node := mock.DrainNode()
|
|
node.Status = structs.NodeStatusDown
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Set the desired state of the allocs to stop
|
|
var stop []*structs.Allocation
|
|
for i := 0; i < 6; i++ {
|
|
newAlloc := allocs[i].Copy()
|
|
newAlloc.ClientStatus = structs.AllocDesiredStatusStop
|
|
newAlloc.DesiredTransition.Migrate = pointer.Of(true)
|
|
stop = append(stop, newAlloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), stop))
|
|
|
|
// Mark some of the allocations as running
|
|
var running []*structs.Allocation
|
|
for i := 4; i < 6; i++ {
|
|
newAlloc := stop[i].Copy()
|
|
newAlloc.ClientStatus = structs.AllocClientStatusRunning
|
|
running = append(running, newAlloc)
|
|
}
|
|
must.NoError(t, h.State.UpdateAllocsFromClient(structs.MsgTypeTestSetup, h.NextIndex(), running))
|
|
|
|
// Mark some of the allocations as complete
|
|
var complete []*structs.Allocation
|
|
for i := 6; i < 10; i++ {
|
|
newAlloc := allocs[i].Copy()
|
|
newAlloc.TaskStates = make(map[string]*structs.TaskState)
|
|
newAlloc.TaskStates["web"] = &structs.TaskState{
|
|
State: structs.TaskStateDead,
|
|
Events: []*structs.TaskEvent{
|
|
{
|
|
Type: structs.TaskTerminated,
|
|
ExitCode: 0,
|
|
},
|
|
},
|
|
}
|
|
newAlloc.ClientStatus = structs.AllocClientStatusComplete
|
|
complete = append(complete, newAlloc)
|
|
}
|
|
must.NoError(t, h.State.UpdateAllocsFromClient(structs.MsgTypeTestSetup, h.NextIndex(), complete))
|
|
|
|
// Create a mock evaluation to deal with the node update
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted non terminal allocs
|
|
if len(plan.NodeUpdate[node.ID]) != 6 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure that all the allocations which were in running or pending state
|
|
// has been marked as lost
|
|
var lostAllocs []string
|
|
for _, alloc := range plan.NodeUpdate[node.ID] {
|
|
lostAllocs = append(lostAllocs, alloc.ID)
|
|
}
|
|
sort.Strings(lostAllocs)
|
|
|
|
var expectedLostAllocs []string
|
|
for i := 0; i < 6; i++ {
|
|
expectedLostAllocs = append(expectedLostAllocs, allocs[i].ID)
|
|
}
|
|
sort.Strings(expectedLostAllocs)
|
|
|
|
if !reflect.DeepEqual(expectedLostAllocs, lostAllocs) {
|
|
t.Fatalf("expected: %v, actual: %v", expectedLostAllocs, lostAllocs)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestServiceSched_NodeDrain_Canaries(t *testing.T) {
|
|
ci.Parallel(t)
|
|
h := tests.NewHarness(t)
|
|
|
|
node := mock.Node()
|
|
drainedNode := mock.DrainNode()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), drainedNode))
|
|
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].Update = &structs.UpdateStrategy{Canary: 2}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// previous version allocations
|
|
var allocs []*structs.Allocation
|
|
for i := range 2 {
|
|
alloc := mock.MinAllocForJob(job)
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
t.Logf("prev alloc=%q", alloc.ID)
|
|
}
|
|
|
|
// canaries on draining node
|
|
job = job.Copy()
|
|
job.Meta["owner"] = "changed"
|
|
job.Version++
|
|
var canaries []string
|
|
|
|
for i := range 2 {
|
|
alloc := mock.MinAllocForJob(job)
|
|
alloc.NodeID = drainedNode.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(false),
|
|
Canary: true,
|
|
}
|
|
alloc.DesiredTransition = structs.DesiredTransition{
|
|
Migrate: pointer.Of(true),
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
canaries = append(canaries, alloc.ID)
|
|
t.Logf("canary on draining node=%q", alloc.ID)
|
|
}
|
|
|
|
deadCanary := allocs[2]
|
|
deadCanary.DesiredStatus = structs.AllocDesiredStatusStop
|
|
deadCanary.ClientStatus = structs.AllocClientStatusComplete
|
|
|
|
canaryToDrain := allocs[3]
|
|
canaryToDrain.DesiredStatus = structs.AllocDesiredStatusRun
|
|
canaryToDrain.ClientStatus = structs.AllocClientStatusRunning
|
|
|
|
// replacement canary placed from previous eval
|
|
replacement := mock.MinAllocForJob(job)
|
|
replacement.NodeID = node.ID
|
|
replacement.Name = fmt.Sprintf("my-job.web[0]")
|
|
replacement.ClientStatus = structs.AllocClientStatusRunning
|
|
replacement.PreviousAllocation = canaries[0]
|
|
replacement.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(false),
|
|
Canary: true,
|
|
}
|
|
allocs = append(allocs, replacement)
|
|
canaries = append(canaries, replacement.ID)
|
|
t.Logf("replacement canary alloc=%q", replacement.ID)
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
deployment := mock.Deployment()
|
|
deployment.JobID = job.ID
|
|
deployment.JobVersion = job.Version
|
|
deployment.JobCreateIndex = job.CreateIndex
|
|
deployment.JobSpecModifyIndex = job.JobModifyIndex
|
|
deployment.TaskGroups["web"] = &structs.DeploymentState{
|
|
AutoRevert: false,
|
|
AutoPromote: false,
|
|
Promoted: false,
|
|
PlacedCanaries: canaries,
|
|
DesiredCanaries: 2,
|
|
DesiredTotal: 2,
|
|
PlacedAllocs: 3,
|
|
HealthyAllocs: 0,
|
|
UnhealthyAllocs: 0,
|
|
}
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment))
|
|
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: drainedNode.ID,
|
|
Status: structs.EvalStatusPending,
|
|
AnnotatePlan: true,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 1, h.Plans)
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
|
|
must.MapLen(t, 1, h.Plans[0].NodeAllocation)
|
|
must.Len(t, 1, h.Plans[0].NodeAllocation[node.ID])
|
|
must.Eq(t, 1, h.Plans[0].Annotations.DesiredTGUpdates["web"].Canary)
|
|
|
|
must.MapLen(t, 1, h.Plans[0].NodeUpdate)
|
|
must.Len(t, 1, h.Plans[0].NodeUpdate[drainedNode.ID])
|
|
must.Eq(t, canaryToDrain.ID, h.Plans[0].NodeUpdate[drainedNode.ID][0].ID)
|
|
}
|
|
|
|
func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Register a draining node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DesiredTransition.Migrate = pointer.Of(true)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
node.DrainStrategy = mock.DrainNode().DrainStrategy
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a mock evaluation to deal with drain
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
queued := h.Evals[0].QueuedAllocations["web"]
|
|
if queued != 2 {
|
|
t.Fatalf("expected: %v, actual: %v", 2, queued)
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_RetryLimit(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
h.Planner = &tests.RejectPlan{h}
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure multiple plans
|
|
if len(h.Plans) == 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure no allocations placed
|
|
if len(out) != 0 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Should hit the retry limit
|
|
h.AssertEvalStatus(t, structs.EvalStatusFailed)
|
|
}
|
|
|
|
func TestServiceSched_Reschedule_OnceNow(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: 1,
|
|
Interval: 15 * time.Minute,
|
|
Delay: 5 * time.Second,
|
|
MaxDelay: 1 * time.Minute,
|
|
DelayFunction: "constant",
|
|
}
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
// Mark one of the allocations as failed
|
|
allocs[1].ClientStatus = structs.AllocClientStatusFailed
|
|
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now.Add(-10 * time.Second)}}
|
|
failedAllocID := allocs[1].ID
|
|
successAllocID := allocs[0].ID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure multiple plans
|
|
if len(h.Plans) == 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Verify that one new allocation got created with its restart tracker info
|
|
must.Eq(t, 3, len(out))
|
|
var newAlloc *structs.Allocation
|
|
for _, alloc := range out {
|
|
if alloc.ID != successAllocID && alloc.ID != failedAllocID {
|
|
newAlloc = alloc
|
|
}
|
|
}
|
|
must.Eq(t, failedAllocID, newAlloc.PreviousAllocation)
|
|
must.Eq(t, 1, len(newAlloc.RescheduleTracker.Events))
|
|
must.Eq(t, failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID)
|
|
|
|
// Mark this alloc as failed again, should not get rescheduled
|
|
newAlloc.ClientStatus = structs.AllocClientStatusFailed
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{newAlloc}))
|
|
|
|
// Create another mock evaluation
|
|
eval = &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err = h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
// Verify no new allocs were created this time
|
|
out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Eq(t, 3, len(out))
|
|
|
|
}
|
|
|
|
// Tests that alloc reschedulable at a future time creates a follow up eval
|
|
func TestServiceSched_Reschedule_Later(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
delayDuration := 15 * time.Second
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: 1,
|
|
Interval: 15 * time.Minute,
|
|
Delay: delayDuration,
|
|
MaxDelay: 1 * time.Minute,
|
|
DelayFunction: "constant",
|
|
}
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
// Mark one of the allocations as failed
|
|
allocs[1].ClientStatus = structs.AllocClientStatusFailed
|
|
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now}}
|
|
failedAllocID := allocs[1].ID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure multiple plans
|
|
if len(h.Plans) == 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Verify no new allocs were created
|
|
must.Eq(t, 2, len(out))
|
|
|
|
// Verify follow up eval was created for the failed alloc
|
|
alloc, err := h.State.AllocByID(ws, failedAllocID)
|
|
must.NoError(t, err)
|
|
must.NotEq(t, "", alloc.FollowupEvalID)
|
|
|
|
// Ensure there is a follow up eval.
|
|
if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusPending {
|
|
t.Fatalf("bad: %#v", h.CreateEvals)
|
|
}
|
|
followupEval := h.CreateEvals[0]
|
|
must.Eq(t, now.Add(delayDuration), followupEval.WaitUntil)
|
|
}
|
|
|
|
func TestServiceSched_Reschedule_MultipleNow(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
maxRestartAttempts := 3
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: maxRestartAttempts,
|
|
Interval: 30 * time.Minute,
|
|
Delay: 5 * time.Second,
|
|
DelayFunction: "constant",
|
|
}
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
// Mark one of the allocations as failed
|
|
allocs[1].ClientStatus = structs.AllocClientStatusFailed
|
|
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now.Add(-10 * time.Second)}}
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
expectedNumAllocs := 3
|
|
expectedNumReschedTrackers := 1
|
|
|
|
failedAllocId := allocs[1].ID
|
|
failedNodeID := allocs[1].NodeID
|
|
|
|
for i := 0; i < maxRestartAttempts; i++ {
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure multiple plans
|
|
if len(h.Plans) == 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Verify that a new allocation got created with its restart tracker info
|
|
must.Eq(t, expectedNumAllocs, len(out))
|
|
|
|
// Find the new alloc with ClientStatusPending
|
|
var pendingAllocs []*structs.Allocation
|
|
var prevFailedAlloc *structs.Allocation
|
|
|
|
for _, alloc := range out {
|
|
if alloc.ClientStatus == structs.AllocClientStatusPending {
|
|
pendingAllocs = append(pendingAllocs, alloc)
|
|
}
|
|
if alloc.ID == failedAllocId {
|
|
prevFailedAlloc = alloc
|
|
}
|
|
}
|
|
must.Eq(t, 1, len(pendingAllocs))
|
|
newAlloc := pendingAllocs[0]
|
|
must.Eq(t, expectedNumReschedTrackers, len(newAlloc.RescheduleTracker.Events))
|
|
|
|
// Verify the previous NodeID in the most recent reschedule event
|
|
reschedEvents := newAlloc.RescheduleTracker.Events
|
|
must.Eq(t, failedAllocId, reschedEvents[len(reschedEvents)-1].PrevAllocID)
|
|
must.Eq(t, failedNodeID, reschedEvents[len(reschedEvents)-1].PrevNodeID)
|
|
|
|
// Verify that the next alloc of the failed alloc is the newly rescheduled alloc
|
|
must.Eq(t, newAlloc.ID, prevFailedAlloc.NextAllocation)
|
|
|
|
// Mark this alloc as failed again
|
|
newAlloc.ClientStatus = structs.AllocClientStatusFailed
|
|
newAlloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-12 * time.Second),
|
|
FinishedAt: now.Add(-10 * time.Second)}}
|
|
|
|
failedAllocId = newAlloc.ID
|
|
failedNodeID = newAlloc.NodeID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{newAlloc}))
|
|
|
|
// Create another mock evaluation
|
|
eval = &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
expectedNumAllocs += 1
|
|
expectedNumReschedTrackers += 1
|
|
}
|
|
|
|
// Process last eval again, should not reschedule
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Verify no new allocs were created because restart attempts were exhausted
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Eq(t, 5, len(out)) // 2 original, plus 3 reschedule attempts
|
|
}
|
|
|
|
func TestServiceSched_BlockedReschedule(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with a newly-failed allocation and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 1
|
|
delayDuration := 15 * time.Second
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: 3,
|
|
Interval: 15 * time.Minute,
|
|
Delay: delayDuration,
|
|
MaxDelay: 1 * time.Minute,
|
|
DelayFunction: "constant",
|
|
}
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now}}
|
|
failedAllocID := alloc.ID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation for the allocation failure
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// -----------------------------------
|
|
// first reschedule which works with delay as expected
|
|
|
|
// Process the evaluation and assert we have a plan
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 1, h.Plans)
|
|
must.MapLen(t, 0, h.Plans[0].NodeUpdate) // no stop
|
|
must.MapLen(t, 1, h.Plans[0].NodeAllocation) // ignore but update with follow-up eval
|
|
|
|
// Lookup the allocations by JobID and verify no new allocs created
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, out)
|
|
|
|
// Verify follow-up eval was created for the failed alloc
|
|
// and write the eval to the state store
|
|
alloc, err = h.State.AllocByID(ws, failedAllocID)
|
|
must.NoError(t, err)
|
|
must.NotEq(t, "", alloc.FollowupEvalID)
|
|
must.Len(t, 1, h.CreateEvals)
|
|
followupEval := h.CreateEvals[0]
|
|
must.Eq(t, structs.EvalStatusPending, followupEval.Status)
|
|
must.Eq(t, now.Add(delayDuration), followupEval.WaitUntil)
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{followupEval}))
|
|
|
|
// Follow-up delay "expires", so process the follow-up eval, which results
|
|
// in a replacement and stop
|
|
must.NoError(t, h.Process(NewServiceScheduler, followupEval))
|
|
must.Len(t, 2, h.Plans)
|
|
must.MapLen(t, 1, h.Plans[1].NodeUpdate) // stop original
|
|
must.MapLen(t, 1, h.Plans[1].NodeAllocation) // place new
|
|
|
|
out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 2, out)
|
|
|
|
var replacementAllocID string
|
|
for _, alloc := range out {
|
|
if alloc.ID != failedAllocID {
|
|
must.NotNil(t, alloc.RescheduleTracker,
|
|
must.Sprint("replacement alloc should have reschedule tracker"))
|
|
must.Len(t, 1, alloc.RescheduleTracker.Events)
|
|
replacementAllocID = alloc.ID
|
|
break
|
|
}
|
|
}
|
|
|
|
// -----------------------------------
|
|
// Replacement alloc fails, second reschedule but it blocks because of delay
|
|
|
|
alloc, err = h.State.AllocByID(ws, replacementAllocID)
|
|
must.NoError(t, err)
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now}}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation for the allocation failure
|
|
eval.ID = uuid.Generate()
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation and assert we have a plan
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 3, h.Plans)
|
|
must.MapLen(t, 0, h.Plans[2].NodeUpdate) // stop
|
|
must.MapLen(t, 1, h.Plans[2].NodeAllocation) // place
|
|
|
|
// Lookup the allocations by JobID and verify no new allocs created
|
|
out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 2, out)
|
|
|
|
// Verify follow-up eval was created for the failed alloc
|
|
// and write the eval to the state store
|
|
alloc, err = h.State.AllocByID(ws, replacementAllocID)
|
|
must.NoError(t, err)
|
|
must.NotEq(t, "", alloc.FollowupEvalID)
|
|
must.Len(t, 2, h.CreateEvals)
|
|
followupEval = h.CreateEvals[1]
|
|
must.Eq(t, structs.EvalStatusPending, followupEval.Status)
|
|
must.Eq(t, now.Add(delayDuration), followupEval.WaitUntil)
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{followupEval}))
|
|
|
|
// "use up" resources on the node so the follow-up will block
|
|
node.NodeResources.Memory.MemoryMB = 200
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Process the follow-up eval, which results in a stop but not a replacement
|
|
must.NoError(t, h.Process(NewServiceScheduler, followupEval))
|
|
must.Len(t, 4, h.Plans)
|
|
must.MapLen(t, 1, h.Plans[3].NodeUpdate) // stop
|
|
must.MapLen(t, 0, h.Plans[3].NodeAllocation) // place
|
|
|
|
out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 2, out)
|
|
|
|
// Verify blocked eval was created and write it to state
|
|
must.Len(t, 3, h.CreateEvals)
|
|
blockedEval := h.CreateEvals[2]
|
|
must.Eq(t, structs.EvalTriggerQueuedAllocs, blockedEval.TriggeredBy)
|
|
must.Eq(t, structs.EvalStatusBlocked, blockedEval.Status)
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{blockedEval}))
|
|
|
|
// "free up" resources on the node so the blocked eval will succeed
|
|
node.NodeResources.Memory.MemoryMB = 8000
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// if we process the blocked eval, the task state of the replacement alloc
|
|
// will not be old enough to be rescheduled yet and we'll get a no-op
|
|
must.NoError(t, h.Process(NewServiceScheduler, blockedEval))
|
|
must.Len(t, 4, h.Plans, must.Sprint("expected no new plan"))
|
|
|
|
// bypass the timer check by setting the alloc's follow-up eval ID to be the
|
|
// blocked eval
|
|
alloc, err = h.State.AllocByID(ws, replacementAllocID)
|
|
must.NoError(t, err)
|
|
alloc = alloc.Copy()
|
|
alloc.FollowupEvalID = blockedEval.ID
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
must.NoError(t, h.Process(NewServiceScheduler, blockedEval))
|
|
must.Len(t, 5, h.Plans)
|
|
must.MapLen(t, 1, h.Plans[4].NodeUpdate) // stop
|
|
must.MapLen(t, 1, h.Plans[4].NodeAllocation) // place
|
|
|
|
out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 3, out)
|
|
|
|
for _, alloc := range out {
|
|
if alloc.ID != failedAllocID && alloc.ID != replacementAllocID {
|
|
must.NotNil(t, alloc.RescheduleTracker,
|
|
must.Sprint("replacement alloc should have reschedule tracker"))
|
|
must.Len(t, 2, alloc.RescheduleTracker.Events)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_BlockedDisconnectReplace(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
node := mock.Node()
|
|
node.Status = structs.NodeStatusDisconnected
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Generate a fake job with a newly-disconnected allocation with a disconnect policy
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 1
|
|
replaceDelayDuration := 15 * time.Second
|
|
lostAfterDuration := 12 * time.Hour
|
|
job.TaskGroups[0].Disconnect = &structs.DisconnectStrategy{
|
|
LostAfter: lostAfterDuration,
|
|
Replace: pointer.Of(true),
|
|
Reconcile: "best_score",
|
|
}
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: 3,
|
|
Interval: 15 * time.Minute,
|
|
Delay: replaceDelayDuration,
|
|
MaxDelay: 1 * time.Minute,
|
|
DelayFunction: "constant",
|
|
}
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now().UTC()
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now}}
|
|
disconnectedAllocID := alloc.ID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation for the allocation failure
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// -----------------------------------
|
|
// first eval should result in 1 delayed eval for the lost_after and 1
|
|
// blocked eval for the rescheduyle
|
|
|
|
// Process the evaluation and assert we have a plan
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
must.Len(t, 1, h.Plans)
|
|
must.MapLen(t, 0, h.Plans[0].NodeUpdate) // no stop
|
|
must.MapLen(t, 1, h.Plans[0].NodeAllocation) // ignore but update with follow-up eval
|
|
|
|
// Lookup the allocations by JobID and verify no new allocs created
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, out)
|
|
|
|
// Verify follow-up eval was created for the failed alloc
|
|
// and write the eval to the state store
|
|
alloc, err = h.State.AllocByID(ws, disconnectedAllocID)
|
|
must.NoError(t, err)
|
|
must.NotEq(t, "", alloc.FollowupEvalID)
|
|
must.Len(t, 2, h.CreateEvals)
|
|
followupEval0 := h.CreateEvals[0]
|
|
followupEval1 := h.CreateEvals[1]
|
|
|
|
must.Eq(t, structs.EvalStatusBlocked, followupEval0.Status)
|
|
must.Eq(t, structs.EvalStatusPending, followupEval1.Status)
|
|
|
|
// TODO: the top-level scheduler calls its own time.Now not shared with the
|
|
// reconciler
|
|
// must.Eq(t, now.Add(lostAfterDuration), followupEval1.WaitUntil)
|
|
must.True(t, followupEval1.WaitUntil.After(now.Add(lostAfterDuration)))
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{followupEval0, followupEval1}))
|
|
}
|
|
|
|
// Tests that old reschedule attempts are pruned
|
|
func TestServiceSched_Reschedule_PruneEvents(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and an update policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
DelayFunction: "exponential",
|
|
MaxDelay: 1 * time.Hour,
|
|
Delay: 5 * time.Second,
|
|
Unlimited: true,
|
|
}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
now := time.Now()
|
|
// Mark allocations as failed with restart info
|
|
allocs[1].TaskStates = map[string]*structs.TaskState{job.TaskGroups[0].Name: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now.Add(-15 * time.Minute)}}
|
|
allocs[1].ClientStatus = structs.AllocClientStatusFailed
|
|
|
|
allocs[1].RescheduleTracker = &structs.RescheduleTracker{
|
|
Events: []*structs.RescheduleEvent{
|
|
{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
|
|
PrevAllocID: uuid.Generate(),
|
|
PrevNodeID: uuid.Generate(),
|
|
Delay: 5 * time.Second,
|
|
},
|
|
{RescheduleTime: now.Add(-40 * time.Minute).UTC().UnixNano(),
|
|
PrevAllocID: allocs[0].ID,
|
|
PrevNodeID: uuid.Generate(),
|
|
Delay: 10 * time.Second,
|
|
},
|
|
{RescheduleTime: now.Add(-30 * time.Minute).UTC().UnixNano(),
|
|
PrevAllocID: allocs[0].ID,
|
|
PrevNodeID: uuid.Generate(),
|
|
Delay: 20 * time.Second,
|
|
},
|
|
{RescheduleTime: now.Add(-20 * time.Minute).UTC().UnixNano(),
|
|
PrevAllocID: allocs[0].ID,
|
|
PrevNodeID: uuid.Generate(),
|
|
Delay: 40 * time.Second,
|
|
},
|
|
{RescheduleTime: now.Add(-10 * time.Minute).UTC().UnixNano(),
|
|
PrevAllocID: allocs[0].ID,
|
|
PrevNodeID: uuid.Generate(),
|
|
Delay: 80 * time.Second,
|
|
},
|
|
{RescheduleTime: now.Add(-3 * time.Minute).UTC().UnixNano(),
|
|
PrevAllocID: allocs[0].ID,
|
|
PrevNodeID: uuid.Generate(),
|
|
Delay: 160 * time.Second,
|
|
},
|
|
},
|
|
}
|
|
expectedFirstRescheduleEvent := allocs[1].RescheduleTracker.Events[1]
|
|
expectedDelay := 320 * time.Second
|
|
failedAllocID := allocs[1].ID
|
|
successAllocID := allocs[0].ID
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure multiple plans
|
|
if len(h.Plans) == 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Verify that one new allocation got created with its restart tracker info
|
|
must.Eq(t, 3, len(out))
|
|
var newAlloc *structs.Allocation
|
|
for _, alloc := range out {
|
|
if alloc.ID != successAllocID && alloc.ID != failedAllocID {
|
|
newAlloc = alloc
|
|
}
|
|
}
|
|
|
|
must.Eq(t, failedAllocID, newAlloc.PreviousAllocation)
|
|
// Verify that the new alloc copied the last 5 reschedule attempts
|
|
must.Eq(t, 6, len(newAlloc.RescheduleTracker.Events))
|
|
must.Eq(t, expectedFirstRescheduleEvent, newAlloc.RescheduleTracker.Events[0])
|
|
|
|
mostRecentRescheduleEvent := newAlloc.RescheduleTracker.Events[5]
|
|
// Verify that the failed alloc ID is in the most recent reschedule event
|
|
must.Eq(t, failedAllocID, mostRecentRescheduleEvent.PrevAllocID)
|
|
// Verify that the delay value was captured correctly
|
|
must.Eq(t, expectedDelay, mostRecentRescheduleEvent.Delay)
|
|
|
|
}
|
|
|
|
// Tests that deployments with failed allocs result in placements as long as the
|
|
// deployment is running.
|
|
func TestDeployment_FailedAllocs_Reschedule(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
for _, failedDeployment := range []bool{false, true} {
|
|
t.Run(fmt.Sprintf("Failed Deployment: %v", failedDeployment), func(t *testing.T) {
|
|
h := tests.NewHarness(t)
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations and a reschedule policy.
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
Attempts: 1,
|
|
Interval: 15 * time.Minute,
|
|
}
|
|
jobIndex := h.NextIndex()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, jobIndex, nil, job))
|
|
|
|
deployment := mock.Deployment()
|
|
deployment.JobID = job.ID
|
|
deployment.JobCreateIndex = jobIndex
|
|
deployment.JobVersion = job.Version
|
|
if failedDeployment {
|
|
deployment.Status = structs.DeploymentStatusFailed
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DeploymentID = deployment.ID
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
// Mark one of the allocations as failed in the past
|
|
allocs[1].ClientStatus = structs.AllocClientStatusFailed
|
|
allocs[1].TaskStates = map[string]*structs.TaskState{"web": {State: "start",
|
|
StartedAt: time.Now().Add(-12 * time.Hour),
|
|
FinishedAt: time.Now().Add(-10 * time.Hour)}}
|
|
allocs[1].DesiredTransition.Reschedule = pointer.Of(true)
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
if failedDeployment {
|
|
// Verify no plan created
|
|
must.Len(t, 0, h.Plans)
|
|
} else {
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 1 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestBatchSched_Run_CompleteAlloc(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a complete alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure no plan as it should be a no-op
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure no allocations placed
|
|
if len(out) != 1 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestBatchSched_Run_FailedAlloc(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
// Create a failed alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now.Add(-10 * time.Second)}}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure a replacement alloc was placed.
|
|
if len(out) != 2 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Ensure that the scheduler is recording the correct number of queued
|
|
// allocations
|
|
queued := h.Evals[0].QueuedAllocations["web"]
|
|
if queued != 0 {
|
|
t.Fatalf("expected: %v, actual: %v", 1, queued)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestBatchSched_Run_LostAlloc(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.ID = "my-job"
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 3
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Desired = 3
|
|
// Mark one as lost and then schedule
|
|
// [(0, run, running), (1, run, running), (1, stop, lost)]
|
|
|
|
// Create two running allocations
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i <= 1; i++ {
|
|
alloc := mock.AllocForNodeWithoutReservedPort(node)
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
|
|
// Create a failed alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[1]"
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
allocs = append(allocs, alloc)
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure a replacement alloc was placed.
|
|
if len(out) != 4 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
// Assert that we have the correct number of each alloc name
|
|
expected := map[string]int{
|
|
"my-job.web[0]": 1,
|
|
"my-job.web[1]": 2,
|
|
"my-job.web[2]": 1,
|
|
}
|
|
actual := make(map[string]int, 3)
|
|
for _, alloc := range out {
|
|
actual[alloc.Name] += 1
|
|
}
|
|
must.Eq(t, expected, actual)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestBatchSched_Run_FailedAllocQueuedAllocations(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
node := mock.DrainNode()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
tgName := job.TaskGroups[0].Name
|
|
now := time.Now()
|
|
|
|
// Create a failed alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
|
|
StartedAt: now.Add(-1 * time.Hour),
|
|
FinishedAt: now.Add(-10 * time.Second)}}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure that the scheduler is recording the correct number of queued
|
|
// allocations
|
|
queued := h.Evals[0].QueuedAllocations["web"]
|
|
if queued != 1 {
|
|
t.Fatalf("expected: %v, actual: %v", 1, queued)
|
|
}
|
|
}
|
|
|
|
func TestBatchSched_ReRun_SuccessfullyFinishedAlloc(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create two nodes, one that is drained and has a successfully finished
|
|
// alloc and a fresh undrained one
|
|
node := mock.DrainNode()
|
|
node2 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a successful alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
alloc.TaskStates = map[string]*structs.TaskState{
|
|
"web": {
|
|
State: structs.TaskStateDead,
|
|
Events: []*structs.TaskEvent{
|
|
{
|
|
Type: structs.TaskTerminated,
|
|
ExitCode: 0,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to rerun the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure no replacement alloc was placed.
|
|
if len(out) != 1 {
|
|
t.Fatalf("bad: %#v", out)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// This test checks that terminal allocations that receive an in-place updated
|
|
// are not added to the plan
|
|
func TestBatchSched_JobModify_InPlace_Terminal(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation to trigger the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans[0])
|
|
}
|
|
}
|
|
|
|
// This test ensures that terminal jobs from older versions are ignored.
|
|
func TestBatchSched_JobModify_Destructive_Terminal(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
var nodes []*structs.Node
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
nodes = append(nodes, node)
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Generate a fake job with allocations
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job
|
|
job2 := mock.Job()
|
|
job2.ID = job.ID
|
|
job2.Type = structs.JobTypeBatch
|
|
job2.Version++
|
|
job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
allocs = nil
|
|
for i := 0; i < 10; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job2
|
|
alloc.JobID = job2.ID
|
|
alloc.NodeID = nodes[i].ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
alloc.TaskStates = map[string]*structs.TaskState{
|
|
"web": {
|
|
State: structs.TaskStateDead,
|
|
Events: []*structs.TaskEvent{
|
|
{
|
|
Type: structs.TaskTerminated,
|
|
ExitCode: 0,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a mock evaluation to deal with drain
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
}
|
|
|
|
// This test asserts that an allocation from an old job that is running on a
|
|
// drained node is cleaned up.
|
|
func TestBatchSched_NodeDrain_Running_OldJob(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create two nodes, one that is drained and has a successfully finished
|
|
// alloc and a fresh undrained one
|
|
node := mock.DrainNode()
|
|
node2 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a running alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create an update job
|
|
job2 := job.Copy()
|
|
job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"}
|
|
job2.Version++
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted 1
|
|
if len(plan.NodeUpdate[node.ID]) != 1 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
// Ensure the plan places 1
|
|
if len(plan.NodeAllocation[node2.ID]) != 1 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// This test asserts that an allocation from a job that is complete on a
|
|
// drained node is ignored up.
|
|
func TestBatchSched_NodeDrain_Complete(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create two nodes, one that is drained and has a successfully finished
|
|
// alloc and a fresh undrained one
|
|
node := mock.DrainNode()
|
|
node2 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a complete alloc
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
alloc.TaskStates = make(map[string]*structs.TaskState)
|
|
alloc.TaskStates["web"] = &structs.TaskState{
|
|
State: structs.TaskStateDead,
|
|
Events: []*structs.TaskEvent{
|
|
{
|
|
Type: structs.TaskTerminated,
|
|
ExitCode: 0,
|
|
},
|
|
},
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure no plan
|
|
if len(h.Plans) != 0 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// This is a slightly odd test but it ensures that we handle a scale down of a
|
|
// task group's count and that it works even if all the allocs have the same
|
|
// name.
|
|
func TestBatchSched_ScaleDown_SameName(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.Type = structs.JobTypeBatch
|
|
job.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
scoreMetric := &structs.AllocMetric{
|
|
NodesEvaluated: 10,
|
|
NodesFiltered: 3,
|
|
ScoreMetaData: []*structs.NodeScoreMeta{
|
|
{
|
|
NodeID: node.ID,
|
|
Scores: map[string]float64{
|
|
"bin-packing": 0.5435,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
// Create a few running alloc
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 5; i++ {
|
|
alloc := mock.AllocForNodeWithoutReservedPort(node)
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
alloc.Metrics = scoreMetric
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Update the job's modify index to force an inplace upgrade
|
|
updatedJob := job.Copy()
|
|
updatedJob.JobModifyIndex = job.JobModifyIndex + 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, updatedJob))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewBatchScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted 4 of the 5
|
|
must.Eq(t, 4, len(plan.NodeUpdate[node.ID]))
|
|
|
|
// Ensure that the scheduler did not overwrite the original score metrics for the i
|
|
for _, inPlaceAllocs := range plan.NodeAllocation {
|
|
for _, alloc := range inPlaceAllocs {
|
|
must.Eq(t, scoreMetric, alloc.Metrics)
|
|
}
|
|
}
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestGenericSched_AllocFit_Lifecycle(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
testCases := []struct {
|
|
Name string
|
|
NodeCpu int
|
|
TaskResources structs.Resources
|
|
MainTaskCount int
|
|
InitTaskCount int
|
|
SideTaskCount int
|
|
ShouldPlaceAlloc bool
|
|
}{
|
|
{
|
|
Name: "simple init + sidecar",
|
|
NodeCpu: 1200,
|
|
TaskResources: structs.Resources{
|
|
CPU: 500,
|
|
MemoryMB: 256,
|
|
},
|
|
MainTaskCount: 1,
|
|
InitTaskCount: 1,
|
|
SideTaskCount: 1,
|
|
ShouldPlaceAlloc: true,
|
|
},
|
|
{
|
|
Name: "too big init + sidecar",
|
|
NodeCpu: 1200,
|
|
TaskResources: structs.Resources{
|
|
CPU: 700,
|
|
MemoryMB: 256,
|
|
},
|
|
MainTaskCount: 1,
|
|
InitTaskCount: 1,
|
|
SideTaskCount: 1,
|
|
ShouldPlaceAlloc: false,
|
|
},
|
|
{
|
|
Name: "many init + sidecar",
|
|
NodeCpu: 1200,
|
|
TaskResources: structs.Resources{
|
|
CPU: 100,
|
|
MemoryMB: 100,
|
|
},
|
|
MainTaskCount: 3,
|
|
InitTaskCount: 5,
|
|
SideTaskCount: 5,
|
|
ShouldPlaceAlloc: true,
|
|
},
|
|
{
|
|
Name: "too many init + sidecar",
|
|
NodeCpu: 1200,
|
|
TaskResources: structs.Resources{
|
|
CPU: 100,
|
|
MemoryMB: 100,
|
|
},
|
|
MainTaskCount: 10,
|
|
InitTaskCount: 10,
|
|
SideTaskCount: 10,
|
|
ShouldPlaceAlloc: false,
|
|
},
|
|
{
|
|
Name: "too many too big",
|
|
NodeCpu: 1200,
|
|
TaskResources: structs.Resources{
|
|
CPU: 1000,
|
|
MemoryMB: 100,
|
|
},
|
|
MainTaskCount: 10,
|
|
InitTaskCount: 10,
|
|
SideTaskCount: 10,
|
|
ShouldPlaceAlloc: false,
|
|
},
|
|
}
|
|
for _, testCase := range testCases {
|
|
t.Run(testCase.Name, func(t *testing.T) {
|
|
h := tests.NewHarness(t)
|
|
|
|
legacyCpuResources, processorResources := tests.CpuResources(testCase.NodeCpu)
|
|
node := mock.Node()
|
|
node.NodeResources.Processors = processorResources
|
|
node.NodeResources.Cpu = legacyCpuResources
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job with sidecar & init tasks
|
|
job := mock.VariableLifecycleJob(testCase.TaskResources, testCase.MainTaskCount, testCase.InitTaskCount, testCase.SideTaskCount)
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
allocs := 0
|
|
if testCase.ShouldPlaceAlloc {
|
|
allocs = 1
|
|
}
|
|
// Ensure no plan as it should be a no-op
|
|
must.Len(t, allocs, h.Plans)
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure no allocations placed
|
|
must.Len(t, allocs, out)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestGenericSched_AllocFit_MemoryOversubscription(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
node := mock.Node()
|
|
node.NodeResources.Cpu.CpuShares = 10000
|
|
node.NodeResources.Memory.MemoryMB = 1224
|
|
node.ReservedResources.Memory.MemoryMB = 60
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 10
|
|
job.TaskGroups[0].Tasks[0].Resources.CPU = 100
|
|
job.TaskGroups[0].Tasks[0].Resources.MemoryMB = 200
|
|
job.TaskGroups[0].Tasks[0].Resources.MemoryMaxMB = 500
|
|
job.TaskGroups[0].Tasks[0].Resources.DiskMB = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// expectedAllocs should be floor((nodeResources.MemoryMB-reservedResources.MemoryMB) / job.MemoryMB)
|
|
expectedAllocs := 5
|
|
must.Len(t, 1, h.Plans)
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
must.Len(t, expectedAllocs, out)
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func TestGenericSched_ChainedAlloc(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes
|
|
for i := 0; i < 10; i++ {
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
// Process the evaluation
|
|
if err := h.Process(NewServiceScheduler, eval); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
var allocIDs []string
|
|
for _, allocList := range h.Plans[0].NodeAllocation {
|
|
for _, alloc := range allocList {
|
|
allocIDs = append(allocIDs, alloc.ID)
|
|
}
|
|
}
|
|
sort.Strings(allocIDs)
|
|
|
|
// Create a new harness to invoke the scheduler again
|
|
h1 := tests.NewHarnessWithState(t, h.State)
|
|
job1 := mock.Job()
|
|
job1.ID = job.ID
|
|
job1.TaskGroups[0].Tasks[0].Env["foo"] = "bar"
|
|
job1.TaskGroups[0].Count = 12
|
|
must.NoError(t, h1.State.UpsertJob(structs.MsgTypeTestSetup, h1.NextIndex(), nil, job1))
|
|
|
|
// Create a mock evaluation to update the job
|
|
eval1 := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job1.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job1.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval1}))
|
|
|
|
// Process the evaluation
|
|
if err := h1.Process(NewServiceScheduler, eval1); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
plan := h1.Plans[0]
|
|
|
|
// Collect all the chained allocation ids and the new allocations which
|
|
// don't have any chained allocations
|
|
var prevAllocs []string
|
|
var newAllocs []string
|
|
for _, allocList := range plan.NodeAllocation {
|
|
for _, alloc := range allocList {
|
|
if alloc.PreviousAllocation == "" {
|
|
newAllocs = append(newAllocs, alloc.ID)
|
|
continue
|
|
}
|
|
prevAllocs = append(prevAllocs, alloc.PreviousAllocation)
|
|
}
|
|
}
|
|
sort.Strings(prevAllocs)
|
|
|
|
// Ensure that the new allocations has their corresponding original
|
|
// allocation ids
|
|
if !reflect.DeepEqual(prevAllocs, allocIDs) {
|
|
t.Fatalf("expected: %v, actual: %v", len(allocIDs), len(prevAllocs))
|
|
}
|
|
|
|
// Ensuring two new allocations don't have any chained allocations
|
|
if len(newAllocs) != 2 {
|
|
t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs))
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_NodeDrain_Sticky(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Register a draining node
|
|
node := mock.DrainNode()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create an alloc on the draining node
|
|
alloc := mock.Alloc()
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.NodeID = node.ID
|
|
alloc.Job.TaskGroups[0].Count = 1
|
|
alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true
|
|
alloc.DesiredTransition.Migrate = pointer.Of(true)
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, alloc.Job))
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation to deal with drain
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: alloc.Job.ID,
|
|
NodeID: node.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
must.NoError(t, h.Process(NewServiceScheduler, eval))
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected plan"))
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan evicted all allocs
|
|
must.Eq(t, 1, len(plan.NodeUpdate[node.ID]),
|
|
must.Sprint("expected alloc to be evicted"))
|
|
|
|
// Ensure the plan didn't create any new allocations
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Eq(t, 0, len(planned))
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// This test ensures that when a job is stopped, the scheduler properly cancels
|
|
// an outstanding deployment.
|
|
func TestServiceSched_CancelDeployment_Stopped(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Generate a fake job
|
|
job := mock.Job()
|
|
job.JobModifyIndex = job.CreateIndex + 1
|
|
job.ModifyIndex = job.CreateIndex + 1
|
|
job.Stop = true
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a deployment
|
|
d := mock.Deployment()
|
|
d.JobID = job.ID
|
|
d.JobCreateIndex = job.CreateIndex
|
|
d.JobModifyIndex = job.JobModifyIndex - 1
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))
|
|
|
|
// Create a mock evaluation to deregister the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobDeregister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan cancelled the existing deployment
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID)
|
|
must.NoError(t, err)
|
|
|
|
if out == nil {
|
|
t.Fatalf("No deployment for job")
|
|
}
|
|
if out.ID != d.ID {
|
|
t.Fatalf("Latest deployment for job is different than original deployment")
|
|
}
|
|
if out.Status != structs.DeploymentStatusCancelled {
|
|
t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled)
|
|
}
|
|
if out.StatusDescription != structs.DeploymentStatusDescriptionStoppedJob {
|
|
t.Fatalf("Deployment status description is %q, want %q",
|
|
out.StatusDescription, structs.DeploymentStatusDescriptionStoppedJob)
|
|
}
|
|
|
|
// Ensure the plan didn't allocate anything
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 0 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// This test ensures that when a job is updated and had an old deployment, the scheduler properly cancels
|
|
// the deployment.
|
|
func TestServiceSched_CancelDeployment_NewerJob(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Generate a fake job
|
|
job := mock.Job()
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a deployment for an old version of the job
|
|
d := mock.Deployment()
|
|
d.JobID = job.ID
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))
|
|
|
|
// Upsert again to bump job version
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to kick the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Ensure a single plan
|
|
if len(h.Plans) != 1 {
|
|
t.Fatalf("bad: %#v", h.Plans)
|
|
}
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure the plan cancelled the existing deployment
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID)
|
|
must.NoError(t, err)
|
|
|
|
if out == nil {
|
|
t.Fatalf("No deployment for job")
|
|
}
|
|
if out.ID != d.ID {
|
|
t.Fatalf("Latest deployment for job is different than original deployment")
|
|
}
|
|
if out.Status != structs.DeploymentStatusCancelled {
|
|
t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled)
|
|
}
|
|
if out.StatusDescription != structs.DeploymentStatusDescriptionNewerJob {
|
|
t.Fatalf("Deployment status description is %q, want %q",
|
|
out.StatusDescription, structs.DeploymentStatusDescriptionNewerJob)
|
|
}
|
|
// Ensure the plan didn't allocate anything
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
if len(planned) != 0 {
|
|
t.Fatalf("bad: %#v", plan)
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
// Various table driven tests for carry forward
|
|
// of past reschedule events
|
|
func Test_updateRescheduleTracker(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
t1 := time.Now().UTC()
|
|
alloc := mock.Alloc()
|
|
prevAlloc := mock.Alloc()
|
|
|
|
type testCase struct {
|
|
desc string
|
|
prevAllocEvents []*structs.RescheduleEvent
|
|
reschedPolicy *structs.ReschedulePolicy
|
|
expectedRescheduleEvents []*structs.RescheduleEvent
|
|
reschedTime time.Time
|
|
}
|
|
|
|
testCases := []testCase{
|
|
{
|
|
desc: "No past events",
|
|
prevAllocEvents: nil,
|
|
reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second},
|
|
reschedTime: t1,
|
|
expectedRescheduleEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "one past event, linear delay",
|
|
prevAllocEvents: []*structs.RescheduleEvent{
|
|
{RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second}},
|
|
reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second},
|
|
reschedTime: t1,
|
|
expectedRescheduleEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "one past event, fibonacci delay",
|
|
prevAllocEvents: []*structs.RescheduleEvent{
|
|
{RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second}},
|
|
reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 60 * time.Second},
|
|
reschedTime: t1,
|
|
expectedRescheduleEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "eight past events, fibonacci delay, unlimited",
|
|
prevAllocEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 10 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 15 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 25 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 40 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 65 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 105 * time.Second,
|
|
},
|
|
},
|
|
reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 240 * time.Second},
|
|
reschedTime: t1,
|
|
expectedRescheduleEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 15 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 25 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 40 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 65 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 105 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 170 * time.Second,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: " old attempts past interval, exponential delay, limited",
|
|
prevAllocEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.Add(-2 * time.Hour).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 5 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-70 * time.Minute).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 10 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 20 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 40 * time.Second,
|
|
},
|
|
},
|
|
reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", MaxDelay: 240 * time.Second},
|
|
reschedTime: t1,
|
|
expectedRescheduleEvents: []*structs.RescheduleEvent{
|
|
{
|
|
RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 20 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 40 * time.Second,
|
|
},
|
|
{
|
|
RescheduleTime: t1.UnixNano(),
|
|
PrevAllocID: prevAlloc.ID,
|
|
PrevNodeID: prevAlloc.NodeID,
|
|
Delay: 80 * time.Second,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.desc, func(t *testing.T) {
|
|
prevAlloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.prevAllocEvents}
|
|
prevAlloc.Job.LookupTaskGroup(prevAlloc.TaskGroup).ReschedulePolicy = tc.reschedPolicy
|
|
UpdateRescheduleTracker(alloc, prevAlloc, tc.reschedTime)
|
|
must.Eq(t, tc.expectedRescheduleEvents, alloc.RescheduleTracker.Events)
|
|
})
|
|
}
|
|
|
|
}
|
|
|
|
func TestServiceSched_Preemption(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
legacyCpuResources, processorResources := tests.CpuResources(1000)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
node.Resources = nil
|
|
node.ReservedResources = nil
|
|
node.NodeResources = &structs.NodeResources{
|
|
Processors: processorResources,
|
|
Cpu: legacyCpuResources,
|
|
Memory: structs.NodeMemoryResources{
|
|
MemoryMB: 2048,
|
|
},
|
|
Disk: structs.NodeDiskResources{
|
|
DiskMB: 100 * 1024,
|
|
},
|
|
Networks: []*structs.NetworkResource{
|
|
{
|
|
Mode: "host",
|
|
Device: "eth0",
|
|
CIDR: "192.168.0.100/32",
|
|
MBits: 1000,
|
|
},
|
|
},
|
|
}
|
|
node.ReservedResources = &structs.NodeReservedResources{
|
|
Cpu: structs.NodeReservedCpuResources{
|
|
CpuShares: 50,
|
|
},
|
|
Memory: structs.NodeReservedMemoryResources{
|
|
MemoryMB: 256,
|
|
},
|
|
Disk: structs.NodeReservedDiskResources{
|
|
DiskMB: 4 * 1024,
|
|
},
|
|
Networks: structs.NodeReservedNetworkResources{
|
|
ReservedHostPorts: "22",
|
|
},
|
|
}
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a couple of jobs and schedule them
|
|
job1 := mock.Job()
|
|
job1.TaskGroups[0].Count = 1
|
|
job1.TaskGroups[0].Networks = nil
|
|
job1.Priority = 30
|
|
r1 := job1.TaskGroups[0].Tasks[0].Resources
|
|
r1.CPU = 500
|
|
r1.MemoryMB = 1024
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job1))
|
|
|
|
job2 := mock.Job()
|
|
job2.TaskGroups[0].Count = 1
|
|
job2.TaskGroups[0].Networks = nil
|
|
job2.Priority = 50
|
|
r2 := job2.TaskGroups[0].Tasks[0].Resources
|
|
r2.CPU = 350
|
|
r2.MemoryMB = 512
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation to register the jobs
|
|
eval1 := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job1.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job1.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
eval2 := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job2.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job2.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval1, eval2}))
|
|
|
|
expectedPreemptedAllocs := make(map[string]struct{})
|
|
// Process the two evals for job1 and job2 and make sure they allocated
|
|
for index, eval := range []*structs.Evaluation{eval1, eval2} {
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
plan := h.Plans[index]
|
|
|
|
// Ensure the plan doesn't have annotations.
|
|
must.Nil(t, plan.Annotations)
|
|
|
|
// Ensure the eval has no spawned blocked eval
|
|
must.Eq(t, 0, len(h.CreateEvals))
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Eq(t, 1, len(planned))
|
|
expectedPreemptedAllocs[planned[0].ID] = struct{}{}
|
|
}
|
|
|
|
// Create a higher priority job
|
|
job3 := mock.Job()
|
|
job3.Priority = 100
|
|
job3.TaskGroups[0].Count = 1
|
|
job3.TaskGroups[0].Networks = nil
|
|
r3 := job3.TaskGroups[0].Tasks[0].Resources
|
|
r3.CPU = 900
|
|
r3.MemoryMB = 1700
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job3))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job3.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job3.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// New plan should be the third one in the harness
|
|
plan := h.Plans[2]
|
|
|
|
// Ensure the eval has no spawned blocked eval
|
|
must.Eq(t, 0, len(h.CreateEvals))
|
|
|
|
// Ensure the plan allocated
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range plan.NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Eq(t, 1, len(planned))
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job3.Namespace, job3.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure all allocations placed
|
|
must.Eq(t, 1, len(out))
|
|
actualPreemptedAllocs := make(map[string]struct{})
|
|
for _, id := range out[0].PreemptedAllocations {
|
|
actualPreemptedAllocs[id] = struct{}{}
|
|
}
|
|
must.Eq(t, expectedPreemptedAllocs, actualPreemptedAllocs)
|
|
}
|
|
|
|
// TestServiceSched_Migrate_NonCanary asserts that when rescheduling
|
|
// non-canary allocations, a single allocation is migrated
|
|
func TestServiceSched_Migrate_NonCanary(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
node1 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node1))
|
|
|
|
job := mock.Job()
|
|
job.Stable = true
|
|
job.TaskGroups[0].Count = 1
|
|
job.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
MaxParallel: 1,
|
|
Canary: 1,
|
|
}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
deployment := &structs.Deployment{
|
|
ID: uuid.Generate(),
|
|
JobID: job.ID,
|
|
Namespace: job.Namespace,
|
|
JobVersion: job.Version,
|
|
JobModifyIndex: job.JobModifyIndex,
|
|
JobCreateIndex: job.CreateIndex,
|
|
TaskGroups: map[string]*structs.DeploymentState{
|
|
"web": {DesiredTotal: 1},
|
|
},
|
|
Status: structs.DeploymentStatusSuccessful,
|
|
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
|
|
}
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment))
|
|
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node1.ID
|
|
alloc.DeploymentID = deployment.ID
|
|
alloc.Name = "my-job.web[0]"
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
|
alloc.DesiredTransition.Migrate = pointer.Of(true)
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerAllocStop,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
must.MapContainsKey(t, plan.NodeAllocation, node1.ID)
|
|
allocs := plan.NodeAllocation[node1.ID]
|
|
must.Len(t, 1, allocs)
|
|
|
|
}
|
|
|
|
// TestServiceSched_Migrate_CanaryStatus asserts that migrations/rescheduling
|
|
// of allocations use the proper versions of allocs rather than latest:
|
|
// Canaries should be replaced by canaries, and non-canaries should be replaced
|
|
// with the latest promoted version.
|
|
func TestServiceSched_Migrate_CanaryStatus(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
node1 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node1))
|
|
|
|
totalCount := 3
|
|
desiredCanaries := 1
|
|
|
|
job := mock.Job()
|
|
job.Stable = true
|
|
job.TaskGroups[0].Count = totalCount
|
|
job.TaskGroups[0].Update = &structs.UpdateStrategy{
|
|
MaxParallel: 1,
|
|
Canary: desiredCanaries,
|
|
}
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
deployment := &structs.Deployment{
|
|
ID: uuid.Generate(),
|
|
JobID: job.ID,
|
|
Namespace: job.Namespace,
|
|
JobVersion: job.Version,
|
|
JobModifyIndex: job.JobModifyIndex,
|
|
JobCreateIndex: job.CreateIndex,
|
|
TaskGroups: map[string]*structs.DeploymentState{
|
|
"web": {DesiredTotal: totalCount},
|
|
},
|
|
Status: structs.DeploymentStatusSuccessful,
|
|
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
|
|
}
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 3; i++ {
|
|
alloc := mock.AllocForNodeWithoutReservedPort(node1)
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.DeploymentID = deployment.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// new update with new task group
|
|
job2 := job.Copy()
|
|
job2.Stable = false
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans)
|
|
plan := h.Plans[0]
|
|
|
|
// Ensure a deployment was created
|
|
must.NotNil(t, plan.Deployment)
|
|
updateDeployment := plan.Deployment.ID
|
|
|
|
// Check status first - should be 4 allocs, only one is canary
|
|
{
|
|
ws := memdb.NewWatchSet()
|
|
allocs, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, true)
|
|
must.NoError(t, err)
|
|
must.Len(t, 4, allocs)
|
|
|
|
sort.Slice(allocs, func(i, j int) bool { return allocs[i].CreateIndex < allocs[j].CreateIndex })
|
|
|
|
for _, a := range allocs[:3] {
|
|
must.Eq(t, structs.AllocDesiredStatusRun, a.DesiredStatus)
|
|
must.Eq(t, uint64(0), a.Job.Version)
|
|
must.False(t, a.DeploymentStatus.IsCanary())
|
|
must.Eq(t, node1.ID, a.NodeID)
|
|
must.Eq(t, deployment.ID, a.DeploymentID)
|
|
}
|
|
must.Eq(t, structs.AllocDesiredStatusRun, allocs[3].DesiredStatus)
|
|
must.Eq(t, uint64(1), allocs[3].Job.Version)
|
|
must.True(t, allocs[3].DeploymentStatus.Canary)
|
|
must.Eq(t, node1.ID, allocs[3].NodeID)
|
|
must.Eq(t, updateDeployment, allocs[3].DeploymentID)
|
|
}
|
|
|
|
// now, drain node1 and ensure all are migrated to node2
|
|
node1 = node1.Copy()
|
|
node1.Status = structs.NodeStatusDown
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node1))
|
|
|
|
node2 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
|
|
|
|
neval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
NodeID: node1.ID,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{neval}))
|
|
|
|
// Process the evaluation
|
|
err = h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Now test that all node1 allocs are migrated while preserving Version and Canary info
|
|
{
|
|
// FIXME: This is a bug, we ought to reschedule canaries in this case but don't
|
|
rescheduleCanary := false
|
|
|
|
expectedMigrations := 3
|
|
if rescheduleCanary {
|
|
expectedMigrations++
|
|
}
|
|
|
|
ws := memdb.NewWatchSet()
|
|
allocs, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, true)
|
|
must.NoError(t, err)
|
|
must.Len(t, 4+expectedMigrations, allocs)
|
|
|
|
nodeAllocs := map[string][]*structs.Allocation{}
|
|
for _, a := range allocs {
|
|
nodeAllocs[a.NodeID] = append(nodeAllocs[a.NodeID], a)
|
|
}
|
|
|
|
must.Len(t, 4, nodeAllocs[node1.ID])
|
|
for _, a := range nodeAllocs[node1.ID] {
|
|
must.Eq(t, structs.AllocDesiredStatusStop, a.DesiredStatus)
|
|
must.Eq(t, node1.ID, a.NodeID)
|
|
}
|
|
|
|
node2Allocs := nodeAllocs[node2.ID]
|
|
must.Len(t, expectedMigrations, node2Allocs)
|
|
sort.Slice(node2Allocs, func(i, j int) bool { return node2Allocs[i].Job.Version < node2Allocs[j].Job.Version })
|
|
|
|
for _, a := range node2Allocs[:3] {
|
|
must.Eq(t, structs.AllocDesiredStatusRun, a.DesiredStatus)
|
|
must.Eq(t, uint64(0), a.Job.Version)
|
|
must.Eq(t, node2.ID, a.NodeID)
|
|
must.Eq(t, deployment.ID, a.DeploymentID)
|
|
}
|
|
if rescheduleCanary {
|
|
must.Eq(t, structs.AllocDesiredStatusRun, node2Allocs[3].DesiredStatus)
|
|
must.Eq(t, uint64(1), node2Allocs[3].Job.Version)
|
|
must.Eq(t, node2.ID, node2Allocs[3].NodeID)
|
|
must.Eq(t, updateDeployment, node2Allocs[3].DeploymentID)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestDowngradedJobForPlacement_PicksTheLatest asserts that downgradedJobForPlacement
|
|
// picks the latest deployment that have either been marked as promoted or is considered
|
|
// non-destructive so it doesn't use canaries.
|
|
func TestDowngradedJobForPlacement_PicksTheLatest(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// This test tests downgradedJobForPlacement directly to ease testing many different scenarios
|
|
// without invoking the full machinary of scheduling and updating deployment state tracking.
|
|
//
|
|
// It scafold the parts of scheduler and state stores so we can mimic the updates.
|
|
updates := []struct {
|
|
// Version of the job this update represent
|
|
version uint64
|
|
|
|
// whether this update is marked as promoted: Promoted is only true if the job
|
|
// update is a "destructive" update and has been updated manually
|
|
promoted bool
|
|
|
|
// mustCanaries indicate whether the job update requires placing canaries due to
|
|
// it being a destructive update compared to the latest promoted deployment.
|
|
mustCanaries bool
|
|
|
|
// the expected version for migrating a stable non-canary alloc after applying this update
|
|
expectedVersion uint64
|
|
}{
|
|
// always use latest promoted deployment
|
|
{1, true, true, 1},
|
|
{2, true, true, 2},
|
|
{3, true, true, 3},
|
|
|
|
// ignore most recent non promoted
|
|
{4, false, true, 3},
|
|
{5, false, true, 3},
|
|
{6, false, true, 3},
|
|
|
|
// use latest promoted after promotion
|
|
{7, true, true, 7},
|
|
|
|
// non destructive updates that don't require canaries and are treated as promoted
|
|
{8, false, false, 8},
|
|
}
|
|
|
|
job := mock.Job()
|
|
job.Version = 0
|
|
job.Stable = true
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
initDeployment := &structs.Deployment{
|
|
ID: uuid.Generate(),
|
|
JobID: job.ID,
|
|
Namespace: job.Namespace,
|
|
JobVersion: job.Version,
|
|
JobModifyIndex: job.JobModifyIndex,
|
|
JobCreateIndex: job.CreateIndex,
|
|
TaskGroups: map[string]*structs.DeploymentState{
|
|
"web": {
|
|
DesiredTotal: 1,
|
|
Promoted: true,
|
|
},
|
|
},
|
|
Status: structs.DeploymentStatusSuccessful,
|
|
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
|
|
}
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), initDeployment))
|
|
|
|
deploymentIDs := []string{initDeployment.ID}
|
|
|
|
for i, u := range updates {
|
|
t.Run(fmt.Sprintf("%d: %#+v", i, u), func(t *testing.T) {
|
|
t.Logf("case: %#+v", u)
|
|
nj := job.Copy()
|
|
nj.Version = u.version
|
|
nj.TaskGroups[0].Tasks[0].Env["version"] = fmt.Sprintf("%v", u.version)
|
|
nj.TaskGroups[0].Count = 1
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, nj))
|
|
|
|
desiredCanaries := 1
|
|
if !u.mustCanaries {
|
|
desiredCanaries = 0
|
|
}
|
|
deployment := &structs.Deployment{
|
|
ID: uuid.Generate(),
|
|
JobID: nj.ID,
|
|
Namespace: nj.Namespace,
|
|
JobVersion: nj.Version,
|
|
JobModifyIndex: nj.JobModifyIndex,
|
|
JobCreateIndex: nj.CreateIndex,
|
|
TaskGroups: map[string]*structs.DeploymentState{
|
|
"web": {
|
|
DesiredTotal: 1,
|
|
Promoted: u.promoted,
|
|
DesiredCanaries: desiredCanaries,
|
|
},
|
|
},
|
|
Status: structs.DeploymentStatusSuccessful,
|
|
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
|
|
}
|
|
must.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment))
|
|
|
|
deploymentIDs = append(deploymentIDs, deployment.ID)
|
|
|
|
sched := h.Scheduler(NewServiceScheduler).(*GenericScheduler)
|
|
|
|
sched.job = nj
|
|
sched.deployment = deployment
|
|
placement := &reconciler.AllocPlaceResult{}
|
|
placement.SetTaskGroup(nj.TaskGroups[0])
|
|
|
|
// Here, assert the downgraded job version
|
|
foundDeploymentID, foundJob, err := sched.downgradedJobForPlacement(placement)
|
|
must.NoError(t, err)
|
|
must.Eq(t, u.expectedVersion, foundJob.Version)
|
|
must.Eq(t, deploymentIDs[u.expectedVersion], foundDeploymentID)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestServiceSched_RunningWithNextAllocation asserts that if a running allocation has
|
|
// NextAllocation Set, the allocation is not ignored and will be stopped
|
|
func TestServiceSched_RunningWithNextAllocation(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
node1 := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node1))
|
|
|
|
totalCount := 2
|
|
job := mock.Job()
|
|
job.Version = 0
|
|
job.Stable = true
|
|
job.TaskGroups[0].Count = totalCount
|
|
job.TaskGroups[0].Update = nil
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < totalCount+1; i++ {
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node1.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
|
|
// simulate a case where .NextAllocation is set but alloc is still running
|
|
allocs[2].PreviousAllocation = allocs[0].ID
|
|
allocs[0].NextAllocation = allocs[2].ID
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// new update with new task group
|
|
job2 := job.Copy()
|
|
job2.Version = 1
|
|
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// assert that all original allocations have been stopped
|
|
for _, alloc := range allocs {
|
|
updated, err := h.State.AllocByID(nil, alloc.ID)
|
|
must.NoError(t, err)
|
|
must.Eq(t, structs.AllocDesiredStatusStop, updated.DesiredStatus, must.Sprintf("alloc %v", alloc.ID))
|
|
}
|
|
|
|
// assert that the new job has proper allocations
|
|
|
|
jobAllocs, err := h.State.AllocsByJob(nil, job.Namespace, job.ID, true)
|
|
must.NoError(t, err)
|
|
|
|
must.Len(t, 5, jobAllocs)
|
|
|
|
allocsByVersion := map[uint64][]string{}
|
|
for _, alloc := range jobAllocs {
|
|
allocsByVersion[alloc.Job.Version] = append(allocsByVersion[alloc.Job.Version], alloc.ID)
|
|
}
|
|
must.Len(t, 2, allocsByVersion[1])
|
|
must.Len(t, 3, allocsByVersion[0])
|
|
}
|
|
|
|
func TestServiceSched_CSIVolumesPerAlloc(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create some nodes, each running the CSI plugin
|
|
for i := 0; i < 5; i++ {
|
|
node := mock.Node()
|
|
node.CSINodePlugins = map[string]*structs.CSIInfo{
|
|
"test-plugin": {
|
|
PluginID: "test-plugin",
|
|
Healthy: true,
|
|
NodeInfo: &structs.CSINodeInfo{MaxVolumes: 2},
|
|
},
|
|
}
|
|
must.NoError(t, h.State.UpsertNode(
|
|
structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// create per-alloc volumes
|
|
vol0 := structs.NewCSIVolume("volume-unique[0]", 0)
|
|
vol0.PluginID = "test-plugin"
|
|
vol0.Namespace = structs.DefaultNamespace
|
|
vol0.AccessMode = structs.CSIVolumeAccessModeSingleNodeWriter
|
|
vol0.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem
|
|
|
|
vol1 := vol0.Copy()
|
|
vol1.ID = "volume-unique[1]"
|
|
vol2 := vol0.Copy()
|
|
vol2.ID = "volume-unique[2]"
|
|
|
|
// create shared volume
|
|
shared := vol0.Copy()
|
|
shared.ID = "volume-shared"
|
|
// TODO: this should cause a test failure, see GH-10157
|
|
// replace this value with structs.CSIVolumeAccessModeSingleNodeWriter
|
|
// once its been fixed
|
|
shared.AccessMode = structs.CSIVolumeAccessModeMultiNodeReader
|
|
|
|
must.NoError(t, h.State.UpsertCSIVolume(
|
|
h.NextIndex(), []*structs.CSIVolume{shared, vol0, vol1, vol2}))
|
|
|
|
// Create a job that uses both
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Count = 3
|
|
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
|
|
"shared": {
|
|
Type: "csi",
|
|
Name: "shared",
|
|
Source: "volume-shared",
|
|
ReadOnly: true,
|
|
},
|
|
"unique": {
|
|
Type: "csi",
|
|
Name: "unique",
|
|
Source: "volume-unique",
|
|
PerAlloc: true,
|
|
},
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation and expect a single plan without annotations
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected one plan"))
|
|
must.Nil(t, h.Plans[0].Annotations, must.Sprint("expected no annotations"))
|
|
|
|
// Expect the eval has not spawned a blocked eval
|
|
must.Eq(t, len(h.CreateEvals), 0)
|
|
must.Eq(t, "", h.Evals[0].BlockedEval, must.Sprint("did not expect a blocked eval"))
|
|
must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status)
|
|
|
|
// Ensure the plan allocated and we got expected placements
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range h.Plans[0].NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Len(t, 3, planned, must.Sprint("expected 3 planned allocations"))
|
|
|
|
out, err := h.State.AllocsByJob(nil, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 3, out, must.Sprint("expected 3 placed allocations"))
|
|
|
|
// Allocations don't have references to the actual volumes assigned, but
|
|
// because we set a max of 2 volumes per Node plugin, we can verify that
|
|
// they've been properly scheduled by making sure they're all on separate
|
|
// clients.
|
|
seen := map[string]struct{}{}
|
|
for _, alloc := range out {
|
|
_, ok := seen[alloc.NodeID]
|
|
must.False(t, ok, must.Sprint("allocations should be scheduled to separate nodes"))
|
|
seen[alloc.NodeID] = struct{}{}
|
|
}
|
|
|
|
// Update the job to 5 instances
|
|
job.TaskGroups[0].Count = 5
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a new eval and process it. It should not create a new plan.
|
|
eval.ID = uuid.Generate()
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
err = h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected one plan"))
|
|
|
|
// Expect the eval to have failed
|
|
must.NotEq(t, "", h.Evals[1].BlockedEval,
|
|
must.Sprint("expected a blocked eval to be spawned"))
|
|
must.Eq(t, 2, h.Evals[1].QueuedAllocations["web"], must.Sprint("expected 2 queued allocs"))
|
|
must.Eq(t, 5, h.Evals[1].FailedTGAllocs["web"].
|
|
ConstraintFiltered["missing CSI Volume volume-unique[3]"])
|
|
|
|
// Upsert 2 more per-alloc volumes
|
|
vol4 := vol0.Copy()
|
|
vol4.ID = "volume-unique[3]"
|
|
vol5 := vol0.Copy()
|
|
vol5.ID = "volume-unique[4]"
|
|
must.NoError(t, h.State.UpsertCSIVolume(
|
|
h.NextIndex(), []*structs.CSIVolume{vol4, vol5}))
|
|
|
|
// Process again with failure fixed. It should create a new plan
|
|
eval.ID = uuid.Generate()
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
err = h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
must.Len(t, 2, h.Plans, must.Sprint("expected two plans"))
|
|
must.Nil(t, h.Plans[1].Annotations, must.Sprint("expected no annotations"))
|
|
|
|
must.Eq(t, "", h.Evals[2].BlockedEval, must.Sprint("did not expect a blocked eval"))
|
|
must.MapLen(t, 0, h.Evals[2].FailedTGAllocs)
|
|
|
|
// Ensure the plan allocated and we got expected placements
|
|
planned = []*structs.Allocation{}
|
|
for _, allocList := range h.Plans[1].NodeAllocation {
|
|
planned = append(planned, allocList...)
|
|
}
|
|
must.Len(t, 2, planned, must.Sprint("expected 2 new planned allocations"))
|
|
|
|
out, err = h.State.AllocsByJob(nil, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
must.Len(t, 5, out, must.Sprint("expected 5 placed allocations total"))
|
|
|
|
// Make sure they're still all on seperate clients
|
|
seen = map[string]struct{}{}
|
|
for _, alloc := range out {
|
|
_, ok := seen[alloc.NodeID]
|
|
must.False(t, ok, must.Sprint("allocations should be scheduled to separate nodes"))
|
|
seen[alloc.NodeID] = struct{}{}
|
|
}
|
|
|
|
}
|
|
|
|
func TestServiceSched_CSITopology(t *testing.T) {
|
|
ci.Parallel(t)
|
|
h := tests.NewHarness(t)
|
|
|
|
zones := []string{"zone-0", "zone-1", "zone-2", "zone-3"}
|
|
|
|
// Create some nodes, each running a CSI plugin with topology for
|
|
// a different "zone"
|
|
for i := 0; i < 12; i++ {
|
|
node := mock.Node()
|
|
node.Datacenter = zones[i%4]
|
|
node.CSINodePlugins = map[string]*structs.CSIInfo{
|
|
"test-plugin-" + zones[i%4]: {
|
|
PluginID: "test-plugin-" + zones[i%4],
|
|
Healthy: true,
|
|
NodeInfo: &structs.CSINodeInfo{
|
|
MaxVolumes: 3,
|
|
AccessibleTopology: &structs.CSITopology{
|
|
Segments: map[string]string{"zone": zones[i%4]}},
|
|
},
|
|
},
|
|
}
|
|
must.NoError(t, h.State.UpsertNode(
|
|
structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
}
|
|
|
|
// create 2 per-alloc volumes for those zones
|
|
vol0 := structs.NewCSIVolume("myvolume[0]", 0)
|
|
vol0.PluginID = "test-plugin-zone-0"
|
|
vol0.Namespace = structs.DefaultNamespace
|
|
vol0.AccessMode = structs.CSIVolumeAccessModeSingleNodeWriter
|
|
vol0.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem
|
|
vol0.RequestedTopologies = &structs.CSITopologyRequest{
|
|
Required: []*structs.CSITopology{{
|
|
Segments: map[string]string{"zone": "zone-0"},
|
|
}},
|
|
}
|
|
|
|
vol1 := vol0.Copy()
|
|
vol1.ID = "myvolume[1]"
|
|
vol1.PluginID = "test-plugin-zone-1"
|
|
vol1.RequestedTopologies.Required[0].Segments["zone"] = "zone-1"
|
|
|
|
must.NoError(t, h.State.UpsertCSIVolume(
|
|
h.NextIndex(), []*structs.CSIVolume{vol0, vol1}))
|
|
|
|
// Create a job that uses those volumes
|
|
job := mock.Job()
|
|
job.Datacenters = zones
|
|
job.TaskGroups[0].Count = 2
|
|
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
|
|
"myvolume": {
|
|
Type: "csi",
|
|
Name: "unique",
|
|
Source: "myvolume",
|
|
PerAlloc: true,
|
|
},
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
|
|
h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation and expect a single plan without annotations
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected one plan"))
|
|
must.Nil(t, h.Plans[0].Annotations, must.Sprint("expected no annotations"))
|
|
|
|
// Expect the eval has not spawned a blocked eval
|
|
must.Eq(t, len(h.CreateEvals), 0)
|
|
must.Eq(t, "", h.Evals[0].BlockedEval, must.Sprint("did not expect a blocked eval"))
|
|
must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status)
|
|
|
|
}
|
|
|
|
// Tests that a client disconnect generates attribute updates and follow up evals.
|
|
func TestServiceSched_Client_Disconnect_Creates_Updates_and_Evals(t *testing.T) {
|
|
|
|
jobVersions := []struct {
|
|
name string
|
|
jobSpec func(time.Duration) *structs.Job
|
|
}{
|
|
{
|
|
name: "job-with-disconnect-block",
|
|
jobSpec: func(lostAfter time.Duration) *structs.Job {
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Disconnect = &structs.DisconnectStrategy{
|
|
LostAfter: lostAfter,
|
|
}
|
|
return job
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, version := range jobVersions {
|
|
t.Run(version.name, func(t *testing.T) {
|
|
|
|
h := tests.NewHarness(t)
|
|
count := 1
|
|
maxClientDisconnect := 10 * time.Minute
|
|
|
|
job := version.jobSpec(maxClientDisconnect)
|
|
job.TaskGroups[0].Count = count
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
disconnectedNode, job, unknownAllocs := initNodeAndAllocs(t, h, job,
|
|
structs.NodeStatusReady, structs.AllocClientStatusRunning)
|
|
|
|
// Now disconnect the node
|
|
disconnectedNode.Status = structs.NodeStatusDisconnected
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), disconnectedNode))
|
|
|
|
// Create an evaluation triggered by the disconnect
|
|
evals := []*structs.Evaluation{{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: 50,
|
|
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
|
JobID: job.ID,
|
|
NodeID: disconnectedNode.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}}
|
|
|
|
nodeStatusUpdateEval := evals[0]
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), evals))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, nodeStatusUpdateEval)
|
|
must.NoError(t, err)
|
|
must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status)
|
|
must.Len(t, 1, h.Plans, must.Sprint("expected a plan"))
|
|
|
|
// Two followup delayed eval created
|
|
must.Len(t, 2, h.CreateEvals)
|
|
followUpEval1 := h.CreateEvals[0]
|
|
must.Eq(t, nodeStatusUpdateEval.ID, followUpEval1.PreviousEval)
|
|
must.Eq(t, "pending", followUpEval1.Status)
|
|
must.NotEq(t, time.Time{}, followUpEval1.WaitUntil)
|
|
|
|
followUpEval2 := h.CreateEvals[1]
|
|
must.Eq(t, nodeStatusUpdateEval.ID, followUpEval2.PreviousEval)
|
|
must.Eq(t, "pending", followUpEval2.Status)
|
|
must.NotEq(t, time.Time{}, followUpEval2.WaitUntil)
|
|
|
|
// Validate that the ClientStatus updates are part of the plan.
|
|
must.Len(t, count, h.Plans[0].NodeAllocation[disconnectedNode.ID])
|
|
|
|
// Pending update should have unknown status.
|
|
for _, nodeAlloc := range h.Plans[0].NodeAllocation[disconnectedNode.ID] {
|
|
must.Eq(t, nodeAlloc.ClientStatus, structs.AllocClientStatusUnknown)
|
|
}
|
|
|
|
// Simulate that NodeAllocation got processed.
|
|
must.NoError(t, h.State.UpsertAllocs(
|
|
structs.MsgTypeTestSetup, h.NextIndex(),
|
|
h.Plans[0].NodeAllocation[disconnectedNode.ID]))
|
|
|
|
// Validate that the StateStore Upsert applied the ClientStatus we specified.
|
|
|
|
for _, alloc := range unknownAllocs {
|
|
alloc, err = h.State.AllocByID(nil, alloc.ID)
|
|
must.NoError(t, err)
|
|
must.Eq(t, alloc.ClientStatus, structs.AllocClientStatusUnknown)
|
|
|
|
// Allocations have been transitioned to unknown
|
|
must.Eq(t, structs.AllocDesiredStatusRun, alloc.DesiredStatus)
|
|
must.Eq(t, structs.AllocClientStatusUnknown, alloc.ClientStatus)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestServiceSched_ReservedCores_InPlace(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
h := tests.NewHarness(t)
|
|
|
|
// Create a node
|
|
node := mock.Node()
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
// Create a job
|
|
job := mock.Job()
|
|
job.TaskGroups[0].Tasks[0].Resources.Cores = 1
|
|
job.TaskGroups[0].Count = 2
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))
|
|
|
|
// Create running allocations on existing cores
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 2; i++ {
|
|
alloc := mock.AllocForNodeWithoutReservedPort(node)
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = []uint16{uint16(i + 1)}
|
|
allocs = append(allocs, alloc)
|
|
}
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
|
|
// Create a new job version with a different count
|
|
job2 := job.Copy()
|
|
job2.TaskGroups[0].Count = 3
|
|
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2))
|
|
|
|
// Create a mock evaluation to register the job
|
|
eval := &structs.Evaluation{
|
|
Namespace: structs.DefaultNamespace,
|
|
ID: uuid.Generate(),
|
|
Priority: job.Priority,
|
|
TriggeredBy: structs.EvalTriggerJobRegister,
|
|
JobID: job.ID,
|
|
Status: structs.EvalStatusPending,
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
|
|
|
|
// Process the evaluation
|
|
err := h.Process(NewServiceScheduler, eval)
|
|
must.NoError(t, err)
|
|
|
|
// Ensure a single plan
|
|
must.Len(t, 1, h.Plans)
|
|
|
|
// Ensure the eval has no spawned blocked eval due to core exhaustion
|
|
must.Eq(t, "", h.Evals[0].BlockedEval, must.Sprint("blocked eval should be empty, without core exhaustion"))
|
|
|
|
// Ensure the plan allocated with the correct reserved cores
|
|
var planned []*structs.Allocation
|
|
for _, allocList := range h.Plans[0].NodeAllocation {
|
|
for _, alloc := range allocList {
|
|
switch alloc.Name {
|
|
case "my-job.web[0]": // Ensure that the first planned alloc is still on core 1
|
|
must.Eq(t, []uint16{uint16(1)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
case "my-job.web[1]": // Ensure that the second planned alloc is still on core 2
|
|
must.Eq(t, []uint16{uint16(2)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
default: // Ensure that the new planned alloc is not on core 1 or 2
|
|
must.NotEq(t, []uint16{uint16(2)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
must.NotEq(t, []uint16{uint16(1)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
}
|
|
}
|
|
planned = append(planned, allocList...)
|
|
}
|
|
|
|
must.Len(t, 3, planned)
|
|
|
|
// Lookup the allocations by JobID
|
|
ws := memdb.NewWatchSet()
|
|
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
|
|
must.NoError(t, err)
|
|
|
|
must.Len(t, 3, out)
|
|
|
|
// Ensure the allocations continute to have the correct reserved cores
|
|
for _, alloc := range out {
|
|
switch alloc.Name {
|
|
case "my-job.web[0]": // Ensure that the first alloc is still on core 1
|
|
must.Eq(t, []uint16{uint16(1)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
case "my-job.web[1]": // Ensure that the second alloc is still on core 2
|
|
must.Eq(t, []uint16{uint16(2)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
default: // Ensure that the new alloc is not on core 1 or 2
|
|
must.NotEq(t, []uint16{uint16(2)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
must.NotEq(t, []uint16{uint16(1)}, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores)
|
|
}
|
|
}
|
|
|
|
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
|
}
|
|
|
|
func initNodeAndAllocs(t *testing.T, h *tests.Harness, job *structs.Job,
|
|
nodeStatus, clientStatus string) (*structs.Node, *structs.Job, []*structs.Allocation) {
|
|
// Node, which is ready
|
|
node := mock.Node()
|
|
node.Status = nodeStatus
|
|
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
|
|
|
|
allocs := make([]*structs.Allocation, job.TaskGroups[0].Count)
|
|
for i := 0; i < job.TaskGroups[0].Count; i++ {
|
|
// Alloc for the running group
|
|
alloc := mock.Alloc()
|
|
alloc.Job = job
|
|
alloc.JobID = job.ID
|
|
alloc.NodeID = node.ID
|
|
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
alloc.ClientStatus = clientStatus
|
|
|
|
allocs[i] = alloc
|
|
}
|
|
|
|
must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
|
|
return node, job, allocs
|
|
|
|
}
|