@@ -2,7 +2,6 @@ package drainer

 import (
     "context"
-    "fmt"
     "testing"
     "time"

@@ -11,309 +10,552 @@ import (
     "github.com/hashicorp/nomad/nomad/mock"
     "github.com/hashicorp/nomad/nomad/state"
     "github.com/hashicorp/nomad/nomad/structs"
+    "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
     "golang.org/x/time/rate"
 )

-func testDrainingJobWatcher(t *testing.T) (*drainingJobWatcher, *state.StateStore) {
-    t.Helper()
-
-    state := state.TestStateStore(t)
-    limiter := rate.NewLimiter(100.0, 100)
-    logger := testlog.Logger(t)
-    w := NewDrainingJobWatcher(context.Background(), limiter, state, logger)
-    return w, state
-}
-
-func TestDrainingJobWatcher_Interface(t *testing.T) {
-    t.Parallel()
-    require := require.New(t)
-    w, _ := testDrainingJobWatcher(t)
-    require.Implements((*DrainingJobWatcher)(nil), w)
-}
-
-// DrainingJobWatcher tests:
-// TODO Test that several jobs allocation changes get batched
-// TODO Test that jobs are deregistered when they have no more to migrate
-// TODO Test that the watcher gets triggered on alloc changes
-// TODO Test that the watcher cancels its query when a new job is registered
-
-func TestHandleTaskGroup_AllDone(t *testing.T) {
-    t.Parallel()
-    require := require.New(t)
-
-    // Create a non-draining node
-    state := state.TestStateStore(t)
-    n := mock.Node()
-    require.Nil(state.UpsertNode(100, n))
-
-    job := mock.Job()
-    require.Nil(state.UpsertJob(101, job))
-
-    // Create 10 running allocs on the healthy node
-    var allocs []*structs.Allocation
-    for i := 0; i < 10; i++ {
-        a := mock.Alloc()
-        a.Job = job
-        a.TaskGroup = job.TaskGroups[0].Name
-        a.NodeID = n.ID
-        a.DeploymentStatus = &structs.AllocDeploymentStatus{
-            Healthy: helper.BoolToPtr(false),
-        }
-        allocs = append(allocs, a)
-    }
-    require.Nil(state.UpsertAllocs(102, allocs))
-
-    snap, err := state.Snapshot()
-    require.Nil(err)
-
-    res := &jobResult{}
-    require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res))
-    require.Empty(res.drain)
-    require.Empty(res.migrated)
-    require.True(res.done)
-}
-
+func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
+    n1 := mock.Node()
+    n1.Name = "draining"
+    n1.DrainStrategy = &structs.DrainStrategy{
+        DrainSpec: structs.DrainSpec{
+            Deadline: time.Minute,
+        },
+        ForceDeadline: time.Now().Add(time.Minute),
+    }
+    require.Nil(t, state.UpsertNode(100, n1))
+
+    n2 := mock.Node()
+    n2.Name = "running"
+    require.Nil(t, state.UpsertNode(101, n2))
+    return n1, n2
+}
+
-func TestHandleTaskGroup_AllOnDrainingNodes(t *testing.T) {
-    t.Parallel()
-    require := require.New(t)
-
-    // The loop value sets the max parallel for the drain strategy
-    for i := 1; i < 8; i++ {
-        // Create a draining node
-        state := state.TestStateStore(t)
-        n := mock.Node()
-        n.DrainStrategy = &structs.DrainStrategy{
-            DrainSpec: structs.DrainSpec{
-                Deadline: 5 * time.Minute,
-            },
-            ForceDeadline: time.Now().Add(1 * time.Minute),
-        }
-        require.Nil(state.UpsertNode(100, n))
-
-        job := mock.Job()
-        job.TaskGroups[0].Migrate.MaxParallel = i
-        require.Nil(state.UpsertJob(101, job))
-
-        // Create 10 running allocs on the draining node
-        var allocs []*structs.Allocation
-        for i := 0; i < 10; i++ {
-            a := mock.Alloc()
-            a.Job = job
-            a.TaskGroup = job.TaskGroups[0].Name
-            a.NodeID = n.ID
-            a.DeploymentStatus = &structs.AllocDeploymentStatus{
-                Healthy: helper.BoolToPtr(false),
-            }
-            allocs = append(allocs, a)
-        }
-        require.Nil(state.UpsertAllocs(102, allocs))
-
-        snap, err := state.Snapshot()
-        require.Nil(err)
-
-        res := &jobResult{}
-        require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res))
-        require.Len(res.drain, i)
-        require.Empty(res.migrated)
-        require.False(res.done)
-    }
-}
-
+func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
+    t.Helper()
+
+    limiter := rate.NewLimiter(100.0, 100)
+    logger := testlog.Logger(t)
+    ctx, cancel := context.WithCancel(context.Background())
+    w := NewDrainingJobWatcher(ctx, limiter, state, logger)
+    return w, cancel
+}
+
+// TestDrainingJobWatcher_Interface is a compile-time assertion that we
+// implement the intended interface.
+func TestDrainingJobWatcher_Interface(t *testing.T) {
+    w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
+    cancel()
+    var _ DrainingJobWatcher = w
+}
+
+// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches
+// allocation changes from multiple jobs.
+func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
+    t.Parallel()
+    require := require.New(t)
+
+    state := state.TestStateStore(t)
+    jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state)
+    defer cancelWatcher()
+    drainingNode, runningNode := testNodes(t, state)
+
+    var index uint64 = 101
+    count := 8
+
+    newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
+        a := mock.Alloc()
+        a.JobID = job.ID
+        a.Job = job
+        a.TaskGroup = job.TaskGroups[0].Name
+        a.NodeID = node.ID
+        return a
+    }
+
+    // 2 jobs with count 8, max parallel 3
+    jnss := make([]structs.JobNs, 2)
+    jobs := make([]*structs.Job, 2)
+    for i := 0; i < 2; i++ {
+        job := mock.Job()
+        jobs[i] = job
+        jnss[i] = structs.NewJobNs(job.Namespace, job.ID)
+        job.TaskGroups[0].Migrate.MaxParallel = 3
+        job.TaskGroups[0].Count = count
+        require.Nil(state.UpsertJob(index, job))
+        index++
+
+        var allocs []*structs.Allocation
+        for i := 0; i < count; i++ {
+            a := newAlloc(drainingNode, job)
+            a.DeploymentStatus = &structs.AllocDeploymentStatus{
+                Healthy: helper.BoolToPtr(true),
+            }
+            allocs = append(allocs, a)
+        }
+
+        require.Nil(state.UpsertAllocs(index, allocs))
+        index++
+    }
+
+    // Only register jobs with watcher after creating all data models as
+    // once the watcher starts we need to track the index carefully for
+    // updating the batch future
+    jobWatcher.RegisterJobs(jnss)
+
+    // assertOps asserts how many allocs should be drained and migrated.
+    // The drains and migrations - if any - are returned.
+    assertOps := func(drained, migrated int) (drains *DrainRequest, migrations []*structs.Allocation) {
+        t.Helper()
+
+        var drainsChecked, migrationsChecked bool
+        for {
+            select {
+            case drains = <-jobWatcher.Drain():
+                ids := make([]string, len(drains.Allocs))
+                for i, a := range drains.Allocs {
+                    ids[i] = a.JobID[:6] + ":" + a.ID[:6]
+                }
+                t.Logf("draining %d allocs: %v", len(ids), ids)
+                require.False(drainsChecked, "drains already received")
+                drainsChecked = true
+                require.Lenf(drains.Allocs, drained,
+                    "expected %d drains but found %d", drained, len(drains.Allocs))
+            case migrations = <-jobWatcher.Migrated():
+                ids := make([]string, len(migrations))
+                for i, a := range migrations {
+                    ids[i] = a.JobID[:6] + ":" + a.ID[:6]
+                }
+                t.Logf("migrating %d allocs: %v", len(ids), ids)
+                require.False(migrationsChecked, "migrations already received")
+                migrationsChecked = true
+                require.Lenf(migrations, migrated,
+                    "expected %d migrations but found %d", migrated, len(migrations))
+            case <-time.After(10 * time.Millisecond):
+                if !drainsChecked && drained > 0 {
+                    t.Fatalf("expected %d drains but none happened", drained)
+                }
+                if !migrationsChecked && migrated > 0 {
+                    t.Fatalf("expected %d migrations but none happened", migrated)
+                }
+                return drains, migrations
+            }
+        }
+    }
+
+    // Expect a first batch of MaxParallel allocs from each job
+    drains, _ := assertOps(6, 0)
+
+    // Fake migrating the drained allocs by starting new ones and stopping
+    // the old ones
+    drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
+    for i, a := range drains.Allocs {
+        a.DesiredTransition.Migrate = helper.BoolToPtr(true)
+
+        // create a copy so we can reuse this slice
+        drainedAllocs[i] = a.Copy()
+    }
+    require.Nil(state.UpsertAllocs(index, drainedAllocs))
+    drains.Resp.Respond(index, nil)
+    index++
+
+    // Just setting ShouldMigrate should not cause any further drains
+    assertOps(0, 0)
+
+    // Proceed our fake migration along by creating new allocs and stopping
+    // old ones
+    replacements := make([]*structs.Allocation, len(drainedAllocs))
+    updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
+    for i, a := range drainedAllocs {
+        // Stop drained allocs
+        a.DesiredTransition.Migrate = nil
+        a.DesiredStatus = structs.AllocDesiredStatusStop
+
+        // Create a replacement
+        replacement := mock.Alloc()
+        replacement.JobID = a.Job.ID
+        replacement.Job = a.Job
+        replacement.TaskGroup = a.TaskGroup
+        replacement.NodeID = runningNode.ID
+        // start in pending state with no health status
+
+        updates = append(updates, a, replacement)
+        replacements[i] = replacement.Copy()
+    }
+    require.Nil(state.UpsertAllocs(index, updates))
+    index++
+
+    // The drained allocs stopping cause migrations but no new drains
+    // because the replacements have not started
+    assertOps(0, 6)
+
+    // Finally kickoff further drain activity by "starting" replacements
+    for _, a := range replacements {
+        a.ClientStatus = structs.AllocClientStatusRunning
+        a.DeploymentStatus = &structs.AllocDeploymentStatus{
+            Healthy: helper.BoolToPtr(true),
+        }
+    }
+    require.Nil(state.UpsertAllocs(index, replacements))
+    index++
+
+    require.NotEmpty(jobWatcher.drainingJobs())
+
+    // 6 new drains
+    drains, _ = assertOps(6, 0)
+
+    // Fake migrations once more to finish the drain
+    drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
+    for i, a := range drains.Allocs {
+        a.DesiredTransition.Migrate = helper.BoolToPtr(true)
+
+        // create a copy so we can reuse this slice
+        drainedAllocs[i] = a.Copy()
+    }
+    require.Nil(state.UpsertAllocs(index, drainedAllocs))
+    drains.Resp.Respond(index, nil)
+    index++
+
+    assertOps(0, 0)
+
+    replacements = make([]*structs.Allocation, len(drainedAllocs))
+    updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
+    for i, a := range drainedAllocs {
+        a.DesiredTransition.Migrate = nil
+        a.DesiredStatus = structs.AllocDesiredStatusStop
+
+        replacement := newAlloc(runningNode, a.Job)
+        updates = append(updates, a, replacement)
+        replacements[i] = replacement.Copy()
+    }
+    require.Nil(state.UpsertAllocs(index, updates))
+    index++
+
+    assertOps(0, 6)
+
+    for _, a := range replacements {
+        a.ClientStatus = structs.AllocClientStatusRunning
+        a.DeploymentStatus = &structs.AllocDeploymentStatus{
+            Healthy: helper.BoolToPtr(true),
+        }
+    }
+    require.Nil(state.UpsertAllocs(index, replacements))
+    index++
+
+    require.NotEmpty(jobWatcher.drainingJobs())
+
+    // Final 4 new drains
+    drains, _ = assertOps(4, 0)
+
+    // Fake migrations once more to finish the drain
+    drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
+    for i, a := range drains.Allocs {
+        a.DesiredTransition.Migrate = helper.BoolToPtr(true)
+
+        // create a copy so we can reuse this slice
+        drainedAllocs[i] = a.Copy()
+    }
+    require.Nil(state.UpsertAllocs(index, drainedAllocs))
+    drains.Resp.Respond(index, nil)
+    index++
+
+    assertOps(0, 0)
+
+    replacements = make([]*structs.Allocation, len(drainedAllocs))
+    updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
+    for i, a := range drainedAllocs {
+        a.DesiredTransition.Migrate = nil
+        a.DesiredStatus = structs.AllocDesiredStatusStop
+
+        replacement := newAlloc(runningNode, a.Job)
+        updates = append(updates, a, replacement)
+        replacements[i] = replacement.Copy()
+    }
+    require.Nil(state.UpsertAllocs(index, updates))
+    index++
+
+    assertOps(0, 4)
+
+    for _, a := range replacements {
+        a.ClientStatus = structs.AllocClientStatusRunning
+        a.DeploymentStatus = &structs.AllocDeploymentStatus{
+            Healthy: helper.BoolToPtr(true),
+        }
+    }
+    require.Nil(state.UpsertAllocs(index, replacements))
+    index++
+
+    // No jobs should be left!
+    require.Empty(jobWatcher.drainingJobs())
+}
+
+// DrainingJobWatcher tests:
+// TODO Test that the watcher cancels its query when a new job is registered
+
+// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup
+//
+// Two nodes will be initialized: one draining and one running.
+type handleTaskGroupTestCase struct {
+    // Name of test
+    Name string
+
+    // Expectations
+    ExpectedDrained  int
+    ExpectedMigrated int
+    ExpectedDone     bool
+
+    // Count overrides the default count of 10 if set
+    Count int
+
+    // MaxParallel overrides the default max_parallel of 1 if set
+    MaxParallel int
+
+    // AddAlloc will be called 10 times to create test allocs
+    //
+    // Allocs default to be healthy on the draining node
+    AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string)
+}
+
+func TestHandeTaskGroup_Table(t *testing.T) {
+    cases := []handleTaskGroupTestCase{
+        {
+            // All allocs on draining node
+            Name:             "AllDraining",
+            ExpectedDrained:  1,
+            ExpectedMigrated: 0,
+            ExpectedDone:     false,
+        },
+        {
+            // All allocs on non-draining node
+            Name:             "AllNonDraining",
+            ExpectedDrained:  0,
+            ExpectedMigrated: 0,
+            ExpectedDone:     true,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                a.NodeID = runningID
+            },
+        },
+        {
+            // Some allocs on non-draining node but not healthy
+            Name:             "SomeNonDrainingUnhealthy",
+            ExpectedDrained:  0,
+            ExpectedMigrated: 0,
+            ExpectedDone:     false,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                if i%2 == 0 {
+                    a.NodeID = runningID
+                    a.DeploymentStatus = nil
+                }
+            },
+        },
+        {
+            // One draining, other allocs on non-draining node and healthy
+            Name:             "OneDraining",
+            ExpectedDrained:  1,
+            ExpectedMigrated: 0,
+            ExpectedDone:     false,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                if i != 0 {
+                    a.NodeID = runningID
+                }
+            },
+        },
+        {
+            // One already draining, other allocs on non-draining node and healthy
+            Name:             "OneAlreadyDraining",
+            ExpectedDrained:  0,
+            ExpectedMigrated: 0,
+            ExpectedDone:     false,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                if i == 0 {
+                    a.DesiredTransition.Migrate = helper.BoolToPtr(true)
+                    return
+                }
+                a.NodeID = runningID
+            },
+        },
+        {
+            // One already drained, other allocs on non-draining node and healthy
+            Name:             "OneAlreadyDrained",
+            ExpectedDrained:  0,
+            ExpectedMigrated: 1,
+            ExpectedDone:     true,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                if i == 0 {
+                    a.DesiredStatus = structs.AllocDesiredStatusStop
+                    return
+                }
+                a.NodeID = runningID
+            },
+        },
+        {
+            // All allocs are terminal, nothing to be drained
+            Name:             "AllMigrating",
+            ExpectedDrained:  0,
+            ExpectedMigrated: 10,
+            ExpectedDone:     true,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                a.DesiredStatus = structs.AllocDesiredStatusStop
+            },
+        },
+        {
+            // All allocs may be drained at once
+            Name:             "AllAtOnce",
+            ExpectedDrained:  10,
+            ExpectedMigrated: 0,
+            ExpectedDone:     false,
+            MaxParallel:      10,
+        },
+        {
+            // Drain 2
+            Name:             "Drain2",
+            ExpectedDrained:  2,
+            ExpectedMigrated: 0,
+            ExpectedDone:     false,
+            MaxParallel:      2,
+        },
+        {
+            // One on new node, one drained, and one draining
+            ExpectedDrained:  1,
+            ExpectedMigrated: 1,
+            MaxParallel:      2,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                switch i {
+                case 0:
+                    // One alloc on running node
+                    a.NodeID = runningID
+                case 1:
+                    // One alloc already migrated
+                    a.DesiredStatus = structs.AllocDesiredStatusStop
+                }
+            },
+        },
+        {
+            // 8 on new node, one drained, and one draining
+            ExpectedDrained:  1,
+            ExpectedMigrated: 1,
+            MaxParallel:      2,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                switch i {
+                case 0, 1, 2, 3, 4, 5, 6, 7:
+                    a.NodeID = runningID
+                case 8:
+                    a.DesiredStatus = structs.AllocDesiredStatusStop
+                }
+            },
+        },
+        {
+            // 5 on new node, two drained, and three draining
+            ExpectedDrained:  3,
+            ExpectedMigrated: 2,
+            MaxParallel:      5,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                switch i {
+                case 0, 1, 2, 3, 4:
+                    a.NodeID = runningID
+                case 8, 9:
+                    a.DesiredStatus = structs.AllocDesiredStatusStop
+                }
+            },
+        },
+        {
+            // Not all on new node have health set
+            Name:             "PendingHealth",
+            ExpectedDrained:  1,
+            ExpectedMigrated: 1,
+            MaxParallel:      3,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                switch i {
+                case 0:
+                    // Deployment status UNset for 1 on new node
+                    a.NodeID = runningID
+                    a.DeploymentStatus = nil
+                case 1, 2, 3, 4:
+                    // Deployment status set for 4 on new node
+                    a.NodeID = runningID
+                case 9:
+                    a.DesiredStatus = structs.AllocDesiredStatusStop
+                }
+            },
+        },
+        {
+            // 5 max parallel - 1 migrating - 2 with unset health = 2 drainable
+            Name:             "PendingHealthHigherMax",
+            ExpectedDrained:  2,
+            ExpectedMigrated: 1,
+            MaxParallel:      5,
+            AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
+                switch i {
+                case 0, 1:
+                    // Deployment status UNset for 2 on new node
+                    a.NodeID = runningID
+                    a.DeploymentStatus = nil
+                case 2, 3, 4:
+                    // Deployment status set for 3 on new node
+                    a.NodeID = runningID
+                case 9:
+                    a.DesiredStatus = structs.AllocDesiredStatusStop
+                }
+            },
+        },
+    }
+
+    for _, testCase := range cases {
+        t.Run(testCase.Name, func(t *testing.T) {
+            testHandleTaskGroup(t, testCase)
+        })
+    }
+}
+
-func TestHandleTaskGroup_MixedHealth(t *testing.T) {
-    cases := []struct {
-        maxParallel        int
-        drainingNodeAllocs int
-        healthSet          int
-        healthUnset        int
-        expectedDrain      int
-        expectedMigrated   int
-        expectedDone       bool
-    }{
-        {
-            maxParallel:        2,
-            drainingNodeAllocs: 10,
-            healthSet:          0,
-            healthUnset:        0,
-            expectedDrain:      2,
-            expectedMigrated:   0,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        2,
-            drainingNodeAllocs: 9,
-            healthSet:          0,
-            healthUnset:        0,
-            expectedDrain:      1,
-            expectedMigrated:   1,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        5,
-            drainingNodeAllocs: 9,
-            healthSet:          0,
-            healthUnset:        0,
-            expectedDrain:      4,
-            expectedMigrated:   1,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        2,
-            drainingNodeAllocs: 5,
-            healthSet:          2,
-            healthUnset:        0,
-            expectedDrain:      0,
-            expectedMigrated:   5,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        2,
-            drainingNodeAllocs: 5,
-            healthSet:          3,
-            healthUnset:        0,
-            expectedDrain:      0,
-            expectedMigrated:   5,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        2,
-            drainingNodeAllocs: 5,
-            healthSet:          4,
-            healthUnset:        0,
-            expectedDrain:      1,
-            expectedMigrated:   5,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        2,
-            drainingNodeAllocs: 5,
-            healthSet:          4,
-            healthUnset:        1,
-            expectedDrain:      1,
-            expectedMigrated:   5,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        1,
-            drainingNodeAllocs: 5,
-            healthSet:          4,
-            healthUnset:        1,
-            expectedDrain:      0,
-            expectedMigrated:   5,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        3,
-            drainingNodeAllocs: 5,
-            healthSet:          3,
-            healthUnset:        0,
-            expectedDrain:      1,
-            expectedMigrated:   5,
-            expectedDone:       false,
-        },
-        {
-            maxParallel:        3,
-            drainingNodeAllocs: 0,
-            healthSet:          10,
-            healthUnset:        0,
-            expectedDrain:      0,
-            expectedMigrated:   10,
-            expectedDone:       true,
-        },
-        {
-            // Is the case where deadline is hit and all 10 are just marked
-            // stopped. We should detect the job as done.
-            maxParallel:        3,
-            drainingNodeAllocs: 0,
-            healthSet:          0,
-            healthUnset:        0,
-            expectedDrain:      0,
-            expectedMigrated:   10,
-            expectedDone:       true,
-        },
-    }
-
-    for cnum, c := range cases {
-        t.Run(fmt.Sprintf("%d", cnum), func(t *testing.T) {
-            require := require.New(t)
-
-            // Create a draining node
-            state := state.TestStateStore(t)
-
-            drainingNode := mock.Node()
-            drainingNode.DrainStrategy = &structs.DrainStrategy{
-                DrainSpec: structs.DrainSpec{
-                    Deadline: 5 * time.Minute,
-                },
-                ForceDeadline: time.Now().Add(1 * time.Minute),
-            }
-            require.Nil(state.UpsertNode(100, drainingNode))
-
-            healthyNode := mock.Node()
-            require.Nil(state.UpsertNode(101, healthyNode))
-
-            job := mock.Job()
-            job.TaskGroups[0].Migrate.MaxParallel = c.maxParallel
-            require.Nil(state.UpsertJob(101, job))
-
-            // Create running allocs on the draining node with health set
-            var allocs []*structs.Allocation
-            for i := 0; i < c.drainingNodeAllocs; i++ {
-                a := mock.Alloc()
-                a.Job = job
-                a.TaskGroup = job.TaskGroups[0].Name
-                a.NodeID = drainingNode.ID
-                a.DeploymentStatus = &structs.AllocDeploymentStatus{
-                    Healthy: helper.BoolToPtr(false),
-                }
-                allocs = append(allocs, a)
-            }
-
-            // Create stopped allocs on the draining node
-            for i := 10 - c.drainingNodeAllocs; i > 0; i-- {
-                a := mock.Alloc()
-                a.Job = job
-                a.TaskGroup = job.TaskGroups[0].Name
-                a.NodeID = drainingNode.ID
-                a.DeploymentStatus = &structs.AllocDeploymentStatus{
-                    Healthy: helper.BoolToPtr(false),
-                }
-                a.DesiredStatus = structs.AllocDesiredStatusStop
-                allocs = append(allocs, a)
-            }
-
-            // Create allocs on the healthy node with health set
-            for i := 0; i < c.healthSet; i++ {
-                a := mock.Alloc()
-                a.Job = job
-                a.TaskGroup = job.TaskGroups[0].Name
-                a.NodeID = healthyNode.ID
-                a.DeploymentStatus = &structs.AllocDeploymentStatus{
-                    Healthy: helper.BoolToPtr(false),
-                }
-                allocs = append(allocs, a)
-            }
-
-            // Create allocs on the healthy node with health not set
-            for i := 0; i < c.healthUnset; i++ {
-                a := mock.Alloc()
-                a.Job = job
-                a.TaskGroup = job.TaskGroups[0].Name
-                a.NodeID = healthyNode.ID
-                allocs = append(allocs, a)
-            }
-            require.Nil(state.UpsertAllocs(103, allocs))
-
-            snap, err := state.Snapshot()
-            require.Nil(err)
-
-            res := &jobResult{}
-            require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res))
-            require.Len(res.drain, c.expectedDrain)
-            require.Len(res.migrated, c.expectedMigrated)
-            require.Equal(c.expectedDone, res.done)
-        })
-    }
-}
-
+func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) {
+    t.Parallel()
+    require := require.New(t)
+    assert := assert.New(t)
+
+    // Create nodes
+    state := state.TestStateStore(t)
+    drainingNode, runningNode := testNodes(t, state)
+
+    job := mock.Job()
+    job.TaskGroups[0].Count = 10
+    if tc.Count > 0 {
+        job.TaskGroups[0].Count = tc.Count
+    }
+    if tc.MaxParallel > 0 {
+        job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel
+    }
+    require.Nil(state.UpsertJob(102, job))
+
+    var allocs []*structs.Allocation
+    for i := 0; i < 10; i++ {
+        a := mock.Alloc()
+        a.JobID = job.ID
+        a.Job = job
+        a.TaskGroup = job.TaskGroups[0].Name
+
+        // Default to being healthy on the draining node
+        a.NodeID = drainingNode.ID
+        a.DeploymentStatus = &structs.AllocDeploymentStatus{
+            Healthy: helper.BoolToPtr(true),
+        }
+        if tc.AddAlloc != nil {
+            tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID)
+        }
+        allocs = append(allocs, a)
+    }
+
+    require.Nil(state.UpsertAllocs(103, allocs))
+    snap, err := state.Snapshot()
+    require.Nil(err)
+
+    res := newJobResult()
+    require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 102, res))
+    assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d",
+        tc.ExpectedDrained, len(res.drain))
+    assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d",
+        tc.ExpectedMigrated, len(res.migrated))
+    assert.Equal(tc.ExpectedDone, res.done)
+}
+
 func TestHandleTaskGroup_Migrations(t *testing.T) {