From 369a04b135c5680129c2167665afca590db28c6c Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Fri, 2 Jun 2017 16:11:29 -0700
Subject: [PATCH] Deployment tests

---
 nomad/structs/structs.go    |   47 +-
 scheduler/reconcile.go      |  175 ++--
 scheduler/reconcile_test.go | 1591 +++++++++++++++++++++++++++++++++--
 scheduler/reconcile_util.go |   28 +-
 4 files changed, 1689 insertions(+), 152 deletions(-)

diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 66c45f65d..f98abe86f 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -4080,26 +4080,6 @@ func (a *Allocation) RanSuccessfully() bool {
 	return allSuccess
 }
 
-// Stub returns a list stub for the allocation
-func (a *Allocation) Stub() *AllocListStub {
-	return &AllocListStub{
-		ID: a.ID,
-		EvalID: a.EvalID,
-		Name: a.Name,
-		NodeID: a.NodeID,
-		JobID: a.JobID,
-		TaskGroup: a.TaskGroup,
-		DesiredStatus: a.DesiredStatus,
-		DesiredDescription: a.DesiredDescription,
-		ClientStatus: a.ClientStatus,
-		ClientDescription: a.ClientDescription,
-		TaskStates: a.TaskStates,
-		CreateIndex: a.CreateIndex,
-		ModifyIndex: a.ModifyIndex,
-		CreateTime: a.CreateTime,
-	}
-}
-
 // ShouldMigrate returns if the allocation needs data migration
 func (a *Allocation) ShouldMigrate() bool {
 	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
@@ -4123,6 +4103,33 @@ func (a *Allocation) ShouldMigrate() bool {
 	return true
 }
 
+// DeploymentHealthy returns if the allocation is marked as healthy as part of a
+// deployment
+func (a *Allocation) DeploymentHealthy() bool {
+	return a.DeploymentStatus != nil &&
+		a.DeploymentStatus.Healthy != nil && *a.DeploymentStatus.Healthy
+}
+
+// Stub returns a list stub for the allocation
+func (a *Allocation) Stub() *AllocListStub {
+	return &AllocListStub{
+		ID: a.ID,
+		EvalID: a.EvalID,
+		Name: a.Name,
+		NodeID: a.NodeID,
+		JobID: a.JobID,
+		TaskGroup: a.TaskGroup,
+		DesiredStatus: a.DesiredStatus,
+		DesiredDescription: a.DesiredDescription,
+		ClientStatus: a.ClientStatus,
+		ClientDescription: a.ClientDescription,
+		TaskStates: a.TaskStates,
+		CreateIndex: a.CreateIndex,
+		ModifyIndex: a.ModifyIndex,
+		CreateTime: a.CreateTime,
+	}
+}
+
 // AllocListStub is used to return a subset of alloc information
 type AllocListStub struct {
 	ID string
diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index 7c27d12bc..186007929 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -44,6 +44,9 @@ type allocReconciler struct {
 	// deploymentPaused marks whether the deployment is paused
 	deploymentPaused bool
 
+	// deploymentFailed marks whether the deployment is failed
+	deploymentFailed bool
+
 	// taintedNodes contains a map of nodes that are tainted
 	taintedNodes map[string]*structs.Node
 
@@ -119,6 +122,7 @@ func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch
 	// Detect if the deployment is paused
 	if deployment != nil {
 		a.deploymentPaused = deployment.Status == structs.DeploymentStatusPaused
+		a.deploymentFailed = deployment.Status == structs.DeploymentStatusFailed
 	}
 
 	return a
@@ -152,9 +156,9 @@ func (a *allocReconciler) Compute() *reconcileResults {
 // computeDeployments cancels any deployment that is not needed and creates a
 // deployment if it is needed
 func (a *allocReconciler) computeDeployments() {
-	// If the job is stopped and there is a deployment cancel it
+	// If the job is stopped and there is a non-terminal deployment, cancel it
 	if a.job.Stopped() {
-		if a.deployment != nil {
+		if a.deployment != nil && 
a.deployment.Active() { a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusCancelled, @@ -168,7 +172,7 @@ func (a *allocReconciler) computeDeployments() { // Check if the deployment is referencing an older job and cancel it if d := a.deployment; d != nil { - if d.JobCreateIndex != a.job.CreateIndex || d.JobModifyIndex != a.job.JobModifyIndex { + if d.Active() && (d.JobCreateIndex != a.job.CreateIndex || d.JobModifyIndex != a.job.JobModifyIndex) { a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusCancelled, @@ -178,6 +182,7 @@ func (a *allocReconciler) computeDeployments() { } } + // XXX Should probably do this as needed // Create a new deployment if necessary if a.deployment == nil && !a.job.Stopped() && a.job.HasUpdateStrategy() { a.deployment = structs.NewDeployment(a.job) @@ -221,13 +226,11 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { // that the task group no longer exists tg := a.job.LookupTaskGroup(group) - // Determine what set of alloations are on tainted nodes - untainted, migrate, lost := all.filterByTainted(a.taintedNodes) - // If the task group is nil, then the task group has been removed so all we // need to do is stop everything if tg == nil { a.logger.Printf("RECONCILER -- STOPPING ALL") + untainted, migrate, lost := all.filterByTainted(a.taintedNodes) a.markStop(untainted, "", allocNotNeeded) a.markStop(migrate, "", allocNotNeeded) a.markStop(lost, structs.AllocClientStatusLost, allocLost) @@ -249,51 +252,60 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { } } - a.logger.Printf("RECONCILER -- untainted (%d); migrate (%d); lost (%d)", len(untainted), len(migrate), len(lost)) - a.logger.Printf("RECONCILER -- untainted %#v", untainted) - // Get any existing canaries - canaries := untainted.filterByCanary() + canaries := all.filterByCanary() // Cancel any canary from a prior deployment if len(canaries) != 0 { if a.deployment != nil { current, older := canaries.filterByDeployment(a.deployment.ID) + + // Stop the older canaries a.markStop(older, "", allocNotNeeded) desiredChanges.Stop += uint64(len(older)) - - a.logger.Printf("RECONCILER -- older canaries %#v", older) - a.logger.Printf("RECONCILER -- current canaries %#v", current) - - untainted = untainted.difference(older) canaries = current + + // Handle canaries on migrating/lost nodes here by just stopping + // them + untainted, migrate, lost := canaries.filterByTainted(a.taintedNodes) + a.markStop(migrate, "", allocMigrating) + a.markStop(lost, structs.AllocClientStatusLost, allocLost) + canaries = untainted + + // Update the all set + all = all.difference(older, migrate, lost) + a.logger.Printf("RECONCILER -- canaries %#v", canaries) } else { + // XXX this is totally wrong they may just be promoted good canaries // We don't need any of those canaries since there no longer is a // deployment a.markStop(canaries, "", allocNotNeeded) desiredChanges.Stop += uint64(len(canaries)) - untainted = untainted.difference(canaries) + all = all.difference(canaries) canaries = nil } - a.logger.Printf("RECONCILER -- untainted - remove canaries %#v", untainted) } + // Determine what set of alloations are on tainted nodes + untainted, migrate, lost := all.filterByTainted(a.taintedNodes) + a.logger.Printf("RECONCILER -- untainted (%d); migrate (%d); lost (%d)", len(untainted), 
len(migrate), len(lost)) + // Create a structure for choosing names. Seed with the taken names which is - // the union of untainted and migrating nodes + // the union of untainted and migrating nodes (includes canaries) nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate)) // Stop any unneeded allocations and update the untainted set to not // included stopped allocations. We ignore canaries since that can push us // over the desired count - existingCanariesPromoted := dstate == nil || dstate.DesiredCanaries == 0 || dstate.Promoted - stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, existingCanariesPromoted) + canaryState := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted + stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, canaryState) desiredChanges.Stop += uint64(len(stop)) untainted = untainted.difference(stop) // Having stopped un-needed allocations, append the canaries to the existing // set of untainted because they are promoted. This will cause them to be // treated like non-canaries - if existingCanariesPromoted { + if !canaryState { untainted = untainted.union(canaries) nameIndex.Set(canaries) } @@ -303,7 +315,9 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { ignore, inplace, destructive := a.computeUpdates(tg, untainted) desiredChanges.Ignore += uint64(len(ignore)) desiredChanges.InPlaceUpdate += uint64(len(inplace)) - desiredChanges.DestructiveUpdate += uint64(len(destructive)) + if creatingDeployment { + dstate.DesiredTotal += len(destructive) + len(inplace) + } a.logger.Printf("RECONCILER -- Stopping (%d)", len(stop)) a.logger.Printf("RECONCILER -- Inplace (%d); Destructive (%d)", len(inplace), len(destructive)) @@ -313,16 +327,15 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { numDestructive := len(destructive) strategy := tg.Update requireCanary := numDestructive != 0 && strategy != nil && len(canaries) < strategy.Canary - if requireCanary && !a.deploymentPaused { + if requireCanary && !a.deploymentPaused && !a.deploymentFailed { number := strategy.Canary - len(canaries) number = helper.IntMin(numDestructive, number) desiredChanges.Canary += uint64(number) if creatingDeployment { dstate.DesiredCanaries = strategy.Canary - dstate.DesiredTotal += strategy.Canary } - a.logger.Printf("RECONCILER -- Canary (%d)", number) + a.logger.Printf("RECONCILER -- Place Canaries (%d)", number) for _, name := range nameIndex.NextCanaries(uint(number), canaries, destructive) { a.result.place = append(a.result.place, allocPlaceResult{ name: name, @@ -333,12 +346,12 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { } // Determine how many we can place - haveCanaries := dstate != nil && dstate.DesiredCanaries != 0 - limit := a.computeLimit(tg, untainted, destructive, haveCanaries) + canaryState = dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted + limit := a.computeLimit(tg, untainted, destructive, canaryState) a.logger.Printf("RECONCILER -- LIMIT %v", limit) // Place if: - // * The deployment is not paused + // * The deployment is not paused or failed // * Not placing any canaries // * If there are any canaries that they have been promoted place := a.computePlacements(tg, nameIndex, untainted, migrate) @@ -346,27 +359,23 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { dstate.DesiredTotal += len(place) } - if !a.deploymentPaused && existingCanariesPromoted { - // Update the desired changes and if 
we are creating a deployment update - // the state. - desiredChanges.Place += uint64(len(place)) - + if !a.deploymentPaused && !a.deploymentFailed && !canaryState { // Place all new allocations a.logger.Printf("RECONCILER -- Placing (%d)", len(place)) + desiredChanges.Place += uint64(len(place)) for _, p := range place { a.result.place = append(a.result.place, p) } + // XXX Needs to be done in order // Do all destructive updates min := helper.IntMin(len(destructive), limit) - i := 0 + limit -= min + desiredChanges.DestructiveUpdate += uint64(min) + desiredChanges.Ignore += uint64(len(destructive) - min) a.logger.Printf("RECONCILER -- Destructive Updating (%d)", min) - for _, alloc := range destructive { - if i == min { - break - } - i++ - + for _, alloc := range destructive.nameOrder()[:min] { + a.logger.Printf("RECONCILER -- Destructive Updating %q %q", alloc.ID, alloc.Name) a.result.stop = append(a.result.stop, allocStopResult{ alloc: alloc, statusDescription: allocUpdating, @@ -377,44 +386,53 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) { previousAlloc: alloc, }) } - limit -= min + } else { + a.logger.Printf("RECONCILER -- NON PROMOTED CASE") + desiredChanges.Ignore += uint64(len(destructive)) } // TODO Migrations should be done using a stagger and max_parallel. - desiredChanges.Migrate += uint64(len(migrate)) - a.logger.Printf("RECONCILER -- Migrating (%d)", len(migrate)) + if !a.deploymentFailed { + desiredChanges.Migrate += uint64(len(migrate)) + a.logger.Printf("RECONCILER -- Migrating (%d)", len(migrate)) + } else { + desiredChanges.Stop += uint64(len(migrate)) + } + for _, alloc := range migrate { a.result.stop = append(a.result.stop, allocStopResult{ alloc: alloc, statusDescription: allocMigrating, }) - a.result.place = append(a.result.place, allocPlaceResult{ - name: alloc.Name, - canary: false, - taskGroup: tg, - previousAlloc: alloc, - }) + + // If the deployment is failed, just stop the allocation + if !a.deploymentFailed { + a.result.place = append(a.result.place, allocPlaceResult{ + name: alloc.Name, + canary: false, + taskGroup: tg, + previousAlloc: alloc, + }) + } } + } // computeLimit returns the placement limit for a particular group. The inputs // are the group definition, the untainted and destructive allocation set and -// whether any canaries exist or are being placed. -func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, destructive allocSet, canaries bool) int { +// whether we are in a canary state. 
+func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, destructive allocSet, canaryState bool) int { // If there is no update stategy or deployment for the group we can deploy // as many as the group has if group.Update == nil || len(destructive) == 0 { return group.Count - } else if a.deploymentPaused { - // If the deployment is paused, do not create anything else + } else if a.deploymentPaused || a.deploymentFailed { + // If the deployment is paused or failed, do not create anything else return 0 } - // Get the state of the deployment for the group - deploymentState := a.deployment.TaskGroups[group.Name] - // If we have canaries and they have not been promoted the limit is 0 - if canaries && (deploymentState == nil || !deploymentState.Promoted) { + if canaryState { return 0 } @@ -424,7 +442,7 @@ func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, dest limit := group.Update.MaxParallel partOf, _ := untainted.filterByDeployment(a.deployment.ID) for _, alloc := range partOf { - if alloc.DeploymentStatus == nil || alloc.DeploymentStatus.Healthy == nil { + if !alloc.DeploymentHealthy() { limit-- } } @@ -457,7 +475,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // computeStop returns the set of allocations to stop given the group definiton // and the set of untainted and canary allocations for the group. func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *allocNameIndex, - untainted, migrate, lost, canaries allocSet, promoted bool) allocSet { + untainted, migrate, lost, canaries allocSet, canaryState bool) allocSet { // Mark all lost allocations for stop. Previous allocation doesn't matter // here since it is on a lost node @@ -465,15 +483,37 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc stop = stop.union(lost) a.markStop(lost, structs.AllocClientStatusLost, allocLost) - if !promoted { - // Canaries are in the untainted set and should be discounted. 
+ // If we are still deploying or creating canaries, don't stop them + if canaryState { untainted = untainted.difference(canaries) } // Hot path the nothing to do case remove := len(untainted) + len(migrate) - group.Count if remove <= 0 { - return nil + return stop + } + + // Prefer stopping any alloc that has the same name as the canaries if we + // are promoted + if !canaryState && len(canaries) != 0 { + canaryNames := canaries.nameSet() + for id, alloc := range untainted.difference(canaries) { + if _, match := canaryNames[alloc.Name]; match { + a.logger.Printf("ALEX -- STOPPING alloc with same name as canary %q %q", id, alloc.Name) + stop[id] = alloc + a.result.stop = append(a.result.stop, allocStopResult{ + alloc: alloc, + statusDescription: allocNotNeeded, + }) + delete(untainted, id) + + remove-- + if remove == 0 { + return stop + } + } + } } // Prefer selecting from the migrating set before stopping existing allocs @@ -505,7 +545,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc removeNames := nameIndex.Highest(uint(remove)) for id, alloc := range untainted { if _, remove := removeNames[alloc.Name]; remove { - a.logger.Printf("ALEX -- STOPPING normal alloc %q", id) + a.logger.Printf("ALEX -- STOPPING normal alloc %q %q", id, alloc.Name) stop[id] = alloc a.result.stop = append(a.result.stop, allocStopResult{ alloc: alloc, @@ -639,13 +679,16 @@ func (a *allocNameIndex) UnsetIndex(idx uint) { func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) []string { next := make([]string, 0, n) + // Create a name index + existingNames := existing.nameSet() + // First select indexes from the allocations that are undergoing destructive // updates. This way we avoid duplicate names as they will get replaced. 
dmap := bitmapFrom(destructive, uint(a.count)) var remainder uint for _, idx := range dmap.IndexesInRange(true, uint(0), uint(a.count)-1) { name := structs.AllocName(a.job, a.taskGroup, uint(idx)) - if _, used := existing[name]; !used { + if _, used := existingNames[name]; !used { next = append(next, name) a.b.Set(uint(idx)) @@ -660,7 +703,7 @@ func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) [] // Get the set of unset names that can be used for _, idx := range a.b.IndexesInRange(false, uint(0), uint(a.count)-1) { name := structs.AllocName(a.job, a.taskGroup, uint(idx)) - if _, used := existing[name]; !used { + if _, used := existingNames[name]; !used { next = append(next, name) a.b.Set(uint(idx)) @@ -677,7 +720,7 @@ func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) [] var i uint for i = 0; i < remainder; i++ { name := structs.AllocName(a.job, a.taskGroup, i) - if _, used := existing[name]; !used { + if _, used := existingNames[name]; !used { next = append(next, name) a.b.Set(i) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 6fabe1b8f..e92621b48 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -1,61 +1,88 @@ package scheduler import ( + "fmt" "log" "os" "reflect" "regexp" "strconv" "testing" + "time" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" ) /* - -TODO: Basic Tests: -√ Place when there is nothing in the cluster -√ Place remainder when there is some in the cluster -√ Scale down from n to n-m where n != m -√ Scale down from n to zero -√ Inplace upgrade test -√ Inplace upgrade and scale up test -√ Inplace upgrade and scale down test -√ Destructive upgrade -√ Destructive upgrade and scale up test -√ Destructive upgrade and scale down test -√ Handle lost nodes -√ Handle lost nodes and scale up -√ Handle lost nodes and scale down -√ Handle draining nodes -√ Handle draining nodes and scale up -√ Handle draining nodes and scale down -√ Handle task group being removed -√ Handle job being stopped both as .Stopped and nil -√ Place more that one group - -Canary Tests: -- Stopped job cancels any existing deployment -- JobIndex change cancels any existing deployment -- Creates a deployment if any group has an update strategy -- Paused deployment doesn't create any more canaries -- Paused deployment doesn't do any placements -- Paused deployment doesn't do destructive updates -- Paused deployment does do migrations -- Canary that is on a tainted node -- Stop old canaries -- Create new canaries on job change -- Create new canaries on job change while scaling up -- Create new canaries on job change while scaling down -- Fill canaries if partial placement -- Promote canaries unblocks max_parallel -- Failed deployment should not place anything +√ Place when there is nothing in the cluster +√ Place remainder when there is some in the cluster +√ Scale down from n to n-m where n != m +√ Scale down from n to zero +√ Inplace upgrade test +√ Inplace upgrade and scale up test +√ Inplace upgrade and scale down test +√ Destructive upgrade +√ Destructive upgrade and scale up test +√ Destructive upgrade and scale down test +√ Handle lost nodes +√ Handle lost nodes and scale up +√ Handle lost nodes and scale down +√ Handle draining nodes +√ Handle draining nodes and scale up +√ Handle draining nodes and scale down +√ Handle task group being removed +√ Handle job being stopped both as .Stopped and nil +√ 
Place more than one group
+Deployment Tests:
+√ Stopped job cancels any active deployment
+√ Stopped job doesn't cancel terminal deployment
+√ JobIndex change cancels any active deployment
+√ JobIndex change doesn't cancel any terminal deployment
+√ Destructive changes create deployment and get rolled out via max_parallelism
+- Don't create a deployment if there are no changes
+- Deployment created by all inplace updates
+√ Paused or failed deployment doesn't create any more canaries
+√ Paused or failed deployment doesn't do any placements
+√ Paused or failed deployment doesn't do destructive updates
+√ Paused does do migrations
+√ Failed deployment doesn't do migrations
+√ Canary that is on a draining node
+√ Canary that is on a lost node
+√ Stop old canaries
+√ Create new canaries on job change
+√ Create new canaries on job change while scaling up
+√ Create new canaries on job change while scaling down
+√ Fill canaries if partial placement
+√ Promote canaries unblocks max_parallel
+√ Promote canaries when canaries == count
+√ Only place as many as are healthy in deployment
+√ Limit calculation accounts for healthy allocs on migrating/lost nodes
+√ Failed deployment should not place anything
+- Run after canaries have been promoted, new allocs have been rolled out and there is no deployment
 */
+var (
+	canaryUpdate = &structs.UpdateStrategy{
+		Canary: 2,
+		MaxParallel: 2,
+		HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
+		MinHealthyTime: 10 * time.Second,
+		HealthyDeadline: 10 * time.Minute,
+	}
+
+	noCanaryUpdate = &structs.UpdateStrategy{
+		MaxParallel: 4,
+		HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
+		MinHealthyTime: 10 * time.Second,
+		HealthyDeadline: 10 * time.Minute,
+	}
+)
+
 func testLogger() *log.Logger {
 	return log.New(os.Stderr, "", log.LstdFlags)
 }
@@ -84,6 +111,16 @@ func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *s
 	return false, false, newAlloc
 }
 
+func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
+	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
+		if fn, ok := handled[existing.ID]; ok {
+			return fn(existing, newJob, newTG)
+		}
+
+		return unhandled(existing, newJob, newTG)
+	}
+}
+
 var (
 	// AllocationIndexRegex is a regular expression to find the allocation index.
allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") @@ -131,6 +168,38 @@ func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) { } } +func assertNoCanariesStopped(t *testing.T, stop []allocStopResult) { + for _, s := range stop { + if s.alloc.Canary { + t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name) + } + } +} + +func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) { + names := make(map[string]struct{}, numPrevious) + + found := 0 + for _, p := range place { + if _, ok := names[p.name]; ok { + t.Fatalf("Name %q already placed", p.name) + } + names[p.name] = struct{}{} + + if p.previousAlloc == nil { + continue + } + + if act := p.previousAlloc.Name; p.name != act { + t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name) + } + found++ + } + if numPrevious != found { + t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found) + } +} + func intRange(pairs ...int) []int { if len(pairs)%2 != 0 { return nil @@ -171,7 +240,7 @@ func allocsToNames(allocs []*structs.Allocation) []string { type resultExpectation struct { createDeployment *structs.Deployment - deploymentUpdates int + deploymentUpdates []*structs.DeploymentStatusUpdate place int inplace int stop int @@ -184,12 +253,16 @@ func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) { t.Fatalf("Expect a created deployment got none") } else if exp.createDeployment == nil && r.createDeployment != nil { t.Fatalf("Expect no created deployment; got %v", r.createDeployment) - } else if !reflect.DeepEqual(r.createDeployment, exp.createDeployment) { - t.Fatalf("Unexpected createdDeployment: %v", pretty.Diff(r.createDeployment, exp.createDeployment)) + } else if exp.createDeployment != nil && r.createDeployment != nil { + // Clear the deployment ID + r.createDeployment.ID, exp.createDeployment.ID = "", "" + if !reflect.DeepEqual(r.createDeployment, exp.createDeployment) { + t.Fatalf("Unexpected createdDeployment: %v", pretty.Diff(r.createDeployment, exp.createDeployment)) + } } - if l := len(r.deploymentUpdates); l != exp.deploymentUpdates { - t.Fatalf("Expect %d deployment updates; got %v", exp.deploymentUpdates, r.deploymentUpdates) + if !reflect.DeepEqual(r.deploymentUpdates, exp.deploymentUpdates) { + t.Fatalf("Unexpected deploymentUpdates: %v", pretty.Diff(r.deploymentUpdates, exp.deploymentUpdates)) } if l := len(r.place); l != exp.place { t.Fatalf("Expected %d placements; got %d", exp.place, l) @@ -227,7 +300,7 @@ func TestReconciler_Place_NoExisting(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 10, inplace: 0, stop: 0, @@ -263,7 +336,7 @@ func TestReconciler_Place_Existing(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 5, inplace: 0, stop: 0, @@ -301,7 +374,7 @@ func TestReconciler_ScaleDown_Partial(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 0, inplace: 0, stop: 10, @@ -340,7 +413,7 @@ func TestReconciler_ScaleDown_Zero(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 0, inplace: 0, stop: 20, @@ -375,7 +448,7 @@ func 
TestReconciler_Inplace(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 0, inplace: 10, stop: 0, @@ -413,7 +486,7 @@ func TestReconciler_Inplace_ScaleUp(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 5, inplace: 10, stop: 0, @@ -453,7 +526,7 @@ func TestReconciler_Inplace_ScaleDown(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 0, inplace: 5, stop: 5, @@ -490,7 +563,7 @@ func TestReconciler_Destructive(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 10, inplace: 0, stop: 10, @@ -503,6 +576,7 @@ func TestReconciler_Destructive(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) + assertPlaceResultsHavePreviousAllocs(t, 10, r.place) } // Tests the reconciler properly handles destructive upgrading allocations while @@ -529,7 +603,7 @@ func TestReconciler_Destructive_ScaleUp(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 15, inplace: 0, stop: 10, @@ -543,6 +617,7 @@ func TestReconciler_Destructive_ScaleUp(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) assertNamesHaveIndexes(t, intRange(0, 14), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 10, r.place) } // Tests the reconciler properly handles destructive upgrading allocations while @@ -569,7 +644,7 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 5, inplace: 0, stop: 10, @@ -583,6 +658,7 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) assertNamesHaveIndexes(t, intRange(0, 4), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 5, r.place) } // Tests the reconciler properly handles lost nodes with allocations @@ -615,13 +691,14 @@ func TestReconciler_LostNode(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 2, inplace: 0, stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { Place: 2, + Stop: 2, Ignore: 8, }, }, @@ -664,13 +741,14 @@ func TestReconciler_LostNode_ScaleUp(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 7, inplace: 0, stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { Place: 7, + Stop: 2, Ignore: 8, }, }, @@ -713,7 +791,7 @@ func TestReconciler_LostNode_ScaleDown(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 0, inplace: 0, stop: 5, @@ -758,7 +836,7 @@ func TestReconciler_DrainNode(t *testing.T) { // 
Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 2, inplace: 0, stop: 2, @@ -772,6 +850,7 @@ func TestReconciler_DrainNode(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.place) } // Tests the reconciler properly handles draining nodes with allocations while @@ -807,7 +886,7 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 7, inplace: 0, stop: 2, @@ -822,6 +901,7 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.place) } // Tests the reconciler properly handles draining nodes with allocations while @@ -857,7 +937,7 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 1, inplace: 0, stop: 3, @@ -872,6 +952,7 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) } // Tests the reconciler properly handles a task group being removed @@ -899,7 +980,7 @@ func TestReconciler_RemovedTG(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 10, inplace: 0, stop: 10, @@ -943,7 +1024,7 @@ func TestReconciler_JobStopped(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { - // Create 10 allocations for a tg that no longer exists + // Create 10 allocations var allocs []*structs.Allocation for i := 0; i < 10; i++ { alloc := mock.Alloc() @@ -961,7 +1042,7 @@ func TestReconciler_JobStopped(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 0, inplace: 0, stop: 10, @@ -1001,7 +1082,7 @@ func TestReconciler_MultiTG(t *testing.T) { // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: 0, + deploymentUpdates: nil, place: 18, inplace: 0, stop: 0, @@ -1018,3 +1099,1387 @@ func TestReconciler_MultiTG(t *testing.T) { assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place)) } + +// Tests the reconciler cancels an old deployment when the job is being stopped +func TestReconciler_CancelDeployment_JobStop(t *testing.T) { + job := mock.Job() + job.Stop = true + + running := structs.NewDeployment(job) + failed := structs.NewDeployment(job) + failed.Status = structs.DeploymentStatusFailed + + cases := []struct { + name string + job *structs.Job + jobID, taskGroup string + deployment *structs.Deployment + cancel bool + }{ + { + name: "stopped job, running deployment", + job: job, + jobID: job.ID, + taskGroup: job.TaskGroups[0].Name, + deployment: running, + cancel: true, + }, + { + name: "nil job, running deployment", + job: nil, + jobID: 
"foo", + taskGroup: "bar", + deployment: running, + cancel: true, + }, + { + name: "stopped job, failed deployment", + job: job, + jobID: job.ID, + taskGroup: job.TaskGroups[0].Name, + deployment: failed, + cancel: false, + }, + { + name: "nil job, failed deployment", + job: nil, + jobID: "foo", + taskGroup: "bar", + deployment: failed, + cancel: false, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create 10 allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = c.job + alloc.JobID = c.jobID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) + alloc.TaskGroup = c.taskGroup + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil) + r := reconciler.Compute() + + var updates []*structs.DeploymentStatusUpdate + if c.cancel { + updates = []*structs.DeploymentStatusUpdate{ + { + DeploymentID: c.deployment.ID, + Status: structs.DeploymentStatusCancelled, + StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, + }, + } + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: updates, + place: 0, + inplace: 0, + stop: 10, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + c.taskGroup: { + Stop: 10, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) + }) + } +} + +// Tests the reconciler cancels an old deployment when the job is updated +func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { + // Create a base job + job := mock.Job() + + // Create two deployments + running := structs.NewDeployment(job) + failed := structs.NewDeployment(job) + failed.Status = structs.DeploymentStatusFailed + + // Make the job newer than the deployment + job.JobModifyIndex += 10 + + cases := []struct { + name string + deployment *structs.Deployment + cancel bool + }{ + { + name: "running deployment", + deployment: running, + cancel: true, + }, + { + name: "failed deployment", + deployment: failed, + cancel: false, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create 10 allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil) + r := reconciler.Compute() + + var updates []*structs.DeploymentStatusUpdate + if c.cancel { + updates = []*structs.DeploymentStatusUpdate{ + { + DeploymentID: c.deployment.ID, + Status: structs.DeploymentStatusCancelled, + StatusDescription: structs.DeploymentStatusDescriptionNewerJob, + }, + } + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: updates, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 10, + }, + }, + }) + }) + } +} + +// Tests the reconciler creates a deployment and does a rolling upgrade +func TestReconciler_CreateDeployment_RollingUpgrade(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + + // 
Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + d := structs.NewDeployment(job) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredTotal: 10, + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: d, + deploymentUpdates: nil, + place: 4, + inplace: 0, + stop: 4, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + DestructiveUpdate: 4, + Ignore: 6, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 3), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 3), stopResultsToNames(r.stop)) +} + +// Tests the reconciler doesn't place any more canaries when the deployment is +// paused or failed +func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + + cases := []struct { + name string + deploymentStatus string + }{ + { + name: "paused deployment", + deploymentStatus: structs.DeploymentStatusPaused, + }, + { + name: "failed deployment", + deploymentStatus: structs.DeploymentStatusFailed, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredCanaries: 2, + DesiredTotal: 10, + PlacedAllocs: 1, + } + + // Create 10 allocations for the original job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = structs.GenerateUUID() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) + canary.TaskGroup = job.TaskGroups[0].Name + canary.Canary = true + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + + mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 11, + }, + }, + }) + }) + } +} + +// Tests the reconciler doesn't place any more allocs when the deployment is +// paused or failed +func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Count = 15 + + cases := []struct { + name string + deploymentStatus string + }{ + { + name: "paused deployment", + 
deploymentStatus: structs.DeploymentStatusPaused, + }, + { + name: "failed deployment", + deploymentStatus: structs.DeploymentStatusFailed, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 15, + PlacedAllocs: 10, + } + + // Create 10 allocations for the new job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 10, + }, + }, + }) + }) + } +} + +// Tests the reconciler doesn't do any more destructive updates when the +// deployment is paused or failed +func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + + cases := []struct { + name string + deploymentStatus string + }{ + { + name: "paused deployment", + deploymentStatus: structs.DeploymentStatusPaused, + }, + { + name: "failed deployment", + deploymentStatus: structs.DeploymentStatusFailed, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + PlacedAllocs: 1, + } + + // Create 9 allocations for the original job + var allocs []*structs.Allocation + for i := 1; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create one for the new job + newAlloc := mock.Alloc() + newAlloc.Job = job + newAlloc.JobID = job.ID + newAlloc.NodeID = structs.GenerateUUID() + newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) + newAlloc.TaskGroup = job.TaskGroups[0].Name + newAlloc.DeploymentID = d.ID + allocs = append(allocs, newAlloc) + + mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 10, + }, + }, + }) + }) + } +} + +// Tests the reconciler handles migrations correctly when a deployment is paused +// or failed +func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) { + job := 
mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + + cases := []struct { + name string + deploymentStatus string + place int + stop int + ignoreAnnotation uint64 + migrateAnnotation uint64 + stopAnnotation uint64 + }{ + { + name: "paused deployment", + deploymentStatus: structs.DeploymentStatusPaused, + place: 3, + stop: 3, + ignoreAnnotation: 5, + migrateAnnotation: 3, + }, + { + name: "failed deployment", + deploymentStatus: structs.DeploymentStatusFailed, + place: 0, + stop: 3, + ignoreAnnotation: 5, + migrateAnnotation: 0, + stopAnnotation: 3, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + PlacedAllocs: 8, + } + + // Create 8 allocations in the deployment + var allocs []*structs.Allocation + for i := 0; i < 8; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DeploymentID = d.ID + allocs = append(allocs, alloc) + } + + // Build a map of tainted nodes + tainted := make(map[string]*structs.Node, 3) + for i := 0; i < 3; i++ { + n := mock.Node() + n.ID = allocs[i].NodeID + n.Drain = true + tainted[n.ID] = n + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, tainted) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: c.place, + inplace: 0, + stop: c.stop, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Migrate: c.migrateAnnotation, + Ignore: c.ignoreAnnotation, + Stop: c.stopAnnotation, + }, + }, + }) + }) + } +} + +// Tests the reconciler handles migrating a canary correctly on a draining node +func TestReconciler_DrainNode_Canary(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 2, + } + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create two canaries for the new job + handled := make(map[string]allocUpdateType) + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = structs.GenerateUUID() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + canary.Canary = true + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + handled[canary.ID] = allocUpdateFnIgnore + } + + // Build a map of tainted nodes that contains the last canary + tainted := make(map[string]*structs.Node, 1) + n := mock.Node() + n.ID = allocs[11].NodeID + n.Drain = true + tainted[n.ID] = n + + mockUpdateFn := 
allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 0, + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 1, + Ignore: 11, + }, + }, + }) + assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) +} + +// Tests the reconciler handles migrating a canary correctly on a lost node +func TestReconciler_LostNode_Canary(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 2, + } + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create two canaries for the new job + handled := make(map[string]allocUpdateType) + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = structs.GenerateUUID() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + canary.Canary = true + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + handled[canary.ID] = allocUpdateFnIgnore + } + + // Build a map of tainted nodes that contains the last canary + tainted := make(map[string]*structs.Node, 1) + n := mock.Node() + n.ID = allocs[11].NodeID + n.Status = structs.NodeStatusDown + tainted[n.ID] = n + + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 0, + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 1, + Ignore: 11, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) +} + +// Tests the reconciler handles stopping canaries from older deployments +func TestReconciler_StopOldCanaries(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + + // Create an old deployment that has placed some canaries + d := structs.NewDeployment(job) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 2, + } + + // Update the job + job.JobModifyIndex += 10 + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, 
job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create canaries + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = structs.GenerateUUID() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + canary.Canary = true + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) + r := reconciler.Compute() + + newD := structs.NewDeployment(job) + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 10, + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: newD, + deploymentUpdates: []*structs.DeploymentStatusUpdate{ + { + DeploymentID: d.ID, + Status: structs.DeploymentStatusCancelled, + StatusDescription: structs.DeploymentStatusDescriptionNewerJob, + }, + }, + place: 2, + inplace: 0, + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 2, + Stop: 2, + Ignore: 10, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) +} + +// Tests the reconciler creates new canaries when the job changes +func TestReconciler_NewCanaries(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + newD := structs.NewDeployment(job) + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 10, + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: newD, + deploymentUpdates: nil, + place: 2, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 2, + Ignore: 10, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) +} + +// Tests the reconciler creates new canaries when the job changes and scales up +func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { + // Scale the job up to 15 + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups[0].Count = 15 + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + newD := structs.NewDeployment(job) + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + 
DesiredTotal: 15, + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: newD, + deploymentUpdates: nil, + place: 2, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 2, + Ignore: 10, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) +} + +// Tests the reconciler creates new canaries when the job changes and scales +// down +func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { + // Scale the job down to 5 + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups[0].Count = 5 + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + newD := structs.NewDeployment(job) + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 5, + } + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: newD, + deploymentUpdates: nil, + place: 2, + inplace: 0, + stop: 5, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 2, + Stop: 5, + Ignore: 5, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) +} + +// Tests the reconciler handles filling the names of partially placed canaries +func TestReconciler_NewCanaries_FillNames(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].Update = &structs.UpdateStrategy{ + Canary: 4, + MaxParallel: 2, + HealthCheck: structs.UpdateStrategyHealthCheck_Checks, + MinHealthyTime: 10 * time.Second, + HealthyDeadline: 10 * time.Minute, + } + + // Create an existing deployment that has placed some canaries + d := structs.NewDeployment(job) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 4, + PlacedAllocs: 2, + } + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = structs.GenerateUUID() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create canaries but pick names at the ends + for i := 0; i < 4; i += 3 { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = structs.GenerateUUID() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + canary.Canary = true + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 2, + inplace: 0, + stop: 0, + desiredTGUpdates: 
+
+// Tests the reconciler handles filling the names of partially placed canaries
+func TestReconciler_NewCanaries_FillNames(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = &structs.UpdateStrategy{
+		Canary: 4,
+		MaxParallel: 2,
+		HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
+		MinHealthyTime: 10 * time.Second,
+		HealthyDeadline: 10 * time.Minute,
+	}
+
+	// Create an existing deployment that has placed some canaries
+	d := structs.NewDeployment(job)
+	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+		Promoted: false,
+		DesiredTotal: 10,
+		DesiredCanaries: 4,
+		PlacedAllocs: 2,
+	}
+
+	// Create 10 allocations from the old job
+	var allocs []*structs.Allocation
+	for i := 0; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = structs.GenerateUUID()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		allocs = append(allocs, alloc)
+	}
+
+	// Create canaries but pick names at the ends
+	for i := 0; i < 4; i += 3 {
+		// Create one canary
+		canary := mock.Alloc()
+		canary.Job = job
+		canary.JobID = job.ID
+		canary.NodeID = structs.GenerateUUID()
+		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		canary.TaskGroup = job.TaskGroups[0].Name
+		canary.Canary = true
+		canary.DeploymentID = d.ID
+		allocs = append(allocs, canary)
+	}
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil)
+	r := reconciler.Compute()
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment: nil,
+		deploymentUpdates: nil,
+		place: 2,
+		inplace: 0,
+		stop: 0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Canary: 2,
+				Ignore: 12,
+			},
+		},
+	})
+
+	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
+}
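+
+// The FillNames case relies on the reconciler filling in the unused name
+// indexes: canaries already exist at indexes 0 and 3, four are desired, so
+// the two new placements take indexes 1 and 2. A rough sketch of that
+// selection; nextCanaryNames is a hypothetical helper and the real code may
+// differ, but the test above only depends on this index-filling behaviour.
+func nextCanaryNames(job *structs.Job, group string, want int, existing map[string]struct{}) []string {
+	var names []string
+	// Walk the index space from zero and pick the first unused names.
+	for i := uint(0); len(names) < want; i++ {
+		name := structs.AllocName(job.ID, group, i)
+		if _, ok := existing[name]; ok {
+			continue
+		}
+		names = append(names, name)
+	}
+	return names
+}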
+
+// Tests the reconciler handles canary promotion by unblocking max_parallel
+func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = canaryUpdate
+
+	// Create an existing deployment that has placed some canaries and mark them
+	// promoted
+	d := structs.NewDeployment(job)
+	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+		Promoted: true,
+		DesiredTotal: 10,
+		DesiredCanaries: 2,
+		PlacedAllocs: 2,
+	}
+
+	// Create 10 allocations from the old job
+	var allocs []*structs.Allocation
+	for i := 0; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = structs.GenerateUUID()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		allocs = append(allocs, alloc)
+	}
+
+	// Create the canaries
+	handled := make(map[string]allocUpdateType)
+	for i := 0; i < 2; i++ {
+		// Create one canary
+		canary := mock.Alloc()
+		canary.Job = job
+		canary.JobID = job.ID
+		canary.NodeID = structs.GenerateUUID()
+		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		canary.TaskGroup = job.TaskGroups[0].Name
+		canary.Canary = true
+		canary.DeploymentID = d.ID
+		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: helper.BoolToPtr(true),
+		}
+		allocs = append(allocs, canary)
+		handled[canary.ID] = allocUpdateFnIgnore
+	}
+
+	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
+	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil)
+	r := reconciler.Compute()
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment: nil,
+		deploymentUpdates: nil,
+		place: 2,
+		inplace: 0,
+		stop: 4,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Stop: 2,
+				DestructiveUpdate: 2,
+				Ignore: 8,
+			},
+		},
+	})
+
+	assertNoCanariesStopped(t, r.stop)
+	assertNamesHaveIndexes(t, intRange(2, 3), placeResultsToNames(r.place))
+	assertNamesHaveIndexes(t, intRange(0, 3), stopResultsToNames(r.stop))
+}
+
+// Tests the reconciler handles canary promotion when the canary count equals
+// the total correctly
+func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = canaryUpdate
+	job.TaskGroups[0].Count = 2
+
+	// Create an existing deployment that has placed some canaries and mark them
+	// promoted
+	d := structs.NewDeployment(job)
+	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+		Promoted: true,
+		DesiredTotal: 2,
+		DesiredCanaries: 2,
+		PlacedAllocs: 2,
+	}
+
+	// Create 2 allocations from the old job
+	var allocs []*structs.Allocation
+	for i := 0; i < 2; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = structs.GenerateUUID()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		allocs = append(allocs, alloc)
+	}
+
+	// Create the canaries
+	handled := make(map[string]allocUpdateType)
+	for i := 0; i < 2; i++ {
+		// Create one canary
+		canary := mock.Alloc()
+		canary.Job = job
+		canary.JobID = job.ID
+		canary.NodeID = structs.GenerateUUID()
+		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		canary.TaskGroup = job.TaskGroups[0].Name
+		canary.Canary = true
+		canary.DeploymentID = d.ID
+		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: helper.BoolToPtr(true),
+		}
+		allocs = append(allocs, canary)
+		handled[canary.ID] = allocUpdateFnIgnore
+	}
+
+	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
+	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil)
+	r := reconciler.Compute()
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment: nil,
+		deploymentUpdates: nil,
+		place: 0,
+		inplace: 0,
+		stop: 2,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Stop: 2,
+				Ignore: 2,
+			},
+		},
+	})
+
+	assertNoCanariesStopped(t, r.stop)
+	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
+}
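+
+// Taken together, the two promotion tests show the accounting after a canary
+// deployment is promoted: the healthy canaries start counting toward the
+// group's desired total, so the reconciler stops an equal number of old
+// allocations. When the canary count equals the group count (the second test)
+// the promoted canaries cover the whole group and no new placements are
+// needed; otherwise (the first test) the remaining old allocations are
+// destructively updated in batches bounded by the update strategy's
+// MaxParallel, presumably 2 here.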
+
+// Tests the reconciler checks the health of placed allocs to determine the
+// limit
+func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = noCanaryUpdate
+
+	cases := []struct {
+		healthy int
+	}{
+		{
+			healthy: 0,
+		},
+		{
+			healthy: 1,
+		},
+		{
+			healthy: 2,
+		},
+		{
+			healthy: 3,
+		},
+		{
+			healthy: 4,
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
+			// Create an existing deployment that has placed some allocs and mark it
+			// promoted
+			d := structs.NewDeployment(job)
+			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+				Promoted: true,
+				DesiredTotal: 10,
+				PlacedAllocs: 4,
+			}
+
+			// Create 6 allocations from the old job
+			var allocs []*structs.Allocation
+			for i := 4; i < 10; i++ {
+				alloc := mock.Alloc()
+				alloc.Job = job
+				alloc.JobID = job.ID
+				alloc.NodeID = structs.GenerateUUID()
+				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+				alloc.TaskGroup = job.TaskGroups[0].Name
+				allocs = append(allocs, alloc)
+			}
+
+			// Create the new allocs
+			handled := make(map[string]allocUpdateType)
+			for i := 0; i < 4; i++ {
+				new := mock.Alloc()
+				new.Job = job
+				new.JobID = job.ID
+				new.NodeID = structs.GenerateUUID()
+				new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+				new.TaskGroup = job.TaskGroups[0].Name
+				new.DeploymentID = d.ID
+				if i < c.healthy {
+					new.DeploymentStatus = &structs.AllocDeploymentStatus{
+						Healthy: helper.BoolToPtr(true),
+					}
+				}
+				allocs = append(allocs, new)
+				handled[new.ID] = allocUpdateFnIgnore
+			}
+
+			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
+			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil)
+			r := reconciler.Compute()
+
+			// Assert the correct results
+			assertResults(t, r, &resultExpectation{
+				createDeployment: nil,
+				deploymentUpdates: nil,
+				place: c.healthy,
+				inplace: 0,
+				stop: c.healthy,
+				desiredTGUpdates: map[string]*structs.DesiredUpdates{
+					job.TaskGroups[0].Name: {
+						DestructiveUpdate: uint64(c.healthy),
+						Ignore: uint64(10 - c.healthy),
+					},
+				},
+			})
+
+			if c.healthy != 0 {
+				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), placeResultsToNames(r.place))
+				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), stopResultsToNames(r.stop))
+			}
+		})
+	}
+}
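+
+// The health-accounting cases above illustrate how the rolling-update limit
+// appears to be derived: with 4 allocations already placed by the deployment,
+// the reconciler only performs as many additional destructive updates as
+// there are healthy placements, i.e. roughly
+//
+//	limit = MaxParallel - (placed - healthy)
+//
+// assuming noCanaryUpdate uses a MaxParallel of 4; only allocations whose
+// DeploymentStatus reports Healthy free up another slot.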
+
+// Tests the reconciler handles an alloc on a tainted node during a rolling
+// update
+func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = noCanaryUpdate
+
+	// Create an existing deployment that has some placed allocs
+	d := structs.NewDeployment(job)
+	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+		Promoted: true,
+		DesiredTotal: 10,
+		PlacedAllocs: 4,
+	}
+
+	// Create 6 allocations from the old job
+	var allocs []*structs.Allocation
+	for i := 4; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = structs.GenerateUUID()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		allocs = append(allocs, alloc)
+	}
+
+	// Create the healthy replacements
+	handled := make(map[string]allocUpdateType)
+	for i := 0; i < 4; i++ {
+		new := mock.Alloc()
+		new.Job = job
+		new.JobID = job.ID
+		new.NodeID = structs.GenerateUUID()
+		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		new.TaskGroup = job.TaskGroups[0].Name
+		new.DeploymentID = d.ID
+		new.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: helper.BoolToPtr(true),
+		}
+		allocs = append(allocs, new)
+		handled[new.ID] = allocUpdateFnIgnore
+	}
+
+	// Build a map of tainted nodes
+	tainted := make(map[string]*structs.Node, 2)
+	for i := 0; i < 2; i++ {
+		n := mock.Node()
+		n.ID = allocs[6+i].NodeID
+		if i == 0 {
+			n.Status = structs.NodeStatusDown
+		} else {
+			n.Drain = true
+		}
+		tainted[n.ID] = n
+	}
+
+	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
+	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted)
+	r := reconciler.Compute()
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment: nil,
+		deploymentUpdates: nil,
+		place: 6,
+		inplace: 0,
+		stop: 6,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place: 1, // Place the lost
+				Stop: 1, // Stop the lost
+				Migrate: 1, // Migrate the tainted
+				DestructiveUpdate: 4,
+				Ignore: 4,
+			},
+		},
+	})
+
+	assertNamesHaveIndexes(t, intRange(0, 1, 4, 7), placeResultsToNames(r.place))
+	assertNamesHaveIndexes(t, intRange(0, 1, 4, 7), stopResultsToNames(r.stop))
+}
+
+// Tests the reconciler handles a failed deployment and does no placements
+func TestReconciler_FailedDeployment_NoPlacements(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = noCanaryUpdate
+
+	// Create an existing failed deployment that has some placed allocs
+	d := structs.NewDeployment(job)
+	d.Status = structs.DeploymentStatusFailed
+	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+		Promoted: true,
+		DesiredTotal: 10,
+		PlacedAllocs: 4,
+	}
+
+	// Create 6 allocations from the old job
+	var allocs []*structs.Allocation
+	for i := 4; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = structs.GenerateUUID()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		allocs = append(allocs, alloc)
+	}
+
+	// Create the healthy replacements
+	handled := make(map[string]allocUpdateType)
+	for i := 0; i < 4; i++ {
+		new := mock.Alloc()
+		new.Job = job
+		new.JobID = job.ID
+		new.NodeID = structs.GenerateUUID()
+		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		new.TaskGroup = job.TaskGroups[0].Name
+		new.DeploymentID = d.ID
+		new.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Healthy: helper.BoolToPtr(true),
+		}
+		allocs = append(allocs, new)
+		handled[new.ID] = allocUpdateFnIgnore
+	}
+
+	// Build a map of tainted nodes
+	tainted := make(map[string]*structs.Node, 2)
+	for i := 0; i < 2; i++ {
+		n := mock.Node()
+		n.ID = allocs[6+i].NodeID
+		if i == 0 {
+			n.Status = structs.NodeStatusDown
+		} else {
+			n.Drain = true
+		}
+		tainted[n.ID] = n
+	}
+
+	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
+	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted)
+	r := reconciler.Compute()
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment: nil,
+		deploymentUpdates: nil,
+		place: 0,
+		inplace: 0,
+		stop: 2,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Stop: 2,
+				Ignore: 8,
+			},
+		},
+	})
+
+	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
+}
+
+// Tests the reconciler handles a run after a deployment is complete
+// successfully.
+func TestReconciler_CompleteDeployment(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = canaryUpdate
+
+	// Create allocations from the old job
+	dID := structs.GenerateUUID()
+	var allocs []*structs.Allocation
+	for i := 0; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = structs.GenerateUUID()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		alloc.DeploymentID = dID
+		if i < 2 {
+			alloc.Canary = true
+		}
+		allocs = append(allocs, alloc)
+	}
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil)
+	r := reconciler.Compute()
+
+	// Assert the correct results
+	assertResults(t, r, &resultExpectation{
+		createDeployment: nil,
+		deploymentUpdates: nil,
+		place: 0,
+		inplace: 0,
+		stop: 0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Ignore: 10,
+			},
+		},
+	})
+}
diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index 295a40e1a..c27872504 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -2,6 +2,8 @@ package scheduler
 
 import (
 	"fmt"
+	"sort"
+	"strings"
 
 	"github.com/hashicorp/nomad/nomad/structs"
 )
@@ -54,10 +56,30 @@ func (a allocSet) GoString() string {
 	}
 
 	start := fmt.Sprintf("len(%d) [\n", len(a))
-	for k := range a {
-		start += k + ",\n"
+	var s []string
+	for k, v := range a {
+		s = append(s, fmt.Sprintf("%q: %v", k, v.Name))
 	}
-	return start + "]"
+	return start + strings.Join(s, "\n") + "]"
+}
+
+func (a allocSet) nameSet() map[string]struct{} {
+	names := make(map[string]struct{}, len(a))
+	for _, alloc := range a {
+		names[alloc.Name] = struct{}{}
+	}
+	return names
+}
+
+func (a allocSet) nameOrder() []*structs.Allocation {
+	allocs := make([]*structs.Allocation, 0, len(a))
+	for _, alloc := range a {
+		allocs = append(allocs, alloc)
+	}
+	sort.Slice(allocs, func(i, j int) bool {
+		return allocs[i].Index() < allocs[j].Index()
+	})
+	return allocs
 }
 
 // difference returns a new allocSet that has all the existing item except those