diff --git a/ci/test-core.json b/ci/test-core.json index 5ec461809..9fb12cf7f 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -46,6 +46,7 @@ "nomad/volumewatcher/...", "plugins/...", "scheduler/...", + "scheduler/reconcile/...", "testutil/..." ] } diff --git a/scheduler/README.md b/scheduler/README.md index 662b25dc5..de80c0956 100644 --- a/scheduler/README.md +++ b/scheduler/README.md @@ -30,19 +30,21 @@ in more detail: +--------------+ +-----------+ +-------------+ +----------+ ``` -## Cluster reconciliation +## Reconciliation The first step for the service and bach job scheduler is called -"reconciliation," and its logic lies in `scheduler/reconcile.go` file. The -`allocReconciler` object has one public method: `Compute`, which takes no -arguments and returns `reconcileResults` object. This results object tells the -scheduler about desired deployment to be updated or created, which allocations -to place, which should be updated destructively or in-place, which should be -stopped, and which are disconnected or are reconnecting. The reconciler works -in terms of "buckets," that is, it processes allocations by putting them into -different sets, and that's how its whole logic is implemented. +"reconciliation," and its logic lies in the `scheduler/reconcile` package. +There are two reconcilers: the `AllocReconciler`, used for service and batch +jobs, and the `Node` reconciler, used for system and sysbatch jobs. -The following vocabulary is used by the reconciler: +Both reconcilers tell the scheduler which allocations or deployments should be +created or updated, which should be updated destructively or in-place, which +should be stopped, and which are disconnected or reconnecting. The reconcilers +work in terms of "buckets," that is, they process allocations by putting them +into different sets, and this bucketing is how their whole logic is +implemented. + +The following vocabulary is used by the reconcilers: - "tainted node:" a node is considered "tainted" if allocations must be migrated off of it. These are nodes that are draining or have been drained, but also @@ -74,6 +76,8 @@ status, but also when it's pending or initializing. - "expiring allocations:" allocations which are not possible to reschedule, due to lost configurations of their disconnected clients. +### Cluster Reconciler + The following diagram illustrates the logic flow of the cluster reconciler: ``` @@ -161,6 +165,10 @@ The following diagram illustrates the logic flow of the cluster reconciler: +---------------+ ``` +### Node Reconciler + +TODO + ## Feasibility checking Nomad uses a set of iterators to iterate over nodes and check how feasible @@ -170,7 +178,7 @@ feasibility iterators that live in `scheduler/feasible.go` to filter by: - node eligibiligy, - data center, -- and node pool. +- and node pool. Once nodes are filtered, the `Stack` implementations (`GenericStack` and `SystemStack`) check for: diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index f5ce54f30..e1fc9e3a5 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -16,6 +16,8 @@ import ( "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler/reconcile" + sstructs "github.com/hashicorp/nomad/scheduler/structs" ) const ( @@ -27,52 +29,6 @@ // we will attempt to schedule if we continue to hit conflicts for batch.
maxBatchScheduleAttempts = 2 - // allocNotNeeded is the status used when a job no longer requires an allocation - allocNotNeeded = "alloc not needed due to job update" - - // allocReconnected is the status to use when a replacement allocation is stopped - // because a disconnected node reconnects. - allocReconnected = "alloc not needed due to disconnected client reconnect" - - // allocMigrating is the status used when we must migrate an allocation - allocMigrating = "alloc is being migrated" - - // allocUpdating is the status used when a job requires an update - allocUpdating = "alloc is being updated due to job update" - - // allocLost is the status used when an allocation is lost - allocLost = "alloc is lost since its node is down" - - // allocUnknown is the status used when an allocation is unknown - allocUnknown = "alloc is unknown since its node is disconnected" - - // allocInPlace is the status used when speculating on an in-place update - allocInPlace = "alloc updating in-place" - - // allocNodeTainted is the status used when stopping an alloc because its - // node is tainted. - allocNodeTainted = "alloc not needed as node is tainted" - - // allocRescheduled is the status used when an allocation failed and was rescheduled - allocRescheduled = "alloc was rescheduled because it failed" - - // blockedEvalMaxPlanDesc is the description used for blocked evals that are - // a result of hitting the max number of plan attempts - blockedEvalMaxPlanDesc = "created due to placement conflicts" - - // blockedEvalFailedPlacements is the description used for blocked evals - // that are a result of failing to place all allocations. - blockedEvalFailedPlacements = "created to place remaining allocations" - - // reschedulingFollowupEvalDesc is the description used when creating follow - // up evals for delayed rescheduling - reschedulingFollowupEvalDesc = "created for delayed rescheduling" - - // disconnectTimeoutFollowupEvalDesc is the description used when creating follow - // up evals for allocations that be should be stopped after its disconnect - // timeout has passed. 
- disconnectTimeoutFollowupEvalDesc = "created for delayed disconnect timeout" - // maxPastRescheduleEvents is the maximum number of past reschedule event // that we track when unlimited rescheduling is enabled maxPastRescheduleEvents = 5 @@ -235,9 +191,9 @@ func (s *GenericScheduler) createBlockedEval(planFailure bool) error { s.blocked = s.eval.CreateBlockedEval(classEligibility, escaped, e.QuotaLimitReached(), s.failedTGAllocs) if planFailure { s.blocked.TriggeredBy = structs.EvalTriggerMaxPlans - s.blocked.StatusDescription = blockedEvalMaxPlanDesc + s.blocked.StatusDescription = sstructs.DescBlockedEvalMaxPlan } else { - s.blocked.StatusDescription = blockedEvalFailedPlacements + s.blocked.StatusDescription = sstructs.DescBlockedEvalFailedPlacements } return s.planner.CreateEval(s.blocked) @@ -381,54 +337,53 @@ func (s *GenericScheduler) computeJobAllocs() error { // nodes to lost, but only if the scheduler has already marked them updateNonTerminalAllocsToLost(s.plan, tainted, allocs) - reconciler := NewAllocReconciler(s.logger, + r := reconcile.NewAllocReconciler(s.logger, genericAllocUpdateFn(s.ctx, s.stack, s.eval.ID), s.batch, s.eval.JobID, s.job, s.deployment, allocs, tainted, s.eval.ID, s.eval.Priority, s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true)) - - results := reconciler.Compute() - s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", results)) + r.Compute() + s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", r.Result)) if s.eval.AnnotatePlan { s.plan.Annotations = &structs.PlanAnnotations{ - DesiredTGUpdates: results.desiredTGUpdates, + DesiredTGUpdates: r.Result.DesiredTGUpdates, } } // Add the deployment changes to the plan - s.plan.Deployment = results.deployment - s.plan.DeploymentUpdates = results.deploymentUpdates + s.plan.Deployment = r.Result.Deployment + s.plan.DeploymentUpdates = r.Result.DeploymentUpdates // Store all the follow up evaluations from rescheduled allocations - if len(results.desiredFollowupEvals) > 0 { - for _, evals := range results.desiredFollowupEvals { + if len(r.Result.DesiredFollowupEvals) > 0 { + for _, evals := range r.Result.DesiredFollowupEvals { s.followUpEvals = append(s.followUpEvals, evals...) } } // Update the stored deployment - if results.deployment != nil { - s.deployment = results.deployment + if r.Result.Deployment != nil { + s.deployment = r.Result.Deployment } // Handle the stop - for _, stop := range results.stop { - s.plan.AppendStoppedAlloc(stop.alloc, stop.statusDescription, stop.clientStatus, stop.followupEvalID) + for _, stop := range r.Result.Stop { + s.plan.AppendStoppedAlloc(stop.Alloc, stop.StatusDescription, stop.ClientStatus, stop.FollowupEvalID) } // Handle disconnect updates - for _, update := range results.disconnectUpdates { + for _, update := range r.Result.DisconnectUpdates { s.plan.AppendUnknownAlloc(update) } // Handle reconnect updates. // Reconnected allocs have a new AllocState entry. 
- for _, update := range results.reconnectUpdates { + for _, update := range r.Result.ReconnectUpdates { s.ctx.Plan().AppendAlloc(update, nil) } // Handle the in-place updates - for _, update := range results.inplaceUpdate { + for _, update := range r.Result.InplaceUpdate { if update.DeploymentID != s.deployment.GetID() { update.DeploymentID = s.deployment.GetID() update.DeploymentStatus = nil @@ -437,12 +392,12 @@ func (s *GenericScheduler) computeJobAllocs() error { } // Handle the annotation updates - for _, update := range results.attributeUpdates { + for _, update := range r.Result.AttributeUpdates { s.ctx.Plan().AppendAlloc(update, nil) } // Nothing remaining to do if placement is not required - if len(results.place)+len(results.destructiveUpdate) == 0 { + if len(r.Result.Place)+len(r.Result.DestructiveUpdate) == 0 { // If the job has been purged we don't have access to the job. Otherwise // set the queued allocs to zero. This is true if the job is being // stopped as well. @@ -455,23 +410,23 @@ func (s *GenericScheduler) computeJobAllocs() error { } // Compute the placements - place := make([]placementResult, 0, len(results.place)) - for _, p := range results.place { - s.queuedAllocs[p.taskGroup.Name] += 1 + place := make([]reconcile.PlacementResult, 0, len(r.Result.Place)) + for _, p := range r.Result.Place { + s.queuedAllocs[p.TaskGroup().Name] += 1 place = append(place, p) } - destructive := make([]placementResult, 0, len(results.destructiveUpdate)) - for _, p := range results.destructiveUpdate { - s.queuedAllocs[p.placeTaskGroup.Name] += 1 + destructive := make([]reconcile.PlacementResult, 0, len(r.Result.DestructiveUpdate)) + for _, p := range r.Result.DestructiveUpdate { + s.queuedAllocs[p.TaskGroup().Name] += 1 destructive = append(destructive, p) } - return s.computePlacements(destructive, place, results.taskGroupAllocNameIndexes) + return s.computePlacements(destructive, place, r.Result.TaskGroupAllocNameIndexes) } // downgradedJobForPlacement returns the previous stable version of the job for // downgrading a placement for non-canaries -func (s *GenericScheduler) downgradedJobForPlacement(p placementResult) (string, *structs.Job, error) { +func (s *GenericScheduler) downgradedJobForPlacement(p reconcile.PlacementResult) (string, *structs.Job, error) { ns, jobID := s.job.Namespace, s.job.ID tgName := p.TaskGroup().Name @@ -509,7 +464,9 @@ func (s *GenericScheduler) downgradedJobForPlacement(p placementResult) (string, // computePlacements computes placements for allocations. It is given the set of // destructive updates to place and the set of new placements to place. -func (s *GenericScheduler) computePlacements(destructive, place []placementResult, nameIndex map[string]*allocNameIndex) error { +func (s *GenericScheduler) computePlacements( + destructive, place []reconcile.PlacementResult, nameIndex map[string]*reconcile.AllocNameIndex, +) error { // Get the base nodes nodes, byDC, err := s.setNodes(s.job) @@ -528,7 +485,7 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul // Have to handle destructive changes first as we need to discount their // resources. To understand this imagine the resources were reduced and the // count was scaled up. 
- for _, results := range [][]placementResult{destructive, place} { + for _, results := range [][]reconcile.PlacementResult{destructive, place} { for _, missing := range results { // Get the task group tg := missing.TaskGroup() @@ -880,7 +837,7 @@ func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation } // findPreferredNode finds the preferred node for an allocation -func (s *GenericScheduler) findPreferredNode(place placementResult) (*structs.Node, error) { +func (s *GenericScheduler) findPreferredNode(place reconcile.PlacementResult) (*structs.Node, error) { prev := place.PreviousAllocation() if prev == nil { return nil, nil @@ -929,7 +886,7 @@ func (s *GenericScheduler) selectNextOption(tg *structs.TaskGroup, selectOptions } // handlePreemptions sets relevant preeemption related fields. -func (s *GenericScheduler) handlePreemptions(option *RankedNode, alloc *structs.Allocation, missing placementResult) { +func (s *GenericScheduler) handlePreemptions(option *RankedNode, alloc *structs.Allocation, missing reconcile.PlacementResult) { if option.PreemptedAllocs == nil { return } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 3f2e1570d..1984c6bda 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -18,6 +18,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler/reconcile" "github.com/shoenig/test" "github.com/shoenig/test/must" ) @@ -7048,9 +7049,8 @@ func TestDowngradedJobForPlacement_PicksTheLatest(t *testing.T) { sched.job = nj sched.deployment = deployment - placement := &allocPlaceResult{ - taskGroup: nj.TaskGroups[0], - } + placement := &reconcile.AllocPlaceResult{} + placement.SetTaskGroup(nj.TaskGroups[0]) // Here, assert the downgraded job version foundDeploymentID, foundJob, err := sched.downgradedJobForPlacement(placement) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile/allocs.go similarity index 93% rename from scheduler/reconcile_util.go rename to scheduler/reconcile/allocs.go index c6e8398e2..acd4a372d 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile/allocs.go @@ -1,12 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package scheduler - -// The structs and helpers in this file are split out of reconciler.go for code -// manageability and should not be shared to the system schedulers! If you need -// something here for system/sysbatch jobs, double-check it's safe to use for -// all scheduler types before moving it into util.go +package reconcile import ( "errors" @@ -19,12 +14,15 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -// placementResult is an allocation that must be placed. It potentially has a +// This file contains various types and methods that are used for keeping track +// of allocations during reconciliation process. + +// PlacementResult is an allocation that must be placed. It potentially has a // previous allocation attached to it that should be stopped only if the // paired placement is complete. This gives an atomic place/stop behavior to // prevent an impossible resource ask as part of a rolling update to wipe the // job out. 
-type placementResult interface { +type PlacementResult interface { // TaskGroup returns the task group the placement is for TaskGroup() *structs.TaskGroup @@ -57,17 +55,17 @@ type placementResult interface { MinJobVersion() uint64 } -// allocStopResult contains the information required to stop a single allocation -type allocStopResult struct { - alloc *structs.Allocation - clientStatus string - statusDescription string - followupEvalID string +// AllocStopResult contains the information required to stop a single allocation +type AllocStopResult struct { + Alloc *structs.Allocation + ClientStatus string + StatusDescription string + FollowupEvalID string } -// allocPlaceResult contains the information required to place a single +// AllocPlaceResult contains the information required to place a single // allocation -type allocPlaceResult struct { +type AllocPlaceResult struct { name string canary bool taskGroup *structs.TaskGroup @@ -79,18 +77,19 @@ type allocPlaceResult struct { minJobVersion uint64 } -func (a allocPlaceResult) TaskGroup() *structs.TaskGroup { return a.taskGroup } -func (a allocPlaceResult) Name() string { return a.name } -func (a allocPlaceResult) Canary() bool { return a.canary } -func (a allocPlaceResult) PreviousAllocation() *structs.Allocation { return a.previousAlloc } -func (a allocPlaceResult) SetPreviousAllocation(alloc *structs.Allocation) { +func (a AllocPlaceResult) TaskGroup() *structs.TaskGroup { return a.taskGroup } +func (a AllocPlaceResult) Name() string { return a.name } +func (a AllocPlaceResult) Canary() bool { return a.canary } +func (a AllocPlaceResult) PreviousAllocation() *structs.Allocation { return a.previousAlloc } +func (a AllocPlaceResult) SetPreviousAllocation(alloc *structs.Allocation) { a.previousAlloc = alloc } -func (a allocPlaceResult) IsRescheduling() bool { return a.reschedule } -func (a allocPlaceResult) StopPreviousAlloc() (bool, string) { return false, "" } -func (a allocPlaceResult) DowngradeNonCanary() bool { return a.downgradeNonCanary } -func (a allocPlaceResult) MinJobVersion() uint64 { return a.minJobVersion } -func (a allocPlaceResult) PreviousLost() bool { return a.lost } +func (a AllocPlaceResult) IsRescheduling() bool { return a.reschedule } +func (a AllocPlaceResult) StopPreviousAlloc() (bool, string) { return false, "" } +func (a AllocPlaceResult) DowngradeNonCanary() bool { return a.downgradeNonCanary } +func (a AllocPlaceResult) MinJobVersion() uint64 { return a.minJobVersion } +func (a AllocPlaceResult) PreviousLost() bool { return a.lost } +func (a *AllocPlaceResult) SetTaskGroup(tg *structs.TaskGroup) { a.taskGroup = tg } // allocDestructiveResult contains the information required to do a destructive // update. Destructive changes should be applied atomically, as in the old alloc @@ -644,9 +643,9 @@ func (a allocSet) filterByClientStatus(clientStatus string) allocSet { return allocs } -// allocNameIndex is used to select allocation names for placement or removal +// AllocNameIndex is used to select allocation names for placement or removal // given an existing set of placed allocations. -type allocNameIndex struct { +type AllocNameIndex struct { job, taskGroup string count int b structs.Bitmap @@ -662,11 +661,11 @@ type allocNameIndex struct { // newAllocNameIndex returns an allocNameIndex for use in selecting names of // allocations to create or stop. It takes the job and task group name, desired // count and any existing allocations as input. 
-func newAllocNameIndex(job, taskGroup string, count int, in allocSet) *allocNameIndex { +func newAllocNameIndex(job, taskGroup string, count int, in allocSet) *AllocNameIndex { bitMap, duplicates := bitmapFrom(in, uint(count)) - return &allocNameIndex{ + return &AllocNameIndex{ count: count, b: bitMap, job: job, @@ -736,7 +735,7 @@ func bitmapFrom(input allocSet, minSize uint) (structs.Bitmap, map[uint]int) { // Highest removes and returns the highest n used names. The returned set // can be less than n if there aren't n names set in the index -func (a *allocNameIndex) Highest(n uint) map[string]struct{} { +func (a *AllocNameIndex) Highest(n uint) map[string]struct{} { h := make(map[string]struct{}, n) for i := a.b.Size(); i > uint(0) && uint(len(h)) < n; i-- { // Use this to avoid wrapping around b/c of the unsigned int @@ -752,13 +751,13 @@ func (a *allocNameIndex) Highest(n uint) map[string]struct{} { // IsDuplicate checks whether the passed allocation index is duplicated within // the tracking. -func (a *allocNameIndex) IsDuplicate(idx uint) bool { +func (a *AllocNameIndex) IsDuplicate(idx uint) bool { val, ok := a.duplicates[idx] return ok && val > 0 } // UnsetIndex unsets the index as having its name used -func (a *allocNameIndex) UnsetIndex(idx uint) { +func (a *AllocNameIndex) UnsetIndex(idx uint) { // If this index is a duplicate, remove the duplicate entry. Otherwise, we // can remove it from the bitmap tracking. @@ -773,7 +772,7 @@ func (a *allocNameIndex) UnsetIndex(idx uint) { // NextCanaries returns the next n names for use as canaries and sets them as // used. The existing canaries and destructive updates are also passed in. -func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) []string { +func (a *AllocNameIndex) NextCanaries(n uint, existing, destructive allocSet) []string { next := make([]string, 0, n) // Create a name index @@ -830,7 +829,7 @@ func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) [] // Next returns the next n names for use as new placements and sets them as // used. -func (a *allocNameIndex) Next(n uint) []string { +func (a *AllocNameIndex) Next(n uint) []string { next := make([]string, 0, n) // Get the set of unset names that can be used diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile/allocs_test.go similarity index 99% rename from scheduler/reconcile_util_test.go rename to scheduler/reconcile/allocs_test.go index f3f2a7a7e..3c293f232 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile/allocs_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. 
// SPDX-License-Identifier: BUSL-1.1 -package scheduler +package reconcile import ( "testing" @@ -1554,7 +1554,7 @@ func Test_allocNameIndex_Highest(t *testing.T) { testCases := []struct { name string - inputAllocNameIndex *allocNameIndex + inputAllocNameIndex *AllocNameIndex inputN uint expectedOutput map[string]struct{} }{ @@ -1651,7 +1651,7 @@ func Test_allocNameIndex_NextCanaries(t *testing.T) { testCases := []struct { name string - inputAllocNameIndex *allocNameIndex + inputAllocNameIndex *AllocNameIndex inputN uint inputExisting allocSet inputDestructive allocSet @@ -1716,7 +1716,7 @@ func Test_allocNameIndex_Next(t *testing.T) { testCases := []struct { name string - inputAllocNameIndex *allocNameIndex + inputAllocNameIndex *AllocNameIndex inputN uint expectedOutput []string }{ diff --git a/scheduler/reconcile.go b/scheduler/reconcile/reconcile_cluster.go similarity index 82% rename from scheduler/reconcile.go rename to scheduler/reconcile/reconcile_cluster.go index 7e210a5be..59cd97344 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile/reconcile_cluster.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package scheduler +package reconcile // The reconciler is the first stage in the scheduler for service and batch // jobs. It compares the existing state to the desired state to determine the @@ -20,7 +20,7 @@ import ( "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" - reconnectingpicker "github.com/hashicorp/nomad/scheduler/reconnecting_picker" + sstructs "github.com/hashicorp/nomad/scheduler/structs" ) const ( @@ -34,37 +34,27 @@ const ( rescheduleWindowSize = 1 * time.Second ) -type ReconnectingPicker interface { - PickReconnectingAlloc(disconnect *structs.DisconnectStrategy, original *structs.Allocation, replacement *structs.Allocation) *structs.Allocation -} - -// allocUpdateType takes an existing allocation and a new job definition and +// AllocUpdateType takes an existing allocation and a new job definition and // returns whether the allocation can ignore the change, requires a destructive // update, or can be inplace updated. If it can be inplace updated, an updated // allocation that has the new resources and alloc metrics attached will be // returned. -type allocUpdateType func(existing *structs.Allocation, newJob *structs.Job, +type AllocUpdateType func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (ignore, destructive bool, updated *structs.Allocation) -type AllocReconcilerOption func(*allocReconciler) +type AllocReconcilerOption func(*AllocReconciler) -func AllocRenconcilerWithNow(now time.Time) AllocReconcilerOption { - return func(ar *allocReconciler) { - ar.now = now - } -} - -// allocReconciler is used to determine the set of allocations that require +// AllocReconciler is used to determine the set of allocations that require // placement, inplace updating or stopping given the job specification and // existing cluster state. The reconciler should only be used for batch and // service jobs. -type allocReconciler struct { +type AllocReconciler struct { // logger is used to log debug information. 
Logging should be kept at a // minimal here logger log.Logger // canInplace is used to check if the allocation can be inplace upgraded - allocUpdateFn allocUpdateType + allocUpdateFn AllocUpdateType // batch marks whether the job is a batch job batch bool @@ -108,62 +98,62 @@ type allocReconciler struct { // defaults to time.Now, and overridden in unit tests now time.Time - reconnectingPicker ReconnectingPicker + reconnectingPicker reconnectingPickerInterface - // result is the results of the reconcile. During computation it can be + // Result is the results of the reconcile. During computation it can be // used to store intermediate state - result *reconcileResults + Result *ReconcileResults } -// reconcileResults contains the results of the reconciliation and should be +// ReconcileResults contains the results of the reconciliation and should be // applied by the scheduler. -type reconcileResults struct { - // deployment is the deployment that should be created or updated as a +type ReconcileResults struct { + // Deployment is the Deployment that should be created or updated as a // result of scheduling - deployment *structs.Deployment + Deployment *structs.Deployment - // deploymentUpdates contains a set of deployment updates that should be + // DeploymentUpdates contains a set of deployment updates that should be // applied as a result of scheduling - deploymentUpdates []*structs.DeploymentStatusUpdate + DeploymentUpdates []*structs.DeploymentStatusUpdate - // place is the set of allocations to place by the scheduler - place []allocPlaceResult + // Place is the set of allocations to Place by the scheduler + Place []AllocPlaceResult - // destructiveUpdate is the set of allocations to apply a destructive update to - destructiveUpdate []allocDestructiveResult + // DestructiveUpdate is the set of allocations to apply a destructive update to + DestructiveUpdate []allocDestructiveResult - // inplaceUpdate is the set of allocations to apply an inplace update to - inplaceUpdate []*structs.Allocation + // InplaceUpdate is the set of allocations to apply an inplace update to + InplaceUpdate []*structs.Allocation - // stop is the set of allocations to stop - stop []allocStopResult + // Stop is the set of allocations to Stop + Stop []AllocStopResult - // attributeUpdates are updates to the allocation that are not from a + // AttributeUpdates are updates to the allocation that are not from a // jobspec change. - attributeUpdates map[string]*structs.Allocation + AttributeUpdates map[string]*structs.Allocation - // disconnectUpdates is the set of allocations are on disconnected nodes, but + // DisconnectUpdates is the set of allocations are on disconnected nodes, but // have not yet had their ClientStatus set to AllocClientStatusUnknown. - disconnectUpdates map[string]*structs.Allocation + DisconnectUpdates map[string]*structs.Allocation - // reconnectUpdates is the set of allocations that have ClientStatus set to + // ReconnectUpdates is the set of allocations that have ClientStatus set to // AllocClientStatusUnknown, but the associated Node has reconnected. - reconnectUpdates map[string]*structs.Allocation + ReconnectUpdates map[string]*structs.Allocation - // desiredTGUpdates captures the desired set of changes to make for each + // DesiredTGUpdates captures the desired set of changes to make for each // task group. 
- desiredTGUpdates map[string]*structs.DesiredUpdates + DesiredTGUpdates map[string]*structs.DesiredUpdates - // desiredFollowupEvals is the map of follow up evaluations to create per task group + // DesiredFollowupEvals is the map of follow up evaluations to create per task group // This is used to create a delayed evaluation for rescheduling failed allocations. - desiredFollowupEvals map[string][]*structs.Evaluation + DesiredFollowupEvals map[string][]*structs.Evaluation - // taskGroupAllocNameIndexes is a tracking of the allocation name index, + // TaskGroupAllocNameIndexes is a tracking of the allocation name index, // keyed by the task group name. This is stored within the results, so the // generic scheduler can use this to perform duplicate alloc index checks // before submitting the plan. This is always non-nil and is handled within // a single routine, so does not require a mutex. - taskGroupAllocNameIndexes map[string]*allocNameIndex + TaskGroupAllocNameIndexes map[string]*AllocNameIndex } // delayedRescheduleInfo contains the allocation id and a time when its eligible to be rescheduled. @@ -179,18 +169,18 @@ type delayedRescheduleInfo struct { rescheduleTime time.Time } -func (r *reconcileResults) GoString() string { +func (r *ReconcileResults) GoString() string { base := fmt.Sprintf("Total changes: (place %d) (destructive %d) (inplace %d) (stop %d) (disconnect %d) (reconnect %d)", - len(r.place), len(r.destructiveUpdate), len(r.inplaceUpdate), len(r.stop), len(r.disconnectUpdates), len(r.reconnectUpdates)) + len(r.Place), len(r.DestructiveUpdate), len(r.InplaceUpdate), len(r.Stop), len(r.DisconnectUpdates), len(r.ReconnectUpdates)) - if r.deployment != nil { - base += fmt.Sprintf("\nCreated Deployment: %q", r.deployment.ID) + if r.Deployment != nil { + base += fmt.Sprintf("\nCreated Deployment: %q", r.Deployment.ID) } - for _, u := range r.deploymentUpdates { + for _, u := range r.DeploymentUpdates { base += fmt.Sprintf("\nDeployment Update for ID %q: Status %q; Description %q", u.DeploymentID, u.Status, u.StatusDescription) } - for tg, u := range r.desiredTGUpdates { + for tg, u := range r.DesiredTGUpdates { base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u) } @@ -199,12 +189,12 @@ func (r *reconcileResults) GoString() string { // NewAllocReconciler creates a new reconciler that should be used to determine // the changes required to bring the cluster state inline with the declared jobspec -func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch bool, +func NewAllocReconciler(logger log.Logger, allocUpdateFn AllocUpdateType, batch bool, jobID string, job *structs.Job, deployment *structs.Deployment, existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node, evalID string, - evalPriority int, supportsDisconnectedClients bool, opts ...AllocReconcilerOption) *allocReconciler { + evalPriority int, supportsDisconnectedClients bool, opts ...AllocReconcilerOption) *AllocReconciler { - ar := &allocReconciler{ + ar := &AllocReconciler{ logger: logger.Named("reconciler"), allocUpdateFn: allocUpdateFn, batch: batch, @@ -217,15 +207,15 @@ func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch evalPriority: evalPriority, supportsDisconnectedClients: supportsDisconnectedClients, now: time.Now().UTC(), - result: &reconcileResults{ - attributeUpdates: make(map[string]*structs.Allocation), - disconnectUpdates: make(map[string]*structs.Allocation), - reconnectUpdates: make(map[string]*structs.Allocation), - 
desiredTGUpdates: make(map[string]*structs.DesiredUpdates), - desiredFollowupEvals: make(map[string][]*structs.Evaluation), - taskGroupAllocNameIndexes: make(map[string]*allocNameIndex), + Result: &ReconcileResults{ + AttributeUpdates: make(map[string]*structs.Allocation), + DisconnectUpdates: make(map[string]*structs.Allocation), + ReconnectUpdates: make(map[string]*structs.Allocation), + DesiredTGUpdates: make(map[string]*structs.DesiredUpdates), + DesiredFollowupEvals: make(map[string][]*structs.Evaluation), + TaskGroupAllocNameIndexes: make(map[string]*AllocNameIndex), }, - reconnectingPicker: reconnectingpicker.New(logger), + reconnectingPicker: newReconnectingPicker(logger), } for _, op := range opts { @@ -237,7 +227,7 @@ func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch // Compute reconciles the existing cluster state and returns the set of changes // required to converge the job spec and state -func (a *allocReconciler) Compute() *reconcileResults { +func (a *AllocReconciler) Compute() { // Create the allocation matrix m := newAllocMatrix(a.job, a.existingAllocs) @@ -247,17 +237,15 @@ func (a *allocReconciler) Compute() *reconcileResults { // stopping all running allocs if a.job.Stopped() { a.handleStop(m) - return a.result + return } a.computeDeploymentPaused() deploymentComplete := a.computeDeploymentComplete(m) a.computeDeploymentUpdates(deploymentComplete) - - return a.result } -func (a *allocReconciler) computeDeploymentComplete(m allocMatrix) bool { +func (a *AllocReconciler) computeDeploymentComplete(m allocMatrix) bool { complete := true for group, as := range m { groupComplete := a.computeGroup(group, as) @@ -267,7 +255,7 @@ func (a *allocReconciler) computeDeploymentComplete(m allocMatrix) bool { return complete } -func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) { +func (a *AllocReconciler) computeDeploymentUpdates(deploymentComplete bool) { if a.deployment != nil { // Mark the deployment as complete if possible if deploymentComplete { @@ -276,14 +264,14 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) { // need to make sure we don't revert those states if a.deployment.Status != structs.DeploymentStatusUnblocking && a.deployment.Status != structs.DeploymentStatusSuccessful { - a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ + a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusBlocked, StatusDescription: structs.DeploymentStatusDescriptionBlocked, }) } } else { - a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ + a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusSuccessful, StatusDescription: structs.DeploymentStatusDescriptionSuccessful, @@ -293,7 +281,7 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) { // Mark the deployment as pending since its state is now computed. 
if a.deployment.Status == structs.DeploymentStatusInitializing { - a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ + a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusPending, StatusDescription: structs.DeploymentStatusDescriptionPendingForPeer, @@ -302,7 +290,7 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) { } // Set the description of a created deployment - if d := a.result.deployment; d != nil { + if d := a.Result.Deployment; d != nil { if d.RequiresPromotion() { if d.HasAutoPromote() { d.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion @@ -322,7 +310,7 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) { // pending or initializing, which are the initial states for multi-region // job deployments. This flag tells Compute that we should not make // placements on the deployment. -func (a *allocReconciler) computeDeploymentPaused() { +func (a *AllocReconciler) computeDeploymentPaused() { if a.deployment != nil { a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused || a.deployment.Status == structs.DeploymentStatusPending || @@ -338,11 +326,11 @@ func (a *allocReconciler) computeDeploymentPaused() { // 1. Jobs that are marked for stop, but there is a non-terminal deployment. // 2. Deployments that are active, but referencing a different job version. // 3. Deployments that are already successful. -func (a *allocReconciler) cancelUnneededDeployments() { +func (a *AllocReconciler) cancelUnneededDeployments() { // If the job is stopped and there is a non-terminal deployment, cancel it if a.job.Stopped() { if a.deployment != nil && a.deployment.Active() { - a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ + a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusCancelled, StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, @@ -363,7 +351,7 @@ func (a *allocReconciler) cancelUnneededDeployments() { // Check if the deployment is active and referencing an older job and cancel it if d.JobCreateIndex != a.job.CreateIndex || d.JobVersion != a.job.Version { if d.Active() { - a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ + a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ DeploymentID: a.deployment.ID, Status: structs.DeploymentStatusCancelled, StatusDescription: structs.DeploymentStatusDescriptionNewerJob, @@ -382,61 +370,61 @@ func (a *allocReconciler) cancelUnneededDeployments() { } // handleStop marks all allocations to be stopped, handling the lost case -func (a *allocReconciler) handleStop(m allocMatrix) { +func (a *AllocReconciler) handleStop(m allocMatrix) { for group, as := range m { as = filterByTerminal(as) desiredChanges := new(structs.DesiredUpdates) desiredChanges.Stop = a.filterAndStopAll(as) - a.result.desiredTGUpdates[group] = desiredChanges + a.Result.DesiredTGUpdates[group] = desiredChanges } } // filterAndStopAll stops all allocations in an allocSet. This is useful in when // stopping an entire job or task group. 
-func (a *allocReconciler) filterAndStopAll(set allocSet) uint64 { +func (a *AllocReconciler) filterAndStopAll(set allocSet) uint64 { untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) - a.markStop(untainted, "", allocNotNeeded) - a.markStop(migrate, "", allocNotNeeded) - a.markStop(lost, structs.AllocClientStatusLost, allocLost) - a.markStop(disconnecting, "", allocNotNeeded) - a.markStop(reconnecting, "", allocNotNeeded) - a.markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", allocNotNeeded) - a.markStop(expiring.filterByClientStatus(structs.AllocClientStatusUnknown), "", allocNotNeeded) + a.markStop(untainted, "", sstructs.StatusAllocNotNeeded) + a.markStop(migrate, "", sstructs.StatusAllocNotNeeded) + a.markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost) + a.markStop(disconnecting, "", sstructs.StatusAllocNotNeeded) + a.markStop(reconnecting, "", sstructs.StatusAllocNotNeeded) + a.markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", sstructs.StatusAllocNotNeeded) + a.markStop(expiring.filterByClientStatus(structs.AllocClientStatusUnknown), "", sstructs.StatusAllocNotNeeded) return uint64(len(set)) } // markStop is a helper for marking a set of allocation for stop with a // particular client status and description. -func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescription string) { +func (a *AllocReconciler) markStop(allocs allocSet, clientStatus, statusDescription string) { for _, alloc := range allocs { - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - clientStatus: clientStatus, - statusDescription: statusDescription, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: alloc, + ClientStatus: clientStatus, + StatusDescription: statusDescription, }) } } // markDelayed does markStop, but optionally includes a FollowupEvalID so that we can update // the stopped alloc with its delayed rescheduling evalID -func (a *allocReconciler) markDelayed(allocs allocSet, clientStatus, statusDescription string, followupEvals map[string]string) { +func (a *AllocReconciler) markDelayed(allocs allocSet, clientStatus, statusDescription string, followupEvals map[string]string) { for _, alloc := range allocs { - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - clientStatus: clientStatus, - statusDescription: statusDescription, - followupEvalID: followupEvals[alloc.ID], + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: alloc, + ClientStatus: clientStatus, + StatusDescription: statusDescription, + FollowupEvalID: followupEvals[alloc.ID], }) } } // computeGroup reconciles state for a particular task group. It returns whether // the deployment it is for is complete with regards to the task group. -func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { +func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { // Create the desired update object for the group desiredChanges := new(structs.DesiredUpdates) - a.result.desiredTGUpdates[groupName] = desiredChanges + a.Result.DesiredTGUpdates[groupName] = desiredChanges // Get the task group. 
The task group may be nil if the job was updates such // that the task group no longer exists @@ -542,7 +530,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // which is the union of untainted, rescheduled, allocs on migrating // nodes, and allocs on down nodes (includes canaries) nameIndex := newAllocNameIndex(a.jobID, groupName, tg.Count, untainted.union(migrate, rescheduleNow, lost)) - a.result.taskGroupAllocNameIndexes[groupName] = nameIndex + a.Result.TaskGroupAllocNameIndexes[groupName] = nameIndex // Stop any unneeded allocations and update the untainted set to not // include stopped allocations. @@ -583,7 +571,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // * If there are any canaries that they have been promoted // * There is no delayed stop_after_client_disconnect alloc, which delays scheduling for the whole group // * An alloc was lost - var place []allocPlaceResult + var place []AllocPlaceResult if len(lostLater) == 0 { place = a.computePlacements(tg, nameIndex, untainted, migrate, rescheduleNow, lost, isCanarying) if !existingDeployment { @@ -609,7 +597,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // Deployments that are still initializing need to be sent in full in the // plan so its internal state can be persisted by the plan applier. if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusInitializing { - a.result.deployment = a.deployment + a.Result.Deployment = a.deployment } deploymentComplete := a.isDeploymentComplete(groupName, destructive, inplace, @@ -618,7 +606,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { return deploymentComplete } -func (a *allocReconciler) initializeDeploymentState(group string, tg *structs.TaskGroup) (*structs.DeploymentState, bool) { +func (a *AllocReconciler) initializeDeploymentState(group string, tg *structs.TaskGroup) (*structs.DeploymentState, bool) { var dstate *structs.DeploymentState existingDeployment := false @@ -639,7 +627,7 @@ func (a *allocReconciler) initializeDeploymentState(group string, tg *structs.Ta } // If we have destructive updates, and have fewer canaries than is desired, we need to create canaries. 
-func (a *allocReconciler) requiresCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, destructive, canaries allocSet) bool { +func (a *AllocReconciler) requiresCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, destructive, canaries allocSet) bool { canariesPromoted := dstate != nil && dstate.Promoted return tg.Update != nil && len(destructive) != 0 && @@ -647,14 +635,14 @@ func (a *allocReconciler) requiresCanaries(tg *structs.TaskGroup, dstate *struct !canariesPromoted } -func (a *allocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, - destructive, canaries allocSet, desiredChanges *structs.DesiredUpdates, nameIndex *allocNameIndex) { +func (a *AllocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, + destructive, canaries allocSet, desiredChanges *structs.DesiredUpdates, nameIndex *AllocNameIndex) { dstate.DesiredCanaries = tg.Update.Canary if !a.deploymentPaused && !a.deploymentFailed { desiredChanges.Canary += uint64(tg.Update.Canary - len(canaries)) for _, name := range nameIndex.NextCanaries(uint(desiredChanges.Canary), canaries, destructive) { - a.result.place = append(a.result.place, allocPlaceResult{ + a.Result.Place = append(a.Result.Place, AllocPlaceResult{ name: name, canary: true, taskGroup: tg, @@ -665,7 +653,7 @@ func (a *allocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs // filterOldTerminalAllocs filters allocations that should be ignored since they // are allocations that are terminal from a previous job version. -func (a *allocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignore allocSet) { +func (a *AllocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignore allocSet) { if !a.batch { return all, nil } @@ -688,7 +676,7 @@ func (a *allocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignor // cancelUnneededCanaries handles the canaries for the group by stopping the // unneeded ones and returning the current set of canaries and the updated total // set of allocs for the group -func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChanges *structs.DesiredUpdates) (canaries, all allocSet) { +func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChanges *structs.DesiredUpdates) (canaries, all allocSet) { // Stop any canary from an older deployment or from a failed one var stop []string @@ -715,7 +703,7 @@ func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChang // stopSet is the allocSet that contains the canaries we desire to stop from // above. stopSet := all.fromKeys(stop) - a.markStop(stopSet, "", allocNotNeeded) + a.markStop(stopSet, "", sstructs.StatusAllocNotNeeded) desiredChanges.Stop += uint64(len(stopSet)) all = all.difference(stopSet) @@ -732,8 +720,8 @@ func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChang // We don't add these stops to desiredChanges because the deployment is // still active. DesiredChanges is used to report deployment progress/final // state. These transient failures aren't meaningful. 
- a.markStop(migrate, "", allocMigrating) - a.markStop(lost, structs.AllocClientStatusLost, allocLost) + a.markStop(migrate, "", sstructs.StatusAllocMigrating) + a.markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost) canaries = untainted all = all.difference(migrate, lost) @@ -745,7 +733,7 @@ func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChang // computeUnderProvisionedBy returns the number of allocs that still need to be // placed for a particular group. The inputs are the group definition, the untainted, // destructive, and migrate allocation sets, and whether we are in a canary state. -func (a *allocReconciler) computeUnderProvisionedBy(group *structs.TaskGroup, untainted, destructive, migrate allocSet, isCanarying bool) int { +func (a *AllocReconciler) computeUnderProvisionedBy(group *structs.TaskGroup, untainted, destructive, migrate allocSet, isCanarying bool) int { // If no update strategy, nothing is migrating, and nothing is being replaced, // allow as many as defined in group.Count if group.Update.IsEmpty() || len(destructive)+len(migrate) == 0 { @@ -790,14 +778,14 @@ func (a *allocReconciler) computeUnderProvisionedBy(group *structs.TaskGroup, un // definition, the set of untainted, migrating and reschedule allocations for the group. // // Placements will meet or exceed group count. -func (a *allocReconciler) computePlacements(group *structs.TaskGroup, - nameIndex *allocNameIndex, untainted, migrate, reschedule, lost allocSet, - isCanarying bool) []allocPlaceResult { +func (a *AllocReconciler) computePlacements(group *structs.TaskGroup, + nameIndex *AllocNameIndex, untainted, migrate, reschedule, lost allocSet, + isCanarying bool) []AllocPlaceResult { // Add rescheduled placement results - var place []allocPlaceResult + var place []AllocPlaceResult for _, alloc := range reschedule { - place = append(place, allocPlaceResult{ + place = append(place, AllocPlaceResult{ name: alloc.Name, taskGroup: group, previousAlloc: alloc, @@ -822,7 +810,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, } existing++ - place = append(place, allocPlaceResult{ + place = append(place, AllocPlaceResult{ name: alloc.Name, taskGroup: group, previousAlloc: alloc, @@ -837,7 +825,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // Add remaining placement results if existing < group.Count { for _, name := range nameIndex.Next(uint(group.Count - existing)) { - place = append(place, allocPlaceResult{ + place = append(place, AllocPlaceResult{ name: name, taskGroup: group, downgradeNonCanary: isCanarying, @@ -853,15 +841,15 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // and if the placement is already rescheduling or part of a failed deployment. // The input deploymentPlaceReady is calculated as the deployment is not paused, failed, or canarying. // It returns the number of allocs still needed. -func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, - place []allocPlaceResult, rescheduleNow, lost allocSet, underProvisionedBy int) int { +func (a *AllocReconciler) computeReplacements(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, + place []AllocPlaceResult, rescheduleNow, lost allocSet, underProvisionedBy int) int { // Disconnecting allocs are not failing, but are included in rescheduleNow. // Create a new set that only includes the actual failures and compute // replacements based off that. 
failed := make(allocSet) for id, alloc := range rescheduleNow { - _, ok := a.result.disconnectUpdates[id] + _, ok := a.Result.DisconnectUpdates[id] if !ok && alloc.ClientStatus != structs.AllocClientStatusUnknown { failed[id] = alloc } @@ -872,9 +860,9 @@ func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desired desiredChanges.Place += uint64(len(place)) // This relies on the computePlacements having built this set, which in // turn relies on len(lostLater) == 0. - a.result.place = append(a.result.place, place...) + a.Result.Place = append(a.Result.Place, place...) - a.markStop(failed, "", allocRescheduled) + a.markStop(failed, "", sstructs.StatusAllocRescheduled) desiredChanges.Stop += uint64(len(failed)) minimum := min(len(place), underProvisionedBy) @@ -891,7 +879,7 @@ func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desired if len(lost) != 0 { allowed := min(len(lost), len(place)) desiredChanges.Place += uint64(allowed) - a.result.place = append(a.result.place, place[:allowed]...) + a.Result.Place = append(a.Result.Place, place[:allowed]...) } // if no failures or there are no pending placements return. @@ -907,17 +895,17 @@ func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desired partOfFailedDeployment := a.deploymentFailed && prev != nil && a.deployment.ID == prev.DeploymentID if !partOfFailedDeployment && p.IsRescheduling() { - a.result.place = append(a.result.place, p) + a.Result.Place = append(a.Result.Place, p) desiredChanges.Place++ - _, prevIsDisconnecting := a.result.disconnectUpdates[prev.ID] + _, prevIsDisconnecting := a.Result.DisconnectUpdates[prev.ID] if prevIsDisconnecting { continue } - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: prev, - statusDescription: allocRescheduled, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: prev, + StatusDescription: sstructs.StatusAllocRescheduled, }) desiredChanges.Stop++ } @@ -926,7 +914,7 @@ func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desired return underProvisionedBy } -func (a *allocReconciler) computeDestructiveUpdates(destructive allocSet, underProvisionedBy int, +func (a *AllocReconciler) computeDestructiveUpdates(destructive allocSet, underProvisionedBy int, desiredChanges *structs.DesiredUpdates, tg *structs.TaskGroup) { // Do all destructive updates @@ -934,23 +922,23 @@ func (a *allocReconciler) computeDestructiveUpdates(destructive allocSet, underP desiredChanges.DestructiveUpdate += uint64(minimum) desiredChanges.Ignore += uint64(len(destructive) - minimum) for _, alloc := range destructive.nameOrder()[:minimum] { - a.result.destructiveUpdate = append(a.result.destructiveUpdate, allocDestructiveResult{ + a.Result.DestructiveUpdate = append(a.Result.DestructiveUpdate, allocDestructiveResult{ placeName: alloc.Name, placeTaskGroup: tg, stopAlloc: alloc, - stopStatusDescription: allocUpdating, + stopStatusDescription: sstructs.StatusAllocUpdating, }) } } -func (a *allocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdates, migrate allocSet, tg *structs.TaskGroup, isCanarying bool) { +func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdates, migrate allocSet, tg *structs.TaskGroup, isCanarying bool) { desiredChanges.Migrate += uint64(len(migrate)) for _, alloc := range migrate.nameOrder() { - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - statusDescription: allocMigrating, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ 
+ Alloc: alloc, + StatusDescription: sstructs.StatusAllocMigrating, }) - a.result.place = append(a.result.place, allocPlaceResult{ + a.Result.Place = append(a.Result.Place, AllocPlaceResult{ name: alloc.Name, canary: alloc.DeploymentStatus.IsCanary(), taskGroup: tg, @@ -962,7 +950,7 @@ func (a *allocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdat } } -func (a *allocReconciler) createDeployment(groupName string, strategy *structs.UpdateStrategy, +func (a *AllocReconciler) createDeployment(groupName string, strategy *structs.UpdateStrategy, existingDeployment bool, dstate *structs.DeploymentState, all, destructive allocSet) { // Guard the simple cases that require no computation first. if existingDeployment || @@ -971,7 +959,7 @@ func (a *allocReconciler) createDeployment(groupName string, strategy *structs.U return } - updatingSpec := len(destructive) != 0 || len(a.result.inplaceUpdate) != 0 + updatingSpec := len(destructive) != 0 || len(a.Result.InplaceUpdate) != 0 hadRunning := false for _, alloc := range all { @@ -990,15 +978,15 @@ func (a *allocReconciler) createDeployment(groupName string, strategy *structs.U // A previous group may have made the deployment already. If not create one. if a.deployment == nil { a.deployment = structs.NewDeployment(a.job, a.evalPriority, a.now.UnixNano()) - a.result.deployment = a.deployment + a.Result.Deployment = a.deployment } // Attach the groups deployment state to the deployment a.deployment.TaskGroups[groupName] = dstate } -func (a *allocReconciler) isDeploymentComplete(groupName string, destructive, inplace, migrate, rescheduleNow allocSet, - place []allocPlaceResult, rescheduleLater []*delayedRescheduleInfo, requiresCanaries bool) bool { +func (a *AllocReconciler) isDeploymentComplete(groupName string, destructive, inplace, migrate, rescheduleNow allocSet, + place []AllocPlaceResult, rescheduleLater []*delayedRescheduleInfo, requiresCanaries bool) bool { complete := len(destructive)+len(inplace)+len(place)+len(migrate)+len(rescheduleNow)+len(rescheduleLater) == 0 && !requiresCanaries @@ -1021,14 +1009,14 @@ func (a *allocReconciler) isDeploymentComplete(groupName string, destructive, in // computeStop returns the set of allocations that are marked for stopping given // the group definition, the set of allocations in various states and whether we // are canarying. -func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *allocNameIndex, +func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *AllocNameIndex, untainted, migrate, lost, canaries allocSet, isCanarying bool, followupEvals map[string]string) allocSet { // Mark all lost allocations for stop. 
var stop allocSet stop = stop.union(lost) - a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) + a.markDelayed(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost, followupEvals) // If we are still deploying or creating canaries, don't stop them if isCanarying { @@ -1062,9 +1050,9 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc for id, alloc := range untainted.difference(canaries) { if _, match := canaryNames[alloc.Name]; match { stop[id] = alloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - statusDescription: allocNotNeeded, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: alloc, + StatusDescription: sstructs.StatusAllocNotNeeded, }) delete(untainted, id) @@ -1084,9 +1072,9 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc if _, match := removeNames[alloc.Name]; !match { continue } - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - statusDescription: allocNotNeeded, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: alloc, + StatusDescription: sstructs.StatusAllocNotNeeded, }) delete(migrate, id) stop[id] = alloc @@ -1104,9 +1092,9 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc for id, alloc := range untainted { if _, ok := removeNames[alloc.Name]; ok { stop[id] = alloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - statusDescription: allocNotNeeded, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: alloc, + StatusDescription: sstructs.StatusAllocNotNeeded, }) delete(untainted, id) @@ -1121,9 +1109,9 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc // were allocations with duplicate names. for id, alloc := range untainted { stop[id] = alloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: alloc, - statusDescription: allocNotNeeded, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: alloc, + StatusDescription: sstructs.StatusAllocNotNeeded, }) delete(untainted, id) @@ -1149,7 +1137,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc // - If the reconnecting allocation is to be stopped, its replacements may // not be present in any of the returned sets. The rest of the reconciler // logic will handle them. 
-func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all allocSet, tg *structs.TaskGroup) (allocSet, allocSet) { +func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all allocSet, tg *structs.TaskGroup) (allocSet, allocSet) { stop := make(allocSet) reconnect := make(allocSet) @@ -1161,10 +1149,10 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc if reconnectFailed { stop[reconnectingAlloc.ID] = reconnectingAlloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: reconnectingAlloc, - clientStatus: structs.AllocClientStatusFailed, - statusDescription: allocRescheduled, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: reconnectingAlloc, + ClientStatus: structs.AllocClientStatusFailed, + StatusDescription: sstructs.StatusAllocRescheduled, }) continue } @@ -1180,9 +1168,9 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc if stopReconnecting { stop[reconnectingAlloc.ID] = reconnectingAlloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: reconnectingAlloc, - statusDescription: allocNotNeeded, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: reconnectingAlloc, + StatusDescription: sstructs.StatusAllocNotNeeded, }) continue } @@ -1218,15 +1206,15 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc } // Pick which allocation we want to keep using the disconnect reconcile strategy - keepAlloc := a.reconnectingPicker.PickReconnectingAlloc(tg.Disconnect, reconnectingAlloc, replacementAlloc) + keepAlloc := a.reconnectingPicker.pickReconnectingAlloc(tg.Disconnect, reconnectingAlloc, replacementAlloc) if keepAlloc == replacementAlloc { // The replacement allocation is preferred, so stop the one // reconnecting if not stopped yet. if _, ok := stop[reconnectingAlloc.ID]; !ok { stop[reconnectingAlloc.ID] = reconnectingAlloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: reconnectingAlloc, - statusDescription: allocNotNeeded, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: reconnectingAlloc, + StatusDescription: sstructs.StatusAllocNotNeeded, }) } } else { @@ -1234,9 +1222,9 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc // that are not in server terminal status or stopped already. if _, ok := stop[replacementAlloc.ID]; !ok { stop[replacementAlloc.ID] = replacementAlloc - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: replacementAlloc, - statusDescription: allocReconnected, + a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + Alloc: replacementAlloc, + StatusDescription: sstructs.StatusAllocReconnected, }) } } @@ -1259,7 +1247,7 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc // 2. Those that can be upgraded in-place. These are added to the results // automatically since the function contains the correct state to do so, // 3. 
Those that require destructive updates -func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) { +func (a *AllocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) { // Determine the set of allocations that need to be updated ignore = make(map[string]*structs.Allocation) inplace = make(map[string]*structs.Allocation) @@ -1273,7 +1261,7 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted all destructive[alloc.ID] = alloc } else { inplace[alloc.ID] = alloc - a.result.inplaceUpdate = append(a.result.inplaceUpdate, inplaceAlloc) + a.Result.InplaceUpdate = append(a.Result.InplaceUpdate, inplaceAlloc) } } @@ -1283,7 +1271,7 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted all // createRescheduleLaterEvals creates batched followup evaluations with the WaitUntil field // set for allocations that are eligible to be rescheduled later, and marks the alloc with // the followupEvalID -func (a *allocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) { +func (a *AllocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) { // followupEvals are created in the same way as for delayed lost allocs allocIDToFollowupEvalID := a.createLostLaterEvals(rescheduleLater, tgName) @@ -1294,10 +1282,10 @@ func (a *allocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedR updatedAlloc.FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID] // Can't updated an allocation that is disconnected - if _, ok := a.result.disconnectUpdates[laterAlloc.allocID]; !ok { - a.result.attributeUpdates[laterAlloc.allocID] = updatedAlloc + if _, ok := a.Result.DisconnectUpdates[laterAlloc.allocID]; !ok { + a.Result.AttributeUpdates[laterAlloc.allocID] = updatedAlloc } else { - a.result.disconnectUpdates[laterAlloc.allocID].FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID] + a.Result.DisconnectUpdates[laterAlloc.allocID].FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID] } } } @@ -1307,7 +1295,7 @@ func (a *allocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedR // set to running, and these allocs are appended to the Plan as non-destructive // updates. Clients are responsible for reconciling the DesiredState with the // actual state as the node comes back online. -func (a *allocReconciler) computeReconnecting(reconnecting allocSet) { +func (a *AllocReconciler) computeReconnecting(reconnecting allocSet) { // Create updates that will be appended to the plan. for _, alloc := range reconnecting { @@ -1335,14 +1323,14 @@ func (a *allocReconciler) computeReconnecting(reconnecting allocSet) { // Use a copy to prevent mutating the object from statestore. reconnectedAlloc := alloc.Copy() reconnectedAlloc.AppendState(structs.AllocStateFieldClientStatus, alloc.ClientStatus) - a.result.reconnectUpdates[reconnectedAlloc.ID] = reconnectedAlloc + a.Result.ReconnectUpdates[reconnectedAlloc.ID] = reconnectedAlloc } } // handleDelayedLost creates batched followup evaluations with the WaitUntil field set for // lost allocations. followupEvals are appended to a.result as a side effect, we return a // map of alloc IDs to their followupEval IDs. 
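Reviewer note (not part of the patch): the two eval-building helpers changed in the hunks below share one shape, so a condensed sketch may save some scrolling. Everything in it is taken from this diff except the hypothetical function name `sketchFollowupEval`, the `wait` parameter, and the ID assignment, which are illustrative; it assumes it sits inside the new `scheduler/reconcile` package (importing `time`, `nomad/structs`, `scheduler/structs` as `sstructs`, and `helper/uuid`) so the unexported fields and helpers are visible.

```go
// sketchFollowupEval is an illustrative condensation of what
// createLostLaterEvals and createTimeoutLaterEvals build: a pending
// evaluation with WaitUntil set, merged into Result.DesiredFollowupEvals
// via appendFollowupEvals, with the affected alloc linked back by eval ID.
func sketchFollowupEval(a *AllocReconciler, tgName string, wait time.Time) string {
	eval := &structs.Evaluation{
		ID:                uuid.Generate(),
		JobID:             a.job.ID,
		JobModifyIndex:    a.job.ModifyIndex,
		Status:            structs.EvalStatusPending,
		StatusDescription: sstructs.DescReschedulingFollowupEval, // or DescDisconnectTimeoutFollowupEval
		WaitUntil:         wait,
	}
	a.appendFollowupEvals(tgName, []*structs.Evaluation{eval})

	// Callers store this ID on the delayed alloc (FollowupEvalID), which is
	// what the tests later assert against r.DesiredFollowupEvals[tgName][0].ID.
	return eval.ID
}
```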
-func (a *allocReconciler) createLostLaterEvals(rescheduleLater []*delayedRescheduleInfo, tgName string) map[string]string { +func (a *AllocReconciler) createLostLaterEvals(rescheduleLater []*delayedRescheduleInfo, tgName string) map[string]string { if len(rescheduleLater) == 0 { return map[string]string{} } @@ -1366,7 +1354,7 @@ func (a *allocReconciler) createLostLaterEvals(rescheduleLater []*delayedResched JobID: a.job.ID, JobModifyIndex: a.job.ModifyIndex, Status: structs.EvalStatusPending, - StatusDescription: reschedulingFollowupEvalDesc, + StatusDescription: sstructs.DescReschedulingFollowupEval, WaitUntil: nextReschedTime, } evals = append(evals, eval) @@ -1404,7 +1392,7 @@ func (a *allocReconciler) createLostLaterEvals(rescheduleLater []*delayedResched // createTimeoutLaterEvals creates followup evaluations with the // WaitUntil field set for allocations in an unknown state on disconnected nodes. // It returns a map of allocIDs to their associated followUpEvalIDs. -func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName string) map[string]string { +func (a *AllocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName string) map[string]string { if len(disconnecting) == 0 { return map[string]string{} } @@ -1434,7 +1422,7 @@ func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName JobID: a.job.ID, JobModifyIndex: a.job.ModifyIndex, Status: structs.EvalStatusPending, - StatusDescription: disconnectTimeoutFollowupEvalDesc, + StatusDescription: sstructs.DescDisconnectTimeoutFollowupEval, WaitUntil: nextReschedTime, } evals = append(evals, eval) @@ -1458,7 +1446,7 @@ func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName JobID: a.job.ID, JobModifyIndex: a.job.ModifyIndex, Status: structs.EvalStatusPending, - StatusDescription: disconnectTimeoutFollowupEvalDesc, + StatusDescription: sstructs.DescDisconnectTimeoutFollowupEval, WaitUntil: timeoutInfo.rescheduleTime, } evals = append(evals, eval) @@ -1476,14 +1464,14 @@ func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName // Create updates that will be applied to the allocs to mark the FollowupEvalID // and the unknown ClientStatus and AllocState. -func (a *allocReconciler) appendUnknownDisconnectingUpdates(disconnecting allocSet, allocIDToFollowupEvalID map[string]string, rescheduleNow allocSet) { +func (a *AllocReconciler) appendUnknownDisconnectingUpdates(disconnecting allocSet, allocIDToFollowupEvalID map[string]string, rescheduleNow allocSet) { for id, alloc := range disconnecting { updatedAlloc := alloc.Copy() updatedAlloc.ClientStatus = structs.AllocClientStatusUnknown updatedAlloc.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) - updatedAlloc.ClientDescription = allocUnknown + updatedAlloc.ClientDescription = sstructs.StatusAllocUnknown updatedAlloc.FollowupEvalID = allocIDToFollowupEvalID[id] - a.result.disconnectUpdates[updatedAlloc.ID] = updatedAlloc + a.Result.DisconnectUpdates[updatedAlloc.ID] = updatedAlloc // update the reschedule set so that any placements holding onto this // pointer are using the right pointer for PreviousAllocation() @@ -1497,13 +1485,13 @@ func (a *allocReconciler) appendUnknownDisconnectingUpdates(disconnecting allocS // appendFollowupEvals appends a set of followup evals for a task group to the // desiredFollowupEvals map which is later added to the scheduler's followUpEvals set. 
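Reviewer note (not part of the patch): the test hunks from here on repeat one mechanical change, so here is the new calling convention shown once as an illustrative sketch. Constructor arguments and field names mirror the updated tests; the sketch assumes it lives next to them in a `_test.go` file of the new `reconcile` package so that `allocUpdateFnIgnore` and `buildAllocations` are in scope, and `exampleComputeUsage` itself is a hypothetical name.

```go
func exampleComputeUsage(t *testing.T) {
	job := mock.Job()
	allocs := buildAllocations(job, 10,
		structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 0)

	reconciler := NewAllocReconciler(
		testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
		nil, allocs, nil, "", 50, true)

	// Before: r := reconciler.Compute()   (unexported *reconcileResults)
	// After:  Compute stores its output in the exported Result field.
	reconciler.Compute()
	r := reconciler.Result

	for _, s := range r.Stop { // []AllocStopResult, now with exported fields
		t.Logf("stop %s: %s", s.Alloc.Name, s.StatusDescription)
	}
	t.Logf("place=%d, follow-up eval groups=%d",
		len(r.Place), len(r.DesiredFollowupEvals))
}
```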
-func (a *allocReconciler) appendFollowupEvals(tgName string, evals []*structs.Evaluation) { +func (a *AllocReconciler) appendFollowupEvals(tgName string, evals []*structs.Evaluation) { // Merge with - if existingFollowUpEvals, ok := a.result.desiredFollowupEvals[tgName]; ok { + if existingFollowUpEvals, ok := a.Result.DesiredFollowupEvals[tgName]; ok { evals = append(existingFollowUpEvals, evals...) } - a.result.desiredFollowupEvals[tgName] = evals + a.Result.DesiredFollowupEvals[tgName] = evals } // emitRescheduleInfo emits metrics about the rescheduling decision of an evaluation. If a followup evaluation is diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile/reconcile_cluster_test.go similarity index 94% rename from scheduler/reconcile_test.go rename to scheduler/reconcile/reconcile_cluster_test.go index 12f5937cf..16600b008 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile/reconcile_cluster_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package scheduler +package reconcile import ( "fmt" @@ -76,7 +76,7 @@ func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *s return false, false, newAlloc } -func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType { +func allocUpdateFnMock(handled map[string]AllocUpdateType, unhandled AllocUpdateType) AllocUpdateType { return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { if fn, ok := handled[existing.ID]; ok { return fn(existing, newJob, newTG) @@ -134,7 +134,7 @@ func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) { } } -func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) { +func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []AllocStopResult) { t.Helper() canaryIndex := make(map[string]struct{}) for _, state := range d.TaskGroups { @@ -144,13 +144,13 @@ func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocSt } for _, s := range stop { - if _, ok := canaryIndex[s.alloc.ID]; ok { - t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name) + if _, ok := canaryIndex[s.Alloc.ID]; ok { + t.Fatalf("Stopping canary alloc %q %q", s.Alloc.ID, s.Alloc.Name) } } } -func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) { +func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []AllocPlaceResult) { t.Helper() names := make(map[string]struct{}, numPrevious) @@ -175,7 +175,7 @@ func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place [ } } -func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) { +func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []AllocPlaceResult) { t.Helper() names := make(map[string]struct{}, numRescheduled) @@ -213,7 +213,7 @@ func intRange(pairs ...int) []int { return r } -func placeResultsToNames(place []allocPlaceResult) []string { +func placeResultsToNames(place []AllocPlaceResult) []string { names := make([]string, 0, len(place)) for _, p := range place { names = append(names, p.name) @@ -229,10 +229,10 @@ func destructiveResultsToNames(destructive []allocDestructiveResult) []string { return names } -func stopResultsToNames(stop []allocStopResult) []string { +func stopResultsToNames(stop []AllocStopResult) []string { names := make([]string, 0, len(stop)) for 
_, s := range stop { - names = append(names, s.alloc.Name) + names = append(names, s.Alloc.Name) } return names } @@ -266,30 +266,30 @@ type resultExpectation struct { stop int } -func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) { +func assertResults(t *testing.T, r *ReconcileResults, exp *resultExpectation) { t.Helper() - if exp.createDeployment != nil && r.deployment == nil { + if exp.createDeployment != nil && r.Deployment == nil { t.Errorf("Expect a created deployment got none") - } else if exp.createDeployment == nil && r.deployment != nil { - t.Errorf("Expect no created deployment; got %#v", r.deployment) - } else if exp.createDeployment != nil && r.deployment != nil { + } else if exp.createDeployment == nil && r.Deployment != nil { + t.Errorf("Expect no created deployment; got %#v", r.Deployment) + } else if exp.createDeployment != nil && r.Deployment != nil { // Clear the deployment ID - r.deployment.ID, exp.createDeployment.ID = "", "" - must.Eq(t, exp.createDeployment, r.deployment, must.Sprintf( + r.Deployment.ID, exp.createDeployment.ID = "", "" + must.Eq(t, exp.createDeployment, r.Deployment, must.Sprintf( "Unexpected createdDeployment; got\n %#v\nwant\n%#v\nDiff: %v", - r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))) + r.Deployment, exp.createDeployment, pretty.Diff(r.Deployment, exp.createDeployment))) } - test.Eq(t, exp.deploymentUpdates, r.deploymentUpdates, test.Sprint("Expected Deployment Updates")) - test.SliceLen(t, exp.place, r.place, test.Sprint("Expected Placements")) - test.SliceLen(t, exp.destructive, r.destructiveUpdate, test.Sprint("Expected Destructive")) - test.SliceLen(t, exp.inplace, r.inplaceUpdate, test.Sprint("Expected Inplace Updates")) - test.MapLen(t, exp.attributeUpdates, r.attributeUpdates, test.Sprint("Expected Attribute Updates")) - test.MapLen(t, exp.reconnectUpdates, r.reconnectUpdates, test.Sprint("Expected Reconnect Updates")) - test.MapLen(t, exp.disconnectUpdates, r.disconnectUpdates, test.Sprint("Expected Disconnect Updates")) - test.SliceLen(t, exp.stop, r.stop, test.Sprint("Expected Stops")) - test.Eq(t, exp.desiredTGUpdates, r.desiredTGUpdates, test.Sprint("Expected Desired TG Update Annotations")) + test.Eq(t, exp.deploymentUpdates, r.DeploymentUpdates, test.Sprint("Expected Deployment Updates")) + test.SliceLen(t, exp.place, r.Place, test.Sprint("Expected Placements")) + test.SliceLen(t, exp.destructive, r.DestructiveUpdate, test.Sprint("Expected Destructive")) + test.SliceLen(t, exp.inplace, r.InplaceUpdate, test.Sprint("Expected Inplace Updates")) + test.MapLen(t, exp.attributeUpdates, r.AttributeUpdates, test.Sprint("Expected Attribute Updates")) + test.MapLen(t, exp.reconnectUpdates, r.ReconnectUpdates, test.Sprint("Expected Reconnect Updates")) + test.MapLen(t, exp.disconnectUpdates, r.DisconnectUpdates, test.Sprint("Expected Disconnect Updates")) + test.SliceLen(t, exp.stop, r.Stop, test.Sprint("Expected Stops")) + test.Eq(t, exp.desiredTGUpdates, r.DesiredTGUpdates, test.Sprint("Expected Desired TG Update Annotations")) } func buildAllocations(job *structs.Job, count int, clientStatus, desiredStatus string, nodeScore float64) []*structs.Allocation { @@ -352,7 +352,8 @@ func TestReconciler_Place_NoExisting(t *testing.T) { reconciler := NewAllocReconciler( testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "", job.Priority, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results 
assertResults(t, r, &resultExpectation{ @@ -368,7 +369,7 @@ func TestReconciler_Place_NoExisting(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles placements for a job that has some @@ -391,7 +392,8 @@ func TestReconciler_Place_Existing(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -408,7 +410,7 @@ func TestReconciler_Place_Existing(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles stopping allocations for a job that has @@ -432,7 +434,8 @@ func TestReconciler_ScaleDown_Partial(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -449,7 +452,7 @@ func TestReconciler_ScaleDown_Partial(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.Stop)) } // Tests the reconciler properly handles stopping allocations for a job that has @@ -474,7 +477,8 @@ func TestReconciler_ScaleDown_Zero(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -490,7 +494,7 @@ func TestReconciler_ScaleDown_Zero(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.Stop)) } // Tests the reconciler properly handles stopping allocations for a job that has @@ -517,7 +521,8 @@ func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -533,7 +538,7 @@ func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { }, }) - assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.Stop)) } // Tests the reconciler properly handles inplace upgrading allocations @@ -555,7 +560,8 @@ func TestReconciler_Inplace(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -571,7 +577,7 @@ func TestReconciler_Inplace(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) + assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.InplaceUpdate)) } // Tests the 
reconciler properly handles inplace upgrading allocations while @@ -596,7 +602,8 @@ func TestReconciler_Inplace_ScaleUp(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -613,8 +620,8 @@ func TestReconciler_Inplace_ScaleUp(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) - assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.InplaceUpdate)) + assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles inplace upgrading allocations while @@ -639,7 +646,8 @@ func TestReconciler_Inplace_ScaleDown(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -656,8 +664,8 @@ func TestReconciler_Inplace_ScaleDown(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate)) - assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.InplaceUpdate)) + assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.Stop)) } // TestReconciler_Inplace_Rollback tests that a rollback to a previous version @@ -697,13 +705,14 @@ func TestReconciler_Inplace_Rollback(t *testing.T) { allocs[2].ClientStatus = structs.AllocClientStatusFailed // job is rolled back, we expect allocs[0] to be updated in-place - allocUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{ + allocUpdateFn := allocUpdateFnMock(map[string]AllocUpdateType{ allocs[0].ID: allocUpdateFnInplace, }, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFn, false, job.ID, job, nil, allocs, nil, uuid.Generate(), 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -724,10 +733,10 @@ func TestReconciler_Inplace_Rollback(t *testing.T) { }, }) - test.MapLen(t, 1, r.desiredFollowupEvals, test.Sprint("expected 1 follow-up eval")) - assertNamesHaveIndexes(t, intRange(0, 0), allocsToNames(r.inplaceUpdate)) - assertNamesHaveIndexes(t, intRange(2, 2), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(2, 3), placeResultsToNames(r.place)) + test.MapLen(t, 1, r.DesiredFollowupEvals, test.Sprint("expected 1 follow-up eval")) + assertNamesHaveIndexes(t, intRange(0, 0), allocsToNames(r.InplaceUpdate)) + assertNamesHaveIndexes(t, intRange(2, 2), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(2, 3), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles destructive upgrading allocations @@ -749,7 +758,8 @@ func TestReconciler_Destructive(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -763,7 +773,7 @@ func TestReconciler_Destructive(t *testing.T) { }, }) - 
assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.DestructiveUpdate)) } // Tests the reconciler properly handles destructive upgrading allocations when max_parallel=0 @@ -785,7 +795,8 @@ func TestReconciler_DestructiveMaxParallel(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -799,7 +810,7 @@ func TestReconciler_DestructiveMaxParallel(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.DestructiveUpdate)) } // Tests the reconciler properly handles destructive upgrading allocations while @@ -824,7 +835,8 @@ func TestReconciler_Destructive_ScaleUp(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -840,8 +852,8 @@ func TestReconciler_Destructive_ScaleUp(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) - assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.DestructiveUpdate)) + assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles destructive upgrading allocations while @@ -866,7 +878,8 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -882,8 +895,8 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.DestructiveUpdate)) } // Tests the reconciler properly handles lost nodes with allocations @@ -914,7 +927,8 @@ func TestReconciler_LostNode(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -932,8 +946,8 @@ func TestReconciler_LostNode(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles lost nodes with allocations while @@ -967,7 +981,8 @@ func TestReconciler_LostNode_ScaleUp(t *testing.T) { reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -985,8 +1000,8 @@ func TestReconciler_LostNode_ScaleUp(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles lost nodes with allocations while @@ -1020,7 +1035,8 @@ func TestReconciler_LostNode_ScaleDown(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1037,7 +1053,7 @@ func TestReconciler_LostNode_ScaleDown(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.Stop)) } // Tests the reconciler properly handles draining nodes with allocations @@ -1068,7 +1084,8 @@ func TestReconciler_DrainNode(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1085,11 +1102,11 @@ func TestReconciler_DrainNode(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 2, r.place) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.Place) // These should not have the reschedule field set - assertPlacementsAreRescheduled(t, 0, r.place) + assertPlacementsAreRescheduled(t, 0, r.Place) } // Tests the reconciler properly handles draining nodes with allocations while @@ -1123,7 +1140,8 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1141,11 +1159,11 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 2, r.place) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.Place) // These should not have the reschedule field set - assertPlacementsAreRescheduled(t, 0, r.place) + assertPlacementsAreRescheduled(t, 0, r.Place) } // Tests the reconciler properly handles draining nodes with allocations while @@ -1179,7 +1197,8 @@ func 
TestReconciler_DrainNode_ScaleDown(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1197,11 +1216,11 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) // These should not have the reschedule field set - assertPlacementsAreRescheduled(t, 0, r.place) + assertPlacementsAreRescheduled(t, 0, r.Place) } // Tests the reconciler properly handles a task group being removed @@ -1227,7 +1246,8 @@ func TestReconciler_RemovedTG(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1246,8 +1266,8 @@ func TestReconciler_RemovedTG(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles a job in stopped states @@ -1292,7 +1312,8 @@ func TestReconciler_JobStopped(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1308,7 +1329,7 @@ func TestReconciler_JobStopped(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.Stop)) }) } } @@ -1361,8 +1382,9 @@ func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() - must.SliceEmpty(t, r.stop) + reconciler.Compute() + r := reconciler.Result + must.SliceEmpty(t, r.Stop) // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, @@ -1400,7 +1422,8 @@ func TestReconciler_MultiTG(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1420,7 +1443,7 @@ func TestReconciler_MultiTG(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.Place)) } // Tests the reconciler properly handles jobs with multiple task groups with @@ -1455,7 +1478,8 @@ func TestReconciler_MultiTG_SingleUpdateBlock(t *testing.T) { 
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1532,11 +1556,12 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate(), 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Two reschedule attempts were already made, one more can be made at a future time // Verify that the follow up eval has the expected waitUntil time - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.NotNil(t, evals) must.SliceLen(t, 1, evals) must.Eq(t, now.Add(delayDur), evals[0].WaitUntil) @@ -1558,11 +1583,11 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) { }, }, }) - assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates)) + assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.AttributeUpdates)) // Verify that the followup evalID field is set correctly var annotated *structs.Allocation - for _, a := range r.attributeUpdates { + for _, a := range r.AttributeUpdates { annotated = a } must.Eq(t, evals[0].ID, annotated.FollowupEvalID) @@ -1613,10 +1638,11 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate(), 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that two follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.NotNil(t, evals) must.SliceLen(t, 2, evals) @@ -1642,10 +1668,10 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { }, }, }) - assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates)) + assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.AttributeUpdates)) // Verify that the followup evalID field is set correctly - for _, alloc := range r.attributeUpdates { + for _, alloc := range r.AttributeUpdates { if allocNameToIndex(alloc.Name) < 5 { must.Eq(t, evals[0].ID, alloc.FollowupEvalID) } else if allocNameToIndex(alloc.Name) < 7 { @@ -1710,10 +1736,11 @@ func TestReconciler_RescheduleNow_Batch(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "", 50, true) reconciler.now = now - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Two reschedule attempts were made, one more can be made now @@ -1733,9 +1760,9 @@ func TestReconciler_RescheduleNow_Batch(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) + assertPlacementsAreRescheduled(t, 1, r.Place) } @@ -1785,11 +1812,12 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) { reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate(), 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Should place a new placement and create a follow up eval for the delayed reschedule // Verify that the follow up eval has the expected waitUntil time - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.NotNil(t, evals) must.SliceLen(t, 1, evals) must.Eq(t, now.Add(delayDur), evals[0].WaitUntil) @@ -1811,12 +1839,12 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates)) + assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.Place)) + assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.AttributeUpdates)) // Verify that the followup evalID field is set correctly var annotated *structs.Allocation - for _, a := range r.attributeUpdates { + for _, a := range r.AttributeUpdates { annotated = a } must.Eq(t, evals[0].ID, annotated.FollowupEvalID) @@ -1857,7 +1885,8 @@ func TestReconciler_Service_ClientStatusComplete(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Should place a new placement for the alloc that was marked complete assertResults(t, r, &resultExpectation{ @@ -1875,7 +1904,7 @@ func TestReconciler_Service_ClientStatusComplete(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.Place)) } @@ -1916,7 +1945,8 @@ func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Should place a new placement for the alloc that was marked stopped assertResults(t, r, &resultExpectation{ @@ -1934,10 +1964,10 @@ func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.Place)) // Should not have any follow up evals created - must.MapEmpty(t, r.desiredFollowupEvals) + must.MapEmpty(t, r.DesiredFollowupEvals) } // Tests rescheduling failed service allocations with desired state stop @@ -1993,10 +2023,11 @@ func TestReconciler_RescheduleNow_Service(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc and one replacement for terminal alloc were placed @@ -2016,9 +2047,9 @@ func TestReconciler_RescheduleNow_Service(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + 
assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) + assertPlacementsAreRescheduled(t, 1, r.Place) } // Tests rescheduling failed service allocations when there's clock drift (upto a second) @@ -2073,10 +2104,11 @@ func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) reconciler.now = now - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc was placed @@ -2096,9 +2128,9 @@ func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) + assertPlacementsAreRescheduled(t, 1, r.Place) } // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift @@ -2155,10 +2187,11 @@ func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID, 50, true) reconciler.now = now.Add(-30 * time.Second) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc was placed @@ -2178,9 +2211,9 @@ func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) + assertPlacementsAreRescheduled(t, 1, r.Place) } // Tests rescheduling failed service allocations when there are canaries @@ -2264,10 +2297,11 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc and one replacement for terminal alloc were placed @@ -2287,9 +2321,9 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 2, r.place) - assertPlacementsAreRescheduled(t, 2, r.place) + assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.Place) + assertPlacementsAreRescheduled(t, 2, r.Place) } // 
Tests rescheduling failed canary service allocations @@ -2389,10 +2423,11 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "", 50, true) reconciler.now = now - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc and one replacement for terminal alloc were placed @@ -2412,9 +2447,9 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 2, r.place) - assertPlacementsAreRescheduled(t, 2, r.place) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.Place) + assertPlacementsAreRescheduled(t, 2, r.Place) } // Tests rescheduling failed canary service allocations when one has reached its @@ -2517,10 +2552,11 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "", 50, true) reconciler.now = now - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc and one replacement for terminal alloc were placed @@ -2540,9 +2576,9 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) + assertPlacementsAreRescheduled(t, 1, r.Place) } // Tests failed service allocations that were already rescheduled won't be rescheduled again @@ -2584,7 +2620,8 @@ func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Should place 1 - one is a new placement to make up the desired count of 5 // failing allocs are not rescheduled @@ -2603,7 +2640,7 @@ func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { }) // name index 0 is used for the replacement because its - assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.Place)) } // Tests the reconciler cancels an old deployment when the job is being stopped @@ -2674,7 +2711,8 @@ func TestReconciler_CancelDeployment_JobStop(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result var updates []*structs.DeploymentStatusUpdate if c.cancel { @@ -2701,7 +2739,7 @@ func 
TestReconciler_CancelDeployment_JobStop(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.Stop)) }) } } @@ -2754,7 +2792,8 @@ func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result var updates []*structs.DeploymentStatusUpdate if c.cancel { @@ -2806,11 +2845,12 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - d := structs.NewDeployment(job, 50, r.deployment.CreateTime) + d := structs.NewDeployment(job, 50, r.Deployment.CreateTime) d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredTotal: 10, } @@ -2828,7 +2868,7 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.DestructiveUpdate)) } // Tests the reconciler creates a deployment for inplace updates @@ -2854,11 +2894,12 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - d := structs.NewDeployment(job, 50, r.deployment.CreateTime) + d := structs.NewDeployment(job, 50, r.Deployment.CreateTime) d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredTotal: 10, } @@ -2901,11 +2942,12 @@ func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - d := structs.NewDeployment(job, 50, r.deployment.CreateTime) + d := structs.NewDeployment(job, 50, r.Deployment.CreateTime) d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredTotal: 5, } @@ -2950,7 +2992,8 @@ func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3028,10 +3071,11 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { allocs = append(allocs, canary) d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} - mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) + mockUpdateFn := 
allocUpdateFnMock(map[string]AllocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3099,7 +3143,8 @@ func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3173,10 +3218,11 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing newAlloc.DeploymentID = d.ID allocs = append(allocs, newAlloc) - mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) + mockUpdateFn := allocUpdateFnMock(map[string]AllocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3227,7 +3273,7 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { n := mock.DrainNode() // Create two canaries for the new job - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 2; i++ { // Create one canary canary := mock.Alloc() @@ -3253,7 +3299,8 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3269,8 +3316,8 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { }, }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) } // Tests the reconciler handles migrating a canary correctly on a lost node @@ -3303,7 +3350,7 @@ func TestReconciler_LostNode_Canary(t *testing.T) { } // Create two canaries for the new job - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 2; i++ { // Create one canary canary := mock.Alloc() @@ -3328,7 +3375,8 @@ func TestReconciler_LostNode_Canary(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3345,8 +3393,8 @@ func TestReconciler_LostNode_Canary(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(1, 1), 
stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.Place)) } // Tests the reconciler handles stopping canaries from older deployments @@ -3397,11 +3445,12 @@ func TestReconciler_StopOldCanaries(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - newD := structs.NewDeployment(job, 50, r.deployment.CreateTime) + newD := structs.NewDeployment(job, 50, r.Deployment.CreateTime) newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredCanaries: 2, @@ -3430,8 +3479,8 @@ func TestReconciler_StopOldCanaries(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) } // Tests the reconciler creates new canaries when the job changes @@ -3455,11 +3504,12 @@ func TestReconciler_NewCanaries(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - newD := structs.NewDeployment(job, 50, r.deployment.CreateTime) + newD := structs.NewDeployment(job, 50, r.Deployment.CreateTime) newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredCanaries: 2, @@ -3481,7 +3531,7 @@ func TestReconciler_NewCanaries(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) } // Tests the reconciler creates new canaries when the job changes and the @@ -3508,11 +3558,12 @@ func TestReconciler_NewCanaries_CountGreater(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - newD := structs.NewDeployment(job, 50, r.deployment.CreateTime) + newD := structs.NewDeployment(job, 50, r.Deployment.CreateTime) newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion state := &structs.DeploymentState{ DesiredCanaries: 7, @@ -3535,7 +3586,7 @@ func TestReconciler_NewCanaries_CountGreater(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.Place)) } // Tests the reconciler creates new canaries when the job changes for multiple @@ -3564,11 +3615,12 @@ func TestReconciler_NewCanaries_MultiTG(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := 
reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - newD := structs.NewDeployment(job, 50, r.deployment.CreateTime) + newD := structs.NewDeployment(job, 50, r.Deployment.CreateTime) newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion state := &structs.DeploymentState{ DesiredCanaries: 2, @@ -3596,7 +3648,7 @@ func TestReconciler_NewCanaries_MultiTG(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.Place)) } // Tests the reconciler creates new canaries when the job changes and scales up @@ -3622,11 +3674,12 @@ func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - newD := structs.NewDeployment(job, 50, r.deployment.CreateTime) + newD := structs.NewDeployment(job, 50, r.Deployment.CreateTime) newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredCanaries: 2, @@ -3648,7 +3701,7 @@ func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) } // Tests the reconciler creates new canaries when the job changes and scales @@ -3675,11 +3728,12 @@ func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - newD := structs.NewDeployment(job, 50, r.deployment.CreateTime) + newD := structs.NewDeployment(job, 50, r.Deployment.CreateTime) newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredCanaries: 2, @@ -3702,8 +3756,8 @@ func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) + assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.Stop)) } // Tests the reconciler handles filling the names of partially placed canaries @@ -3757,7 +3811,8 @@ func TestReconciler_NewCanaries_FillNames(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3774,7 +3829,7 @@ func TestReconciler_NewCanaries_FillNames(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(1, 2), 
placeResultsToNames(r.Place)) } // Tests the reconciler handles canary promotion by unblocking max_parallel @@ -3808,7 +3863,7 @@ func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { } // Create the canaries - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 2; i++ { // Create one canary canary := mock.Alloc() @@ -3829,7 +3884,8 @@ func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3846,9 +3902,9 @@ func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { }, }) - assertNoCanariesStopped(t, d, r.stop) - assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNoCanariesStopped(t, d, r.Stop) + assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.DestructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) } // Tests the reconciler handles canary promotion when the canary count equals @@ -3885,7 +3941,7 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { } // Create the canaries - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 2; i++ { // Create one canary canary := mock.Alloc() @@ -3906,7 +3962,8 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result updates := []*structs.DeploymentStatusUpdate{ { @@ -3931,8 +3988,8 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { }, }) - assertNoCanariesStopped(t, d, r.stop) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNoCanariesStopped(t, d, r.Stop) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) } // Tests the reconciler checks the health of placed allocs to determine the @@ -3987,7 +4044,7 @@ func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { } // Create the new allocs - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 4; i++ { new := mock.Alloc() new.Job = job @@ -4008,7 +4065,8 @@ func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4024,7 +4082,7 @@ func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { }) if c.healthy != 0 { - assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.DestructiveUpdate)) } }) } @@ -4059,7 +4117,7 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { 
} // Create the healthy replacements - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 8; i++ { new := mock.Alloc() new.Job = job @@ -4092,7 +4150,8 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4112,9 +4171,9 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) - assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.DestructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.Place)) + assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.Stop)) } // Tests the reconciler handles a failed deployment with allocs on tainted @@ -4147,7 +4206,7 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { } // Create the healthy replacements - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 0; i < 4; i++ { new := mock.Alloc() new.Job = job @@ -4180,7 +4239,8 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4199,8 +4259,8 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.Place)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) } // Tests the reconciler handles a run after a deployment is complete @@ -4239,7 +4299,8 @@ func TestReconciler_CompleteDeployment(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4297,7 +4358,8 @@ func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result updates := []*structs.DeploymentStatusUpdate{ { @@ -4352,7 +4414,7 @@ func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { // Create 6 allocations from the old job var allocs []*structs.Allocation - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for _, group := range []int{0, 1} { replacements := 4 state := s0 @@ -4395,7 +4457,8 @@ func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { mockUpdateFn := 
allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4415,7 +4478,7 @@ func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.Stop)) } // Test that a failed deployment and updated job works @@ -4467,11 +4530,12 @@ func TestReconciler_FailedDeployment_NewJob(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy - dnew := structs.NewDeployment(jobNew, 50, r.deployment.CreateTime) + dnew := structs.NewDeployment(jobNew, 50, r.Deployment.CreateTime) dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredTotal: 10, } @@ -4489,7 +4553,7 @@ func TestReconciler_FailedDeployment_NewJob(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.DestructiveUpdate)) } // Tests the reconciler marks a deployment as complete @@ -4525,7 +4589,8 @@ func TestReconciler_MarkDeploymentComplete(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result updates := []*structs.DeploymentStatusUpdate{ { @@ -4581,7 +4646,7 @@ func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { } // Create 20 from new job - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for i := 10; i < 30; i++ { alloc := mock.Alloc() alloc.Job = job @@ -4597,7 +4662,8 @@ func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4635,9 +4701,10 @@ func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result - d := structs.NewDeployment(job, 50, r.deployment.CreateTime) + d := structs.NewDeployment(job, 50, r.Deployment.CreateTime) d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ DesiredTotal: 10, } @@ -4657,8 +4724,8 @@ func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.Place)) + assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.DestructiveUpdate)) } // Tests that the 
reconciler handles rerunning a batch job in the case that the @@ -4690,7 +4757,8 @@ func TestReconciler_Batch_Rerun(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4707,7 +4775,7 @@ func TestReconciler_Batch_Rerun(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.Place)) } // Test that a failed deployment will not result in rescheduling failed allocations @@ -4754,7 +4822,8 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert that no rescheduled placements were created assertResults(t, r, &resultExpectation{ @@ -4812,7 +4881,8 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert that no rescheduled placements were created assertResults(t, r, &resultExpectation{ @@ -4900,7 +4970,8 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result updates := []*structs.DeploymentStatusUpdate{ { @@ -4965,7 +5036,8 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Assert that rescheduled placements were created assertResults(t, r, &resultExpectation{ @@ -4981,7 +5053,7 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T }, }, }) - assertPlaceResultsHavePreviousAllocs(t, 10, r.place) + assertPlaceResultsHavePreviousAllocs(t, 10, r.Place) } // Tests force rescheduling a failed alloc that is past its reschedule limit @@ -5030,10 +5102,11 @@ func TestReconciler_ForceReschedule_Service(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // Verify that one rescheduled alloc was created because of the forced reschedule @@ -5053,9 +5126,9 @@ func TestReconciler_ForceReschedule_Service(t *testing.T) { }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.Place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.Place) + assertPlacementsAreRescheduled(t, 
1, r.Place) } // Tests behavior of service failure with rescheduling policy preventing rescheduling: @@ -5113,10 +5186,11 @@ func TestReconciler_RescheduleNot_Service(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // no rescheduling, ignore all 4 allocs @@ -5137,8 +5211,8 @@ func TestReconciler_RescheduleNot_Service(t *testing.T) { }) // none of the placement should have preallocs or rescheduled - assertPlaceResultsHavePreviousAllocs(t, 0, r.place) - assertPlacementsAreRescheduled(t, 0, r.place) + assertPlaceResultsHavePreviousAllocs(t, 0, r.Place) + assertPlacementsAreRescheduled(t, 0, r.Place) } type mockPicker struct { @@ -5147,7 +5221,7 @@ type mockPicker struct { result string } -func (mp *mockPicker) PickReconnectingAlloc(disconnect *structs.DisconnectStrategy, +func (mp *mockPicker) pickReconnectingAlloc(disconnect *structs.DisconnectStrategy, original *structs.Allocation, replacement *structs.Allocation) *structs.Allocation { mp.strategy = disconnect.ReconcileStrategy() mp.called = true @@ -5521,24 +5595,25 @@ func TestReconciler_Disconnected_Client(t *testing.T) { } reconciler.reconnectingPicker = mpc + reconciler.Compute() - results := reconciler.Compute() + results := reconciler.Result assertResults(t, results, tc.expected) must.Eq(t, tc.reconcileStrategy, mpc.strategy) must.Eq(t, tc.callPicker, mpc.called) - for _, stopResult := range results.stop { + for _, stopResult := range results.Stop { // Skip replacement allocs. - if !origAllocs.Contains(stopResult.alloc.ID) { + if !origAllocs.Contains(stopResult.Alloc.ID) { continue } if tc.shouldStopOnDisconnectedNode { - must.Eq(t, testNode.ID, stopResult.alloc.NodeID) + must.Eq(t, testNode.ID, stopResult.Alloc.NodeID) } - must.Eq(t, job.Version, stopResult.alloc.Job.Version) + must.Eq(t, job.Version, stopResult.Alloc.Job.Version) } }) } @@ -5604,10 +5679,11 @@ func TestReconciler_RescheduleNot_Batch(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "", 50, true) reconciler.now = now - r := reconciler.Compute() + reconciler.Compute() + r := reconciler.Result // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] + evals := r.DesiredFollowupEvals[tgName] must.Nil(t, evals) // No reschedule attempts were made and all allocs are untouched @@ -5636,10 +5712,11 @@ func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nodes, "", 50, true) reconciler.now = time.Now().UTC() - results := reconciler.Compute() + reconciler.Compute() + results := reconciler.Result // Verify that 1 follow up eval was created with the values we expect. - evals := results.desiredFollowupEvals[job.TaskGroups[0].Name] + evals := results.DesiredFollowupEvals[job.TaskGroups[0].Name] must.SliceLen(t, 1, evals) expectedTime := reconciler.now.Add(5 * time.Minute) @@ -5649,7 +5726,7 @@ func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) { // Validate that the queued disconnectUpdates have the right client status, // and that they have a valid FollowUpdEvalID. 
- for _, disconnectUpdate := range results.disconnectUpdates { + for _, disconnectUpdate := range results.DisconnectUpdates { must.Eq(t, structs.AllocClientStatusUnknown, disconnectUpdate.ClientStatus) must.NotEq(t, "", disconnectUpdate.FollowupEvalID) must.Eq(t, eval.ID, disconnectUpdate.FollowupEvalID) @@ -5697,7 +5774,8 @@ func TestReconciler_Disconnect_UpdateJobAfterReconnect(t *testing.T) { reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "", 50, true) - results := reconciler.Compute() + reconciler.Compute() + results := reconciler.Result // Assert both allocations will be updated. assertResults(t, results, &resultExpectation{ @@ -6000,7 +6078,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { // Populate Alloc IDS, Node IDs, Job on canaries canariesConfigured := 0 - handled := make(map[string]allocUpdateType) + handled := make(map[string]AllocUpdateType) for node, allocs := range tc.canaryAllocs { for _, alloc := range allocs { alloc.ID = uuid.Generate() @@ -6047,7 +6125,8 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, updatedJob.ID, updatedJob, deployment, allocs, tainted, "", 50, true) - result := reconciler.Compute() + reconciler.Compute() + result := reconciler.Result // Assert the correct results assertResults(t, result, tc.expectedResult) @@ -6055,7 +6134,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { // Validate that placements are either for placed canaries for the // updated job, or for disconnected allocs for the original job // and that they have a disconnect update. - for _, placeResult := range result.place { + for _, placeResult := range result.Place { found := false must.NotNil(t, placeResult.previousAlloc) for _, deployed := range tc.deployedAllocs[disconnectedNode] { @@ -6063,7 +6142,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { found = true must.Eq(t, job.Version, placeResult.previousAlloc.Job.Version) must.Eq(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID) - _, exists := result.disconnectUpdates[placeResult.previousAlloc.ID] + _, exists := result.DisconnectUpdates[placeResult.previousAlloc.ID] must.True(t, exists) break } @@ -6073,7 +6152,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { found = true must.Eq(t, updatedJob.Version, placeResult.previousAlloc.Job.Version) must.Eq(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID) - _, exists := result.disconnectUpdates[placeResult.previousAlloc.ID] + _, exists := result.DisconnectUpdates[placeResult.previousAlloc.ID] must.True(t, exists) break } @@ -6082,8 +6161,8 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { } // Validate that stops are for pending disconnects - for _, stopResult := range result.stop { - must.Eq(t, pending, stopResult.alloc.ClientStatus) + for _, stopResult := range result.Stop { + must.Eq(t, pending, stopResult.Alloc.ClientStatus) } }) } @@ -6198,7 +6277,7 @@ func TestReconciler_ComputeDeploymentPaused(t *testing.T) { testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, deployment, nil, nil, "", job.Priority, true) - _ = reconciler.Compute() + reconciler.Compute() must.Eq(t, tc.expected, reconciler.deploymentPaused) }) diff --git a/scheduler/system_util.go b/scheduler/reconcile/reconcile_node.go similarity index 80% rename from scheduler/system_util.go 
rename to scheduler/reconcile/reconcile_node.go index 031c8b041..44d91f41f 100644 --- a/scheduler/system_util.go +++ b/scheduler/reconcile/reconcile_node.go @@ -1,36 +1,52 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package scheduler - -// The structs and helpers in this file are split out of scheduler_system.go and -// shared by the system and sysbatch scheduler. No code in the generic scheduler -// or reconciler should use anything here! If you need something here for -// service/batch jobs, double-check it's safe to use for all scheduler types -// before moving it into util.go +package reconcile import ( "fmt" "time" "github.com/hashicorp/nomad/nomad/structs" + sstructs "github.com/hashicorp/nomad/scheduler/structs" ) -// materializeSystemTaskGroups is used to materialize all the task groups -// a system or sysbatch job requires. -func materializeSystemTaskGroups(job *structs.Job) map[string]*structs.TaskGroup { - out := make(map[string]*structs.TaskGroup) - if job.Stopped() { - return out +// Node is like diffSystemAllocsForNode; however, the allocations in the +// returned NodeReconcileResult contain the specific node ID they should be +// allocated on. +func Node( + job *structs.Job, // job whose allocations are going to be diff-ed + readyNodes []*structs.Node, // list of nodes in the ready state + notReadyNodes map[string]struct{}, // list of nodes in DC but not ready, e.g. draining + taintedNodes map[string]*structs.Node, // nodes which are down or drain mode (by node id) + allocs []*structs.Allocation, // non-terminal allocations + terminal structs.TerminalByNodeByName, // latest terminal allocations (by node id) + serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic +) *NodeReconcileResult { + + // Build a mapping of nodes to all their allocs. + nodeAllocs := make(map[string][]*structs.Allocation, len(allocs)) + for _, alloc := range allocs { + nodeAllocs[alloc.NodeID] = append(nodeAllocs[alloc.NodeID], alloc) } - for _, tg := range job.TaskGroups { - for i := 0; i < tg.Count; i++ { - name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i) - out[name] = tg + eligibleNodes := make(map[string]*structs.Node) + for _, node := range readyNodes { + if _, ok := nodeAllocs[node.ID]; !ok { + nodeAllocs[node.ID] = nil } + eligibleNodes[node.ID] = node } - return out + + // Create the required task groups.
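+	// materializeSystemTaskGroups expands each task group into one entry per +	// desired instance, keyed by alloc name in the form <job>.<group>[<index>].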
+ required := materializeSystemTaskGroups(job) + + result := new(NodeReconcileResult) + for nodeID, allocs := range nodeAllocs { + diff := diffSystemAllocsForNode(job, nodeID, eligibleNodes, notReadyNodes, taintedNodes, required, allocs, terminal, serverSupportsDisconnectedClients) + result.Append(diff) + } + + return result } // diffSystemAllocsForNode is used to do a set difference between the target allocations @@ -52,8 +68,8 @@ func diffSystemAllocsForNode( allocs []*structs.Allocation, // non-terminal allocations that exist terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, id) serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic -) *diffResult { - result := new(diffResult) +) *NodeReconcileResult { + result := new(NodeReconcileResult) // Scan the existing updates existing := make(map[string]struct{}) // set of alloc names @@ -67,7 +83,7 @@ func diffSystemAllocsForNode( // If not required, we stop the alloc if !ok { - result.stop = append(result.stop, allocTuple{ + result.Stop = append(result.Stop, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -93,7 +109,7 @@ func diffSystemAllocsForNode( // If we have been marked for migration and aren't terminal, migrate if !exist.TerminalStatus() && exist.DesiredTransition.ShouldMigrate() { - result.migrate = append(result.migrate, allocTuple{ + result.Migrate = append(result.Migrate, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -103,7 +119,7 @@ func diffSystemAllocsForNode( // If we are a sysbatch job and terminal, ignore (or stop?) the alloc if job.Type == structs.JobTypeSysBatch && exist.TerminalStatus() { - result.ignore = append(result.ignore, allocTuple{ + result.Ignore = append(result.Ignore, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -113,7 +129,7 @@ func diffSystemAllocsForNode( // Expired unknown allocs are lost. Expired checks that status is unknown. if supportsDisconnectedClients && expired { - result.lost = append(result.lost, allocTuple{ + result.Lost = append(result.Lost, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -125,7 +141,7 @@ func diffSystemAllocsForNode( if supportsDisconnectedClients && exist.ClientStatus == structs.AllocClientStatusUnknown && exist.DesiredStatus == structs.AllocDesiredStatusRun { - result.ignore = append(result.ignore, allocTuple{ + result.Ignore = append(result.Ignore, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -144,7 +160,7 @@ func diffSystemAllocsForNode( // alloc has already reconnected. 
reconnecting := exist.Copy() reconnecting.AppendState(structs.AllocStateFieldClientStatus, exist.ClientStatus) - result.reconnecting = append(result.reconnecting, allocTuple{ + result.Reconnecting = append(result.Reconnecting, AllocTuple{ Name: name, TaskGroup: tg, Alloc: reconnecting, @@ -173,8 +189,8 @@ func diffSystemAllocsForNode( disconnect := exist.Copy() disconnect.ClientStatus = structs.AllocClientStatusUnknown disconnect.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) - disconnect.ClientDescription = allocUnknown - result.disconnecting = append(result.disconnecting, allocTuple{ + disconnect.ClientDescription = sstructs.StatusAllocUnknown + result.Disconnecting = append(result.Disconnecting, AllocTuple{ Name: name, TaskGroup: tg, Alloc: disconnect, @@ -183,7 +199,7 @@ func diffSystemAllocsForNode( } if !exist.TerminalStatus() && (node == nil || node.TerminalStatus()) { - result.lost = append(result.lost, allocTuple{ + result.Lost = append(result.Lost, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -204,7 +220,7 @@ func diffSystemAllocsForNode( // Existing allocations on nodes that are no longer targeted // should be stopped if _, eligible := eligibleNodes[nodeID]; !eligible { - result.stop = append(result.stop, allocTuple{ + result.Stop = append(result.Stop, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -214,7 +230,7 @@ func diffSystemAllocsForNode( // If the definition is updated we need to update if job.JobModifyIndex != exist.Job.JobModifyIndex { - result.update = append(result.update, allocTuple{ + result.Update = append(result.Update, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -224,7 +240,7 @@ func diffSystemAllocsForNode( // Everything is up-to-date IGNORE: - result.ignore = append(result.ignore, allocTuple{ + result.Ignore = append(result.Ignore, AllocTuple{ Name: name, TaskGroup: tg, Alloc: exist, @@ -243,14 +259,14 @@ func diffSystemAllocsForNode( if alloc, termExists := terminal.Get(nodeID, name); termExists { // the alloc is terminal, but now the job has been updated if job.JobModifyIndex != alloc.Job.JobModifyIndex { - result.update = append(result.update, allocTuple{ + result.Update = append(result.Update, AllocTuple{ Name: name, TaskGroup: tg, Alloc: alloc, }) } else { // alloc is terminal and job unchanged, leave it alone - result.ignore = append(result.ignore, allocTuple{ + result.Ignore = append(result.Ignore, AllocTuple{ Name: name, TaskGroup: tg, Alloc: alloc, @@ -275,7 +291,7 @@ func diffSystemAllocsForNode( } termOnNode, _ := terminal.Get(nodeID, name) - allocTuple := allocTuple{ + allocTuple := AllocTuple{ Name: name, TaskGroup: tg, Alloc: termOnNode, @@ -288,64 +304,53 @@ func diffSystemAllocsForNode( allocTuple.Alloc = &structs.Allocation{NodeID: nodeID} } - result.place = append(result.place, allocTuple) + result.Place = append(result.Place, allocTuple) } } return result } -// diffSystemAllocs is like diffSystemAllocsForNode however, the allocations in the -// diffResult contain the specific nodeID they should be allocated on. -func diffSystemAllocs( - job *structs.Job, // jobs whose allocations are going to be diff-ed - readyNodes []*structs.Node, // list of nodes in the ready state - notReadyNodes map[string]struct{}, // list of nodes in DC but not ready, e.g. 
draining - taintedNodes map[string]*structs.Node, // nodes which are down or drain mode (by node id) - allocs []*structs.Allocation, // non-terminal allocations - terminal structs.TerminalByNodeByName, // latest terminal allocations (by node id) - serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic -) *diffResult { - - // Build a mapping of nodes to all their allocs. - nodeAllocs := make(map[string][]*structs.Allocation, len(allocs)) - for _, alloc := range allocs { - nodeAllocs[alloc.NodeID] = append(nodeAllocs[alloc.NodeID], alloc) +// materializeSystemTaskGroups is used to materialize all the task groups +// a system or sysbatch job requires. +func materializeSystemTaskGroups(job *structs.Job) map[string]*structs.TaskGroup { + out := make(map[string]*structs.TaskGroup) + if job.Stopped() { + return out } - eligibleNodes := make(map[string]*structs.Node) - for _, node := range readyNodes { - if _, ok := nodeAllocs[node.ID]; !ok { - nodeAllocs[node.ID] = nil + for _, tg := range job.TaskGroups { + for i := 0; i < tg.Count; i++ { + name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i) + out[name] = tg } - eligibleNodes[node.ID] = node } - - // Create the required task groups. - required := materializeSystemTaskGroups(job) - - result := new(diffResult) - for nodeID, allocs := range nodeAllocs { - diff := diffSystemAllocsForNode(job, nodeID, eligibleNodes, notReadyNodes, taintedNodes, required, allocs, terminal, serverSupportsDisconnectedClients) - result.Append(diff) - } - - return result + return out } -// evictAndPlace is used to mark allocations for evicts and add them to the -// placement queue. evictAndPlace modifies both the diffResult and the -// limit. It returns true if the limit has been reached. -func evictAndPlace(ctx Context, diff *diffResult, allocs []allocTuple, desc string, limit *int) bool { - n := len(allocs) - for i := 0; i < n && i < *limit; i++ { - a := allocs[i] - ctx.Plan().AppendStoppedAlloc(a.Alloc, desc, "", "") - diff.place = append(diff.place, a) - } - if n <= *limit { - *limit -= n - return false - } - *limit = 0 - return true +// AllocTuple is a tuple of the allocation name and potential alloc ID +type AllocTuple struct { + Name string + TaskGroup *structs.TaskGroup + Alloc *structs.Allocation +} + +// NodeReconcileResult is used to return the sets that result from the diff +type NodeReconcileResult struct { + Place, Update, Migrate, Stop, Ignore, Lost, Disconnecting, Reconnecting []AllocTuple +} + +func (d *NodeReconcileResult) GoString() string { + return fmt.Sprintf("allocs: (place %d) (update %d) (migrate %d) (stop %d) (ignore %d) (lost %d) (disconnecting %d) (reconnecting %d)", + len(d.Place), len(d.Update), len(d.Migrate), len(d.Stop), len(d.Ignore), len(d.Lost), len(d.Disconnecting), len(d.Reconnecting)) +} + +func (d *NodeReconcileResult) Append(other *NodeReconcileResult) { + d.Place = append(d.Place, other.Place...) + d.Update = append(d.Update, other.Update...) + d.Migrate = append(d.Migrate, other.Migrate...) + d.Stop = append(d.Stop, other.Stop...) + d.Ignore = append(d.Ignore, other.Ignore...) + d.Lost = append(d.Lost, other.Lost...) + d.Disconnecting = append(d.Disconnecting, other.Disconnecting...) + d.Reconnecting = append(d.Reconnecting, other.Reconnecting...) 
} diff --git a/scheduler/system_util_test.go b/scheduler/reconcile/reconcile_node_test.go similarity index 81% rename from scheduler/system_util_test.go rename to scheduler/reconcile/reconcile_node_test.go index 54412f93a..a1902058e 100644 --- a/scheduler/system_util_test.go +++ b/scheduler/reconcile/reconcile_node_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package scheduler +package reconcile import ( "fmt" @@ -25,14 +25,20 @@ type diffResultCount struct { } // assertDiffCount is a test helper that compares against a diffResult -func assertDiffCount(t *testing.T, expected diffResultCount, diff *diffResult) { +func assertDiffCount(t *testing.T, expected diffResultCount, diff *NodeReconcileResult) { t.Helper() - test.Len(t, expected.update, diff.update, test.Sprintf("expected update")) - test.Len(t, expected.ignore, diff.ignore, test.Sprintf("expected ignore")) - test.Len(t, expected.stop, diff.stop, test.Sprintf("expected stop")) - test.Len(t, expected.migrate, diff.migrate, test.Sprintf("expected migrate")) - test.Len(t, expected.lost, diff.lost, test.Sprintf("expected lost")) - test.Len(t, expected.place, diff.place, test.Sprintf("expected place")) + test.Len(t, expected.update, diff.Update, test.Sprintf("expected update")) + test.Len(t, expected.ignore, diff.Ignore, test.Sprintf("expected ignore")) + test.Len(t, expected.stop, diff.Stop, test.Sprintf("expected stop")) + test.Len(t, expected.migrate, diff.Migrate, test.Sprintf("expected migrate")) + test.Len(t, expected.lost, diff.Lost, test.Sprintf("expected lost")) + test.Len(t, expected.place, diff.Place, test.Sprintf("expected place")) +} + +func newNode(name string) *structs.Node { + n := mock.Node() + n.Name = name + return n } func TestDiffSystemAllocsForNode_Sysbatch_terminal(t *testing.T) { @@ -69,8 +75,8 @@ func TestDiffSystemAllocsForNode_Sysbatch_terminal(t *testing.T) { diff := diffSystemAllocsForNode(job, "node1", eligible, nil, tainted, required, live, terminal, true) assertDiffCount(t, diffResultCount{ignore: 1, place: 1}, diff) - if len(diff.ignore) > 0 { - must.Eq(t, terminal["node1"]["my-sysbatch.pinger[0]"], diff.ignore[0].Alloc) + if len(diff.Ignore) > 0 { + must.Eq(t, terminal["node1"]["my-sysbatch.pinger[0]"], diff.Ignore[0].Alloc) } }) @@ -211,14 +217,14 @@ func TestDiffSystemAllocsForNode_Stops(t *testing.T) { job, node.ID, eligible, nil, tainted, required, allocs, terminal, true) assertDiffCount(t, diffResultCount{ignore: 1, stop: 1, update: 1}, diff) - if len(diff.update) > 0 { - test.Eq(t, allocs[0], diff.update[0].Alloc) + if len(diff.Update) > 0 { + test.Eq(t, allocs[0], diff.Update[0].Alloc) } - if len(diff.ignore) > 0 { - test.Eq(t, allocs[1], diff.ignore[0].Alloc) + if len(diff.Ignore) > 0 { + test.Eq(t, allocs[1], diff.Ignore[0].Alloc) } - if len(diff.stop) > 0 { - test.Eq(t, allocs[2], diff.stop[0].Alloc) + if len(diff.Stop) > 0 { + test.Eq(t, allocs[2], diff.Stop[0].Alloc) } } @@ -337,8 +343,8 @@ func TestDiffSystemAllocsForNode_DrainingNode(t *testing.T) { tainted, required, allocs, terminal, true) assertDiffCount(t, diffResultCount{migrate: 1, ignore: 1}, diff) - if len(diff.migrate) > 0 { - test.Eq(t, allocs[0], diff.migrate[0].Alloc) + if len(diff.Migrate) > 0 { + test.Eq(t, allocs[0], diff.Migrate[0].Alloc) } } @@ -388,8 +394,8 @@ func TestDiffSystemAllocsForNode_LostNode(t *testing.T) { tainted, required, allocs, terminal, true) assertDiffCount(t, diffResultCount{lost: 2}, diff) - if len(diff.migrate) > 0 { - test.Eq(t, allocs[0], 
diff.migrate[0].Alloc) + if len(diff.Migrate) > 0 { + test.Eq(t, allocs[0], diff.Migrate[0].Alloc) } } @@ -596,29 +602,29 @@ func TestDiffSystemAllocs(t *testing.T) { }, } - diff := diffSystemAllocs(job, nodes, nil, tainted, allocs, terminal, true) + diff := Node(job, nodes, nil, tainted, allocs, terminal, true) assertDiffCount(t, diffResultCount{ update: 1, ignore: 1, migrate: 1, lost: 1, place: 6}, diff) - if len(diff.update) > 0 { - must.Eq(t, allocs[0], diff.update[0].Alloc) // first alloc should be updated + if len(diff.Update) > 0 { + must.Eq(t, allocs[0], diff.Update[0].Alloc) // first alloc should be updated } - if len(diff.ignore) > 0 { - must.Eq(t, allocs[1], diff.ignore[0].Alloc) // We should ignore the second alloc + if len(diff.Ignore) > 0 { + must.Eq(t, allocs[1], diff.Ignore[0].Alloc) // We should ignore the second alloc } - if len(diff.migrate) > 0 { - must.Eq(t, allocs[2], diff.migrate[0].Alloc) + if len(diff.Migrate) > 0 { + must.Eq(t, allocs[2], diff.Migrate[0].Alloc) } - if len(diff.lost) > 0 { - must.Eq(t, allocs[3], diff.lost[0].Alloc) // We should mark the 5th alloc as lost + if len(diff.Lost) > 0 { + must.Eq(t, allocs[3], diff.Lost[0].Alloc) // We should mark the 5th alloc as lost } // Ensure that the allocations which are replacements of terminal allocs are // annotated. for _, m := range terminal { for _, alloc := range m { - for _, tuple := range diff.place { + for _, tuple := range diff.Place { if alloc.NodeID == tuple.Alloc.NodeID && alloc.TaskGroup == "web" { must.Eq(t, alloc, tuple.Alloc) } @@ -626,62 +632,3 @@ func TestDiffSystemAllocs(t *testing.T) { } } } - -func TestEvictAndPlace_LimitLessThanAllocs(t *testing.T) { - ci.Parallel(t) - - _, ctx := testContext(t) - allocs := []allocTuple{ - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - } - diff := &diffResult{} - - limit := 2 - must.True(t, evictAndPlace(ctx, diff, allocs, "", &limit), - must.Sprintf("evictAndReplace() should have returned true")) - must.Zero(t, limit, - must.Sprint("evictAndReplace() should decrement limit")) - must.Len(t, 2, diff.place, - must.Sprintf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)) -} - -func TestEvictAndPlace_LimitEqualToAllocs(t *testing.T) { - ci.Parallel(t) - - _, ctx := testContext(t) - allocs := []allocTuple{ - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - } - diff := &diffResult{} - - limit := 4 - must.False(t, evictAndPlace(ctx, diff, allocs, "", &limit), - must.Sprint("evictAndReplace() should have returned false")) - must.Zero(t, limit, must.Sprint("evictAndReplace() should decrement limit")) - must.Len(t, 4, diff.place, - must.Sprintf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)) -} - -func TestEvictAndPlace_LimitGreaterThanAllocs(t *testing.T) { - ci.Parallel(t) - - _, ctx := testContext(t) - allocs := []allocTuple{ - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - {Alloc: &structs.Allocation{ID: uuid.Generate()}}, - } - diff := &diffResult{} - - limit := 6 - must.False(t, evictAndPlace(ctx, diff, allocs, "", &limit)) - must.Eq(t, 2, limit, 
must.Sprint("evictAndReplace() should decrement limit")) - must.Len(t, 4, diff.place, must.Sprintf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)) -} diff --git a/scheduler/reconnecting_picker/reconnecting_picker.go b/scheduler/reconcile/reconnecting_picker.go similarity index 83% rename from scheduler/reconnecting_picker/reconnecting_picker.go rename to scheduler/reconcile/reconnecting_picker.go index 8a6c29ad9..469313b67 100644 --- a/scheduler/reconnecting_picker/reconnecting_picker.go +++ b/scheduler/reconcile/reconnecting_picker.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package reconnectingpicker +package reconcile import ( "time" @@ -10,19 +10,25 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -type ReconnectingPicker struct { +type reconnectingPickerInterface interface { + pickReconnectingAlloc(*structs.DisconnectStrategy, *structs.Allocation, *structs.Allocation) *structs.Allocation +} + +type reconnectingPicker struct { logger log.Logger } -func New(logger log.Logger) *ReconnectingPicker { - rp := ReconnectingPicker{ +func newReconnectingPicker(logger log.Logger) *reconnectingPicker { + rp := reconnectingPicker{ logger: logger.Named("reconnecting-picker"), } return &rp } -func (rp *ReconnectingPicker) PickReconnectingAlloc(ds *structs.DisconnectStrategy, original *structs.Allocation, replacement *structs.Allocation) *structs.Allocation { +func (rp *reconnectingPicker) pickReconnectingAlloc( + ds *structs.DisconnectStrategy, original *structs.Allocation, replacement *structs.Allocation, +) *structs.Allocation { // Check if the replacement is a newer job version. // Always prefer the replacement if true. replacementIsNewer := replacement.Job.Version > original.Job.Version || @@ -60,7 +66,7 @@ func (rp *ReconnectingPicker) PickReconnectingAlloc(ds *structs.DisconnectStrate // This function is not commutative, meaning that pickReconnectingAlloc(A, B) // is not the same as pickReconnectingAlloc(B, A). Preference is given to keep // the original allocation when possible. -func (rp *ReconnectingPicker) pickBestScore(original *structs.Allocation, replacement *structs.Allocation) *structs.Allocation { +func (rp *reconnectingPicker) pickBestScore(original *structs.Allocation, replacement *structs.Allocation) *structs.Allocation { // Check if the replacement has better placement score. 
// If any of the scores is not available, only pick the replacement if @@ -85,15 +91,15 @@ func (rp *ReconnectingPicker) pickBestScore(original *structs.Allocation, replac return original } -func (rp *ReconnectingPicker) pickOriginal(original, _ *structs.Allocation) *structs.Allocation { +func (rp *reconnectingPicker) pickOriginal(original, _ *structs.Allocation) *structs.Allocation { return original } -func (rp *ReconnectingPicker) pickReplacement(_, replacement *structs.Allocation) *structs.Allocation { +func (rp *reconnectingPicker) pickReplacement(_, replacement *structs.Allocation) *structs.Allocation { return replacement } -func (rp *ReconnectingPicker) pickLongestRunning(original, replacement *structs.Allocation) *structs.Allocation { +func (rp *reconnectingPicker) pickLongestRunning(original, replacement *structs.Allocation) *structs.Allocation { tg := original.Job.LookupTaskGroup(original.TaskGroup) orgStartTime := startOfLeaderOrOldestTaskInMain(original, tg) diff --git a/scheduler/reconnecting_picker/reconnecting_picker_test.go b/scheduler/reconcile/reconnecting_picker_test.go similarity index 95% rename from scheduler/reconnecting_picker/reconnecting_picker_test.go rename to scheduler/reconcile/reconnecting_picker_test.go index ececa81fc..3a7f9f4fd 100644 --- a/scheduler/reconnecting_picker/reconnecting_picker_test.go +++ b/scheduler/reconcile/reconnecting_picker_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -package reconnectingpicker +package reconcile import ( "testing" @@ -13,7 +13,7 @@ import ( ) func TestPickReconnectingAlloc_NewerVersion(t *testing.T) { - rp := New(hclog.NewNullLogger()) + rp := newReconnectingPicker(hclog.NewNullLogger()) ds := &structs.DisconnectStrategy{ Reconcile: "best-score", } @@ -59,14 +59,14 @@ func TestPickReconnectingAlloc_NewerVersion(t *testing.T) { }, } - result := rp.PickReconnectingAlloc(ds, original, replacement) + result := rp.pickReconnectingAlloc(ds, original, replacement) must.Eq(t, tc.expected, result) } } func TestPickReconnectingAlloc_DifferentStrategies(t *testing.T) { - rp := New(hclog.NewNullLogger()) + rp := newReconnectingPicker(hclog.NewNullLogger()) now := time.Now() original := &structs.Allocation{ @@ -164,7 +164,7 @@ func TestPickReconnectingAlloc_DifferentStrategies(t *testing.T) { Reconcile: tc.strategy, } - result := rp.PickReconnectingAlloc(ds, original, replacement) + result := rp.pickReconnectingAlloc(ds, original, replacement) must.Eq(t, tc.expected, result) }) @@ -172,7 +172,7 @@ func TestPickReconnectingAlloc_DifferentStrategies(t *testing.T) { } func TestPickReconnectingAlloc_BestScore(t *testing.T) { - rp := New(hclog.NewNullLogger()) + rp := newReconnectingPicker(hclog.NewNullLogger()) original := &structs.Allocation{ Job: &structs.Job{ @@ -249,7 +249,7 @@ func TestPickReconnectingAlloc_BestScore(t *testing.T) { replacement.ClientStatus = tc.replacementClientStatus replacement.Metrics.ScoreMetaData[0].NormScore = tc.replacementScore - result := rp.PickReconnectingAlloc(&structs.DisconnectStrategy{ + result := rp.pickReconnectingAlloc(&structs.DisconnectStrategy{ Reconcile: structs.ReconcileOptionBestScore, }, original, replacement) @@ -259,7 +259,7 @@ func TestPickReconnectingAlloc_BestScore(t *testing.T) { } func TestPickReconnectingAlloc_LongestRunning(t *testing.T) { - rp := New(hclog.NewNullLogger()) + rp := newReconnectingPicker(hclog.NewNullLogger()) now := time.Now() taskGroupNoLeader := &structs.TaskGroup{ Name: "taskGroupNoLeader", @@ -465,7 +465,7 @@ 
func TestPickReconnectingAlloc_LongestRunning(t *testing.T) { original.TaskStates["task2"] = &tc.originalState replacement.TaskStates["task2"] = &tc.replacementState - result := rp.PickReconnectingAlloc(&structs.DisconnectStrategy{ + result := rp.pickReconnectingAlloc(&structs.DisconnectStrategy{ Reconcile: structs.ReconcileOptionLongestRunning, }, original, replacement) diff --git a/scheduler/scheduler_system.go b/scheduler/scheduler_system.go index 8b6d328f8..96528a6c8 100644 --- a/scheduler/scheduler_system.go +++ b/scheduler/scheduler_system.go @@ -11,6 +11,8 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler/reconcile" + sstructs "github.com/hashicorp/nomad/scheduler/structs" ) const ( @@ -255,27 +257,27 @@ func (s *SystemScheduler) computeJobAllocs() error { live, term := structs.SplitTerminalAllocs(allocs) // Diff the required and existing allocations - diff := diffSystemAllocs(s.job, s.nodes, s.notReadyNodes, tainted, live, term, + r := reconcile.Node(s.job, s.nodes, s.notReadyNodes, tainted, live, term, s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true)) - s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", diff)) + s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", r)) // Add all the allocs to stop - for _, e := range diff.stop { - s.plan.AppendStoppedAlloc(e.Alloc, allocNotNeeded, "", "") + for _, e := range r.Stop { + s.plan.AppendStoppedAlloc(e.Alloc, sstructs.StatusAllocNotNeeded, "", "") } // Add all the allocs to migrate - for _, e := range diff.migrate { - s.plan.AppendStoppedAlloc(e.Alloc, allocNodeTainted, "", "") + for _, e := range r.Migrate { + s.plan.AppendStoppedAlloc(e.Alloc, sstructs.StatusAllocNodeTainted, "", "") } // Lost allocations should be transitioned to desired status stop and client // status lost. - for _, e := range diff.lost { - s.plan.AppendStoppedAlloc(e.Alloc, allocLost, structs.AllocClientStatusLost, "") + for _, e := range r.Lost { + s.plan.AppendStoppedAlloc(e.Alloc, sstructs.StatusAllocLost, structs.AllocClientStatusLost, "") } - for _, e := range diff.disconnecting { + for _, e := range r.Disconnecting { s.plan.AppendUnknownAlloc(e.Alloc) } @@ -283,11 +285,11 @@ func (s *SystemScheduler) computeJobAllocs() error { // Attempt to do the upgrades in place. // Reconnecting allocations need to be updated to persists alloc state // changes. - updates := make([]allocTuple, 0, len(diff.update)+len(diff.reconnecting)) - updates = append(updates, diff.update...) - updates = append(updates, diff.reconnecting...) + updates := make([]reconcile.AllocTuple, 0, len(r.Update)+len(r.Reconnecting)) + updates = append(updates, r.Update...) + updates = append(updates, r.Reconnecting...) 
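+	// inplaceUpdate returns the updates that cannot be done in place (and must +	// therefore be applied destructively), followed by those that can.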
destructiveUpdates, inplaceUpdates := inplaceUpdate(s.ctx, s.eval, s.job, s.stack, updates) - diff.update = destructiveUpdates + r.Update = destructiveUpdates for _, inplaceUpdate := range inplaceUpdates { allocExistsForTaskGroup[inplaceUpdate.TaskGroup.Name] = true @@ -295,21 +297,21 @@ func (s *SystemScheduler) computeJobAllocs() error { if s.eval.AnnotatePlan { s.plan.Annotations = &structs.PlanAnnotations{ - DesiredTGUpdates: desiredUpdates(diff, inplaceUpdates, destructiveUpdates), + DesiredTGUpdates: desiredUpdates(r, inplaceUpdates, destructiveUpdates), } } // Check if a rolling upgrade strategy is being used - limit := len(diff.update) + limit := len(r.Update) if !s.job.Stopped() && s.job.Update.Rolling() { limit = s.job.Update.MaxParallel } // Treat non in-place updates as an eviction and new placement. - s.limitReached = evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit) + s.limitReached = evictAndPlace(s.ctx, r, r.Update, sstructs.StatusAllocUpdating, &limit) // Nothing remaining to do if placement is not required - if len(diff.place) == 0 { + if len(r.Place) == 0 { if !s.job.Stopped() { for _, tg := range s.job.TaskGroups { s.queuedAllocs[tg.Name] = 0 @@ -319,16 +321,16 @@ func (s *SystemScheduler) computeJobAllocs() error { } // Record the number of allocations that needs to be placed per Task Group - for _, allocTuple := range diff.place { + for _, allocTuple := range r.Place { s.queuedAllocs[allocTuple.TaskGroup.Name] += 1 } - for _, ignoredAlloc := range diff.ignore { + for _, ignoredAlloc := range r.Ignore { allocExistsForTaskGroup[ignoredAlloc.TaskGroup.Name] = true } // Compute the placements - return s.computePlacements(diff.place, allocExistsForTaskGroup) + return s.computePlacements(r.Place, allocExistsForTaskGroup) } func mergeNodeFiltered(acc, curr *structs.AllocMetric) *structs.AllocMetric { @@ -356,7 +358,7 @@ func mergeNodeFiltered(acc, curr *structs.AllocMetric) *structs.AllocMetric { } // computePlacements computes placements for allocations -func (s *SystemScheduler) computePlacements(place []allocTuple, existingByTaskGroup map[string]bool) error { +func (s *SystemScheduler) computePlacements(place []reconcile.AllocTuple, existingByTaskGroup map[string]bool) error { nodeByID := make(map[string]*structs.Node, len(s.nodes)) for _, node := range s.nodes { nodeByID[node.ID] = node @@ -536,7 +538,7 @@ func (s *SystemScheduler) addBlocked(node *structs.Node) error { } blocked := s.eval.CreateBlockedEval(classEligibility, escaped, e.QuotaLimitReached(), s.failedTGAllocs) - blocked.StatusDescription = blockedEvalFailedPlacements + blocked.StatusDescription = sstructs.DescBlockedEvalFailedPlacements blocked.NodeID = node.ID return s.planner.CreateEval(blocked) @@ -566,3 +568,21 @@ func (s *SystemScheduler) canHandle(trigger string) bool { } return true } + +// evictAndPlace is used to mark allocations for eviction and add them to the +// placement queue. evictAndPlace modifies both the NodeReconcileResult and the +// limit. It returns true if the limit has been reached.
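+// Allocations beyond the limit are left untouched: they are neither stopped in +// the plan nor appended to the placement queue; the caller only records that +// the limit was reached.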
+func evictAndPlace(ctx Context, diff *reconcile.NodeReconcileResult, allocs []reconcile.AllocTuple, desc string, limit *int) bool { + n := len(allocs) + for i := 0; i < n && i < *limit; i++ { + a := allocs[i] + ctx.Plan().AppendStoppedAlloc(a.Alloc, desc, "", "") + diff.Place = append(diff.Place, a) + } + if n <= *limit { + *limit -= n + return false + } + *limit = 0 + return true +} diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index b306a534b..1d8e809f1 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -16,6 +16,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler/reconcile" "github.com/shoenig/test/must" ) @@ -3286,3 +3287,62 @@ func TestSystemSched_CSITopology(t *testing.T) { must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status) } + +func TestEvictAndPlace_LimitLessThanAllocs(t *testing.T) { + ci.Parallel(t) + + _, ctx := testContext(t) + allocs := []reconcile.AllocTuple{ + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + } + diff := &reconcile.NodeReconcileResult{} + + limit := 2 + must.True(t, evictAndPlace(ctx, diff, allocs, "", &limit), + must.Sprintf("evictAndReplace() should have returned true")) + must.Zero(t, limit, + must.Sprint("evictAndReplace() should decrement limit")) + must.Len(t, 2, diff.Place, + must.Sprintf("evictAndReplace() didn't insert into diffResult properly: %v", diff.Place)) +} + +func TestEvictAndPlace_LimitEqualToAllocs(t *testing.T) { + ci.Parallel(t) + + _, ctx := testContext(t) + allocs := []reconcile.AllocTuple{ + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + } + diff := &reconcile.NodeReconcileResult{} + + limit := 4 + must.False(t, evictAndPlace(ctx, diff, allocs, "", &limit), + must.Sprint("evictAndReplace() should have returned false")) + must.Zero(t, limit, must.Sprint("evictAndReplace() should decrement limit")) + must.Len(t, 4, diff.Place, + must.Sprintf("evictAndReplace() didn't insert into diffResult properly: %v", diff.Place)) +} + +func TestEvictAndPlace_LimitGreaterThanAllocs(t *testing.T) { + ci.Parallel(t) + + _, ctx := testContext(t) + allocs := []reconcile.AllocTuple{ + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + {Alloc: &structs.Allocation{ID: uuid.Generate()}}, + } + diff := &reconcile.NodeReconcileResult{} + + limit := 6 + must.False(t, evictAndPlace(ctx, diff, allocs, "", &limit)) + must.Eq(t, 2, limit, must.Sprint("evictAndReplace() should decrement limit")) + must.Len(t, 4, diff.Place, must.Sprintf("evictAndReplace() didn't insert into diffResult properly: %v", diff.Place)) +} diff --git a/scheduler/structs/const.go b/scheduler/structs/const.go new file mode 100644 index 000000000..d862e1ee6 --- /dev/null +++ b/scheduler/structs/const.go @@ -0,0 +1,54 @@ +// Copyright (c) HashiCorp, Inc. 
diff --git a/scheduler/structs/const.go b/scheduler/structs/const.go
new file mode 100644
index 000000000..d862e1ee6
--- /dev/null
+++ b/scheduler/structs/const.go
@@ -0,0 +1,54 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package structs
+
+const (
+	// StatusAllocNotNeeded is the status used when a job no longer requires an
+	// allocation
+	StatusAllocNotNeeded = "alloc not needed due to job update"
+
+	// StatusAllocReconnected is the status to use when a replacement allocation is
+	// stopped because a disconnected node reconnects.
+	StatusAllocReconnected = "alloc not needed due to disconnected client reconnect"
+
+	// StatusAllocMigrating is the status used when we must migrate an allocation
+	StatusAllocMigrating = "alloc is being migrated"
+
+	// StatusAllocUpdating is the status used when a job requires an update
+	StatusAllocUpdating = "alloc is being updated due to job update"
+
+	// StatusAllocLost is the status used when an allocation is lost
+	StatusAllocLost = "alloc is lost since its node is down"
+
+	// StatusAllocUnknown is the status used when an allocation is unknown
+	StatusAllocUnknown = "alloc is unknown since its node is disconnected"
+
+	// StatusAllocInPlace is the status used when speculating on an in-place update
+	StatusAllocInPlace = "alloc updating in-place"
+
+	// StatusAllocNodeTainted is the status used when stopping an alloc because its
+	// node is tainted.
+	StatusAllocNodeTainted = "alloc not needed as node is tainted"
+
+	// StatusAllocRescheduled is the status used when an allocation failed and was
+	// rescheduled
+	StatusAllocRescheduled = "alloc was rescheduled because it failed"
+
+	// DescBlockedEvalMaxPlan is the description used for blocked evals that are
+	// a result of hitting the max number of plan attempts
+	DescBlockedEvalMaxPlan = "created due to placement conflicts"
+
+	// DescBlockedEvalFailedPlacements is the description used for blocked evals
+	// that are a result of failing to place all allocations.
+	DescBlockedEvalFailedPlacements = "created to place remaining allocations"
+
+	// DescReschedulingFollowupEval is the description used when creating follow
+	// up evals for delayed rescheduling
+	DescReschedulingFollowupEval = "created for delayed rescheduling"
+
+	// DescDisconnectTimeoutFollowupEval is the description used when creating
+	// follow-up evals for allocations that should be stopped after their
+	// disconnect timeout has passed.
+	DescDisconnectTimeoutFollowupEval = "created for delayed disconnect timeout"
+)
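The relocated constants split into two families: `StatusAlloc*` values become the desired-status description recorded on allocations a plan stops, while `Desc*` values become the `StatusDescription` of evaluations the scheduler creates. A minimal sketch of that split, based on the call sites updated elsewhere in this diff; the `markExamples` helper itself is hypothetical and not part of the change:

```
package scheduler

import (
	"github.com/hashicorp/nomad/nomad/structs"
	sstructs "github.com/hashicorp/nomad/scheduler/structs"
)

// markExamples is a hypothetical helper illustrating where each constant
// family is used; it is not part of this change.
func markExamples(plan *structs.Plan, alloc *structs.Allocation, blocked *structs.Evaluation) {
	// StatusAlloc* constants describe why an allocation is being stopped.
	plan.AppendStoppedAlloc(alloc, sstructs.StatusAllocLost, structs.AllocClientStatusLost, "")

	// Desc* constants describe evaluations created by the scheduler, such as
	// blocked evals for allocations that could not be placed.
	blocked.StatusDescription = sstructs.DescBlockedEvalFailedPlacements
}
```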
diff --git a/scheduler/util.go b/scheduler/util.go
index 0d2372aa0..d3f33f222 100644
--- a/scheduler/util.go
+++ b/scheduler/util.go
@@ -15,36 +15,10 @@ import (
 	"github.com/hashicorp/go-set/v3"
 	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/hashicorp/nomad/scheduler/reconcile"
+	sstructs "github.com/hashicorp/nomad/scheduler/structs"
 )

-// allocTuple is a tuple of the allocation name and potential alloc ID
-type allocTuple struct {
-	Name      string
-	TaskGroup *structs.TaskGroup
-	Alloc     *structs.Allocation
-}
-
-// diffResult is used to return the sets that result from the diff
-type diffResult struct {
-	place, update, migrate, stop, ignore, lost, disconnecting, reconnecting []allocTuple
-}
-
-func (d *diffResult) GoString() string {
-	return fmt.Sprintf("allocs: (place %d) (update %d) (migrate %d) (stop %d) (ignore %d) (lost %d) (disconnecting %d) (reconnecting %d)",
-		len(d.place), len(d.update), len(d.migrate), len(d.stop), len(d.ignore), len(d.lost), len(d.disconnecting), len(d.reconnecting))
-}
-
-func (d *diffResult) Append(other *diffResult) {
-	d.place = append(d.place, other.place...)
-	d.update = append(d.update, other.update...)
-	d.migrate = append(d.migrate, other.migrate...)
-	d.stop = append(d.stop, other.stop...)
-	d.ignore = append(d.ignore, other.ignore...)
-	d.lost = append(d.lost, other.lost...)
-	d.disconnecting = append(d.disconnecting, other.disconnecting...)
-	d.reconnecting = append(d.reconnecting, other.reconnecting...)
-}
-
 // readyNodesInDCsAndPool returns all the ready nodes in the given datacenters
 // and pool, and a mapping of each data center to the count of ready nodes.
 func readyNodesInDCsAndPool(state State, dcs []string, pool string) ([]*structs.Node, map[string]struct{}, map[string]int, error) {
@@ -615,7 +589,7 @@ func setStatus(logger log.Logger, planner Planner,
 // inplaceUpdate attempts to update allocations in-place where possible. It
 // returns the allocs that couldn't be done inplace and then those that could.
 func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
-	stack Stack, updates []allocTuple) (destructive, inplace []allocTuple) {
+	stack Stack, updates []reconcile.AllocTuple) (destructive, inplace []reconcile.AllocTuple) {

 	// doInplace manipulates the updates map to make the current allocation
 	// an inplace update.
@@ -675,7 +649,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
 		// the current allocation is discounted when checking for feasibility.
 		// Otherwise we would be trying to fit the tasks current resources and
 		// updated resources. After select is called we can remove the evict.
-		ctx.Plan().AppendStoppedAlloc(update.Alloc, allocInPlace, "", "")
+		ctx.Plan().AppendStoppedAlloc(update.Alloc, sstructs.StatusAllocInPlace, "", "")

 		// Attempt to match the task group
 		option := stack.Select(update.TaskGroup,
@@ -743,11 +717,11 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
 // desiredUpdates takes the diffResult as well as the set of inplace and
 // destructive updates and returns a map of task groups to their set of desired
 // updates.
-func desiredUpdates(diff *diffResult, inplaceUpdates,
-	destructiveUpdates []allocTuple) map[string]*structs.DesiredUpdates {
+func desiredUpdates(diff *reconcile.NodeReconcileResult, inplaceUpdates,
+	destructiveUpdates []reconcile.AllocTuple) map[string]*structs.DesiredUpdates {
 	desiredTgs := make(map[string]*structs.DesiredUpdates)

-	for _, tuple := range diff.place {
+	for _, tuple := range diff.Place {
 		name := tuple.TaskGroup.Name
 		des, ok := desiredTgs[name]
 		if !ok {
@@ -758,7 +732,7 @@ func desiredUpdates(diff *diffResult, inplaceUpdates,
 		des.Place++
 	}

-	for _, tuple := range diff.stop {
+	for _, tuple := range diff.Stop {
 		name := tuple.Alloc.TaskGroup
 		des, ok := desiredTgs[name]
 		if !ok {
@@ -769,7 +743,7 @@ func desiredUpdates(diff *diffResult, inplaceUpdates,
 		des.Stop++
 	}

-	for _, tuple := range diff.ignore {
+	for _, tuple := range diff.Ignore {
 		name := tuple.TaskGroup.Name
 		des, ok := desiredTgs[name]
 		if !ok {
@@ -780,7 +754,7 @@ func desiredUpdates(diff *diffResult, inplaceUpdates,
 		des.Ignore++
 	}

-	for _, tuple := range diff.migrate {
+	for _, tuple := range diff.Migrate {
 		name := tuple.TaskGroup.Name
 		des, ok := desiredTgs[name]
 		if !ok {
@@ -864,7 +838,7 @@ func updateNonTerminalAllocsToLost(plan *structs.Plan, tainted map[string]*struc
 			alloc.DesiredStatus == structs.AllocDesiredStatusEvict) &&
 			(alloc.ClientStatus == structs.AllocClientStatusRunning ||
 				alloc.ClientStatus == structs.AllocClientStatusPending) {
-			plan.AppendStoppedAlloc(alloc, allocLost, structs.AllocClientStatusLost, "")
+			plan.AppendStoppedAlloc(alloc, sstructs.StatusAllocLost, structs.AllocClientStatusLost, "")
 		}
 	}
 }
@@ -875,7 +849,7 @@ func updateNonTerminalAllocsToLost(plan *structs.Plan, tainted map[string]*struc
 // by the reconciler to make decisions about how to update an allocation. The
 // factory allows the reconciler to be unaware of how to determine the type of
 // update necessary and can minimize the set of objects it is exposed to.
-func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateType {
+func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) reconcile.AllocUpdateType {
 	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (ignore, destructive bool, updated *structs.Allocation) {
 		// Same index, so nothing to do
 		if existing.Job.JobModifyIndex == newJob.JobModifyIndex {
@@ -922,7 +896,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
 		// the current allocation is discounted when checking for feasibility.
 		// Otherwise we would be trying to fit the tasks current resources and
 		// updated resources. After select is called we can remove the evict.
-		ctx.Plan().AppendStoppedAlloc(existing, allocInPlace, "", "")
+		ctx.Plan().AppendStoppedAlloc(existing, sstructs.StatusAllocInPlace, "", "")

 		// Attempt to match the task group
 		option := stack.Select(newTG, &SelectOptions{AllocName: existing.Name})
diff --git a/scheduler/util_test.go b/scheduler/util_test.go
index e7b99d240..ab2466b52 100644
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -14,6 +14,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/state"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/hashicorp/nomad/scheduler/reconcile"
 	"github.com/shoenig/test/must"
 )

@@ -27,12 +28,6 @@ func BenchmarkTasksUpdated(b *testing.B) {
 	}
 }

-func newNode(name string) *structs.Node {
-	n := mock.Node()
-	n.Name = name
-	return n
-}
-
 func TestReadyNodesInDCsAndPool(t *testing.T) {
 	ci.Parallel(t)

@@ -701,7 +696,7 @@ func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {
 	tg.Tasks = nil
 	tg.Tasks = append(tg.Tasks, task)

-	updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
+	updates := []reconcile.AllocTuple{{Alloc: alloc, TaskGroup: tg}}
 	stack := NewGenericStack(false, ctx)

 	// Do the inplace update.
@@ -755,7 +750,7 @@ func TestInplaceUpdate_AllocatedResources(t *testing.T) {
 		},
 	}

-	updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
+	updates := []reconcile.AllocTuple{{Alloc: alloc, TaskGroup: tg}}
 	stack := NewGenericStack(false, ctx)

 	// Do the inplace update.
@@ -813,7 +808,7 @@ func TestInplaceUpdate_NoMatch(t *testing.T) {
 	resource := &structs.Resources{CPU: 99999}
 	tg.Tasks[0].Resources = resource

-	updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
+	updates := []reconcile.AllocTuple{{Alloc: alloc, TaskGroup: tg}}
 	stack := NewGenericStack(false, ctx)

 	// Do the inplace update.
@@ -882,7 +877,7 @@ func TestInplaceUpdate_Success(t *testing.T) {
 	// Add the new services
 	tg.Tasks[0].Services = append(tg.Tasks[0].Services, newServices...)
-	updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
+	updates := []reconcile.AllocTuple{{Alloc: alloc, TaskGroup: tg}}
 	stack := NewGenericStack(false, ctx)
 	stack.SetJob(job)

@@ -932,7 +927,7 @@ func TestInplaceUpdate_WildcardDatacenters(t *testing.T) {
 	must.NoError(t, store.UpsertJobSummary(1000, mock.JobSummary(alloc.JobID)))
 	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1001, []*structs.Allocation{alloc}))

-	updates := []allocTuple{{Alloc: alloc, TaskGroup: job.TaskGroups[0]}}
+	updates := []reconcile.AllocTuple{{Alloc: alloc, TaskGroup: job.TaskGroups[0]}}
 	stack := NewGenericStack(false, ctx)

 	unplaced, inplace := inplaceUpdate(ctx, eval, job, stack, updates)
@@ -974,7 +969,7 @@ func TestInplaceUpdate_NodePools(t *testing.T) {
 	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1004,
 		[]*structs.Allocation{alloc1, alloc2}))

-	updates := []allocTuple{
+	updates := []reconcile.AllocTuple{
 		{Alloc: alloc1, TaskGroup: job.TaskGroups[0]},
 		{Alloc: alloc2, TaskGroup: job.TaskGroups[0]},
 	}
@@ -1181,36 +1176,36 @@ func TestDesiredUpdates(t *testing.T) {
 	tg2 := &structs.TaskGroup{Name: "bar"}
 	a2 := &structs.Allocation{TaskGroup: "bar"}

-	place := []allocTuple{
+	place := []reconcile.AllocTuple{
 		{TaskGroup: tg1},
 		{TaskGroup: tg1},
 		{TaskGroup: tg1},
 		{TaskGroup: tg2},
 	}
-	stop := []allocTuple{
+	stop := []reconcile.AllocTuple{
 		{TaskGroup: tg2, Alloc: a2},
 		{TaskGroup: tg2, Alloc: a2},
 	}
-	ignore := []allocTuple{
+	ignore := []reconcile.AllocTuple{
 		{TaskGroup: tg1},
 	}
-	migrate := []allocTuple{
+	migrate := []reconcile.AllocTuple{
 		{TaskGroup: tg2},
 	}
-	inplace := []allocTuple{
+	inplace := []reconcile.AllocTuple{
 		{TaskGroup: tg1},
 		{TaskGroup: tg1},
 	}
-	destructive := []allocTuple{
+	destructive := []reconcile.AllocTuple{
 		{TaskGroup: tg1},
 		{TaskGroup: tg2},
 		{TaskGroup: tg2},
 	}
-	diff := &diffResult{
-		place:   place,
-		stop:    stop,
-		ignore:  ignore,
-		migrate: migrate,
+	diff := &reconcile.NodeReconcileResult{
+		Place:   place,
+		Stop:    stop,
+		Ignore:  ignore,
+		Migrate: migrate,
 	}

 	expected := map[string]*structs.DesiredUpdates{