From 690fc7809195d7837abbef45e3f71a5b9522a7fa Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 5 Jul 2017 21:26:04 -0700 Subject: [PATCH] Plan apply handles canaries and success is set via update --- nomad/plan_apply.go | 56 +++++++++++++++++++++++++++++++++++++--- nomad/structs/structs.go | 22 +++++++++++++++- scheduler/reconcile.go | 10 ++++--- scheduler/util.go | 3 ++- 4 files changed, 82 insertions(+), 9 deletions(-) diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index cb5c5eb10..a300126f4 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -132,8 +132,8 @@ func (s *Server) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap Job: plan.Job, Alloc: make([]*structs.Allocation, 0, minUpdates), }, - Deployment: plan.Deployment, - DeploymentUpdates: plan.DeploymentUpdates, + Deployment: result.Deployment, + DeploymentUpdates: result.DeploymentUpdates, } for _, updateList := range result.NodeUpdate { req.Alloc = append(req.Alloc, updateList...) @@ -201,8 +201,10 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P // Create a result holder for the plan result := &structs.PlanResult{ - NodeUpdate: make(map[string][]*structs.Allocation), - NodeAllocation: make(map[string][]*structs.Allocation), + NodeUpdate: make(map[string][]*structs.Allocation), + NodeAllocation: make(map[string][]*structs.Allocation), + Deployment: plan.Deployment.Copy(), + DeploymentUpdates: plan.DeploymentUpdates, } // Collect all the nodeIDs @@ -242,6 +244,8 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P if plan.AllAtOnce { result.NodeUpdate = nil result.NodeAllocation = nil + result.DeploymentUpdates = nil + result.Deployment = nil return true } @@ -315,10 +319,54 @@ OUTER: err := fmt.Errorf("partialCommit with RefreshIndex of 0 (%d node, %d alloc)", nodeIndex, allocIndex) mErr.Errors = append(mErr.Errors, err) } + + // If there was a partial commit and we are operating within a + // deployment correct for any canary that may have been desired to be + // placed but wasn't actually placed + correctDeploymentCanaries(result) } return result, mErr.ErrorOrNil() } +// TODO test +// correctDeploymentCanaries ensures that the deployment object doesn't list any +// canaries as placed if they didn't actually get placed. This could happen if +// the plan had a partial commit. +func correctDeploymentCanaries(result *structs.PlanResult) { + // Hot path + if result.Deployment == nil || !result.Deployment.HasPlacedCanaries() { + return + } + + // Build a set of all the allocations IDs that were placed + placedAllocs := make(map[string]struct{}, len(result.NodeAllocation)) + for _, placed := range result.NodeAllocation { + for _, alloc := range placed { + placedAllocs[alloc.ID] = struct{}{} + } + } + + // Go through all the canaries and ensure that the result list only contains + // those that have been placed + for _, group := range result.Deployment.TaskGroups { + canaries := group.PlacedCanaries + if len(canaries) == 0 { + continue + } + + // Prune the canaries in place to avoid allocating an extra slice + i := 0 + for _, canaryID := range canaries { + if _, ok := placedAllocs[canaryID]; ok { + canaries[i] = canaryID + i++ + } + } + + group.PlacedCanaries = canaries[:i] + } +} + // evaluateNodePlan is used to evalute the plan for a single node, // returning if the plan is valid or if an error is encountered func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID string) (bool, error) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 2d86afb5c..d1b91e71c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3995,6 +3995,19 @@ func (d *Deployment) GetID() string { return d.ID } +// HasPlacedCanaries returns whether the deployment has placed canaries +func (d *Deployment) HasPlacedCanaries() bool { + if d == nil || len(d.TaskGroups) == 0 { + return false + } + for _, group := range d.TaskGroups { + if len(group.PlacedCanaries) != 0 { + return true + } + } + return false +} + func (d *Deployment) GoString() string { base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) for group, state := range d.TaskGroups { @@ -4876,6 +4889,12 @@ type PlanResult struct { // NodeAllocation contains all the allocations that were committed. NodeAllocation map[string][]*Allocation + // Deployment is the deployment that was committed. + Deployment *Deployment + + // DeploymentUpdates is the set of deployment updates that were commited. + DeploymentUpdates []*DeploymentStatusUpdate + // RefreshIndex is the index the worker should refresh state up to. // This allows all evictions and allocations to be materialized. // If any allocations were rejected due to stale data (node state, @@ -4889,7 +4908,8 @@ type PlanResult struct { // IsNoOp checks if this plan result would do nothing func (p *PlanResult) IsNoOp() bool { - return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 + return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && + len(p.DeploymentUpdates) == 0 && p.Deployment == nil } // FullCommit is used to check if all the allocations in a plan diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index f44d09b8f..6c05d6463 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -157,10 +157,14 @@ func (a *allocReconciler) Compute() *reconcileResults { complete = complete && groupComplete } + // TODO test // Mark the deployment as complete if possible if a.deployment != nil && complete { - a.deployment.Status = structs.DeploymentStatusSuccessful - a.deployment.StatusDescription = structs.DeploymentStatusDescriptionSuccessful + a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{ + DeploymentID: a.deployment.ID, + Status: structs.DeploymentStatusSuccessful, + StatusDescription: structs.DeploymentStatusDescriptionSuccessful, + }) } return a.result @@ -257,7 +261,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // Get the deployment state for the group var dstate *structs.DeploymentState - var existingDeployment bool + existingDeployment := false deploymentComplete := true if a.deployment != nil { dstate, existingDeployment = a.deployment.TaskGroups[group] diff --git a/scheduler/util.go b/scheduler/util.go index 3fcaa6893..49368981a 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -290,7 +290,8 @@ func retryMax(max int, cb func() (bool, error), reset func() bool) error { // If the result is nil, false is returned. func progressMade(result *structs.PlanResult) bool { return result != nil && (len(result.NodeUpdate) != 0 || - len(result.NodeAllocation) != 0) + len(result.NodeAllocation) != 0) || result.Deployment != nil || + len(result.DeploymentUpdates) != 0 } // taintedNodes is used to scan the allocations and then check if the