diff --git a/.changelog/26188.txt b/.changelog/26188.txt new file mode 100644 index 000000000..638c40be2 --- /dev/null +++ b/.changelog/26188.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: Add reconciler annotations to the output of the `eval status` command +``` diff --git a/api/evaluations.go b/api/evaluations.go index 3c5440965..bd59b44c6 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -116,6 +116,7 @@ type Evaluation struct { BlockedEval string RelatedEvals []*EvaluationStub FailedTGAllocs map[string]*AllocationMetric + PlanAnnotations *PlanAnnotations ClassEligibility map[string]bool EscapedComputedClass bool QuotaLimitReached string diff --git a/command/eval_status.go b/command/eval_status.go index dc32182dc..a4b1eb59c 100644 --- a/command/eval_status.go +++ b/command/eval_status.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" "github.com/posener/complete" + "github.com/ryanuber/columnize" ) type EvalStatusCommand struct { @@ -257,6 +258,36 @@ func (c *EvalStatusCommand) formatEvalStatus(eval *api.Evaluation, placedAllocs c.Ui.Output(c.Colorize().Color("\n[bold]Related Evaluations[reset]")) c.Ui.Output(formatRelatedEvalStubs(eval.RelatedEvals, length)) } + if eval.PlanAnnotations != nil { + + if len(eval.PlanAnnotations.DesiredTGUpdates) > 0 { + c.Ui.Output(c.Colorize().Color("\n[bold]Reconciler Annotations[reset]")) + annotations := make([]string, len(eval.PlanAnnotations.DesiredTGUpdates)+1) + annotations[0] = "Task Group|Ignore|Place|Stop|Migrate|InPlace|Destructive|Canary|Preemptions" + i := 1 + for tg, updates := range eval.PlanAnnotations.DesiredTGUpdates { + annotations[i] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d|%d|%d", + tg, + updates.Ignore, + updates.Place, + updates.Stop, + updates.Migrate, + updates.InPlaceUpdate, + updates.DestructiveUpdate, + updates.Canary, + updates.Preemptions, + ) + i++ + } + c.Ui.Output(columnize.SimpleFormat(annotations)) + } + + if 
len(eval.PlanAnnotations.PreemptedAllocs) > 0 { + c.Ui.Output(c.Colorize().Color("\n[bold]Preempted Allocations[reset]")) + allocsOut := formatPreemptedAllocListStubs(eval.PlanAnnotations.PreemptedAllocs, length) + c.Ui.Output(allocsOut) + } + } if len(placedAllocs) > 0 { c.Ui.Output(c.Colorize().Color("\n[bold]Placed Allocations[reset]")) allocsOut := formatAllocListStubs(placedAllocs, false, length) @@ -323,3 +354,27 @@ func formatRelatedEvalStubs(evals []*api.EvaluationStub, length int) string { return formatList(out) } + +// formatPreemptedAllocListStubs formats alloc stubs but assumes they don't all +// belong to the same job, as is the case when allocs are preempted by another +// job +func formatPreemptedAllocListStubs(stubs []*api.AllocationListStub, uuidLength int) string { + allocs := make([]string, len(stubs)+1) + allocs[0] = "ID|Job ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" + for i, alloc := range stubs { + now := time.Now() + createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) + modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) + allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s", + limit(alloc.ID, uuidLength), + alloc.JobID, + limit(alloc.NodeID, uuidLength), + alloc.TaskGroup, + alloc.JobVersion, + alloc.DesiredStatus, + alloc.ClientStatus, + createTimePretty, + modTimePretty) + } + return formatList(allocs) +} diff --git a/command/eval_status_test.go b/command/eval_status_test.go index b09ac0512..77578b99d 100644 --- a/command/eval_status_test.go +++ b/command/eval_status_test.go @@ -150,6 +150,22 @@ func TestEvalStatusCommand_Format(t *testing.T) { CoalescedFailures: 0, ScoreMetaData: []*api.NodeScoreMeta{}, }}, + PlanAnnotations: &api.PlanAnnotations{ + DesiredTGUpdates: map[string]*api.DesiredUpdates{"web": {Place: 10}}, + PreemptedAllocs: []*api.AllocationListStub{ + { + ID: uuid.Generate(), + JobID: "another", + NodeID: uuid.Generate(), + TaskGroup: "db", + DesiredStatus: "evict", 
+ JobVersion: 3, + ClientStatus: "complete", + CreateTime: now.Add(-10 * time.Minute).UnixNano(), + ModifyTime: now.Add(-2 * time.Second).UnixNano(), + }, + }, + }, ClassEligibility: map[string]bool{}, EscapedComputedClass: true, QuotaLimitReached: "", @@ -207,4 +223,6 @@ Task Group "web" (failed to place 1 allocation): must.StrContains(t, out, `Related Evaluations`) must.StrContains(t, out, `Placed Allocations`) + must.StrContains(t, out, `Reconciler Annotations`) + must.StrContains(t, out, `Preempted Allocations`) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e500192cd..6f08d54e5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -12428,6 +12428,9 @@ type Evaluation struct { // to determine the cause. FailedTGAllocs map[string]*AllocMetric + // PlanAnnotations represents the output of the reconciliation step. + PlanAnnotations *PlanAnnotations + // ClassEligibility tracks computed node classes that have been explicitly // marked as eligible or ineligible. 
ClassEligibility map[string]bool diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 882f3c102..e42b1aefc 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -73,9 +73,10 @@ type GenericScheduler struct { deployment *structs.Deployment - blocked *structs.Evaluation - failedTGAllocs map[string]*structs.AllocMetric - queuedAllocs map[string]int + blocked *structs.Evaluation + failedTGAllocs map[string]*structs.AllocMetric + queuedAllocs map[string]int + planAnnotations *structs.PlanAnnotations } // NewServiceScheduler is a factory function to instantiate a new service scheduler @@ -132,7 +133,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) (err error) { desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) return setStatus(s.logger, s.planner, s.eval, nil, s.blocked, - s.failedTGAllocs, structs.EvalStatusFailed, desc, s.queuedAllocs, + s.failedTGAllocs, s.planAnnotations, structs.EvalStatusFailed, desc, s.queuedAllocs, s.deployment.GetID()) } @@ -151,7 +152,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) (err error) { mErr.Errors = append(mErr.Errors, err) } if err := setStatus(s.logger, s.planner, s.eval, nil, s.blocked, - s.failedTGAllocs, statusErr.EvalStatus, err.Error(), + s.failedTGAllocs, s.planAnnotations, statusErr.EvalStatus, err.Error(), s.queuedAllocs, s.deployment.GetID()); err != nil { mErr.Errors = append(mErr.Errors, err) } @@ -173,7 +174,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) (err error) { // Update the status to complete return setStatus(s.logger, s.planner, s.eval, nil, s.blocked, - s.failedTGAllocs, structs.EvalStatusComplete, "", s.queuedAllocs, + s.failedTGAllocs, s.planAnnotations, structs.EvalStatusComplete, "", s.queuedAllocs, s.deployment.GetID()) } @@ -285,6 +286,9 @@ func (s *GenericScheduler) process() (bool, error) { } // Submit the plan and store the results. 
+ if s.eval.AnnotatePlan { + s.plan.Annotations = s.planAnnotations + } result, newState, err := s.planner.SubmitPlan(s.plan) s.planResult = result if err != nil { @@ -359,10 +363,8 @@ func (s *GenericScheduler) computeJobAllocs() error { s.logger.Debug("reconciled current state with desired state", result.Fields()...) } - if s.eval.AnnotatePlan { - s.plan.Annotations = &structs.PlanAnnotations{ - DesiredTGUpdates: result.DesiredTGUpdates, - } + s.planAnnotations = &structs.PlanAnnotations{ + DesiredTGUpdates: result.DesiredTGUpdates, } // Add the deployment changes to the plan @@ -912,10 +914,10 @@ func (s *GenericScheduler) handlePreemptions(option *feasible.RankedNode, alloc s.plan.AppendPreemptedAlloc(stop, alloc.ID) preemptedAllocIDs = append(preemptedAllocIDs, stop.ID) - if s.eval.AnnotatePlan && s.plan.Annotations != nil { - s.plan.Annotations.PreemptedAllocs = append(s.plan.Annotations.PreemptedAllocs, stop.Stub(nil)) - if s.plan.Annotations.DesiredTGUpdates != nil { - desired := s.plan.Annotations.DesiredTGUpdates[missing.TaskGroup().Name] + if s.planAnnotations != nil { + s.planAnnotations.PreemptedAllocs = append(s.planAnnotations.PreemptedAllocs, stop.Stub(nil)) + if s.planAnnotations.DesiredTGUpdates != nil { + desired := s.planAnnotations.DesiredTGUpdates[missing.TaskGroup().Name] desired.Preemptions += 1 } } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 071bf3e0f..3ad5f22b0 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -64,10 +64,12 @@ func TestServiceSched_JobRegister(t *testing.T) { } plan := h.Plans[0] - // Ensure the plan doesn't have annotations. 
+ // Ensure the plan doesn't have annotations but the eval does if plan.Annotations != nil { t.Fatalf("expected no annotations") } + must.SliceNotEmpty(t, h.Evals) + must.Eq(t, 10, h.Evals[0].PlanAnnotations.DesiredTGUpdates["web"].Place) // Ensure the eval has no spawned blocked eval if len(h.CreateEvals) != 0 { diff --git a/scheduler/scheduler_system.go b/scheduler/scheduler_system.go index 131cec825..862dcf261 100644 --- a/scheduler/scheduler_system.go +++ b/scheduler/scheduler_system.go @@ -52,8 +52,9 @@ type SystemScheduler struct { limitReached bool nextEval *structs.Evaluation - failedTGAllocs map[string]*structs.AllocMetric - queuedAllocs map[string]int + failedTGAllocs map[string]*structs.AllocMetric + queuedAllocs map[string]int + planAnnotations *structs.PlanAnnotations } // NewSystemScheduler is a factory function to instantiate a new system @@ -97,7 +98,8 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) { // Verify the evaluation trigger reason is understood if !s.canHandle(eval.TriggeredBy) { desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) - return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, structs.EvalStatusFailed, desc, + return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, + s.failedTGAllocs, s.planAnnotations, structs.EvalStatusFailed, desc, s.queuedAllocs, "") } @@ -110,14 +112,16 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) { progress := func() bool { return progressMade(s.planResult) } if err := retryMax(limit, s.process, progress); err != nil { if statusErr, ok := err.(*SetStatusError); ok { - return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, statusErr.EvalStatus, err.Error(), + return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, + s.failedTGAllocs, s.planAnnotations, statusErr.EvalStatus, err.Error(), s.queuedAllocs, "") } return err } // Update the status to 
complete - return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, structs.EvalStatusComplete, "", + return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, + s.failedTGAllocs, s.planAnnotations, structs.EvalStatusComplete, "", s.queuedAllocs, "") } @@ -186,6 +190,9 @@ func (s *SystemScheduler) process() (bool, error) { } // Submit the plan + if s.eval.AnnotatePlan { + s.plan.Annotations = s.planAnnotations + } result, newState, err := s.planner.SubmitPlan(s.plan) s.planResult = result if err != nil { @@ -298,10 +305,8 @@ func (s *SystemScheduler) computeJobAllocs() error { allocExistsForTaskGroup[inplaceUpdate.TaskGroup.Name] = true } - if s.eval.AnnotatePlan { - s.plan.Annotations = &structs.PlanAnnotations{ - DesiredTGUpdates: desiredUpdates(r, inplaceUpdates, destructiveUpdates), - } + s.planAnnotations = &structs.PlanAnnotations{ + DesiredTGUpdates: desiredUpdates(r, inplaceUpdates, destructiveUpdates), } // Check if a rolling upgrade strategy is being used @@ -415,9 +420,9 @@ func (s *SystemScheduler) computePlacements(place []reconciler.AllocTuple, exist // If we are annotating the plan, then decrement the desired // placements based on whether the node meets the constraints - if s.eval.AnnotatePlan && s.plan.Annotations != nil && - s.plan.Annotations.DesiredTGUpdates != nil { - desired := s.plan.Annotations.DesiredTGUpdates[tgName] + if s.planAnnotations != nil && + s.planAnnotations.DesiredTGUpdates != nil { + desired := s.planAnnotations.DesiredTGUpdates[tgName] desired.Place -= 1 } @@ -511,10 +516,10 @@ func (s *SystemScheduler) computePlacements(place []reconciler.AllocTuple, exist s.plan.AppendPreemptedAlloc(stop, alloc.ID) preemptedAllocIDs = append(preemptedAllocIDs, stop.ID) - if s.eval.AnnotatePlan && s.plan.Annotations != nil { - s.plan.Annotations.PreemptedAllocs = append(s.plan.Annotations.PreemptedAllocs, stop.Stub(nil)) - if s.plan.Annotations.DesiredTGUpdates != nil { - desired := 
s.plan.Annotations.DesiredTGUpdates[tgName] + if s.planAnnotations != nil { + s.planAnnotations.PreemptedAllocs = append(s.planAnnotations.PreemptedAllocs, stop.Stub(nil)) + if s.planAnnotations.DesiredTGUpdates != nil { + desired := s.planAnnotations.DesiredTGUpdates[tgName] desired.Preemptions += 1 } } diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index 44f2132e2..757c6a4ea 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -53,8 +53,10 @@ func TestSystemSched_JobRegister(t *testing.T) { must.Len(t, 1, h.Plans) plan := h.Plans[0] - // Ensure the plan does not have annotations + // Ensure the plan does not have annotations but the eval does must.Nil(t, plan.Annotations, must.Sprint("expected no annotations")) + must.SliceNotEmpty(t, h.Evals) + must.Eq(t, 10, h.Evals[0].PlanAnnotations.DesiredTGUpdates["web"].Place) // Ensure the plan allocated var planned []*structs.Allocation diff --git a/scheduler/util.go b/scheduler/util.go index 0c3ec9e71..31e20ff05 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -540,7 +540,9 @@ func renderTemplatesUpdated(a, b *structs.RestartPolicy, msg string) comparison // setStatus is used to update the status of the evaluation func setStatus(logger log.Logger, planner sstructs.Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, - tgMetrics map[string]*structs.AllocMetric, status, desc string, + tgMetrics map[string]*structs.AllocMetric, + annotations *structs.PlanAnnotations, + status, desc string, queuedAllocs map[string]int, deploymentID string) error { logger.Debug("setting eval status", "status", status) @@ -558,6 +560,7 @@ func setStatus(logger log.Logger, planner sstructs.Planner, if queuedAllocs != nil { newEval.QueuedAllocations = queuedAllocs } + newEval.PlanAnnotations = annotations return planner.UpdateEval(newEval) } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 63e1568a9..490fa35c3
100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -597,7 +597,7 @@ func TestSetStatus(t *testing.T) { eval := mock.Eval() status := "a" desc := "b" - must.NoError(t, setStatus(logger, h, eval, nil, nil, nil, status, desc, nil, "")) + must.NoError(t, setStatus(logger, h, eval, nil, nil, nil, nil, status, desc, nil, "")) must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) newEval := h.Evals[0] @@ -607,7 +607,7 @@ func TestSetStatus(t *testing.T) { // Test next evals h = tests.NewHarness(t) next := mock.Eval() - must.NoError(t, setStatus(logger, h, eval, next, nil, nil, status, desc, nil, "")) + must.NoError(t, setStatus(logger, h, eval, next, nil, nil, nil, status, desc, nil, "")) must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) newEval = h.Evals[0] @@ -616,7 +616,7 @@ func TestSetStatus(t *testing.T) { // Test blocked evals h = tests.NewHarness(t) blocked := mock.Eval() - must.NoError(t, setStatus(logger, h, eval, nil, blocked, nil, status, desc, nil, "")) + must.NoError(t, setStatus(logger, h, eval, nil, blocked, nil, nil, status, desc, nil, "")) must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) newEval = h.Evals[0] @@ -625,7 +625,7 @@ func TestSetStatus(t *testing.T) { // Test metrics h = tests.NewHarness(t) metrics := map[string]*structs.AllocMetric{"foo": nil} - must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, status, desc, nil, "")) + must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, nil, status, desc, nil, "")) must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) newEval = h.Evals[0] @@ -636,15 +636,25 @@ func TestSetStatus(t *testing.T) { h = tests.NewHarness(t) queuedAllocs := map[string]int{"web": 1} - must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, status, desc, queuedAllocs, "")) + must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, nil, 
status, desc, queuedAllocs, "")) + must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) + + // Test annotations + h = tests.NewHarness(t) + annotations := &structs.PlanAnnotations{ + DesiredTGUpdates: map[string]*structs.DesiredUpdates{"web": {Place: 1}}, + PreemptedAllocs: []*structs.AllocListStub{{ID: uuid.Generate()}}, + } + + must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, annotations, status, desc, queuedAllocs, "")) must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) newEval = h.Evals[0] - must.Eq(t, newEval.QueuedAllocations, queuedAllocs, must.Sprintf("setStatus() didn't set failed task group metrics correctly: %v", newEval)) + must.Eq(t, annotations, newEval.PlanAnnotations, must.Sprintf("setStatus() didn't set plan annotations correctly: %v", newEval)) h = tests.NewHarness(t) dID := uuid.Generate() - must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, status, desc, queuedAllocs, dID)) + must.NoError(t, setStatus(logger, h, eval, nil, nil, metrics, nil, status, desc, queuedAllocs, dID)) must.Eq(t, 1, len(h.Evals), must.Sprintf("setStatus() didn't update plan: %v", h.Evals)) newEval = h.Evals[0]