From aa3c08d069e9a772906c9cc389e2ba210548fa27 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 30 Jun 2025 09:23:36 -0400 Subject: [PATCH] eval status: enrich with related evals and placed allocs tables (#26156) When debugging an evaluation, you almost always want to know about all the related evaluations and what allocations were placed by that evaluation (and where), not just failed placements. We can enrich the command by adding the `related` query parameter to the API, and having the command query for the evaluation's allocations automatically. Emit this data as a pair of new tables and expose fields like quota limits, and previous/next/blocked eval without the `-verbose` flag. Update the docs to include the full output and remove references to long-removed behavior of the `-json` flag. Ref: https://hashicorp.atlassian.net/browse/NMD-818 Ref: https://go.hashi.co/rfc/nmd-212 --- .changelog/26156.txt | 7 + api/api_test.go | 3 +- api/evaluations.go | 2 +- command/eval_status.go | 107 +++++++++++----- command/eval_status_test.go | 120 ++++++++++++++++++ website/content/docs/commands/eval/status.mdx | 43 +++++-- 6 files changed, 232 insertions(+), 50 deletions(-) create mode 100644 .changelog/26156.txt diff --git a/.changelog/26156.txt b/.changelog/26156.txt new file mode 100644 index 000000000..fc0419d57 --- /dev/null +++ b/.changelog/26156.txt @@ -0,0 +1,7 @@ +```release-note:improvement +cli: Added related evals and placed allocations tables to the eval status command, and exposed more fields without requiring the `-verbose` flag. +``` + +```release-note:improvement +api: The `Evaluations.Info` method of the Go API now populates the `RelatedEvals` field. 
+``` diff --git a/api/api_test.go b/api/api_test.go index 879a81d57..085e90432 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -161,7 +161,7 @@ func TestSetQueryOptions(t *testing.T) { c, s := makeClient(t, nil, nil) defer s.Stop() - r, _ := c.newRequest("GET", "/v1/jobs") + r, _ := c.newRequest("GET", "/v1/jobs?format=baz") q := &QueryOptions{ Region: "foo", Namespace: "bar", @@ -188,6 +188,7 @@ func TestSetQueryOptions(t *testing.T) { try("index", "1000") try("wait", "100000ms") try("reverse", "true") + try("format", "baz") } func TestQueryOptionsContext(t *testing.T) { diff --git a/api/evaluations.go b/api/evaluations.go index feac278ca..3c5440965 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -46,7 +46,7 @@ func (e *Evaluations) Count(q *QueryOptions) (*EvalCountResponse, *QueryMeta, er // Info is used to query a single evaluation by its ID. func (e *Evaluations) Info(evalID string, q *QueryOptions) (*Evaluation, *QueryMeta, error) { var resp Evaluation - qm, err := e.client.query("/v1/evaluation/"+evalID, &resp, q) + qm, err := e.client.query("/v1/evaluation/"+evalID+"?related=true", &resp, q) if err != nil { return nil, nil, err } diff --git a/command/eval_status.go b/command/eval_status.go index 2cc1fcd61..dc32182dc 100644 --- a/command/eval_status.go +++ b/command/eval_status.go @@ -36,13 +36,15 @@ Eval Status Options: Monitor an outstanding evaluation -verbose - Show full information. + Show full-length IDs and exact timestamps. -json - Output the evaluation in its JSON format. + Output the evaluation in its JSON format. This format will not include + placed allocations. -t - Format and display evaluation using a Go template. + Format and display evaluation using a Go template. This format will not + include placed allocations. -ui Open the evaluation in the browser. 
@@ -73,10 +75,6 @@ func (c *EvalStatusCommand) AutocompleteArgs() complete.Predictor { return nil } - if err != nil { - return nil - } - resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Evals, nil) if err != nil { return []string{} @@ -120,12 +118,6 @@ func (c *EvalStatusCommand) Run(args []string) int { evalID := args[0] - // Truncate the id unless full length is requested - length := shortId - if verbose { - length = fullId - } - // Query the allocation info if len(evalID) == 1 { c.Ui.Error("Identifier must contain at least two characters.") @@ -153,6 +145,12 @@ func (c *EvalStatusCommand) Run(args []string) int { return 1 } + // Truncate the id unless full length is requested + length := shortId + if verbose { + length = fullId + } + // If we are in monitor mode, monitor and exit if monitor { mon := newMonitor(c.Ui, client, length) @@ -178,6 +176,30 @@ func (c *EvalStatusCommand) Run(args []string) int { return 0 } + placedAllocs, _, err := client.Evaluations().Allocations(eval.ID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying related allocations: %s", err)) + return 1 + } + + c.formatEvalStatus(eval, placedAllocs, verbose, length) + + hint, _ := c.Meta.showUIPath(UIHintContext{ + Command: "eval status", + PathParams: map[string]string{ + "evalID": eval.ID, + }, + OpenURL: openURL, + }) + if hint != "" { + c.Ui.Warn(hint) + } + + return 0 +} + +func (c *EvalStatusCommand) formatEvalStatus(eval *api.Evaluation, placedAllocs []*api.AllocationListStub, verbose bool, length int) { + failureString, failures := evalFailureStatus(eval) triggerNoun, triggerSubj := getTriggerDetails(eval) statusDesc := eval.StatusDescription @@ -220,16 +242,27 @@ func (c *EvalStatusCommand) Run(args []string) int { basic = append(basic, fmt.Sprintf("Wait Until|%s", formatTime(eval.WaitUntil))) } - - if verbose { - // NextEval, PreviousEval, BlockedEval + if eval.QuotaLimitReached != "" { basic = append(basic, - fmt.Sprintf("Previous Eval|%s", 
eval.PreviousEval), - fmt.Sprintf("Next Eval|%s", eval.NextEval), - fmt.Sprintf("Blocked Eval|%s", eval.BlockedEval)) + fmt.Sprintf("Quota Limit Reached|%s", eval.QuotaLimitReached)) } + basic = append(basic, + fmt.Sprintf("Previous Eval|%s", limit(eval.PreviousEval, length)), + fmt.Sprintf("Next Eval|%s", limit(eval.NextEval, length)), + fmt.Sprintf("Blocked Eval|%s", limit(eval.BlockedEval, length)), + ) c.Ui.Output(formatKV(basic)) + if len(eval.RelatedEvals) > 0 { + c.Ui.Output(c.Colorize().Color("\n[bold]Related Evaluations[reset]")) + c.Ui.Output(formatRelatedEvalStubs(eval.RelatedEvals, length)) + } + if len(placedAllocs) > 0 { + c.Ui.Output(c.Colorize().Color("\n[bold]Placed Allocations[reset]")) + allocsOut := formatAllocListStubs(placedAllocs, false, length) + c.Ui.Output(allocsOut) + } + if failures { c.Ui.Output(c.Colorize().Color("\n[bold]Failed Placements[reset]")) sorted := sortedTaskGroupFromMetrics(eval.FailedTGAllocs) @@ -240,29 +273,18 @@ func (c *EvalStatusCommand) Run(args []string) int { if metrics.CoalescedFailures > 0 { noun += "s" } - c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) + c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", + tg, metrics.CoalescedFailures+1, noun)) c.Ui.Output(formatAllocMetrics(metrics, false, " ")) c.Ui.Output("") } if eval.BlockedEval != "" { - c.Ui.Output(fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder", + c.Ui.Output(fmt.Sprintf( + "Evaluation %q waiting for additional capacity to place remainder", limit(eval.BlockedEval, length))) } } - - hint, _ := c.Meta.showUIPath(UIHintContext{ - Command: "eval status", - PathParams: map[string]string{ - "evalID": eval.ID, - }, - OpenURL: openURL, - }) - if hint != "" { - c.Ui.Warn(hint) - } - - return 0 } func sortedTaskGroupFromMetrics(groups map[string]*api.AllocationMetric) []string { @@ -284,3 +306,20 @@ func getTriggerDetails(eval *api.Evaluation) (noun, subject 
string) { return "", "" } } + +func formatRelatedEvalStubs(evals []*api.EvaluationStub, length int) string { + out := make([]string, len(evals)+1) + out[0] = "ID|Priority|Triggered By|Node ID|Status|Description" + for i, eval := range evals { + out[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s|%s", + limit(eval.ID, length), + eval.Priority, + eval.TriggeredBy, + limit(eval.NodeID, length), + eval.Status, + eval.StatusDescription, + ) + } + + return formatList(out) +} diff --git a/command/eval_status_test.go b/command/eval_status_test.go index dc15cb3a1..b09ac0512 100644 --- a/command/eval_status_test.go +++ b/command/eval_status_test.go @@ -6,9 +6,13 @@ package command import ( "strings" "testing" + "time" "github.com/hashicorp/cli" + "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/posener/complete" @@ -88,3 +92,119 @@ func TestEvalStatusCommand_AutocompleteArgs(t *testing.T) { must.SliceLen(t, 1, res) must.Eq(t, e.ID, res[0]) } + +func TestEvalStatusCommand_Format(t *testing.T) { + now := time.Now().UTC() + ui := cli.NewMockUi() + cmd := &EvalStatusCommand{Meta: Meta{Ui: ui}} + + eval := &api.Evaluation{ + ID: uuid.Generate(), + Priority: 50, + Type: api.JobTypeService, + TriggeredBy: structs.EvalTriggerAllocStop, + Namespace: api.DefaultNamespace, + JobID: "example", + JobModifyIndex: 0, + DeploymentID: uuid.Generate(), + Status: api.EvalStatusComplete, + StatusDescription: "complete", + NextEval: "", + PreviousEval: uuid.Generate(), + BlockedEval: uuid.Generate(), + RelatedEvals: []*api.EvaluationStub{{ + ID: uuid.Generate(), + Priority: 50, + Type: "service", + TriggeredBy: "queued-allocs", + Namespace: api.DefaultNamespace, + JobID: "example", + DeploymentID: "", + Status: "pending", + StatusDescription: "", + WaitUntil: time.Time{}, + NextEval: "", + PreviousEval: 
uuid.Generate(), + BlockedEval: "", + CreateIndex: 0, + ModifyIndex: 0, + CreateTime: 0, + ModifyTime: 0, + }}, + FailedTGAllocs: map[string]*api.AllocationMetric{"web": { + NodesEvaluated: 6, + NodesFiltered: 4, + NodesInPool: 10, + NodesAvailable: map[string]int{}, + ClassFiltered: map[string]int{}, + ConstraintFiltered: map[string]int{"${attr.kernel.name} = linux": 2}, + NodesExhausted: 2, + ClassExhausted: map[string]int{}, + DimensionExhausted: map[string]int{"memory": 2}, + QuotaExhausted: []string{}, + ResourcesExhausted: map[string]*api.Resources{"web": { + Cores: pointer.Of(3), + }}, + Scores: map[string]float64{}, + AllocationTime: 0, + CoalescedFailures: 0, + ScoreMetaData: []*api.NodeScoreMeta{}, + }}, + ClassEligibility: map[string]bool{}, + EscapedComputedClass: true, + QuotaLimitReached: "", + QueuedAllocations: map[string]int{}, + SnapshotIndex: 1001, + CreateIndex: 999, + ModifyIndex: 1003, + CreateTime: now.UnixNano(), + ModifyTime: now.Add(time.Second).UnixNano(), + } + + placed := []*api.AllocationListStub{ + { + ID: uuid.Generate(), + NodeID: uuid.Generate(), + TaskGroup: "web", + DesiredStatus: "run", + JobVersion: 2, + ClientStatus: "running", + CreateTime: now.Add(-10 * time.Second).UnixNano(), + ModifyTime: now.Add(-2 * time.Second).UnixNano(), + }, + { + ID: uuid.Generate(), + NodeID: uuid.Generate(), + TaskGroup: "web", + JobVersion: 2, + DesiredStatus: "run", + ClientStatus: "pending", + CreateTime: now.Add(-3 * time.Second).UnixNano(), + ModifyTime: now.Add(-1 * time.Second).UnixNano(), + }, + { + ID: uuid.Generate(), + NodeID: uuid.Generate(), + TaskGroup: "web", + JobVersion: 2, + DesiredStatus: "run", + ClientStatus: "pending", + CreateTime: now.Add(-4 * time.Second).UnixNano(), + ModifyTime: now.UnixNano(), + }, + } + + cmd.formatEvalStatus(eval, placed, false, shortId) + out := ui.OutputWriter.String() + + // there isn't much logic here, so this is just a smoke test + must.StrContains(t, out, ` +Failed Placements +Task Group "web" 
(failed to place 1 allocation): + * Constraint "${attr.kernel.name} = linux": 2 nodes excluded by filter + * Resources exhausted on 2 nodes + * Dimension "memory" exhausted on 2 nodes`) + + must.StrContains(t, out, `Related Evaluations`) + must.StrContains(t, out, `Placed Allocations`) +} diff --git a/website/content/docs/commands/eval/status.mdx b/website/content/docs/commands/eval/status.mdx index 3bd98a891..8d3019e10 100644 --- a/website/content/docs/commands/eval/status.mdx +++ b/website/content/docs/commands/eval/status.mdx @@ -40,21 +40,23 @@ indicated by exit code 1. ## Options - `-monitor`: Monitor an outstanding evaluation -- `-verbose`: Show full information. -- `-json` : Output a list of all evaluations in JSON format. This - behavior is deprecated and has been replaced by `nomad eval list - -json`. In Nomad 1.4.0 the behavior of this option will change to - output only the selected evaluation in JSON. -- `-t` : Format and display evaluation using a Go template. +- `-verbose`: Show full-length IDs and exact timestamps. +- `-json`: Output the evaluation in its JSON format. This format will not + include placed allocations. +- `-t` : Format and display evaluation using a Go template. This format will not + include placed allocations. - `-ui`: Open the evaluation in the browser. ## Examples -Show the status of an evaluation that has placement failures +Show the status of an evaluation with related evaluations, successful +placements, and failed placements. 
```shell-session -$ nomad eval status 2ae0e6a5 -ID = 2ae0e6a5 +$ nomad eval status 8f6af533 +ID = 8f6af533 +Create Time = 11s ago +Modify Time = 10s ago Status = complete Status Description = complete Type = service @@ -63,14 +65,27 @@ Job ID = example Namespace = default Priority = 50 Placement Failures = true +Previous Eval = c324b46f +Next Eval = +Blocked Eval = 2b83d3af -==> Failed Placements -Task Group "cache" (failed to place 1 allocation): - * Class "foo" filtered 1 nodes - * Constraint "${attr.kernel.name} = windows" filtered 1 nodes +Related Evaluations +ID Priority Triggered By Node ID Status Description +fd6f3091 50 queued-allocs pending +Placed Allocations +ID Node ID Task Group Version Desired Status Created Modified +b7e298fa 24c15262 web 2 run running 10s ago 2s ago +108841a9 41f70903 web 2 run pending 3s ago 1s ago +4869a26d d372b337 web 2 run pending 4s ago 0s ago -Evaluation "67493a64" waiting for additional capacity to place remainder +Failed Placements +Task Group "web" (failed to place 1 allocation): + * Constraint "${attr.kernel.name} = linux": 2 nodes excluded by filter + * Resources exhausted on 2 nodes + * Dimension "memory" exhausted on 2 nodes + +Evaluation "2b83d3af" waiting for additional capacity to place remainder ``` Monitor an existing evaluation