eval status: enrich with related evals and placed allocs tables (#26156)

When debugging an evaluation, you almost always want to know about all the
related evaluations and what allocations were placed by that evaluation (and
where), not just failed placements. We can enrich the command by adding the
`related` query parameter to the API, and having the command query for the
evaluation's allocations automatically. Emit this data as a pair of new tables
and expose fields like quota limits, and previous/next/blocked eval without the
`-verbose` flag.

Update the docs to include the full output and remove references to long-removed
behavior of the `-json` flag.

Ref: https://hashicorp.atlassian.net/browse/NMD-818
Ref: https://go.hashi.co/rfc/nmd-212
This commit is contained in:
Tim Gross
2025-06-30 09:23:36 -04:00
committed by GitHub
parent 0c2fcb3e30
commit aa3c08d069
6 changed files with 232 additions and 50 deletions

7
.changelog/26156.txt Normal file
View File

@@ -0,0 +1,7 @@
```release-note:improvement
cli: Added related evals and placed allocations tables to the eval status command, and exposed more fields without requiring the `-verbose` flag.
```
```release-note:improvement
api: The `Evaluations.Info` method of the Go API now populates the `RelatedEvals` field.
```

View File

@@ -161,7 +161,7 @@ func TestSetQueryOptions(t *testing.T) {
c, s := makeClient(t, nil, nil)
defer s.Stop()
r, _ := c.newRequest("GET", "/v1/jobs")
r, _ := c.newRequest("GET", "/v1/jobs?format=baz")
q := &QueryOptions{
Region: "foo",
Namespace: "bar",
@@ -188,6 +188,7 @@ func TestSetQueryOptions(t *testing.T) {
try("index", "1000")
try("wait", "100000ms")
try("reverse", "true")
try("format", "baz")
}
func TestQueryOptionsContext(t *testing.T) {

View File

@@ -46,7 +46,7 @@ func (e *Evaluations) Count(q *QueryOptions) (*EvalCountResponse, *QueryMeta, er
// Info is used to query a single evaluation by its ID.
func (e *Evaluations) Info(evalID string, q *QueryOptions) (*Evaluation, *QueryMeta, error) {
var resp Evaluation
qm, err := e.client.query("/v1/evaluation/"+evalID, &resp, q)
qm, err := e.client.query("/v1/evaluation/"+evalID+"?related=true", &resp, q)
if err != nil {
return nil, nil, err
}

View File

@@ -36,13 +36,15 @@ Eval Status Options:
Monitor an outstanding evaluation
-verbose
Show full information.
Show full-length IDs and exact timestamps.
-json
Output the evaluation in its JSON format.
Output the evaluation in its JSON format. This format will not include
placed allocations.
-t
Format and display evaluation using a Go template.
Format and display evaluation using a Go template. This format will not
include placed allocations.
-ui
Open the evaluation in the browser.
@@ -73,10 +75,6 @@ func (c *EvalStatusCommand) AutocompleteArgs() complete.Predictor {
return nil
}
if err != nil {
return nil
}
resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Evals, nil)
if err != nil {
return []string{}
@@ -120,12 +118,6 @@ func (c *EvalStatusCommand) Run(args []string) int {
evalID := args[0]
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
if len(evalID) == 1 {
c.Ui.Error("Identifier must contain at least two characters.")
@@ -153,6 +145,12 @@ func (c *EvalStatusCommand) Run(args []string) int {
return 1
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// If we are in monitor mode, monitor and exit
if monitor {
mon := newMonitor(c.Ui, client, length)
@@ -178,6 +176,30 @@ func (c *EvalStatusCommand) Run(args []string) int {
return 0
}
placedAllocs, _, err := client.Evaluations().Allocations(eval.ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying related allocations: %s", err))
return 1
}
c.formatEvalStatus(eval, placedAllocs, verbose, length)
hint, _ := c.Meta.showUIPath(UIHintContext{
Command: "eval status",
PathParams: map[string]string{
"evalID": eval.ID,
},
OpenURL: openURL,
})
if hint != "" {
c.Ui.Warn(hint)
}
return 0
}
func (c *EvalStatusCommand) formatEvalStatus(eval *api.Evaluation, placedAllocs []*api.AllocationListStub, verbose bool, length int) {
failureString, failures := evalFailureStatus(eval)
triggerNoun, triggerSubj := getTriggerDetails(eval)
statusDesc := eval.StatusDescription
@@ -220,16 +242,27 @@ func (c *EvalStatusCommand) Run(args []string) int {
basic = append(basic,
fmt.Sprintf("Wait Until|%s", formatTime(eval.WaitUntil)))
}
if verbose {
// NextEval, PreviousEval, BlockedEval
if eval.QuotaLimitReached != "" {
basic = append(basic,
fmt.Sprintf("Previous Eval|%s", eval.PreviousEval),
fmt.Sprintf("Next Eval|%s", eval.NextEval),
fmt.Sprintf("Blocked Eval|%s", eval.BlockedEval))
fmt.Sprintf("Quota Limit Reached|%s", eval.QuotaLimitReached))
}
basic = append(basic,
fmt.Sprintf("Previous Eval|%s", limit(eval.PreviousEval, length)),
fmt.Sprintf("Next Eval|%s", limit(eval.NextEval, length)),
fmt.Sprintf("Blocked Eval|%s", limit(eval.BlockedEval, length)),
)
c.Ui.Output(formatKV(basic))
if len(eval.RelatedEvals) > 0 {
c.Ui.Output(c.Colorize().Color("\n[bold]Related Evaluations[reset]"))
c.Ui.Output(formatRelatedEvalStubs(eval.RelatedEvals, length))
}
if len(placedAllocs) > 0 {
c.Ui.Output(c.Colorize().Color("\n[bold]Placed Allocations[reset]"))
allocsOut := formatAllocListStubs(placedAllocs, false, length)
c.Ui.Output(allocsOut)
}
if failures {
c.Ui.Output(c.Colorize().Color("\n[bold]Failed Placements[reset]"))
sorted := sortedTaskGroupFromMetrics(eval.FailedTGAllocs)
@@ -240,29 +273,18 @@ func (c *EvalStatusCommand) Run(args []string) int {
if metrics.CoalescedFailures > 0 {
noun += "s"
}
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):",
tg, metrics.CoalescedFailures+1, noun))
c.Ui.Output(formatAllocMetrics(metrics, false, " "))
c.Ui.Output("")
}
if eval.BlockedEval != "" {
c.Ui.Output(fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder",
c.Ui.Output(fmt.Sprintf(
"Evaluation %q waiting for additional capacity to place remainder",
limit(eval.BlockedEval, length)))
}
}
hint, _ := c.Meta.showUIPath(UIHintContext{
Command: "eval status",
PathParams: map[string]string{
"evalID": eval.ID,
},
OpenURL: openURL,
})
if hint != "" {
c.Ui.Warn(hint)
}
return 0
}
func sortedTaskGroupFromMetrics(groups map[string]*api.AllocationMetric) []string {
@@ -284,3 +306,20 @@ func getTriggerDetails(eval *api.Evaluation) (noun, subject string) {
return "", ""
}
}
// formatRelatedEvalStubs builds one pipe-delimited row per related evaluation,
// preceded by a header row, and hands the rows to formatList for rendering.
// The evaluation ID and node ID of each row are passed through limit with the
// given length before being written.
func formatRelatedEvalStubs(evals []*api.EvaluationStub, length int) string {
	rows := make([]string, 0, len(evals)+1)
	rows = append(rows, "ID|Priority|Triggered By|Node ID|Status|Description")

	for _, stub := range evals {
		evalID := limit(stub.ID, length)
		nodeID := limit(stub.NodeID, length)

		row := fmt.Sprintf("%s|%d|%s|%s|%s|%s",
			evalID,
			stub.Priority,
			stub.TriggeredBy,
			nodeID,
			stub.Status,
			stub.StatusDescription,
		)
		rows = append(rows, row)
	}

	return formatList(rows)
}

View File

@@ -6,9 +6,13 @@ package command
import (
"strings"
"testing"
"time"
"github.com/hashicorp/cli"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/posener/complete"
@@ -88,3 +92,119 @@ func TestEvalStatusCommand_AutocompleteArgs(t *testing.T) {
must.SliceLen(t, 1, res)
must.Eq(t, e.ID, res[0])
}
// TestEvalStatusCommand_Format calls formatEvalStatus directly with a
// hand-built evaluation and a list of placed allocations, then checks that the
// text written to the mock UI contains the failed placements section and the
// "Related Evaluations" and "Placed Allocations" headings.
func TestEvalStatusCommand_Format(t *testing.T) {
// Single reference time so the create/modify timestamps below are all
// expressed relative to the same instant.
now := time.Now().UTC()
ui := cli.NewMockUi()
cmd := &EvalStatusCommand{Meta: Meta{Ui: ui}}
// Evaluation fixture with a previous and blocked eval set, one related
// evaluation stub, and one task group ("web") with a failed placement.
eval := &api.Evaluation{
ID: uuid.Generate(),
Priority: 50,
Type: api.JobTypeService,
TriggeredBy: structs.EvalTriggerAllocStop,
Namespace: api.DefaultNamespace,
JobID: "example",
JobModifyIndex: 0,
DeploymentID: uuid.Generate(),
Status: api.EvalStatusComplete,
StatusDescription: "complete",
NextEval: "",
PreviousEval: uuid.Generate(),
BlockedEval: uuid.Generate(),
// One related evaluation so the related evaluations table is emitted.
RelatedEvals: []*api.EvaluationStub{{
ID: uuid.Generate(),
Priority: 50,
Type: "service",
TriggeredBy: "queued-allocs",
Namespace: api.DefaultNamespace,
JobID: "example",
DeploymentID: "",
Status: "pending",
StatusDescription: "",
WaitUntil: time.Time{},
NextEval: "",
PreviousEval: uuid.Generate(),
BlockedEval: "",
CreateIndex: 0,
ModifyIndex: 0,
CreateTime: 0,
ModifyTime: 0,
}},
// Placement metrics for the "web" task group; the constraint, exhausted
// node count, and exhausted dimension set here are what the expected
// output asserted at the end of the test refers to.
FailedTGAllocs: map[string]*api.AllocationMetric{"web": {
NodesEvaluated: 6,
NodesFiltered: 4,
NodesInPool: 10,
NodesAvailable: map[string]int{},
ClassFiltered: map[string]int{},
ConstraintFiltered: map[string]int{"${attr.kernel.name} = linux": 2},
NodesExhausted: 2,
ClassExhausted: map[string]int{},
DimensionExhausted: map[string]int{"memory": 2},
QuotaExhausted: []string{},
ResourcesExhausted: map[string]*api.Resources{"web": {
Cores: pointer.Of(3),
}},
Scores: map[string]float64{},
AllocationTime: 0,
CoalescedFailures: 0,
ScoreMetaData: []*api.NodeScoreMeta{},
}},
ClassEligibility: map[string]bool{},
EscapedComputedClass: true,
QuotaLimitReached: "",
QueuedAllocations: map[string]int{},
SnapshotIndex: 1001,
CreateIndex: 999,
ModifyIndex: 1003,
CreateTime: now.UnixNano(),
ModifyTime: now.Add(time.Second).UnixNano(),
}
// Three placed allocations for the "web" task group, each with its own
// generated IDs and create/modify times offset from now.
placed := []*api.AllocationListStub{
{
ID: uuid.Generate(),
NodeID: uuid.Generate(),
TaskGroup: "web",
DesiredStatus: "run",
JobVersion: 2,
ClientStatus: "running",
CreateTime: now.Add(-10 * time.Second).UnixNano(),
ModifyTime: now.Add(-2 * time.Second).UnixNano(),
},
{
ID: uuid.Generate(),
NodeID: uuid.Generate(),
TaskGroup: "web",
JobVersion: 2,
DesiredStatus: "run",
ClientStatus: "pending",
CreateTime: now.Add(-3 * time.Second).UnixNano(),
ModifyTime: now.Add(-1 * time.Second).UnixNano(),
},
{
ID: uuid.Generate(),
NodeID: uuid.Generate(),
TaskGroup: "web",
JobVersion: 2,
DesiredStatus: "run",
ClientStatus: "pending",
CreateTime: now.Add(-4 * time.Second).UnixNano(),
ModifyTime: now.UnixNano(),
},
}
// Render with verbose disabled and the short ID length.
cmd.formatEvalStatus(eval, placed, false, shortId)
out := ui.OutputWriter.String()
// there isn't much logic here, so this is just a smoke test
must.StrContains(t, out, `
Failed Placements
Task Group "web" (failed to place 1 allocation):
* Constraint "${attr.kernel.name} = linux": 2 nodes excluded by filter
* Resources exhausted on 2 nodes
* Dimension "memory" exhausted on 2 nodes`)
// The IDs in the two tables are generated per run, so only the section
// headings are checked.
must.StrContains(t, out, `Related Evaluations`)
must.StrContains(t, out, `Placed Allocations`)
}

View File

@@ -40,21 +40,23 @@ indicated by exit code 1.
## Options
- `-monitor`: Monitor an outstanding evaluation
- `-verbose`: Show full information.
- `-json` : Output a list of all evaluations in JSON format. This
behavior is deprecated and has been replaced by `nomad eval list
-json`. In Nomad 1.4.0 the behavior of this option will change to
output only the selected evaluation in JSON.
- `-t` : Format and display evaluation using a Go template.
- `-verbose`: Show full-length IDs and exact timestamps.
- `-json`: Output the evaluation in its JSON format. This format will not
include placed allocations.
- `-t`: Format and display evaluation using a Go template. This format will not
include placed allocations.
- `-ui`: Open the evaluation in the browser.
## Examples
Show the status of an evaluation that has placement failures
Show the status of an evaluation with related evaluations, successful
placements, and failed placements.
```shell-session
$ nomad eval status 2ae0e6a5
ID = 2ae0e6a5
$ nomad eval status 8f6af533
ID = 8f6af533
Create Time = 11s ago
Modify Time = 10s ago
Status = complete
Status Description = complete
Type = service
@@ -63,14 +65,27 @@ Job ID = example
Namespace = default
Priority = 50
Placement Failures = true
Previous Eval = c324b46f
Next Eval = <none>
Blocked Eval = 2b83d3af
==> Failed Placements
Task Group "cache" (failed to place 1 allocation):
* Class "foo" filtered 1 nodes
* Constraint "${attr.kernel.name} = windows" filtered 1 nodes
Related Evaluations
ID Priority Triggered By Node ID Status Description
fd6f3091 50 queued-allocs <none> pending <none>
Placed Allocations
ID Node ID Task Group Version Desired Status Created Modified
b7e298fa 24c15262 web 2 run running 10s ago 2s ago
108841a9 41f70903 web 2 run pending 3s ago 1s ago
4869a26d d372b337 web 2 run pending 4s ago 0s ago
Evaluation "67493a64" waiting for additional capacity to place remainder
Failed Placements
Task Group "web" (failed to place 1 allocation):
* Constraint "${attr.kernel.name} = linux": 2 nodes excluded by filter
* Resources exhausted on 2 nodes
* Dimension "memory" exhausted on 2 nodes
Evaluation "2b83d3af" waiting for additional capacity to place remainder
```
Monitor an existing evaluation