mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 10:25:42 +03:00
eval status: enrich with related evals and placed allocs tables (#26156)
When debugging an evaluation, you almost always want to know about all the related evaluations and what allocations were placed by that evaluation (and where), not just failed placements. We can enrich the command by adding the `related` query parameter to the API, and having the command query for the evaluation's allocations automatically. Emit this data as a pair of new tables and expose fields like quota limits, and previous/next/blocked eval without the `-verbose` flag. Update the docs to include the full output and remove references to long-removed behavior of the `-json` flag. Ref: https://hashicorp.atlassian.net/browse/NMD-818 Ref: https://go.hashi.co/rfc/nmd-212
This commit is contained in:
7
.changelog/26156.txt
Normal file
7
.changelog/26156.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
```release-note:improvement
|
||||
cli: Added related evals and placed allocations tables to the eval status command, and exposed more fields without requiring the `-verbose` flag.
|
||||
```
|
||||
|
||||
```release-note:improvement
|
||||
api: The `Evaluations.Info` method of the Go API now populates the `RelatedEvals` field.
|
||||
```
|
||||
@@ -161,7 +161,7 @@ func TestSetQueryOptions(t *testing.T) {
|
||||
c, s := makeClient(t, nil, nil)
|
||||
defer s.Stop()
|
||||
|
||||
r, _ := c.newRequest("GET", "/v1/jobs")
|
||||
r, _ := c.newRequest("GET", "/v1/jobs?format=baz")
|
||||
q := &QueryOptions{
|
||||
Region: "foo",
|
||||
Namespace: "bar",
|
||||
@@ -188,6 +188,7 @@ func TestSetQueryOptions(t *testing.T) {
|
||||
try("index", "1000")
|
||||
try("wait", "100000ms")
|
||||
try("reverse", "true")
|
||||
try("format", "baz")
|
||||
}
|
||||
|
||||
func TestQueryOptionsContext(t *testing.T) {
|
||||
|
||||
@@ -46,7 +46,7 @@ func (e *Evaluations) Count(q *QueryOptions) (*EvalCountResponse, *QueryMeta, er
|
||||
// Info is used to query a single evaluation by its ID.
|
||||
func (e *Evaluations) Info(evalID string, q *QueryOptions) (*Evaluation, *QueryMeta, error) {
|
||||
var resp Evaluation
|
||||
qm, err := e.client.query("/v1/evaluation/"+evalID, &resp, q)
|
||||
qm, err := e.client.query("/v1/evaluation/"+evalID+"?related=true", &resp, q)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
@@ -36,13 +36,15 @@ Eval Status Options:
|
||||
Monitor an outstanding evaluation
|
||||
|
||||
-verbose
|
||||
Show full information.
|
||||
Show full-length IDs and exact timestamps.
|
||||
|
||||
-json
|
||||
Output the evaluation in its JSON format.
|
||||
Output the evaluation in its JSON format. This format will not include
|
||||
placed allocations.
|
||||
|
||||
-t
|
||||
Format and display evaluation using a Go template.
|
||||
Format and display evaluation using a Go template. This format will not
|
||||
include placed allocations.
|
||||
|
||||
-ui
|
||||
Open the evaluation in the browser.
|
||||
@@ -73,10 +75,6 @@ func (c *EvalStatusCommand) AutocompleteArgs() complete.Predictor {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Evals, nil)
|
||||
if err != nil {
|
||||
return []string{}
|
||||
@@ -120,12 +118,6 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
|
||||
evalID := args[0]
|
||||
|
||||
// Truncate the id unless full length is requested
|
||||
length := shortId
|
||||
if verbose {
|
||||
length = fullId
|
||||
}
|
||||
|
||||
// Query the allocation info
|
||||
if len(evalID) == 1 {
|
||||
c.Ui.Error("Identifier must contain at least two characters.")
|
||||
@@ -153,6 +145,12 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// Truncate the id unless full length is requested
|
||||
length := shortId
|
||||
if verbose {
|
||||
length = fullId
|
||||
}
|
||||
|
||||
// If we are in monitor mode, monitor and exit
|
||||
if monitor {
|
||||
mon := newMonitor(c.Ui, client, length)
|
||||
@@ -178,6 +176,30 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
placedAllocs, _, err := client.Evaluations().Allocations(eval.ID, nil)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error querying related allocations: %s", err))
|
||||
return 1
|
||||
}
|
||||
|
||||
c.formatEvalStatus(eval, placedAllocs, verbose, length)
|
||||
|
||||
hint, _ := c.Meta.showUIPath(UIHintContext{
|
||||
Command: "eval status",
|
||||
PathParams: map[string]string{
|
||||
"evalID": eval.ID,
|
||||
},
|
||||
OpenURL: openURL,
|
||||
})
|
||||
if hint != "" {
|
||||
c.Ui.Warn(hint)
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *EvalStatusCommand) formatEvalStatus(eval *api.Evaluation, placedAllocs []*api.AllocationListStub, verbose bool, length int) {
|
||||
|
||||
failureString, failures := evalFailureStatus(eval)
|
||||
triggerNoun, triggerSubj := getTriggerDetails(eval)
|
||||
statusDesc := eval.StatusDescription
|
||||
@@ -220,16 +242,27 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
basic = append(basic,
|
||||
fmt.Sprintf("Wait Until|%s", formatTime(eval.WaitUntil)))
|
||||
}
|
||||
|
||||
if verbose {
|
||||
// NextEval, PreviousEval, BlockedEval
|
||||
if eval.QuotaLimitReached != "" {
|
||||
basic = append(basic,
|
||||
fmt.Sprintf("Previous Eval|%s", eval.PreviousEval),
|
||||
fmt.Sprintf("Next Eval|%s", eval.NextEval),
|
||||
fmt.Sprintf("Blocked Eval|%s", eval.BlockedEval))
|
||||
fmt.Sprintf("Quota Limit Reached|%s", eval.QuotaLimitReached))
|
||||
}
|
||||
basic = append(basic,
|
||||
fmt.Sprintf("Previous Eval|%s", limit(eval.PreviousEval, length)),
|
||||
fmt.Sprintf("Next Eval|%s", limit(eval.NextEval, length)),
|
||||
fmt.Sprintf("Blocked Eval|%s", limit(eval.BlockedEval, length)),
|
||||
)
|
||||
c.Ui.Output(formatKV(basic))
|
||||
|
||||
if len(eval.RelatedEvals) > 0 {
|
||||
c.Ui.Output(c.Colorize().Color("\n[bold]Related Evaluations[reset]"))
|
||||
c.Ui.Output(formatRelatedEvalStubs(eval.RelatedEvals, length))
|
||||
}
|
||||
if len(placedAllocs) > 0 {
|
||||
c.Ui.Output(c.Colorize().Color("\n[bold]Placed Allocations[reset]"))
|
||||
allocsOut := formatAllocListStubs(placedAllocs, false, length)
|
||||
c.Ui.Output(allocsOut)
|
||||
}
|
||||
|
||||
if failures {
|
||||
c.Ui.Output(c.Colorize().Color("\n[bold]Failed Placements[reset]"))
|
||||
sorted := sortedTaskGroupFromMetrics(eval.FailedTGAllocs)
|
||||
@@ -240,29 +273,18 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
if metrics.CoalescedFailures > 0 {
|
||||
noun += "s"
|
||||
}
|
||||
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
|
||||
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):",
|
||||
tg, metrics.CoalescedFailures+1, noun))
|
||||
c.Ui.Output(formatAllocMetrics(metrics, false, " "))
|
||||
c.Ui.Output("")
|
||||
}
|
||||
|
||||
if eval.BlockedEval != "" {
|
||||
c.Ui.Output(fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder",
|
||||
c.Ui.Output(fmt.Sprintf(
|
||||
"Evaluation %q waiting for additional capacity to place remainder",
|
||||
limit(eval.BlockedEval, length)))
|
||||
}
|
||||
}
|
||||
|
||||
hint, _ := c.Meta.showUIPath(UIHintContext{
|
||||
Command: "eval status",
|
||||
PathParams: map[string]string{
|
||||
"evalID": eval.ID,
|
||||
},
|
||||
OpenURL: openURL,
|
||||
})
|
||||
if hint != "" {
|
||||
c.Ui.Warn(hint)
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func sortedTaskGroupFromMetrics(groups map[string]*api.AllocationMetric) []string {
|
||||
@@ -284,3 +306,20 @@ func getTriggerDetails(eval *api.Evaluation) (noun, subject string) {
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
func formatRelatedEvalStubs(evals []*api.EvaluationStub, length int) string {
|
||||
out := make([]string, len(evals)+1)
|
||||
out[0] = "ID|Priority|Triggered By|Node ID|Status|Description"
|
||||
for i, eval := range evals {
|
||||
out[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s|%s",
|
||||
limit(eval.ID, length),
|
||||
eval.Priority,
|
||||
eval.TriggeredBy,
|
||||
limit(eval.NodeID, length),
|
||||
eval.Status,
|
||||
eval.StatusDescription,
|
||||
)
|
||||
}
|
||||
|
||||
return formatList(out)
|
||||
}
|
||||
|
||||
@@ -6,9 +6,13 @@ package command
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/cli"
|
||||
"github.com/hashicorp/nomad/api"
|
||||
"github.com/hashicorp/nomad/ci"
|
||||
"github.com/hashicorp/nomad/helper/pointer"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/posener/complete"
|
||||
@@ -88,3 +92,119 @@ func TestEvalStatusCommand_AutocompleteArgs(t *testing.T) {
|
||||
must.SliceLen(t, 1, res)
|
||||
must.Eq(t, e.ID, res[0])
|
||||
}
|
||||
|
||||
// TestEvalStatusCommand_Format is a smoke test for formatEvalStatus. It builds
// an evaluation carrying one related evaluation stub and one failed task group
// ("web"), plus three placed allocation stubs, renders them through a mock UI,
// and checks that the output contains the failed-placement details as well as
// the "Related Evaluations" and "Placed Allocations" headings.
func TestEvalStatusCommand_Format(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
ui := cli.NewMockUi()
|
||||
cmd := &EvalStatusCommand{Meta: Meta{Ui: ui}}
|
||||
|
||||
// Evaluation under test: has a related eval and a failed placement for "web".
eval := &api.Evaluation{
|
||||
ID: uuid.Generate(),
|
||||
Priority: 50,
|
||||
Type: api.JobTypeService,
|
||||
TriggeredBy: structs.EvalTriggerAllocStop,
|
||||
Namespace: api.DefaultNamespace,
|
||||
JobID: "example",
|
||||
JobModifyIndex: 0,
|
||||
DeploymentID: uuid.Generate(),
|
||||
Status: api.EvalStatusComplete,
|
||||
StatusDescription: "complete",
|
||||
NextEval: "",
|
||||
PreviousEval: uuid.Generate(),
|
||||
BlockedEval: uuid.Generate(),
|
||||
RelatedEvals: []*api.EvaluationStub{{
|
||||
ID: uuid.Generate(),
|
||||
Priority: 50,
|
||||
Type: "service",
|
||||
TriggeredBy: "queued-allocs",
|
||||
Namespace: api.DefaultNamespace,
|
||||
JobID: "example",
|
||||
DeploymentID: "",
|
||||
Status: "pending",
|
||||
StatusDescription: "",
|
||||
WaitUntil: time.Time{},
|
||||
NextEval: "",
|
||||
PreviousEval: uuid.Generate(),
|
||||
BlockedEval: "",
|
||||
CreateIndex: 0,
|
||||
ModifyIndex: 0,
|
||||
CreateTime: 0,
|
||||
ModifyTime: 0,
|
||||
}},
|
||||
FailedTGAllocs: map[string]*api.AllocationMetric{"web": {
|
||||
NodesEvaluated: 6,
|
||||
NodesFiltered: 4,
|
||||
NodesInPool: 10,
|
||||
NodesAvailable: map[string]int{},
|
||||
ClassFiltered: map[string]int{},
|
||||
ConstraintFiltered: map[string]int{"${attr.kernel.name} = linux": 2},
|
||||
NodesExhausted: 2,
|
||||
ClassExhausted: map[string]int{},
|
||||
DimensionExhausted: map[string]int{"memory": 2},
|
||||
QuotaExhausted: []string{},
|
||||
ResourcesExhausted: map[string]*api.Resources{"web": {
|
||||
Cores: pointer.Of(3),
|
||||
}},
|
||||
Scores: map[string]float64{},
|
||||
AllocationTime: 0,
|
||||
CoalescedFailures: 0,
|
||||
ScoreMetaData: []*api.NodeScoreMeta{},
|
||||
}},
|
||||
ClassEligibility: map[string]bool{},
|
||||
EscapedComputedClass: true,
|
||||
QuotaLimitReached: "",
|
||||
QueuedAllocations: map[string]int{},
|
||||
SnapshotIndex: 1001,
|
||||
CreateIndex: 999,
|
||||
ModifyIndex: 1003,
|
||||
CreateTime: now.UnixNano(),
|
||||
ModifyTime: now.Add(time.Second).UnixNano(),
|
||||
}
|
||||
|
||||
// Three allocation stubs passed as the placed allocations for the eval.
placed := []*api.AllocationListStub{
|
||||
{
|
||||
ID: uuid.Generate(),
|
||||
NodeID: uuid.Generate(),
|
||||
TaskGroup: "web",
|
||||
DesiredStatus: "run",
|
||||
JobVersion: 2,
|
||||
ClientStatus: "running",
|
||||
CreateTime: now.Add(-10 * time.Second).UnixNano(),
|
||||
ModifyTime: now.Add(-2 * time.Second).UnixNano(),
|
||||
},
|
||||
{
|
||||
ID: uuid.Generate(),
|
||||
NodeID: uuid.Generate(),
|
||||
TaskGroup: "web",
|
||||
JobVersion: 2,
|
||||
DesiredStatus: "run",
|
||||
ClientStatus: "pending",
|
||||
CreateTime: now.Add(-3 * time.Second).UnixNano(),
|
||||
ModifyTime: now.Add(-1 * time.Second).UnixNano(),
|
||||
},
|
||||
{
|
||||
ID: uuid.Generate(),
|
||||
NodeID: uuid.Generate(),
|
||||
TaskGroup: "web",
|
||||
JobVersion: 2,
|
||||
DesiredStatus: "run",
|
||||
ClientStatus: "pending",
|
||||
CreateTime: now.Add(-4 * time.Second).UnixNano(),
|
||||
ModifyTime: now.UnixNano(),
|
||||
},
|
||||
}
|
||||
|
||||
// Render with verbose=false and the short ID length, then capture the UI output.
cmd.formatEvalStatus(eval, placed, false, shortId)
|
||||
out := ui.OutputWriter.String()
|
||||
|
||||
// there isn't much logic here, so this is just a smoke test
|
||||
must.StrContains(t, out, `
|
||||
Failed Placements
|
||||
Task Group "web" (failed to place 1 allocation):
|
||||
* Constraint "${attr.kernel.name} = linux": 2 nodes excluded by filter
|
||||
* Resources exhausted on 2 nodes
|
||||
* Dimension "memory" exhausted on 2 nodes`)
|
||||
|
||||
must.StrContains(t, out, `Related Evaluations`)
|
||||
must.StrContains(t, out, `Placed Allocations`)
|
||||
}
|
||||
|
||||
@@ -40,21 +40,23 @@ indicated by exit code 1.
|
||||
## Options
|
||||
|
||||
- `-monitor`: Monitor an outstanding evaluation
|
||||
- `-verbose`: Show full information.
|
||||
- `-json` : Output a list of all evaluations in JSON format. This
|
||||
behavior is deprecated and has been replaced by `nomad eval list
|
||||
-json`. In Nomad 1.4.0 the behavior of this option will change to
|
||||
output only the selected evaluation in JSON.
|
||||
- `-t` : Format and display evaluation using a Go template.
|
||||
- `-verbose`: Show full-length IDs and exact timestamps.
|
||||
- `-json`: Output the evaluation in its JSON format. This format will not
|
||||
include placed allocations.
|
||||
- `-t`: Format and display evaluation using a Go template. This format will not
|
||||
include placed allocations.
|
||||
- `-ui`: Open the evaluation in the browser.
|
||||
|
||||
## Examples
|
||||
|
||||
Show the status of an evaluation that has placement failures
|
||||
Show the status of an evaluation with related evaluations, successful
|
||||
placements, and failed placements.
|
||||
|
||||
```shell-session
|
||||
$ nomad eval status 2ae0e6a5
|
||||
ID = 2ae0e6a5
|
||||
$ nomad eval status 8f6af533
|
||||
ID = 8f6af533
|
||||
Create Time = 11s ago
|
||||
Modify Time = 10s ago
|
||||
Status = complete
|
||||
Status Description = complete
|
||||
Type = service
|
||||
@@ -63,14 +65,27 @@ Job ID = example
|
||||
Namespace = default
|
||||
Priority = 50
|
||||
Placement Failures = true
|
||||
Previous Eval = c324b46f
|
||||
Next Eval = <none>
|
||||
Blocked Eval = 2b83d3af
|
||||
|
||||
==> Failed Placements
|
||||
Task Group "cache" (failed to place 1 allocation):
|
||||
* Class "foo" filtered 1 nodes
|
||||
* Constraint "${attr.kernel.name} = windows" filtered 1 nodes
|
||||
Related Evaluations
|
||||
ID Priority Triggered By Node ID Status Description
|
||||
fd6f3091 50 queued-allocs <none> pending <none>
|
||||
|
||||
Placed Allocations
|
||||
ID Node ID Task Group Version Desired Status Created Modified
|
||||
b7e298fa 24c15262 web 2 run running 10s ago 2s ago
|
||||
108841a9 41f70903 web 2 run pending 3s ago 1s ago
|
||||
4869a26d d372b337 web 2 run pending 4s ago 0s ago
|
||||
|
||||
Evaluation "67493a64" waiting for additional capacity to place remainder
|
||||
Failed Placements
|
||||
Task Group "web" (failed to place 1 allocation):
|
||||
* Constraint "${attr.kernel.name} = linux": 2 nodes excluded by filter
|
||||
* Resources exhausted on 2 nodes
|
||||
* Dimension "memory" exhausted on 2 nodes
|
||||
|
||||
Evaluation "2b83d3af" waiting for additional capacity to place remainder
|
||||
```
|
||||
|
||||
Monitor an existing evaluation
|
||||
|
||||
Reference in New Issue
Block a user