mirror of
https://github.com/kemko/nomad.git
synced 2026-01-04 01:15:43 +03:00
Show failure reason in status
This commit is contained in:
@@ -2,6 +2,7 @@ package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/nomad/api"
|
||||
@@ -151,14 +152,18 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
c.Ui.Output(formatKV(basic))
|
||||
|
||||
if failures {
|
||||
c.Ui.Output("\n==> Failed Allocations")
|
||||
for tg, metrics := range eval.FailedTGAllocs {
|
||||
c.Ui.Output("\n==> Failed Placements")
|
||||
sorted := sortedTaskGroupFromMetrics(eval.FailedTGAllocs)
|
||||
for _, tg := range sorted {
|
||||
metrics := eval.FailedTGAllocs[tg]
|
||||
|
||||
noun := "allocation"
|
||||
if metrics.CoalescedFailures > 0 {
|
||||
noun += "s"
|
||||
}
|
||||
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
|
||||
dumpAllocMetrics(c.Ui, metrics, false)
|
||||
c.Ui.Output("")
|
||||
}
|
||||
|
||||
if eval.BlockedEval != "" {
|
||||
@@ -170,6 +175,15 @@ func (c *EvalStatusCommand) Run(args []string) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func sortedTaskGroupFromMetrics(groups map[string]*api.AllocationMetric) []string {
|
||||
tgs := make([]string, 0, len(groups))
|
||||
for tg, _ := range groups {
|
||||
tgs = append(tgs, tg)
|
||||
}
|
||||
sort.Strings(tgs)
|
||||
return tgs
|
||||
}
|
||||
|
||||
func getTriggerDetails(eval *api.Evaluation) (noun, subject string) {
|
||||
switch eval.TriggeredBy {
|
||||
case "job-register", "job-deregister", "periodic-job", "rolling-update":
|
||||
|
||||
@@ -11,9 +11,16 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// maxFailedTGs is the maximum number of task groups we show failure reasons
|
||||
// for before deferring to eval-status
|
||||
maxFailedTGs = 5
|
||||
)
|
||||
|
||||
type StatusCommand struct {
|
||||
Meta
|
||||
length int
|
||||
length int
|
||||
showEvals, verbose bool
|
||||
}
|
||||
|
||||
func (c *StatusCommand) Help() string {
|
||||
@@ -31,8 +38,10 @@ Status Options:
|
||||
|
||||
-short
|
||||
Display short output. Used only when a single job is being
|
||||
queried, and drops verbose information about allocations
|
||||
and evaluations.
|
||||
queried, and drops verbose information about allocations.
|
||||
|
||||
-evals
|
||||
Display the evaluations associated with the job.
|
||||
|
||||
-verbose
|
||||
Display full information.
|
||||
@@ -45,12 +54,13 @@ func (c *StatusCommand) Synopsis() string {
|
||||
}
|
||||
|
||||
func (c *StatusCommand) Run(args []string) int {
|
||||
var short, verbose bool
|
||||
var short bool
|
||||
|
||||
flags := c.Meta.FlagSet("status", FlagSetClient)
|
||||
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||
flags.BoolVar(&short, "short", false, "")
|
||||
flags.BoolVar(&verbose, "verbose", false, "")
|
||||
flags.BoolVar(&c.showEvals, "evals", false, "")
|
||||
flags.BoolVar(&c.verbose, "verbose", false, "")
|
||||
|
||||
if err := flags.Parse(args); err != nil {
|
||||
return 1
|
||||
@@ -65,7 +75,7 @@ func (c *StatusCommand) Run(args []string) int {
|
||||
|
||||
// Truncate the id unless full length is requested
|
||||
c.length = shortId
|
||||
if verbose {
|
||||
if c.verbose {
|
||||
c.length = fullId
|
||||
}
|
||||
|
||||
@@ -221,27 +231,65 @@ func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) err
|
||||
func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
|
||||
var evals, allocs []string
|
||||
|
||||
// Query the evaluations
|
||||
jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error querying job evaluations: %s", err)
|
||||
}
|
||||
|
||||
// Query the allocations
|
||||
jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error querying job allocations: %s", err)
|
||||
}
|
||||
|
||||
// Query the evaluations
|
||||
jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error querying job evaluations: %s", err)
|
||||
}
|
||||
|
||||
// Determine latest evaluation with failures whose follow up hasn't
|
||||
// completed.
|
||||
evalsByID := make(map[string]*api.Evaluation, len(jobEvals))
|
||||
for _, eval := range jobEvals {
|
||||
evalsByID[eval.ID] = eval
|
||||
}
|
||||
|
||||
var latestFailedPlacement *api.Evaluation
|
||||
for _, eval := range evalsByID {
|
||||
if len(eval.FailedTGAllocs) == 0 {
|
||||
// Skip evals without failures
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if created blocked eval is finished
|
||||
if blocked, ok := evalsByID[eval.BlockedEval]; ok {
|
||||
if blocked.Status == "complete" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex {
|
||||
latestFailedPlacement = eval
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Format the evals
|
||||
evals = make([]string, len(jobEvals)+1)
|
||||
evals[0] = "ID|Priority|Triggered By|Status"
|
||||
evals[0] = "ID|Priority|Triggered By|Status|Placement Failures"
|
||||
for i, eval := range jobEvals {
|
||||
evals[i+1] = fmt.Sprintf("%s|%d|%s|%s",
|
||||
evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%t",
|
||||
limit(eval.ID, c.length),
|
||||
eval.Priority,
|
||||
eval.TriggeredBy,
|
||||
eval.Status)
|
||||
eval.Status,
|
||||
len(eval.FailedTGAllocs) != 0,
|
||||
)
|
||||
}
|
||||
|
||||
if c.verbose || c.showEvals {
|
||||
c.Ui.Output("\n==> Evaluations")
|
||||
c.Ui.Output(formatList(evals))
|
||||
}
|
||||
|
||||
if latestFailedPlacement != nil {
|
||||
c.outputFailedPlacements(latestFailedPlacement)
|
||||
}
|
||||
|
||||
// Format the allocs
|
||||
@@ -257,13 +305,41 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
|
||||
alloc.ClientStatus)
|
||||
}
|
||||
|
||||
c.Ui.Output("\n==> Evaluations")
|
||||
c.Ui.Output(formatList(evals))
|
||||
c.Ui.Output("\n==> Allocations")
|
||||
c.Ui.Output(formatList(allocs))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
|
||||
if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
c.Ui.Output("\n==> Last Placement Failure")
|
||||
|
||||
sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs)
|
||||
for i, tg := range sorted {
|
||||
if i >= maxFailedTGs {
|
||||
break
|
||||
}
|
||||
|
||||
metrics := failedEval.FailedTGAllocs[tg]
|
||||
|
||||
noun := "allocation"
|
||||
if metrics.CoalescedFailures > 0 {
|
||||
noun += "s"
|
||||
}
|
||||
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
|
||||
dumpAllocMetrics(c.Ui, metrics, false)
|
||||
c.Ui.Output("")
|
||||
}
|
||||
|
||||
if len(sorted) > maxFailedTGs {
|
||||
trunc := fmt.Sprintf("Placement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID)
|
||||
c.Ui.Output(trunc)
|
||||
}
|
||||
}
|
||||
|
||||
// convertApiJob is used to take a *api.Job and convert it to an *struct.Job.
|
||||
// This function is just a hammer and probably needs to be revisited.
|
||||
func convertApiJob(in *api.Job) (*structs.Job, error) {
|
||||
|
||||
Reference in New Issue
Block a user