From c77026f5da95660cceae2316ad68c43d98e4948d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 31 May 2016 21:51:23 +0000 Subject: [PATCH] plan shows failure reasons and ordered annotations --- api/jobs.go | 1 + command/eval_status.go | 2 +- command/monitor.go | 30 ++++++++++++++++++---------- command/plan.go | 41 ++++++++++++++++++++++++++++---------- command/status.go | 2 +- nomad/job_endpoint.go | 9 ++++++++- nomad/job_endpoint_test.go | 6 ++++++ nomad/structs/structs.go | 3 +++ 8 files changed, 70 insertions(+), 24 deletions(-) diff --git a/api/jobs.go b/api/jobs.go index 71c761caa..7ce57c2ac 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -286,6 +286,7 @@ type JobPlanResponse struct { CreatedEvals []*Evaluation Diff *JobDiff Annotations *PlanAnnotations + FailedTGAllocs map[string]*AllocationMetric } type JobDiff struct { diff --git a/command/eval_status.go b/command/eval_status.go index 2e827f7f0..acecc1e0f 100644 --- a/command/eval_status.go +++ b/command/eval_status.go @@ -162,7 +162,7 @@ func (c *EvalStatusCommand) Run(args []string) int { noun += "s" } c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) - dumpAllocMetrics(c.Ui, metrics, false) + c.Ui.Output(formatAllocMetrics(metrics, false, " ")) c.Ui.Output("") } diff --git a/command/monitor.go b/command/monitor.go index 51d4fe78f..3ee6dcc12 100644 --- a/command/monitor.go +++ b/command/monitor.go @@ -2,6 +2,7 @@ package command import ( "fmt" + "strings" "sync" "time" @@ -309,7 +310,10 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int { noun += "s" } m.ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) - dumpAllocMetrics(m.ui, metrics, false) + metrics := formatAllocMetrics(metrics, false, " ") + for _, line := range strings.Split(metrics, "\n") { + m.ui.Output(line) + } } if eval.BlockedEval != "" { @@ -358,46 +362,50 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation, 
length int) { ui.Output(fmt.Sprintf("Allocation %q status %q (%d/%d nodes filtered)", limit(alloc.ID, length), alloc.ClientStatus, alloc.Metrics.NodesFiltered, alloc.Metrics.NodesEvaluated)) - dumpAllocMetrics(ui, alloc.Metrics, true) + ui.Output(formatAllocMetrics(alloc.Metrics, true, " ")) } -func dumpAllocMetrics(ui cli.Ui, metrics *api.AllocationMetric, scores bool) { +func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix string) string { // Print a helpful message if we have an eligibility problem + var out string if metrics.NodesEvaluated == 0 { - ui.Output(" * No nodes were eligible for evaluation") + out += fmt.Sprintf("%s* No nodes were eligible for evaluation\n", prefix) } // Print a helpful message if the user has asked for a DC that has no // available nodes. for dc, available := range metrics.NodesAvailable { if available == 0 { - ui.Output(fmt.Sprintf(" * No nodes are available in datacenter %q", dc)) + out += fmt.Sprintf("%s* No nodes are available in datacenter %q\n", prefix, dc) } } // Print filter info for class, num := range metrics.ClassFiltered { - ui.Output(fmt.Sprintf(" * Class %q filtered %d nodes", class, num)) + out += fmt.Sprintf("%s* Class %q filtered %d nodes\n", prefix, class, num) } for cs, num := range metrics.ConstraintFiltered { - ui.Output(fmt.Sprintf(" * Constraint %q filtered %d nodes", cs, num)) + out += fmt.Sprintf("%s* Constraint %q filtered %d nodes\n", prefix, cs, num) } // Print exhaustion info if ne := metrics.NodesExhausted; ne > 0 { - ui.Output(fmt.Sprintf(" * Resources exhausted on %d nodes", ne)) + out += fmt.Sprintf("%s* Resources exhausted on %d nodes\n", prefix, ne) } for class, num := range metrics.ClassExhausted { - ui.Output(fmt.Sprintf(" * Class %q exhausted on %d nodes", class, num)) + out += fmt.Sprintf("%s* Class %q exhausted on %d nodes\n", prefix, class, num) } for dim, num := range metrics.DimensionExhausted { - ui.Output(fmt.Sprintf(" * Dimension %q exhausted on %d nodes", dim, num)) 
+ out += fmt.Sprintf("%s* Dimension %q exhausted on %d nodes\n", prefix, dim, num) } // Print scores if scores { for name, score := range metrics.Scores { - ui.Output(fmt.Sprintf(" * Score %q = %f", name, score)) + out += fmt.Sprintf("%s* Score %q = %f\n", prefix, name, score) } } + + out = strings.TrimSuffix(out, "\n") + return out } diff --git a/command/plan.go b/command/plan.go index 7742d5a25..5de76e0aa 100644 --- a/command/plan.go +++ b/command/plan.go @@ -2,6 +2,7 @@ package command import ( "fmt" + "sort" "strings" "github.com/hashicorp/nomad/api" @@ -130,7 +131,8 @@ func (c *PlanCommand) Run(args []string) int { // Print the scheduler dry-run output c.Ui.Output(c.Colorize().Color("[bold]Scheduler dry-run:[reset]")) - c.Ui.Output(c.Colorize().Color(formatDryRun(resp.CreatedEvals))) + c.Ui.Output(c.Colorize().Color(formatDryRun(resp.FailedTGAllocs, resp.CreatedEvals))) + c.Ui.Output("") // Print the job index info c.Ui.Output(c.Colorize().Color(formatJobModifyIndex(resp.JobModifyIndex, file))) @@ -146,28 +148,40 @@ func formatJobModifyIndex(jobModifyIndex uint64, jobName string) string { } // formatDryRun produces a string explaining the results of the dry run. 
-func formatDryRun(evals []*api.Evaluation) string { +func formatDryRun(failedTGAllocs map[string]*api.AllocationMetric, evals []*api.Evaluation) string { var rolling *api.Evaluation - var blocked *api.Evaluation for _, eval := range evals { if eval.TriggeredBy == "rolling-update" { rolling = eval - } else if eval.Status == "blocked" { - blocked = eval } } var out string - if blocked == nil { - out = "[bold][green] - All tasks successfully allocated.[reset]\n" + if len(failedTGAllocs) == 0 { + out = "[bold][green]- All tasks successfully allocated.[reset]\n" } else { - out = "[bold][yellow] - WARNING: Failed to place all allocations.[reset]\n" + out = "[bold][yellow]- WARNING: Failed to place all allocations.[reset]\n" + sorted := sortedTaskGroupFromMetrics(failedTGAllocs) + for _, tg := range sorted { + metrics := failedTGAllocs[tg] + + noun := "allocation" + if metrics.CoalescedFailures > 0 { + noun += "s" + } + out += fmt.Sprintf("%s[yellow]Task Group %q (failed to place %d %s):\n[reset]", strings.Repeat(" ", 2), tg, metrics.CoalescedFailures+1, noun) + out += fmt.Sprintf("[yellow]%s[reset]\n\n", formatAllocMetrics(metrics, false, strings.Repeat(" ", 4))) + } + if rolling == nil { + out = strings.TrimSuffix(out, "\n") + } } if rolling != nil { - out += fmt.Sprintf("[green] - Rolling update, next evaluation will be in %s.\n", rolling.Wait) + out += fmt.Sprintf("[green]- Rolling update, next evaluation will be in %s.\n", rolling.Wait) } + out = strings.TrimSuffix(out, "\n") return out } @@ -216,8 +230,15 @@ func formatTaskGroupDiff(tg *api.TaskGroupDiff, tgPrefix int, verbose bool) stri // Append the updates and colorize them if l := len(tg.Updates); l > 0 { + order := make([]string, 0, l) + for updateType := range tg.Updates { + order = append(order, updateType) + } + + sort.Strings(order) updates := make([]string, 0, l) - for updateType, count := range tg.Updates { + for _, updateType := range order { + count := tg.Updates[updateType] var color string switch 
updateType { case scheduler.UpdateTypeIgnore: diff --git a/command/status.go b/command/status.go index 6c4115210..3351bba67 100644 --- a/command/status.go +++ b/command/status.go @@ -320,7 +320,7 @@ func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { c.Ui.Output(fmt.Sprintf("Task Group %q:", tg)) metrics := failedEval.FailedTGAllocs[tg] - dumpAllocMetrics(c.Ui, metrics, false) + c.Ui.Output(formatAllocMetrics(metrics, false, " ")) if i != len(sorted)-1 { c.Ui.Output("") } diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 13a8c9e44..40f3b231c 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -459,7 +459,7 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) // Annotate and store the diff if plans := len(planner.Plans); plans != 1 { - return fmt.Errorf("scheduler resulted in an unexpected number of plans: %d", plans) + return fmt.Errorf("scheduler resulted in an unexpected number of plans: %v", plans) } annotations := planner.Plans[0].Annotations if args.Diff { @@ -474,6 +474,13 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) reply.Diff = jobDiff } + // Grab the failures + if len(planner.Evals) != 1 { + return fmt.Errorf("scheduler resulted in an unexpected number of eval updates: %v", planner.Evals) + } + updatedEval := planner.Evals[0] + + reply.FailedTGAllocs = updatedEval.FailedTGAllocs reply.JobModifyIndex = index reply.Annotations = annotations reply.CreatedEvals = planner.CreateEvals diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 47cc2e9cc..339b13439 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -962,6 +962,9 @@ func TestJobEndpoint_Plan_WithDiff(t *testing.T) { if planResp.Diff == nil { t.Fatalf("no diff") } + if len(planResp.FailedTGAllocs) == 0 { + t.Fatalf("no failed task group alloc metrics") + } } func TestJobEndpoint_Plan_NoDiff(t *testing.T) { @@ -1011,4 +1014,7 @@ func 
TestJobEndpoint_Plan_NoDiff(t *testing.T) { if planResp.Diff != nil { t.Fatalf("got diff") } + if len(planResp.FailedTGAllocs) == 0 { + t.Fatalf("no failed task group alloc metrics") + } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 6f7be67ea..f624c4bfd 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -403,6 +403,9 @@ type JobPlanResponse struct { // Annotations stores annotations explaining decisions the scheduler made. Annotations *PlanAnnotations + // FailedTGAllocs is the placement failures per task group. + FailedTGAllocs map[string]*AllocMetric + // JobModifyIndex is the modification index of the job. The value can be // used when running `nomad run` to ensure that the Job wasn’t modified // since the last plan. If the job is being created, the value is zero.