From c77026f5da95660cceae2316ad68c43d98e4948d Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Tue, 31 May 2016 21:51:23 +0000
Subject: [PATCH] plan shows failure reasons and ordered annotations

---
 api/jobs.go                |  1 +
 command/eval_status.go     |  2 +-
 command/monitor.go         | 30 ++++++++++++++++++----------
 command/plan.go            | 41 ++++++++++++++++++++++++++++----------
 command/status.go          |  2 +-
 nomad/job_endpoint.go      |  9 ++++++++-
 nomad/job_endpoint_test.go |  6 ++++++
 nomad/structs/structs.go   |  3 +++
 8 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/api/jobs.go b/api/jobs.go
index 71c761caa..7ce57c2ac 100644
--- a/api/jobs.go
+++ b/api/jobs.go
@@ -286,6 +286,7 @@ type JobPlanResponse struct {
 	CreatedEvals   []*Evaluation
 	Diff           *JobDiff
 	Annotations    *PlanAnnotations
+	FailedTGAllocs map[string]*AllocationMetric
 }
 
 type JobDiff struct {
diff --git a/command/eval_status.go b/command/eval_status.go
index 2e827f7f0..acecc1e0f 100644
--- a/command/eval_status.go
+++ b/command/eval_status.go
@@ -162,7 +162,7 @@ func (c *EvalStatusCommand) Run(args []string) int {
 				noun += "s"
 			}
 			c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
-			dumpAllocMetrics(c.Ui, metrics, false)
+			c.Ui.Output(formatAllocMetrics(metrics, false, "  "))
 			c.Ui.Output("")
 		}
 
diff --git a/command/monitor.go b/command/monitor.go
index 51d4fe78f..3ee6dcc12 100644
--- a/command/monitor.go
+++ b/command/monitor.go
@@ -2,6 +2,7 @@ package command
 
 import (
 	"fmt"
+	"strings"
 	"sync"
 	"time"
 
@@ -309,7 +310,10 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int {
 						noun += "s"
 					}
 					m.ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
-					dumpAllocMetrics(m.ui, metrics, false)
+					formatted := formatAllocMetrics(metrics, false, "  ")
+					for _, line := range strings.Split(formatted, "\n") {
+						m.ui.Output(line)
+					}
 				}
 
 				if eval.BlockedEval != "" {
@@ -358,46 +362,50 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation, length int) {
 	ui.Output(fmt.Sprintf("Allocation %q status %q (%d/%d nodes filtered)",
 		limit(alloc.ID, length), alloc.ClientStatus,
 		alloc.Metrics.NodesFiltered, alloc.Metrics.NodesEvaluated))
-	dumpAllocMetrics(ui, alloc.Metrics, true)
+	ui.Output(formatAllocMetrics(alloc.Metrics, true, "  "))
 }
 
-func dumpAllocMetrics(ui cli.Ui, metrics *api.AllocationMetric, scores bool) {
+func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix string) string {
 	// Print a helpful message if we have an eligibility problem
+	var out string
 	if metrics.NodesEvaluated == 0 {
-		ui.Output("  * No nodes were eligible for evaluation")
+		out += fmt.Sprintf("%s* No nodes were eligible for evaluation\n", prefix)
 	}
 
 	// Print a helpful message if the user has asked for a DC that has no
 	// available nodes.
 	for dc, available := range metrics.NodesAvailable {
 		if available == 0 {
-			ui.Output(fmt.Sprintf("  * No nodes are available in datacenter %q", dc))
+			out += fmt.Sprintf("%s* No nodes are available in datacenter %q\n", prefix, dc)
 		}
 	}
 
 	// Print filter info
 	for class, num := range metrics.ClassFiltered {
-		ui.Output(fmt.Sprintf("  * Class %q filtered %d nodes", class, num))
+		out += fmt.Sprintf("%s* Class %q filtered %d nodes\n", prefix, class, num)
 	}
 	for cs, num := range metrics.ConstraintFiltered {
-		ui.Output(fmt.Sprintf("  * Constraint %q filtered %d nodes", cs, num))
+		out += fmt.Sprintf("%s* Constraint %q filtered %d nodes\n", prefix, cs, num)
 	}
 
 	// Print exhaustion info
 	if ne := metrics.NodesExhausted; ne > 0 {
-		ui.Output(fmt.Sprintf("  * Resources exhausted on %d nodes", ne))
+		out += fmt.Sprintf("%s* Resources exhausted on %d nodes\n", prefix, ne)
 	}
 	for class, num := range metrics.ClassExhausted {
-		ui.Output(fmt.Sprintf("  * Class %q exhausted on %d nodes", class, num))
+		out += fmt.Sprintf("%s* Class %q exhausted on %d nodes\n", prefix, class, num)
 	}
 	for dim, num := range metrics.DimensionExhausted {
-		ui.Output(fmt.Sprintf("  * Dimension %q exhausted on %d nodes", dim, num))
+		out += fmt.Sprintf("%s* Dimension %q exhausted on %d nodes\n", prefix, dim, num)
 	}
 
 	// Print scores
 	if scores {
 		for name, score := range metrics.Scores {
-			ui.Output(fmt.Sprintf("  * Score %q = %f", name, score))
+			out += fmt.Sprintf("%s* Score %q = %f\n", prefix, name, score)
 		}
 	}
+
+	out = strings.TrimSuffix(out, "\n")
+	return out
 }
diff --git a/command/plan.go b/command/plan.go
index 7742d5a25..5de76e0aa 100644
--- a/command/plan.go
+++ b/command/plan.go
@@ -2,6 +2,7 @@ package command
 
 import (
 	"fmt"
+	"sort"
 	"strings"
 
 	"github.com/hashicorp/nomad/api"
@@ -130,7 +131,8 @@ func (c *PlanCommand) Run(args []string) int {
 
 	// Print the scheduler dry-run output
 	c.Ui.Output(c.Colorize().Color("[bold]Scheduler dry-run:[reset]"))
-	c.Ui.Output(c.Colorize().Color(formatDryRun(resp.CreatedEvals)))
+	c.Ui.Output(c.Colorize().Color(formatDryRun(resp.FailedTGAllocs, resp.CreatedEvals)))
+	c.Ui.Output("")
 
 	// Print the job index info
 	c.Ui.Output(c.Colorize().Color(formatJobModifyIndex(resp.JobModifyIndex, file)))
@@ -146,28 +148,40 @@ func formatJobModifyIndex(jobModifyIndex uint64, jobName string) string {
 }
 
 // formatDryRun produces a string explaining the results of the dry run.
-func formatDryRun(evals []*api.Evaluation) string {
+func formatDryRun(failedTGAllocs map[string]*api.AllocationMetric, evals []*api.Evaluation) string {
 	var rolling *api.Evaluation
-	var blocked *api.Evaluation
 	for _, eval := range evals {
 		if eval.TriggeredBy == "rolling-update" {
 			rolling = eval
-		} else if eval.Status == "blocked" {
-			blocked = eval
 		}
 	}
 
 	var out string
-	if blocked == nil {
-		out = "[bold][green]  - All tasks successfully allocated.[reset]\n"
+	if len(failedTGAllocs) == 0 {
+		out = "[bold][green]- All tasks successfully allocated.[reset]\n"
 	} else {
-		out = "[bold][yellow]  - WARNING: Failed to place all allocations.[reset]\n"
+		out = "[bold][yellow]- WARNING: Failed to place all allocations.[reset]\n"
+		sorted := sortedTaskGroupFromMetrics(failedTGAllocs)
+		for _, tg := range sorted {
+			metrics := failedTGAllocs[tg]
+
+			noun := "allocation"
+			if metrics.CoalescedFailures > 0 {
+				noun += "s"
+			}
+			out += fmt.Sprintf("%s[yellow]Task Group %q (failed to place %d %s):\n[reset]", strings.Repeat(" ", 2), tg, metrics.CoalescedFailures+1, noun)
+			out += fmt.Sprintf("[yellow]%s[reset]\n\n", formatAllocMetrics(metrics, false, strings.Repeat(" ", 4)))
+		}
+		if rolling == nil {
+			out = strings.TrimSuffix(out, "\n")
+		}
 	}
 
 	if rolling != nil {
-		out += fmt.Sprintf("[green]  - Rolling update, next evaluation will be in %s.\n", rolling.Wait)
+		out += fmt.Sprintf("[green]- Rolling update, next evaluation will be in %s.\n", rolling.Wait)
 	}
 
+	out = strings.TrimSuffix(out, "\n")
 	return out
 }
 
@@ -216,8 +230,15 @@ func formatTaskGroupDiff(tg *api.TaskGroupDiff, tgPrefix int, verbose bool) stri
 
 	// Append the updates and colorize them
 	if l := len(tg.Updates); l > 0 {
+		order := make([]string, 0, l)
+		for updateType := range tg.Updates {
+			order = append(order, updateType)
+		}
+
+		sort.Strings(order)
 		updates := make([]string, 0, l)
-		for updateType, count := range tg.Updates {
+		for _, updateType := range order {
+			count := tg.Updates[updateType]
 			var color string
 			switch updateType {
 			case scheduler.UpdateTypeIgnore:
diff --git a/command/status.go b/command/status.go
index 6c4115210..3351bba67 100644
--- a/command/status.go
+++ b/command/status.go
@@ -320,7 +320,7 @@ func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
 
 		c.Ui.Output(fmt.Sprintf("Task Group %q:", tg))
 		metrics := failedEval.FailedTGAllocs[tg]
-		dumpAllocMetrics(c.Ui, metrics, false)
+		c.Ui.Output(formatAllocMetrics(metrics, false, "  "))
 		if i != len(sorted)-1 {
 			c.Ui.Output("")
 		}
diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go
index 13a8c9e44..40f3b231c 100644
--- a/nomad/job_endpoint.go
+++ b/nomad/job_endpoint.go
@@ -459,7 +459,7 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse)
 
 	// Annotate and store the diff
 	if plans := len(planner.Plans); plans != 1 {
-		return fmt.Errorf("scheduler resulted in an unexpected number of plans: %d", plans)
+		return fmt.Errorf("scheduler resulted in an unexpected number of plans: %v", plans)
 	}
 	annotations := planner.Plans[0].Annotations
 	if args.Diff {
@@ -474,6 +474,13 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse)
 		reply.Diff = jobDiff
 	}
 
+	// Grab the placement failures; exactly one eval update is expected here
+	if n := len(planner.Evals); n != 1 {
+		return fmt.Errorf("scheduler resulted in an unexpected number of eval updates: %d", n)
+	}
+	updatedEval := planner.Evals[0]
+
+	reply.FailedTGAllocs = updatedEval.FailedTGAllocs
 	reply.JobModifyIndex = index
 	reply.Annotations = annotations
 	reply.CreatedEvals = planner.CreateEvals
diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go
index 47cc2e9cc..339b13439 100644
--- a/nomad/job_endpoint_test.go
+++ b/nomad/job_endpoint_test.go
@@ -962,6 +962,9 @@ func TestJobEndpoint_Plan_WithDiff(t *testing.T) {
 	if planResp.Diff == nil {
 		t.Fatalf("no diff")
 	}
+	if len(planResp.FailedTGAllocs) == 0 {
+		t.Fatalf("no failed task group alloc metrics")
+	}
 }
 
 func TestJobEndpoint_Plan_NoDiff(t *testing.T) {
@@ -1011,4 +1014,7 @@ func TestJobEndpoint_Plan_NoDiff(t *testing.T) {
 	if planResp.Diff != nil {
 		t.Fatalf("got diff")
 	}
+	if len(planResp.FailedTGAllocs) == 0 {
+		t.Fatalf("no failed task group alloc metrics")
+	}
 }
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 6f7be67ea..f624c4bfd 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -403,6 +403,9 @@ type JobPlanResponse struct {
 	// Annotations stores annotations explaining decisions the scheduler made.
 	Annotations *PlanAnnotations
 
+	// FailedTGAllocs maps each task group name to the metrics for its placement failures.
+	FailedTGAllocs map[string]*AllocMetric
+
 	// JobModifyIndex is the modification index of the job. The value can be
 	// used when running `nomad run` to ensure that the Job wasn’t modified
 	// since the last plan. If the job is being created, the value is zero.