plan shows failure reasons and ordered annotations

This commit is contained in:
Alex Dadgar
2016-05-31 21:51:23 +00:00
parent a47c81ca1f
commit c77026f5da
8 changed files with 70 additions and 24 deletions

View File

@@ -286,6 +286,7 @@ type JobPlanResponse struct {
CreatedEvals []*Evaluation
Diff *JobDiff
Annotations *PlanAnnotations
FailedTGAllocs map[string]*AllocationMetric
}
type JobDiff struct {

View File

@@ -162,7 +162,7 @@ func (c *EvalStatusCommand) Run(args []string) int {
noun += "s"
}
c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
dumpAllocMetrics(c.Ui, metrics, false)
c.Ui.Output(formatAllocMetrics(metrics, false, " "))
c.Ui.Output("")
}

View File

@@ -2,6 +2,7 @@ package command
import (
"fmt"
"strings"
"sync"
"time"
@@ -309,7 +310,10 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int {
noun += "s"
}
m.ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
dumpAllocMetrics(m.ui, metrics, false)
metrics := formatAllocMetrics(metrics, false, " ")
for _, line := range strings.Split(metrics, "\n") {
m.ui.Output(line)
}
}
if eval.BlockedEval != "" {
@@ -358,46 +362,50 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation, length int) {
ui.Output(fmt.Sprintf("Allocation %q status %q (%d/%d nodes filtered)",
limit(alloc.ID, length), alloc.ClientStatus,
alloc.Metrics.NodesFiltered, alloc.Metrics.NodesEvaluated))
dumpAllocMetrics(ui, alloc.Metrics, true)
ui.Output(formatAllocMetrics(alloc.Metrics, true, " "))
}
func dumpAllocMetrics(ui cli.Ui, metrics *api.AllocationMetric, scores bool) {
func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix string) string {
// Print a helpful message if we have an eligibility problem
var out string
if metrics.NodesEvaluated == 0 {
ui.Output(" * No nodes were eligible for evaluation")
out += fmt.Sprintf("%s* No nodes were eligible for evaluation\n", prefix)
}
// Print a helpful message if the user has asked for a DC that has no
// available nodes.
for dc, available := range metrics.NodesAvailable {
if available == 0 {
ui.Output(fmt.Sprintf(" * No nodes are available in datacenter %q", dc))
out += fmt.Sprintf("%s* No nodes are available in datacenter %q\n", prefix, dc)
}
}
// Print filter info
for class, num := range metrics.ClassFiltered {
ui.Output(fmt.Sprintf(" * Class %q filtered %d nodes", class, num))
out += fmt.Sprintf("%s* Class %q filtered %d nodes\n", prefix, class, num)
}
for cs, num := range metrics.ConstraintFiltered {
ui.Output(fmt.Sprintf(" * Constraint %q filtered %d nodes", cs, num))
out += fmt.Sprintf("%s* Constraint %q filtered %d nodes\n", prefix, cs, num)
}
// Print exhaustion info
if ne := metrics.NodesExhausted; ne > 0 {
ui.Output(fmt.Sprintf(" * Resources exhausted on %d nodes", ne))
out += fmt.Sprintf("%s* Resources exhausted on %d nodes\n", prefix, ne)
}
for class, num := range metrics.ClassExhausted {
ui.Output(fmt.Sprintf(" * Class %q exhausted on %d nodes", class, num))
out += fmt.Sprintf("%s* Class %q exhausted on %d nodes\n", prefix, class, num)
}
for dim, num := range metrics.DimensionExhausted {
ui.Output(fmt.Sprintf(" * Dimension %q exhausted on %d nodes", dim, num))
out += fmt.Sprintf("%s* Dimension %q exhausted on %d nodes\n", prefix, dim, num)
}
// Print scores
if scores {
for name, score := range metrics.Scores {
ui.Output(fmt.Sprintf(" * Score %q = %f", name, score))
out += fmt.Sprintf("%s* Score %q = %f\n", prefix, name, score)
}
}
out = strings.TrimSuffix(out, "\n")
return out
}

View File

@@ -2,6 +2,7 @@ package command
import (
"fmt"
"sort"
"strings"
"github.com/hashicorp/nomad/api"
@@ -130,7 +131,8 @@ func (c *PlanCommand) Run(args []string) int {
// Print the scheduler dry-run output
c.Ui.Output(c.Colorize().Color("[bold]Scheduler dry-run:[reset]"))
c.Ui.Output(c.Colorize().Color(formatDryRun(resp.CreatedEvals)))
c.Ui.Output(c.Colorize().Color(formatDryRun(resp.FailedTGAllocs, resp.CreatedEvals)))
c.Ui.Output("")
// Print the job index info
c.Ui.Output(c.Colorize().Color(formatJobModifyIndex(resp.JobModifyIndex, file)))
@@ -146,28 +148,40 @@ func formatJobModifyIndex(jobModifyIndex uint64, jobName string) string {
}
// formatDryRun produces a string explaining the results of the dry run.
func formatDryRun(evals []*api.Evaluation) string {
func formatDryRun(failedTGAllocs map[string]*api.AllocationMetric, evals []*api.Evaluation) string {
var rolling *api.Evaluation
var blocked *api.Evaluation
for _, eval := range evals {
if eval.TriggeredBy == "rolling-update" {
rolling = eval
} else if eval.Status == "blocked" {
blocked = eval
}
}
var out string
if blocked == nil {
out = "[bold][green] - All tasks successfully allocated.[reset]\n"
if len(failedTGAllocs) == 0 {
out = "[bold][green]- All tasks successfully allocated.[reset]\n"
} else {
out = "[bold][yellow] - WARNING: Failed to place all allocations.[reset]\n"
out = "[bold][yellow]- WARNING: Failed to place all allocations.[reset]\n"
sorted := sortedTaskGroupFromMetrics(failedTGAllocs)
for _, tg := range sorted {
metrics := failedTGAllocs[tg]
noun := "allocation"
if metrics.CoalescedFailures > 0 {
noun += "s"
}
out += fmt.Sprintf("%s[yellow]Task Group %q (failed to place %d %s):\n[reset]", strings.Repeat(" ", 2), tg, metrics.CoalescedFailures+1, noun)
out += fmt.Sprintf("[yellow]%s[reset]\n\n", formatAllocMetrics(metrics, false, strings.Repeat(" ", 4)))
}
if rolling == nil {
out = strings.TrimSuffix(out, "\n")
}
}
if rolling != nil {
out += fmt.Sprintf("[green] - Rolling update, next evaluation will be in %s.\n", rolling.Wait)
out += fmt.Sprintf("[green]- Rolling update, next evaluation will be in %s.\n", rolling.Wait)
}
out = strings.TrimSuffix(out, "\n")
return out
}
@@ -216,8 +230,15 @@ func formatTaskGroupDiff(tg *api.TaskGroupDiff, tgPrefix int, verbose bool) stri
// Append the updates and colorize them
if l := len(tg.Updates); l > 0 {
order := make([]string, 0, l)
for updateType := range tg.Updates {
order = append(order, updateType)
}
sort.Strings(order)
updates := make([]string, 0, l)
for updateType, count := range tg.Updates {
for _, updateType := range order {
count := tg.Updates[updateType]
var color string
switch updateType {
case scheduler.UpdateTypeIgnore:

View File

@@ -320,7 +320,7 @@ func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
c.Ui.Output(fmt.Sprintf("Task Group %q:", tg))
metrics := failedEval.FailedTGAllocs[tg]
dumpAllocMetrics(c.Ui, metrics, false)
c.Ui.Output(formatAllocMetrics(metrics, false, " "))
if i != len(sorted)-1 {
c.Ui.Output("")
}

View File

@@ -459,7 +459,7 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse)
// Annotate and store the diff
if plans := len(planner.Plans); plans != 1 {
return fmt.Errorf("scheduler resulted in an unexpected number of plans: %d", plans)
return fmt.Errorf("scheduler resulted in an unexpected number of plans: %v", plans)
}
annotations := planner.Plans[0].Annotations
if args.Diff {
@@ -474,6 +474,13 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse)
reply.Diff = jobDiff
}
// Grab the failures
if len(planner.Evals) != 1 {
return fmt.Errorf("scheduler resulted in an unexpected number of eval updates: %v", planner.Evals)
}
updatedEval := planner.Evals[0]
reply.FailedTGAllocs = updatedEval.FailedTGAllocs
reply.JobModifyIndex = index
reply.Annotations = annotations
reply.CreatedEvals = planner.CreateEvals

View File

@@ -962,6 +962,9 @@ func TestJobEndpoint_Plan_WithDiff(t *testing.T) {
if planResp.Diff == nil {
t.Fatalf("no diff")
}
if len(planResp.FailedTGAllocs) == 0 {
t.Fatalf("no failed task group alloc metrics")
}
}
func TestJobEndpoint_Plan_NoDiff(t *testing.T) {
@@ -1011,4 +1014,7 @@ func TestJobEndpoint_Plan_NoDiff(t *testing.T) {
if planResp.Diff != nil {
t.Fatalf("got diff")
}
if len(planResp.FailedTGAllocs) == 0 {
t.Fatalf("no failed task group alloc metrics")
}
}

View File

@@ -403,6 +403,9 @@ type JobPlanResponse struct {
// Annotations stores annotations explaining decisions the scheduler made.
Annotations *PlanAnnotations
// FailedTGAllocs is the placement failures per task group.
FailedTGAllocs map[string]*AllocMetric
// JobModifyIndex is the modification index of the job. The value can be
// used when running `nomad run` to ensure that the Job wasn't modified
// since the last plan. If the job is being created, the value is zero.