scheduler: add disconnect and reschedule info to reconciler output (#26255)

The `DesiredUpdates` struct that we send to the Read Eval API doesn't include
information about disconnect/reconnect and rescheduling. Annotate the
`DesiredUpdates` with this data, and adjust the `eval status` command to display
only those fields that have non-zero values in order to make the output width
manageable.

Ref: https://hashicorp.atlassian.net/browse/NMD-815
Tim Gross committed by GitHub on 2025-07-16 08:46:38 -04:00
parent 9a288ef493, commit 35f3f6ce41
10 changed files with 263 additions and 132 deletions

View File

@@ -1596,6 +1596,10 @@ type DesiredUpdates struct {
DestructiveUpdate uint64
Canary uint64
Preemptions uint64
Disconnect uint64
Reconnect uint64
RescheduleNow uint64
RescheduleLater uint64
}
type JobDispatchRequest struct {
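
For orientation, here is a minimal sketch of how an API consumer might read the new per-group counts from an evaluation's plan annotations. It assumes a reachable Nomad agent; the evaluation ID is hypothetical.

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}

	// Hypothetical evaluation ID; substitute one returned by a real job registration.
	eval, _, err := client.Evaluations().Info("8f6af533-...", nil)
	if err != nil {
		panic(err)
	}
	if eval.PlanAnnotations == nil {
		return
	}

	for tg, u := range eval.PlanAnnotations.DesiredTGUpdates {
		// The new fields describe why allocations changed, alongside the
		// existing place/stop/ignore counts.
		fmt.Printf("%s: disconnect=%d reconnect=%d reschedule_now=%d reschedule_later=%d\n",
			tg, u.Disconnect, u.Reconnect, u.RescheduleNow, u.RescheduleLater)
	}
}
```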

View File

@@ -6,6 +6,7 @@ package command
import (
"fmt"
"sort"
"strconv"
"strings"
"time"
@@ -37,7 +38,7 @@ Eval Status Options:
Monitor an outstanding evaluation
-verbose
Show full-length IDs and exact timestamps.
Show full-length IDs, exact timestamps, and all plan annotation fields.
-json
Output the evaluation in its JSON format. This format will not include
@@ -261,25 +262,9 @@ func (c *EvalStatusCommand) formatEvalStatus(eval *api.Evaluation, placedAllocs
if eval.PlanAnnotations != nil {
if len(eval.PlanAnnotations.DesiredTGUpdates) > 0 {
c.Ui.Output(c.Colorize().Color("\n[bold]Reconciler Annotations[reset]"))
annotations := make([]string, len(eval.PlanAnnotations.DesiredTGUpdates)+1)
annotations[0] = "Task Group|Ignore|Place|Stop|Migrate|InPlace|Destructive|Canary|Preemptions"
i := 1
for tg, updates := range eval.PlanAnnotations.DesiredTGUpdates {
annotations[i] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d|%d|%d",
tg,
updates.Ignore,
updates.Place,
updates.Stop,
updates.Migrate,
updates.InPlaceUpdate,
updates.DestructiveUpdate,
updates.Canary,
updates.Preemptions,
)
i++
}
c.Ui.Output(columnize.SimpleFormat(annotations))
c.Ui.Output(c.Colorize().Color("\n[bold]Plan Annotations[reset]"))
c.Ui.Output(formatPlanAnnotations(
eval.PlanAnnotations.DesiredTGUpdates, verbose))
}
if len(eval.PlanAnnotations.PreemptedAllocs) > 0 {
@@ -378,3 +363,62 @@ func formatPreemptedAllocListStubs(stubs []*api.AllocationListStub, uuidLength i
}
return formatList(allocs)
}
// formatPlanAnnotations produces a table with one row per task group where the
// columns are all the changes (ignore, place, stop, etc.) plus all the non-zero
// causes of those changes (migrate, canary, reschedule, etc)
func formatPlanAnnotations(desiredTGUpdates map[string]*api.DesiredUpdates, verbose bool) string {
annotations := make([]string, len(desiredTGUpdates)+1)
annotations[0] = "Task Group|Ignore|Place|Stop|InPlace|Destructive"
optCols := []string{
"Migrate", "Canary", "Preemptions",
"Reschedule Now", "Reschedule Later", "Disconnect", "Reconnect"}
byCol := make([][]uint64, len(optCols))
for i := range byCol {
for j := range len(desiredTGUpdates) + 1 {
byCol[i] = make([]uint64, j+1)
}
}
i := 1
for tg, updates := range desiredTGUpdates {
// we always show the first 5 columns
annotations[i] = fmt.Sprintf("%s|%d|%d|%d|%d|%d",
tg,
updates.Ignore,
updates.Place,
updates.Stop,
updates.InPlaceUpdate,
updates.DestructiveUpdate,
)
// we record how many we have of the other columns so we can show them
// only if populated
byCol[0][i] = updates.Migrate
byCol[1][i] = updates.Canary
byCol[2][i] = updates.Preemptions
byCol[3][i] = updates.RescheduleNow
byCol[4][i] = updates.RescheduleLater
byCol[5][i] = updates.Disconnect
byCol[6][i] = updates.Reconnect
i++
}
// the remaining columns only show if they're populated or if we're in
// verbose mode
for i, col := range optCols {
for tgIdx := range len(desiredTGUpdates) + 1 {
byCol[i][0] += byCol[i][tgIdx]
}
if verbose || byCol[i][0] > 0 {
annotations[0] += "|" + col
for tgIdx := 1; tgIdx < len(desiredTGUpdates)+1; tgIdx++ {
annotations[tgIdx] += "|" + strconv.FormatUint(byCol[i][tgIdx], 10)
}
}
}
return columnize.SimpleFormat(annotations)
}

View File

@@ -223,6 +223,19 @@ Task Group "web" (failed to place 1 allocation):
must.StrContains(t, out, `Related Evaluations`)
must.StrContains(t, out, `Placed Allocations`)
must.StrContains(t, out, `Reconciler Annotations`)
must.StrContains(t, out, `Plan Annotations`)
must.StrContains(t, out, `Preempted Allocations`)
}
func TestEvalStatus_FormatPlanAnnotations(t *testing.T) {
updates := map[string]*api.DesiredUpdates{
"foo": {Place: 1, Ignore: 2, Canary: 1},
"bar": {Place: 1, Stop: 3, Reconnect: 2},
}
out := formatPlanAnnotations(updates, false)
must.Eq(t, `Task Group Ignore Place Stop InPlace Destructive Canary Reconnect
foo 2 1 0 0 0 1 0
bar 0 1 3 0 0 0 2`, out)
}
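
Complementing the table-driven check above, a sketch of a verbose-mode case (the test name and values are illustrative, not part of this commit). With `verbose` set to true every optional column is rendered even when its total is zero, and a single task group keeps the row order deterministic.

```go
func TestEvalStatus_FormatPlanAnnotations_Verbose(t *testing.T) {
	updates := map[string]*api.DesiredUpdates{
		"web": {Place: 1, Ignore: 2, Disconnect: 1, RescheduleNow: 1},
	}
	out := formatPlanAnnotations(updates, true)

	// Optional columns appear even though their totals are zero.
	must.StrContains(t, out, "Reschedule Later")
	must.StrContains(t, out, "Preemptions")
	must.StrContains(t, out, "Reconnect")
}
```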

View File

@@ -13087,11 +13087,15 @@ type DesiredUpdates struct {
DestructiveUpdate uint64
Canary uint64
Preemptions uint64
Disconnect uint64
Reconnect uint64
RescheduleNow uint64
RescheduleLater uint64
}
func (d *DesiredUpdates) GoString() string {
return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d) (reschedule now %d) (reschedule later %d) (disconnect %d) (reconnect %d)",
d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary, d.RescheduleNow, d.RescheduleLater, d.Disconnect, d.Reconnect)
}
// msgpackHandle is a shared handle for encoding/decoding of structs
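
As a quick illustration (the counts are arbitrary), the extended `GoString` output for a populated struct would look like this sketch:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	u := &structs.DesiredUpdates{Place: 2, Stop: 1, Disconnect: 2, RescheduleNow: 2}
	fmt.Println(u.GoString())
	// Output:
	// (place 2) (inplace 0) (destructive 0) (stop 1) (migrate 0) (ignore 0) (canary 0) (reschedule now 2) (reschedule later 0) (disconnect 2) (reconnect 0)
}
```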

View File

@@ -231,6 +231,10 @@ func (r *ReconcileResults) Fields() []any {
tg+"_migrate", u.Migrate,
tg+"_canary", u.Canary,
tg+"_preempt", u.Preemptions,
tg+"_reschedule_now", u.RescheduleNow,
tg+"_reschedule_later", u.RescheduleLater,
tg+"_disconnect", u.Disconnect,
tg+"_reconnect", u.Reconnect,
)
}
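
A minimal sketch of how the extended key/value pairs from `Fields()` might be handed to a logger; the helper name and the assumption of an `hclog.Logger` are illustrative, not from this commit.

```go
// logReconcile emits one debug line with the per-task-group counts, now
// including reschedule, disconnect, and reconnect totals.
func logReconcile(logger hclog.Logger, results *ReconcileResults) {
	// Fields() returns alternating key/value pairs such as
	// "web_reschedule_now", 2, "web_disconnect", 2, ...
	logger.Debug("reconciliation results", results.Fields()...)
}
```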
@@ -362,8 +366,9 @@ func cancelUnneededDeployments(j *structs.Job, d *structs.Deployment) (*structs.
}
// handleStop marks all allocations to be stopped, handling the lost case.
// Returns result structure with desired changes field set to stopped allocations
// and an array of stopped allocations.
// Returns result structure with desired changes field set to stopped
// allocations and an array of stopped allocations. It mutates the Stop fields
// on the DesiredUpdates.
func (a *AllocReconciler) handleStop(m allocMatrix) (map[string]*structs.DesiredUpdates, []AllocStopResult) {
result := make(map[string]*structs.DesiredUpdates)
allocsToStop := []AllocStopResult{}
@@ -489,6 +494,8 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe
// logged.
if len(reconnect) > 0 {
result.ReconnectUpdates = a.computeReconnecting(reconnect)
result.DesiredTGUpdates[tg.Name].Reconnect = uint64(len(result.ReconnectUpdates))
// The rest of the reconnecting allocations is now untainted and will
// be further reconciled below.
untainted = untainted.union(reconnect)
@@ -527,6 +534,8 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe
updates := appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals, rescheduleNow)
maps.Copy(result.DisconnectUpdates, updates)
result.DesiredTGUpdates[tg.Name].Disconnect = uint64(len(result.DisconnectUpdates))
result.DesiredTGUpdates[tg.Name].RescheduleNow = uint64(len(rescheduleNow))
}
// Find delays for any lost allocs that have stop_after_client_disconnect
@@ -550,6 +559,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe
var followups []*structs.Evaluation
followups, result.AttributeUpdates = a.createRescheduleLaterEvals(rescheduleLater, all, result.DisconnectUpdates)
result.DesiredFollowupEvals[tg.Name] = append(result.DesiredFollowupEvals[tg.Name], followups...)
result.DesiredTGUpdates[tg.Name].RescheduleLater = uint64(len(rescheduleLater))
}
// Create a structure for choosing names. Seed with the taken names
// which is the union of untainted, rescheduled, allocs on migrating
@@ -728,6 +738,8 @@ func requiresCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, de
!canariesPromoted
}
// computeCanaries returns the set of new canaries to place. It mutates the
// Canary field on the DesiredUpdates and the DesiredCanaries on the dstate
func (a *AllocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState,
destructive, canaries allocSet, desiredChanges *structs.DesiredUpdates, nameIndex *AllocNameIndex) []AllocPlaceResult {
dstate.DesiredCanaries = tg.Update.Canary
@@ -997,6 +1009,8 @@ func (a *AllocReconciler) placeAllocs(deploymentPlaceReady bool, desiredChanges
return underProvisionedBy, resultingPlacements, resultingAllocsToStop
}
// computeDestructiveUpdates returns the set of destructive updates. It mutates
// the DestructiveUpdate and Ignore fields on the DesiredUpdates counts
func (a *AllocReconciler) computeDestructiveUpdates(destructive allocSet, underProvisionedBy int,
desiredChanges *structs.DesiredUpdates, tg *structs.TaskGroup) []allocDestructiveResult {
@@ -1018,6 +1032,8 @@ func (a *AllocReconciler) computeDestructiveUpdates(destructive allocSet, underP
return destructiveResult
}
// computeMigrations returns the stops and placements for the allocs marked for
// migration. It mutates the Migrate field on the DesiredUpdates counts
func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdates, migrate allocSet,
tg *structs.TaskGroup, isCanarying bool) ([]AllocStopResult, []AllocPlaceResult) {

View File

@@ -25,7 +25,9 @@ func TestAllocReconciler_PropTest(t *testing.T) {
// collectExpected returns a convenience map that may hold multiple "states" for
// the same alloc (ex. all three of "total" and "terminal" and "failed")
collectExpected := func(ar *AllocReconciler) map[string]map[string]int {
collectExpected := func(t *rapid.T, ar *AllocReconciler) map[string]map[string]int {
t.Helper()
perTaskGroup := map[string]map[string]int{}
for _, tg := range ar.jobState.Job.TaskGroups {
perTaskGroup[tg.Name] = map[string]int{"expect_count": tg.Count}
@@ -33,7 +35,6 @@ func TestAllocReconciler_PropTest(t *testing.T) {
perTaskGroup[tg.Name]["max_canaries"] = tg.Update.Canary
}
}
for _, alloc := range ar.jobState.ExistingAllocs {
if _, ok := perTaskGroup[alloc.TaskGroup]; !ok {
// existing task group doesn't exist in new job
@@ -41,20 +42,54 @@ func TestAllocReconciler_PropTest(t *testing.T) {
}
perTaskGroup[alloc.TaskGroup]["exist_total"]++
perTaskGroup[alloc.TaskGroup]["exist_"+alloc.ClientStatus]++
perTaskGroup[alloc.TaskGroup]["exist_desired_"+alloc.DesiredStatus]++
if alloc.TerminalStatus() {
perTaskGroup[alloc.TaskGroup]["exist_terminal"]++
} else {
perTaskGroup[alloc.TaskGroup]["exist_non_terminal"]++
}
if alloc.ClientTerminalStatus() {
perTaskGroup[alloc.TaskGroup]["exist_client_terminal"]++
} else {
perTaskGroup[alloc.TaskGroup]["exist_non_client_terminal"]++
}
if alloc.ServerTerminalStatus() {
perTaskGroup[alloc.TaskGroup]["exist_server_terminal"]++
} else {
perTaskGroup[alloc.TaskGroup]["exist_non_server_terminal"]++
}
if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary {
perTaskGroup[alloc.TaskGroup]["exist_canary"]++
}
}
// these only assert our categories are reasonable
for _, counts := range perTaskGroup {
must.Eq(t, counts["exist_total"],
(counts["exist_pending"] +
counts["exist_running"] +
counts["exist_complete"] +
counts["exist_failed"] +
counts["exist_lost"] +
counts["exist_unknown"]),
must.Sprintf("exist_total doesn't add up: %+v", counts))
must.Eq(t, counts["exist_client_terminal"],
(counts["exist_complete"] +
counts["exist_failed"] +
counts["exist_lost"]),
must.Sprintf("exist_client_terminal doesn't add up: %+v", counts))
}
return perTaskGroup
}
// sharedSafetyProperties asserts safety properties ("something bad never
// happens") that apply to all job types that use the cluster reconciler
sharedSafetyProperties := func(t *rapid.T, ar *AllocReconciler, results *ReconcileResults, perTaskGroup map[string]map[string]int) {
t.Helper()
// stopped jobs
if ar.jobState.Job.Stopped() {
@@ -92,55 +127,73 @@ func TestAllocReconciler_PropTest(t *testing.T) {
for tgName, counts := range perTaskGroup {
tgUpdates := results.DesiredTGUpdates[tgName]
tprintf := func(msg string) must.Setting {
return must.Sprintf(msg+" (%s) %v => %+v", tgName, counts, tgUpdates)
}
// when the job is stopped or scaled to zero we can make stronger
// assertions, so split out these checks
if counts["expect_count"] == 0 || ar.jobState.Job.Stopped() {
must.Eq(t, 0, int(tgUpdates.Place),
tprintf("no placements on stop or scale-to-zero"))
must.Eq(t, 0, int(tgUpdates.Canary),
tprintf("no canaries on stop or scale-to-zero"))
must.Eq(t, 0, int(tgUpdates.DestructiveUpdate),
tprintf("no destructive updates on stop or scale-to-zero"))
must.Eq(t, 0, int(tgUpdates.Migrate),
tprintf("no migrating on stop or scale-to-zero"))
must.Eq(t, 0, int(tgUpdates.RescheduleLater),
tprintf("no rescheduling later on stop or scale-to-zero"))
must.Eq(t, 0, int(tgUpdates.Preemptions),
tprintf("no preemptions on stop or scale-to-zero"))
continue
}
must.LessEq(t, counts["expect_count"], int(tgUpdates.Place),
must.Sprintf("group placements should never exceed group count (%s): %v",
tgName, counts))
tprintf("group placements should never exceed group count"))
must.LessEq(t, counts["max_canaries"], int(tgUpdates.Canary),
must.Sprintf("canaries should never exceed expected canaries (%s): %v",
tgName, counts))
tprintf("canaries should never exceed expected canaries"))
must.LessEq(t, counts["max_canaries"], int(tgUpdates.Canary)+counts["exist_canary"],
must.Sprintf("canaries+existing canaries should never exceed expected canaries (%s): %v",
tgName, counts))
tprintf("canaries+existing canaries should never exceed expected canaries"))
must.LessEq(t, counts["expect_count"], int(tgUpdates.DestructiveUpdate),
must.Sprintf("destructive updates should never exceed group count (%s): %v",
tgName, counts))
tprintf("destructive updates should never exceed group count"))
must.LessEq(t, counts["expect_count"]+counts["max_canaries"],
int(tgUpdates.Canary)+int(tgUpdates.Place)+int(tgUpdates.DestructiveUpdate),
must.Sprintf("place+canaries+destructive should never exceed group count + expected canaries (%s): %v",
tgName, counts))
tprintf("place+canaries+destructive should never exceed group count + expected canaries"))
// TODO(tgross): needs per-taskgroup reconnect/disconnect values
// must.Eq(t, counts["expect_count"]+int(tgUpdates.Stop)+int(tgUpdates.Disconnected),
// int(tgUpdates.Place)+int(tgUpdates.Ignore)+int(tgUpdates.InPlaceUpdate)+int(tgUpdates.DestructiveUpdate)+int(tgUpdates.Reconnected),
// must.Sprintf(""),
// )
must.LessEq(t, counts["exist_non_client_terminal"], int(tgUpdates.Reconnect),
tprintf("reconnected should never exceed non-client-terminal"))
must.LessEq(t, counts["exist_total"], int(tgUpdates.InPlaceUpdate),
must.Sprintf("in-place updates should never exceed existing allocs (%s): %v",
tgName, counts))
tprintf("in-place updates should never exceed existing allocs"))
must.LessEq(t, counts["exist_total"], int(tgUpdates.DestructiveUpdate),
must.Sprintf("destructive updates should never exceed existing allocs (%s): %v",
tgName, counts))
tprintf("destructive updates should never exceed existing allocs"))
must.LessEq(t, counts["exist_total"], int(tgUpdates.Migrate),
must.Sprintf("migrations should never exceed existing allocs (%s): %v",
tgName, counts))
tprintf("migrations should never exceed existing allocs"))
must.LessEq(t, counts["exist_total"], int(tgUpdates.Ignore),
must.Sprintf("ignore should never exceed existing allocs (%s): %v",
tgName, counts))
tprintf("ignore should never exceed existing allocs"))
must.GreaterEq(t, tgUpdates.Migrate, tgUpdates.Stop,
tprintf("migrated allocs should be stopped"))
must.GreaterEq(t, tgUpdates.RescheduleLater, tgUpdates.Ignore,
tprintf("reschedule-later allocs should be ignored"))
}
}
t.Run("batch jobs", rapid.MakeCheck(func(t *rapid.T) {
ar := genAllocReconciler(structs.JobTypeBatch, &idGenerator{}).Draw(t, "reconciler")
perTaskGroup := collectExpected(ar)
perTaskGroup := collectExpected(t, ar)
results := ar.Compute()
must.NotNil(t, results, must.Sprint("results should never be nil"))
@@ -149,7 +202,7 @@ func TestAllocReconciler_PropTest(t *testing.T) {
t.Run("service jobs", rapid.MakeCheck(func(t *rapid.T) {
ar := genAllocReconciler(structs.JobTypeService, &idGenerator{}).Draw(t, "reconciler")
perTaskGroup := collectExpected(ar)
perTaskGroup := collectExpected(t, ar)
results := ar.Compute()
must.NotNil(t, results, must.Sprint("results should never be nil"))

View File

@@ -820,6 +820,7 @@ func TestReconciler_Inplace_Rollback(t *testing.T) {
Stop: 1,
InPlaceUpdate: 1,
DestructiveUpdate: 1,
RescheduleLater: 1,
},
},
})
@@ -1829,10 +1830,11 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) {
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 0,
InPlaceUpdate: 0,
Ignore: 4,
Stop: 0,
Place: 0,
InPlaceUpdate: 0,
Ignore: 4,
Stop: 0,
RescheduleLater: 1,
},
},
})
@@ -1925,10 +1927,11 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 0,
InPlaceUpdate: 0,
Ignore: 10,
Stop: 0,
Place: 0,
InPlaceUpdate: 0,
Ignore: 10,
Stop: 0,
RescheduleLater: 7,
},
},
})
@@ -2115,10 +2118,11 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) {
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
InPlaceUpdate: 0,
Ignore: 4,
Stop: 0,
Place: 1,
InPlaceUpdate: 0,
Ignore: 4,
Stop: 0,
RescheduleLater: 1,
},
},
})
@@ -6022,7 +6026,8 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
reconnectUpdates: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 2,
Ignore: 2,
Reconnect: 2,
},
},
},
@@ -6041,8 +6046,9 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
reconnectUpdates: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 1,
Ignore: 3,
Stop: 1,
Ignore: 3,
Reconnect: 1,
},
},
},
@@ -6223,8 +6229,10 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
disconnectUpdates: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Place: 2,
Ignore: 3,
Place: 2,
Ignore: 3,
Disconnect: 2,
RescheduleNow: 2,
},
},
},
@@ -6519,6 +6527,8 @@ func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) {
Stop: 0,
Ignore: 1,
InPlaceUpdate: 0,
Disconnect: 2,
RescheduleNow: 2,
},
},
})
@@ -6668,9 +6678,11 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
reconnectUpdates: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
updatedJob.TaskGroups[0].Name: {
Place: 3,
Canary: 0,
Ignore: 6,
Place: 3,
Canary: 0,
Ignore: 6,
Disconnect: 3,
RescheduleNow: 3,
},
},
},
@@ -6732,9 +6744,11 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
reconnectUpdates: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
updatedJob.TaskGroups[0].Name: {
Place: 2,
Canary: 0,
Ignore: 7,
Place: 2,
Canary: 0,
Ignore: 7,
Disconnect: 2,
RescheduleNow: 2,
},
},
},
@@ -6805,7 +6819,9 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
// the deployment can still progress. We don't include
// them in the stop count since DesiredTGUpdates is used
// to report deployment progress or final deployment state.
Stop: 0,
Stop: 0,
Disconnect: 2,
RescheduleNow: 2,
},
},
},

View File

@@ -69,6 +69,7 @@ func TestNodeReconciler_PropTest(t *testing.T) {
// sharedSafetyProperties asserts safety properties ("something bad never
// happens") that apply to all job types that use the node reconciler
sharedSafetyProperties := func(t *rapid.T, nr *nodeReconcilerInput, results *NodeReconcileResult, perTaskGroup map[string]map[string]int) {
t.Helper()
if !nr.serverSupportsDisconnectedClients {
must.Len(t, 0, results.Disconnecting,

View File

@@ -700,17 +700,7 @@ func desiredUpdates(diff *reconciler.NodeReconcileResult, inplaceUpdates,
destructiveUpdates []reconciler.AllocTuple) map[string]*structs.DesiredUpdates {
desiredTgs := make(map[string]*structs.DesiredUpdates)
for _, tuple := range diff.Place {
name := tuple.TaskGroup.Name
des, ok := desiredTgs[name]
if !ok {
des = &structs.DesiredUpdates{}
desiredTgs[name] = des
}
des.Place++
}
// diff.Stop may have a nil TaskGroup
for _, tuple := range diff.Stop {
name := tuple.Alloc.TaskGroup
des, ok := desiredTgs[name]
@@ -722,49 +712,26 @@ func desiredUpdates(diff *reconciler.NodeReconcileResult, inplaceUpdates,
des.Stop++
}
for _, tuple := range diff.Ignore {
name := tuple.TaskGroup.Name
des, ok := desiredTgs[name]
if !ok {
des = &structs.DesiredUpdates{}
desiredTgs[name] = des
}
incUpdates := func(tuples []reconciler.AllocTuple, fn func(des *structs.DesiredUpdates)) {
for _, tuple := range tuples {
name := tuple.TaskGroup.Name
des, ok := desiredTgs[name]
if !ok {
des = &structs.DesiredUpdates{}
desiredTgs[name] = des
}
des.Ignore++
fn(des)
}
}
for _, tuple := range diff.Migrate {
name := tuple.TaskGroup.Name
des, ok := desiredTgs[name]
if !ok {
des = &structs.DesiredUpdates{}
desiredTgs[name] = des
}
des.Migrate++
}
for _, tuple := range inplaceUpdates {
name := tuple.TaskGroup.Name
des, ok := desiredTgs[name]
if !ok {
des = &structs.DesiredUpdates{}
desiredTgs[name] = des
}
des.InPlaceUpdate++
}
for _, tuple := range destructiveUpdates {
name := tuple.TaskGroup.Name
des, ok := desiredTgs[name]
if !ok {
des = &structs.DesiredUpdates{}
desiredTgs[name] = des
}
des.DestructiveUpdate++
}
incUpdates(diff.Place, func(des *structs.DesiredUpdates) { des.Place++ })
incUpdates(diff.Ignore, func(des *structs.DesiredUpdates) { des.Ignore++ })
incUpdates(diff.Migrate, func(des *structs.DesiredUpdates) { des.Migrate++ })
incUpdates(inplaceUpdates, func(des *structs.DesiredUpdates) { des.InPlaceUpdate++ })
incUpdates(destructiveUpdates, func(des *structs.DesiredUpdates) { des.DestructiveUpdate++ })
incUpdates(diff.Disconnecting, func(des *structs.DesiredUpdates) { des.Disconnect++ })
incUpdates(diff.Reconnecting, func(des *structs.DesiredUpdates) { des.Reconnect++ })
return desiredTgs
}

View File

@@ -40,7 +40,8 @@ indicated by exit code 1.
## Options
- `-monitor`: Monitor an outstanding evaluation
- `-verbose`: Show full-length IDs and exact timestamps.
- `-verbose`: Show full-length IDs, exact timestamps, and all reconciler
annotation fields.
- `-json`: Output the evaluation in its JSON format. This format will not
include placed allocations.
- `-t` : Format and display evaluation using a Go template. This format will not
@@ -50,7 +51,17 @@ indicated by exit code 1.
## Examples
Show the status of an evaluation with related evaluations, successful
placements, failed placements, and preemptions.
placements, scheduler annotations, failed placements, and preemptions.
The plan annotations table shows the output of the scheduler's reconciliation
stage, which produces a desired set of changes that later stages of the
scheduler attempt to carry out. The [`update.max_parallel`][] field or placement
failures may result in a difference between these numbers and the updates
actually made to the job. This table always includes the count of allocations
to ignore, place, stop, update in place, and destructively update. It may also
include the count of canary allocations or of allocations that were rescheduled,
migrated, preempted, reconnected, or disconnected. These counts may overlap;
for example, an allocation can be counted as both migrated and stopped.
```shell-session
$ nomad eval status 8f6af533
@@ -73,9 +84,9 @@ Related Evaluations
ID Priority Triggered By Node ID Status Description
fd6f3091 50 queued-allocs <none> pending <none>
Reconciler Annotations
Task Group Ignore Place Stop Migrate InPlace Destructive Canary Preemptions
group 0 3 0 0 0 0 0 1
Plan Annotations
Task Group Ignore Place Stop InPlace Destructive Migrate Canary Preemptions
group 0 3 0 0 0 0 0 1
Preempted Allocations
ID Job ID Node ID Task Group Version Desired Status Created Modified
@@ -109,3 +120,5 @@ $ nomad eval status -monitor 8262bc83
## General options
@include 'general_options.mdx'
[`update.max_parallel`]: /nomad/docs/job-specification/update#max_parallel