scheduler: add disconnect and reschedule info to reconciler output (#26255)
The `DesiredUpdates` struct that we send to the Read Eval API doesn't include information about disconnect/reconnect and rescheduling. Annotate the `DesiredUpdates` with this data, and adjust the `eval status` command to display only those fields that have non-zero values in order to make the output width manageable. Ref: https://hashicorp.atlassian.net/browse/NMD-815
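For context, a minimal sketch of how a consumer of the Go API could read the new annotation fields once this change lands. This is not part of the commit; it assumes the standard `github.com/hashicorp/nomad/api` client and its `Evaluations().Info` method, and the `evalID` value is a placeholder:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Assumes a Nomad agent reachable via the usual NOMAD_ADDR/NOMAD_TOKEN settings.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Hypothetical evaluation ID; in practice this comes from job submission
	// or the eval list API.
	evalID := "8f6af533-0000-0000-0000-000000000000"

	eval, _, err := client.Evaluations().Info(evalID, nil)
	if err != nil {
		log.Fatal(err)
	}
	if eval.PlanAnnotations == nil {
		return
	}

	// With this commit, DesiredUpdates also carries disconnect/reconnect and
	// reschedule counts per task group.
	for tg, u := range eval.PlanAnnotations.DesiredTGUpdates {
		fmt.Printf("%s: disconnect=%d reconnect=%d reschedule_now=%d reschedule_later=%d\n",
			tg, u.Disconnect, u.Reconnect, u.RescheduleNow, u.RescheduleLater)
	}
}
```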
@@ -1596,6 +1596,10 @@ type DesiredUpdates struct {
   DestructiveUpdate uint64
   Canary uint64
   Preemptions uint64
+  Disconnect uint64
+  Reconnect uint64
+  RescheduleNow uint64
+  RescheduleLater uint64
 }

 type JobDispatchRequest struct {

@@ -6,6 +6,7 @@ package command
 import (
   "fmt"
   "sort"
+  "strconv"
   "strings"
   "time"

@@ -37,7 +38,7 @@ Eval Status Options:
     Monitor an outstanding evaluation

   -verbose
-    Show full-length IDs and exact timestamps.
+    Show full-length IDs, exact timestamps, and all plan annotation fields.

   -json
     Output the evaluation in its JSON format. This format will not include

@@ -261,25 +262,9 @@ func (c *EvalStatusCommand) formatEvalStatus(eval *api.Evaluation, placedAllocs
   if eval.PlanAnnotations != nil {

     if len(eval.PlanAnnotations.DesiredTGUpdates) > 0 {
-      c.Ui.Output(c.Colorize().Color("\n[bold]Reconciler Annotations[reset]"))
-      annotations := make([]string, len(eval.PlanAnnotations.DesiredTGUpdates)+1)
-      annotations[0] = "Task Group|Ignore|Place|Stop|Migrate|InPlace|Destructive|Canary|Preemptions"
-      i := 1
-      for tg, updates := range eval.PlanAnnotations.DesiredTGUpdates {
-        annotations[i] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d|%d|%d",
-          tg,
-          updates.Ignore,
-          updates.Place,
-          updates.Stop,
-          updates.Migrate,
-          updates.InPlaceUpdate,
-          updates.DestructiveUpdate,
-          updates.Canary,
-          updates.Preemptions,
-        )
-        i++
-      }
-      c.Ui.Output(columnize.SimpleFormat(annotations))
+      c.Ui.Output(c.Colorize().Color("\n[bold]Plan Annotations[reset]"))
+      c.Ui.Output(formatPlanAnnotations(
+        eval.PlanAnnotations.DesiredTGUpdates, verbose))
     }

     if len(eval.PlanAnnotations.PreemptedAllocs) > 0 {

@@ -378,3 +363,62 @@ func formatPreemptedAllocListStubs(stubs []*api.AllocationListStub, uuidLength i
   }
   return formatList(allocs)
 }
+
+// formatPlanAnnotations produces a table with one row per task group where the
+// columns are all the changes (ignore, place, stop, etc.) plus all the non-zero
+// causes of those changes (migrate, canary, reschedule, etc)
+func formatPlanAnnotations(desiredTGUpdates map[string]*api.DesiredUpdates, verbose bool) string {
+  annotations := make([]string, len(desiredTGUpdates)+1)
+
+  annotations[0] = "Task Group|Ignore|Place|Stop|InPlace|Destructive"
+  optCols := []string{
+    "Migrate", "Canary", "Preemptions",
+    "Reschedule Now", "Reschedule Later", "Disconnect", "Reconnect"}
+
+  byCol := make([][]uint64, len(optCols))
+  for i := range byCol {
+    for j := range len(desiredTGUpdates) + 1 {
+      byCol[i] = make([]uint64, j+1)
+    }
+  }
+
+  i := 1
+  for tg, updates := range desiredTGUpdates {
+    // we always show the first 5 columns
+    annotations[i] = fmt.Sprintf("%s|%d|%d|%d|%d|%d",
+      tg,
+      updates.Ignore,
+      updates.Place,
+      updates.Stop,
+      updates.InPlaceUpdate,
+      updates.DestructiveUpdate,
+    )
+
+    // we record how many we have of the other columns so we can show them
+    // only if populated
+    byCol[0][i] = updates.Migrate
+    byCol[1][i] = updates.Canary
+    byCol[2][i] = updates.Preemptions
+    byCol[3][i] = updates.RescheduleNow
+    byCol[4][i] = updates.RescheduleLater
+    byCol[5][i] = updates.Disconnect
+    byCol[6][i] = updates.Reconnect
+    i++
+  }
+
+  // the remaining columns only show if they're populated or if we're in
+  // verbose mode
+  for i, col := range optCols {
+    for tgIdx := range len(desiredTGUpdates) + 1 {
+      byCol[i][0] += byCol[i][tgIdx]
+    }
+    if verbose || byCol[i][0] > 0 {
+      annotations[0] += "|" + col
+      for tgIdx := 1; tgIdx < len(desiredTGUpdates)+1; tgIdx++ {
+        annotations[tgIdx] += "|" + strconv.FormatUint(byCol[i][tgIdx], 10)
+      }
+    }
+  }
+
+  return columnize.SimpleFormat(annotations)
+}

@@ -223,6 +223,19 @@ Task Group "web" (failed to place 1 allocation):

   must.StrContains(t, out, `Related Evaluations`)
   must.StrContains(t, out, `Placed Allocations`)
-  must.StrContains(t, out, `Reconciler Annotations`)
+  must.StrContains(t, out, `Plan Annotations`)
   must.StrContains(t, out, `Preempted Allocations`)
 }
+
+func TestEvalStatus_FormatPlanAnnotations(t *testing.T) {
+
+  updates := map[string]*api.DesiredUpdates{
+    "foo": {Place: 1, Ignore: 2, Canary: 1},
+    "bar": {Place: 1, Stop: 3, Reconnect: 2},
+  }
+
+  out := formatPlanAnnotations(updates, false)
+  must.Eq(t, `Task Group  Ignore  Place  Stop  InPlace  Destructive  Canary  Reconnect
+foo         2       1      0     0        0            1       0
+bar         0       1      3     0        0            0       2`, out)
+}

@@ -13087,11 +13087,15 @@ type DesiredUpdates struct {
   DestructiveUpdate uint64
   Canary uint64
   Preemptions uint64
+  Disconnect uint64
+  Reconnect uint64
+  RescheduleNow uint64
+  RescheduleLater uint64
 }

 func (d *DesiredUpdates) GoString() string {
-  return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
-    d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
+  return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d) (reschedule now %d) (reschedule later %d) (disconnect %d) (reconnect %d)",
+    d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary, d.RescheduleNow, d.RescheduleLater, d.Disconnect, d.Reconnect)
 }

 // msgpackHandle is a shared handle for encoding/decoding of structs

@@ -231,6 +231,10 @@ func (r *ReconcileResults) Fields() []any {
       tg+"_migrate", u.Migrate,
       tg+"_canary", u.Canary,
       tg+"_preempt", u.Preemptions,
+      tg+"_reschedule_now", u.RescheduleNow,
+      tg+"_reschedule_later", u.RescheduleLater,
+      tg+"_disconnect", u.Disconnect,
+      tg+"_reconnect", u.Reconnect,
     )
   }

@@ -362,8 +366,9 @@ func cancelUnneededDeployments(j *structs.Job, d *structs.Deployment) (*structs.
 }

 // handleStop marks all allocations to be stopped, handling the lost case.
-// Returns result structure with desired changes field set to stopped allocations
-// and an array of stopped allocations.
+// Returns result structure with desired changes field set to stopped
+// allocations and an array of stopped allocations. It mutates the Stop fields
+// on the DesiredUpdates.
 func (a *AllocReconciler) handleStop(m allocMatrix) (map[string]*structs.DesiredUpdates, []AllocStopResult) {
   result := make(map[string]*structs.DesiredUpdates)
   allocsToStop := []AllocStopResult{}

@@ -489,6 +494,8 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe
   // logged.
   if len(reconnect) > 0 {
     result.ReconnectUpdates = a.computeReconnecting(reconnect)
+    result.DesiredTGUpdates[tg.Name].Reconnect = uint64(len(result.ReconnectUpdates))
+
     // The rest of the reconnecting allocations is now untainted and will
     // be further reconciled below.
     untainted = untainted.union(reconnect)

@@ -527,6 +534,8 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe

     updates := appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals, rescheduleNow)
     maps.Copy(result.DisconnectUpdates, updates)
+    result.DesiredTGUpdates[tg.Name].Disconnect = uint64(len(result.DisconnectUpdates))
+    result.DesiredTGUpdates[tg.Name].RescheduleNow = uint64(len(rescheduleNow))
   }

   // Find delays for any lost allocs that have stop_after_client_disconnect

@@ -550,6 +559,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe
     var followups []*structs.Evaluation
     followups, result.AttributeUpdates = a.createRescheduleLaterEvals(rescheduleLater, all, result.DisconnectUpdates)
     result.DesiredFollowupEvals[tg.Name] = append(result.DesiredFollowupEvals[tg.Name], followups...)
+    result.DesiredTGUpdates[tg.Name].RescheduleLater = uint64(len(rescheduleLater))
   }
   // Create a structure for choosing names. Seed with the taken names
   // which is the union of untainted, rescheduled, allocs on migrating

@@ -728,6 +738,8 @@ func requiresCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, de
     !canariesPromoted
 }

+// computeCanaries returns the set of new canaries to place. It mutates the
+// Canary field on the DesiredUpdates and the DesiredCanaries on the dstate
 func (a *AllocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState,
   destructive, canaries allocSet, desiredChanges *structs.DesiredUpdates, nameIndex *AllocNameIndex) []AllocPlaceResult {
   dstate.DesiredCanaries = tg.Update.Canary

@@ -997,6 +1009,8 @@ func (a *AllocReconciler) placeAllocs(deploymentPlaceReady bool, desiredChanges
   return underProvisionedBy, resultingPlacements, resultingAllocsToStop
 }

+// computeDestructiveUpdates returns the set of destructive updates. It mutates
+// the DestructiveUpdate and Ignore fields on the DesiredUpdates counts
 func (a *AllocReconciler) computeDestructiveUpdates(destructive allocSet, underProvisionedBy int,
   desiredChanges *structs.DesiredUpdates, tg *structs.TaskGroup) []allocDestructiveResult {

@@ -1018,6 +1032,8 @@ func (a *AllocReconciler) computeDestructiveUpdates(destructive allocSet, underP
   return destructiveResult
 }

+// computeMigrations returns the stops and placements for the allocs marked for
+// migration. It mutates the Migrate field on the DesiredUpdates counts
 func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdates, migrate allocSet,
   tg *structs.TaskGroup, isCanarying bool) ([]AllocStopResult, []AllocPlaceResult) {

@@ -25,7 +25,9 @@ func TestAllocReconciler_PropTest(t *testing.T) {

   // collectExpected returns a convenience map that may hold multiple "states" for
   // the same alloc (ex. all three of "total" and "terminal" and "failed")
-  collectExpected := func(ar *AllocReconciler) map[string]map[string]int {
+  collectExpected := func(t *rapid.T, ar *AllocReconciler) map[string]map[string]int {
+    t.Helper()
+
     perTaskGroup := map[string]map[string]int{}
     for _, tg := range ar.jobState.Job.TaskGroups {
       perTaskGroup[tg.Name] = map[string]int{"expect_count": tg.Count}

@@ -33,7 +35,6 @@ func TestAllocReconciler_PropTest(t *testing.T) {
       perTaskGroup[tg.Name]["max_canaries"] = tg.Update.Canary
     }
   }

   for _, alloc := range ar.jobState.ExistingAllocs {
     if _, ok := perTaskGroup[alloc.TaskGroup]; !ok {
       // existing task group doesn't exist in new job

@@ -41,20 +42,54 @@ func TestAllocReconciler_PropTest(t *testing.T) {
     }
     perTaskGroup[alloc.TaskGroup]["exist_total"]++
     perTaskGroup[alloc.TaskGroup]["exist_"+alloc.ClientStatus]++
+    perTaskGroup[alloc.TaskGroup]["exist_desired_"+alloc.DesiredStatus]++
     if alloc.TerminalStatus() {
       perTaskGroup[alloc.TaskGroup]["exist_terminal"]++
     } else {
       perTaskGroup[alloc.TaskGroup]["exist_non_terminal"]++
     }
+    if alloc.ClientTerminalStatus() {
+      perTaskGroup[alloc.TaskGroup]["exist_client_terminal"]++
+    } else {
+      perTaskGroup[alloc.TaskGroup]["exist_non_client_terminal"]++
+    }
+    if alloc.ServerTerminalStatus() {
+      perTaskGroup[alloc.TaskGroup]["exist_server_terminal"]++
+    } else {
+      perTaskGroup[alloc.TaskGroup]["exist_non_server_terminal"]++
+    }
+
+    if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary {
+      perTaskGroup[alloc.TaskGroup]["exist_canary"]++
+    }
   }

+  // these only assert our categories are reasonable
+  for _, counts := range perTaskGroup {
+    must.Eq(t, counts["exist_total"],
+      (counts["exist_pending"] +
+        counts["exist_running"] +
+        counts["exist_complete"] +
+        counts["exist_failed"] +
+        counts["exist_lost"] +
+        counts["exist_unknown"]),
+      must.Sprintf("exist_total doesn't add up: %+v", counts))
+
+    must.Eq(t, counts["exist_client_terminal"],
+      (counts["exist_complete"] +
+        counts["exist_failed"] +
+        counts["exist_lost"]),
+      must.Sprintf("exist_client_terminal doesn't add up: %+v", counts))
+  }
+
   return perTaskGroup
 }

 // sharedSafetyProperties asserts safety properties ("something bad never
 // happens") that apply to all job types that use the cluster reconciler
 sharedSafetyProperties := func(t *rapid.T, ar *AllocReconciler, results *ReconcileResults, perTaskGroup map[string]map[string]int) {
+  t.Helper()

   // stopped jobs
   if ar.jobState.Job.Stopped() {

@@ -92,55 +127,73 @@ func TestAllocReconciler_PropTest(t *testing.T) {
   for tgName, counts := range perTaskGroup {
     tgUpdates := results.DesiredTGUpdates[tgName]

+    tprintf := func(msg string) must.Setting {
+      return must.Sprintf(msg+" (%s) %v => %+v", tgName, counts, tgUpdates)
+    }
+
+    // when the job is stopped or scaled to zero we can make stronger
+    // assertions, so split out these checks
+    if counts["expect_count"] == 0 || ar.jobState.Job.Stopped() {
+
+      must.Eq(t, 0, int(tgUpdates.Place),
+        tprintf("no placements on stop or scale-to-zero"))
+      must.Eq(t, 0, int(tgUpdates.Canary),
+        tprintf("no canaries on stop or scale-to-zero"))
+      must.Eq(t, 0, int(tgUpdates.DestructiveUpdate),
+        tprintf("no destructive updates on stop or scale-to-zero"))
+      must.Eq(t, 0, int(tgUpdates.Migrate),
+        tprintf("no migrating on stop or scale-to-zero"))
+      must.Eq(t, 0, int(tgUpdates.RescheduleLater),
+        tprintf("no rescheduling later on stop or scale-to-zero"))
+      must.Eq(t, 0, int(tgUpdates.Preemptions),
+        tprintf("no preemptions on stop or scale-to-zero"))
+
+      continue
+    }
+
     must.LessEq(t, counts["expect_count"], int(tgUpdates.Place),
-      must.Sprintf("group placements should never exceed group count (%s): %v",
-        tgName, counts))
+      tprintf("group placements should never exceed group count"))

     must.LessEq(t, counts["max_canaries"], int(tgUpdates.Canary),
-      must.Sprintf("canaries should never exceed expected canaries (%s): %v",
-        tgName, counts))
+      tprintf("canaries should never exceed expected canaries"))

     must.LessEq(t, counts["max_canaries"], int(tgUpdates.Canary)+counts["exist_canary"],
-      must.Sprintf("canaries+existing canaries should never exceed expected canaries (%s): %v",
-        tgName, counts))
+      tprintf("canaries+existing canaries should never exceed expected canaries"))

     must.LessEq(t, counts["expect_count"], int(tgUpdates.DestructiveUpdate),
-      must.Sprintf("destructive updates should never exceed group count (%s): %v",
-        tgName, counts))
+      tprintf("destructive updates should never exceed group count"))

     must.LessEq(t, counts["expect_count"]+counts["max_canaries"],
       int(tgUpdates.Canary)+int(tgUpdates.Place)+int(tgUpdates.DestructiveUpdate),
-      must.Sprintf("place+canaries+destructive should never exceed group count + expected canaries (%s): %v",
-        tgName, counts))
+      tprintf("place+canaries+destructive should never exceed group count + expected canaries"))

     // TODO(tgross): needs per-taskgroup reconnect/disconnect values
     // must.Eq(t, counts["expect_count"]+int(tgUpdates.Stop)+int(tgUpdates.Disconnected),
     //   int(tgUpdates.Place)+int(tgUpdates.Ignore)+int(tgUpdates.InPlaceUpdate)+int(tgUpdates.DestructiveUpdate)+int(tgUpdates.Reconnected),
     //   must.Sprintf(""),
     // )
+    must.LessEq(t, counts["exist_non_client_terminal"], int(tgUpdates.Reconnect),
+      tprintf("reconnected should never exceed non-client-terminal"))

     must.LessEq(t, counts["exist_total"], int(tgUpdates.InPlaceUpdate),
-      must.Sprintf("in-place updates should never exceed existing allocs (%s): %v",
-        tgName, counts))
+      tprintf("in-place updates should never exceed existing allocs"))

     must.LessEq(t, counts["exist_total"], int(tgUpdates.DestructiveUpdate),
-      must.Sprintf("destructive updates should never exceed existing allocs (%s): %v",
-        tgName, counts))
+      tprintf("destructive updates should never exceed existing allocs"))

     must.LessEq(t, counts["exist_total"], int(tgUpdates.Migrate),
-      must.Sprintf("migrations should never exceed existing allocs (%s): %v",
-        tgName, counts))
+      tprintf("migrations should never exceed existing allocs"))

     must.LessEq(t, counts["exist_total"], int(tgUpdates.Ignore),
-      must.Sprintf("ignore should never exceed existing allocs (%s): %v",
-        tgName, counts))
+      tprintf("ignore should never exceed existing allocs"))
+
+    must.GreaterEq(t, tgUpdates.Migrate, tgUpdates.Stop,
+      tprintf("migrated allocs should be stopped"))
+
+    must.GreaterEq(t, tgUpdates.RescheduleLater, tgUpdates.Ignore,
+      tprintf("reschedule-later allocs should be ignored"))
   }

 }

 t.Run("batch jobs", rapid.MakeCheck(func(t *rapid.T) {
   ar := genAllocReconciler(structs.JobTypeBatch, &idGenerator{}).Draw(t, "reconciler")
-  perTaskGroup := collectExpected(ar)
+  perTaskGroup := collectExpected(t, ar)
   results := ar.Compute()
   must.NotNil(t, results, must.Sprint("results should never be nil"))

@@ -149,7 +202,7 @@ func TestAllocReconciler_PropTest(t *testing.T) {

 t.Run("service jobs", rapid.MakeCheck(func(t *rapid.T) {
   ar := genAllocReconciler(structs.JobTypeService, &idGenerator{}).Draw(t, "reconciler")
-  perTaskGroup := collectExpected(ar)
+  perTaskGroup := collectExpected(t, ar)
   results := ar.Compute()
   must.NotNil(t, results, must.Sprint("results should never be nil"))

@@ -820,6 +820,7 @@ func TestReconciler_Inplace_Rollback(t *testing.T) {
       Stop: 1,
       InPlaceUpdate: 1,
       DestructiveUpdate: 1,
+      RescheduleLater: 1,
     },
   },
 })

@@ -1829,10 +1830,11 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) {
   stop: 0,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     job.TaskGroups[0].Name: {
-      Place: 0,
-      InPlaceUpdate: 0,
-      Ignore: 4,
-      Stop: 0,
+      Place: 0,
+      InPlaceUpdate: 0,
+      Ignore: 4,
+      Stop: 0,
+      RescheduleLater: 1,
     },
   },
 })

@@ -1925,10 +1927,11 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
   stop: 0,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     job.TaskGroups[0].Name: {
-      Place: 0,
-      InPlaceUpdate: 0,
-      Ignore: 10,
-      Stop: 0,
+      Place: 0,
+      InPlaceUpdate: 0,
+      Ignore: 10,
+      Stop: 0,
+      RescheduleLater: 7,
     },
   },
 })

@@ -2115,10 +2118,11 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) {
   stop: 0,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     job.TaskGroups[0].Name: {
-      Place: 1,
-      InPlaceUpdate: 0,
-      Ignore: 4,
-      Stop: 0,
+      Place: 1,
+      InPlaceUpdate: 0,
+      Ignore: 4,
+      Stop: 0,
+      RescheduleLater: 1,
     },
   },
 })

@@ -6022,7 +6026,8 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
   reconnectUpdates: 2,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     "web": {
-      Ignore: 2,
+      Ignore: 2,
+      Reconnect: 2,
     },
   },
 },

@@ -6041,8 +6046,9 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
   reconnectUpdates: 1,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     "web": {
-      Stop: 1,
-      Ignore: 3,
+      Stop: 1,
+      Ignore: 3,
+      Reconnect: 1,
     },
   },
 },

@@ -6223,8 +6229,10 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
   disconnectUpdates: 2,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     "web": {
-      Place: 2,
-      Ignore: 3,
+      Place: 2,
+      Ignore: 3,
+      Disconnect: 2,
+      RescheduleNow: 2,
     },
   },
 },

@@ -6519,6 +6527,8 @@ func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) {
       Stop: 0,
       Ignore: 1,
       InPlaceUpdate: 0,
+      Disconnect: 2,
+      RescheduleNow: 2,
     },
   },
 })

@@ -6668,9 +6678,11 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
   reconnectUpdates: 0,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     updatedJob.TaskGroups[0].Name: {
-      Place: 3,
-      Canary: 0,
-      Ignore: 6,
+      Place: 3,
+      Canary: 0,
+      Ignore: 6,
+      Disconnect: 3,
+      RescheduleNow: 3,
     },
   },
 },

@@ -6732,9 +6744,11 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
   reconnectUpdates: 0,
   desiredTGUpdates: map[string]*structs.DesiredUpdates{
     updatedJob.TaskGroups[0].Name: {
-      Place: 2,
-      Canary: 0,
-      Ignore: 7,
+      Place: 2,
+      Canary: 0,
+      Ignore: 7,
+      Disconnect: 2,
+      RescheduleNow: 2,
     },
   },
 },

@@ -6805,7 +6819,9 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
       // the deployment can still progress. We don't include
       // them in the stop count since DesiredTGUpdates is used
       // to report deployment progress or final deployment state.
-      Stop: 0,
+      Stop: 0,
+      Disconnect: 2,
+      RescheduleNow: 2,
     },
   },
 },

@@ -69,6 +69,7 @@ func TestNodeReconciler_PropTest(t *testing.T) {
   // sharedSafetyProperties asserts safety properties ("something bad never
   // happens") that apply to all job types that use the node reconciler
   sharedSafetyProperties := func(t *rapid.T, nr *nodeReconcilerInput, results *NodeReconcileResult, perTaskGroup map[string]map[string]int) {
+    t.Helper()

     if !nr.serverSupportsDisconnectedClients {
       must.Len(t, 0, results.Disconnecting,

@@ -700,17 +700,7 @@ func desiredUpdates(diff *reconciler.NodeReconcileResult, inplaceUpdates,
   destructiveUpdates []reconciler.AllocTuple) map[string]*structs.DesiredUpdates {
   desiredTgs := make(map[string]*structs.DesiredUpdates)

-  for _, tuple := range diff.Place {
-    name := tuple.TaskGroup.Name
-    des, ok := desiredTgs[name]
-    if !ok {
-      des = &structs.DesiredUpdates{}
-      desiredTgs[name] = des
-    }
-
-    des.Place++
-  }
-
   // diff.Stop may have a nil TaskGroup
   for _, tuple := range diff.Stop {
     name := tuple.Alloc.TaskGroup
     des, ok := desiredTgs[name]

@@ -722,49 +712,26 @@ func desiredUpdates(diff *reconciler.NodeReconcileResult, inplaceUpdates,
     des.Stop++
   }

-  for _, tuple := range diff.Ignore {
-    name := tuple.TaskGroup.Name
-    des, ok := desiredTgs[name]
-    if !ok {
-      des = &structs.DesiredUpdates{}
-      desiredTgs[name] = des
-    }
-
-    des.Ignore++
-  }
-
-  for _, tuple := range diff.Migrate {
-    name := tuple.TaskGroup.Name
-    des, ok := desiredTgs[name]
-    if !ok {
-      des = &structs.DesiredUpdates{}
-      desiredTgs[name] = des
-    }
-
-    des.Migrate++
-  }
-
-  for _, tuple := range inplaceUpdates {
-    name := tuple.TaskGroup.Name
-    des, ok := desiredTgs[name]
-    if !ok {
-      des = &structs.DesiredUpdates{}
-      desiredTgs[name] = des
-    }
-
-    des.InPlaceUpdate++
-  }
-
-  for _, tuple := range destructiveUpdates {
-    name := tuple.TaskGroup.Name
-    des, ok := desiredTgs[name]
-    if !ok {
-      des = &structs.DesiredUpdates{}
-      desiredTgs[name] = des
-    }
-
-    des.DestructiveUpdate++
-  }
+  incUpdates := func(tuples []reconciler.AllocTuple, fn func(des *structs.DesiredUpdates)) {
+    for _, tuple := range tuples {
+      name := tuple.TaskGroup.Name
+      des, ok := desiredTgs[name]
+      if !ok {
+        des = &structs.DesiredUpdates{}
+        desiredTgs[name] = des
+      }
+
+      fn(des)
+    }
+  }
+
+  incUpdates(diff.Place, func(des *structs.DesiredUpdates) { des.Place++ })
+  incUpdates(diff.Ignore, func(des *structs.DesiredUpdates) { des.Ignore++ })
+  incUpdates(diff.Migrate, func(des *structs.DesiredUpdates) { des.Migrate++ })
+  incUpdates(inplaceUpdates, func(des *structs.DesiredUpdates) { des.InPlaceUpdate++ })
+  incUpdates(destructiveUpdates, func(des *structs.DesiredUpdates) { des.DestructiveUpdate++ })
+  incUpdates(diff.Disconnecting, func(des *structs.DesiredUpdates) { des.Disconnect++ })
+  incUpdates(diff.Reconnecting, func(des *structs.DesiredUpdates) { des.Reconnect++ })

   return desiredTgs
 }

@@ -40,7 +40,8 @@ indicated by exit code 1.
 ## Options

 - `-monitor`: Monitor an outstanding evaluation
-- `-verbose`: Show full-length IDs and exact timestamps.
+- `-verbose`: Show full-length IDs, exact timestamps, and all reconciler
+  annotation fields.
 - `-json`: Output the evaluation in its JSON format. This format will not
   include placed allocations.
 - `-t` : Format and display evaluation using a Go template. This format will not

@@ -50,7 +51,17 @@ indicated by exit code 1.
 ## Examples

 Show the status of an evaluation with related evaluations, successful
-placements, failed placements, and preemptions.
+placements, scheduler annotations, failed placements, and preemptions.

+The plan annotations table shows the output of the scheduler's reconciliation
+stage, which produces a desired set of changes that later stages of the
+scheduler attempt. The [`update.max_parallel`][] field or placement failures may
+result in a difference between these numbers and the updates made to the
+job. This table will always include the count for allocations to ignore, place,
+stop, inplace update, and destructively update. It may also include the count of
+canary allocations or allocations that were rescheduled, migrated, preempted,
+reconnected, or disconnected. Any of these counts may overlap so that, for
+example, an allocation can be both migrated and stopped.
+
 ```shell-session
 $ nomad eval status 8f6af533

@@ -73,9 +84,9 @@ Related Evaluations
 ID        Priority  Triggered By   Node ID  Status   Description
 fd6f3091  50        queued-allocs  <none>   pending  <none>

-Reconciler Annotations
-Task Group  Ignore  Place  Stop  Migrate  InPlace  Destructive  Canary  Preemptions
-group       0       3      0     0        0        0            0       1
+Plan Annotations
+Task Group  Ignore  Place  Stop  InPlace  Destructive  Migrate  Canary  Preemptions
+group       0       3      0     0        0            0        0       1

 Preempted Allocations
 ID  Job ID  Node ID  Task Group  Version  Desired Status  Created  Modified

@@ -109,3 +120,5 @@ $ nomad eval status -monitor 8262bc83
 ## General options

 @include 'general_options.mdx'
+
+[`update.max_parallel`]: /nomad/docs/job-specification/update#max_parallel
