From eb47d1ca1190d0de5e2a98c10484a11c8ffa5a9e Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 9 Jul 2025 11:31:04 -0400 Subject: [PATCH] scheduler: eliminate dead code in node reconciler (#26236) While working on property testing in #26216, I discovered we had unreachable code in the node reconciler. The `diffSystemAllocsForNode` function receives a set of non-terminal allocations, but then has branches where it assumes the allocations might be terminal. It's trivially provable that these allocs are always live, as the system scheduler splits the set of known allocs into live and terminal sets before passing them into the node reconciler. Eliminate the unreachable code and improve the variable names to make the known state of the allocs more clear in the reconciler code. Ref: https://github.com/hashicorp/nomad/pull/26216 --- scheduler/reconciler/reconcile_node.go | 27 +++++++++----------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/scheduler/reconciler/reconcile_node.go b/scheduler/reconciler/reconcile_node.go index fe57cc3f4..db53a7108 100644 --- a/scheduler/reconciler/reconcile_node.go +++ b/scheduler/reconciler/reconcile_node.go @@ -18,14 +18,14 @@ func Node( readyNodes []*structs.Node, // list of nodes in the ready state notReadyNodes map[string]struct{}, // list of nodes in DC but not ready, e.g. draining taintedNodes map[string]*structs.Node, // nodes which are down or drain mode (by node id) - allocs []*structs.Allocation, // non-terminal allocations + live []*structs.Allocation, // non-terminal allocations terminal structs.TerminalByNodeByName, // latest terminal allocations (by node id) serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic ) *NodeReconcileResult { // Build a mapping of nodes to all their allocs. - nodeAllocs := make(map[string][]*structs.Allocation, len(allocs)) - for _, alloc := range allocs { + nodeAllocs := make(map[string][]*structs.Allocation, len(live)) + for _, alloc := range live { nodeAllocs[alloc.NodeID] = append(nodeAllocs[alloc.NodeID], alloc) } @@ -65,7 +65,7 @@ func diffSystemAllocsForNode( notReadyNodes map[string]struct{}, // nodes that are not ready, e.g. draining taintedNodes map[string]*structs.Node, // nodes which are down (by node id) required map[string]*structs.TaskGroup, // set of allocations that must exist - allocs []*structs.Allocation, // non-terminal allocations that exist + liveAllocs []*structs.Allocation, // non-terminal allocations that exist terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, id) serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic ) *NodeReconcileResult { @@ -73,7 +73,7 @@ func diffSystemAllocsForNode( // Scan the existing updates existing := make(map[string]struct{}) // set of alloc names - for _, exist := range allocs { + for _, exist := range liveAllocs { // Index the existing node name := exist.Name existing[name] = struct{}{} @@ -108,7 +108,7 @@ func diffSystemAllocsForNode( } // If we have been marked for migration and aren't terminal, migrate - if !exist.TerminalStatus() && exist.DesiredTransition.ShouldMigrate() { + if exist.DesiredTransition.ShouldMigrate() { result.Migrate = append(result.Migrate, AllocTuple{ Name: name, TaskGroup: tg, @@ -117,16 +117,6 @@ func diffSystemAllocsForNode( continue } - // If we are a sysbatch job and terminal, ignore (or stop?) the alloc - if job.Type == structs.JobTypeSysBatch && exist.TerminalStatus() { - result.Ignore = append(result.Ignore, AllocTuple{ - Name: name, - TaskGroup: tg, - Alloc: exist, - }) - continue - } - // Expired unknown allocs are lost. Expired checks that status is unknown. if supportsDisconnectedClients && expired { result.Lost = append(result.Lost, AllocTuple{ @@ -149,6 +139,7 @@ func diffSystemAllocsForNode( continue } + // note: the node can be both tainted and nil node, nodeIsTainted := taintedNodes[exist.NodeID] // Filter allocs on a node that is now re-connected to reconnecting. @@ -198,7 +189,7 @@ func diffSystemAllocsForNode( continue } - if !exist.TerminalStatus() && (node == nil || node.TerminalStatus()) { + if node == nil || node.TerminalStatus() { result.Lost = append(result.Lost, AllocTuple{ Name: name, TaskGroup: tg, @@ -319,7 +310,7 @@ func materializeSystemTaskGroups(job *structs.Job) map[string]*structs.TaskGroup } for _, tg := range job.TaskGroups { - for i := 0; i < tg.Count; i++ { + for i := range tg.Count { name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i) out[name] = tg }