From f9b95ae8961ced806704c1a9c55fddec8a55cebd Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Thu, 2 Oct 2025 16:17:46 +0200
Subject: [PATCH] scheduler: account for infeasible nodes when reconciling
 system jobs (#26868)

The node reconciler never took node feasibility into account. When nodes
were excluded from allocation placement, for example because constraints
were not met, the desired total and desired canary counts were never
updated in the reconciler to reflect the exclusion, so deployments could
never become successful.
---
 scheduler/reconciler/reconcile_node.go | 28 ++++++++++++++++----------
 scheduler/scheduler_system.go          |  7 +++++--
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/scheduler/reconciler/reconcile_node.go b/scheduler/reconciler/reconcile_node.go
index 3ac267820..2c516474b 100644
--- a/scheduler/reconciler/reconcile_node.go
+++ b/scheduler/reconciler/reconcile_node.go
@@ -69,13 +69,17 @@ func (nr *NodeReconciler) Compute(
     compatHadExistingDeployment := nr.DeploymentCurrent != nil
 
     result := new(NodeReconcileResult)
-    deploymentComplete := true
+    var deploymentComplete bool
     for nodeID, allocs := range nodeAllocs {
         diff, deploymentCompleteForNode := nr.computeForNode(job, nodeID, eligibleNodes,
             notReadyNodes, taintedNodes, canaryNodes[nodeID], canariesPerTG, required, allocs, terminal,
             serverSupportsDisconnectedClients)
-        deploymentComplete = deploymentComplete && deploymentCompleteForNode
         result.Append(diff)
+
+        deploymentComplete = deploymentCompleteForNode
+        if deploymentComplete {
+            break
+        }
     }
 
     // COMPAT(1.14.0) prevent a new deployment from being created in the case
@@ -436,9 +440,9 @@ func (nr *NodeReconciler) computeForNode(
                 dstate.AutoPromote = tg.Update.AutoPromote
                 dstate.ProgressDeadline = tg.Update.ProgressDeadline
             }
+            dstate.DesiredTotal = len(eligibleNodes)
         }
 
-        dstate.DesiredTotal = len(eligibleNodes)
         if isCanarying[tg.Name] && !dstate.Promoted {
             dstate.DesiredCanaries = canariesPerTG[tg.Name]
         }
@@ -504,8 +508,11 @@ func (nr *NodeReconciler) computeForNode(
         deploymentPlaceReady := !deploymentPaused && !deploymentFailed
         deploymentComplete = nr.isDeploymentComplete(tg.Name, result, isCanarying[tg.Name])
 
-        // in this case there's nothing to do
-        if existingDeployment || tg.Update.IsEmpty() || (dstate.DesiredTotal == 0 && dstate.DesiredCanaries == 0) || !deploymentPlaceReady {
+        // check if perhaps there's nothing else to do for this TG
+        if existingDeployment ||
+            tg.Update.IsEmpty() ||
+            (dstate.DesiredTotal == 0 && dstate.DesiredCanaries == 0) ||
+            !deploymentPlaceReady {
             continue
         }
 
@@ -538,12 +545,11 @@ func (nr *NodeReconciler) createDeployment(job *structs.Job, tg *structs.TaskGro
         return a.Job.ID == job.ID && a.Job.Version == job.Version &&
             a.Job.CreateIndex == job.CreateIndex
     }
-    for _, alloc := range allocs {
-        if hadRunningCondition(alloc) {
-            nr.compatHasSameVersionAllocs = true
-            hadRunning = true
-            break
-        }
+    if slices.ContainsFunc(allocs, func(alloc *structs.Allocation) bool {
+        return hadRunningCondition(alloc)
+    }) {
+        nr.compatHasSameVersionAllocs = true
+        hadRunning = true
     }
 
     // if there's a terminal allocation it means we're doing a reschedule.
diff --git a/scheduler/scheduler_system.go b/scheduler/scheduler_system.go
index 27a7700b9..0a157c93c 100644
--- a/scheduler/scheduler_system.go
+++ b/scheduler/scheduler_system.go
@@ -444,8 +444,11 @@ func (s *SystemScheduler) computePlacements(place []reconciler.AllocTuple, exist
             // placements based on whether the node meets the constraints
             if s.planAnnotations != nil && s.planAnnotations.DesiredTGUpdates != nil {
-                desired := s.planAnnotations.DesiredTGUpdates[tgName]
-                desired.Place -= 1
+                s.planAnnotations.DesiredTGUpdates[tgName].Place -= 1
+            }
+
+            if s.plan.Deployment != nil {
+                s.deployment.TaskGroups[tgName].DesiredTotal -= 1
             }
 
             // Filtered nodes are not reported to users, just omitted from the job status
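
Note (illustrative, not part of the patch): the sketch below is a minimal, self-contained model of the
completion arithmetic the commit message describes. The types and the isComplete helper are simplified
stand-ins, not Nomad's real structs.DeploymentState or reconciler code; they only show why a DesiredTotal
that still counts constraint-filtered nodes can never be satisfied, and why decrementing it per filtered
node lets the deployment finish.

    package main

    import "fmt"

    // tgDeploymentState is a simplified stand-in for a per-task-group deployment
    // state (hypothetical; Nomad's real check also considers placed allocations,
    // canaries, and promotion).
    type tgDeploymentState struct {
        DesiredTotal  int // allocations the deployment expects
        HealthyAllocs int // placed allocations that became healthy
    }

    // isComplete models the completeness condition: every desired allocation
    // must exist and be healthy.
    func (d *tgDeploymentState) isComplete() bool {
        return d.HealthyAllocs >= d.DesiredTotal
    }

    func main() {
        // A system job targeting 5 eligible nodes, 2 of which are filtered out
        // by a constraint and can never receive a placement.
        eligibleNodes := 5
        filteredNodes := 2
        healthy := eligibleNodes - filteredNodes // only feasible nodes get allocs

        // Before the fix: DesiredTotal counts every eligible node, so with only
        // 3 feasible placements the deployment can never be marked successful.
        before := &tgDeploymentState{DesiredTotal: eligibleNodes, HealthyAllocs: healthy}
        fmt.Println("before fix, complete:", before.isComplete()) // false, forever

        // After the fix: each filtered node decrements DesiredTotal (analogous
        // to the DesiredTotal -= 1 added in computePlacements), so the
        // deployment completes once the feasible placements are healthy.
        after := &tgDeploymentState{DesiredTotal: eligibleNodes, HealthyAllocs: healthy}
        for i := 0; i < filteredNodes; i++ {
            after.DesiredTotal -= 1
        }
        fmt.Println("after fix, complete:", after.isComplete()) // true
    }

The same reasoning applies to DesiredCanaries while a task group is canarying, and it appears to be why
the patch sets DesiredTotal only when a fresh deployment state is created, so later evaluations do not
overwrite the adjusted count.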