scheduler: always set the right deployment status for system jobs that require promotion (#26851)

In cases where system jobs had the same amount of canary allocations deployed as there were eligible nodes, the scheduler would incorrectly mark the deployment as complete, as if auto promotion was set. This edge case uncovered a bug in the setDeploymentStatusAndUpdates method, and since we round up canary nodes, it may not be such an edge case afterall. --------- Co-authored-by: Tim Gross <tgross@hashicorp.com>
2026-01-01 16:05:42 +03:00 · 2025-09-30 09:18:59 +02:00
parent e6a04e06d1
commit eaa0fe0e27
4 changed files with 194 additions and 25 deletions
--- a/e2e/scheduler_system/input/system_canary_v0_100.nomad.hcl
+++ b/e2e/scheduler_system/input/system_canary_v0_100.nomad.hcl
@@ -0,0 +1,46 @@
 # Copyright (c) HashiCorp, Inc.
 # SPDX-License-Identifier: BUSL-1.1
 job "system_job" {
  datacenters = ["dc1", "dc2"]
  type = "system"
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }
  group "system_job_group" {
    update {
      max_parallel     = 1
      min_healthy_time = "1s"
      healthy_deadline = "1m"
      auto_revert      = false
      canary           = 100
    }
    restart {
      attempts = 10
      interval = "1m"
      delay = "2s"
      mode  = "delay"
    }
    task "system_task" {
      driver = "docker"
      config {
        image = "busybox:1"
        command = "/bin/sh"
        args    = ["-c", "sleep 15000"]
      }
      env {
        version = "0"
      }
    }
  }
 }
--- a/e2e/scheduler_system/input/system_canary_v1_100.nomad.hcl
+++ b/e2e/scheduler_system/input/system_canary_v1_100.nomad.hcl
@@ -0,0 +1,46 @@
 # Copyright (c) HashiCorp, Inc.
 # SPDX-License-Identifier: BUSL-1.1
 job "system_job" {
  datacenters = ["dc1", "dc2"]
  type = "system"
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }
  group "system_job_group" {
    update {
      max_parallel     = 1
      min_healthy_time = "1s"
      healthy_deadline = "1m"
      auto_revert      = false
      canary           = 50
    }
    restart {
      attempts = 10
      interval = "1m"
      delay = "2s"
      mode  = "delay"
    }
    task "system_task" {
      driver = "docker"
      config {
        image = "busybox:1"
        command = "/bin/sh"
        args    = ["-c", "sleep 150000"]
      }
      env {
        version = "1"
      }
    }
  }
 }
--- a/e2e/scheduler_system/systemsched_test.go
+++ b/e2e/scheduler_system/systemsched_test.go
@@ -9,21 +9,24 @@ import (
 	"time"
 	"github.com/hashicorp/nomad/api"
 	"github.com/hashicorp/nomad/e2e/v3/cluster3"
 	"github.com/hashicorp/nomad/e2e/v3/jobs3"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/shoenig/test/must"
 )
-func TestSystemScheduler(t *testing.T) {
+// FIXME: these tests are temporarily disabled until a bug in the scheduler that
-	cluster3.Establish(t,
+// fails to account for constraints is fixed.
-		cluster3.Leader(),
+//
-		cluster3.LinuxClients(3),
+// func TestSystemScheduler(t *testing.T) {
-	)
+// 	cluster3.Establish(t,
-
+// 		cluster3.Leader(),
-	t.Run("testJobUpdateOnIneligibleNode", testJobUpdateOnIneligbleNode)
+// 		cluster3.LinuxClients(3),
-	t.Run("testCanaryUpdate", testCanaryUpdate)
+// 	)
-}
+//
 // 	t.Run("testJobUpdateOnIneligibleNode", testJobUpdateOnIneligbleNode)
 // 	t.Run("testCanaryUpdate", testCanaryUpdate)
 // 	t.Run("testCanaryDeploymentToAllEligibleNodes", testCanaryDeploymentToAllEligibleNodes)
 // }
 func testJobUpdateOnIneligbleNode(t *testing.T) {
 	job, cleanup := jobs3.Submit(t,
@@ -203,3 +206,77 @@ func testCanaryUpdate(t *testing.T) {
 	}
 	must.Eq(t, numberOfEligibleNodes, promotedAllocs)
 }
 func testCanaryDeploymentToAllEligibleNodes(t *testing.T) {
 	_, cleanup := jobs3.Submit(t,
 		"./input/system_canary_v0_100.nomad.hcl",
 		jobs3.DisableRandomJobID(),
 		jobs3.Timeout(60*time.Second),
 	)
 	t.Cleanup(cleanup)
 	// Update job
 	job2, cleanup2 := jobs3.Submit(t,
 		"./input/system_canary_v1_100.nomad.hcl",
 		jobs3.DisableRandomJobID(),
 		jobs3.Timeout(60*time.Second),
 		jobs3.Detach(),
 	)
 	t.Cleanup(cleanup2)
 	// how many eligible nodes do we have?
 	nodesApi := job2.NodesApi()
 	nodesList, _, err := nodesApi.List(nil)
 	must.Nil(t, err)
 	must.SliceNotEmpty(t, nodesList)
 	numberOfEligibleNodes := 0
 	for _, n := range nodesList {
 		if n.SchedulingEligibility == api.NodeSchedulingEligible {
 			numberOfEligibleNodes += 1
 		}
 	}
 	// Get updated allocations
 	allocs := job2.Allocs()
 	must.SliceNotEmpty(t, allocs)
 	deploymentsApi := job2.DeploymentsApi()
 	deploymentsList, _, err := deploymentsApi.List(nil)
 	must.NoError(t, err)
 	var deployment *api.Deployment
 	for _, d := range deploymentsList {
 		if d.JobID == job2.JobID() && d.Status == api.DeploymentStatusRunning {
 			deployment = d
 		}
 	}
 	must.NotNil(t, deployment)
 	// wait for the canary allocations to become healthy
 	timeout, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
 	job2.WaitForDeploymentFunc(timeout, deployment.ID, func(d *api.Deployment) bool {
 		for _, tg := range d.TaskGroups { // we only have 1 tg in this job
 			if d.JobVersion == 1 && tg.HealthyAllocs >= tg.DesiredCanaries {
 				return true
 			}
 		}
 		return false
 	})
 	// find allocations from v1 version of the job, they should all be canaries
 	count := 0
 	for _, a := range allocs {
 		if a.JobVersion == 1 {
 			must.True(t, a.DeploymentStatus.Canary)
 			count += 1
 		}
 	}
 	must.Eq(t, numberOfEligibleNodes, count, must.Sprint("expected canaries to be placed on all eligible nodes"))
 	// deployment must not be terminal and needs to have the right status
 	// description set
 	must.Eq(t, structs.DeploymentStatusDescriptionRunningNeedsPromotion, deployment.StatusDescription)
 }
--- a/scheduler/reconciler/reconcile_node.go
+++ b/scheduler/reconciler/reconcile_node.go
@@ -600,15 +600,26 @@ func (nr *NodeReconciler) isDeploymentComplete(groupName string, buckets *NodeRe
 func (nr *NodeReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool, job *structs.Job) []*structs.DeploymentStatusUpdate {
 	statusUpdates := []*structs.DeploymentStatusUpdate{}
-	if nr.DeploymentCurrent != nil {
+	if d := nr.DeploymentCurrent; d != nil {
 		// Deployments that require promotion should have appropriate status set
 		// immediately, no matter their completness.
 		if d.RequiresPromotion() {
 			if d.HasAutoPromote() {
 				d.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion
 			} else {
 				d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
 			}
 			return statusUpdates
 		}
 		// Mark the deployment as complete if possible
 		if deploymentComplete {
 			if job.IsMultiregion() {
 				// the unblocking/successful states come after blocked, so we
 				// need to make sure we don't revert those states
-				if nr.DeploymentCurrent.Status != structs.DeploymentStatusUnblocking &&
+				if d.Status != structs.DeploymentStatusUnblocking &&
-					nr.DeploymentCurrent.Status != structs.DeploymentStatusSuccessful {
+					d.Status != structs.DeploymentStatusSuccessful {
 					statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
 						DeploymentID:      nr.DeploymentCurrent.ID,
 						Status:            structs.DeploymentStatusBlocked,
@@ -625,7 +636,7 @@ func (nr *NodeReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool,
 		}
 		// Mark the deployment as pending since its state is now computed.
-		if nr.DeploymentCurrent.Status == structs.DeploymentStatusInitializing {
+		if d.Status == structs.DeploymentStatusInitializing {
 			statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
 				DeploymentID:      nr.DeploymentCurrent.ID,
 				Status:            structs.DeploymentStatusPending,
@@ -634,17 +645,6 @@ func (nr *NodeReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool,
 		}
 	}
 	// Set the description of a created deployment
 	if d := nr.DeploymentCurrent; d != nil {
 		if d.RequiresPromotion() {
 			if d.HasAutoPromote() {
 				d.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion
 			} else {
 				d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
 			}
 		}
 	}
 	return statusUpdates
 }