From 42b024db4d1c253055e6942321e2a5b44322849c Mon Sep 17 00:00:00 2001 From: James Rasell Date: Fri, 13 Jun 2025 15:48:34 +0100 Subject: [PATCH 01/32] net: Remove overcommitted network conditional. (#26053) The check simply returns false and has done for a number of years, therefore there is no need to keep it around or the test that exercises it. --- nomad/structs/funcs.go | 5 ---- nomad/structs/network.go | 12 ---------- nomad/structs/network_test.go | 44 ----------------------------------- 3 files changed, 61 deletions(-) diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go index 7d5bebf45..327ee3ff9 100644 --- a/nomad/structs/funcs.go +++ b/nomad/structs/funcs.go @@ -212,11 +212,6 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi } } - // Check if the network is overcommitted - if netIdx.Overcommitted() { - return false, "bandwidth exceeded", used, nil - } - // Check devices and host volumes if checkDevices { accounter := NewDeviceAccounter(node) diff --git a/nomad/structs/network.go b/nomad/structs/network.go index fa88b943c..7290a213c 100644 --- a/nomad/structs/network.go +++ b/nomad/structs/network.go @@ -182,18 +182,6 @@ func (idx *NetworkIndex) Release() { } } -// Overcommitted checks if the network is overcommitted -func (idx *NetworkIndex) Overcommitted() bool { - // TODO remove since bandwidth is deprecated - /*for device, used := range idx.UsedBandwidth { - avail := idx.AvailBandwidth[device] - if used > avail { - return true - } - }*/ - return false -} - // SetNode is used to initialize a node's network index with available IPs, // reserved ports, and other details from a node's configuration and // fingerprinting. diff --git a/nomad/structs/network_test.go b/nomad/structs/network_test.go index 1084714f1..4570d7ad8 100644 --- a/nomad/structs/network_test.go +++ b/nomad/structs/network_test.go @@ -129,50 +129,6 @@ func TestNetworkIndex_Copy(t *testing.T) { require.NotEqual(t, netIdx, netIdxCopy) } -func TestNetworkIndex_Overcommitted(t *testing.T) { - t.Skip() - ci.Parallel(t) - idx := NewNetworkIndex() - - // Consume some network - reserved := &NetworkResource{ - Device: "eth0", - IP: "192.168.0.100", - MBits: 505, - ReservedPorts: []Port{{Label: "one", Value: 8000}, {Label: "two", Value: 9000}}, - } - collide, reasons := idx.AddReserved(reserved) - if collide || len(reasons) != 0 { - t.Fatalf("bad") - } - if !idx.Overcommitted() { - t.Fatalf("have no resources") - } - - // Add resources - n := &Node{ - NodeResources: &NodeResources{ - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "192.168.0.100/32", - MBits: 1000, - }, - }, - }, - } - idx.SetNode(n) - if idx.Overcommitted() { - t.Fatalf("have resources") - } - - // Double up our usage - idx.AddReserved(reserved) - if !idx.Overcommitted() { - t.Fatalf("should be overcommitted") - } -} - func TestNetworkIndex_SetNode(t *testing.T) { ci.Parallel(t) From dfa07e10edf89c3fb3be52b48724f73646a43873 Mon Sep 17 00:00:00 2001 From: Chris Roberts Date: Fri, 13 Jun 2025 08:28:31 -0700 Subject: [PATCH 02/32] client: fix batch job drain behavior (#26025) Batch job allocations that are drained from a node will be moved to an eligible node. However, when no eligible nodes are available to place the draining allocations, the tasks will end up being complete and will not be placed when an eligible node becomes available. This occurs because the drained allocations are simultaneously stopped on the draining node while attempting to be placed on an eligible node. 
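For illustration, the "complete" outcome above comes down to how a killed task's final state is read: a task whose state is dead but never marked failed is indistinguishable from one that finished its work. The sketch below uses simplified stand-in types, not Nomad's actual nomad/structs definitions.

```go
package main

import "fmt"

// taskState is a simplified stand-in for Nomad's task state; the real
// definition lives in nomad/structs and carries more fields.
type taskState struct {
	State  string // "pending", "running", or "dead"
	Failed bool
}

// looksSuccessfullyComplete mirrors the reasoning in this commit message:
// a dead task that was never marked failed reads as having finished its
// work, so there is nothing left for the scheduler to place.
func looksSuccessfullyComplete(ts taskState) bool {
	return ts.State == "dead" && !ts.Failed
}

func main() {
	drainedAndKilled := taskState{State: "dead", Failed: false}
	fmt.Println(looksSuccessfullyComplete(drainedAndKilled)) // true: never rescheduled
}
```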
The stopping of the allocations on the draining node result in tasks being killed, but importantly this kill does not fail the task. The result is tasks reporting as complete due to their state being dead and not being failed. As such, when an eligible node becomes available, all tasks will show as complete and no allocations will need to be placed. To prevent the behavior described above a check is performed when the alloc runner kills its tasks. If the allocation's job type is batch, and the allocation has a desired transition of migrate, the task will be failed when it is killed. This ensures the task does not report as complete, and when an eligible node becomes available the allocations are placed as expected. --- .changelog/26025.txt | 3 + client/allocrunner/alloc_runner.go | 33 ++++- client/allocrunner/alloc_runner_test.go | 154 ++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 6 deletions(-) create mode 100644 .changelog/26025.txt diff --git a/.changelog/26025.txt b/.changelog/26025.txt new file mode 100644 index 000000000..3496f30d7 --- /dev/null +++ b/.changelog/26025.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed bug where drained batch jobs would not be rescheduled if no eligible nodes were immediately available +``` diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 478cff439..5bebf0018 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -729,14 +729,35 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { // run alloc prekill hooks ar.preKillHooks() + // generate task event for given task runner + taskEventFn := func(tr *taskrunner.TaskRunner) (te *structs.TaskEvent) { + te = structs.NewTaskEvent(structs.TaskKilling). + SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + + // if the task is not set failed, the task has not finished, + // the job type is batch, and the allocation is being migrated + // then mark the task as failed. this ensures the task is recreated + // if no eligible nodes are immediately available. 
+ if !tr.TaskState().Failed && + tr.TaskState().FinishedAt.IsZero() && + ar.alloc.Job.Type == structs.JobTypeBatch && + ar.alloc.DesiredTransition.Migrate != nil && + *ar.alloc.DesiredTransition.Migrate { + + ar.logger.Trace("marking migrating batch job task failed on kill", "task_name", tr.Task().Name) + te.SetFailsTask() + } + return + } + // Kill leader first, synchronously for name, tr := range ar.tasks { if !tr.IsLeader() { continue } - taskEvent := structs.NewTaskEvent(structs.TaskKilling) - taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + taskEvent := taskEventFn(tr) + err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping leader task", "error", err, "task_name", name) @@ -758,8 +779,8 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { wg.Add(1) go func(name string, tr *taskrunner.TaskRunner) { defer wg.Done() - taskEvent := structs.NewTaskEvent(structs.TaskKilling) - taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + taskEvent := taskEventFn(tr) + err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping task", "error", err, "task_name", name) @@ -782,8 +803,8 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { wg.Add(1) go func(name string, tr *taskrunner.TaskRunner) { defer wg.Done() - taskEvent := structs.NewTaskEvent(structs.TaskKilling) - taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + taskEvent := taskEventFn(tr) + err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index af9679854..a2101d364 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -1804,6 +1804,160 @@ func TestAllocRunner_HandlesArtifactFailure(t *testing.T) { require.True(t, state.TaskStates["bad"].Failed) } +// Test that alloc runner kills tasks in task group when stopping and +// fails tasks when job is batch job type and migrating +func TestAllocRunner_Migrate_Batch_KillTG(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] + alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 + alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0 + + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config["run_for"] = "10s" + alloc.AllocatedResources.Tasks[task.Name] = tr + + task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() + task2.Name = "task 2" + task2.Driver = "mock_driver" + task2.Config["run_for"] = "1ms" + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) + alloc.AllocatedResources.Tasks[task2.Name] = tr + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + ar, err := NewAllocRunner(conf) + must.NoError(t, err) + + defer destroy(ar) + go ar.Run() + upd := conf.StateUpdater.(*MockStateUpdater) + + // Wait for running + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) + } 
+ return true, nil + }, func(err error) { + must.NoError(t, err) + }) + + // Wait for completed task + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) + } + + // task should not have finished yet, task2 should be finished + if !last.TaskStates[task.Name].FinishedAt.IsZero() { + return false, fmt.Errorf("task should not be finished") + } + if last.TaskStates[task2.Name].FinishedAt.IsZero() { + return false, fmt.Errorf("task should be finished") + } + return true, nil + }, func(err error) { + must.NoError(t, err) + }) + + update := ar.Alloc().Copy() + migrate := true + update.DesiredTransition.Migrate = &migrate + update.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(update) + + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + + if last.ClientStatus != structs.AllocClientStatusFailed { + return false, fmt.Errorf("got client status %q; want %q", last.ClientStatus, structs.AllocClientStatusFailed) + } + + // task should be failed since it was killed, task2 should not + // be failed since it was already completed + if !last.TaskStates[task.Name].Failed { + return false, fmt.Errorf("task should be failed") + } + if last.TaskStates[task2.Name].Failed { + return false, fmt.Errorf("task should not be failed") + } + return true, nil + }, func(err error) { + must.NoError(t, err) + }) +} + +// Test that alloc runner kills tasks in task group when stopping and +// does not fail tasks when job is batch job type and not migrating +func TestAllocRunner_Batch_KillTG(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] + alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 + alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0 + + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config["run_for"] = "10s" + alloc.AllocatedResources.Tasks[task.Name] = tr + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + ar, err := NewAllocRunner(conf) + must.NoError(t, err) + + defer destroy(ar) + go ar.Run() + upd := conf.StateUpdater.(*MockStateUpdater) + + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) + } + return true, nil + }, func(err error) { + must.NoError(t, err) + }) + + update := ar.Alloc().Copy() + update.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(update) + + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + + if last.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got client status %q; want %q", last.ClientStatus, structs.AllocClientStatusComplete) + } + + return true, nil + }, func(err error) { + must.NoError(t, err) + }) +} + // Test that alloc runner kills tasks in task group when another task fails func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { ci.Parallel(t) From fedd042e699f73c3f168fcdebcd547d9dfb8055d Mon Sep 17 00:00:00 2001 From: Chris 
Roberts Date: Fri, 13 Jun 2025 09:23:27 -0700 Subject: [PATCH 03/32] test: update test timeout from 20m to 25m (#26056) Tests running in CI are starting to bump up to this timeout forcing re-runs. Adding an additional five minutes to the timeout to help prevent this from occurring. --- GNUmakefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 37a8425b6..d0ad59105 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -296,7 +296,7 @@ test-nomad: # dev ## Run Nomad unit tests @echo "==> with packages $(GOTEST_PKGS)" gotestsum --format=testname --rerun-fails=3 --packages="$(GOTEST_PKGS)" -- \ -cover \ - -timeout=20m \ + -timeout=25m \ -count=1 \ -tags "$(GO_TAGS)" \ $(GOTEST_PKGS) @@ -306,7 +306,7 @@ test-nomad-module: dev ## Run Nomad unit tests on sub-module @echo "==> Running Nomad unit tests on sub-module $(GOTEST_MOD)" cd $(GOTEST_MOD); gotestsum --format=testname --rerun-fails=3 --packages=./... -- \ -cover \ - -timeout=20m \ + -timeout=25m \ -count=1 \ -race \ -tags "$(GO_TAGS)" \ @@ -441,7 +441,7 @@ test: ## Use this target as a smoke test @echo "==> with packages: $(GOTEST_PKGS)" gotestsum --format=testname --packages="$(GOTEST_PKGS)" -- \ -cover \ - -timeout=20m \ + -timeout=25m \ -count=1 \ -tags "$(GO_TAGS)" \ $(GOTEST_PKGS) From 26004c54076ef4bb6e75711f5fd39287d44749d6 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 13 Jun 2025 13:50:54 -0400 Subject: [PATCH 04/32] vault: set renew increment to lease duration (#26041) When we renew Vault tokens, we use the lease duration to determine how often to renew. But we also set an `increment` value which is never updated from the initial 30s. For periodic tokens this is not a problem because the `increment` field is ignored on renewal. But for non-periodic tokens this prevents the token TTL from being properly incremented. This behavior has been in place since the initial Vault client implementation in #1606 but before the switch to workload identity most (all?) tokens being created were periodic tokens so this was never detected. Fix this bug by updating the request's `increment` field to the lease duration on each renewal. Also switch out a `time.After` call in backoff of the derive token caller with a safe timer so that we don't have to spawn a new goroutine per loop, and have tighter control over when that's GC'd. 
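As a rough standalone sketch of the timer change described above, using only the standard library (the retryWithBackoff, try, and backoff names here are illustrative, not Nomad's; the actual change uses Nomad's helper.NewSafeTimer), a retry loop can reuse a single timer instead of allocating a new one through time.After on every iteration:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// retryWithBackoff reuses one timer across attempts rather than calling
// time.After inside the loop, which allocates a fresh timer on each pass
// that is only reclaimed once it fires.
func retryWithBackoff(ctx context.Context, try func() error, backoff func(uint64) time.Duration) error {
	timer := time.NewTimer(0)
	<-timer.C // drain the immediate fire so later Resets start from a clean channel
	defer timer.Stop()

	for attempt := uint64(0); ; attempt++ {
		if err := try(); err == nil {
			return nil
		}
		timer.Reset(backoff(attempt))
		select {
		case <-ctx.Done():
			return ctx.Err() // caller gave up; the deferred Stop releases the timer
		case <-timer.C:
			// timer fired; loop around and retry
		}
	}
}

func main() {
	attempts := 0
	err := retryWithBackoff(context.Background(),
		func() error {
			attempts++
			if attempts < 3 {
				return errors.New("not ready")
			}
			return nil
		},
		func(n uint64) time.Duration { return 10 * time.Millisecond << n },
	)
	fmt.Println(attempts, err) // 3 <nil>
}
```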
Ref: https://github.com/hashicorp/nomad/pull/1606 Ref: https://github.com/hashicorp/nomad/issues/25812 --- .changelog/26041.txt | 3 ++ .../taskrunner/task_runner_linux_test.go | 4 +- .../taskrunner/task_runner_test.go | 22 +++++----- client/allocrunner/taskrunner/vault_hook.go | 37 ++++++++++------- .../allocrunner/taskrunner/vault_hook_test.go | 16 ++++---- client/vaultclient/vaultclient.go | 20 +++++----- client/vaultclient/vaultclient_test.go | 40 +++++++++++++++---- client/vaultclient/vaultclient_testing.go | 12 +++--- 8 files changed, 97 insertions(+), 57 deletions(-) create mode 100644 .changelog/26041.txt diff --git a/.changelog/26041.txt b/.changelog/26041.txt new file mode 100644 index 000000000..7e5593595 --- /dev/null +++ b/.changelog/26041.txt @@ -0,0 +1,3 @@ +```release-note:bug +vault: Fixed a bug where non-periodic tokens would not have their TTL incremented to the lease duration +``` diff --git a/client/allocrunner/taskrunner/task_runner_linux_test.go b/client/allocrunner/taskrunner/task_runner_linux_test.go index 67452245e..e773393e6 100644 --- a/client/allocrunner/taskrunner/task_runner_linux_test.go +++ b/client/allocrunner/taskrunner/task_runner_linux_test.go @@ -35,8 +35,8 @@ func TestTaskRunner_DisableFileForVaultToken_UpgradePath(t *testing.T) { // Setup a test Vault client. token := "1234" - handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, error) { - return token, true, nil + handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, int, error) { + return token, true, 30, nil } vc, err := vaultclient.NewMockVaultClient(structs.VaultDefaultCluster) must.NoError(t, err) diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index 89c1dd914..5f103107c 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -1462,9 +1462,9 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) { // Control when we get a Vault token token := "1234" waitCh := make(chan struct{}) - handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, error) { + handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, int, error) { <-waitCh - return token, true, nil + return token, true, 30, nil } vc, err := vaultclient.NewMockVaultClient(structs.VaultDefaultCluster) @@ -1571,8 +1571,8 @@ func TestTaskRunner_DisableFileForVaultToken(t *testing.T) { // Setup a test Vault client token := "1234" - handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, error) { - return token, true, nil + handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, int, error) { + return token, true, 30, nil } vc, err := vaultclient.NewMockVaultClient(structs.VaultDefaultCluster) must.NoError(t, err) @@ -1639,13 +1639,13 @@ func TestTaskRunner_DeriveToken_Retry(t *testing.T) { // Fail on the first attempt to derive a vault token token := "1234" count := 0 - handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, error) { + handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, int, error) { if count > 0 { - return token, true, nil + return token, true, 30, nil } count++ - return "", false, structs.NewRecoverableError(fmt.Errorf("want a retry"), true) + return "", false, 0, structs.NewRecoverableError(fmt.Errorf("want a retry"), true) } vc, err := 
vaultclient.NewMockVaultClient(structs.VaultDefaultCluster) must.NoError(t, err) @@ -1741,8 +1741,8 @@ func TestTaskRunner_DeriveToken_Unrecoverable(t *testing.T) { must.NoError(t, err) vc.(*vaultclient.MockVaultClient).SetDeriveTokenWithJWTFn( - func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, error) { - return "", false, errors.New("unrecoverable") + func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, int, error) { + return "", false, 0, errors.New("unrecoverable") }, ) @@ -2076,9 +2076,9 @@ func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) { // Control when we get a Vault token waitCh := make(chan struct{}, 1) defer close(waitCh) - handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, error) { + handler := func(ctx context.Context, req vaultclient.JWTLoginRequest) (string, bool, int, error) { <-waitCh - return "1234", true, nil + return "1234", true, 30, nil } vc, err := vaultclient.NewMockVaultClient(structs.VaultDefaultCluster) must.NoError(t, err) diff --git a/client/allocrunner/taskrunner/vault_hook.go b/client/allocrunner/taskrunner/vault_hook.go index 44764e12c..3a03f178f 100644 --- a/client/allocrunner/taskrunner/vault_hook.go +++ b/client/allocrunner/taskrunner/vault_hook.go @@ -238,6 +238,7 @@ func (h *vaultHook) run(token string) { // updatedToken lets us store state between loops. If true, a new token // has been retrieved and we need to apply the Vault change mode var updatedToken bool + leaseDuration := 30 OUTER: for { @@ -255,7 +256,7 @@ OUTER: if token == "" { // Get a token var exit bool - token, exit = h.deriveVaultToken() + token, leaseDuration, exit = h.deriveVaultToken() if exit { // Exit the manager return @@ -289,7 +290,10 @@ OUTER: // // If Vault is having availability issues or is overloaded, a large // number of initial token renews can exacerbate the problem. - renewCh, err := h.client.RenewToken(token, 30) + if leaseDuration == 0 { + leaseDuration = 30 + } + renewCh, err := h.client.RenewToken(token, leaseDuration) // An error returned means the token is not being renewed if err != nil { @@ -358,13 +362,17 @@ OUTER: // deriveVaultToken derives the Vault token using exponential backoffs. It // returns the Vault token and whether the manager should exit. -func (h *vaultHook) deriveVaultToken() (string, bool) { +func (h *vaultHook) deriveVaultToken() (string, int, bool) { var attempts uint64 var backoff time.Duration + + timer, stopTimer := helper.NewSafeTimer(0) + defer stopTimer() + for { - token, err := h.deriveVaultTokenJWT() + token, lease, err := h.deriveVaultTokenJWT() if err == nil { - return token, false + return token, lease, false } // Check if we can't recover from the error @@ -374,11 +382,12 @@ func (h *vaultHook) deriveVaultToken() (string, bool) { structs.NewTaskEvent(structs.TaskKilling). SetFailsTask(). SetDisplayMessage(fmt.Sprintf("Vault: failed to derive vault token: %v", err))) - return "", true + return "", 0, true } // Handle the retry case backoff = helper.Backoff(vaultBackoffBaseline, vaultBackoffLimit, attempts) + timer.Reset(backoff) attempts++ h.logger.Error("failed to derive Vault token", "error", err, "recoverable", true, "backoff", backoff) @@ -386,14 +395,14 @@ func (h *vaultHook) deriveVaultToken() (string, bool) { // Wait till retrying select { case <-h.ctx.Done(): - return "", true - case <-time.After(backoff): + return "", 0, true + case <-timer.C: } } } // deriveVaultTokenJWT returns a Vault ACL token using JWT auth login. 
-func (h *vaultHook) deriveVaultTokenJWT() (string, error) { +func (h *vaultHook) deriveVaultTokenJWT() (string, int, error) { // Retrieve signed identity. signed, err := h.widmgr.Get(structs.WIHandle{ IdentityName: h.widName, @@ -401,13 +410,13 @@ func (h *vaultHook) deriveVaultTokenJWT() (string, error) { WorkloadType: structs.WorkloadTypeTask, }) if err != nil { - return "", structs.NewRecoverableError( + return "", 0, structs.NewRecoverableError( fmt.Errorf("failed to retrieve signed workload identity: %w", err), true, ) } if signed == nil { - return "", structs.NewRecoverableError( + return "", 0, structs.NewRecoverableError( errors.New("no signed workload identity available"), false, ) @@ -419,13 +428,13 @@ func (h *vaultHook) deriveVaultTokenJWT() (string, error) { } // Derive Vault token with signed identity. - token, renewable, err := h.client.DeriveTokenWithJWT(h.ctx, vaultclient.JWTLoginRequest{ + token, renewable, leaseDuration, err := h.client.DeriveTokenWithJWT(h.ctx, vaultclient.JWTLoginRequest{ JWT: signed.JWT, Role: role, Namespace: h.vaultBlock.Namespace, }) if err != nil { - return "", structs.WrapRecoverable( + return "", 0, structs.WrapRecoverable( fmt.Sprintf("failed to derive Vault token for identity %s: %v", h.widName, err), err, ) @@ -437,7 +446,7 @@ func (h *vaultHook) deriveVaultTokenJWT() (string, error) { h.allowTokenExpiration = true } - return token, nil + return token, leaseDuration, nil } // writeToken writes the given token to disk diff --git a/client/allocrunner/taskrunner/vault_hook_test.go b/client/allocrunner/taskrunner/vault_hook_test.go index 7e4be5e40..00d825a42 100644 --- a/client/allocrunner/taskrunner/vault_hook_test.go +++ b/client/allocrunner/taskrunner/vault_hook_test.go @@ -460,10 +460,10 @@ func TestTaskRunner_VaultHook_deriveError(t *testing.T) { // Set unrecoverable error. mockVaultClient.SetDeriveTokenWithJWTFn( - func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, error) { + func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, int, error) { // Cancel the context to simulate the task being killed. cancel() - return "", false, structs.NewRecoverableError(errors.New("unrecoverable test error"), false) + return "", false, 0, structs.NewRecoverableError(errors.New("unrecoverable test error"), false) }) err := hook.Prestart(ctx, req, &resp) @@ -509,16 +509,16 @@ func TestTaskRunner_VaultHook_deriveError(t *testing.T) { // Set recoverable error. mockVaultClient.SetDeriveTokenWithJWTFn( - func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, error) { - return "", false, structs.NewRecoverableError(errors.New("recoverable test error"), true) + func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, int, error) { + return "", false, 0, structs.NewRecoverableError(errors.New("recoverable test error"), true) }) go func() { // Wait a bit for the first error then fix token renewal. time.Sleep(time.Second) mockVaultClient.SetDeriveTokenWithJWTFn( - func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, error) { - return "secret", true, nil + func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, int, error) { + return "secret", true, 30, nil }) }() @@ -555,8 +555,8 @@ func TestTaskRunner_VaultHook_deriveError(t *testing.T) { // Derive predictable token and fail renew request. 
mockVaultClient.SetDeriveTokenWithJWTFn( - func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, error) { - return "secret", true, nil + func(_ context.Context, _ vaultclient.JWTLoginRequest) (string, bool, int, error) { + return "secret", true, 30, nil }) mockVaultClient.SetRenewTokenError("secret", errors.New("test error")) diff --git a/client/vaultclient/vaultclient.go b/client/vaultclient/vaultclient.go index 88a107bef..a1afe22de 100644 --- a/client/vaultclient/vaultclient.go +++ b/client/vaultclient/vaultclient.go @@ -50,8 +50,9 @@ type VaultClient interface { Stop() // DeriveTokenWithJWT returns a Vault ACL token using the JWT login - // endpoint, along with whether or not the token is renewable. - DeriveTokenWithJWT(context.Context, JWTLoginRequest) (string, bool, error) + // endpoint, along with whether or not the token is renewable and its lease + // duration. + DeriveTokenWithJWT(context.Context, JWTLoginRequest) (string, bool, int, error) // RenewToken renews a token with the given increment and adds it to // the min-heap for periodic renewal. @@ -237,12 +238,12 @@ func (c *vaultClient) unlockAndUnset() { } // DeriveTokenWithJWT returns a Vault ACL token using the JWT login endpoint. -func (c *vaultClient) DeriveTokenWithJWT(ctx context.Context, req JWTLoginRequest) (string, bool, error) { +func (c *vaultClient) DeriveTokenWithJWT(ctx context.Context, req JWTLoginRequest) (string, bool, int, error) { if !c.config.IsEnabled() { - return "", false, fmt.Errorf("vault client not enabled") + return "", false, 0, fmt.Errorf("vault client not enabled") } if !c.isRunning() { - return "", false, fmt.Errorf("vault client is not running") + return "", false, 0, fmt.Errorf("vault client is not running") } c.lock.Lock() @@ -263,20 +264,20 @@ func (c *vaultClient) DeriveTokenWithJWT(ctx context.Context, req JWTLoginReques }, ) if err != nil { - return "", false, fmt.Errorf("failed to login with JWT: %v", err) + return "", false, 0, fmt.Errorf("failed to login with JWT: %v", err) } if s == nil { - return "", false, errors.New("JWT login returned an empty secret") + return "", false, 0, errors.New("JWT login returned an empty secret") } if s.Auth == nil { - return "", false, errors.New("JWT login did not return a token") + return "", false, 0, errors.New("JWT login did not return a token") } for _, w := range s.Warnings { c.logger.Warn("JWT login warning", "warning", w) } - return s.Auth.ClientToken, s.Auth.Renewable, nil + return s.Auth.ClientToken, s.Auth.Renewable, s.Auth.LeaseDuration, nil } // RenewToken renews the supplied token for a given duration (in seconds) and @@ -368,6 +369,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { } else { // Don't set this if renewal fails leaseDuration = renewResp.Auth.LeaseDuration + req.increment = leaseDuration } // Reset the token in the API client before returning diff --git a/client/vaultclient/vaultclient_test.go b/client/vaultclient/vaultclient_test.go index 2b222f608..1dcfe8ac9 100644 --- a/client/vaultclient/vaultclient_test.go +++ b/client/vaultclient/vaultclient_test.go @@ -9,6 +9,7 @@ import ( "encoding/base64" "encoding/json" "fmt" + "io" "net/http" "net/http/httptest" "testing" @@ -218,13 +219,14 @@ func TestVaultClient_DeriveTokenWithJWT(t *testing.T) { // Derive Vault token using signed JWT. 
jwtStr := signedWIDs[0].JWT - token, renewable, err := c.DeriveTokenWithJWT(context.Background(), JWTLoginRequest{ + token, renewable, leaseDuration, err := c.DeriveTokenWithJWT(context.Background(), JWTLoginRequest{ JWT: jwtStr, Namespace: "default", }) must.NoError(t, err) must.NotEq(t, "", token) must.True(t, renewable) + must.Eq(t, 72*60*60, leaseDuration) // token_period from role // Verify token has expected properties. v.Client.SetToken(token) @@ -259,7 +261,7 @@ func TestVaultClient_DeriveTokenWithJWT(t *testing.T) { must.Eq(t, []any{"deny"}, (s.Data[pathDenied]).([]any)) // Derive Vault token with non-existing role. - token, _, err = c.DeriveTokenWithJWT(context.Background(), JWTLoginRequest{ + token, _, _, err = c.DeriveTokenWithJWT(context.Background(), JWTLoginRequest{ JWT: jwtStr, Role: "test", Namespace: "default", @@ -448,8 +450,14 @@ func TestVaultClient_SetUserAgent(t *testing.T) { func TestVaultClient_RenewalConcurrent(t *testing.T) { ci.Parallel(t) + // collects renewal requests that the mock Vault API gets + requestCh := make(chan string, 10) + // Create test server to mock the Vault API. ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + b, _ := io.ReadAll(r.Body) + requestCh <- string(b) + resp := vaultapi.Secret{ RequestID: uuid.Generate(), LeaseID: uuid.Generate(), @@ -458,7 +466,7 @@ func TestVaultClient_RenewalConcurrent(t *testing.T) { Auth: &vaultapi.SecretAuth{ ClientToken: uuid.Generate(), Accessor: uuid.Generate(), - LeaseDuration: 300, + LeaseDuration: 1, // force a fast renewal }, } @@ -482,9 +490,9 @@ func TestVaultClient_RenewalConcurrent(t *testing.T) { vc.Start() // Renew token multiple times in parallel. - requests := 100 + expectedRenewals := 100 resultCh := make(chan any) - for i := 0; i < requests; i++ { + for range expectedRenewals { go func() { _, err := vc.RenewToken("token", 30) resultCh <- err @@ -494,12 +502,28 @@ func TestVaultClient_RenewalConcurrent(t *testing.T) { // Collect results with timeout. timer, stop := helper.NewSafeTimer(3 * time.Second) defer stop() - for i := 0; i < requests; i++ { + + sawInitial := 0 + sawRenew := 0 + for { select { + case got := <-requestCh: + switch got { + case `{"increment":1}`: + sawRenew++ + case `{"increment":30}`: + sawInitial++ + default: + t.Fatalf("unexpected request body: %q", got) + } + if sawInitial == expectedRenewals && sawRenew >= expectedRenewals { + return + } case got := <-resultCh: must.Nil(t, got, must.Sprintf("token renewal error: %v", got)) case <-timer.C: - t.Fatal("timeout waiting for token renewal") + t.Fatalf("timeout waiting for expected token renewals (initial: %d renewed: %d)", + sawInitial, sawRenew) } } } @@ -524,7 +548,7 @@ func TestVaultClient_NamespaceReset(t *testing.T) { must.NoError(t, err) vc.Start() - _, _, err = vc.DeriveTokenWithJWT(context.Background(), JWTLoginRequest{ + _, _, _, err = vc.DeriveTokenWithJWT(context.Background(), JWTLoginRequest{ JWT: "bogus", Namespace: "bar", }) diff --git a/client/vaultclient/vaultclient_testing.go b/client/vaultclient/vaultclient_testing.go index 2516ac40d..65d91805a 100644 --- a/client/vaultclient/vaultclient_testing.go +++ b/client/vaultclient/vaultclient_testing.go @@ -35,20 +35,22 @@ type MockVaultClient struct { // deriveTokenWithJWTFn allows the caller to control the DeriveTokenWithJWT // function. 
- deriveTokenWithJWTFn func(context.Context, JWTLoginRequest) (string, bool, error) + deriveTokenWithJWTFn func(context.Context, JWTLoginRequest) (string, bool, int, error) // renewable determines if the tokens returned should be marked as renewable renewable bool + duration int + mu sync.Mutex } // NewMockVaultClient returns a MockVaultClient for testing func NewMockVaultClient(_ string) (VaultClient, error) { - return &MockVaultClient{renewable: true}, nil + return &MockVaultClient{renewable: true, duration: 30}, nil } -func (vc *MockVaultClient) DeriveTokenWithJWT(ctx context.Context, req JWTLoginRequest) (string, bool, error) { +func (vc *MockVaultClient) DeriveTokenWithJWT(ctx context.Context, req JWTLoginRequest) (string, bool, int, error) { vc.mu.Lock() defer vc.mu.Unlock() @@ -65,7 +67,7 @@ func (vc *MockVaultClient) DeriveTokenWithJWT(ctx context.Context, req JWTLoginR token = fmt.Sprintf("%s-%s", token, req.Role) } vc.jwtTokens[req.JWT] = token - return token, vc.renewable, nil + return token, vc.renewable, vc.duration, nil } func (vc *MockVaultClient) SetDeriveTokenError(allocID string, tasks []string, err error) { @@ -161,7 +163,7 @@ func (vc *MockVaultClient) RenewTokenErrCh(token string) chan error { } // SetDeriveTokenWithJWTFn sets the function used to derive tokens using JWT. -func (vc *MockVaultClient) SetDeriveTokenWithJWTFn(f func(context.Context, JWTLoginRequest) (string, bool, error)) { +func (vc *MockVaultClient) SetDeriveTokenWithJWTFn(f func(context.Context, JWTLoginRequest) (string, bool, int, error)) { vc.mu.Lock() defer vc.mu.Unlock() vc.deriveTokenWithJWTFn = f From d6800c41c1f52f581a7169b02ec61fcb17713fc5 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 16 Jun 2025 12:12:15 -0400 Subject: [PATCH 05/32] E2E: include Windows 2022 host in test targets (#26003) Some time ago the Windows host we were using as a Nomad client agent test target started failing to allow ssh connections. The underlying problem appears to be with sysprep but I wasn't able to debug the exact cause as it's not an area I have a lot of expertise in. Swap out the deprecated Windows 2016 host for a Windows 2022 host. This will use a base image provided by Amazon and then we'll use a userdata script to bootstrap ssh and some target directories for Terraform to upload files to. The more modern Windows will let us drop some of extra powershell scripts we were using as well. 
Fixes: https://hashicorp.atlassian.net/browse/NMD-151 Fixes: https://github.com/hashicorp/nomad-e2e/issues/125 --- e2e/artifact/artifact_test.go | 2 - e2e/metrics/metrics_test.go | 11 +- e2e/terraform/Makefile | 2 +- e2e/terraform/README.md | 10 +- e2e/terraform/main.tf | 4 +- e2e/terraform/packer/README.md | 7 +- .../packer/windows-2016-amd64.pkr.hcl | 64 ------- .../packer/windows-2016-amd64/README.md | 20 -- .../disable-windows-updates.ps1 | 33 ---- .../packer/windows-2016-amd64/fix-tls.ps1 | 150 --------------- .../windows-2016-amd64/install-consul.ps1 | 41 ---- .../windows-2016-amd64/install-nomad.ps1 | 49 ----- .../windows-2016-amd64/install-nuget.ps1 | 25 --- .../packer/windows-2016-amd64/userdata.ps1 | 137 -------------- e2e/terraform/provision-infra/compute.tf | 33 ++-- e2e/terraform/provision-infra/nomad.tf | 12 +- e2e/terraform/provision-infra/outputs.tf | 6 +- .../provision-nomad/install-windows.tf | 8 +- .../provision-infra/userdata/windows-2016.ps1 | 29 --- .../provision-infra/userdata/windows-2022.ps1 | 179 ++++++++++++++++++ e2e/terraform/provision-infra/variables.tf | 8 +- e2e/terraform/terraform.tfvars | 2 +- e2e/terraform/variables.tf | 6 +- 23 files changed, 232 insertions(+), 606 deletions(-) delete mode 100644 e2e/terraform/packer/windows-2016-amd64.pkr.hcl delete mode 100644 e2e/terraform/packer/windows-2016-amd64/README.md delete mode 100755 e2e/terraform/packer/windows-2016-amd64/disable-windows-updates.ps1 delete mode 100755 e2e/terraform/packer/windows-2016-amd64/fix-tls.ps1 delete mode 100755 e2e/terraform/packer/windows-2016-amd64/install-consul.ps1 delete mode 100755 e2e/terraform/packer/windows-2016-amd64/install-nomad.ps1 delete mode 100755 e2e/terraform/packer/windows-2016-amd64/install-nuget.ps1 delete mode 100755 e2e/terraform/packer/windows-2016-amd64/userdata.ps1 delete mode 100755 e2e/terraform/provision-infra/userdata/windows-2016.ps1 create mode 100755 e2e/terraform/provision-infra/userdata/windows-2022.ps1 diff --git a/e2e/artifact/artifact_test.go b/e2e/artifact/artifact_test.go index 0b0c00083..f1d09fb9c 100644 --- a/e2e/artifact/artifact_test.go +++ b/e2e/artifact/artifact_test.go @@ -46,8 +46,6 @@ func artifactCheckLogContents(t *testing.T, nomad *api.Client, group, task strin } func testWindows(t *testing.T) { - t.Skip("SKIP WINDOWS TEST") // TODO restore when windows client is fixed - nomad := e2eutil.NomadClient(t) jobID := "artifact-windows-" + uuid.Short() jobIDs := []string{jobID} diff --git a/e2e/metrics/metrics_test.go b/e2e/metrics/metrics_test.go index 2eec7d001..7d64e1672 100644 --- a/e2e/metrics/metrics_test.go +++ b/e2e/metrics/metrics_test.go @@ -73,6 +73,10 @@ func TestMetrics(t *testing.T) { _, cleanupCaddy := jobs3.Submit(t, "./input/caddy.hcl") t.Cleanup(cleanupCaddy) + t.Log("running metrics job winagent ...") + jobWin, cleanupWin := jobs3.Submit(t, "./input/winagent.hcl") + t.Cleanup(cleanupWin) + t.Log("let the metrics collect for a bit (10s) ...") time.Sleep(10 * time.Second) @@ -89,7 +93,12 @@ func TestMetrics(t *testing.T) { name: "nomad_client_allocs_cpu_allocated", filter: "exported_job", key: jobPy.JobID(), - }}) + }, { + name: "nomad_client_allocs_memory_rss", + filter: "exported_job", + key: jobWin.JobID(), + }, + }) t.Log("measuring client metrics ...") testClientMetrics(t, []*metric{{ diff --git a/e2e/terraform/Makefile b/e2e/terraform/Makefile index fa77c60f8..9fa455f95 100644 --- a/e2e/terraform/Makefile +++ b/e2e/terraform/Makefile @@ -8,7 +8,7 @@ custom.tfvars: echo 'nomad_local_binary = "$(PKG_PATH)"' > 
custom.tfvars echo 'volumes = false' >> custom.tfvars echo 'client_count_linux = 3' >> custom.tfvars - echo 'client_count_windows_2016 = 0' >> custom.tfvars + echo 'client_count_windows_2022 = 0' >> custom.tfvars echo 'consul_license = "$(shell cat $(CONSUL_LICENSE_PATH))"' >> custom.tfvars echo 'nomad_license = "$(shell cat $(NOMAD_LICENSE_PATH))"' >> custom.tfvars diff --git a/e2e/terraform/README.md b/e2e/terraform/README.md index 9f26bec92..fbd605337 100644 --- a/e2e/terraform/README.md +++ b/e2e/terraform/README.md @@ -53,7 +53,7 @@ region = "us-east-1" instance_type = "t2.medium" server_count = "3" client_count_linux = "4" -client_count_windows_2016 = "1" +client_count_windows_2022 = "1" ``` You will also need a Consul Enterprise license file and a Nomad Enterprise @@ -67,21 +67,21 @@ linux). NOTE: If you want to have a cluster with mixed CPU architectures, you need to specify the count and also provide the corresponding binary using `var.nomad_local_binary_client_ubuntu_jammy` and or -`var.nomad_local_binary_client_windows_2016`. +`var.nomad_local_binary_client_windows_2022`. Run Terraform apply to deploy the infrastructure: ```sh cd e2e/terraform/ terraform init -terraform apply -var="consul_license=$(cat full_path_to_consul.hclic)" -var="nomad_license=$(cat full_path_to_nomad.hclic)" +terraform apply -var="consul_license=$(cat full_path_to_consul.hclic)" -var="nomad_license=$(cat full_path_to_nomad.hclic)" ``` - + Alternative you can also run `make apply_full` from the terraform directory: ``` export NOMAD_LICENSE_PATH=./nomad.hclic -export CONSUL_LICENSE_PATH=./consul.hclic +export CONSUL_LICENSE_PATH=./consul.hclic make apply_full ``` diff --git a/e2e/terraform/main.tf b/e2e/terraform/main.tf index 861e57111..129411eae 100644 --- a/e2e/terraform/main.tf +++ b/e2e/terraform/main.tf @@ -10,11 +10,11 @@ module "provision-infra" { server_count = var.server_count client_count_linux = var.client_count_linux - client_count_windows_2016 = var.client_count_windows_2016 + client_count_windows_2022 = var.client_count_windows_2022 nomad_local_binary_server = var.nomad_local_binary_server nomad_local_binary = var.nomad_local_binary nomad_local_binary_client_ubuntu_jammy = var.nomad_local_binary_client_ubuntu_jammy - nomad_local_binary_client_windows_2016 = var.nomad_local_binary_client_windows_2016 + nomad_local_binary_client_windows_2022 = var.nomad_local_binary_client_windows_2022 nomad_license = var.nomad_license consul_license = var.consul_license nomad_region = var.nomad_region diff --git a/e2e/terraform/packer/README.md b/e2e/terraform/packer/README.md index 6dec9b216..c1cf43307 100644 --- a/e2e/terraform/packer/README.md +++ b/e2e/terraform/packer/README.md @@ -34,9 +34,6 @@ $ packer --version # build Ubuntu Jammy AMI $ ./build ubuntu-jammy-amd64 - -# build Windows AMI -$ ./build windows-2016-amd64 ``` ## Debugging Packer Builds @@ -51,3 +48,7 @@ you're done, clean up the machine by looking for "Packer" in the AWS console: * [EC2 instances](https://console.aws.amazon.com/ec2/home?region=us-east-1#Instances:search=Packer;sort=tag:Name) * [Key pairs](https://console.aws.amazon.com/ec2/v2/home?region=us-east-1#KeyPairs:search=packer;sort=keyName) * [Security groups](https://console.aws.amazon.com/ec2/v2/home?region=us-east-1#SecurityGroups:search=packer;sort=groupName) + +## Q: What About Windows? + +For now, we're using an Amazon base image directly. 
diff --git a/e2e/terraform/packer/windows-2016-amd64.pkr.hcl b/e2e/terraform/packer/windows-2016-amd64.pkr.hcl deleted file mode 100644 index cf14c7c86..000000000 --- a/e2e/terraform/packer/windows-2016-amd64.pkr.hcl +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - -variable "build_sha" { - type = string - description = "the revision of the packer scripts building this image" -} - -locals { - timestamp = regex_replace(timestamp(), "[- TZ:]", "") - version = "v3" -} - -source "amazon-ebs" "latest_windows_2016" { - ami_name = "nomad-e2e-${local.version}-windows-2016-amd64-${local.timestamp}" - communicator = "ssh" - instance_type = "m7a.large" - region = "us-east-1" - user_data_file = "windows-2016-amd64/userdata.ps1" # enables ssh - ssh_timeout = "10m" - ssh_username = "Administrator" - - source_ami_filter { - filters = { - name = "Windows_Server-2016-English-Full-ECS_Optimized-*" - root-device-type = "ebs" - virtualization-type = "hvm" - } - most_recent = true - owners = ["amazon"] - } - - tags = { - OS = "Windows2016" - BuilderSha = var.build_sha - } -} - -build { - sources = ["source.amazon-ebs.latest_windows_2016"] - - provisioner "powershell" { - scripts = [ - "windows-2016-amd64/disable-windows-updates.ps1", - "windows-2016-amd64/fix-tls.ps1", - "windows-2016-amd64/install-nuget.ps1", - "windows-2016-amd64/install-consul.ps1", - "windows-2016-amd64/install-nomad.ps1" - ] - } - - # this restart is required for adding the "containers feature", but we can - # wait to do it until right before we do sysprep, which makes debugging - # builds slightly faster - provisioner "windows-restart" {} - - provisioner "powershell" { - inline = [ - "C:\\ProgramData\\Amazon\\EC2-Windows\\Launch\\Scripts\\SendWindowsIsReady.ps1 -Schedule", - "C:\\ProgramData\\Amazon\\EC2-Windows\\Launch\\Scripts\\InitializeInstance.ps1 -Schedule", - "C:\\ProgramData\\Amazon\\EC2-Windows\\Launch\\Scripts\\SysprepInstance.ps1 -NoShutdown" - ] - } -} diff --git a/e2e/terraform/packer/windows-2016-amd64/README.md b/e2e/terraform/packer/windows-2016-amd64/README.md deleted file mode 100644 index 071f41227..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Windows Packer Build - -There are a few boilerplate items in the Powershell scripts, explained below. - -The default TLS protocol in the version of .NET that our Powershell cmdlets are built in it 1.0, which means plenty of properly configured HTTP servers will reject requests. The boilerplate snippet below sets this for the current script: - -``` -# Force TLS1.2 -[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 -``` - -We need to run some of the scripts as an administrator role. The following is a safety check that we're doing so: - -``` -$RunningAsAdmin = ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator") -if (!$RunningAsAdmin) { - Write-Error "Must be executed in Administrator level shell." - exit 1 -} -``` diff --git a/e2e/terraform/packer/windows-2016-amd64/disable-windows-updates.ps1 b/e2e/terraform/packer/windows-2016-amd64/disable-windows-updates.ps1 deleted file mode 100755 index 72478430b..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/disable-windows-updates.ps1 +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) HashiCorp, Inc. 
-# SPDX-License-Identifier: BUSL-1.1 - -$RunningAsAdmin = ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator") -if (!$RunningAsAdmin) { - Write-Error "Must be executed in Administrator level shell." - exit 1 -} - -$service = Get-WmiObject Win32_Service -Filter 'Name="wuauserv"' - -if (!$service) { - Write-Error "Failed to retrieve the wauserv service" - exit 1 -} - -if ($service.StartMode -ne "Disabled") { - $result = $service.ChangeStartMode("Disabled").ReturnValue - if($result) { - Write-Error "Failed to disable the 'wuauserv' service. The return value was $result." - exit 1 - } -} - -if ($service.State -eq "Running") { - $result = $service.StopService().ReturnValue - if ($result) { - Write-Error "Failed to stop the 'wuauserv' service. The return value was $result." - exit 1 - } -} - -Write-Output "Automatic Windows Updates disabled." diff --git a/e2e/terraform/packer/windows-2016-amd64/fix-tls.ps1 b/e2e/terraform/packer/windows-2016-amd64/fix-tls.ps1 deleted file mode 100755 index 55dc73a29..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/fix-tls.ps1 +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - -# This script hardens TLS configuration by disabling weak and broken protocols -# and enabling useful protocols like TLS 1.1 and 1.2. - -$RunningAsAdmin = ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator") -if (!$RunningAsAdmin) { - Write-Error "Must be executed in Administrator level shell." - exit 1 -} - -$weakProtocols = @( - 'Multi-Protocol Unified Hello', - 'PCT 1.0', - 'SSL 2.0', - 'SSL 3.0' -) - -$strongProtocols = @( - 'TLS 1.0', - 'TLS 1.1', - 'TLS 1.2' -) - -$weakCiphers = @( - 'DES 56/56', - 'NULL', - 'RC2 128/128', - 'RC2 40/128', - 'RC2 56/128', - 'RC4 40/128', - 'RC4 56/128', - 'RC4 64/128', - 'RC4 128/128' -) - -$strongCiphers = @( - 'AES 128/128', - 'AES 256/256', - 'Triple DES 168/168' -) - -$weakHashes = @( - 'MD5', - 'SHA' -) - -$strongHashes = @( - 'SHA 256', - 'SHA 384', - 'SHA 512' -) - -$strongKeyExchanges = @( - 'Diffie-Hellman', - 'ECDH', - 'PKCS' -) - -$cipherOrder = @( - 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384_P521', - 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384_P384', - 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384_P256', - 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA_P521', - 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA_P384', - 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA_P256', - 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA_P521', - 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA_P384', - 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA_P256', - 'TLS_RSA_WITH_AES_256_GCM_SHA384', - 'TLS_RSA_WITH_AES_128_GCM_SHA256', - 'TLS_RSA_WITH_AES_256_CBC_SHA256', - 'TLS_RSA_WITH_AES_256_CBC_SHA', - 'TLS_RSA_WITH_AES_128_CBC_SHA', - 'TLS_RSA_WITH_3DES_EDE_CBC_SHA' -) - -# Reset the protocols key -New-Item 'HKLM:SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols' -Force | Out-Null - -# Disable weak protocols -Foreach ($protocol in $weakProtocols) { - New-Item HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Server -Force | Out-Null - New-Item HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Client -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Server -name Enabled -value 0 -PropertyType 'DWord' -Force | 
Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Server -name DisabledByDefault -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Client -name Enabled -value 0 -PropertyType 'DWord' -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Client -name DisabledByDefault -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null -} - -# Enable strong protocols -Foreach ($protocol in $strongProtocols) { - New-Item HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Server -Force | Out-Null - New-Item HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Client -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Server -name 'Enabled' -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Server -name 'DisabledByDefault' -value 0 -PropertyType 'DWord' -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Client -name 'Enabled' -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null - New-ItemProperty -path HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Protocols\$protocol\Client -name 'DisabledByDefault' -value 0 -PropertyType 'DWord' -Force | Out-Null -} - -# Reset the ciphers key -New-Item 'HKLM:SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Ciphers' -Force | Out-Null - -# Disable Weak Ciphers -Foreach ($cipher in $weakCiphers) { - $key = (get-item HKLM:\).OpenSubKey("SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Ciphers", $true).CreateSubKey($cipher) - $key.SetValue('Enabled', 0, 'DWord') - $key.Close() -} - -# Enable Strong Ciphers -Foreach ($cipher in $strongCiphers) { - $key = (get-item HKLM:\).OpenSubKey("SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Ciphers", $true).CreateSubKey($cipher) - New-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Ciphers\$cipher" -name 'Enabled' -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null - $key.Close() -} - -# Reset the hashes key -New-Item 'HKLM:SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Hashes' -Force | Out-Null - -# Disable weak hashes -Foreach ($hash in $weakHashes) { - $key = (get-item HKLM:\).OpenSubKey("SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Hashes", $true).CreateSubKey($hash) - New-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Hashes\$hash" -name 'Enabled' -value '0' -PropertyType 'DWord' -Force | Out-Null - $key.Close() -} - -# Enable Hashes -Foreach ($hash in $strongHashes) { - $key = (get-item HKLM:\).OpenSubKey("SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Hashes", $true).CreateSubKey($hash) - New-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\Hashes\$hash" -name 'Enabled' -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null - $key.Close() -} - -# Reset the KeyExchangeAlgorithms key -New-Item 'HKLM:SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\KeyExchangeAlgorithms' -Force | Out-Null - -# 
Enable KeyExchangeAlgorithms -Foreach ($keyExchange in $strongKeyExchanges) { - $key = (get-item HKLM:\).OpenSubKey("SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\KeyExchangeAlgorithms", $true).CreateSubKey($keyExchange) - New-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\SecurityProviders\SCHANNEL\KeyExchangeAlgorithms\$keyExchange" -name 'Enabled' -value '0xffffffff' -PropertyType 'DWord' -Force | Out-Null - $key.Close() -} - -# Set cipher order -$cipherOrderString = [string]::join(',', $cipherOrder) -New-ItemProperty -path 'HKLM:\SOFTWARE\Policies\Microsoft\Cryptography\Configuration\SSL\00010002' -name 'Functions' -value $cipherOrderString -PropertyType 'String' -Force | Out-Null - -Write-Output "TLS hardened." diff --git a/e2e/terraform/packer/windows-2016-amd64/install-consul.ps1 b/e2e/terraform/packer/windows-2016-amd64/install-consul.ps1 deleted file mode 100755 index 4610d55d4..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/install-consul.ps1 +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - -Set-StrictMode -Version latest -$ErrorActionPreference = "Stop" - -# Force TLS1.2 -[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 - -Set-Location C:\opt - -Try { - $releases = "https://releases.hashicorp.com" - $version = "1.11.4+ent" - $url = "${releases}/consul/${version}/consul_${version}_windows_amd64.zip" - - New-Item -ItemType Directory -Force -Path C:\opt\consul - New-Item -ItemType Directory -Force -Path C:\etc\consul.d - - # TODO: check sha! - Write-Output "Downloading Consul from: $url" - Invoke-WebRequest -Uri $url -Outfile consul.zip -ErrorAction Stop - Expand-Archive .\consul.zip .\ -ErrorAction Stop - Move-Item consul.exe C:\opt\consul.exe -Force -ErrorAction Stop - C:\opt\consul.exe version - rm consul.zip - - New-Service ` - -Name "Consul" ` - -BinaryPathName "C:\opt\consul.exe agent -config-dir C:\etc\consul.d" ` - -StartupType "Automatic" ` - -ErrorAction Ignore - -} Catch { - Write-Output "Failed to install Consul." - Write-Output $_ - $host.SetShouldExit(-1) - throw -} - -Write-Output "Installed Consul." diff --git a/e2e/terraform/packer/windows-2016-amd64/install-nomad.ps1 b/e2e/terraform/packer/windows-2016-amd64/install-nomad.ps1 deleted file mode 100755 index 34e9361dd..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/install-nomad.ps1 +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - -Set-StrictMode -Version latest -$ErrorActionPreference = "Stop" - -# Force TLS1.2 -[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 - -Set-Location C:\opt - -Try { - $releases = "https://releases.hashicorp.com" - $version = "1.2.6" - $url = "${releases}/nomad/${version}/nomad_${version}_windows_amd64.zip" - - New-Item -ItemType Directory -Force -Path C:\opt\nomad - New-Item -ItemType Directory -Force -Path C:\etc\nomad.d - - # TODO: check sha! 
- Write-Output "Downloading Nomad from: $url" - Invoke-WebRequest -Uri $url -Outfile nomad.zip -ErrorAction Stop - Expand-Archive .\nomad.zip .\ -ErrorAction Stop - Move-Item nomad.exe C:\opt\nomad.exe -Force -ErrorAction Stop - C:\opt\nomad.exe version - rm nomad.zip - - New-NetFirewallRule ` - -DisplayName 'Nomad HTTP Inbound' ` - -Profile @('Public', 'Domain', 'Private') ` - -Direction Inbound ` - -Action Allow ` - -Protocol TCP ` - -LocalPort @('4646') - - New-Service ` - -Name "Nomad" ` - -BinaryPathName "C:\opt\nomad.exe agent -config C:\etc\nomad.d" ` - -StartupType "Automatic" ` - -ErrorAction Ignore - -} Catch { - Write-Output "Failed to install Nomad." - Write-Output $_ - $host.SetShouldExit(-1) - throw -} - -Write-Output "Installed Nomad." diff --git a/e2e/terraform/packer/windows-2016-amd64/install-nuget.ps1 b/e2e/terraform/packer/windows-2016-amd64/install-nuget.ps1 deleted file mode 100755 index 471402628..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/install-nuget.ps1 +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - -Set-StrictMode -Version latest -$ErrorActionPreference = "Stop" - -$RunningAsAdmin = ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator") -if (!$RunningAsAdmin) { - Write-Error "Must be executed in Administrator level shell." - exit 1 -} - -# Force TLS1.2 -[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 - -Try { - Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force -ErrorAction Stop -} Catch { - Write-Output "Failed to install NuGet package manager." - Write-Output $_ - $host.SetShouldExit(-1) - throw -} - -Write-Output "Installed NuGet." diff --git a/e2e/terraform/packer/windows-2016-amd64/userdata.ps1 b/e2e/terraform/packer/windows-2016-amd64/userdata.ps1 deleted file mode 100755 index c6a466261..000000000 --- a/e2e/terraform/packer/windows-2016-amd64/userdata.ps1 +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - - - -Set-StrictMode -Version latest -$ErrorActionPreference = "Stop" - -$RunningAsAdmin = ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator") -if (!$RunningAsAdmin) { - Write-Error "Must be executed in Administrator level shell." 
- exit 1 -} - -# Force TLS1.2 -[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 - -Write-Output "Running User Data Script" -Write-Host "(host) Running User Data Script" - -Set-ExecutionPolicy Unrestricted -Scope LocalMachine -Force -ErrorAction Ignore - -# Don't set this before Set-ExecutionPolicy as it throws an error -$ErrorActionPreference = "stop" - -# ------------------------------------------- -# WinRM - -# Remove HTTP listener -Remove-Item -Path WSMan:\Localhost\listener\listener* -Recurse - -$Cert = New-SelfSignedCertificate ` - -CertstoreLocation Cert:\LocalMachine\My ` - -DnsName "packer" - -New-Item ` - -Path WSMan:\LocalHost\Listener ` - -Transport HTTPS ` - -Address * ` - -CertificateThumbPrint $Cert.Thumbprint ` - -Force - -Write-output "Setting up WinRM" -Write-host "(host) setting up WinRM" - -cmd.exe /c winrm quickconfig -q -cmd.exe /c winrm set "winrm/config" '@{MaxTimeoutms="1800000"}' -cmd.exe /c winrm set "winrm/config/winrs" '@{MaxMemoryPerShellMB="1024"}' -cmd.exe /c winrm set "winrm/config/service" '@{AllowUnencrypted="true"}' -cmd.exe /c winrm set "winrm/config/client" '@{AllowUnencrypted="true"}' -cmd.exe /c winrm set "winrm/config/service/auth" '@{Basic="true"}' -cmd.exe /c winrm set "winrm/config/client/auth" '@{Basic="true"}' -cmd.exe /c winrm set "winrm/config/service/auth" '@{CredSSP="true"}' -cmd.exe /c winrm set "winrm/config/listener?Address=*+Transport=HTTPS" "@{Port=`"5986`";Hostname=`"packer`";CertificateThumbprint=`"$($Cert.Thumbprint)`"}" -cmd.exe /c netsh advfirewall firewall set rule group="remote administration" new enable=yes -cmd.exe /c netsh firewall add portopening TCP 5986 "Port 5986" -cmd.exe /c net stop winrm -cmd.exe /c sc config winrm start= auto -cmd.exe /c net start winrm - - -# ------------------------------------------- -# Disks and Directories - -# Bring ebs volume online with read-write access -Get-Disk | Where-Object IsOffline -Eq $True | Set-Disk -IsOffline $False -Get-Disk | Where-Object isReadOnly -Eq $True | Set-Disk -IsReadOnly $False - -New-Item -ItemType Directory -Force -Path C:\opt -ErrorAction Stop - -# ------------------------------------------- -# SSH - -Try { - - # install portable SSH instead of the Windows feature because we - # need to target 2016 - $repo = "https://github.com/PowerShell/Win32-OpenSSH" - $version = "v8.0.0.0p1-Beta" - $url = "${repo}/releases/download/${version}/OpenSSH-Win64.zip" - - # TODO: check sha! 
- Write-Output "Downloading OpenSSH from: $url" - Invoke-WebRequest -Uri $url -Outfile "OpenSSH-Win64.zip" -ErrorAction Stop - Expand-Archive ".\OpenSSH-Win64.zip" "C:\Program Files" -ErrorAction Stop - Rename-Item -Path "C:\Program Files\OpenSSH-Win64" -NewName "OpenSSH" -ErrorAction Stop - - & "C:\Program Files\OpenSSH\install-sshd.ps1" - - # Start the service - Start-Service sshd - Set-Service -Name sshd -StartupType 'Automatic' -ErrorAction Stop - - Start-Service ssh-agent - Set-Service -Name ssh-agent -StartupType 'Automatic' -ErrorAction Stop - - # Enable host firewall rule if it doesn't exist - New-NetFirewallRule -Name sshd -DisplayName 'OpenSSH Server (sshd)' ` - -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 -ErrorAction Stop - - # Note: there appears to be a regression in recent versions of - # Terraform for file provisioning over ssh for Windows with - # powershell as the default shell - # See: https://github.com/hashicorp/terraform/issues/30661 - # - # Set powershell as the OpenSSH login shell - # New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" ` - # -Name DefaultShell ` - # -Value "C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe" ` - # -PropertyType String -Force -ErrorAction Stop - - Write-Output "Installed OpenSSH." - -} Catch { - Write-Output "Failed to install OpenSSH." - Write-Output $_ - $host.SetShouldExit(-1) - throw -} - -md "C:\Users\Administrator\.ssh\" - -$myKey = "C:\Users\Administrator\.ssh\authorized_keys" -$adminKey = "C:\ProgramData\ssh\administrators_authorized_keys" - -Invoke-RestMethod ` - -Uri "http://169.254.169.254/latest/meta-data/public-keys/0/openssh-key" ` - -Outfile $myKey - -cp $myKey $adminKey - -icacls $adminKey /reset -icacls $adminKey /inheritance:r -icacls $adminKey /grant BUILTIN\Administrators:`(F`) -icacls $adminKey /grant SYSTEM:`(F`) - - diff --git a/e2e/terraform/provision-infra/compute.tf b/e2e/terraform/provision-infra/compute.tf index b6bd45adc..97d8deea3 100644 --- a/e2e/terraform/provision-infra/compute.tf +++ b/e2e/terraform/provision-infra/compute.tf @@ -2,9 +2,8 @@ # SPDX-License-Identifier: BUSL-1.1 locals { - ami_prefix = "nomad-e2e-v3" - ubuntu_image_name = "ubuntu-jammy-${var.instance_arch}" - windows_image_name = "windows-2016-${var.instance_arch}" + ami_prefix = "nomad-e2e-v3" + ubuntu_image_name = "ubuntu-jammy-${var.instance_arch}" } resource "aws_instance" "server" { @@ -44,20 +43,20 @@ resource "aws_instance" "client_ubuntu_jammy" { -resource "aws_instance" "client_windows_2016" { - ami = data.aws_ami.windows_2016[0].image_id +resource "aws_instance" "client_windows_2022" { + ami = data.aws_ami.windows_2022[0].image_id instance_type = var.instance_type key_name = module.keys.key_name vpc_security_group_ids = [aws_security_group.clients.id] - count = var.client_count_windows_2016 + count = var.client_count_windows_2022 iam_instance_profile = data.aws_iam_instance_profile.nomad_e2e_cluster.name availability_zone = var.availability_zone - user_data = file("${path.module}/userdata/windows-2016.ps1") + user_data = file("${path.module}/userdata/windows-2022.ps1") # Instance tags tags = { - Name = "${local.random_name}-client-windows-2016-${count.index}" + Name = "${local.random_name}-client-windows-2022-${count.index}" ConsulAutoJoin = "auto-join-${local.random_name}" User = data.aws_caller_identity.current.arn OS = "windows" @@ -138,24 +137,14 @@ data "aws_ami" "ubuntu_jammy" { } } -data "aws_ami" "windows_2016" { - count = var.client_count_windows_2016 > 0 ? 
1 : 0 +data "aws_ami" "windows_2022" { + count = var.client_count_windows_2022 > 0 ? 1 : 0 most_recent = true - owners = ["self"] + owners = ["amazon"] filter { name = "name" - values = ["${local.ami_prefix}-${local.windows_image_name}-*"] - } - - filter { - name = "tag:OS" - values = ["Windows2016"] - } - - filter { - name = "tag:BuilderSha" - values = [data.external.packer_sha.result["sha"]] + values = ["Windows_Server-2022-English-Full-ECS_Optimized-2025.*"] } } diff --git a/e2e/terraform/provision-infra/nomad.tf b/e2e/terraform/provision-infra/nomad.tf index fcb9518f1..fb3a47128 100644 --- a/e2e/terraform/provision-infra/nomad.tf +++ b/e2e/terraform/provision-infra/nomad.tf @@ -4,7 +4,7 @@ locals { server_binary = var.nomad_local_binary_server != "" ? var.nomad_local_binary_server : var.nomad_local_binary linux_binary = var.nomad_local_binary_client_ubuntu_jammy != "" ? var.nomad_local_binary_client_ubuntu_jammy : var.nomad_local_binary - windows_binary = var.nomad_local_binary_client_windows_2016 != "" ? var.nomad_local_binary_client_windows_2016 : var.nomad_local_binary + windows_binary = var.nomad_local_binary_client_windows_2022 != "" ? var.nomad_local_binary_client_windows_2022 : var.nomad_local_binary } module "nomad_server" { @@ -70,18 +70,16 @@ module "nomad_client_ubuntu_jammy" { } -# TODO: split out the different Windows targets (2016, 2019) when they're -# available -module "nomad_client_windows_2016" { +module "nomad_client_windows_2022" { source = "./provision-nomad" - depends_on = [aws_instance.client_windows_2016] - count = var.client_count_windows_2016 + depends_on = [aws_instance.client_windows_2022] + count = var.client_count_windows_2022 platform = "windows" arch = "windows_${var.instance_arch}" role = "client" index = count.index - instance = aws_instance.client_windows_2016[count.index] + instance = aws_instance.client_windows_2022[count.index] nomad_region = var.nomad_region nomad_license = var.nomad_license diff --git a/e2e/terraform/provision-infra/outputs.tf b/e2e/terraform/provision-infra/outputs.tf index cbbf76f01..8c5d9139e 100644 --- a/e2e/terraform/provision-infra/outputs.tf +++ b/e2e/terraform/provision-infra/outputs.tf @@ -10,11 +10,11 @@ output "linux_clients" { } output "windows_clients" { - value = aws_instance.client_windows_2016.*.public_ip + value = aws_instance.client_windows_2022.*.public_ip } output "clients" { - value = concat(aws_instance.client_ubuntu_jammy.*.public_ip, aws_instance.client_windows_2016.*.public_ip) + value = concat(aws_instance.client_ubuntu_jammy.*.public_ip, aws_instance.client_windows_2022.*.public_ip) } output "message" { @@ -38,7 +38,7 @@ ssh into clients with: %{for ip in aws_instance.client_ubuntu_jammy.*.public_ip~} ssh -i ${local.keys_dir}/${local.random_name}.pem ubuntu@${ip} %{endfor~} -%{for ip in aws_instance.client_windows_2016.*.public_ip~} +%{for ip in aws_instance.client_windows_2022.*.public_ip~} ssh -i ${local.keys_dir}/${local.random_name}.pem Administrator@${ip} %{endfor~} diff --git a/e2e/terraform/provision-infra/provision-nomad/install-windows.tf b/e2e/terraform/provision-infra/provision-nomad/install-windows.tf index 889eae10b..f9972d55c 100644 --- a/e2e/terraform/provision-infra/provision-nomad/install-windows.tf +++ b/e2e/terraform/provision-infra/provision-nomad/install-windows.tf @@ -47,10 +47,10 @@ resource "null_resource" "install_consul_configs_windows" { "powershell Remove-Item -Force -Recurse -Path C://etc/consul.d", "powershell New-Item -Force -Path C:// -Name opt -ItemType directory", 
"powershell New-Item -Force -Path C://etc -Name consul.d -ItemType directory", - "powershell Move-Item -Force -Path C://tmp/consul_ca.pem C://Windows/System32/ca.pem", - "powershell Move-Item -Force -Path C://tmp/consul_client_acl.json C://etc/consul.d/acl.json", - "powershell Move-Item -Force -Path C://tmp/consul_client.json C://etc/consul.d/consul_client.json", - "powershell Move-Item -Force -Path C://tmp/consul_client_base.json C://etc/consul.d/consul_client_base.json", + "powershell Move-Item -Force -Path C://tmp/consul_ca.crt C://etc/consul.d/ca.pem", + "powershell Move-Item -Force -Path C://tmp/consul_cert.key.pem C://etc/consul.d/cert.key.pem", + "powershell Move-Item -Force -Path C://tmp/consul_cert.pem C://etc/consul.d/cert.pem", + "powershell Move-Item -Force -Path C://tmp/consul_client.hcl C://etc/consul.d/consul_client.hcl", ] } } diff --git a/e2e/terraform/provision-infra/userdata/windows-2016.ps1 b/e2e/terraform/provision-infra/userdata/windows-2016.ps1 deleted file mode 100755 index c9bc0d5ee..000000000 --- a/e2e/terraform/provision-infra/userdata/windows-2016.ps1 +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 - - - -# Bring ebs volume online with read-write access -Get-Disk | Where-Object IsOffline -Eq $True | Set-Disk -IsOffline $False -Get-Disk | Where-Object isReadOnly -Eq $True | Set-Disk -IsReadOnly $False - -md "C:\Users\Administrator\.ssh\" - -$myKey = "C:\Users\Administrator\.ssh\authorized_keys" -$adminKey = "C:\ProgramData\ssh\administrators_authorized_keys" - -Invoke-RestMethod ` - -Uri "http://169.254.169.254/latest/meta-data/public-keys/0/openssh-key" ` - -Outfile $myKey - -cp $myKey $adminKey - -icacls $adminKey /reset -icacls $adminKey /inheritance:r -icacls $adminKey /grant BUILTIN\Administrators:`(F`) -icacls $adminKey /grant SYSTEM:`(F`) - -# for host volume testing -New-Item -ItemType Directory -Force -Path C:\tmp\data - - diff --git a/e2e/terraform/provision-infra/userdata/windows-2022.ps1 b/e2e/terraform/provision-infra/userdata/windows-2022.ps1 new file mode 100755 index 000000000..3ee082951 --- /dev/null +++ b/e2e/terraform/provision-infra/userdata/windows-2022.ps1 @@ -0,0 +1,179 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + + + +Set-StrictMode -Version latest +$ErrorActionPreference = "Stop" + +$RunningAsAdmin = ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator") +if (!$RunningAsAdmin) { + Write-Error "Must be executed in Administrator level shell." 
+ exit 1 +} + +# ------------------------------------------- +# Disks and Directories + +# Bring ebs volume online with read-write access +Get-Disk | Where-Object IsOffline -Eq $True | Set-Disk -IsOffline $False +Get-Disk | Where-Object isReadOnly -Eq $True | Set-Disk -IsReadOnly $False + +New-Item -ItemType Directory -Force -Path C:\opt\nomad +New-Item -ItemType Directory -Force -Path C:\etc\nomad.d +New-Item -ItemType Directory -Force -Path C:\tmp +New-Item -ItemType Directory -Force -Path C:\opt\consul +New-Item -ItemType Directory -Force -Path C:\etc\consul.d + +# ------------------------------------------- +# Install Consul Agent + +Set-Location C:\opt + +Try { + $releases = "https://releases.hashicorp.com" + $version = "1.21.1+ent" + $url = "${releases}/consul/${version}/consul_${version}_windows_amd64.zip" + + Write-Output "Downloading Consul from: $url" + Invoke-WebRequest -Uri $url -Outfile consul.zip -ErrorAction Stop + Expand-Archive .\consul.zip .\ -ErrorAction Stop + Move-Item consul.exe C:\opt\consul.exe -Force -ErrorAction Stop + C:\opt\consul.exe version + rm consul.zip + + New-Service ` + -Name "Consul" ` + -BinaryPathName "C:\opt\consul.exe agent -config-dir C:\etc\consul.d" ` + -StartupType "Automatic" ` + -ErrorAction Ignore + +} Catch { + Write-Output "Failed to install Consul." + Write-Output $_ + $host.SetShouldExit(-1) + throw +} + +Write-Output "Installed Consul." + +# ------------------------------------------- +# Install service and firewall rules for Nomad +# Note the service can't run until we upload Nomad too + +Try { + New-NetFirewallRule ` + -DisplayName 'Nomad HTTP Inbound' ` + -Profile @('Public', 'Domain', 'Private') ` + -Direction Inbound ` + -Action Allow ` + -Protocol TCP ` + -LocalPort @('4646') + + New-Service ` + -Name "Nomad" ` + -BinaryPathName "C:\opt\nomad.exe agent -config C:\etc\nomad.d" ` + -StartupType "Automatic" ` + -ErrorAction Ignore +} Catch { + Write-Output "Failed to install Nomad." + Write-Output $_ + $host.SetShouldExit(-1) + throw +} + +Write-Output "Installed Nomad." + +# -------------------------------------------- +# Install firewall rules required to allow tests + +Try { + New-NetFirewallRule ` + -DisplayName 'Metrics Inbound' ` + -Profile @('Public', 'Domain', 'Private') ` + -Direction Inbound ` + -Action Allow ` + -Protocol TCP ` + -LocalPort @('6120') +} Catch { + Write-Output "Failed to install firewall rules." + Write-Output $_ + $host.SetShouldExit(-1) + throw +} + +# ------------------------------------------- +# Install and configure ssh + +# Note: we don't set powershell as the default ssh shell because of +# https://github.com/hashicorp/terraform/issues/30661 + +# Note: this is after we install services and binaries so that we can block on +# ssh availability and not race with the provisioning steps in Terraform + +Write-Host 'Installing and starting sshd' +Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0 +Set-Service -Name sshd -StartupType Automatic +Start-Service sshd + +Write-Host 'Installing and starting ssh-agent' +Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0 +Set-Service -Name ssh-agent -StartupType Automatic +Start-Service ssh-agent + +# From https://learn.microsoft.com/en-us/windows-server/administration/openssh/openssh_install_firstuse?tabs=powershell&pivots=windows-server-2022 +# Confirm the Firewall rule is configured. It should be created automatically by +# setup. 
Run the following to verify +if (!(Get-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -ErrorAction SilentlyContinue | Select-Object Name, Enabled)) { + Write-Output "Firewall Rule 'OpenSSH-Server-In-TCP' does not exist, creating it..." + New-NetFirewallRule -Name 'OpenSSH-Server-In-TCP' -DisplayName 'OpenSSH Server (sshd)' -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 +} else { + Write-Output "Firewall rule 'OpenSSH-Server-In-TCP' has been created and exists." +} + +md "C:\Users\Administrator\.ssh\" + +$myKey = "C:\Users\Administrator\.ssh\authorized_keys" +$adminKey = "C:\ProgramData\ssh\administrators_authorized_keys" + +Invoke-RestMethod ` + -Uri "http://169.254.169.254/latest/meta-data/public-keys/0/openssh-key" ` + -Outfile $myKey + +cp $myKey $adminKey + +icacls $adminKey /reset +icacls $adminKey /inheritance:r +icacls $adminKey /grant BUILTIN\Administrators:`(F`) +icacls $adminKey /grant SYSTEM:`(F`) + + +# ------------------------------------------- +# Disable automatic updates so we don't get restarts in the middle of tests + +$service = Get-WmiObject Win32_Service -Filter 'Name="wuauserv"' + +if (!$service) { + Write-Error "Failed to retrieve the wauserv service" + exit 1 +} + +if ($service.StartMode -ne "Disabled") { + $result = $service.ChangeStartMode("Disabled").ReturnValue + if($result) { + Write-Error "Failed to disable the 'wuauserv' service. The return value was $result." + exit 1 + } +} + +if ($service.State -eq "Running") { + $result = $service.StopService().ReturnValue + if ($result) { + Write-Error "Failed to stop the 'wuauserv' service. The return value was $result." + exit 1 + } +} + +Write-Output "Automatic Windows Updates disabled." + + diff --git a/e2e/terraform/provision-infra/variables.tf b/e2e/terraform/provision-infra/variables.tf index 5267a8ab9..de1850aaf 100644 --- a/e2e/terraform/provision-infra/variables.tf +++ b/e2e/terraform/provision-infra/variables.tf @@ -36,9 +36,9 @@ variable "client_count_linux" { default = "4" } -variable "client_count_windows_2016" { - description = "The number of windows 2016 clients to provision." - default = "0" +variable "client_count_windows_2022" { + description = "The number of windows 2022 clients to provision." + default = "1" } variable "restrict_ingress_cidrblock" { @@ -120,7 +120,7 @@ variable "nomad_local_binary_client_ubuntu_jammy" { default = "" } -variable "nomad_local_binary_client_windows_2016" { +variable "nomad_local_binary_client_windows_2022" { description = "A path to an alternative binary to deploy to windows clients, to override nomad_local_binary" type = string default = "" diff --git a/e2e/terraform/terraform.tfvars b/e2e/terraform/terraform.tfvars index 284c99dbe..4f7f3612c 100644 --- a/e2e/terraform/terraform.tfvars +++ b/e2e/terraform/terraform.tfvars @@ -7,4 +7,4 @@ # folder nomad_local_binary = "../../pkg/linux_amd64/nomad" -nomad_local_binary_client_windows_2016 = "../../pkg/windows_amd64/nomad.exe" +nomad_local_binary_client_windows_2022 = "../../pkg/windows_amd64/nomad.exe" diff --git a/e2e/terraform/variables.tf b/e2e/terraform/variables.tf index f8013eafd..47d2e19c7 100644 --- a/e2e/terraform/variables.tf +++ b/e2e/terraform/variables.tf @@ -36,8 +36,8 @@ variable "client_count_linux" { default = "4" } -variable "client_count_windows_2016" { - description = "The number of windows 2016 clients to provision." +variable "client_count_windows_2022" { + description = "The number of windows 2022 clients to provision." 
default = "0" } @@ -111,7 +111,7 @@ variable "nomad_local_binary_client_ubuntu_jammy" { default = "" } -variable "nomad_local_binary_client_windows_2016" { +variable "nomad_local_binary_client_windows_2022" { description = "A path to an alternative binary to deploy to windows clients, to override nomad_local_binary" type = string default = "" From 5e7ec1b32ce328dfd083a17c40ebb231c6bbc68f Mon Sep 17 00:00:00 2001 From: Allison Larson Date: Mon, 16 Jun 2025 10:17:28 -0700 Subject: [PATCH 06/32] test: waitForKeyring in SignIdentities test (#26051) --- nomad/alloc_endpoint_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 715f31115..20686aad3 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -1780,7 +1780,7 @@ func TestAlloc_SignIdentities_Blocking(t *testing.T) { s1, cleanupS1 := TestServer(t, nil) t.Cleanup(cleanupS1) codec := rpcClient(t, s1) - testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForKeyring(t, s1.RPC, "global") state := s1.fsm.State() node := mock.Node() From d3e077a78e621ff27b35ef38b46714e623cbc4e6 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Tue, 17 Jun 2025 08:13:36 +0100 Subject: [PATCH 07/32] enos: Modify Windows TF variable to match new 2022 value. (#26067) --- enos/enos-scenario-upgrade.hcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl index 37c1542a8..da9780788 100644 --- a/enos/enos-scenario-upgrade.hcl +++ b/enos/enos-scenario-upgrade.hcl @@ -72,7 +72,7 @@ scenario "upgrade" { nomad_local_binary_server = step.copy_initial_binary.binary_path[local.server_os] server_count = var.server_count client_count_linux = local.linux_count - client_count_windows_2016 = local.windows_count + client_count_windows_2022 = local.windows_count nomad_license = var.nomad_license consul_license = var.consul_license volumes = false From b392919b71fc8342b2dbe0c183f1868f63715afa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:24:06 +0200 Subject: [PATCH 08/32] chore(deps): bump go.etcd.io/bbolt from 1.4.0 to 1.4.1 (#26062) Bumps [go.etcd.io/bbolt](https://github.com/etcd-io/bbolt) from 1.4.0 to 1.4.1. - [Release notes](https://github.com/etcd-io/bbolt/releases) - [Commits](https://github.com/etcd-io/bbolt/compare/v1.4.0...v1.4.1) --- updated-dependencies: - dependency-name: go.etcd.io/bbolt dependency-version: 1.4.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 71da6cefa..255823db3 100644 --- a/go.mod +++ b/go.mod @@ -125,7 +125,7 @@ require ( github.com/stretchr/testify v1.10.0 github.com/zclconf/go-cty v1.16.3 github.com/zclconf/go-cty-yaml v1.1.0 - go.etcd.io/bbolt v1.4.0 + go.etcd.io/bbolt v1.4.1 go.uber.org/goleak v1.3.0 golang.org/x/crypto v0.38.0 golang.org/x/mod v0.25.0 diff --git a/go.sum b/go.sum index 7166b5390..623faaef0 100644 --- a/go.sum +++ b/go.sum @@ -1657,8 +1657,8 @@ github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN github.com/zeebo/errs v1.4.0 h1:XNdoD/RRMKP7HD0UhJnIzUy74ISdGGxURlYG8HSWSfM= github.com/zeebo/errs v1.4.0/go.mod h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk= -go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk= +go.etcd.io/bbolt v1.4.1 h1:5mOV+HWjIPLEAlUGMsveaUvK2+byZMFOzojoi7bh7uI= +go.etcd.io/bbolt v1.4.1/go.mod h1:c8zu2BnXWTu2XM4XcICtbGSl9cFwsXtcf9zLt2OncM8= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= From cced11c6d8f264d777cba09f75e1d7feb8109fc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:37:23 +0200 Subject: [PATCH 09/32] chore(deps): bump github.com/aws/aws-sdk-go-v2/config (#26061) Bumps [github.com/aws/aws-sdk-go-v2/config](https://github.com/aws/aws-sdk-go-v2) from 1.29.15 to 1.29.16. - [Release notes](https://github.com/aws/aws-sdk-go-v2/releases) - [Changelog](https://github.com/aws/aws-sdk-go-v2/blob/main/changelog-template.json) - [Commits](https://github.com/aws/aws-sdk-go-v2/compare/config/v1.29.15...config/v1.29.16) --- updated-dependencies: - dependency-name: github.com/aws/aws-sdk-go-v2/config dependency-version: 1.29.16 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 20 ++++++++++---------- go.sum | 40 ++++++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/go.mod b/go.mod index 255823db3..7ccb0d04c 100644 --- a/go.mod +++ b/go.mod @@ -16,8 +16,8 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/Microsoft/go-winio v0.6.2 github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e - github.com/aws/aws-sdk-go-v2/config v1.29.15 - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 + github.com/aws/aws-sdk-go-v2/config v1.29.16 + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.31 github.com/aws/smithy-go v1.22.3 github.com/container-storage-interface/spec v1.11.0 github.com/containerd/errdefs v1.0.0 @@ -182,18 +182,18 @@ require ( github.com/armon/go-metrics v0.4.1 // indirect github.com/armon/go-radix v1.0.0 // indirect github.com/aws/aws-sdk-go v1.55.6 // indirect - github.com/aws/aws-sdk-go-v2 v1.36.3 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.68 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect + github.com/aws/aws-sdk-go-v2 v1.36.4 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.69 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect github.com/aws/aws-sdk-go-v2/service/ec2 v1.200.0 // indirect github.com/aws/aws-sdk-go-v2/service/ecs v1.53.8 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.33.20 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.16 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.4 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.21 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect github.com/bgentry/speakeasy v0.1.0 // indirect diff --git a/go.sum b/go.sum index 623faaef0..deb0744ed 100644 --- a/go.sum +++ b/go.sum @@ -731,18 +731,18 @@ github.com/aws/aws-sdk-go v1.30.27/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZve github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go v1.55.6 h1:cSg4pvZ3m8dgYcgqB97MrcdjUmZ1BeMYKUxMMB89IPk= github.com/aws/aws-sdk-go v1.55.6/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= -github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= -github.com/aws/aws-sdk-go-v2/config v1.29.15 h1:I5XjesVMpDZXZEZonVfjI12VNMrYa38LtLnw4NtY5Ss= -github.com/aws/aws-sdk-go-v2/config v1.29.15/go.mod h1:tNIp4JIPonlsgaO5hxO372a6gjhN63aSWl2GVl5QoBQ= -github.com/aws/aws-sdk-go-v2/credentials v1.17.68 h1:cFb9yjI02/sWHBSYXAtkamjzCuRymvmeFmt0TC0MbYY= -github.com/aws/aws-sdk-go-v2/credentials v1.17.68/go.mod h1:H6E+jBzyqUu8u0vGaU6POkK3P0NylYEeRZ6ynBpMqIk= 
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= +github.com/aws/aws-sdk-go-v2 v1.36.4 h1:GySzjhVvx0ERP6eyfAbAuAXLtAda5TEy19E5q5W8I9E= +github.com/aws/aws-sdk-go-v2 v1.36.4/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= +github.com/aws/aws-sdk-go-v2/config v1.29.16 h1:XkruGnXX1nEZ+Nyo9v84TzsX+nj86icbFAeust6uo8A= +github.com/aws/aws-sdk-go-v2/config v1.29.16/go.mod h1:uCW7PNjGwZ5cOGZ5jr8vCWrYkGIhPoTNV23Q/tpHKzg= +github.com/aws/aws-sdk-go-v2/credentials v1.17.69 h1:8B8ZQboRc3uaIKjshve/XlvJ570R7BKNy3gftSbS178= +github.com/aws/aws-sdk-go-v2/credentials v1.17.69/go.mod h1:gPME6I8grR1jCqBFEGthULiolzf/Sexq/Wy42ibKK9c= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.31 h1:oQWSGexYasNpYp4epLGZxxjsDo8BMBh6iNWkTXQvkwk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.31/go.mod h1:nc332eGUU+djP3vrMI6blS0woaCfHTe3KiSQUVTMRq0= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35 h1:o1v1VFfPcDVlK3ll1L5xHsaQAFdNtZ5GXnNR7SwueC4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35/go.mod h1:rZUQNYMNG+8uZxz9FOerQJ+FceCiodXvixpeRtdESrU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35 h1:R5b82ubO2NntENm3SAm0ADME+H630HomNJdgv+yZ3xw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35/go.mod h1:FuA+nmgMRfkzVKYDNEqQadvEMxtxl9+RLT9ribCwEMs= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= github.com/aws/aws-sdk-go-v2/service/ec2 v1.200.0 h1:3hH6o7Z2WeE1twvz44Aitn6Qz8DZN3Dh5IB4Eh2xq7s= @@ -751,14 +751,14 @@ github.com/aws/aws-sdk-go-v2/service/ecs v1.53.8 h1:v1OectQdV/L+KSFSiqK00fXGN8Fb github.com/aws/aws-sdk-go-v2/service/ecs v1.53.8/go.mod h1:F0DbgxpvuSvtYun5poG67EHLvci4SgzsMVO6SsPUqKk= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 h1:1Gw+9ajCV1jogloEv1RRnvfRFia2cL6c9cuKV2Ps+G8= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.3/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 h1:hXmVKytPfTy5axZ+fYbR5d0cFmC3JvwLm5kM83luako= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.20 h1:oIaQ1e17CSKaWmUTu62MtraRWVIosn/iONMuZt0gbqc= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.20/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4= 
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.16 h1:/ldKrPPXTC421bTNWrUIpq3CxwHwRI/kpc+jPUTJocM= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.16/go.mod h1:5vkf/Ws0/wgIMJDQbjI4p2op86hNW6Hie5QtebrDgT8= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.4 h1:EU58LP8ozQDVroOEyAfcq0cGc5R/FTZjVoYJ6tvby3w= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.4/go.mod h1:CrtOgCcysxMvrCoHnvNAD7PHWclmoFG78Q2xLK0KKcs= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.2 h1:XB4z0hbQtpmBnb1FQYvKaCM7UsS6Y/u8jVBwIUGeCTk= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.2/go.mod h1:hwRpqkRxnQ58J9blRDrB4IanlXCpcKmsC83EhG77upg= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.21 h1:nyLjs8sYJShFYj6aiyjCBI3EcLn1udWrQTjEF+SOXB0= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.21/go.mod h1:EhdxtZ+g84MSGrSrHzZiUm9PYiZkrADNja15wtRJSJo= github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= From 9553eb1f4f4c7a8a35277e0ebdc79ec53f2118b4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Jun 2025 17:24:16 +0200 Subject: [PATCH 10/32] chore(deps): bump github.com/hashicorp/go-discover from 1.0.0 to 1.1.0 (#26059) Bumps [github.com/hashicorp/go-discover](https://github.com/hashicorp/go-discover) from 1.0.0 to 1.1.0. - [Release notes](https://github.com/hashicorp/go-discover/releases) - [Changelog](https://github.com/hashicorp/go-discover/blob/master/CHANGELOG.md) - [Commits](https://github.com/hashicorp/go-discover/compare/v1.0.0...v1.1.0) --- updated-dependencies: - dependency-name: github.com/hashicorp/go-discover dependency-version: 1.1.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 7ccb0d04c..7773721ee 100644 --- a/go.mod +++ b/go.mod @@ -53,7 +53,7 @@ require ( github.com/hashicorp/go-cleanhttp v0.5.2 github.com/hashicorp/go-connlimit v0.3.1 github.com/hashicorp/go-cty-funcs v0.0.0-20200930094925-2721b1e36840 - github.com/hashicorp/go-discover v1.0.0 + github.com/hashicorp/go-discover v1.1.0 github.com/hashicorp/go-envparse v0.1.0 github.com/hashicorp/go-getter v1.7.8 github.com/hashicorp/go-hclog v1.6.3 diff --git a/go.sum b/go.sum index deb0744ed..216df820a 100644 --- a/go.sum +++ b/go.sum @@ -1153,8 +1153,8 @@ github.com/hashicorp/go-connlimit v0.3.1 h1:v5A31V0FfXNYAtWP6BFtRhs8Nhr650a1HJmw github.com/hashicorp/go-connlimit v0.3.1/go.mod h1:Duz6KJRveeIrTMrat9ZxH/FaWOxDUmqDumz4qxGdQVM= github.com/hashicorp/go-cty-funcs v0.0.0-20200930094925-2721b1e36840 h1:kgvybwEeu0SXktbB2y3uLHX9lklLo+nzUwh59A3jzQc= github.com/hashicorp/go-cty-funcs v0.0.0-20200930094925-2721b1e36840/go.mod h1:Abjk0jbRkDaNCzsRhOv2iDCofYpX1eVsjozoiK63qLA= -github.com/hashicorp/go-discover v1.0.0 h1:yNkCyetOdCDtuZLyMGmYW7oC/mlRmeQou23wcgmRetM= -github.com/hashicorp/go-discover v1.0.0/go.mod h1:jqvs0vDZPpnKlN21oG80bwkiIKPGCrmKChV6qItAjI0= +github.com/hashicorp/go-discover v1.1.0 h1:FN5AXXBCXbEMVq/BYk+qkYRhr+lwYgvBro2hMBUtnlA= +github.com/hashicorp/go-discover v1.1.0/go.mod h1:jqvs0vDZPpnKlN21oG80bwkiIKPGCrmKChV6qItAjI0= github.com/hashicorp/go-discover/provider/gce v0.0.0-20241120163552-5eb1507d16b4 h1:ywaDsVo7n5ko12YD8uXjuQ8G2mQhC2mxAc4Kj3WW3GE= github.com/hashicorp/go-discover/provider/gce v0.0.0-20241120163552-5eb1507d16b4/go.mod h1:yxikfLXA8Y5JA3FcFTR720PfqVEFd0dZY9FBpmcsO54= github.com/hashicorp/go-envparse v0.1.0 h1:bE++6bhIsNCPLvgDZkYqo3nA+/PFI51pkrHdmPSDFPY= From b38fef5c9afc61ea7f077ab58fc7140b7d8e21e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Jun 2025 17:54:37 +0200 Subject: [PATCH 11/32] chore(deps): bump brace-expansion in /scripts/screenshots/src (#26069) Bumps [brace-expansion](https://github.com/juliangruber/brace-expansion) from 1.1.11 to 1.1.12. - [Release notes](https://github.com/juliangruber/brace-expansion/releases) - [Commits](https://github.com/juliangruber/brace-expansion/compare/1.1.11...v1.1.12) --- updated-dependencies: - dependency-name: brace-expansion dependency-version: 1.1.12 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- scripts/screenshots/src/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/screenshots/src/yarn.lock b/scripts/screenshots/src/yarn.lock index c346abeef..e7edbdd0d 100644 --- a/scripts/screenshots/src/yarn.lock +++ b/scripts/screenshots/src/yarn.lock @@ -39,9 +39,9 @@ bl@^4.0.3: readable-stream "^3.4.0" brace-expansion@^1.1.7: - version "1.1.11" - resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" - integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== + version "1.1.12" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.12.tgz#ab9b454466e5a8cc3a187beaad580412a9c5b843" + integrity sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg== dependencies: balanced-match "^1.0.0" concat-map "0.0.1" From 3c67ba051678524847c9b3deb30fbbab68925ecf Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 17 Jun 2025 16:03:50 -0400 Subject: [PATCH 12/32] E2E: update TaskAPI test for Windows (#26074) The current version of Windows we're using ships with curl, so we don't need to download it as an artifact anymore. Remove the broken reference to this in the TaskAPI test for Windows. Ref: https://github.com/hashicorp/nomad-e2e/actions/runs/15708894856/job/44267973319 --- e2e/workload_id/input/api-win.nomad.hcl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/e2e/workload_id/input/api-win.nomad.hcl b/e2e/workload_id/input/api-win.nomad.hcl index a2076ac8c..43173167d 100644 --- a/e2e/workload_id/input/api-win.nomad.hcl +++ b/e2e/workload_id/input/api-win.nomad.hcl @@ -20,10 +20,7 @@ job "api-win" { driver = "raw_exec" config { command = "powershell" - args = ["local/curl-7.87.0_4-win64-mingw/bin/curl.exe -H \"Authorization: Bearer $env:NOMAD_TOKEN\" --unix-socket $env:NOMAD_SECRETS_DIR/api.sock -v localhost:4646/v1/agent/health"] - } - artifact { - source = "https://curl.se/windows/dl-7.87.0_4/curl-7.87.0_4-win64-mingw.zip" + args = ["curl.exe -H \"Authorization: Bearer $env:NOMAD_TOKEN\" --unix-socket $env:NOMAD_SECRETS_DIR/api.sock -v localhost:4646/v1/agent/health"] } identity { env = true From 976ea854b0f1979e4bb3f0bddd53da7f3b065167 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 18 Jun 2025 17:03:17 -0400 Subject: [PATCH 13/32] E2E: fix scaling test assertion for extra Windows host (#26077) * E2E: fix scaling test assertion for extra Windows host The scaling test assumes that all nodes will receive the system job. But the job can only run on Linux hosts, so the count will be wrong if we're running a Windows host as part of the cluster. Filter the expected count by the OS. While we're touching this test, let's also migrate it off the legacy framework. 
* address comments from code review --- e2e/e2e_test.go | 2 +- e2e/e2eutil/job.go | 14 + e2e/scaling/doc.go | 8 + ...efault_1.nomad => namespace_a_1.nomad.hcl} | 3 +- ..._1.nomad => namespace_default_1.nomad.hcl} | 3 - ..._2.nomad => namespace_default_2.nomad.hcl} | 2 - ..._3.nomad => namespace_default_3.nomad.hcl} | 2 - ...mad => namespace_default_system.nomad.hcl} | 6 +- e2e/scaling/scaling.go | 265 ------------------ e2e/scaling/scaling_test.go | 240 ++++++++++++++++ 10 files changed, 269 insertions(+), 276 deletions(-) create mode 100644 e2e/scaling/doc.go rename e2e/scaling/input/{namespace_default_1.nomad => namespace_a_1.nomad.hcl} (91%) rename e2e/scaling/input/{namespace_a_1.nomad => namespace_default_1.nomad.hcl} (86%) rename e2e/scaling/input/{namespace_default_2.nomad => namespace_default_2.nomad.hcl} (91%) rename e2e/scaling/input/{namespace_default_3.nomad => namespace_default_3.nomad.hcl} (91%) rename e2e/scaling/input/{namespace_default_system.nomad => namespace_default_system.nomad.hcl} (81%) delete mode 100644 e2e/scaling/scaling.go create mode 100644 e2e/scaling/scaling_test.go diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 13f548f3c..17242c74e 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -23,7 +23,6 @@ import ( _ "github.com/hashicorp/nomad/e2e/parameterized" _ "github.com/hashicorp/nomad/e2e/periodic" _ "github.com/hashicorp/nomad/e2e/quotas" - _ "github.com/hashicorp/nomad/e2e/scaling" _ "github.com/hashicorp/nomad/e2e/scalingpolicies" _ "github.com/hashicorp/nomad/e2e/scheduler_sysbatch" _ "github.com/hashicorp/nomad/e2e/scheduler_system" @@ -44,6 +43,7 @@ import ( _ "github.com/hashicorp/nomad/e2e/oversubscription" _ "github.com/hashicorp/nomad/e2e/podman" _ "github.com/hashicorp/nomad/e2e/rescheduling" + _ "github.com/hashicorp/nomad/e2e/scaling" _ "github.com/hashicorp/nomad/e2e/spread" _ "github.com/hashicorp/nomad/e2e/vaultsecrets" _ "github.com/hashicorp/nomad/e2e/volume_mounts" diff --git a/e2e/e2eutil/job.go b/e2e/e2eutil/job.go index 505e8476b..6559c0e58 100644 --- a/e2e/e2eutil/job.go +++ b/e2e/e2eutil/job.go @@ -240,6 +240,20 @@ func MaybeCleanupJobsAndGC(jobIDs *[]string) func() { } } +// MaybeCleanupNamespacedJobsAndGC stops and purges the list of jobIDs in the namespace and runs a +// system gc. Returns a func so that the return value can be used +// in t.Cleanup. Similar to CleanupJobsAndGC, but this one does not assert +// on a successful stop and gc, which is useful for tests that want to stop and +// gc the jobs themselves but we want a backup Cleanup just in case. +func MaybeCleanupNamespacedJobsAndGC(ns string, jobIDs []string) func() { + return func() { + for _, jobID := range jobIDs { + _ = StopJob(jobID, "-namespace", ns, "-purge", "-detach") + } + _, _ = Command("nomad", "system", "gc") + } +} + // CleanupJobsAndGCWithContext stops and purges the list of jobIDs and runs a // system gc. The passed context allows callers to cancel the execution of the // cleanup as they desire. This is useful for tests which attempt to remove the diff --git a/e2e/scaling/doc.go b/e2e/scaling/doc.go new file mode 100644 index 000000000..b5ee24921 --- /dev/null +++ b/e2e/scaling/doc.go @@ -0,0 +1,8 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +// Package scaling provides end-to-end tests for scaling Nomad workloads. 
+// +// In order to run this test suite only, from the e2e directory you can trigger +// go test -v ./spread +package scaling diff --git a/e2e/scaling/input/namespace_default_1.nomad b/e2e/scaling/input/namespace_a_1.nomad.hcl similarity index 91% rename from e2e/scaling/input/namespace_default_1.nomad rename to e2e/scaling/input/namespace_a_1.nomad.hcl index 445aeb6b1..ed2e8795c 100644 --- a/e2e/scaling/input/namespace_default_1.nomad +++ b/e2e/scaling/input/namespace_a_1.nomad.hcl @@ -2,8 +2,7 @@ # SPDX-License-Identifier: BUSL-1.1 job "horizontally_scalable" { - datacenters = ["dc1"] - type = "service" + namespace = "NamespaceScalingTestA" update { health_check = "task_states" diff --git a/e2e/scaling/input/namespace_a_1.nomad b/e2e/scaling/input/namespace_default_1.nomad.hcl similarity index 86% rename from e2e/scaling/input/namespace_a_1.nomad rename to e2e/scaling/input/namespace_default_1.nomad.hcl index 25363b26e..5febce6d7 100644 --- a/e2e/scaling/input/namespace_a_1.nomad +++ b/e2e/scaling/input/namespace_default_1.nomad.hcl @@ -2,9 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "horizontally_scalable" { - datacenters = ["dc1"] - type = "service" - namespace = "NamespaceA" update { health_check = "task_states" diff --git a/e2e/scaling/input/namespace_default_2.nomad b/e2e/scaling/input/namespace_default_2.nomad.hcl similarity index 91% rename from e2e/scaling/input/namespace_default_2.nomad rename to e2e/scaling/input/namespace_default_2.nomad.hcl index afe3b8ef4..b14004ca4 100644 --- a/e2e/scaling/input/namespace_default_2.nomad +++ b/e2e/scaling/input/namespace_default_2.nomad.hcl @@ -2,8 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "horizontally_scalable" { - datacenters = ["dc1"] - type = "service" update { health_check = "task_states" diff --git a/e2e/scaling/input/namespace_default_3.nomad b/e2e/scaling/input/namespace_default_3.nomad.hcl similarity index 91% rename from e2e/scaling/input/namespace_default_3.nomad rename to e2e/scaling/input/namespace_default_3.nomad.hcl index b963fcf04..70aa90a56 100644 --- a/e2e/scaling/input/namespace_default_3.nomad +++ b/e2e/scaling/input/namespace_default_3.nomad.hcl @@ -2,8 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "horizontally_scalable" { - datacenters = ["dc1"] - type = "service" update { health_check = "task_states" diff --git a/e2e/scaling/input/namespace_default_system.nomad b/e2e/scaling/input/namespace_default_system.nomad.hcl similarity index 81% rename from e2e/scaling/input/namespace_default_system.nomad rename to e2e/scaling/input/namespace_default_system.nomad.hcl index 75a22af86..773a8aefd 100644 --- a/e2e/scaling/input/namespace_default_system.nomad +++ b/e2e/scaling/input/namespace_default_system.nomad.hcl @@ -4,6 +4,11 @@ job "system_job" { type = "system" + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + group "system_job_group" { task "system_task" { @@ -22,4 +27,3 @@ job "system_job" { } } } - diff --git a/e2e/scaling/scaling.go b/e2e/scaling/scaling.go deleted file mode 100644 index 5b3580e03..000000000 --- a/e2e/scaling/scaling.go +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: BUSL-1.1 - -package scaling - -import ( - "os" - - "github.com/hashicorp/nomad/api" - "github.com/hashicorp/nomad/e2e/e2eutil" - "github.com/hashicorp/nomad/e2e/framework" - "github.com/hashicorp/nomad/helper/pointer" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/nomad/structs" -) - -type ScalingE2ETest struct { - framework.TC - namespaceIDs []string - namespacedJobIDs [][2]string -} - -func init() { - framework.AddSuites(&framework.TestSuite{ - Component: "Scaling", - CanRunLocal: true, - Cases: []framework.TestCase{ - new(ScalingE2ETest), - }, - }) -} - -func (tc *ScalingE2ETest) BeforeAll(f *framework.F) { - e2eutil.WaitForLeader(f.T(), tc.Nomad()) - e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1) -} - -func (tc *ScalingE2ETest) AfterEach(f *framework.F) { - if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" { - return - } - - for _, namespacedJob := range tc.namespacedJobIDs { - err := e2eutil.StopJob(namespacedJob[1], "-purge", "-namespace", - namespacedJob[0]) - f.NoError(err) - } - tc.namespacedJobIDs = [][2]string{} - - for _, ns := range tc.namespaceIDs { - _, err := e2eutil.Command("nomad", "namespace", "delete", ns) - f.NoError(err) - } - tc.namespaceIDs = []string{} - - _, err := e2eutil.Command("nomad", "system", "gc") - f.NoError(err) -} - -// TestScalingBasic performs basic scaling e2e tests within a single namespace. -func (tc *ScalingE2ETest) TestScalingBasic(f *framework.F) { - defaultNS := "default" - - // Register a job with a scaling policy. The group doesn't include the - // count parameter, therefore Nomad should dynamically set this value to - // the policy min. - jobID := "test-scaling-" + uuid.Generate()[0:8] - f.NoError(e2eutil.Register(jobID, "scaling/input/namespace_default_1.nomad")) - tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{defaultNS, jobID}) - f.NoError(e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running"}), - "job should be running with 2 allocs") - - // Ensure we wait for the deployment to finish, otherwise scaling will - // fail. - f.NoError(e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil)) - - // Simple scaling action. - testMeta := map[string]interface{}{"scaling-e2e-test": "value"} - scaleResp, _, err := tc.Nomad().Jobs().Scale( - jobID, "horizontally_scalable", pointer.Of(3), - "Nomad e2e testing", false, testMeta, nil) - f.NoError(err) - f.NotEmpty(scaleResp.EvalID) - f.NoError(e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running", "running"}), - "job should be running with 3 allocs") - - // Ensure we wait for the deployment to finish, otherwise scaling will - // fail for this reason. - f.NoError(e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil)) - - // Attempt break break the policy min/max parameters. - _, _, err = tc.Nomad().Jobs().Scale( - jobID, "horizontally_scalable", pointer.Of(4), - "Nomad e2e testing", false, nil, nil) - f.Error(err) - _, _, err = tc.Nomad().Jobs().Scale( - jobID, "horizontally_scalable", pointer.Of(1), - "Nomad e2e testing", false, nil, nil) - f.Error(err) - - // Check the scaling events. - statusResp, _, err := tc.Nomad().Jobs().ScaleStatus(jobID, nil) - f.NoError(err) - f.Len(statusResp.TaskGroups["horizontally_scalable"].Events, 1) - f.Equal(testMeta, statusResp.TaskGroups["horizontally_scalable"].Events[0].Meta) - - // Remove the job. 
- _, _, err = tc.Nomad().Jobs().Deregister(jobID, true, nil) - f.NoError(err) - f.NoError(tc.Nomad().System().GarbageCollect()) - tc.namespacedJobIDs = [][2]string{} - - // Attempt job registrations where the group count violates the policy - // min/max parameters. - f.Error(e2eutil.Register(jobID, "scaling/input/namespace_default_2.nomad")) - f.Error(e2eutil.Register(jobID, "scaling/input/namespace_default_3.nomad")) -} - -// TestScalingNamespaces runs tests to ensure the job scaling endpoint adheres -// to Nomad's basic namespace principles. -func (tc *ScalingE2ETest) TestScalingNamespaces(f *framework.F) { - - defaultNS := "default" - ANS := "NamespaceA" - - // Create our non-default namespace. - _, err := e2eutil.Command("nomad", "namespace", "apply", ANS) - f.NoError(err, "could not create namespace") - tc.namespaceIDs = append(tc.namespaceIDs, ANS) - - defaultJobID := "test-scaling-default-" + uuid.Generate()[0:8] - aJobID := "test-scaling-a-" + uuid.Generate()[0:8] - - // Register and wait for the job deployments to succeed. - f.NoError(e2eutil.Register(defaultJobID, "scaling/input/namespace_default_1.nomad")) - f.NoError(e2eutil.Register(aJobID, "scaling/input/namespace_a_1.nomad")) - f.NoError(e2eutil.WaitForLastDeploymentStatus(defaultJobID, defaultNS, "successful", nil)) - f.NoError(e2eutil.WaitForLastDeploymentStatus(aJobID, ANS, "successful", nil)) - - tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{defaultNS, defaultJobID}) - tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{ANS, aJobID}) - - // Setup the WriteOptions for each namespace. - defaultWriteOpts := api.WriteOptions{Namespace: defaultNS} - aWriteOpts := api.WriteOptions{Namespace: ANS} - - // We shouldn't be able to trigger scaling across the namespace boundary. - _, _, err = tc.Nomad().Jobs().Scale( - defaultJobID, "horizontally_scalable", pointer.Of(3), - "Nomad e2e testing", false, nil, &aWriteOpts) - f.Error(err) - _, _, err = tc.Nomad().Jobs().Scale( - aJobID, "horizontally_scalable", pointer.Of(3), - "Nomad e2e testing", false, nil, &defaultWriteOpts) - f.Error(err) - - // We should be able to trigger scaling when using the correct namespace, - // duh. - _, _, err = tc.Nomad().Jobs().Scale( - defaultJobID, "horizontally_scalable", pointer.Of(3), - "Nomad e2e testing", false, nil, &defaultWriteOpts) - f.NoError(err) - _, _, err = tc.Nomad().Jobs().Scale( - aJobID, "horizontally_scalable", pointer.Of(3), - "Nomad e2e testing", false, nil, &aWriteOpts) - f.NoError(err) -} - -// TestScalingBasic performs basic scaling e2e tests within a single namespace using -// using a SystemScheduler. -func (tc *ScalingE2ETest) TestScalingBasicWithSystemSchedule(f *framework.F) { - t := f.T() - nomadClient := tc.Nomad() - - // Register a system job with a scaling policy without a group count, it should - // default to 1 per node. - - jobID := "test-scaling-" + uuid.Generate()[0:8] - e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scaling/input/namespace_default_system.nomad", jobID, "") - - jobs := nomadClient.Jobs() - initialAllocs, _, err := jobs.Allocations(jobID, true, nil) - f.NoError(err) - - nodeStubList, _, err := nomadClient.Nodes().List(&api.QueryOptions{Namespace: "default"}) - f.NoError(err) - - // A system job will spawn an allocation per node, we need to know how many nodes - // there are to know how many allocations to expect. 
- numberOfNodes := len(nodeStubList) - - f.Equal(numberOfNodes, len(initialAllocs)) - allocIDs := e2eutil.AllocIDsFromAllocationListStubs(initialAllocs) - - // Wait for allocations to get past initial pending state - e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs) - - // Try to scale beyond 1 - testMeta := map[string]interface{}{"scaling-e2e-test": "value"} - scaleResp, _, err := tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(3), - "Nomad e2e testing", false, testMeta, nil) - - f.Error(err) - f.Nil(scaleResp) - - // The same allocs should be running. - jobs = nomadClient.Jobs() - allocs1, _, err := jobs.Allocations(jobID, true, nil) - f.NoError(err) - - f.Equal(len(initialAllocs), len(allocs1)) - - for i, a := range allocs1 { - f.Equal(a.ID, initialAllocs[i].ID) - } - - // Scale down to 0 - testMeta = map[string]interface{}{"scaling-e2e-test": "value"} - scaleResp, _, err = tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(0), - "Nomad e2e testing", false, testMeta, nil) - f.NoError(err) - f.NotEmpty(scaleResp.EvalID) - - // Assert job is still up but no allocs are running - stopedAllocs, _, err := jobs.Allocations(jobID, false, nil) - f.NoError(err) - - f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, stopedAllocs))) - f.Equal(numberOfNodes, len(stopedAllocs)) - - // Scale up to 1 again - testMeta = map[string]interface{}{"scaling-e2e-test": "value"} - scaleResp, _, err = tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(1), - "Nomad e2e testing", false, testMeta, nil) - f.NoError(err) - f.NotEmpty(scaleResp.EvalID) - - // Wait for new allocation to get past initial pending state - e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs) - - // Assert job is still running and there is a running allocation again - allocs, _, err := jobs.Allocations(jobID, true, nil) - f.NoError(err) - f.Equal(numberOfNodes*2, len(allocs)) - - f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, allocs))) - f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusRun, allocs))) - - // Remove the job. - _, _, err = tc.Nomad().Jobs().Deregister(jobID, true, nil) - f.NoError(err) - f.NoError(tc.Nomad().System().GarbageCollect()) -} - -func filterAllocsByDesiredStatus(status string, allocs []*api.AllocationListStub) []*api.AllocationListStub { - res := []*api.AllocationListStub{} - - for _, a := range allocs { - if a.DesiredStatus == status { - res = append(res, a) - } - } - - return res -} diff --git a/e2e/scaling/scaling_test.go b/e2e/scaling/scaling_test.go new file mode 100644 index 000000000..5ab9f0468 --- /dev/null +++ b/e2e/scaling/scaling_test.go @@ -0,0 +1,240 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package scaling + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/e2e/e2eutil" + "github.com/hashicorp/nomad/e2e/v3/cluster3" + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" +) + +const defaultNS = "default" + +func TestScaling(t *testing.T) { + cluster3.Establish(t, + cluster3.Leader(), + cluster3.LinuxClients(1), + cluster3.Timeout(3*time.Second), + ) + + // Run our test cases. 
+ t.Run("TestScaling_Basic", testScalingBasic) + t.Run("TestScaling_Namespaces", testScalingNamespaces) + t.Run("TestScaling_System", testScalingSystemJob) +} + +func testScalingBasic(t *testing.T) { + nomad := e2eutil.NomadClient(t) + + jobID := "scaling-basic-" + uuid.Short() + jobIDs := []string{jobID} + t.Cleanup(e2eutil.MaybeCleanupJobsAndGC(&jobIDs)) + + // start job + allocs := e2eutil.RegisterAndWaitForAllocs(t, + nomad, "./input/namespace_default_1.nomad.hcl", jobID, "") + must.Len(t, 2, allocs, must.Sprint("expected 2 allocs")) + + // Ensure we wait for the deployment to finish, otherwise scaling will fail. + must.NoError(t, e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil)) + + // Simple scaling action. + testMeta := map[string]any{"scaling-e2e-test": "value"} + scaleResp, _, err := nomad.Jobs().Scale( + jobID, "horizontally_scalable", pointer.Of(3), + "Nomad e2e testing", false, testMeta, nil) + must.NoError(t, err) + must.NotEq(t, "", scaleResp.EvalID) + must.NoError(t, e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running", "running"}), + must.Sprint("job should be running with 3 allocs")) + + // Ensure we wait for the deployment to finish, otherwise scaling will + // fail for this reason. + must.NoError(t, e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil)) + + // Attempt break break the policy min/max parameters. + _, _, err = nomad.Jobs().Scale( + jobID, "horizontally_scalable", pointer.Of(4), + "Nomad e2e testing", false, nil, nil) + must.ErrorContains(t, err, "group count was greater than scaling policy maximum") + _, _, err = nomad.Jobs().Scale( + jobID, "horizontally_scalable", pointer.Of(1), + "Nomad e2e testing", false, nil, nil) + must.ErrorContains(t, err, "group count was less than scaling policy minimum") + + // Check the scaling events. + statusResp, _, err := nomad.Jobs().ScaleStatus(jobID, nil) + must.NoError(t, err) + must.Len(t, 1, statusResp.TaskGroups["horizontally_scalable"].Events) + must.Eq(t, testMeta, statusResp.TaskGroups["horizontally_scalable"].Events[0].Meta) + + // Remove the job. + _, _, err = nomad.Jobs().Deregister(jobID, true, nil) + must.NoError(t, err) + must.NoError(t, nomad.System().GarbageCollect()) + + // Attempt job registrations where the group count violates the policy + // min/max parameters. + err = e2eutil.Register(jobID, "input/namespace_default_2.nomad.hcl") + must.ErrorContains(t, err, "task group count must not be greater than maximum count") + must.Error(t, e2eutil.Register(jobID, "input/namespace_default_3.nomad.hcl")) +} + +func testScalingNamespaces(t *testing.T) { + nomad := e2eutil.NomadClient(t) + + // Create our non-default namespace. + ANS := "NamespaceScalingTestA" + _, err := e2eutil.Command("nomad", "namespace", "apply", ANS) + must.NoError(t, err, must.Sprint("could not create namespace")) + e2eutil.CleanupCommand(t, "nomad namespace delete %s", ANS) + + defaultJobID := "test-scaling-default-" + uuid.Generate()[0:8] + aJobID := "test-scaling-a-" + uuid.Generate()[0:8] + + // Register and wait for the job deployments to succeed. 
+ must.NoError(t, e2eutil.Register(defaultJobID, "input/namespace_default_1.nomad.hcl")) + must.NoError(t, e2eutil.Register(aJobID, "input/namespace_a_1.nomad.hcl")) + must.NoError(t, e2eutil.WaitForLastDeploymentStatus(defaultJobID, defaultNS, "successful", nil)) + must.NoError(t, e2eutil.WaitForLastDeploymentStatus(aJobID, ANS, "successful", nil)) + + t.Cleanup(e2eutil.MaybeCleanupNamespacedJobsAndGC(ANS, []string{aJobID})) + t.Cleanup(e2eutil.MaybeCleanupJobsAndGC(&[]string{defaultJobID})) + + // Setup the WriteOptions for each namespace. + defaultWriteOpts := api.WriteOptions{Namespace: defaultNS} + aWriteOpts := api.WriteOptions{Namespace: ANS} + + // We shouldn't be able to trigger scaling across the namespace boundary. + _, _, err = nomad.Jobs().Scale( + defaultJobID, "horizontally_scalable", pointer.Of(3), + "Nomad e2e testing", false, nil, &aWriteOpts) + must.ErrorContains(t, err, "not found") + _, _, err = nomad.Jobs().Scale( + aJobID, "horizontally_scalable", pointer.Of(3), + "Nomad e2e testing", false, nil, &defaultWriteOpts) + must.ErrorContains(t, err, "not found") + + // We should be able to trigger scaling when using the correct namespace, + // duh. + _, _, err = nomad.Jobs().Scale( + defaultJobID, "horizontally_scalable", pointer.Of(3), + "Nomad e2e testing", false, nil, &defaultWriteOpts) + must.NoError(t, err) + _, _, err = nomad.Jobs().Scale( + aJobID, "horizontally_scalable", pointer.Of(3), + "Nomad e2e testing", false, nil, &aWriteOpts) + must.NoError(t, err) +} + +func testScalingSystemJob(t *testing.T) { + nomad := e2eutil.NomadClient(t) + + // Register a system job with a scaling policy without a group count, it + // should default to 1 per node. + + jobID := "test-scaling-" + uuid.Generate()[0:8] + e2eutil.RegisterAndWaitForAllocs(t, nomad, + "input/namespace_default_system.nomad.hcl", jobID, "") + + t.Cleanup(e2eutil.CleanupJobsAndGC(t, &[]string{jobID})) + + jobs := nomad.Jobs() + initialAllocs, _, err := jobs.Allocations(jobID, true, nil) + must.NoError(t, err) + + // A system job will spawn an allocation per feasible node, we need to know + // how many nodes there are to know how many allocations to expect. + nodeStubList, _, err := nomad.Nodes().List( + &api.QueryOptions{ + Namespace: "default", + Params: map[string]string{"os": "true"}, + Filter: `Attributes["os.name"] == "ubuntu"`, + }) + must.NoError(t, err) + numberOfNodes := len(nodeStubList) + + must.Len(t, numberOfNodes, initialAllocs) + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(initialAllocs) + + // Wait for allocations to get past initial pending state + e2eutil.WaitForAllocsNotPending(t, nomad, allocIDs) + + // Try to scale beyond 1 + testMeta := map[string]any{"scaling-e2e-test": "value"} + scaleResp, _, err := nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(3), + "Nomad e2e testing", false, testMeta, nil) + + must.ErrorContains(t, err, "can only be scaled between 0 and 1") + must.Nil(t, scaleResp) + + // The same allocs should be running. 
+ jobs = nomad.Jobs() + allocs1, _, err := jobs.Allocations(jobID, true, nil) + must.NoError(t, err) + + must.Eq(t, len(initialAllocs), len(allocs1)) + for i, a := range allocs1 { + must.Eq(t, a.ID, initialAllocs[i].ID) + } + + // Scale down to 0 + testMeta = map[string]any{"scaling-e2e-test": "value"} + scaleResp, _, err = nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(0), + "Nomad e2e testing", false, testMeta, nil) + must.NoError(t, err) + must.NotEq(t, "", scaleResp.EvalID) + + // Wait until allocs all stop + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(func() bool { + allocs, _, err := jobs.Allocations(jobID, false, nil) + must.NoError(t, err) + stoppedAllocs := filterAllocsByDesiredStatus( + structs.AllocDesiredStatusStop, allocs) + return len(stoppedAllocs) == numberOfNodes + }), + wait.Timeout(10*time.Second), + wait.Gap(100*time.Millisecond), + ), must.Sprint("allocs did not stop")) + + // Scale up to 1 again + testMeta = map[string]any{"scaling-e2e-test": "value"} + scaleResp, _, err = nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(1), + "Nomad e2e testing", false, testMeta, nil) + must.NoError(t, err) + must.NotEq(t, "", scaleResp.EvalID) + + // Wait for new allocation to get past initial pending state + e2eutil.WaitForAllocsNotPending(t, nomad, allocIDs) + + // Assert job is still running and there is a running allocation again + allocs, _, err := jobs.Allocations(jobID, true, nil) + must.NoError(t, err) + must.Len(t, numberOfNodes*2, allocs) + must.Len(t, numberOfNodes, + filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, allocs)) + must.Len(t, numberOfNodes, + filterAllocsByDesiredStatus(structs.AllocDesiredStatusRun, allocs)) +} + +func filterAllocsByDesiredStatus(status string, allocs []*api.AllocationListStub) []*api.AllocationListStub { + res := []*api.AllocationListStub{} + + for _, a := range allocs { + if a.DesiredStatus == status { + res = append(res, a) + } + } + + return res +} From 7bfc04576a32bc9635ac757ea699a2e0e10a2cb2 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 18 Jun 2025 17:03:32 -0400 Subject: [PATCH 14/32] E2E: disable sdnotify for Consul agents (#26078) In our E2E environment we've seen some flakiness with the Consul-related tests. As it turns out, the Consul agents are getting restarted every 90s or so because they're timing out their systemd notification. > consul.service: start operation timed out. Terminating. This appears to be a known issue in Consul and we'll try to contribute some help to hunt down the cause if they want help, but in the meantime let's remove it from our systemd unit files for the Consul agents. 
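
For context, the notification handshake that is timing out here comes from
the unit's service type. A notify-style unit looks roughly like this
(simplified from the unit files changed below, so treat it as a sketch
rather than the exact configuration):

    [Service]
    Type=notify
    ExecStart=/usr/bin/consul agent -config-dir="/etc/consul.d"

With Type=notify, systemd only treats the unit as started once the agent
sends READY=1, and systemd's default 90 second start timeout fires if that
never arrives. Dropping the Type= line falls back to Type=simple, where the
unit counts as started as soon as the process has been executed.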
Ref: https://github.com/hashicorp/consul/issues/16844#issuecomment-1913282248 --- e2e/terraform/packer/ubuntu-jammy-amd64/consul.service | 7 ++++--- .../provision-nomad/etc/consul.d/consul.service | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/e2e/terraform/packer/ubuntu-jammy-amd64/consul.service b/e2e/terraform/packer/ubuntu-jammy-amd64/consul.service index 5e82288c2..2f1e9f24e 100644 --- a/e2e/terraform/packer/ubuntu-jammy-amd64/consul.service +++ b/e2e/terraform/packer/ubuntu-jammy-amd64/consul.service @@ -6,11 +6,12 @@ After=network-online.target [Service] Restart=on-failure Environment=CONSUL_ALLOW_PRIVILEGED_PORTS=true -ExecStart=/usr/local/bin/consul agent -config-dir="/etc/consul.d" +WorkingDirectory=/etc/consul.d +ExecStart=/usr/bin/consul agent -config-dir="/etc/consul.d" ExecReload=/bin/kill -HUP $MAINPID KillSignal=SIGTERM -User=root -Group=root +User=consul +Group=consul [Install] WantedBy=multi-user.target diff --git a/e2e/terraform/provision-infra/provision-nomad/etc/consul.d/consul.service b/e2e/terraform/provision-infra/provision-nomad/etc/consul.d/consul.service index 56ecccb8c..2f1e9f24e 100644 --- a/e2e/terraform/provision-infra/provision-nomad/etc/consul.d/consul.service +++ b/e2e/terraform/provision-infra/provision-nomad/etc/consul.d/consul.service @@ -4,7 +4,6 @@ Requires=network-online.target After=network-online.target [Service] -Type=notify Restart=on-failure Environment=CONSUL_ALLOW_PRIVILEGED_PORTS=true WorkingDirectory=/etc/consul.d From c8dcd3c2dbee57543e5f30667a99085fd35653e7 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 19 Jun 2025 13:48:06 -0400 Subject: [PATCH 15/32] docker: clamp CPU shares to minimum of 2 (#26081) In #25963 we added normalization of CPU shares for large hosts where the total compute was larger than the maximum CPU shares. But if the result after normalization is less than 2, runc will have an integer overflow. We prevent this in the shared executor for the `exec`/`rawexec` driver by clamping to the safe minimum value. Do this for the `docker` driver as well and add test coverage of it for the shared executor too. Fixes: https://github.com/hashicorp/nomad/issues/26080 Ref: https://github.com/hashicorp/nomad/pull/25963 --- .changelog/26081.txt | 3 +++ drivers/docker/driver.go | 10 +++++++++- drivers/docker/driver_linux_test.go | 6 ++++++ drivers/shared/executor/executor_linux_test.go | 3 +++ 4 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 .changelog/26081.txt diff --git a/.changelog/26081.txt b/.changelog/26081.txt new file mode 100644 index 000000000..16259f821 --- /dev/null +++ b/.changelog/26081.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: Fixed a bug where very low resources.cpu values could generate invalid cpu weights on hosts with very large client.cpu_total_compute values +``` diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index 102d0947d..6fb6a6413 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -951,17 +951,25 @@ func memoryLimits(driverHardLimitMB int64, taskMemory drivers.MemoryResources) ( // maxCPUShares is the maximum value for cpu_shares in cgroups v1 // https://github.com/torvalds/linux/blob/v6.15/kernel/sched/sched.h#L503 const maxCPUShares = 262_144 +const minCPUShares = 2 // cpuResources normalizes the requested CPU shares when the total compute // available on the node is larger than the largest share value allowed by the // kernel. 
On cgroups v2, Docker will re-normalize this to be within the
 // acceptable range for cpu.weight [1-10000].
 func (d *Driver) cpuResources(requested int64) int64 {
+	if requested < minCPUShares {
+		return minCPUShares
+	}
 	if d.compute.TotalCompute < maxCPUShares {
 		return requested
 	}
 
-	return int64(float64(requested) / float64(d.compute.TotalCompute) * maxCPUShares)
+	result := int64(float64(requested) / float64(d.compute.TotalCompute) * maxCPUShares)
+	if result < minCPUShares {
+		return minCPUShares
+	}
+	return result
 }
 
 func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
diff --git a/drivers/docker/driver_linux_test.go b/drivers/docker/driver_linux_test.go
index fe79fece5..3f6d3bd19 100644
--- a/drivers/docker/driver_linux_test.go
+++ b/drivers/docker/driver_linux_test.go
@@ -129,6 +129,12 @@ func TestDockerDriver_NormalizeCPUShares(t *testing.T) {
 	driver.compute.TotalCompute = maxCPUShares + 1
 	must.Eq(t, 262143, driver.cpuResources(maxCPUShares))
 
+	driver.compute.TotalCompute = maxCPUShares + 1
+	must.Eq(t, 2, driver.cpuResources(2))
+
+	driver.compute.TotalCompute = maxCPUShares + 1
+	must.Eq(t, 2, driver.cpuResources(1))
+
 	driver.compute.TotalCompute = maxCPUShares * 2
 	must.Eq(t, 500, driver.cpuResources(1000))
 	must.Eq(t, maxCPUShares/2, driver.cpuResources(maxCPUShares))
diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go
index 438311f67..9dc94487f 100644
--- a/drivers/shared/executor/executor_linux_test.go
+++ b/drivers/shared/executor/executor_linux_test.go
@@ -1089,6 +1089,9 @@ func TestExecutor_clampCPUShares(t *testing.T) {
 	le.compute.TotalCompute = MaxCPUShares + 1
 	must.Eq(t, 262143, le.clampCpuShares(MaxCPUShares))
 
+	le.compute.TotalCompute = MaxCPUShares + 1
+	must.Eq(t, 2, le.clampCpuShares(1))
+
 	le.compute = cpustats.Compute{TotalCompute: MaxCPUShares * 2}
 	must.Eq(t, 500, le.clampCpuShares(1000))
 	must.Eq(t, MaxCPUShares/2, le.clampCpuShares(MaxCPUShares))

From b82fd2e159adf9a872e4c61a2bd6e7a468b3dbe9 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 20 Jun 2025 07:37:16 +0200
Subject: [PATCH 16/32] scheduler: refactor cluster reconciler to avoid hidden
 state mutation (#26042)

Cluster reconciler code is notoriously hard to follow because most of
its methods continuously mutate the fields of the allocReconciler
object. Even for top-level methods this makes the code hard to follow,
but it gets really gnarly with lower-level methods (of which there are
many). This changeset proposes a refactoring that makes the vast
majority of said methods return explicit values and avoid mutating
object fields.
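
For illustration, here is a minimal, self-contained sketch of the shape the
refactoring moves toward; the type and method names are simplified
stand-ins invented for this example, not the real reconciler API:

    package main

    import "fmt"

    // results stands in for a per-group reconciliation result.
    type results struct{ place, stop int }

    // merge folds another group's results into the accumulator.
    func (r *results) merge(other results) {
        r.place += other.place
        r.stop += other.stop
    }

    type reconciler struct{ groups []string }

    // computeGroup returns its results explicitly instead of writing
    // them into fields on the reconciler.
    func (reconciler) computeGroup(group string) results {
        return results{place: 1}
    }

    // Compute merges the per-group results and returns the final value
    // to the caller.
    func (a reconciler) Compute() results {
        var total results
        for _, g := range a.groups {
            total.merge(a.computeGroup(g))
        }
        return total
    }

    func main() {
        r := reconciler{groups: []string{"web", "cache"}}
        fmt.Printf("%+v\n", r.Compute()) // prints {place:2 stop:0}
    }

The diff below follows the same pattern: Compute now returns a
*ReconcileResults assembled via ReconcileResults.Merge instead of
accumulating results on the AllocReconciler.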
--- scheduler/generic_sched.go | 48 +- scheduler/reconciler/allocs.go | 255 ------ scheduler/reconciler/allocs_test.go | 156 ++-- scheduler/reconciler/filters.go | 305 ++++++++ scheduler/reconciler/reconcile_cluster.go | 733 ++++++++++-------- .../reconciler/reconcile_cluster_test.go | 416 ++++------ 6 files changed, 976 insertions(+), 937 deletions(-) create mode 100644 scheduler/reconciler/filters.go diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 15fad0df4..566eb5d94 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -340,51 +340,55 @@ func (s *GenericScheduler) computeJobAllocs() error { r := reconciler.NewAllocReconciler(s.logger, genericAllocUpdateFn(s.ctx, s.stack, s.eval.ID), - s.batch, s.eval.JobID, s.job, s.deployment, allocs, tainted, s.eval.ID, - s.eval.Priority, s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true)) - r.Compute() - s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", r.Result)) + s.batch, s.eval.JobID, s.job, s.deployment, allocs, s.eval.ID, + s.eval.Priority, reconciler.ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true), + Now: time.Now().UTC(), + }) + result := r.Compute() + s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", result)) if s.eval.AnnotatePlan { s.plan.Annotations = &structs.PlanAnnotations{ - DesiredTGUpdates: r.Result.DesiredTGUpdates, + DesiredTGUpdates: result.DesiredTGUpdates, } } // Add the deployment changes to the plan - s.plan.Deployment = r.Result.Deployment - s.plan.DeploymentUpdates = r.Result.DeploymentUpdates + s.plan.Deployment = result.Deployment + s.plan.DeploymentUpdates = result.DeploymentUpdates // Store all the follow up evaluations from rescheduled allocations - if len(r.Result.DesiredFollowupEvals) > 0 { - for _, evals := range r.Result.DesiredFollowupEvals { + if len(result.DesiredFollowupEvals) > 0 { + for _, evals := range result.DesiredFollowupEvals { s.followUpEvals = append(s.followUpEvals, evals...) } } // Update the stored deployment - if r.Result.Deployment != nil { - s.deployment = r.Result.Deployment + if result.Deployment != nil { + s.deployment = result.Deployment } // Handle the stop - for _, stop := range r.Result.Stop { + for _, stop := range result.Stop { s.plan.AppendStoppedAlloc(stop.Alloc, stop.StatusDescription, stop.ClientStatus, stop.FollowupEvalID) } // Handle disconnect updates - for _, update := range r.Result.DisconnectUpdates { + for _, update := range result.DisconnectUpdates { s.plan.AppendUnknownAlloc(update) } // Handle reconnect updates. // Reconnected allocs have a new AllocState entry. 
- for _, update := range r.Result.ReconnectUpdates { + for _, update := range result.ReconnectUpdates { s.ctx.Plan().AppendAlloc(update, nil) } // Handle the in-place updates - for _, update := range r.Result.InplaceUpdate { + for _, update := range result.InplaceUpdate { if update.DeploymentID != s.deployment.GetID() { update.DeploymentID = s.deployment.GetID() update.DeploymentStatus = nil @@ -393,12 +397,12 @@ func (s *GenericScheduler) computeJobAllocs() error { } // Handle the annotation updates - for _, update := range r.Result.AttributeUpdates { + for _, update := range result.AttributeUpdates { s.ctx.Plan().AppendAlloc(update, nil) } // Nothing remaining to do if placement is not required - if len(r.Result.Place)+len(r.Result.DestructiveUpdate) == 0 { + if len(result.Place)+len(result.DestructiveUpdate) == 0 { // If the job has been purged we don't have access to the job. Otherwise // set the queued allocs to zero. This is true if the job is being // stopped as well. @@ -411,18 +415,18 @@ func (s *GenericScheduler) computeJobAllocs() error { } // Compute the placements - place := make([]reconciler.PlacementResult, 0, len(r.Result.Place)) - for _, p := range r.Result.Place { + place := make([]reconciler.PlacementResult, 0, len(result.Place)) + for _, p := range result.Place { s.queuedAllocs[p.TaskGroup().Name] += 1 place = append(place, p) } - destructive := make([]reconciler.PlacementResult, 0, len(r.Result.DestructiveUpdate)) - for _, p := range r.Result.DestructiveUpdate { + destructive := make([]reconciler.PlacementResult, 0, len(result.DestructiveUpdate)) + for _, p := range result.DestructiveUpdate { s.queuedAllocs[p.TaskGroup().Name] += 1 destructive = append(destructive, p) } - return s.computePlacements(destructive, place, r.Result.TaskGroupAllocNameIndexes) + return s.computePlacements(destructive, place, result.TaskGroupAllocNameIndexes) } // downgradedJobForPlacement returns the previous stable version of the job for diff --git a/scheduler/reconciler/allocs.go b/scheduler/reconciler/allocs.go index d5efad6c9..9ddba1046 100644 --- a/scheduler/reconciler/allocs.go +++ b/scheduler/reconciler/allocs.go @@ -224,235 +224,6 @@ func (a allocSet) fromKeys(keys ...[]string) allocSet { return from } -// filterByTainted takes a set of tainted nodes and filters the allocation set -// into the following groups: -// 1. Those that exist on untainted nodes -// 2. Those exist on nodes that are draining -// 3. Those that exist on lost nodes or have expired -// 4. Those that are on nodes that are disconnected, but have not had their ClientState set to unknown -// 5. Those that are on a node that has reconnected. -// 6. Those that are in a state that results in a noop. 
-func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverSupportsDisconnectedClients bool, now time.Time) (untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring allocSet) { - untainted = make(map[string]*structs.Allocation) - migrate = make(map[string]*structs.Allocation) - lost = make(map[string]*structs.Allocation) - disconnecting = make(map[string]*structs.Allocation) - reconnecting = make(map[string]*structs.Allocation) - ignore = make(map[string]*structs.Allocation) - expiring = make(map[string]*structs.Allocation) - - for _, alloc := range a { - // make sure we don't apply any reconnect logic to task groups - // without max_client_disconnect - supportsDisconnectedClients := alloc.SupportsDisconnectedClients(serverSupportsDisconnectedClients) - - reconnect := false - - // Only compute reconnect for unknown, running, and failed since they - // need to go through the reconnect logic. - if supportsDisconnectedClients && - (alloc.ClientStatus == structs.AllocClientStatusUnknown || - alloc.ClientStatus == structs.AllocClientStatusRunning || - alloc.ClientStatus == structs.AllocClientStatusFailed) { - reconnect = alloc.NeedsToReconnect() - } - - // Failed allocs that need to be reconnected must be added to - // reconnecting so that they can be handled as a failed reconnect. - if supportsDisconnectedClients && - reconnect && - alloc.DesiredStatus == structs.AllocDesiredStatusRun && - alloc.ClientStatus == structs.AllocClientStatusFailed { - reconnecting[alloc.ID] = alloc - continue - } - - taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID] - if taintedNode != nil && taintedNode.Status == structs.NodeStatusDisconnected { - // Group disconnecting - if supportsDisconnectedClients { - // Filter running allocs on a node that is disconnected to be marked as unknown. - if alloc.ClientStatus == structs.AllocClientStatusRunning { - disconnecting[alloc.ID] = alloc - continue - } - // Filter pending allocs on a node that is disconnected to be marked as lost. - if alloc.ClientStatus == structs.AllocClientStatusPending { - lost[alloc.ID] = alloc - continue - } - - } else { - if alloc.PreventReplaceOnDisconnect() { - if alloc.ClientStatus == structs.AllocClientStatusRunning { - disconnecting[alloc.ID] = alloc - continue - } - - untainted[alloc.ID] = alloc - continue - } - - lost[alloc.ID] = alloc - continue - } - } - - if alloc.TerminalStatus() && !reconnect { - // Server-terminal allocs, if supportsDisconnectedClient and not reconnect, - // are probably stopped replacements and should be ignored - if supportsDisconnectedClients && alloc.ServerTerminalStatus() { - ignore[alloc.ID] = alloc - continue - } - - // Terminal canaries that have been marked for migration need to be - // migrated, otherwise we block deployments from progressing by - // counting them as running canaries. - if alloc.DeploymentStatus.IsCanary() && alloc.DesiredTransition.ShouldMigrate() { - migrate[alloc.ID] = alloc - continue - } - - // Terminal allocs, if not reconnect, are always untainted as they - // should never be migrated. - untainted[alloc.ID] = alloc - continue - } - - // Non-terminal allocs that should migrate should always migrate - if alloc.DesiredTransition.ShouldMigrate() { - migrate[alloc.ID] = alloc - continue - } - - if supportsDisconnectedClients && alloc.Expired(now) { - expiring[alloc.ID] = alloc - continue - } - - // Acknowledge unknown allocs that we want to reconnect eventually. 
- if supportsDisconnectedClients && - alloc.ClientStatus == structs.AllocClientStatusUnknown && - alloc.DesiredStatus == structs.AllocDesiredStatusRun { - untainted[alloc.ID] = alloc - continue - } - - // Ignore failed allocs that need to be reconnected and that have been - // marked to stop by the server. - if supportsDisconnectedClients && - reconnect && - alloc.ClientStatus == structs.AllocClientStatusFailed && - alloc.DesiredStatus == structs.AllocDesiredStatusStop { - ignore[alloc.ID] = alloc - continue - } - - if !nodeIsTainted || (taintedNode != nil && taintedNode.Status == structs.NodeStatusReady) { - // Filter allocs on a node that is now re-connected to be resumed. - if reconnect { - // Expired unknown allocs should be processed depending on the max client disconnect - // and/or avoid reschedule on lost configurations, they are both treated as - // expiring. - if alloc.Expired(now) { - expiring[alloc.ID] = alloc - continue - } - - reconnecting[alloc.ID] = alloc - continue - } - - // Otherwise, Node is untainted so alloc is untainted - untainted[alloc.ID] = alloc - continue - } - - // Allocs on GC'd (nil) or lost nodes are Lost - if taintedNode == nil { - lost[alloc.ID] = alloc - continue - } - - // Allocs on terminal nodes that can't be rescheduled need to be treated - // differently than those that can. - if taintedNode.TerminalStatus() { - if alloc.PreventReplaceOnDisconnect() { - if alloc.ClientStatus == structs.AllocClientStatusUnknown { - untainted[alloc.ID] = alloc - continue - } else if alloc.ClientStatus == structs.AllocClientStatusRunning { - disconnecting[alloc.ID] = alloc - continue - } - } - - lost[alloc.ID] = alloc - continue - } - - // All other allocs are untainted - untainted[alloc.ID] = alloc - } - - return -} - -// filterByRescheduleable filters the allocation set to return the set of allocations that are either -// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled -// at a future time are also returned so that we can create follow up evaluations for them. Allocs are -// skipped or considered untainted according to logic defined in shouldFilter method. -func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time.Time, evalID string, deployment *structs.Deployment) (allocSet, allocSet, []*delayedRescheduleInfo) { - untainted := make(map[string]*structs.Allocation) - rescheduleNow := make(map[string]*structs.Allocation) - rescheduleLater := []*delayedRescheduleInfo{} - - for _, alloc := range a { - // Ignore disconnecting allocs that are already unknown. This can happen - // in the case of canaries that are interrupted by a disconnect. - if isDisconnecting && alloc.ClientStatus == structs.AllocClientStatusUnknown { - continue - } - - var eligibleNow, eligibleLater bool - var rescheduleTime time.Time - - // Ignore failing allocs that have already been rescheduled. - // Only failed or disconnecting allocs should be rescheduled. - // Protects against a bug allowing rescheduling running allocs. 
- if alloc.NextAllocation != "" && alloc.TerminalStatus() { - continue - } - - isUntainted, ignore := shouldFilter(alloc, isBatch) - if isUntainted && !isDisconnecting { - untainted[alloc.ID] = alloc - continue // these allocs can never be rescheduled, so skip checking - } - - if ignore { - continue - } - - eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID, deployment, isDisconnecting) - if eligibleNow { - rescheduleNow[alloc.ID] = alloc - continue - } - - // If the failed alloc is not eligible for rescheduling now we - // add it to the untainted set. - untainted[alloc.ID] = alloc - - if eligibleLater { - rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, alloc, rescheduleTime}) - } - - } - return untainted, rescheduleNow, rescheduleLater -} - // shouldFilter returns whether the alloc should be ignored or considered untainted. // // Ignored allocs are filtered out. @@ -550,32 +321,6 @@ func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID stri return } -// filterByTerminal filters out terminal allocs -func filterByTerminal(untainted allocSet) (nonTerminal allocSet) { - nonTerminal = make(map[string]*structs.Allocation) - for id, alloc := range untainted { - if !alloc.TerminalStatus() { - nonTerminal[id] = alloc - } - } - return -} - -// filterByDeployment filters allocations into two sets, those that match the -// given deployment ID and those that don't -func (a allocSet) filterByDeployment(id string) (match, nonmatch allocSet) { - match = make(map[string]*structs.Allocation) - nonmatch = make(map[string]*structs.Allocation) - for _, alloc := range a { - if alloc.DeploymentID == id { - match[alloc.ID] = alloc - } else { - nonmatch[alloc.ID] = alloc - } - } - return -} - // delayByStopAfter returns a delay for any lost allocation that's got a // disconnect.stop_on_client_after configured func (a allocSet) delayByStopAfter() (later []*delayedRescheduleInfo) { diff --git a/scheduler/reconciler/allocs_test.go b/scheduler/reconciler/allocs_test.go index 015b749c4..7285d562b 100644 --- a/scheduler/reconciler/allocs_test.go +++ b/scheduler/reconciler/allocs_test.go @@ -124,12 +124,10 @@ func TestAllocSet_filterByTainted(t *testing.T) { t.Run(jd.name, func(t *testing.T) { testCases := []struct { - name string - all allocSet - taintedNodes map[string]*structs.Node - supportsDisconnectedClients bool - skipNilNodeTest bool - now time.Time + name string + all allocSet + state ClusterState + skipNilNodeTest bool // expected results untainted allocSet migrate allocSet @@ -140,11 +138,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring allocSet }{ // These two cases test that we maintain parity with pre-disconnected-clients behavior. { - name: "lost-client", - supportsDisconnectedClients: false, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "lost-client", + state: ClusterState{nodes, false, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "untainted1": { ID: "untainted1", @@ -244,10 +240,8 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "lost-client-only-tainted-nodes", - supportsDisconnectedClients: false, - now: time.Now(), - taintedNodes: nodes, + name: "lost-client-only-tainted-nodes", + state: ClusterState{nodes, false, time.Now()}, // The logic associated with this test case can only trigger if there // is a tainted node. Therefore, testing with a nil node set produces // false failures, so don't perform that test if in this case. 
@@ -292,11 +286,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-disconnect-unset-max-disconnect", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: true, + name: "disco-client-disconnect-unset-max-disconnect", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: true, all: allocSet{ // Non-terminal allocs on disconnected nodes w/o max-disconnect are lost "lost-running": { @@ -329,11 +321,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, // Everything below this line tests the disconnected client mode. { - name: "disco-client-untainted-reconnect-failed-and-replaced", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-untainted-reconnect-failed-and-replaced", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "running-replacement": { ID: "running-replacement", @@ -390,11 +380,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-reconnecting-running-no-replacement", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-reconnecting-running-no-replacement", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ // Running allocs on reconnected nodes with no replacement are reconnecting. // Node.UpdateStatus has already handled syncing client state so this @@ -430,11 +418,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-terminal", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-terminal", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ // Allocs on reconnected nodes that are complete need to be updated to stop "untainted-reconnect-complete": { @@ -580,11 +566,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-disconnect", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: true, + name: "disco-client-disconnect", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: true, all: allocSet{ // Non-terminal allocs on disconnected nodes are disconnecting "disconnect-running": { @@ -724,11 +708,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, }, { - name: "disco-client-reconnect", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-reconnect", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ // Expired allocs on reconnected clients are lost "expired-reconnect": { @@ -762,11 +744,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, }, { - name: "disco-client-running-reconnecting-and-replacement-untainted", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-running-reconnecting-and-replacement-untainted", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "running-replacement": { ID: "running-replacement", @@ -824,11 +804,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { // After an alloc is reconnected, it should be considered // "untainted" instead of "reconnecting" to allow changes 
such as // job updates to be applied properly. - name: "disco-client-reconnected-alloc-untainted", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-reconnected-alloc-untainted", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "running-reconnected": { ID: "running-reconnected", @@ -862,11 +840,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, // Everything below this line tests the single instance on lost mode. { - name: "lost-client-single-instance-on", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "lost-client-single-instance-on", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "untainted1": { ID: "untainted1", @@ -966,10 +942,8 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "lost-client-only-tainted-nodes-single-instance-on", - supportsDisconnectedClients: false, - now: time.Now(), - taintedNodes: nodes, + name: "lost-client-only-tainted-nodes-single-instance-on", + state: ClusterState{nodes, false, time.Now()}, // The logic associated with this test case can only trigger if there // is a tainted node. Therefore, testing with a nil node set produces // false failures, so don't perform that test if in this case. @@ -1014,11 +988,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-disconnect-unset-max-disconnect-single-instance-on", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: true, + name: "disco-client-disconnect-unset-max-disconnect-single-instance-on", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: true, all: allocSet{ // Non-terminal allocs on disconnected nodes w/o max-disconnect are lost "disconnecting-running": { @@ -1048,11 +1020,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-untainted-reconnect-failed-and-replaced-single-instance-on", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-untainted-reconnect-failed-and-replaced-single-instance-on", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "running-replacement": { ID: "running-replacement", @@ -1109,11 +1079,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-reconnect-single-instance-on", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-reconnect-single-instance-on", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ // Expired allocs on reconnected clients are lost "expired-reconnect": { @@ -1147,11 +1115,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, }, { - name: "disco-client-running-reconnecting-and-replacement-untainted-single-instance-on", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-running-reconnecting-and-replacement-untainted-single-instance-on", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "running-replacement": { ID: "running-replacement", @@ -1209,11 +1175,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { // After an alloc is reconnected, it 
should be considered // "untainted" instead of "reconnecting" to allow changes such as // job updates to be applied properly. - name: "disco-client-reconnected-alloc-untainted", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: false, + name: "disco-client-reconnected-alloc-untainted", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: false, all: allocSet{ "running-reconnected": { ID: "running-reconnected", @@ -1246,11 +1210,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { expiring: allocSet{}, }, { - name: "disco-client-reconnected-alloc-untainted-single-instance-on", - supportsDisconnectedClients: true, - now: time.Now(), - taintedNodes: nodes, - skipNilNodeTest: true, + name: "disco-client-reconnected-alloc-untainted-single-instance-on", + state: ClusterState{nodes, true, time.Now()}, + skipNilNodeTest: true, all: allocSet{ "untainted-unknown": { ID: "untainted-unknown", @@ -1345,7 +1307,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { // With tainted nodes - untainted, migrate, lost, disconnecting, reconnecting, ignore, expired := tc.all.filterByTainted(tc.taintedNodes, tc.supportsDisconnectedClients, tc.now) + untainted, migrate, lost, disconnecting, reconnecting, ignore, expired := filterByTainted(tc.all, tc.state) must.Eq(t, tc.untainted, untainted, must.Sprintf("with-nodes: untainted")) must.Eq(t, tc.migrate, migrate, must.Sprintf("with-nodes: migrate")) must.Eq(t, tc.lost, lost, must.Sprintf("with-nodes: lost")) @@ -1359,7 +1321,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { } // Now again with nodes nil - untainted, migrate, lost, disconnecting, reconnecting, ignore, expired = tc.all.filterByTainted(nil, tc.supportsDisconnectedClients, tc.now) + state := tc.state + state.TaintedNodes = nil + untainted, migrate, lost, disconnecting, reconnecting, ignore, expired = filterByTainted(tc.all, state) must.Eq(t, tc.untainted, untainted, must.Sprintf("with-nodes: untainted")) must.Eq(t, tc.migrate, migrate, must.Sprintf("with-nodes: migrate")) must.Eq(t, tc.lost, lost, must.Sprintf("with-nodes: lost")) diff --git a/scheduler/reconciler/filters.go b/scheduler/reconciler/filters.go new file mode 100644 index 000000000..573b3f7f1 --- /dev/null +++ b/scheduler/reconciler/filters.go @@ -0,0 +1,305 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package reconciler + +import ( + "slices" + "time" + + "github.com/hashicorp/nomad/nomad/structs" + sstructs "github.com/hashicorp/nomad/scheduler/structs" +) + +// filterAndStopAll stops all allocations in an allocSet. This is useful in when +// stopping an entire job or task group. 
+func filterAndStopAll(set allocSet, cs ClusterState) (uint64, []AllocStopResult) { + untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := filterByTainted(set, cs) + + allocsToStop := slices.Concat( + markStop(untainted, "", sstructs.StatusAllocNotNeeded), + markStop(migrate, "", sstructs.StatusAllocNotNeeded), + markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost), + markStop(disconnecting, "", sstructs.StatusAllocNotNeeded), + markStop(reconnecting, "", sstructs.StatusAllocNotNeeded), + markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", sstructs.StatusAllocNotNeeded), + markStop(expiring.filterByClientStatus(structs.AllocClientStatusUnknown), "", sstructs.StatusAllocNotNeeded)) + return uint64(len(set)), allocsToStop +} + +// filterByTerminal filters out terminal allocs +func filterByTerminal(untainted allocSet) (nonTerminal allocSet) { + nonTerminal = make(map[string]*structs.Allocation) + for id, alloc := range untainted { + if !alloc.TerminalStatus() { + nonTerminal[id] = alloc + } + } + return +} + +// filterByDeployment filters allocations into two sets, those that match the +// given deployment ID and those that don't +func (a allocSet) filterByDeployment(id string) (match, nonmatch allocSet) { + match = make(map[string]*structs.Allocation) + nonmatch = make(map[string]*structs.Allocation) + for _, alloc := range a { + if alloc.DeploymentID == id { + match[alloc.ID] = alloc + } else { + nonmatch[alloc.ID] = alloc + } + } + return +} + +// filterOldTerminalAllocs filters allocations that should be ignored since they +// are allocations that are terminal from a previous job version. +func (a *AllocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignore allocSet) { + if !a.batch { + return all, nil + } + + filtered = filtered.union(all) + ignored := make(map[string]*structs.Allocation) + + // Ignore terminal batch jobs from older versions + for id, alloc := range filtered { + older := alloc.Job.Version < a.job.Version || alloc.Job.CreateIndex < a.job.CreateIndex + if older && alloc.TerminalStatus() { + delete(filtered, id) + ignored[id] = alloc + } + } + + return filtered, ignored +} + +// filterByTainted takes a set of tainted nodes and filters the allocation set +// into the following groups: +// 1. Those that exist on untainted nodes +// 2. Those exist on nodes that are draining +// 3. Those that exist on lost nodes or have expired +// 4. Those that are on nodes that are disconnected, but have not had their ClientState set to unknown +// 5. Those that are on a node that has reconnected. +// 6. Those that are in a state that results in a noop. +func filterByTainted(a allocSet, state ClusterState) (untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring allocSet) { + untainted = make(map[string]*structs.Allocation) + migrate = make(map[string]*structs.Allocation) + lost = make(map[string]*structs.Allocation) + disconnecting = make(map[string]*structs.Allocation) + reconnecting = make(map[string]*structs.Allocation) + ignore = make(map[string]*structs.Allocation) + expiring = make(map[string]*structs.Allocation) + + for _, alloc := range a { + // make sure we don't apply any reconnect logic to task groups + // without max_client_disconnect + supportsDisconnectedClients := alloc.SupportsDisconnectedClients(state.SupportsDisconnectedClients) + + reconnect := false + + // Only compute reconnect for unknown, running, and failed since they + // need to go through the reconnect logic. 
+ if supportsDisconnectedClients && + (alloc.ClientStatus == structs.AllocClientStatusUnknown || + alloc.ClientStatus == structs.AllocClientStatusRunning || + alloc.ClientStatus == structs.AllocClientStatusFailed) { + reconnect = alloc.NeedsToReconnect() + } + + // Failed allocs that need to be reconnected must be added to + // reconnecting so that they can be handled as a failed reconnect. + if supportsDisconnectedClients && + reconnect && + alloc.DesiredStatus == structs.AllocDesiredStatusRun && + alloc.ClientStatus == structs.AllocClientStatusFailed { + reconnecting[alloc.ID] = alloc + continue + } + + taintedNode, nodeIsTainted := state.TaintedNodes[alloc.NodeID] + if taintedNode != nil && taintedNode.Status == structs.NodeStatusDisconnected { + // Group disconnecting + if supportsDisconnectedClients { + // Filter running allocs on a node that is disconnected to be marked as unknown. + if alloc.ClientStatus == structs.AllocClientStatusRunning { + disconnecting[alloc.ID] = alloc + continue + } + // Filter pending allocs on a node that is disconnected to be marked as lost. + if alloc.ClientStatus == structs.AllocClientStatusPending { + lost[alloc.ID] = alloc + continue + } + + } else { + if alloc.PreventReplaceOnDisconnect() { + if alloc.ClientStatus == structs.AllocClientStatusRunning { + disconnecting[alloc.ID] = alloc + continue + } + + untainted[alloc.ID] = alloc + continue + } + + lost[alloc.ID] = alloc + continue + } + } + + if alloc.TerminalStatus() && !reconnect { + // Server-terminal allocs, if supportsDisconnectedClient and not reconnect, + // are probably stopped replacements and should be ignored + if supportsDisconnectedClients && alloc.ServerTerminalStatus() { + ignore[alloc.ID] = alloc + continue + } + + // Terminal canaries that have been marked for migration need to be + // migrated, otherwise we block deployments from progressing by + // counting them as running canaries. + if alloc.DeploymentStatus.IsCanary() && alloc.DesiredTransition.ShouldMigrate() { + migrate[alloc.ID] = alloc + continue + } + + // Terminal allocs, if not reconnect, are always untainted as they + // should never be migrated. + untainted[alloc.ID] = alloc + continue + } + + // Non-terminal allocs that should migrate should always migrate + if alloc.DesiredTransition.ShouldMigrate() { + migrate[alloc.ID] = alloc + continue + } + + if supportsDisconnectedClients && alloc.Expired(state.Now) { + expiring[alloc.ID] = alloc + continue + } + + // Acknowledge unknown allocs that we want to reconnect eventually. + if supportsDisconnectedClients && + alloc.ClientStatus == structs.AllocClientStatusUnknown && + alloc.DesiredStatus == structs.AllocDesiredStatusRun { + untainted[alloc.ID] = alloc + continue + } + + // Ignore failed allocs that need to be reconnected and that have been + // marked to stop by the server. + if supportsDisconnectedClients && + reconnect && + alloc.ClientStatus == structs.AllocClientStatusFailed && + alloc.DesiredStatus == structs.AllocDesiredStatusStop { + ignore[alloc.ID] = alloc + continue + } + + if !nodeIsTainted || (taintedNode != nil && taintedNode.Status == structs.NodeStatusReady) { + // Filter allocs on a node that is now re-connected to be resumed. + if reconnect { + // Expired unknown allocs should be processed depending on the max client disconnect + // and/or avoid reschedule on lost configurations, they are both treated as + // expiring. 
+ if alloc.Expired(state.Now) { + expiring[alloc.ID] = alloc + continue + } + + reconnecting[alloc.ID] = alloc + continue + } + + // Otherwise, Node is untainted so alloc is untainted + untainted[alloc.ID] = alloc + continue + } + + // Allocs on GC'd (nil) or lost nodes are Lost + if taintedNode == nil { + lost[alloc.ID] = alloc + continue + } + + // Allocs on terminal nodes that can't be rescheduled need to be treated + // differently than those that can. + if taintedNode.TerminalStatus() { + if alloc.PreventReplaceOnDisconnect() { + if alloc.ClientStatus == structs.AllocClientStatusUnknown { + untainted[alloc.ID] = alloc + continue + } else if alloc.ClientStatus == structs.AllocClientStatusRunning { + disconnecting[alloc.ID] = alloc + continue + } + } + + lost[alloc.ID] = alloc + continue + } + + // All other allocs are untainted + untainted[alloc.ID] = alloc + } + + return +} + +// filterByRescheduleable filters the allocation set to return the set of allocations that are either +// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled +// at a future time are also returned so that we can create follow up evaluations for them. Allocs are +// skipped or considered untainted according to logic defined in shouldFilter method. +func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time.Time, evalID string, deployment *structs.Deployment) (allocSet, allocSet, []*delayedRescheduleInfo) { + untainted := make(map[string]*structs.Allocation) + rescheduleNow := make(map[string]*structs.Allocation) + rescheduleLater := []*delayedRescheduleInfo{} + + for _, alloc := range a { + // Ignore disconnecting allocs that are already unknown. This can happen + // in the case of canaries that are interrupted by a disconnect. + if isDisconnecting && alloc.ClientStatus == structs.AllocClientStatusUnknown { + continue + } + + var eligibleNow, eligibleLater bool + var rescheduleTime time.Time + + // Ignore failing allocs that have already been rescheduled. + // Only failed or disconnecting allocs should be rescheduled. + // Protects against a bug allowing rescheduling running allocs. + if alloc.NextAllocation != "" && alloc.TerminalStatus() { + continue + } + + isUntainted, ignore := shouldFilter(alloc, isBatch) + if isUntainted && !isDisconnecting { + untainted[alloc.ID] = alloc + continue // these allocs can never be rescheduled, so skip checking + } + + if ignore { + continue + } + + eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID, deployment, isDisconnecting) + if eligibleNow { + rescheduleNow[alloc.ID] = alloc + continue + } + + // If the failed alloc is not eligible for rescheduling now we + // add it to the untainted set. + untainted[alloc.ID] = alloc + + if eligibleLater { + rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, alloc, rescheduleTime}) + } + + } + return untainted, rescheduleNow, rescheduleLater +} diff --git a/scheduler/reconciler/reconcile_cluster.go b/scheduler/reconciler/reconcile_cluster.go index 9ff4ccb88..9b3c06c7f 100644 --- a/scheduler/reconciler/reconcile_cluster.go +++ b/scheduler/reconciler/reconcile_cluster.go @@ -10,6 +10,7 @@ package reconciler import ( "fmt" + "maps" "slices" "sort" "time" @@ -48,6 +49,13 @@ type AllocReconcilerOption func(*AllocReconciler) // placement, inplace updating or stopping given the job specification and // existing cluster state. The reconciler should only be used for batch and // service jobs. 
+// +// TODO: an idea for a future refactoring is to put batch, job, jobID, +// oldDeployment, deployment, deploymentPaused, deploymentFailed, existingAllocs, +// evalID and evalPriority into a struct called, say, "InitialState," because +// these fields are used across the whole package to refer to initial or store +// intermittent state that is otherwise hard to capture. This would further ease +// the readability and development of the code in this package. type AllocReconciler struct { // logger is used to log debug information. Logging should be kept at a // minimal here @@ -79,9 +87,6 @@ type AllocReconciler struct { // deploymentFailed marks whether the deployment is failed deploymentFailed bool - // taintedNodes contains a map of nodes that are tainted - taintedNodes map[string]*structs.Node - // existingAllocs is non-terminal existing allocations existingAllocs []*structs.Allocation @@ -90,19 +95,13 @@ type AllocReconciler struct { evalID string evalPriority int - // supportsDisconnectedClients indicates whether all servers meet the required - // minimum version to allow application of max_client_disconnect configuration. - supportsDisconnectedClients bool - - // now is the time used when determining rescheduling eligibility - // defaults to time.Now, and overridden in unit tests - now time.Time - reconnectingPicker reconnectingPickerInterface - // Result is the results of the reconcile. During computation it can be - // used to store intermediate state - Result *ReconcileResults + // clusterState stores frequently accessed properties of the cluster: + // - a map of tainted nodes + // - whether we support disconnected clients + // - current time + clusterState ClusterState } // ReconcileResults contains the results of the reconciliation and should be @@ -156,6 +155,57 @@ type ReconcileResults struct { TaskGroupAllocNameIndexes map[string]*AllocNameIndex } +func (r *ReconcileResults) Merge(new *ReconcileResults) { + if new.Deployment != nil { + r.Deployment = new.Deployment + } + if new.DeploymentUpdates != nil { + r.DeploymentUpdates = append(r.DeploymentUpdates, new.DeploymentUpdates...) + } + if new.Place != nil { + r.Place = append(r.Place, new.Place...) + } + if new.DestructiveUpdate != nil { + r.DestructiveUpdate = append(r.DestructiveUpdate, new.DestructiveUpdate...) + } + if new.InplaceUpdate != nil { + r.InplaceUpdate = append(r.InplaceUpdate, new.InplaceUpdate...) + } + if new.Stop != nil { + r.Stop = append(r.Stop, new.Stop...) + } + if r.AttributeUpdates != nil { + maps.Copy(r.AttributeUpdates, new.AttributeUpdates) + } else { + r.AttributeUpdates = new.AttributeUpdates + } + if r.DisconnectUpdates != nil { + maps.Copy(r.DisconnectUpdates, new.DisconnectUpdates) + } else { + r.DisconnectUpdates = new.DisconnectUpdates + } + if r.ReconnectUpdates != nil { + maps.Copy(r.ReconnectUpdates, new.ReconnectUpdates) + } else { + r.ReconnectUpdates = new.ReconnectUpdates + } + if r.DesiredTGUpdates != nil { + maps.Copy(r.DesiredTGUpdates, new.DesiredTGUpdates) + } else { + r.DesiredTGUpdates = new.DesiredTGUpdates + } + if r.DesiredFollowupEvals != nil { + maps.Copy(r.DesiredFollowupEvals, new.DesiredFollowupEvals) + } else { + r.DesiredFollowupEvals = new.DesiredFollowupEvals + } + if r.TaskGroupAllocNameIndexes != nil { + maps.Copy(r.TaskGroupAllocNameIndexes, new.TaskGroupAllocNameIndexes) + } else { + r.TaskGroupAllocNameIndexes = new.TaskGroupAllocNameIndexes + } +} + // delayedRescheduleInfo contains the allocation id and a time when its eligible to be rescheduled. 
// this is used to create follow up evaluations type delayedRescheduleInfo struct { @@ -187,35 +237,36 @@ func (r *ReconcileResults) GoString() string { return base } +// ClusterState holds frequently used information about the state of the +// cluster: +// - a map of tainted nodes +// - whether we support disconnected clients +// - current time +type ClusterState struct { + TaintedNodes map[string]*structs.Node + SupportsDisconnectedClients bool + Now time.Time +} + // NewAllocReconciler creates a new reconciler that should be used to determine // the changes required to bring the cluster state inline with the declared jobspec func NewAllocReconciler(logger log.Logger, allocUpdateFn AllocUpdateType, batch bool, jobID string, job *structs.Job, deployment *structs.Deployment, - existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node, evalID string, - evalPriority int, supportsDisconnectedClients bool, opts ...AllocReconcilerOption) *AllocReconciler { + existingAllocs []*structs.Allocation, evalID string, + evalPriority int, state ClusterState, opts ...AllocReconcilerOption) *AllocReconciler { ar := &AllocReconciler{ - logger: logger.Named("reconciler"), - allocUpdateFn: allocUpdateFn, - batch: batch, - jobID: jobID, - job: job, - deployment: deployment.Copy(), - existingAllocs: existingAllocs, - taintedNodes: taintedNodes, - evalID: evalID, - evalPriority: evalPriority, - supportsDisconnectedClients: supportsDisconnectedClients, - now: time.Now().UTC(), - Result: &ReconcileResults{ - AttributeUpdates: make(map[string]*structs.Allocation), - DisconnectUpdates: make(map[string]*structs.Allocation), - ReconnectUpdates: make(map[string]*structs.Allocation), - DesiredTGUpdates: make(map[string]*structs.DesiredUpdates), - DesiredFollowupEvals: make(map[string][]*structs.Evaluation), - TaskGroupAllocNameIndexes: make(map[string]*AllocNameIndex), - }, + logger: logger.Named("reconciler"), + allocUpdateFn: allocUpdateFn, + batch: batch, + jobID: jobID, + job: job, + deployment: deployment.Copy(), + existingAllocs: existingAllocs, + evalID: evalID, + evalPriority: evalPriority, reconnectingPicker: newReconnectingPicker(logger), + clusterState: state, } for _, op := range opts { @@ -227,231 +278,202 @@ func NewAllocReconciler(logger log.Logger, allocUpdateFn AllocUpdateType, batch // Compute reconciles the existing cluster state and returns the set of changes // required to converge the job spec and state -func (a *AllocReconciler) Compute() { +func (a *AllocReconciler) Compute() *ReconcileResults { + result := &ReconcileResults{} + // Create the allocation matrix m := newAllocMatrix(a.job, a.existingAllocs) - a.cancelUnneededDeployments() + a.oldDeployment, a.deployment, result.DeploymentUpdates = cancelUnneededDeployments(a.job, a.deployment) // If we are just stopping a job we do not need to do anything more than // stopping all running allocs if a.job.Stopped() { - a.handleStop(m) - return + desiredTGUpdates, allocsToStop := a.handleStop(m) + result.DesiredTGUpdates = desiredTGUpdates + result.Stop = allocsToStop + return result } - a.computeDeploymentPaused() - deploymentComplete := a.computeDeploymentComplete(m) - a.computeDeploymentUpdates(deploymentComplete) -} - -func (a *AllocReconciler) computeDeploymentComplete(m allocMatrix) bool { - complete := true - for group, as := range m { - groupComplete := a.computeGroup(group, as) - complete = complete && groupComplete - } - - return complete -} - -func (a *AllocReconciler) computeDeploymentUpdates(deploymentComplete bool) { - 
if a.deployment != nil { - // Mark the deployment as complete if possible - if deploymentComplete { - if a.job.IsMultiregion() { - // the unblocking/successful states come after blocked, so we - // need to make sure we don't revert those states - if a.deployment.Status != structs.DeploymentStatusUnblocking && - a.deployment.Status != structs.DeploymentStatusSuccessful { - a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, - Status: structs.DeploymentStatusBlocked, - StatusDescription: structs.DeploymentStatusDescriptionBlocked, - }) - } - } else { - a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, - Status: structs.DeploymentStatusSuccessful, - StatusDescription: structs.DeploymentStatusDescriptionSuccessful, - }) - } - } - - // Mark the deployment as pending since its state is now computed. - if a.deployment.Status == structs.DeploymentStatusInitializing { - a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, - Status: structs.DeploymentStatusPending, - StatusDescription: structs.DeploymentStatusDescriptionPendingForPeer, - }) - } - } - - // Set the description of a created deployment - if d := a.Result.Deployment; d != nil { - if d.RequiresPromotion() { - if d.HasAutoPromote() { - d.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion - } else { - d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - } - } - } -} - -// computeDeploymentPaused is responsible for setting flags on the -// allocReconciler that indicate the state of the deployment if one -// is required. The flags that are managed are: -// 1. deploymentFailed: Did the current deployment fail just as named. -// 2. deploymentPaused: Set to true when the current deployment is paused, -// which is usually a manual user operation, or if the deployment is -// pending or initializing, which are the initial states for multi-region -// job deployments. This flag tells Compute that we should not make -// placements on the deployment. -func (a *AllocReconciler) computeDeploymentPaused() { + // set deployment paused and failed fields, if we currently have a + // deployment if a.deployment != nil { + // deployment is paused when it's manually paused by a user, or if the + // deployment is pending or initializing, which are the initial states + // for multi-region job deployments. This flag tells Compute that we + // should not make placements on the deployment. a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused || a.deployment.Status == structs.DeploymentStatusPending || a.deployment.Status == structs.DeploymentStatusInitializing a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed } + + // check if the deployment is complete and set relevant result fields in the + // process + var deploymentComplete bool + result, deploymentComplete = a.computeDeploymentComplete(result, m) + + result.DeploymentUpdates = append(result.DeploymentUpdates, a.computeDeploymentUpdates(deploymentComplete, result.Deployment)...) + + return result } -// cancelUnneededDeployments cancels any deployment that is not needed. If the -// current deployment is not needed the deployment field is set to nil. A deployment -// update will be staged for jobs that should stop or have the wrong version. 
-// Unneeded deployments include: +// cancelUnneededDeployments cancels any deployment that is not needed. +// A deployment update will be staged for jobs that should stop or have the +// wrong version. Unneeded deployments include: // 1. Jobs that are marked for stop, but there is a non-terminal deployment. // 2. Deployments that are active, but referencing a different job version. // 3. Deployments that are already successful. -func (a *AllocReconciler) cancelUnneededDeployments() { +// +// returns: old deployment, current deployment and a slice of deployment status +// updates. +func cancelUnneededDeployments(j *structs.Job, d *structs.Deployment) (*structs.Deployment, *structs.Deployment, []*structs.DeploymentStatusUpdate) { + var updates []*structs.DeploymentStatusUpdate + // If the job is stopped and there is a non-terminal deployment, cancel it - if a.job.Stopped() { - if a.deployment != nil && a.deployment.Active() { - a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, + if j.Stopped() { + if d != nil && d.Active() { + updates = append(updates, &structs.DeploymentStatusUpdate{ + DeploymentID: d.ID, Status: structs.DeploymentStatusCancelled, StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, }) } // Nothing else to do - a.oldDeployment = a.deployment - a.deployment = nil - return + return d, nil, updates } - d := a.deployment if d == nil { - return + return nil, nil, nil } // Check if the deployment is active and referencing an older job and cancel it - if d.JobCreateIndex != a.job.CreateIndex || d.JobVersion != a.job.Version { + if d.JobCreateIndex != j.CreateIndex || d.JobVersion != j.Version { if d.Active() { - a.Result.DeploymentUpdates = append(a.Result.DeploymentUpdates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, + updates = append(updates, &structs.DeploymentStatusUpdate{ + DeploymentID: d.ID, Status: structs.DeploymentStatusCancelled, StatusDescription: structs.DeploymentStatusDescriptionNewerJob, }) } - a.oldDeployment = d - a.deployment = nil + return d, nil, updates } // Clear it as the current deployment if it is successful if d.Status == structs.DeploymentStatusSuccessful { - a.oldDeployment = d - a.deployment = nil + return d, nil, updates } + + return nil, d, updates } // handleStop marks all allocations to be stopped, handling the lost case -func (a *AllocReconciler) handleStop(m allocMatrix) { +func (a *AllocReconciler) handleStop(m allocMatrix) (map[string]*structs.DesiredUpdates, []AllocStopResult) { + result := make(map[string]*structs.DesiredUpdates) + allocsToStop := []AllocStopResult{} + for group, as := range m { as = filterByTerminal(as) desiredChanges := new(structs.DesiredUpdates) - desiredChanges.Stop = a.filterAndStopAll(as) - a.Result.DesiredTGUpdates[group] = desiredChanges + desiredChanges.Stop, allocsToStop = filterAndStopAll(as, a.clusterState) + result[group] = desiredChanges } -} - -// filterAndStopAll stops all allocations in an allocSet. This is useful in when -// stopping an entire job or task group. 
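Because cancelUnneededDeployments is now a pure function of the job and the deployment, it can be exercised without building a reconciler. The test below is a sketch rather than part of the patch: the test name is made up, and it assumes the helpers the existing test file already uses (mock.Job, the must package).

```go
// Sketch of a focused test enabled by the refactor. A successful deployment
// for the current job version should come back as the "old" deployment,
// be cleared as the current one, and need no status update.
func TestCancelUnneededDeployments_SuccessfulIsCleared(t *testing.T) {
	job := mock.Job()
	d := structs.NewDeployment(job, 50, time.Now().UnixNano())
	d.Status = structs.DeploymentStatusSuccessful

	old, current, updates := cancelUnneededDeployments(job, d)

	must.True(t, old == d)
	must.Nil(t, current)
	must.SliceEmpty(t, updates)
}
```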
-func (a *AllocReconciler) filterAndStopAll(set allocSet) uint64 { - untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) - a.markStop(untainted, "", sstructs.StatusAllocNotNeeded) - a.markStop(migrate, "", sstructs.StatusAllocNotNeeded) - a.markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost) - a.markStop(disconnecting, "", sstructs.StatusAllocNotNeeded) - a.markStop(reconnecting, "", sstructs.StatusAllocNotNeeded) - a.markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", sstructs.StatusAllocNotNeeded) - a.markStop(expiring.filterByClientStatus(structs.AllocClientStatusUnknown), "", sstructs.StatusAllocNotNeeded) - return uint64(len(set)) + return result, allocsToStop } // markStop is a helper for marking a set of allocation for stop with a // particular client status and description. -func (a *AllocReconciler) markStop(allocs allocSet, clientStatus, statusDescription string) { +func markStop(allocs allocSet, clientStatus, statusDescription string) []AllocStopResult { + allocsToStop := []AllocStopResult{} for _, alloc := range allocs { - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + allocsToStop = append(allocsToStop, AllocStopResult{ Alloc: alloc, ClientStatus: clientStatus, StatusDescription: statusDescription, }) } + return allocsToStop } // markDelayed does markStop, but optionally includes a FollowupEvalID so that we can update // the stopped alloc with its delayed rescheduling evalID -func (a *AllocReconciler) markDelayed(allocs allocSet, clientStatus, statusDescription string, followupEvals map[string]string) { +func markDelayed(allocs allocSet, clientStatus, statusDescription string, followupEvals map[string]string) []AllocStopResult { + allocsToStop := []AllocStopResult{} for _, alloc := range allocs { - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + allocsToStop = append(allocsToStop, AllocStopResult{ Alloc: alloc, ClientStatus: clientStatus, StatusDescription: statusDescription, FollowupEvalID: followupEvals[alloc.ID], }) } + return allocsToStop +} + +// computeDeploymentComplete is the top-level method that computes +// reconciliation for a given allocation matrix. It returns: +// - a map of task group allocation name indexes +// - a slice of allocations to stop +// - a slice of replacements +// - a resulting deployment +// - a boolean that indicates whether the deployment is complete +func (a *AllocReconciler) computeDeploymentComplete(result *ReconcileResults, m allocMatrix) (*ReconcileResults, bool) { + complete := true + for group, as := range m { + var groupComplete bool + var resultForGroup *ReconcileResults + resultForGroup, groupComplete = a.computeGroup(group, as) + complete = complete && groupComplete + + // merge results for group with overall results + result.Merge(resultForGroup) + } + + return result, complete } // computeGroup reconciles state for a particular task group. It returns whether -// the deployment it is for is complete with regards to the task group. -func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { +// the deployment it is for is complete in regard to the task group. 
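markStop and markDelayed are now package-level helpers that return their AllocStopResult slices instead of appending to a.Result, so their output can be asserted directly. A minimal sketch (not from the patch; the one-element alloc set is made up, and the test assumes the same sstructs import alias the implementation file uses):

```go
// Sketch only: markStop can now be checked in isolation.
func TestMarkStop_SetsClientStatusAndDescription(t *testing.T) {
	set := allocSet{"alloc-1": &structs.Allocation{ID: "alloc-1"}}

	stops := markStop(set, structs.AllocClientStatusLost, sstructs.StatusAllocLost)

	must.Len(t, 1, stops)
	must.Eq(t, structs.AllocClientStatusLost, stops[0].ClientStatus)
	must.Eq(t, sstructs.StatusAllocLost, stops[0].StatusDescription)
}
```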
+// +// returns: desiredTGUpdates for taskgroup, allocations to stop, alloc name +// index for taskgroup, resulting deployment, and a boolean that indicates +// whether the whole group's deployment is complete +func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileResults, bool) { - // Create the desired update object for the group - desiredChanges := new(structs.DesiredUpdates) - a.Result.DesiredTGUpdates[groupName] = desiredChanges + // Create the output result object that we'll be continuously writing to + result := new(ReconcileResults) + result.DesiredTGUpdates = make(map[string]*structs.DesiredUpdates) + result.DesiredTGUpdates[group] = new(structs.DesiredUpdates) // Get the task group. The task group may be nil if the job was updates such // that the task group no longer exists - tg := a.job.LookupTaskGroup(groupName) + tg := a.job.LookupTaskGroup(group) // If the task group is nil, then the task group has been removed so all we // need to do is stop everything if tg == nil { - desiredChanges.Stop = a.filterAndStopAll(all) - return true + result.DesiredTGUpdates[group].Stop, result.Stop = filterAndStopAll(all, a.clusterState) + return result, true } - dstate, existingDeployment := a.initializeDeploymentState(groupName, tg) + dstate, existingDeployment := a.initializeDeploymentState(group, tg) // Filter allocations that do not need to be considered because they are // from an older job version and are terminal. all, ignore := a.filterOldTerminalAllocs(all) - desiredChanges.Ignore += uint64(len(ignore)) + result.DesiredTGUpdates[group].Ignore += uint64(len(ignore)) - canaries, all := a.cancelUnneededCanaries(all, desiredChanges) + var canaries allocSet + canaries, all, result.Stop = a.cancelUnneededCanaries(all, result.DesiredTGUpdates[group]) // Determine what set of allocations are on tainted nodes - untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := all.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) - desiredChanges.Ignore += uint64(len(ignore)) + untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := filterByTainted(all, a.clusterState) + result.DesiredTGUpdates[group].Ignore += uint64(len(ignore)) // Determine what set of terminal allocations need to be rescheduled - untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, false, a.now, a.evalID, a.deployment) + untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, false, a.clusterState.Now, a.evalID, a.deployment) // If there are allocations reconnecting we need to reconcile them and // their replacements first because there is specific logic when deciding @@ -459,23 +481,24 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { if len(reconnecting) > 0 { // Pass all allocations because the replacements we need to find may be // in any state, including themselves being reconnected. - reconnect, stop := a.reconcileReconnecting(reconnecting, all, tg) + reconnect, stopAllocSet, stopAllocResult := a.reconcileReconnecting(reconnecting, all, tg) + result.Stop = append(result.Stop, stopAllocResult...) // Stop the reconciled allocations and remove them from the other sets // since they have been already handled. 
- desiredChanges.Stop += uint64(len(stop)) + result.DesiredTGUpdates[group].Stop += uint64(len(stopAllocSet)) - untainted = untainted.difference(stop) - migrate = migrate.difference(stop) - lost = lost.difference(stop) - disconnecting = disconnecting.difference(stop) - reconnecting = reconnecting.difference(stop) - ignore = ignore.difference(stop) + untainted = untainted.difference(stopAllocSet) + migrate = migrate.difference(stopAllocSet) + lost = lost.difference(stopAllocSet) + disconnecting = disconnecting.difference(stopAllocSet) + reconnecting = reconnecting.difference(stopAllocSet) + ignore = ignore.difference(stopAllocSet) // Validate and add reconnecting allocations to the plan so they are // logged. if len(reconnect) > 0 { - a.computeReconnecting(reconnect) + result.ReconnectUpdates = a.computeReconnecting(reconnect) // The rest of the reconnecting allocations is now untainted and will // be further reconciled below. untainted = untainted.union(reconnect) @@ -489,12 +512,17 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { lost = lost.union(expiring) } } + + result.DesiredFollowupEvals = map[string][]*structs.Evaluation{} + result.DisconnectUpdates = map[string]*structs.Allocation{} + // Determine what set of disconnecting allocations need to be rescheduled now, // which ones later and which ones can't be rescheduled at all. timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { if tg.GetDisconnectLostTimeout() != 0 { - untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) + untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable( + a.batch, true, a.clusterState.Now, a.evalID, a.deployment) rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) untainted = untainted.union(untaintedDisconnecting) @@ -502,10 +530,13 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { // Find delays for any disconnecting allocs that have max_client_disconnect, // create followup evals, and update the ClientStatus to unknown. - timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) + var followupEvals []*structs.Evaluation + timeoutLaterEvals, followupEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) + result.DesiredFollowupEvals[tg.Name] = append(result.DesiredFollowupEvals[tg.Name], followupEvals...) } - a.appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals, rescheduleNow) + updates := appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals, rescheduleNow) + maps.Copy(result.DisconnectUpdates, updates) } // Find delays for any lost allocs that have stop_after_client_disconnect @@ -514,7 +545,9 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { if len(lost) > 0 { lostLater = lost.delayByStopAfter() - lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name) + var followupEvals []*structs.Evaluation + lostLaterEvals, followupEvals = a.createLostLaterEvals(lostLater) + result.DesiredFollowupEvals[tg.Name] = append(result.DesiredFollowupEvals[tg.Name], followupEvals...) 
} // Merge disconnecting with the stop_after_client_disconnect set into the @@ -524,29 +557,35 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { if len(rescheduleLater) > 0 { // Create batched follow-up evaluations for allocations that are // reschedulable later and mark the allocations for in place updating - a.createRescheduleLaterEvals(rescheduleLater, all, tg.Name) + var followups []*structs.Evaluation + followups, result.AttributeUpdates = a.createRescheduleLaterEvals(rescheduleLater, all, result.DisconnectUpdates) + result.DesiredFollowupEvals[tg.Name] = append(result.DesiredFollowupEvals[tg.Name], followups...) } // Create a structure for choosing names. Seed with the taken names // which is the union of untainted, rescheduled, allocs on migrating // nodes, and allocs on down nodes (includes canaries) - nameIndex := newAllocNameIndex(a.jobID, groupName, tg.Count, untainted.union(migrate, rescheduleNow, lost)) - a.Result.TaskGroupAllocNameIndexes[groupName] = nameIndex + nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, rescheduleNow, lost)) + allocNameIndexForGroup := nameIndex + result.TaskGroupAllocNameIndexes = map[string]*AllocNameIndex{group: allocNameIndexForGroup} // Stop any unneeded allocations and update the untainted set to not // include stopped allocations. isCanarying := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted - stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, isCanarying, lostLaterEvals) + stop, stopAllocs := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, isCanarying, lostLaterEvals) + result.Stop = append(result.Stop, stopAllocs...) - desiredChanges.Stop += uint64(len(stop)) + result.DesiredTGUpdates[group].Stop += uint64(len(stop)) untainted = untainted.difference(stop) // Do inplace upgrades where possible and capture the set of upgrades that // need to be done destructively. - ignoreUpdates, inplace, destructive := a.computeUpdates(tg, untainted) + var inplaceUpdateResult []*structs.Allocation + ignoreUpdates, inplace, inplaceUpdateResult, destructive := a.computeUpdates(tg, untainted) + result.InplaceUpdate = inplaceUpdateResult - desiredChanges.Ignore += uint64(len(ignoreUpdates)) - desiredChanges.InPlaceUpdate += uint64(len(inplace)) + result.DesiredTGUpdates[group].Ignore += uint64(len(ignoreUpdates)) + result.DesiredTGUpdates[group].InPlaceUpdate += uint64(len(inplace)) if !existingDeployment { dstate.DesiredTotal += len(destructive) + len(inplace) } @@ -556,9 +595,10 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { if isCanarying { untainted = untainted.difference(canaries) } - requiresCanaries := a.requiresCanaries(tg, dstate, destructive, canaries) + requiresCanaries := requiresCanaries(tg, dstate, destructive, canaries) if requiresCanaries { - a.computeCanaries(tg, dstate, destructive, canaries, desiredChanges, nameIndex) + placeCanaries := a.computeCanaries(tg, dstate, destructive, canaries, result.DesiredTGUpdates[group], nameIndex) + result.Place = append(result.Place, placeCanaries...) 
} // Determine how many non-canary allocs we can place @@ -573,7 +613,7 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { // * An alloc was lost var place []AllocPlaceResult if len(lostLater) == 0 { - place = a.computePlacements(tg, nameIndex, untainted, migrate, rescheduleNow, lost, isCanarying) + place = computePlacements(tg, nameIndex, untainted, migrate, rescheduleNow, lost, isCanarying) if !existingDeployment { dstate.DesiredTotal += len(place) } @@ -583,27 +623,83 @@ func (a *AllocReconciler) computeGroup(groupName string, all allocSet) bool { // placements can be made without any other consideration. deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !isCanarying - underProvisionedBy = a.computeReplacements(deploymentPlaceReady, desiredChanges, place, rescheduleNow, lost, underProvisionedBy) + underProvisionedBy, replacements, replacementsAllocsToStop := a.computeReplacements( + deploymentPlaceReady, result.DesiredTGUpdates[group], place, rescheduleNow, lost, result.DisconnectUpdates, underProvisionedBy) + result.Stop = append(result.Stop, replacementsAllocsToStop...) + result.Place = append(result.Place, replacements...) if deploymentPlaceReady { - a.computeDestructiveUpdates(destructive, underProvisionedBy, desiredChanges, tg) + result.DestructiveUpdate = a.computeDestructiveUpdates(destructive, underProvisionedBy, result.DesiredTGUpdates[group], tg) } else { - desiredChanges.Ignore += uint64(len(destructive)) + result.DesiredTGUpdates[group].Ignore += uint64(len(destructive)) } - a.computeMigrations(desiredChanges, migrate, tg, isCanarying) - a.createDeployment(tg.Name, tg.Update, existingDeployment, dstate, all, destructive) + stopMigrations, placeMigrations := a.computeMigrations(result.DesiredTGUpdates[group], migrate, tg, isCanarying) + result.Stop = append(result.Stop, stopMigrations...) + result.Place = append(result.Place, placeMigrations...) + result.Deployment = a.createDeployment( + tg.Name, tg.Update, existingDeployment, dstate, all, destructive, int(result.DesiredTGUpdates[group].InPlaceUpdate)) // Deployments that are still initializing need to be sent in full in the // plan so its internal state can be persisted by the plan applier. 
if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusInitializing { - a.Result.Deployment = a.deployment + result.Deployment = a.deployment } - deploymentComplete := a.isDeploymentComplete(groupName, destructive, inplace, - migrate, rescheduleNow, place, rescheduleLater, requiresCanaries) + deploymentComplete := a.isDeploymentComplete(group, destructive, inplace, + migrate, rescheduleNow, result.Place, rescheduleLater, requiresCanaries) - return deploymentComplete + return result, deploymentComplete +} + +// FIXME: this method should be renamed +func (a *AllocReconciler) computeDeploymentUpdates(deploymentComplete bool, createdDeployment *structs.Deployment) []*structs.DeploymentStatusUpdate { + var updates []*structs.DeploymentStatusUpdate + + if a.deployment != nil { + // Mark the deployment as complete if possible + if deploymentComplete { + if a.job.IsMultiregion() { + // the unblocking/successful states come after blocked, so we + // need to make sure we don't revert those states + if a.deployment.Status != structs.DeploymentStatusUnblocking && + a.deployment.Status != structs.DeploymentStatusSuccessful { + updates = append(updates, &structs.DeploymentStatusUpdate{ + DeploymentID: a.deployment.ID, + Status: structs.DeploymentStatusBlocked, + StatusDescription: structs.DeploymentStatusDescriptionBlocked, + }) + } + } else { + updates = append(updates, &structs.DeploymentStatusUpdate{ + DeploymentID: a.deployment.ID, + Status: structs.DeploymentStatusSuccessful, + StatusDescription: structs.DeploymentStatusDescriptionSuccessful, + }) + } + } + + // Mark the deployment as pending since its state is now computed. + if a.deployment.Status == structs.DeploymentStatusInitializing { + updates = append(updates, &structs.DeploymentStatusUpdate{ + DeploymentID: a.deployment.ID, + Status: structs.DeploymentStatusPending, + StatusDescription: structs.DeploymentStatusDescriptionPendingForPeer, + }) + } + } + + // Set the description of a created deployment + if createdDeployment != nil { + if createdDeployment.RequiresPromotion() { + if createdDeployment.HasAutoPromote() { + createdDeployment.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion + } else { + createdDeployment.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + } + } + } + return updates } func (a *AllocReconciler) initializeDeploymentState(group string, tg *structs.TaskGroup) (*structs.DeploymentState, bool) { @@ -627,7 +723,7 @@ func (a *AllocReconciler) initializeDeploymentState(group string, tg *structs.Ta } // If we have destructive updates, and have fewer canaries than is desired, we need to create canaries. 
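The per-group ReconcileResults returned above is folded into the overall result by ReconcileResults.Merge, which is called from computeDeploymentComplete earlier in this diff but whose body is outside the hunk. Purely for orientation, and as an assumption about its behavior, Merge presumably concatenates the slice fields and copies the per-group maps, along these lines:

```go
// Assumed shape of the merge step, for orientation only; the real Merge
// implementation lives outside this hunk. Field names are from the patch.
func (r *ReconcileResults) mergeSketch(other *ReconcileResults) {
	r.Place = append(r.Place, other.Place...)
	r.Stop = append(r.Stop, other.Stop...)
	r.InplaceUpdate = append(r.InplaceUpdate, other.InplaceUpdate...)
	r.DestructiveUpdate = append(r.DestructiveUpdate, other.DestructiveUpdate...)

	// The per-group maps (DesiredTGUpdates, TaskGroupAllocNameIndexes,
	// DesiredFollowupEvals) and per-alloc update maps are keyed uniquely per
	// group or alloc, so copying entries is enough.
	if r.DesiredTGUpdates == nil {
		r.DesiredTGUpdates = make(map[string]*structs.DesiredUpdates)
	}
	maps.Copy(r.DesiredTGUpdates, other.DesiredTGUpdates)
}
```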
-func (a *AllocReconciler) requiresCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, destructive, canaries allocSet) bool { +func requiresCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, destructive, canaries allocSet) bool { canariesPromoted := dstate != nil && dstate.Promoted return tg.Update != nil && len(destructive) != 0 && @@ -636,47 +732,30 @@ func (a *AllocReconciler) requiresCanaries(tg *structs.TaskGroup, dstate *struct } func (a *AllocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs.DeploymentState, - destructive, canaries allocSet, desiredChanges *structs.DesiredUpdates, nameIndex *AllocNameIndex) { + destructive, canaries allocSet, desiredChanges *structs.DesiredUpdates, nameIndex *AllocNameIndex) []AllocPlaceResult { dstate.DesiredCanaries = tg.Update.Canary + placementResult := []AllocPlaceResult{} + if !a.deploymentPaused && !a.deploymentFailed { desiredChanges.Canary += uint64(tg.Update.Canary - len(canaries)) for _, name := range nameIndex.NextCanaries(uint(desiredChanges.Canary), canaries, destructive) { - a.Result.Place = append(a.Result.Place, AllocPlaceResult{ + placementResult = append(placementResult, AllocPlaceResult{ name: name, canary: true, taskGroup: tg, }) } } -} -// filterOldTerminalAllocs filters allocations that should be ignored since they -// are allocations that are terminal from a previous job version. -func (a *AllocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignore allocSet) { - if !a.batch { - return all, nil - } - - filtered = filtered.union(all) - ignored := make(map[string]*structs.Allocation) - - // Ignore terminal batch jobs from older versions - for id, alloc := range filtered { - older := alloc.Job.Version < a.job.Version || alloc.Job.CreateIndex < a.job.CreateIndex - if older && alloc.TerminalStatus() { - delete(filtered, id) - ignored[id] = alloc - } - } - - return filtered, ignored + return placementResult } // cancelUnneededCanaries handles the canaries for the group by stopping the // unneeded ones and returning the current set of canaries and the updated total // set of allocs for the group -func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChanges *structs.DesiredUpdates) (canaries, all allocSet) { +func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChanges *structs.DesiredUpdates) ( + canaries, all allocSet, allocsToStop []AllocStopResult) { // Stop any canary from an older deployment or from a failed one var stop []string @@ -703,7 +782,7 @@ func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChang // stopSet is the allocSet that contains the canaries we desire to stop from // above. stopSet := all.fromKeys(stop) - a.markStop(stopSet, "", sstructs.StatusAllocNotNeeded) + allocsToStop = markStop(stopSet, "", sstructs.StatusAllocNotNeeded) desiredChanges.Stop += uint64(len(stopSet)) all = all.difference(stopSet) @@ -716,12 +795,14 @@ func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChang } canaries = all.fromKeys(canaryIDs) - untainted, migrate, lost, _, _, _, _ := canaries.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) + untainted, migrate, lost, _, _, _, _ := filterByTainted(canaries, a.clusterState) // We don't add these stops to desiredChanges because the deployment is // still active. DesiredChanges is used to report deployment progress/final // state. These transient failures aren't meaningful. 
- a.markStop(migrate, "", sstructs.StatusAllocMigrating) - a.markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost) + allocsToStop = slices.Concat(allocsToStop, + markStop(migrate, "", sstructs.StatusAllocMigrating), + markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost), + ) canaries = untainted all = all.difference(migrate, lost) @@ -778,7 +859,7 @@ func (a *AllocReconciler) computeUnderProvisionedBy(group *structs.TaskGroup, un // definition, the set of untainted, migrating and reschedule allocations for the group. // // Placements will meet or exceed group count. -func (a *AllocReconciler) computePlacements(group *structs.TaskGroup, +func computePlacements(group *structs.TaskGroup, nameIndex *AllocNameIndex, untainted, migrate, reschedule, lost allocSet, isCanarying bool) []AllocPlaceResult { @@ -842,32 +923,36 @@ func (a *AllocReconciler) computePlacements(group *structs.TaskGroup, // The input deploymentPlaceReady is calculated as the deployment is not paused, failed, or canarying. // It returns the number of allocs still needed. func (a *AllocReconciler) computeReplacements(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, - place []AllocPlaceResult, rescheduleNow, lost allocSet, underProvisionedBy int) int { + place []AllocPlaceResult, rescheduleNow, lost allocSet, disconnectUpdates map[string]*structs.Allocation, + underProvisionedBy int) (int, []AllocPlaceResult, []AllocStopResult) { // Disconnecting allocs are not failing, but are included in rescheduleNow. // Create a new set that only includes the actual failures and compute // replacements based off that. failed := make(allocSet) for id, alloc := range rescheduleNow { - _, ok := a.Result.DisconnectUpdates[id] + _, ok := disconnectUpdates[id] if !ok && alloc.ClientStatus != structs.AllocClientStatusUnknown { failed[id] = alloc } } + resultingPlacements := []AllocPlaceResult{} + resultingAllocsToStop := []AllocStopResult{} + // If the deployment is place ready, apply all placements and return if deploymentPlaceReady { desiredChanges.Place += uint64(len(place)) // This relies on the computePlacements having built this set, which in // turn relies on len(lostLater) == 0. - a.Result.Place = append(a.Result.Place, place...) + resultingPlacements = append(resultingPlacements, place...) - a.markStop(failed, "", sstructs.StatusAllocRescheduled) + resultingAllocsToStop = markStop(failed, "", sstructs.StatusAllocRescheduled) desiredChanges.Stop += uint64(len(failed)) minimum := min(len(place), underProvisionedBy) underProvisionedBy -= minimum - return underProvisionedBy + return underProvisionedBy, resultingPlacements, resultingAllocsToStop } // We do not want to place additional allocations but in the case we @@ -879,12 +964,12 @@ func (a *AllocReconciler) computeReplacements(deploymentPlaceReady bool, desired if len(lost) != 0 { allowed := min(len(lost), len(place)) desiredChanges.Place += uint64(allowed) - a.Result.Place = append(a.Result.Place, place[:allowed]...) + resultingPlacements = append(resultingPlacements, place[:allowed]...) } // if no failures or there are no pending placements return. if len(rescheduleNow) == 0 || len(place) == 0 { - return underProvisionedBy + return underProvisionedBy, resultingPlacements, nil } // Handle rescheduling of failed allocations even if the deployment is failed. 
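A small aside on the slices.Concat call above: it comes from the standard library slices package (Go 1.22 or newer, with the import presumably added in a part of the diff not shown here) and builds a new slice rather than appending in place. The resulting contents are the same as the plain append form:

```go
// Equivalent append form of the slices.Concat call above (illustrative only).
allocsToStop = append(allocsToStop,
	markStop(migrate, "", sstructs.StatusAllocMigrating)...)
allocsToStop = append(allocsToStop,
	markStop(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost)...)
```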
@@ -895,15 +980,15 @@ func (a *AllocReconciler) computeReplacements(deploymentPlaceReady bool, desired partOfFailedDeployment := a.deploymentFailed && prev != nil && a.deployment.ID == prev.DeploymentID if !partOfFailedDeployment && p.IsRescheduling() { - a.Result.Place = append(a.Result.Place, p) + resultingPlacements = append(resultingPlacements, p) desiredChanges.Place++ - _, prevIsDisconnecting := a.Result.DisconnectUpdates[prev.ID] + _, prevIsDisconnecting := disconnectUpdates[prev.ID] if prevIsDisconnecting { continue } - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + resultingAllocsToStop = append(resultingAllocsToStop, AllocStopResult{ Alloc: prev, StatusDescription: sstructs.StatusAllocRescheduled, }) @@ -911,34 +996,43 @@ func (a *AllocReconciler) computeReplacements(deploymentPlaceReady bool, desired } } - return underProvisionedBy + return underProvisionedBy, resultingPlacements, resultingAllocsToStop } func (a *AllocReconciler) computeDestructiveUpdates(destructive allocSet, underProvisionedBy int, - desiredChanges *structs.DesiredUpdates, tg *structs.TaskGroup) { + desiredChanges *structs.DesiredUpdates, tg *structs.TaskGroup) []allocDestructiveResult { + + destructiveResult := []allocDestructiveResult{} // Do all destructive updates minimum := min(len(destructive), underProvisionedBy) desiredChanges.DestructiveUpdate += uint64(minimum) desiredChanges.Ignore += uint64(len(destructive) - minimum) for _, alloc := range destructive.nameOrder()[:minimum] { - a.Result.DestructiveUpdate = append(a.Result.DestructiveUpdate, allocDestructiveResult{ + destructiveResult = append(destructiveResult, allocDestructiveResult{ placeName: alloc.Name, placeTaskGroup: tg, stopAlloc: alloc, stopStatusDescription: sstructs.StatusAllocUpdating, }) } + + return destructiveResult } -func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdates, migrate allocSet, tg *structs.TaskGroup, isCanarying bool) { +func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdates, migrate allocSet, + tg *structs.TaskGroup, isCanarying bool) ([]AllocStopResult, []AllocPlaceResult) { + + allocsToStop := []AllocStopResult{} + allocsToPlace := []AllocPlaceResult{} + desiredChanges.Migrate += uint64(len(migrate)) for _, alloc := range migrate.nameOrder() { - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + allocsToStop = append(allocsToStop, AllocStopResult{ Alloc: alloc, StatusDescription: sstructs.StatusAllocMigrating, }) - a.Result.Place = append(a.Result.Place, AllocPlaceResult{ + allocsToPlace = append(allocsToPlace, AllocPlaceResult{ name: alloc.Name, canary: alloc.DeploymentStatus.IsCanary(), taskGroup: tg, @@ -948,18 +1042,20 @@ func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdat minJobVersion: alloc.Job.Version, }) } + + return allocsToStop, allocsToPlace } func (a *AllocReconciler) createDeployment(groupName string, strategy *structs.UpdateStrategy, - existingDeployment bool, dstate *structs.DeploymentState, all, destructive allocSet) { + existingDeployment bool, dstate *structs.DeploymentState, all, destructive allocSet, inPlaceUpdates int) *structs.Deployment { // Guard the simple cases that require no computation first. 
if existingDeployment || strategy.IsEmpty() || dstate.DesiredTotal == 0 { - return + return nil } - updatingSpec := len(destructive) != 0 || len(a.Result.InplaceUpdate) != 0 + updatingSpec := len(destructive) != 0 || inPlaceUpdates != 0 hadRunning := false for _, alloc := range all { @@ -972,17 +1068,23 @@ func (a *AllocReconciler) createDeployment(groupName string, strategy *structs.U // Don't create a deployment if it's not the first time running the job // and there are no updates to the spec. if hadRunning && !updatingSpec { - return + return nil } + var resultingDeployment *structs.Deployment + // A previous group may have made the deployment already. If not create one. if a.deployment == nil { - a.deployment = structs.NewDeployment(a.job, a.evalPriority, a.now.UnixNano()) - a.Result.Deployment = a.deployment + // FIXME this method still mutates state :/ + a.deployment = structs.NewDeployment(a.job, a.evalPriority, a.clusterState.Now.UnixNano()) + resultingDeployment = a.deployment } // Attach the groups deployment state to the deployment + // FIXME this method still mutates state :/ a.deployment.TaskGroups[groupName] = dstate + + return resultingDeployment } func (a *AllocReconciler) isDeploymentComplete(groupName string, destructive, inplace, migrate, rescheduleNow allocSet, @@ -1010,13 +1112,16 @@ func (a *AllocReconciler) isDeploymentComplete(groupName string, destructive, in // the group definition, the set of allocations in various states and whether we // are canarying. func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *AllocNameIndex, - untainted, migrate, lost, canaries allocSet, isCanarying bool, followupEvals map[string]string) allocSet { + untainted, migrate, lost, canaries allocSet, isCanarying bool, followupEvals map[string]string) (allocSet, []AllocStopResult) { - // Mark all lost allocations for stop. - var stop allocSet - stop = stop.union(lost) + // Mark all lost allocations for stopAllocSet. + var stopAllocSet allocSet + stopAllocSet = stopAllocSet.union(lost) - a.markDelayed(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost, followupEvals) + var stopAllocResult []AllocStopResult + + delayedResult := markDelayed(lost, structs.AllocClientStatusLost, sstructs.StatusAllocLost, followupEvals) + stopAllocResult = append(stopAllocResult, delayedResult...) 
// If we are still deploying or creating canaries, don't stop them if isCanarying { @@ -1036,7 +1141,7 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc // corrected in `computePlacements` remove := len(knownUntainted) + len(migrate) - group.Count if remove <= 0 { - return stop + return stopAllocSet, stopAllocResult } // Filter out any terminal allocations from the untainted set @@ -1049,8 +1154,8 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc canaryNames := canaries.nameSet() for id, alloc := range untainted.difference(canaries) { if _, match := canaryNames[alloc.Name]; match { - stop[id] = alloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocSet[id] = alloc + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: alloc, StatusDescription: sstructs.StatusAllocNotNeeded, }) @@ -1058,7 +1163,7 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc remove-- if remove == 0 { - return stop + return stopAllocSet, stopAllocResult } } } @@ -1072,17 +1177,17 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc if _, match := removeNames[alloc.Name]; !match { continue } - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: alloc, StatusDescription: sstructs.StatusAllocNotNeeded, }) delete(migrate, id) - stop[id] = alloc + stopAllocSet[id] = alloc nameIndex.UnsetIndex(alloc.Index()) remove-- if remove == 0 { - return stop + return stopAllocSet, stopAllocResult } } } @@ -1091,8 +1196,8 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc removeNames := nameIndex.Highest(uint(remove)) for id, alloc := range untainted { if _, ok := removeNames[alloc.Name]; ok { - stop[id] = alloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocSet[id] = alloc + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: alloc, StatusDescription: sstructs.StatusAllocNotNeeded, }) @@ -1100,7 +1205,7 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc remove-- if remove == 0 { - return stop + return stopAllocSet, stopAllocResult } } } @@ -1108,8 +1213,8 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc // It is possible that we didn't stop as many as we should have if there // were allocations with duplicate names. for id, alloc := range untainted { - stop[id] = alloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocSet[id] = alloc + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: alloc, StatusDescription: sstructs.StatusAllocNotNeeded, }) @@ -1117,11 +1222,11 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc remove-- if remove == 0 { - return stop + return stopAllocSet, stopAllocResult } } - return stop + return stopAllocSet, stopAllocResult } // reconcileReconnecting receives the set of allocations that are reconnecting @@ -1137,9 +1242,10 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc // - If the reconnecting allocation is to be stopped, its replacements may // not be present in any of the returned sets. The rest of the reconciler // logic will handle them. 
-func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all allocSet, tg *structs.TaskGroup) (allocSet, allocSet) { +func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all allocSet, tg *structs.TaskGroup) (allocSet, allocSet, []AllocStopResult) { stop := make(allocSet) reconnect := make(allocSet) + stopAllocResult := []AllocStopResult{} for _, reconnectingAlloc := range reconnecting { @@ -1149,7 +1255,7 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc if reconnectFailed { stop[reconnectingAlloc.ID] = reconnectingAlloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: reconnectingAlloc, ClientStatus: structs.AllocClientStatusFailed, StatusDescription: sstructs.StatusAllocRescheduled, @@ -1168,7 +1274,7 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc if stopReconnecting { stop[reconnectingAlloc.ID] = reconnectingAlloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: reconnectingAlloc, StatusDescription: sstructs.StatusAllocNotNeeded, }) @@ -1212,7 +1318,7 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc // reconnecting if not stopped yet. if _, ok := stop[reconnectingAlloc.ID]; !ok { stop[reconnectingAlloc.ID] = reconnectingAlloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: reconnectingAlloc, StatusDescription: sstructs.StatusAllocNotNeeded, }) @@ -1222,7 +1328,7 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc // that are not in server terminal status or stopped already. if _, ok := stop[replacementAlloc.ID]; !ok { stop[replacementAlloc.ID] = replacementAlloc - a.Result.Stop = append(a.Result.Stop, AllocStopResult{ + stopAllocResult = append(stopAllocResult, AllocStopResult{ Alloc: replacementAlloc, StatusDescription: sstructs.StatusAllocReconnected, }) @@ -1238,7 +1344,7 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc } } - return reconnect, stop + return reconnect, stop, stopAllocResult } // computeUpdates determines which allocations for the passed group require @@ -1247,10 +1353,12 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc // 2. Those that can be upgraded in-place. These are added to the results // automatically since the function contains the correct state to do so, // 3. 
Those that require destructive updates -func (a *AllocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) { +func (a *AllocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) ( + ignore, inplaceUpdateMap allocSet, inplaceUpdateSlice []*structs.Allocation, destructive allocSet) { // Determine the set of allocations that need to be updated ignore = make(map[string]*structs.Allocation) - inplace = make(map[string]*structs.Allocation) + inplaceUpdateMap = make(map[string]*structs.Allocation) + inplaceUpdateSlice = make([]*structs.Allocation, 0) destructive = make(map[string]*structs.Allocation) for _, alloc := range untainted { @@ -1260,20 +1368,23 @@ func (a *AllocReconciler) computeUpdates(group *structs.TaskGroup, untainted all } else if destructiveChange { destructive[alloc.ID] = alloc } else { - inplace[alloc.ID] = alloc - a.Result.InplaceUpdate = append(a.Result.InplaceUpdate, inplaceAlloc) + inplaceUpdateMap[alloc.ID] = alloc + inplaceUpdateSlice = append(inplaceUpdateSlice, inplaceAlloc) } } - return } // createRescheduleLaterEvals creates batched followup evaluations with the WaitUntil field // set for allocations that are eligible to be rescheduled later, and marks the alloc with -// the followupEvalID -func (a *AllocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) { +// the followupEvalID. this function modifies disconnectUpdates in place. +func (a *AllocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedRescheduleInfo, all allocSet, + disconnectUpdates map[string]*structs.Allocation) ([]*structs.Evaluation, map[string]*structs.Allocation) { + // followupEvals are created in the same way as for delayed lost allocs - allocIDToFollowupEvalID := a.createLostLaterEvals(rescheduleLater, tgName) + allocIDToFollowupEvalID, followupEvals := a.createLostLaterEvals(rescheduleLater) + + var attributeUpdates = make(map[string]*structs.Allocation) // Create updates that will be applied to the allocs to mark the FollowupEvalID for _, laterAlloc := range rescheduleLater { @@ -1282,12 +1393,14 @@ func (a *AllocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedR updatedAlloc.FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID] // Can't updated an allocation that is disconnected - if _, ok := a.Result.DisconnectUpdates[laterAlloc.allocID]; !ok { - a.Result.AttributeUpdates[laterAlloc.allocID] = updatedAlloc + if _, ok := disconnectUpdates[laterAlloc.allocID]; !ok { + attributeUpdates[laterAlloc.allocID] = updatedAlloc } else { - a.Result.DisconnectUpdates[laterAlloc.allocID].FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID] + disconnectUpdates[laterAlloc.allocID].FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID] } } + + return followupEvals, attributeUpdates } // computeReconnecting copies existing allocations in the unknown state, but @@ -1295,7 +1408,9 @@ func (a *AllocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedR // set to running, and these allocs are appended to the Plan as non-destructive // updates. Clients are responsible for reconciling the DesiredState with the // actual state as the node comes back online. 
-func (a *AllocReconciler) computeReconnecting(reconnecting allocSet) { +func (a *AllocReconciler) computeReconnecting(reconnecting allocSet) map[string]*structs.Allocation { + + reconnectingUpdates := map[string]*structs.Allocation{} // Create updates that will be appended to the plan. for _, alloc := range reconnecting { @@ -1323,16 +1438,17 @@ func (a *AllocReconciler) computeReconnecting(reconnecting allocSet) { // Use a copy to prevent mutating the object from statestore. reconnectedAlloc := alloc.Copy() reconnectedAlloc.AppendState(structs.AllocStateFieldClientStatus, alloc.ClientStatus) - a.Result.ReconnectUpdates[reconnectedAlloc.ID] = reconnectedAlloc + reconnectingUpdates[reconnectedAlloc.ID] = reconnectedAlloc } + return reconnectingUpdates } // handleDelayedLost creates batched followup evaluations with the WaitUntil field set for // lost allocations. followupEvals are appended to a.result as a side effect, we return a // map of alloc IDs to their followupEval IDs. -func (a *AllocReconciler) createLostLaterEvals(rescheduleLater []*delayedRescheduleInfo, tgName string) map[string]string { +func (a *AllocReconciler) createLostLaterEvals(rescheduleLater []*delayedRescheduleInfo) (map[string]string, []*structs.Evaluation) { if len(rescheduleLater) == 0 { - return map[string]string{} + return map[string]string{}, nil } // Sort by time @@ -1384,24 +1500,22 @@ func (a *AllocReconciler) createLostLaterEvals(rescheduleLater []*delayedResched emitRescheduleInfo(allocReschedInfo.alloc, eval) } - a.appendFollowupEvals(tgName, evals) - - return allocIDToFollowupEvalID + return allocIDToFollowupEvalID, evals } // createTimeoutLaterEvals creates followup evaluations with the // WaitUntil field set for allocations in an unknown state on disconnected nodes. // It returns a map of allocIDs to their associated followUpEvalIDs. -func (a *AllocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName string) map[string]string { +func (a *AllocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName string) (map[string]string, []*structs.Evaluation) { if len(disconnecting) == 0 { - return map[string]string{} + return map[string]string{}, nil } - timeoutDelays, err := disconnecting.delayByLostAfter(a.now) + timeoutDelays, err := disconnecting.delayByLostAfter(a.clusterState.Now) if err != nil { a.logger.Error("error for task_group", "task_group", tgName, "error", err) - return map[string]string{} + return map[string]string{}, nil } // Sort by time @@ -1457,21 +1571,21 @@ func (a *AllocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName } - a.appendFollowupEvals(tgName, evals) - - return allocIDToFollowupEvalID + return allocIDToFollowupEvalID, evals } // Create updates that will be applied to the allocs to mark the FollowupEvalID // and the unknown ClientStatus and AllocState. 
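By this point in the hunk, createLostLaterEvals and createTimeoutLaterEvals hand their evaluations back to the caller, and appendFollowupEvals (removed just below) is no longer needed: computeGroup owns the DesiredFollowupEvals map directly. Condensed from the computeGroup changes earlier in this diff, the call-site pattern looks like this (maps.Copy is the standard library maps package, Go 1.21 or newer):

```go
// Condensed from computeGroup above: helpers return their evals and updates,
// and the caller folds them into the per-group result.
ids, evals := a.createTimeoutLaterEvals(disconnecting, tg.Name)
result.DesiredFollowupEvals[tg.Name] = append(result.DesiredFollowupEvals[tg.Name], evals...)

updates := appendUnknownDisconnectingUpdates(disconnecting, ids, rescheduleNow)
maps.Copy(result.DisconnectUpdates, updates)
```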
-func (a *AllocReconciler) appendUnknownDisconnectingUpdates(disconnecting allocSet, allocIDToFollowupEvalID map[string]string, rescheduleNow allocSet) { +func appendUnknownDisconnectingUpdates(disconnecting allocSet, + allocIDToFollowupEvalID map[string]string, rescheduleNow allocSet) map[string]*structs.Allocation { + resultingDisconnectUpdates := map[string]*structs.Allocation{} for id, alloc := range disconnecting { updatedAlloc := alloc.Copy() updatedAlloc.ClientStatus = structs.AllocClientStatusUnknown updatedAlloc.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) updatedAlloc.ClientDescription = sstructs.StatusAllocUnknown updatedAlloc.FollowupEvalID = allocIDToFollowupEvalID[id] - a.Result.DisconnectUpdates[updatedAlloc.ID] = updatedAlloc + resultingDisconnectUpdates[updatedAlloc.ID] = updatedAlloc // update the reschedule set so that any placements holding onto this // pointer are using the right pointer for PreviousAllocation() @@ -1481,17 +1595,8 @@ func (a *AllocReconciler) appendUnknownDisconnectingUpdates(disconnecting allocS } } } -} -// appendFollowupEvals appends a set of followup evals for a task group to the -// desiredFollowupEvals map which is later added to the scheduler's followUpEvals set. -func (a *AllocReconciler) appendFollowupEvals(tgName string, evals []*structs.Evaluation) { - // Merge with - if existingFollowUpEvals, ok := a.Result.DesiredFollowupEvals[tgName]; ok { - evals = append(existingFollowUpEvals, evals...) - } - - a.Result.DesiredFollowupEvals[tgName] = evals + return resultingDisconnectUpdates } // emitRescheduleInfo emits metrics about the rescheduling decision of an evaluation. If a followup evaluation is diff --git a/scheduler/reconciler/reconcile_cluster_test.go b/scheduler/reconciler/reconcile_cluster_test.go index 77fb9cfeb..22622478a 100644 --- a/scheduler/reconciler/reconcile_cluster_test.go +++ b/scheduler/reconciler/reconcile_cluster_test.go @@ -351,9 +351,8 @@ func TestReconciler_Place_NoExisting(t *testing.T) { job := mock.Job() reconciler := NewAllocReconciler( testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, nil, nil, "", job.Priority, true) - reconciler.Compute() - r := reconciler.Result + nil, nil, "", job.Priority, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -391,9 +390,8 @@ func TestReconciler_Place_Existing(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -433,9 +431,8 @@ func TestReconciler_ScaleDown_Partial(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -476,9 +473,8 @@ func TestReconciler_ScaleDown_Zero(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + 
r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -520,9 +516,8 @@ func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -559,9 +554,8 @@ func TestReconciler_Inplace(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -601,9 +595,8 @@ func TestReconciler_Inplace_ScaleUp(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -645,9 +638,8 @@ func TestReconciler_Inplace_ScaleDown(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -710,9 +702,8 @@ func TestReconciler_Inplace_Rollback(t *testing.T) { }, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFn, - false, job.ID, job, nil, allocs, nil, uuid.Generate(), 50, true) - reconciler.Compute() - r := reconciler.Result + false, job.ID, job, nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -757,9 +748,8 @@ func TestReconciler_Destructive(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -794,9 +784,8 @@ func TestReconciler_DestructiveMaxParallel(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -834,9 +823,8 @@ func TestReconciler_Destructive_ScaleUp(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -877,9 +865,8 @@ func 
TestReconciler_Destructive_ScaleDown(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -926,9 +913,8 @@ func TestReconciler_LostNode(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -980,9 +966,8 @@ func TestReconciler_LostNode_ScaleUp(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1034,9 +1019,8 @@ func TestReconciler_LostNode_ScaleDown(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1083,9 +1067,8 @@ func TestReconciler_DrainNode(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1139,9 +1122,8 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1196,9 +1178,8 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1245,9 +1226,8 @@ func TestReconciler_RemovedTG(t *testing.T) { job.TaskGroups[0].Name = newName reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1311,9 +1291,8 @@ func TestReconciler_JobStopped(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, 
- nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1381,9 +1360,9 @@ func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() + must.SliceEmpty(t, r.Stop) // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1421,9 +1400,8 @@ func TestReconciler_MultiTG(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1477,9 +1455,8 @@ func TestReconciler_MultiTG_SingleUpdateBlock(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -1555,9 +1532,8 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) { allocs[5].ClientStatus = structs.AllocClientStatusComplete reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, nil, uuid.Generate(), 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Two reschedule attempts were already made, one more can be made at a future time // Verify that the follow up eval has the expected waitUntil time @@ -1637,9 +1613,8 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, nil, uuid.Generate(), 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Verify that two follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -1734,10 +1709,8 @@ func TestReconciler_RescheduleNow_Batch(t *testing.T) { allocs[5].ClientStatus = structs.AllocClientStatusComplete reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.now = now - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, now}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -1811,9 +1784,8 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) { allocs[4].DesiredStatus = structs.AllocDesiredStatusStop reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, uuid.Generate(), 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Should 
place a new placement and create a follow up eval for the delayed reschedule // Verify that the follow up eval has the expected waitUntil time @@ -1884,9 +1856,8 @@ func TestReconciler_Service_ClientStatusComplete(t *testing.T) { allocs[4].ClientStatus = structs.AllocClientStatusComplete reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Should place a new placement for the alloc that was marked complete assertResults(t, r, &resultExpectation{ @@ -1944,9 +1915,8 @@ func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { allocs[4].DesiredStatus = structs.AllocDesiredStatusStop reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Should place a new placement for the alloc that was marked stopped assertResults(t, r, &resultExpectation{ @@ -2022,9 +1992,8 @@ func TestReconciler_RescheduleNow_Service(t *testing.T) { allocs[4].DesiredStatus = structs.AllocDesiredStatusStop reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -2102,10 +2071,8 @@ func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { allocs[1].ClientStatus = structs.AllocClientStatusFailed reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.now = now - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, now}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -2184,11 +2151,10 @@ func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { allocs[1].ClientStatus = structs.AllocClientStatusFailed allocs[1].FollowupEvalID = evalID + now = now.Add(-30 * time.Second) reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, evalID, 50, true) - reconciler.now = now.Add(-30 * time.Second) - reconciler.Compute() - r := reconciler.Result + nil, allocs, evalID, 50, ClusterState{nil, true, now}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -2296,9 +2262,8 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -2421,10 +2386,8 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, nil, "", 50, true) - reconciler.now = now - reconciler.Compute() - 
r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, now}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -2550,10 +2513,8 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, nil, "", 50, true) - reconciler.now = now - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, now}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -2619,9 +2580,8 @@ func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { allocs[4].DesiredStatus = structs.AllocDesiredStatusStop reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Should place 1 - one is a new placement to make up the desired count of 5 // failing allocs are not rescheduled @@ -2710,9 +2670,8 @@ func TestReconciler_CancelDeployment_JobStop(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, - c.deployment, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + c.deployment, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() var updates []*structs.DeploymentStatusUpdate if c.cancel { @@ -2791,9 +2750,8 @@ func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - c.deployment, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + c.deployment, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() var updates []*structs.DeploymentStatusUpdate if c.cancel { @@ -2844,9 +2802,8 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -2893,9 +2850,8 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -2941,9 +2897,8 @@ func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -2991,9 +2946,8 @@ 
func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3073,9 +3027,8 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { mockUpdateFn := allocUpdateFnMock(map[string]AllocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3142,9 +3095,8 @@ func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3220,9 +3172,8 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing mockUpdateFn := allocUpdateFnMock(map[string]AllocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3298,9 +3249,8 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3374,9 +3324,8 @@ func TestReconciler_LostNode_Canary(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3444,9 +3393,8 @@ func TestReconciler_StopOldCanaries(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, - allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -3503,9 +3451,8 @@ func TestReconciler_NewCanaries(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - 
nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -3557,9 +3504,8 @@ func TestReconciler_NewCanaries_CountGreater(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -3614,9 +3560,8 @@ func TestReconciler_NewCanaries_MultiTG(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -3673,9 +3618,8 @@ func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -3727,9 +3671,8 @@ func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -3810,9 +3753,8 @@ func TestReconciler_NewCanaries_FillNames(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3883,9 +3825,8 @@ func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -3961,9 +3902,8 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() updates := 
[]*structs.DeploymentStatusUpdate{ { @@ -4064,9 +4004,8 @@ func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4149,9 +4088,8 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4238,9 +4176,8 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4298,9 +4235,8 @@ func TestReconciler_CompleteDeployment(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4357,9 +4293,8 @@ func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, - job, d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + job, d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ { @@ -4456,9 +4391,8 @@ func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4529,9 +4463,8 @@ func TestReconciler_FailedDeployment_NewJob(t *testing.T) { jobNew.Version += 100 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, // otherwise there will be a discrepancy @@ -4588,9 +4521,8 @@ func TestReconciler_MarkDeploymentComplete(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, nil, "", 50, true) - 
reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ { @@ -4661,9 +4593,8 @@ func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4700,9 +4631,8 @@ func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() d := structs.NewDeployment(job, 50, r.Deployment.CreateTime) d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ @@ -4756,9 +4686,8 @@ func TestReconciler_Batch_Rerun(t *testing.T) { job2.CreateIndex++ reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ @@ -4821,9 +4750,8 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { FinishedAt: now.Add(-10 * time.Second)}} reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert that no rescheduled placements were created assertResults(t, r, &resultExpectation{ @@ -4880,9 +4808,8 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert that no rescheduled placements were created assertResults(t, r, &resultExpectation{ @@ -4969,9 +4896,8 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ { @@ -5035,9 +4961,8 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Assert that rescheduled placements were created assertResults(t, r, &resultExpectation{ @@ -5101,9 +5026,8 @@ func TestReconciler_ForceReschedule_Service(t *testing.T) { 
allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)} reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -5185,9 +5109,8 @@ func TestReconciler_RescheduleNot_Service(t *testing.T) { allocs[4].DesiredStatus = structs.AllocDesiredStatusStop reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -5582,22 +5505,20 @@ func TestReconciler_Disconnected_Client(t *testing.T) { allocs = append(allocs, replacements...) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, - nil, allocs, map[string]*structs.Node{testNode.ID: testNode}, "", 50, true) - - reconciler.now = time.Now() + now := time.Now() if tc.maxDisconnect != nil { - reconciler.now = time.Now().Add(*tc.maxDisconnect * 20) + now = time.Now().Add(*tc.maxDisconnect * 20) } + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, + nil, allocs, "", 50, ClusterState{map[string]*structs.Node{testNode.ID: testNode}, true, now}) mpc := &mockPicker{ result: tc.pickResult, } reconciler.reconnectingPicker = mpc - reconciler.Compute() + results := reconciler.Compute() - results := reconciler.Result assertResults(t, results, tc.expected) must.Eq(t, tc.reconcileStrategy, mpc.strategy) @@ -5677,10 +5598,8 @@ func TestReconciler_RescheduleNot_Batch(t *testing.T) { allocs[5].ClientStatus = structs.AllocClientStatusComplete reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.now = now - reconciler.Compute() - r := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, now}) + r := reconciler.Compute() // Verify that no follow up evals were created evals := r.DesiredFollowupEvals[tgName] @@ -5709,16 +5628,15 @@ func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) { // Build a map of disconnected nodes nodes := buildDisconnectedNodes(allocs, 2) + now := time.Now().UTC() reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nodes, "", 50, true) - reconciler.now = time.Now().UTC() - reconciler.Compute() - results := reconciler.Result + nil, allocs, "", 50, ClusterState{nodes, true, now}) + results := reconciler.Compute() // Verify that 1 follow up eval was created with the values we expect. 
evals := results.DesiredFollowupEvals[job.TaskGroups[0].Name] must.SliceLen(t, 1, evals) - expectedTime := reconciler.now.Add(5 * time.Minute) + expectedTime := now.Add(5 * time.Minute) eval := evals[0] must.NotNil(t, eval.WaitUntil) @@ -5773,9 +5691,8 @@ func TestReconciler_Disconnect_UpdateJobAfterReconnect(t *testing.T) { } reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.Compute() - results := reconciler.Result + nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + results := reconciler.Compute() // Assert both allocations will be updated. assertResults(t, results, &resultExpectation{ @@ -6124,9 +6041,8 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, updatedJob.ID, updatedJob, - deployment, allocs, tainted, "", 50, true) - reconciler.Compute() - result := reconciler.Result + deployment, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + result := reconciler.Compute() // Assert the correct results assertResults(t, result, tc.expectedResult) @@ -6275,7 +6191,7 @@ func TestReconciler_ComputeDeploymentPaused(t *testing.T) { reconciler := NewAllocReconciler( testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, deployment, - nil, nil, "", job.Priority, true) + nil, "", job.Priority, ClusterState{nil, true, time.Now().UTC()}) reconciler.Compute() From 1030760d3f776243039becc80a1db466f277a596 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 20 Jun 2025 17:23:31 +0200 Subject: [PATCH 17/32] scheduler: adjust method comments and names to reflect recent refactoring (#26085) Co-authored-by: Tim Gross --- scheduler/reconciler/reconcile_cluster.go | 43 ++++++++++++----------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/scheduler/reconciler/reconcile_cluster.go b/scheduler/reconciler/reconcile_cluster.go index 9b3c06c7f..7b96e42a5 100644 --- a/scheduler/reconciler/reconcile_cluster.go +++ b/scheduler/reconciler/reconcile_cluster.go @@ -155,6 +155,7 @@ type ReconcileResults struct { TaskGroupAllocNameIndexes map[string]*AllocNameIndex } +// Merge merges two instances of ReconcileResults func (r *ReconcileResults) Merge(new *ReconcileResults) { if new.Deployment != nil { r.Deployment = new.Deployment @@ -313,7 +314,7 @@ func (a *AllocReconciler) Compute() *ReconcileResults { var deploymentComplete bool result, deploymentComplete = a.computeDeploymentComplete(result, m) - result.DeploymentUpdates = append(result.DeploymentUpdates, a.computeDeploymentUpdates(deploymentComplete, result.Deployment)...) + result.DeploymentUpdates = append(result.DeploymentUpdates, a.setDeploymentStatusAndUpdates(deploymentComplete, result.Deployment)...) return result } @@ -369,7 +370,9 @@ func cancelUnneededDeployments(j *structs.Job, d *structs.Deployment) (*structs. return nil, d, updates } -// handleStop marks all allocations to be stopped, handling the lost case +// handleStop marks all allocations to be stopped, handling the lost case. +// Returns result structure with desired changes field set to stopped allocations +// and an array of stopped allocations. 
func (a *AllocReconciler) handleStop(m allocMatrix) (map[string]*structs.DesiredUpdates, []AllocStopResult) { result := make(map[string]*structs.DesiredUpdates) allocsToStop := []AllocStopResult{} @@ -384,7 +387,8 @@ func (a *AllocReconciler) handleStop(m allocMatrix) (map[string]*structs.Desired } // markStop is a helper for marking a set of allocation for stop with a -// particular client status and description. +// particular client status and description. Returns a slice of alloc stop +// result. func markStop(allocs allocSet, clientStatus, statusDescription string) []AllocStopResult { allocsToStop := []AllocStopResult{} for _, alloc := range allocs { @@ -413,12 +417,8 @@ func markDelayed(allocs allocSet, clientStatus, statusDescription string, follow } // computeDeploymentComplete is the top-level method that computes -// reconciliation for a given allocation matrix. It returns: -// - a map of task group allocation name indexes -// - a slice of allocations to stop -// - a slice of replacements -// - a resulting deployment -// - a boolean that indicates whether the deployment is complete +// reconciliation for a given allocation matrix. It returns ReconcileResults +// struct and a boolean that indicates whether the deployment is complete. func (a *AllocReconciler) computeDeploymentComplete(result *ReconcileResults, m allocMatrix) (*ReconcileResults, bool) { complete := true for group, as := range m { @@ -437,9 +437,8 @@ func (a *AllocReconciler) computeDeploymentComplete(result *ReconcileResults, m // computeGroup reconciles state for a particular task group. It returns whether // the deployment it is for is complete in regard to the task group. // -// returns: desiredTGUpdates for taskgroup, allocations to stop, alloc name -// index for taskgroup, resulting deployment, and a boolean that indicates -// whether the whole group's deployment is complete +// returns: ReconcileResults object and a boolean that indicates whether the +// whole group's deployment is complete func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileResults, bool) { // Create the output result object that we'll be continuously writing to @@ -623,7 +622,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe // placements can be made without any other consideration. deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !isCanarying - underProvisionedBy, replacements, replacementsAllocsToStop := a.computeReplacements( + underProvisionedBy, replacements, replacementsAllocsToStop := a.placeAllocs( deploymentPlaceReady, result.DesiredTGUpdates[group], place, rescheduleNow, lost, result.DisconnectUpdates, underProvisionedBy) result.Stop = append(result.Stop, replacementsAllocsToStop...) result.Place = append(result.Place, replacements...) @@ -652,8 +651,9 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe return result, deploymentComplete } -// FIXME: this method should be renamed -func (a *AllocReconciler) computeDeploymentUpdates(deploymentComplete bool, createdDeployment *structs.Deployment) []*structs.DeploymentStatusUpdate { +// setDeploymentStatusAndUpdates sets status for a.deployment if necessary and +// returns an array of DeploymentStatusUpdates. 
+func (a *AllocReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool, createdDeployment *structs.Deployment) []*structs.DeploymentStatusUpdate { var updates []*structs.DeploymentStatusUpdate if a.deployment != nil { @@ -917,12 +917,13 @@ func computePlacements(group *structs.TaskGroup, return place } -// computeReplacements either applies the placements calculated by computePlacements, -// or computes more placements based on whether the deployment is ready for placement -// and if the placement is already rescheduling or part of a failed deployment. -// The input deploymentPlaceReady is calculated as the deployment is not paused, failed, or canarying. -// It returns the number of allocs still needed. -func (a *AllocReconciler) computeReplacements(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, +// placeAllocs either applies the placements calculated by computePlacements, +// or computes more placements based on whether the deployment is ready for +// and if allocations are already rescheduling or part of a failed +// deployment. The input deploymentPlaceReady is calculated as the deployment +// is not paused, failed, or canarying. It returns the number of allocs still +// needed, allocations to place, and allocations to stop. +func (a *AllocReconciler) placeAllocs(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, place []AllocPlaceResult, rescheduleNow, lost allocSet, disconnectUpdates map[string]*structs.Allocation, underProvisionedBy int) (int, []AllocPlaceResult, []AllocStopResult) { From 732a671da68c3afe874d8c4f0a9f7bc6643fce1a Mon Sep 17 00:00:00 2001 From: Allison Larson Date: Fri, 20 Jun 2025 11:54:50 -0700 Subject: [PATCH 18/32] ci: pass go_tags to linux docker builder (#26090) --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index aa63352f4..c698487ca 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -211,7 +211,7 @@ jobs: CGO_ENABLED: 1 run: | go clean -cache - docker run --user "$(id --user):$(id --group)" --env HOME=/tmp -v "$(pwd)":/build localhost:5000/nomad-builder:${{ github.sha }} make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip + docker run --user "$(id --user):$(id --group)" --env HOME=/tmp --env GO_TAGS=${{env.GO_TAGS}} -v "$(pwd)":/build localhost:5000/nomad-builder:${{ github.sha }} make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip mv pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: From cdde082362bb34d5fe4a76f5f5011a9fec0c2d21 Mon Sep 17 00:00:00 2001 From: Aimee Ukasick Date: Fri, 20 Jun 2025 17:16:33 -0500 Subject: [PATCH 19/32] Docs bug: Fix broken link on concepts/job.mdx (#26093) --- website/content/docs/concepts/job.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/content/docs/concepts/job.mdx b/website/content/docs/concepts/job.mdx index effaa1b10..94fe05048 100644 --- a/website/content/docs/concepts/job.mdx +++ b/website/content/docs/concepts/job.mdx @@ -15,7 +15,7 @@ other tasks. Review job statuses and how Nomad versions your jobs. In Nomad, a _job_ is a user-specified state for a workload. The user expresses the job that should be running, but not where it should run. 
Nomad allocates resources and ensures that the actual state matches the user's desired state. A job consists of one or more tasks that you can organize into [task groups][task-groups]. -Declare the desired state of your job in a [job specification][job-specification], or _jobspec_, that describes +Declare the desired state of your job in a [job specification][job-spec], or _jobspec_, that describes the tasks and resources necessary for the job to run. You can also include job constraints to control which clients Nomad runs the job on. From d1f77a48ab542e9ef37bf315fe7e58dc3b4a8125 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 23 Jun 2025 07:44:32 +0100 Subject: [PATCH 20/32] rpc: Use client only auth for node get client allocs endpoint. (#26084) The RPC is only ever called from a Nomad client, which means we can move it away from the generic Authenticate function to the tighter AuthenticateClientOnly one. An additional check to ensure the ACL object allows client operations is performed, mimicking other endpoints of this nature. --- nomad/node_endpoint.go | 10 +++++++++- nomad/node_endpoint_test.go | 35 ++++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 6ab97ce7a..e0743379e 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -1101,7 +1101,11 @@ func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, reply *structs.NodeClientAllocsResponse) error { - authErr := n.srv.Authenticate(n.ctx, args) + // This RPC is only ever called by Nomad clients, so we can use the tightly + // scoped AuthenticateClientOnly method to authenticate and authorize the + // request. + aclObj, authErr := n.srv.AuthenticateClientOnly(n.ctx, args) + isForwarded := args.IsForwarded() if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done { // We have a valid node connection since there is no error from the @@ -1120,6 +1124,10 @@ func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now()) + if !aclObj.AllowClientOp() { + return structs.ErrPermissionDenied + } + // Verify the arguments if args.NodeID == "" { return fmt.Errorf("missing node ID") } diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 1d6774ba9..a69ae4470 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2406,6 +2406,19 @@ func TestClientEndpoint_GetClientAllocs(t *testing.T) { // Check that we have no client connections require.Empty(s1.connectedNodes()) + // The RPC is client only, so perform a test using the leader ACL token to + // ensure that even this powerful token cannot access the endpoint.
+ leaderACLReq := structs.NodeSpecificRequest{ + NodeID: uuid.Generate(), + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: s1.leaderAcl, + }, + } + var leaderACLResp structs.NodeClientAllocsResponse + err := msgpackrpc.CallWithCodec(codec, "Node.GetClientAllocs", &leaderACLReq, &leaderACLResp) + must.ErrorContains(t, err, "Permission denied") + // Create the register request node := mock.Node() state := s1.fsm.State() @@ -2415,16 +2428,19 @@ func TestClientEndpoint_GetClientAllocs(t *testing.T) { alloc := mock.Alloc() alloc.NodeID = node.ID state.UpsertJobSummary(99, mock.JobSummary(alloc.JobID)) - err := state.UpsertAllocs(structs.MsgTypeTestSetup, 100, []*structs.Allocation{alloc}) + err = state.UpsertAllocs(structs.MsgTypeTestSetup, 100, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) } // Lookup the allocs get := &structs.NodeSpecificRequest{ - NodeID: node.ID, - SecretID: node.SecretID, - QueryOptions: structs.QueryOptions{Region: "global"}, + NodeID: node.ID, + SecretID: node.SecretID, + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: node.SecretID, + }, } var resp2 structs.NodeClientAllocsResponse if err := msgpackrpc.CallWithCodec(codec, "Node.GetClientAllocs", get, &resp2); err != nil { @@ -2517,6 +2533,7 @@ func TestClientEndpoint_GetClientAllocs_Blocking(t *testing.T) { NodeID: node.ID, SecretID: node.SecretID, QueryOptions: structs.QueryOptions{ + AuthToken: node.SecretID, Region: "global", MinQueryIndex: 50, MaxQueryTime: time.Second, @@ -2635,6 +2652,7 @@ func TestClientEndpoint_GetClientAllocs_Blocking_GC(t *testing.T) { NodeID: node.ID, SecretID: node.SecretID, QueryOptions: structs.QueryOptions{ + AuthToken: node.SecretID, Region: "global", MinQueryIndex: 50, MaxQueryTime: time.Second, @@ -2711,9 +2729,12 @@ func TestClientEndpoint_GetClientAllocs_WithoutMigrateTokens(t *testing.T) { // Lookup the allocs get := &structs.NodeSpecificRequest{ - NodeID: node.ID, - SecretID: node.SecretID, - QueryOptions: structs.QueryOptions{Region: "global"}, + NodeID: node.ID, + SecretID: node.SecretID, + QueryOptions: structs.QueryOptions{ + AuthToken: node.SecretID, + Region: "global", + }, } var resp2 structs.NodeClientAllocsResponse From 8f98dca8f86e4c1d029823d8a90c3ce4a64ecf63 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Mon, 23 Jun 2025 10:14:47 +0200 Subject: [PATCH 21/32] ci: docker GO_TAGS must be quoted (#26105) ent builds use multiple tags --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c698487ca..d91120023 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -211,7 +211,7 @@ jobs: CGO_ENABLED: 1 run: | go clean -cache - docker run --user "$(id --user):$(id --group)" --env HOME=/tmp --env GO_TAGS=${{env.GO_TAGS}} -v "$(pwd)":/build localhost:5000/nomad-builder:${{ github.sha }} make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip + docker run --user "$(id --user):$(id --group)" --env HOME=/tmp --env GO_TAGS="${{env.GO_TAGS}}" -v "$(pwd)":/build localhost:5000/nomad-builder:${{ github.sha }} make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip mv pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: From 
e2a30df14ce55d8289263e6d875985c73e517eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Fjellstr=C3=B6m?= <36640518+mattias-fjellstrom@users.noreply.github.com> Date: Mon, 23 Jun 2025 15:34:56 +0200 Subject: [PATCH 22/32] docs: clarified azure cloud join requirements (#26091) --- website/content/docs/configuration/server_join.mdx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/content/docs/configuration/server_join.mdx b/website/content/docs/configuration/server_join.mdx index f01165f4c..0559a836d 100644 --- a/website/content/docs/configuration/server_join.mdx +++ b/website/content/docs/configuration/server_join.mdx @@ -208,7 +208,9 @@ region which have the given `tag_key` and `tag_value`. This returns the first private IP address of all servers in the given region which have the given `tag_key` and `tag_value` in the tenant and subscription, or in -the given `resource_group` of a `vm_scale_set` for Virtual Machine Scale Sets. +the given `resource_group` of a `vm_scale_set` for Virtual Machine Scale Sets. If using tags, +the `tag_key` and `tag_value` must be set on the network interface resource attached to the server +not on the virtual machine resource itself. ```json { @@ -221,6 +223,7 @@ the given `resource_group` of a `vm_scale_set` for Virtual Machine Scale Sets. - `provider` (required) - the name of the provider ("azure" in this case). - `tenant_id` (required) - the tenant to join machines in. - `client_id` (required) - the client to authenticate with. +- `subscription_id` (required) - the Azure subscription ID. - `secret_access_key` (required) - the secret client key. Use these configuration parameters when using tags: From 12ddb6db94a9bd823974692b9f8020b98155fec1 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Mon, 23 Jun 2025 15:36:39 +0200 Subject: [PATCH 23/32] scheduler: capture reconciler state in ReconcilerState object (#26088) This changeset separates reconciler fields into their own sub-struct to make testing easier and the code more explicit about what fields relate to which state. 
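Together with the existing ClusterState argument, the constructor call in the generic scheduler now reads roughly as sketched below. This is only an illustrative fragment assembled from the generic_sched.go hunk that follows: the scheduler-local names (s.logger, s.job, s.eval, s.batch, s.deployment, allocs, tainted, genericAllocUpdateFn, minVersionMaxClientDisconnect) are assumed from that hunk and are not defined here. Note that the positional ClusterState literals used throughout the updated tests, for example ClusterState{nil, true, time.Now().UTC()}, fill the same three fields in order: TaintedNodes, SupportsDisconnectedClients, and Now.

    // Illustrative fragment mirroring the generic_sched.go hunk below; the
    // s.* identifiers and helpers are assumed from that hunk.
    r := reconciler.NewAllocReconciler(s.logger,
        genericAllocUpdateFn(s.ctx, s.stack, s.eval.ID),
        reconciler.ReconcilerState{
            Job:               s.job,        // may be nil when the job is stopped via a purge
            JobID:             s.eval.JobID, // stored separately because Job can be nil
            JobIsBatch:        s.batch,
            DeploymentCurrent: s.deployment,
            ExistingAllocs:    allocs,
            EvalID:            s.eval.ID,
            EvalPriority:      s.eval.Priority,
        },
        reconciler.ClusterState{
            TaintedNodes:                tainted,
            SupportsDisconnectedClients: s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true),
            Now:                         time.Now().UTC(),
        })
    results := r.Compute()
    // results holds, among other things, the placements, stops, disconnect
    // updates, and deployment status changes computed for the job.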
--- scheduler/generic_sched.go | 12 +- scheduler/reconciler/filters.go | 16 +- scheduler/reconciler/reconcile_cluster.go | 218 ++- .../reconciler/reconcile_cluster_test.go | 1207 ++++++++++++++--- 4 files changed, 1160 insertions(+), 293 deletions(-) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 566eb5d94..03259410c 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -340,8 +340,16 @@ func (s *GenericScheduler) computeJobAllocs() error { r := reconciler.NewAllocReconciler(s.logger, genericAllocUpdateFn(s.ctx, s.stack, s.eval.ID), - s.batch, s.eval.JobID, s.job, s.deployment, allocs, s.eval.ID, - s.eval.Priority, reconciler.ClusterState{ + reconciler.ReconcilerState{ + Job: s.job, + JobID: s.eval.JobID, + JobIsBatch: s.batch, + DeploymentCurrent: s.deployment, + ExistingAllocs: allocs, + EvalID: s.eval.ID, + EvalPriority: s.eval.Priority, + }, + reconciler.ClusterState{ TaintedNodes: tainted, SupportsDisconnectedClients: s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true), Now: time.Now().UTC(), diff --git a/scheduler/reconciler/filters.go b/scheduler/reconciler/filters.go index 573b3f7f1..2760096c3 100644 --- a/scheduler/reconciler/filters.go +++ b/scheduler/reconciler/filters.go @@ -55,8 +55,8 @@ func (a allocSet) filterByDeployment(id string) (match, nonmatch allocSet) { // filterOldTerminalAllocs filters allocations that should be ignored since they // are allocations that are terminal from a previous job version. -func (a *AllocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignore allocSet) { - if !a.batch { +func filterOldTerminalAllocs(a ReconcilerState, all allocSet) (filtered, ignore allocSet) { + if !a.JobIsBatch { return all, nil } @@ -65,7 +65,7 @@ func (a *AllocReconciler) filterOldTerminalAllocs(all allocSet) (filtered, ignor // Ignore terminal batch jobs from older versions for id, alloc := range filtered { - older := alloc.Job.Version < a.job.Version || alloc.Job.CreateIndex < a.job.CreateIndex + older := alloc.Job.Version < a.Job.Version || alloc.Job.CreateIndex < a.Job.CreateIndex if older && alloc.TerminalStatus() { delete(filtered, id) ignored[id] = alloc @@ -250,10 +250,12 @@ func filterByTainted(a allocSet, state ClusterState) (untainted, migrate, lost, return } -// filterByRescheduleable filters the allocation set to return the set of allocations that are either -// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled -// at a future time are also returned so that we can create follow up evaluations for them. Allocs are -// skipped or considered untainted according to logic defined in shouldFilter method. +// filterByRescheduleable filters the allocation set to return the set of +// allocations that are either untainted or a set of allocations that must +// be rescheduled now. Allocations that can be rescheduled at a future time +// are also returned so that we can create follow up evaluations for them. +// Allocs are skipped or considered untainted according to logic defined in +// shouldFilter method. 
func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time.Time, evalID string, deployment *structs.Deployment) (allocSet, allocSet, []*delayedRescheduleInfo) { untainted := make(map[string]*structs.Allocation) rescheduleNow := make(map[string]*structs.Allocation) diff --git a/scheduler/reconciler/reconcile_cluster.go b/scheduler/reconciler/reconcile_cluster.go index 7b96e42a5..c86fdc561 100644 --- a/scheduler/reconciler/reconcile_cluster.go +++ b/scheduler/reconciler/reconcile_cluster.go @@ -45,17 +45,27 @@ type AllocUpdateType func(existing *structs.Allocation, newJob *structs.Job, type AllocReconcilerOption func(*AllocReconciler) +// ReconcilerState holds initial and intermittent state of the reconciler +type ReconcilerState struct { + Job *structs.Job + JobID string // stored separately because the job can be nil + JobIsBatch bool + + DeploymentOld *structs.Deployment + DeploymentCurrent *structs.Deployment + DeploymentPaused bool + DeploymentFailed bool + + ExistingAllocs []*structs.Allocation + + EvalID string + EvalPriority int +} + // AllocReconciler is used to determine the set of allocations that require // placement, inplace updating or stopping given the job specification and // existing cluster state. The reconciler should only be used for batch and // service jobs. -// -// TODO: an idea for a future refactoring is to put batch, job, jobID, -// oldDeployment, deployment, deploymentPaused, deploymentFailed, existingAllocs, -// evalID and evalPriority into a struct called, say, "InitialState," because -// these fields are used across the whole package to refer to initial or store -// intermittent state that is otherwise hard to capture. This would further ease -// the readability and development of the code in this package. type AllocReconciler struct { // logger is used to log debug information. Logging should be kept at a // minimal here @@ -64,36 +74,8 @@ type AllocReconciler struct { // canInplace is used to check if the allocation can be inplace upgraded allocUpdateFn AllocUpdateType - // batch marks whether the job is a batch job - batch bool - - // job is the job being operated on, it may be nil if the job is being - // stopped via a purge - job *structs.Job - - // jobID is the ID of the job being operated on. The job may be nil if it is - // being stopped so we require this separately. - jobID string - - // oldDeployment is the last deployment for the job - oldDeployment *structs.Deployment - - // deployment is the current deployment for the job - deployment *structs.Deployment - - // deploymentPaused marks whether the deployment is paused - deploymentPaused bool - - // deploymentFailed marks whether the deployment is failed - deploymentFailed bool - - // existingAllocs is non-terminal existing allocations - existingAllocs []*structs.Allocation - - // evalID and evalPriority is the ID and Priority of the evaluation that - // triggered the reconciler. 
- evalID string - evalPriority int + // jobState holds information about job, deployment, allocs and eval + jobState ReconcilerState reconnectingPicker reconnectingPickerInterface @@ -251,23 +233,15 @@ type ClusterState struct { // NewAllocReconciler creates a new reconciler that should be used to determine // the changes required to bring the cluster state inline with the declared jobspec -func NewAllocReconciler(logger log.Logger, allocUpdateFn AllocUpdateType, batch bool, - jobID string, job *structs.Job, deployment *structs.Deployment, - existingAllocs []*structs.Allocation, evalID string, - evalPriority int, state ClusterState, opts ...AllocReconcilerOption) *AllocReconciler { +func NewAllocReconciler(logger log.Logger, allocUpdateFn AllocUpdateType, + reconcilerState ReconcilerState, clusterState ClusterState, opts ...AllocReconcilerOption) *AllocReconciler { ar := &AllocReconciler{ logger: logger.Named("reconciler"), allocUpdateFn: allocUpdateFn, - batch: batch, - jobID: jobID, - job: job, - deployment: deployment.Copy(), - existingAllocs: existingAllocs, - evalID: evalID, - evalPriority: evalPriority, + jobState: reconcilerState, + clusterState: clusterState, reconnectingPicker: newReconnectingPicker(logger), - clusterState: state, } for _, op := range opts { @@ -283,13 +257,13 @@ func (a *AllocReconciler) Compute() *ReconcileResults { result := &ReconcileResults{} // Create the allocation matrix - m := newAllocMatrix(a.job, a.existingAllocs) + m := newAllocMatrix(a.jobState.Job, a.jobState.ExistingAllocs) - a.oldDeployment, a.deployment, result.DeploymentUpdates = cancelUnneededDeployments(a.job, a.deployment) + a.jobState.DeploymentOld, a.jobState.DeploymentCurrent, result.DeploymentUpdates = cancelUnneededDeployments(a.jobState.Job, a.jobState.DeploymentCurrent) // If we are just stopping a job we do not need to do anything more than // stopping all running allocs - if a.job.Stopped() { + if a.jobState.Job.Stopped() { desiredTGUpdates, allocsToStop := a.handleStop(m) result.DesiredTGUpdates = desiredTGUpdates result.Stop = allocsToStop @@ -298,15 +272,15 @@ func (a *AllocReconciler) Compute() *ReconcileResults { // set deployment paused and failed fields, if we currently have a // deployment - if a.deployment != nil { + if a.jobState.DeploymentCurrent != nil { // deployment is paused when it's manually paused by a user, or if the // deployment is pending or initializing, which are the initial states // for multi-region job deployments. This flag tells Compute that we // should not make placements on the deployment. - a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused || - a.deployment.Status == structs.DeploymentStatusPending || - a.deployment.Status == structs.DeploymentStatusInitializing - a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed + a.jobState.DeploymentPaused = a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusPaused || + a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusPending || + a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusInitializing + a.jobState.DeploymentFailed = a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusFailed } // check if the deployment is complete and set relevant result fields in the @@ -448,7 +422,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe // Get the task group. 
The task group may be nil if the job was updates such // that the task group no longer exists - tg := a.job.LookupTaskGroup(group) + tg := a.jobState.Job.LookupTaskGroup(group) // If the task group is nil, then the task group has been removed so all we // need to do is stop everything @@ -461,7 +435,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe // Filter allocations that do not need to be considered because they are // from an older job version and are terminal. - all, ignore := a.filterOldTerminalAllocs(all) + all, ignore := filterOldTerminalAllocs(a.jobState, all) result.DesiredTGUpdates[group].Ignore += uint64(len(ignore)) var canaries allocSet @@ -472,7 +446,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe result.DesiredTGUpdates[group].Ignore += uint64(len(ignore)) // Determine what set of terminal allocations need to be rescheduled - untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, false, a.clusterState.Now, a.evalID, a.deployment) + untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.jobState.JobIsBatch, false, a.clusterState.Now, a.jobState.EvalID, a.jobState.DeploymentCurrent) // If there are allocations reconnecting we need to reconcile them and // their replacements first because there is specific logic when deciding @@ -521,7 +495,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe if len(disconnecting) > 0 { if tg.GetDisconnectLostTimeout() != 0 { untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable( - a.batch, true, a.clusterState.Now, a.evalID, a.deployment) + a.jobState.JobIsBatch, true, a.clusterState.Now, a.jobState.EvalID, a.jobState.DeploymentCurrent) rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) untainted = untainted.union(untaintedDisconnecting) @@ -563,7 +537,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe // Create a structure for choosing names. Seed with the taken names // which is the union of untainted, rescheduled, allocs on migrating // nodes, and allocs on down nodes (includes canaries) - nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, rescheduleNow, lost)) + nameIndex := newAllocNameIndex(a.jobState.JobID, group, tg.Count, untainted.union(migrate, rescheduleNow, lost)) allocNameIndexForGroup := nameIndex result.TaskGroupAllocNameIndexes = map[string]*AllocNameIndex{group: allocNameIndexForGroup} @@ -620,7 +594,7 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe // deploymentPlaceReady tracks whether the deployment is in a state where // placements can be made without any other consideration. - deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !isCanarying + deploymentPlaceReady := !a.jobState.DeploymentPaused && !a.jobState.DeploymentFailed && !isCanarying underProvisionedBy, replacements, replacementsAllocsToStop := a.placeAllocs( deploymentPlaceReady, result.DesiredTGUpdates[group], place, rescheduleNow, lost, result.DisconnectUpdates, underProvisionedBy) @@ -641,8 +615,8 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe // Deployments that are still initializing need to be sent in full in the // plan so its internal state can be persisted by the plan applier. 
- if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusInitializing { - result.Deployment = a.deployment + if a.jobState.DeploymentCurrent != nil && a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusInitializing { + result.Deployment = a.jobState.DeploymentCurrent } deploymentComplete := a.isDeploymentComplete(group, destructive, inplace, @@ -656,23 +630,23 @@ func (a *AllocReconciler) computeGroup(group string, all allocSet) (*ReconcileRe func (a *AllocReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool, createdDeployment *structs.Deployment) []*structs.DeploymentStatusUpdate { var updates []*structs.DeploymentStatusUpdate - if a.deployment != nil { + if a.jobState.DeploymentCurrent != nil { // Mark the deployment as complete if possible if deploymentComplete { - if a.job.IsMultiregion() { + if a.jobState.Job.IsMultiregion() { // the unblocking/successful states come after blocked, so we // need to make sure we don't revert those states - if a.deployment.Status != structs.DeploymentStatusUnblocking && - a.deployment.Status != structs.DeploymentStatusSuccessful { + if a.jobState.DeploymentCurrent.Status != structs.DeploymentStatusUnblocking && + a.jobState.DeploymentCurrent.Status != structs.DeploymentStatusSuccessful { updates = append(updates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, + DeploymentID: a.jobState.DeploymentCurrent.ID, Status: structs.DeploymentStatusBlocked, StatusDescription: structs.DeploymentStatusDescriptionBlocked, }) } } else { updates = append(updates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, + DeploymentID: a.jobState.DeploymentCurrent.ID, Status: structs.DeploymentStatusSuccessful, StatusDescription: structs.DeploymentStatusDescriptionSuccessful, }) @@ -680,9 +654,9 @@ func (a *AllocReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool, } // Mark the deployment as pending since its state is now computed. 
- if a.deployment.Status == structs.DeploymentStatusInitializing { + if a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusInitializing { updates = append(updates, &structs.DeploymentStatusUpdate{ - DeploymentID: a.deployment.ID, + DeploymentID: a.jobState.DeploymentCurrent.ID, Status: structs.DeploymentStatusPending, StatusDescription: structs.DeploymentStatusDescriptionPendingForPeer, }) @@ -706,8 +680,8 @@ func (a *AllocReconciler) initializeDeploymentState(group string, tg *structs.Ta var dstate *structs.DeploymentState existingDeployment := false - if a.deployment != nil { - dstate, existingDeployment = a.deployment.TaskGroups[group] + if a.jobState.DeploymentCurrent != nil { + dstate, existingDeployment = a.jobState.DeploymentCurrent.TaskGroups[group] } if !existingDeployment { @@ -737,7 +711,7 @@ func (a *AllocReconciler) computeCanaries(tg *structs.TaskGroup, dstate *structs placementResult := []AllocPlaceResult{} - if !a.deploymentPaused && !a.deploymentFailed { + if !a.jobState.DeploymentPaused && !a.jobState.DeploymentFailed { desiredChanges.Canary += uint64(tg.Update.Canary - len(canaries)) for _, name := range nameIndex.NextCanaries(uint(desiredChanges.Canary), canaries, destructive) { placementResult = append(placementResult, AllocPlaceResult{ @@ -762,8 +736,8 @@ func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChang all = original // Cancel any non-promoted canaries from the older deployment - if a.oldDeployment != nil { - for _, dstate := range a.oldDeployment.TaskGroups { + if a.jobState.DeploymentOld != nil { + for _, dstate := range a.jobState.DeploymentOld.TaskGroups { if !dstate.Promoted { stop = append(stop, dstate.PlacedCanaries...) } @@ -771,8 +745,8 @@ func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChang } // Cancel any non-promoted canaries from a failed deployment - if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusFailed { - for _, dstate := range a.deployment.TaskGroups { + if a.jobState.DeploymentCurrent != nil && a.jobState.DeploymentCurrent.Status == structs.DeploymentStatusFailed { + for _, dstate := range a.jobState.DeploymentCurrent.TaskGroups { if !dstate.Promoted { stop = append(stop, dstate.PlacedCanaries...) } @@ -788,9 +762,9 @@ func (a *AllocReconciler) cancelUnneededCanaries(original allocSet, desiredChang // Capture our current set of canaries and handle any migrations that are // needed by just stopping them. - if a.deployment != nil { + if a.jobState.DeploymentCurrent != nil { var canaryIDs []string - for _, dstate := range a.deployment.TaskGroups { + for _, dstate := range a.jobState.DeploymentCurrent.TaskGroups { canaryIDs = append(canaryIDs, dstate.PlacedCanaries...) } @@ -822,19 +796,19 @@ func (a *AllocReconciler) computeUnderProvisionedBy(group *structs.TaskGroup, un } // If the deployment is nil, allow MaxParallel placements - if a.deployment == nil { + if a.jobState.DeploymentCurrent == nil { return group.Update.MaxParallel } // If the deployment is paused, failed, or we have un-promoted canaries, do not create anything else. - if a.deploymentPaused || - a.deploymentFailed || + if a.jobState.DeploymentPaused || + a.jobState.DeploymentFailed || isCanarying { return 0 } underProvisionedBy := group.Update.MaxParallel - partOf, _ := untainted.filterByDeployment(a.deployment.ID) + partOf, _ := untainted.filterByDeployment(a.jobState.DeploymentCurrent.ID) for _, alloc := range partOf { // An unhealthy allocation means nothing else should happen. 
if alloc.DeploymentStatus.IsUnhealthy() { @@ -978,7 +952,7 @@ func (a *AllocReconciler) placeAllocs(deploymentPlaceReady bool, desiredChanges // to the place set. Add the previous alloc to the stop set unless it is disconnecting. for _, p := range place { prev := p.PreviousAllocation() - partOfFailedDeployment := a.deploymentFailed && prev != nil && a.deployment.ID == prev.DeploymentID + partOfFailedDeployment := a.jobState.DeploymentFailed && prev != nil && a.jobState.DeploymentCurrent.ID == prev.DeploymentID if !partOfFailedDeployment && p.IsRescheduling() { resultingPlacements = append(resultingPlacements, p) @@ -1047,6 +1021,8 @@ func (a *AllocReconciler) computeMigrations(desiredChanges *structs.DesiredUpdat return allocsToStop, allocsToPlace } +// createDeployment creates a new deployment if necessary. +// WARNING: this method mutates reconciler state field deploymentCurrent func (a *AllocReconciler) createDeployment(groupName string, strategy *structs.UpdateStrategy, existingDeployment bool, dstate *structs.DeploymentState, all, destructive allocSet, inPlaceUpdates int) *structs.Deployment { // Guard the simple cases that require no computation first. @@ -1060,7 +1036,7 @@ func (a *AllocReconciler) createDeployment(groupName string, strategy *structs.U hadRunning := false for _, alloc := range all { - if alloc.Job.Version == a.job.Version && alloc.Job.CreateIndex == a.job.CreateIndex { + if alloc.Job.Version == a.jobState.Job.Version && alloc.Job.CreateIndex == a.jobState.Job.CreateIndex { hadRunning = true break } @@ -1075,15 +1051,13 @@ func (a *AllocReconciler) createDeployment(groupName string, strategy *structs.U var resultingDeployment *structs.Deployment // A previous group may have made the deployment already. If not create one. - if a.deployment == nil { - // FIXME this method still mutates state :/ - a.deployment = structs.NewDeployment(a.job, a.evalPriority, a.clusterState.Now.UnixNano()) - resultingDeployment = a.deployment + if a.jobState.DeploymentCurrent == nil { + a.jobState.DeploymentCurrent = structs.NewDeployment(a.jobState.Job, a.jobState.EvalPriority, a.clusterState.Now.UnixNano()) + resultingDeployment = a.jobState.DeploymentCurrent } // Attach the groups deployment state to the deployment - // FIXME this method still mutates state :/ - a.deployment.TaskGroups[groupName] = dstate + a.jobState.DeploymentCurrent.TaskGroups[groupName] = dstate return resultingDeployment } @@ -1094,12 +1068,12 @@ func (a *AllocReconciler) isDeploymentComplete(groupName string, destructive, in complete := len(destructive)+len(inplace)+len(place)+len(migrate)+len(rescheduleNow)+len(rescheduleLater) == 0 && !requiresCanaries - if !complete || a.deployment == nil { + if !complete || a.jobState.DeploymentCurrent == nil { return false } // Final check to see if the deployment is complete is to ensure everything is healthy - if dstate, ok := a.deployment.TaskGroups[groupName]; ok { + if dstate, ok := a.jobState.DeploymentCurrent.TaskGroups[groupName]; ok { if dstate.HealthyAllocs < max(dstate.DesiredTotal, dstate.DesiredCanaries) || // Make sure we have enough healthy allocs (dstate.DesiredCanaries > 0 && !dstate.Promoted) { // Make sure we are promoted if we have canaries complete = false @@ -1172,7 +1146,7 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc // Prefer selecting from the migrating set before stopping existing allocs if len(migrate) != 0 { - migratingNames := newAllocNameIndex(a.jobID, group.Name, group.Count, migrate) + migratingNames := 
newAllocNameIndex(a.jobState.JobID, group.Name, group.Count, migrate) removeNames := migratingNames.Highest(uint(remove)) for id, alloc := range migrate { if _, match := removeNames[alloc.Name]; !match { @@ -1270,8 +1244,8 @@ func (a *AllocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc reconnectingAlloc.DesiredTransition.ShouldMigrate() || reconnectingAlloc.DesiredTransition.ShouldReschedule() || reconnectingAlloc.DesiredTransition.ShouldForceReschedule() || - reconnectingAlloc.Job.Version < a.job.Version || - reconnectingAlloc.Job.CreateIndex < a.job.CreateIndex + reconnectingAlloc.Job.Version < a.jobState.Job.Version || + reconnectingAlloc.Job.CreateIndex < a.jobState.Job.CreateIndex if stopReconnecting { stop[reconnectingAlloc.ID] = reconnectingAlloc @@ -1363,7 +1337,7 @@ func (a *AllocReconciler) computeUpdates(group *structs.TaskGroup, untainted all destructive = make(map[string]*structs.Allocation) for _, alloc := range untainted { - ignoreChange, destructiveChange, inplaceAlloc := a.allocUpdateFn(alloc, a.job, group) + ignoreChange, destructiveChange, inplaceAlloc := a.allocUpdateFn(alloc, a.jobState.Job, group) if ignoreChange { ignore[alloc.ID] = alloc } else if destructiveChange { @@ -1419,8 +1393,8 @@ func (a *AllocReconciler) computeReconnecting(reconnecting allocSet) map[string] if alloc.DesiredTransition.ShouldMigrate() || alloc.DesiredTransition.ShouldReschedule() || alloc.DesiredTransition.ShouldForceReschedule() || - alloc.Job.Version < a.job.Version || - alloc.Job.CreateIndex < a.job.CreateIndex { + alloc.Job.Version < a.jobState.Job.Version || + alloc.Job.CreateIndex < a.jobState.Job.CreateIndex { continue } @@ -1464,12 +1438,12 @@ func (a *AllocReconciler) createLostLaterEvals(rescheduleLater []*delayedResched // Create a new eval for the first batch eval := &structs.Evaluation{ ID: uuid.Generate(), - Namespace: a.job.Namespace, - Priority: a.evalPriority, - Type: a.job.Type, + Namespace: a.jobState.Job.Namespace, + Priority: a.jobState.EvalPriority, + Type: a.jobState.Job.Type, TriggeredBy: structs.EvalTriggerRetryFailedAlloc, - JobID: a.job.ID, - JobModifyIndex: a.job.ModifyIndex, + JobID: a.jobState.Job.ID, + JobModifyIndex: a.jobState.Job.ModifyIndex, Status: structs.EvalStatusPending, StatusDescription: sstructs.DescReschedulingFollowupEval, WaitUntil: nextReschedTime, @@ -1485,12 +1459,12 @@ func (a *AllocReconciler) createLostLaterEvals(rescheduleLater []*delayedResched // Create a new eval for the new batch eval = &structs.Evaluation{ ID: uuid.Generate(), - Namespace: a.job.Namespace, - Priority: a.evalPriority, - Type: a.job.Type, + Namespace: a.jobState.Job.Namespace, + Priority: a.jobState.EvalPriority, + Type: a.jobState.Job.Type, TriggeredBy: structs.EvalTriggerRetryFailedAlloc, - JobID: a.job.ID, - JobModifyIndex: a.job.ModifyIndex, + JobID: a.jobState.Job.ID, + JobModifyIndex: a.jobState.Job.ModifyIndex, Status: structs.EvalStatusPending, WaitUntil: nextReschedTime, } @@ -1530,12 +1504,12 @@ func (a *AllocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName eval := &structs.Evaluation{ ID: uuid.Generate(), - Namespace: a.job.Namespace, - Priority: a.evalPriority, - Type: a.job.Type, + Namespace: a.jobState.Job.Namespace, + Priority: a.jobState.EvalPriority, + Type: a.jobState.Job.Type, TriggeredBy: structs.EvalTriggerMaxDisconnectTimeout, - JobID: a.job.ID, - JobModifyIndex: a.job.ModifyIndex, + JobID: a.jobState.Job.ID, + JobModifyIndex: a.jobState.Job.ModifyIndex, Status: structs.EvalStatusPending, StatusDescription: 
sstructs.DescDisconnectTimeoutFollowupEval, WaitUntil: nextReschedTime, @@ -1554,12 +1528,12 @@ func (a *AllocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName // Create a new eval for the new batch eval = &structs.Evaluation{ ID: uuid.Generate(), - Namespace: a.job.Namespace, - Priority: a.evalPriority, - Type: a.job.Type, + Namespace: a.jobState.Job.Namespace, + Priority: a.jobState.EvalPriority, + Type: a.jobState.Job.Type, TriggeredBy: structs.EvalTriggerMaxDisconnectTimeout, - JobID: a.job.ID, - JobModifyIndex: a.job.ModifyIndex, + JobID: a.jobState.Job.ID, + JobModifyIndex: a.jobState.Job.ModifyIndex, Status: structs.EvalStatusPending, StatusDescription: sstructs.DescDisconnectTimeoutFollowupEval, WaitUntil: timeoutInfo.rescheduleTime, diff --git a/scheduler/reconciler/reconcile_cluster_test.go b/scheduler/reconciler/reconcile_cluster_test.go index 22622478a..d49802eff 100644 --- a/scheduler/reconciler/reconcile_cluster_test.go +++ b/scheduler/reconciler/reconcile_cluster_test.go @@ -350,8 +350,18 @@ func TestReconciler_Place_NoExisting(t *testing.T) { job := mock.Job() reconciler := NewAllocReconciler( - testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, nil, "", job.Priority, ClusterState{nil, true, time.Now().UTC()}) + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: nil, + EvalPriority: job.Priority, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -389,8 +399,19 @@ func TestReconciler_Place_Existing(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -430,8 +451,20 @@ func TestReconciler_ScaleDown_Partial(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) + r := reconciler.Compute() // Assert the correct results @@ -472,8 +505,19 @@ func TestReconciler_ScaleDown_Zero(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := 
reconciler.Compute() // Assert the correct results @@ -515,8 +559,19 @@ func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { expectedStopped = append(expectedStopped, i%2) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -553,8 +608,19 @@ func TestReconciler_Inplace(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnInplace, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -594,8 +660,19 @@ func TestReconciler_Inplace_ScaleUp(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnInplace, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -637,8 +714,19 @@ func TestReconciler_Inplace_ScaleDown(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnInplace, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -701,8 +789,20 @@ func TestReconciler_Inplace_Rollback(t *testing.T) { allocs[0].ID: allocUpdateFnInplace, }, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFn, - false, job.ID, job, nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalID: uuid.Generate(), + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -747,8 +847,19 @@ func TestReconciler_Destructive(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), 
allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -783,8 +894,19 @@ func TestReconciler_DestructiveMaxParallel(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -822,8 +944,19 @@ func TestReconciler_Destructive_ScaleUp(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -864,8 +997,19 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -912,8 +1056,19 @@ func TestReconciler_LostNode(t *testing.T) { tainted[n.ID] = n } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -965,8 +1120,19 @@ func TestReconciler_LostNode_ScaleUp(t *testing.T) { tainted[n.ID] = n } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + 
DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1018,8 +1184,19 @@ func TestReconciler_LostNode_ScaleDown(t *testing.T) { tainted[n.ID] = n } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1066,8 +1243,19 @@ func TestReconciler_DrainNode(t *testing.T) { tainted[n.ID] = n } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1121,8 +1309,19 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { tainted[n.ID] = n } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1177,8 +1376,19 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { tainted[n.ID] = n } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1225,8 +1435,19 @@ func TestReconciler_RemovedTG(t *testing.T) { newName := "different" job.TaskGroups[0].Name = newName - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1290,8 +1511,19 @@ func 
TestReconciler_JobStopped(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: c.jobID, + Job: c.job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1359,8 +1591,19 @@ func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: c.jobID, + Job: c.job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() must.SliceEmpty(t, r.Stop) @@ -1399,8 +1642,19 @@ func TestReconciler_MultiTG(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1454,8 +1708,19 @@ func TestReconciler_MultiTG_SingleUpdateBlock(t *testing.T) { DesiredTotal: 10, } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -1531,8 +1796,20 @@ func TestReconciler_RescheduleLater_Batch(t *testing.T) { // Mark one as complete allocs[5].ClientStatus = structs.AllocClientStatusComplete - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: true, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalID: uuid.Generate(), + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Two reschedule attempts were already made, one more can be made at a future time @@ -1612,8 +1889,20 @@ func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { FinishedAt: now.Add(10 * time.Second)}} } - reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: true, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalID: uuid.Generate(), + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Verify that two follow up evals were created @@ -1708,8 +1997,19 @@ func TestReconciler_RescheduleNow_Batch(t *testing.T) { // Mark one as complete allocs[5].ClientStatus = structs.AllocClientStatusComplete - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: true, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -1783,8 +2083,20 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) { // Mark one as desired state stop allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, uuid.Generate(), 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalID: uuid.Generate(), + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Should place a new placement and create a follow up eval for the delayed reschedule @@ -1855,8 +2167,19 @@ func TestReconciler_Service_ClientStatusComplete(t *testing.T) { // Mark one as client status complete allocs[4].ClientStatus = structs.AllocClientStatusComplete - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Should place a new placement for the alloc that was marked complete @@ -1914,8 +2237,19 @@ func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { allocs[4].ClientStatus = structs.AllocClientStatusFailed allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, 
+ Now: time.Now().UTC(), + }) r := reconciler.Compute() // Should place a new placement for the alloc that was marked stopped @@ -1991,8 +2325,19 @@ func TestReconciler_RescheduleNow_Service(t *testing.T) { // Mark one as desired state stop allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2070,8 +2415,19 @@ func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { FinishedAt: now.Add(-4 * time.Second)}} allocs[1].ClientStatus = structs.AllocClientStatusFailed - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: now, + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2152,8 +2508,20 @@ func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { allocs[1].FollowupEvalID = evalID now = now.Add(-30 * time.Second) - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, evalID, 50, ClusterState{nil, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalID: evalID, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: now, + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2261,8 +2629,19 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job2, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2385,8 +2764,19 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, "", 50, ClusterState{nil, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job2, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, 
+ Now: now, + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2512,8 +2902,19 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, "", 50, ClusterState{nil, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job2, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: now, + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2579,8 +2980,19 @@ func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { // Mark one as desired state stop allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Should place 1 - one is a new placement to make up the desired count of 5 @@ -2669,8 +3081,19 @@ func TestReconciler_CancelDeployment_JobStop(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, - c.deployment, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: c.jobID, + Job: c.job, + DeploymentCurrent: c.deployment, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() var updates []*structs.DeploymentStatusUpdate @@ -2749,8 +3172,19 @@ func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - c.deployment, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: c.deployment, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() var updates []*structs.DeploymentStatusUpdate @@ -2801,8 +3235,19 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: 
time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -2849,8 +3294,19 @@ func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnInplace, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -2896,8 +3352,19 @@ func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -2945,8 +3412,19 @@ func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3026,8 +3504,19 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} mockUpdateFn := allocUpdateFnMock(map[string]AllocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3094,8 +3583,19 @@ func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + 
EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3171,8 +3671,19 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing allocs = append(allocs, newAlloc) mockUpdateFn := allocUpdateFnMock(map[string]AllocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3248,8 +3759,19 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { tainted[n.ID] = n mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3323,8 +3845,19 @@ func TestReconciler_LostNode_Canary(t *testing.T) { tainted[n.ID] = n mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3392,8 +3925,19 @@ func TestReconciler_StopOldCanaries(t *testing.T) { allocs = append(allocs, canary) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, - allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -3450,8 +3994,19 @@ func TestReconciler_NewCanaries(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + 
EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -3503,8 +4058,19 @@ func TestReconciler_NewCanaries_CountGreater(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -3559,8 +4125,19 @@ func TestReconciler_NewCanaries_MultiTG(t *testing.T) { } } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -3617,8 +4194,19 @@ func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -3670,8 +4258,19 @@ func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -3752,8 +4351,19 @@ func TestReconciler_NewCanaries_FillNames(t *testing.T) { allocs = append(allocs, canary) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + 
EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3824,8 +4434,19 @@ func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -3901,8 +4522,19 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ @@ -4003,8 +4635,19 @@ func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4087,8 +4730,19 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4175,8 +4829,19 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: 
true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4234,8 +4899,19 @@ func TestReconciler_CompleteDeployment(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4292,8 +4968,19 @@ func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, - job, d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ @@ -4390,8 +5077,19 @@ func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4462,8 +5160,19 @@ func TestReconciler_FailedDeployment_NewJob(t *testing.T) { jobNew := job.Copy() jobNew.Version += 100 - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: jobNew, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // reconciler sets the creation time automatically so we have to copy here, @@ -4520,8 +5229,19 @@ func TestReconciler_MarkDeploymentComplete(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ @@ -4592,8 +5312,19 @@ func 
TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { } mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4630,8 +5361,19 @@ func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() d := structs.NewDeployment(job, 50, r.Deployment.CreateTime) @@ -4685,8 +5427,19 @@ func TestReconciler_Batch_Rerun(t *testing.T) { job2 := job.Copy() job2.CreateIndex++ - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: true, + JobID: job2.ID, + Job: job2, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert the correct results @@ -4749,8 +5502,19 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { StartedAt: now.Add(-1 * time.Hour), FinishedAt: now.Add(-10 * time.Second)}} - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert that no rescheduled placements were created @@ -4807,8 +5571,19 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { allocs[i].DesiredTransition.Reschedule = pointer.Of(true) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert that no rescheduled placements were created @@ -4895,8 +5670,19 @@ func 
TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { allocs = append(allocs, new) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: jobv2, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ @@ -4960,8 +5746,19 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnDestructive, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: d, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Assert that rescheduled placements were created @@ -5025,8 +5822,19 @@ func TestReconciler_ForceReschedule_Service(t *testing.T) { // Mark DesiredTransition ForceReschedule allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)} - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -5108,8 +5916,19 @@ func TestReconciler_RescheduleNot_Service(t *testing.T) { // Mark one as desired state stop allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -5509,8 +6328,20 @@ func TestReconciler_Disconnected_Client(t *testing.T) { if tc.maxDisconnect != nil { now = time.Now().Add(*tc.maxDisconnect * 20) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, - nil, allocs, "", 50, ClusterState{map[string]*structs.Node{testNode.ID: testNode}, true, now}) + + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: tc.isBatch, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: map[string]*structs.Node{testNode.ID: testNode}, + 
SupportsDisconnectedClients: true, + Now: now, + }) mpc := &mockPicker{ result: tc.pickResult, @@ -5597,8 +6428,19 @@ func TestReconciler_RescheduleNot_Batch(t *testing.T) { // Mark one as complete allocs[5].ClientStatus = structs.AllocClientStatusComplete - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: true, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: now, + }) r := reconciler.Compute() // Verify that no follow up evals were created @@ -5629,8 +6471,19 @@ func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) { nodes := buildDisconnectedNodes(allocs, 2) now := time.Now().UTC() - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nodes, true, now}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nodes, + SupportsDisconnectedClients: true, + Now: now, + }) results := reconciler.Compute() // Verify that 1 follow up eval was created with the values we expect. @@ -5690,8 +6543,19 @@ func TestReconciler_Disconnect_UpdateJobAfterReconnect(t *testing.T) { }, } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, "", 50, ClusterState{nil, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), allocUpdateFnInplace, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: nil, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) results := reconciler.Compute() // Assert both allocations will be updated. @@ -6040,8 +6904,19 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) { allocs = append(allocs, tc.canaryAllocs[disconnectedNode]...) 
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, updatedJob.ID, updatedJob, - deployment, allocs, "", 50, ClusterState{tainted, true, time.Now().UTC()}) + reconciler := NewAllocReconciler( + testlog.HCLogger(t), mockUpdateFn, ReconcilerState{ + JobIsBatch: false, + JobID: updatedJob.ID, + Job: updatedJob, + DeploymentCurrent: deployment, + ExistingAllocs: allocs, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: tainted, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) result := reconciler.Compute() // Assert the correct results @@ -6190,12 +7065,20 @@ func TestReconciler_ComputeDeploymentPaused(t *testing.T) { } reconciler := NewAllocReconciler( - testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, deployment, - nil, "", job.Priority, ClusterState{nil, true, time.Now().UTC()}) - + testlog.HCLogger(t), allocUpdateFnIgnore, ReconcilerState{ + JobIsBatch: false, + JobID: job.ID, + Job: job, + DeploymentCurrent: deployment, + EvalPriority: 50, + }, ClusterState{ + TaintedNodes: nil, + SupportsDisconnectedClients: true, + Now: time.Now().UTC(), + }) reconciler.Compute() - must.Eq(t, tc.expected, reconciler.deploymentPaused) + must.Eq(t, tc.expected, reconciler.jobState.DeploymentPaused) }) } } From 74389cc3060998e2670aca4442a26170ab934def Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 23 Jun 2025 10:02:12 -0400 Subject: [PATCH 24/32] update Vault API dependency and pin HCL dependencies (#26089) For reasons of backwards compatibility, Nomad uses an older branch of HCL1 (`v1.0.1-nomad`) and HCL2 (`v2.20.2-nomad-1`) and backports a limited set of changes to those branches. But the Vault API also has their own HCL1 branch, currently tagged as `v1.0.1-vault-7`. Normally this isn't a problem because Nomad pins to our own branch and we don't call any of the Vault API package's HCL code anyways. But in Vault's branch some functions were changed that break our build unless we backport them. We've backported enough of Vault's changes to make our HCL1 branch build, and now have tags on the HCL repo so that we can pin to specific tags instead of random commits. Fixes: https://hashicorp.atlassian.net/browse/NMD-850 Fixes: https://github.com/hashicorp/nomad/pull/26006 Ref: https://github.com/hashicorp/hcl/pull/760 --- go.mod | 8 ++++---- go.sum | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 7773721ee..62db290d8 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.24.4 // Pinned dependencies are noted in github.com/hashicorp/nomad/issues/11826. replace ( github.com/Microsoft/go-winio => github.com/endocrimes/go-winio v0.4.13-0.20190628114223-fb47a8b41948 - github.com/hashicorp/hcl => github.com/hashicorp/hcl v1.0.1-0.20201016140508-a07e7d50bbee + github.com/hashicorp/hcl => github.com/hashicorp/hcl v1.0.1-nomad-1 ) // Nomad is built using the current source of the API module. 
@@ -77,8 +77,8 @@ require ( github.com/hashicorp/go-uuid v1.0.3 github.com/hashicorp/go-version v1.7.0 github.com/hashicorp/golang-lru/v2 v2.0.7 - github.com/hashicorp/hcl v1.0.1-vault-3 - github.com/hashicorp/hcl/v2 v2.20.2-0.20240517235513-55d9c02d147d + github.com/hashicorp/hcl v1.0.1-vault-7 + github.com/hashicorp/hcl/v2 v2.20.2-nomad-1 github.com/hashicorp/hil v0.0.0-20210521165536-27a72121fd40 github.com/hashicorp/memberlist v0.5.3 github.com/hashicorp/net-rpc-msgpackrpc/v2 v2.0.1 @@ -87,7 +87,7 @@ require ( github.com/hashicorp/raft-autopilot v0.1.6 github.com/hashicorp/raft-boltdb/v2 v2.3.1 github.com/hashicorp/serf v0.10.2 - github.com/hashicorp/vault/api v1.16.0 + github.com/hashicorp/vault/api v1.20.0 github.com/hashicorp/yamux v0.1.2 github.com/hpcloud/tail v1.0.1-0.20170814160653-37f427138745 github.com/klauspost/cpuid/v2 v2.2.10 diff --git a/go.sum b/go.sum index 216df820a..6216aa312 100644 --- a/go.sum +++ b/go.sum @@ -1244,10 +1244,10 @@ github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iP github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= -github.com/hashicorp/hcl v1.0.1-0.20201016140508-a07e7d50bbee h1:8B4HqvMUtYSjsGkYjiQGStc9pXffY2J+Z2SPQAj+wMY= -github.com/hashicorp/hcl v1.0.1-0.20201016140508-a07e7d50bbee/go.mod h1:gwlu9+/P9MmKtYrMsHeFRZPXj2CTPm11TDnMeaRHS7g= -github.com/hashicorp/hcl/v2 v2.20.2-0.20240517235513-55d9c02d147d h1:7abftkc86B+tlA/0cDy5f6C4LgWfFOCpsGg3RJZsfbw= -github.com/hashicorp/hcl/v2 v2.20.2-0.20240517235513-55d9c02d147d/go.mod h1:62ZYHrXgPoX8xBnzl8QzbWq4dyDsDtfCRgIq1rbJEvA= +github.com/hashicorp/hcl v1.0.1-nomad-1 h1:0hOV+/m12cRBAfvHpVOgGdM68XU7uTxGafEuUB2UES8= +github.com/hashicorp/hcl v1.0.1-nomad-1/go.mod h1:gwlu9+/P9MmKtYrMsHeFRZPXj2CTPm11TDnMeaRHS7g= +github.com/hashicorp/hcl/v2 v2.20.2-nomad-1 h1:FVr/cgKVheQ9KGEq4sFAiDqls5Yp2Y5+K/WL1Wm5l/s= +github.com/hashicorp/hcl/v2 v2.20.2-nomad-1/go.mod h1:62ZYHrXgPoX8xBnzl8QzbWq4dyDsDtfCRgIq1rbJEvA= github.com/hashicorp/hil v0.0.0-20210521165536-27a72121fd40 h1:ExwaL+hUy1ys2AWDbsbh/lxQS2EVCYxuj0LoyLTdB3Y= github.com/hashicorp/hil v0.0.0-20210521165536-27a72121fd40/go.mod h1:n2TSygSNwsLJ76m8qFXTSc7beTb+auJxYdqrnoqwZWE= github.com/hashicorp/mdns v1.0.5 h1:1M5hW1cunYeoXOqHwEb/GBDDHAFo0Yqb/uz/beC6LbE= @@ -1270,8 +1270,8 @@ github.com/hashicorp/raft-boltdb/v2 v2.3.1/go.mod h1:n4S+g43dXF1tqDT+yzcXHhXM6y7 github.com/hashicorp/serf v0.10.2 h1:m5IORhuNSjaxeljg5DeQVDlQyVkhRIjJDimbkCa8aAc= github.com/hashicorp/serf v0.10.2/go.mod h1:T1CmSGfSeGfnfNy/w0odXQUR1rfECGd2Qdsp84DjOiY= github.com/hashicorp/vault/api v1.10.0/go.mod h1:jo5Y/ET+hNyz+JnKDt8XLAdKs+AM0G5W0Vp1IrFI8N8= -github.com/hashicorp/vault/api v1.16.0 h1:nbEYGJiAPGzT9U4oWgaaB0g+Rj8E59QuHKyA5LhwQN4= -github.com/hashicorp/vault/api v1.16.0/go.mod h1:KhuUhzOD8lDSk29AtzNjgAu2kxRA9jL9NAbkFlqvkBA= +github.com/hashicorp/vault/api v1.20.0 h1:KQMHElgudOsr+IbJgmbjHnCTxEpKs9LnozA1D3nozU4= +github.com/hashicorp/vault/api v1.20.0/go.mod h1:GZ4pcjfzoOWpkJ3ijHNpEoAxKEsBJnVljyTe3jM2Sms= github.com/hashicorp/vault/api/auth/kubernetes v0.5.0 h1:CXO0fD7M3iCGovP/UApeHhPcH4paDFKcu7AjEXi94rI= github.com/hashicorp/vault/api/auth/kubernetes v0.5.0/go.mod h1:afrElBIO9Q4sHFVuVWgNevG4uAs1bT2AZFA9aEiI608= github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 h1:O/pT5C1Q3mVXMyuqg7yuAWUg/jMZR1/0QTzTRdNR6Uw= From 
05c3b5050c0cec4b3def03b0f331c1e301088911 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Mon, 23 Jun 2025 17:13:22 +0200 Subject: [PATCH 25/32] ci: align CE build command with ENT (#26108) In hashicorp/nomad-enterprise#2592 we introduced a divergence in how Nomad CE and ENT build their binaries. Nomad CE used a more sophisticated approach, setting uid, gid and home environment variables in the docker run command. Despite my (and others') best efforts, we were not able to do the same in the ENT repo, which relies on special git settings that allow it to pull dependencies from private repositories, and left a different docker run command there that just inherited the GHA runner user and copied the resulting tarball instead of moving it. #26090 then attempted to remedy #25910, which resulted from the docker run command ignoring ${{ env.GO_TAGS }} when run with a custom --env, but the resulting backport broke ENT builds. This PR restores the ENT behavior of building Nomad with the GHA runner user, thus inheriting the runner's environment on ENT. --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d91120023..cbfa593f5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -211,8 +211,8 @@ jobs: CGO_ENABLED: 1 run: | go clean -cache - docker run --user "$(id --user):$(id --group)" --env HOME=/tmp --env GO_TAGS="${{env.GO_TAGS}}" -v "$(pwd)":/build localhost:5000/nomad-builder:${{ github.sha }} make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip - mv pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip + docker run --env GO_TAGS="${{env.GO_TAGS}}" -v "$(pwd)":/build localhost:5000/nomad-builder:${{ github.sha }} make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip + cp pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: name: ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip From 13e32429b29d16c04a3a5a8dd8d9659bc356acad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 17:39:57 +0200 Subject: [PATCH 26/32] chore(deps): bump github.com/aws/aws-sdk-go-v2/config (#26098) Bumps [github.com/aws/aws-sdk-go-v2/config](https://github.com/aws/aws-sdk-go-v2) from 1.29.16 to 1.29.17. - [Release notes](https://github.com/aws/aws-sdk-go-v2/releases) - [Changelog](https://github.com/aws/aws-sdk-go-v2/blob/main/changelog-template.json) - [Commits](https://github.com/aws/aws-sdk-go-v2/compare/config/v1.29.16...config/v1.29.17) --- updated-dependencies: - dependency-name: github.com/aws/aws-sdk-go-v2/config dependency-version: 1.29.17 dependency-type: direct:production update-type: version-update:semver-patch ...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 24 ++++++++++++------------ go.sum | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/go.mod b/go.mod index 62db290d8..99dd1545a 100644 --- a/go.mod +++ b/go.mod @@ -16,9 +16,9 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/Microsoft/go-winio v0.6.2 github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e - github.com/aws/aws-sdk-go-v2/config v1.29.16 - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.31 - github.com/aws/smithy-go v1.22.3 + github.com/aws/aws-sdk-go-v2/config v1.29.17 + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32 + github.com/aws/smithy-go v1.22.4 github.com/container-storage-interface/spec v1.11.0 github.com/containerd/errdefs v1.0.0 github.com/containerd/go-cni v1.1.12 @@ -182,18 +182,18 @@ require ( github.com/armon/go-metrics v0.4.1 // indirect github.com/armon/go-radix v1.0.0 // indirect github.com/aws/aws-sdk-go v1.55.6 // indirect - github.com/aws/aws-sdk-go-v2 v1.36.4 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.69 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35 // indirect + github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.70 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect github.com/aws/aws-sdk-go-v2/service/ec2 v1.200.0 // indirect github.com/aws/aws-sdk-go-v2/service/ecs v1.53.8 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.16 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.25.4 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.2 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.33.21 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect github.com/bgentry/speakeasy v0.1.0 // indirect diff --git a/go.sum b/go.sum index 6216aa312..64eff1b8c 100644 --- a/go.sum +++ b/go.sum @@ -731,36 +731,36 @@ github.com/aws/aws-sdk-go v1.30.27/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZve github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go v1.55.6 h1:cSg4pvZ3m8dgYcgqB97MrcdjUmZ1BeMYKUxMMB89IPk= github.com/aws/aws-sdk-go v1.55.6/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v1.36.4 h1:GySzjhVvx0ERP6eyfAbAuAXLtAda5TEy19E5q5W8I9E= -github.com/aws/aws-sdk-go-v2 v1.36.4/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= -github.com/aws/aws-sdk-go-v2/config v1.29.16 h1:XkruGnXX1nEZ+Nyo9v84TzsX+nj86icbFAeust6uo8A= -github.com/aws/aws-sdk-go-v2/config v1.29.16/go.mod h1:uCW7PNjGwZ5cOGZ5jr8vCWrYkGIhPoTNV23Q/tpHKzg= 
-github.com/aws/aws-sdk-go-v2/credentials v1.17.69 h1:8B8ZQboRc3uaIKjshve/XlvJ570R7BKNy3gftSbS178= -github.com/aws/aws-sdk-go-v2/credentials v1.17.69/go.mod h1:gPME6I8grR1jCqBFEGthULiolzf/Sexq/Wy42ibKK9c= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.31 h1:oQWSGexYasNpYp4epLGZxxjsDo8BMBh6iNWkTXQvkwk= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.31/go.mod h1:nc332eGUU+djP3vrMI6blS0woaCfHTe3KiSQUVTMRq0= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35 h1:o1v1VFfPcDVlK3ll1L5xHsaQAFdNtZ5GXnNR7SwueC4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35/go.mod h1:rZUQNYMNG+8uZxz9FOerQJ+FceCiodXvixpeRtdESrU= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35 h1:R5b82ubO2NntENm3SAm0ADME+H630HomNJdgv+yZ3xw= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35/go.mod h1:FuA+nmgMRfkzVKYDNEqQadvEMxtxl9+RLT9ribCwEMs= +github.com/aws/aws-sdk-go-v2 v1.36.5 h1:0OF9RiEMEdDdZEMqF9MRjevyxAQcf6gY+E7vwBILFj0= +github.com/aws/aws-sdk-go-v2 v1.36.5/go.mod h1:EYrzvCCN9CMUTa5+6lf6MM4tq3Zjp8UhSGR/cBsjai0= +github.com/aws/aws-sdk-go-v2/config v1.29.17 h1:jSuiQ5jEe4SAMH6lLRMY9OVC+TqJLP5655pBGjmnjr0= +github.com/aws/aws-sdk-go-v2/config v1.29.17/go.mod h1:9P4wwACpbeXs9Pm9w1QTh6BwWwJjwYvJ1iCt5QbCXh8= +github.com/aws/aws-sdk-go-v2/credentials v1.17.70 h1:ONnH5CM16RTXRkS8Z1qg7/s2eDOhHhaXVd72mmyv4/0= +github.com/aws/aws-sdk-go-v2/credentials v1.17.70/go.mod h1:M+lWhhmomVGgtuPOhO85u4pEa3SmssPTdcYpP/5J/xc= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32 h1:KAXP9JSHO1vKGCr5f4O6WmlVKLFFXgWYAGoJosorxzU= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32/go.mod h1:h4Sg6FQdexC1yYG9RDnOvLbW1a/P986++/Y/a+GyEM8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36 h1:SsytQyTMHMDPspp+spo7XwXTP44aJZZAC7fBV2C5+5s= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36/go.mod h1:Q1lnJArKRXkenyog6+Y+zr7WDpk4e6XlR6gs20bbeNo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36 h1:i2vNHQiXUvKhs3quBR6aqlgJaiaexz/aNvdCktW/kAM= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36/go.mod h1:UdyGa7Q91id/sdyHPwth+043HhmP6yP9MBHgbZM0xo8= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= github.com/aws/aws-sdk-go-v2/service/ec2 v1.200.0 h1:3hH6o7Z2WeE1twvz44Aitn6Qz8DZN3Dh5IB4Eh2xq7s= github.com/aws/aws-sdk-go-v2/service/ec2 v1.200.0/go.mod h1:I76S7jN0nfsYTBtuTgTsJtK2Q8yJVDgrLr5eLN64wMA= github.com/aws/aws-sdk-go-v2/service/ecs v1.53.8 h1:v1OectQdV/L+KSFSiqK00fXGN8FbaljRfNFysmWB8D0= github.com/aws/aws-sdk-go-v2/service/ecs v1.53.8/go.mod h1:F0DbgxpvuSvtYun5poG67EHLvci4SgzsMVO6SsPUqKk= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.16 h1:/ldKrPPXTC421bTNWrUIpq3CxwHwRI/kpc+jPUTJocM= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.16/go.mod h1:5vkf/Ws0/wgIMJDQbjI4p2op86hNW6Hie5QtebrDgT8= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.4 h1:EU58LP8ozQDVroOEyAfcq0cGc5R/FTZjVoYJ6tvby3w= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.4/go.mod h1:CrtOgCcysxMvrCoHnvNAD7PHWclmoFG78Q2xLK0KKcs= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.2 h1:XB4z0hbQtpmBnb1FQYvKaCM7UsS6Y/u8jVBwIUGeCTk= -github.com/aws/aws-sdk-go-v2/service/ssooidc 
v1.30.2/go.mod h1:hwRpqkRxnQ58J9blRDrB4IanlXCpcKmsC83EhG77upg= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.21 h1:nyLjs8sYJShFYj6aiyjCBI3EcLn1udWrQTjEF+SOXB0= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.21/go.mod h1:EhdxtZ+g84MSGrSrHzZiUm9PYiZkrADNja15wtRJSJo= -github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= -github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 h1:CXV68E2dNqhuynZJPB80bhPQwAKqBWVer887figW6Jc= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4/go.mod h1:/xFi9KtvBXP97ppCz1TAEvU1Uf66qvid89rbem3wCzQ= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17 h1:t0E6FzREdtCsiLIoLCWsYliNsRBgyGD/MCK571qk4MI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17/go.mod h1:ygpklyoaypuyDvOM5ujWGrYWpAK3h7ugnmKCU/76Ys4= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.5 h1:AIRJ3lfb2w/1/8wOOSqYb9fUKGwQbtysJ2H1MofRUPg= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.5/go.mod h1:b7SiVprpU+iGazDUqvRSLf5XmCdn+JtT1on7uNL6Ipc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3 h1:BpOxT3yhLwSJ77qIY3DoHAQjZsc4HEGfMCE4NGy3uFg= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3/go.mod h1:vq/GQR1gOFLquZMSrxUK/cpvKCNVYibNyJ1m7JrU88E= +github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 h1:NFOJ/NXEGV4Rq//71Hs1jC/NvPs1ezajK+yQmkwnPV0= +github.com/aws/aws-sdk-go-v2/service/sts v1.34.0/go.mod h1:7ph2tGpfQvwzgistp2+zga9f+bCjlQJPkPUmMgDSD7w= +github.com/aws/smithy-go v1.22.4 h1:uqXzVZNuNexwc/xrh6Tb56u89WDlJY6HS+KC0S4QSjw= +github.com/aws/smithy-go v1.22.4/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= From cda267814f1237e8e0ee0a58c3d78cbe020541a5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 17:51:26 +0200 Subject: [PATCH 27/32] chore(deps): bump golang.org/x/crypto from 0.38.0 to 0.39.0 (#26101) Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.38.0 to 0.39.0. - [Commits](https://github.com/golang/crypto/compare/v0.38.0...v0.39.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-version: 0.39.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 99dd1545a..03344d100 100644 --- a/go.mod +++ b/go.mod @@ -127,7 +127,7 @@ require ( github.com/zclconf/go-cty-yaml v1.1.0 go.etcd.io/bbolt v1.4.1 go.uber.org/goleak v1.3.0 - golang.org/x/crypto v0.38.0 + golang.org/x/crypto v0.39.0 golang.org/x/mod v0.25.0 golang.org/x/sync v0.15.0 golang.org/x/sys v0.33.0 @@ -337,7 +337,7 @@ require ( golang.org/x/net v0.40.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.25.0 // indirect + golang.org/x/text v0.26.0 // indirect golang.org/x/tools v0.33.0 // indirect google.golang.org/api v0.217.0 // indirect google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f // indirect diff --git a/go.sum b/go.sum index 64eff1b8c..f4ff01784 100644 --- a/go.sum +++ b/go.sum @@ -1729,8 +1729,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -2065,8 +2065,8 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 949b23602c6d1170eba5723d77f37be87e178378 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Mon, 23 Jun 2025 13:31:11 -0400 Subject: [PATCH 28/32] e2e: ui: bump playwright version (#26119) --- e2e/ui/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/ui/run.sh b/e2e/ui/run.sh index adcbbb5ab..bc7ddd689 100755 --- a/e2e/ui/run.sh +++ b/e2e/ui/run.sh @@ -33,7 +33,7 @@ EOF } -IMAGE="mcr.microsoft.com/playwright:v1.52.0-jammy" +IMAGE="mcr.microsoft.com/playwright:v1.53.1-jammy" pushd $(dirname "${BASH_SOURCE[0]}") > 
/dev/null run_tests() { From 1e328e8341518eacceeae9c2db6206a54773a7d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20B=C4=99za?= <43823276+pawelbeza@users.noreply.github.com> Date: Mon, 23 Jun 2025 20:16:35 +0200 Subject: [PATCH 29/32] Docs: fix indentation in job annotations description for `/v1/job/:job_id/plan` response (#26115) --- website/content/api-docs/jobs.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/content/api-docs/jobs.mdx b/website/content/api-docs/jobs.mdx index 693e7ba59..5bca56111 100644 --- a/website/content/api-docs/jobs.mdx +++ b/website/content/api-docs/jobs.mdx @@ -2259,7 +2259,7 @@ $ curl \ occurred for the Task Group. - `Annotations` - Annotations include the `DesiredTGUpdates`, which tracks what -- the scheduler would do given enough resources for each Task Group. + the scheduler would do given enough resources for each Task Group. ## Force New Periodic Instance From 9cbadf3e34efe11b4a115509cad9745b51c8c138 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 21:06:14 +0200 Subject: [PATCH 30/32] chore(deps): bump google.golang.org/grpc from 1.72.2 to 1.73.0 (#26102) --- updated-dependencies: - dependency-name: google.golang.org/grpc dependency-version: 1.73.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 24 ++++++++++++------------ go.sum | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/go.mod b/go.mod index 03344d100..aca931b89 100644 --- a/go.mod +++ b/go.mod @@ -132,14 +132,14 @@ require ( golang.org/x/sync v0.15.0 golang.org/x/sys v0.33.0 golang.org/x/time v0.12.0 - google.golang.org/grpc v1.72.2 + google.golang.org/grpc v1.73.0 google.golang.org/protobuf v1.36.6 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 oss.indeed.com/go/libtime v1.6.0 ) require ( - cel.dev/expr v0.20.0 // indirect + cel.dev/expr v0.23.0 // indirect cloud.google.com/go v0.118.0 // indirect cloud.google.com/go/auth v0.14.0 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect @@ -170,7 +170,7 @@ require ( github.com/AzureAD/microsoft-authentication-library-for-go v1.3.3 // indirect github.com/BurntSushi/toml v1.3.2 // indirect github.com/DataDog/datadog-go v3.2.0+incompatible // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.26.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.49.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.49.0 // indirect github.com/Masterminds/goutils v1.1.1 // indirect @@ -206,7 +206,7 @@ require ( github.com/cilium/ebpf v0.16.0 // indirect github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible // indirect github.com/circonus-labs/circonusllhist v0.1.3 // indirect - github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect + github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f // indirect github.com/containerd/console v1.0.4 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect @@ -324,15 +324,15 @@ require ( github.com/yusufpapurcu/wmi v1.2.4 // indirect github.com/zeebo/errs v1.4.0 // indirect 
go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/detectors/gcp v1.34.0 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.35.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect - go.opentelemetry.io/otel v1.34.0 // indirect + go.opentelemetry.io/otel v1.35.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.3.0 // indirect - go.opentelemetry.io/otel/metric v1.34.0 // indirect - go.opentelemetry.io/otel/sdk v1.34.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.34.0 // indirect - go.opentelemetry.io/otel/trace v1.34.0 // indirect + go.opentelemetry.io/otel/metric v1.35.0 // indirect + go.opentelemetry.io/otel/sdk v1.35.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect + go.opentelemetry.io/otel/trace v1.35.0 // indirect golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect golang.org/x/net v0.40.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect @@ -341,8 +341,8 @@ require ( golang.org/x/tools v0.33.0 // indirect google.golang.org/api v0.217.0 // indirect google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463 // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/resty.v1 v1.12.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index f4ff01784..4f1f0da18 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -cel.dev/expr v0.20.0 h1:OunBvVCfvpWlt4dN7zg3FM6TDkzOePe1+foGJ9AXeeI= -cel.dev/expr v0.20.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= +cel.dev/expr v0.23.0 h1:wUb94w6OYQS4uXraxo9U+wUAs9jT47Xvl4iPgAwM2ss= +cel.dev/expr v0.23.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= @@ -676,8 +676,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/datadog-go v3.2.0+incompatible h1:qSG2N4FghB1He/r2mFrWKCaL7dXCilEuNEeAn20fdD4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.26.0 h1:f2Qw/Ehhimh5uO1fayV0QIW7DShEQqhtUfhYc+cBPlw= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.26.0/go.mod h1:2bIszWvQRlJVmJLiuLhukLImRjKPcYdzzsx6darK02A= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 h1:ErKg/3iS1AKcTkf3yixlZ54f9U1rljCkQyEXWUnIUxc= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0/go.mod h1:yAZHSGnqScoU556rBOVkwLze6WP5N+U11RHuWaGVxwY= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.49.0 h1:o90wcURuxekmXrtxmYWTyNla0+ZEHhud6DI1ZTxd1vI= 
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.49.0/go.mod h1:6fTWu4m3jocfUZLYF5KsZC1TUfRvEjs7lM4crme/irw= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.49.0 h1:jJKWl98inONJAr/IZrdFQUWcwUO95DLY1XMD1ZIut+g= @@ -818,8 +818,8 @@ github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20220314180256-7f1daf1720fc/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk= -github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f h1:C5bqEmzEPLsHm9Mv73lSE9e9bKV23aB1vxOsmZrkl3k= +github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/container-storage-interface/spec v1.11.0 h1:H/YKTOeUZwHtyPOr9raR+HgFmGluGCklulxDYxSdVNM= github.com/container-storage-interface/spec v1.11.0/go.mod h1:DtUvaQszPml1YJfIK7c00mlv6/g4wNMLanLgiUbKFRI= github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= @@ -1670,15 +1670,15 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/detectors/gcp v1.34.0 h1:JRxssobiPg23otYU5SbWtQC//snGVIM3Tx6QRzlQBao= -go.opentelemetry.io/contrib/detectors/gcp v1.34.0/go.mod h1:cV4BMFcscUR/ckqLkbfQmF0PRsq8w/lMGzdbCSveBHo= +go.opentelemetry.io/contrib/detectors/gcp v1.35.0 h1:bGvFt68+KTiAKFlacHW6AhA56GF2rS0bdD3aJYEnmzA= +go.opentelemetry.io/contrib/detectors/gcp v1.35.0/go.mod h1:qGWP8/+ILwMRIUf9uIVLloR1uo5ZYAslM4O6OqUi1DA= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I= go.opentelemetry.io/otel v1.3.0/go.mod h1:PWIKzi6JCp7sM0k9yZ43VX+T345uNbAkDKwHVjb2PTs= -go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= -go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= +go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= +go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.3.0 h1:R/OBkMoGgfy2fLhs2QhkCI1w4HLEQX92GCcJB6SSdNk= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.3.0/go.mod h1:VpP4/RMn8bv8gNo9uK7/IMY4mtWLELsS+JIP0inH0h4= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.3.0 h1:giGm8w67Ja7amYNfYMdme7xSp2pIxThWopw8+QP51Yk= @@ -1687,16 +1687,16 @@ 
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.3.0 h1:Ydage/ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.3.0/go.mod h1:QNX1aly8ehqqX1LEa6YniTU7VY9I6R3X/oPxhGdTceE= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0 h1:WDdP9acbMYjbKIyJUhTvtzj601sVJOqgWdUxSdR/Ysc= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0/go.mod h1:BLbf7zbNIONBLPwvFnwNHGj4zge8uTCM/UPIVW1Mq2I= -go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= -go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= +go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= +go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= go.opentelemetry.io/otel/sdk v1.3.0/go.mod h1:rIo4suHNhQwBIPg9axF8V9CA72Wz2mKF1teNrup8yzs= -go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= -go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= -go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= -go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= +go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= +go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= go.opentelemetry.io/otel/trace v1.3.0/go.mod h1:c/VDhno8888bvQYmbYLqe41/Ldmr/KKunbvWM4/fEjk= -go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= -go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= +go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= +go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.11.0/go.mod h1:QpEjXPrNQzrFDZgoTo49dgHR9RYRSrg3NAKnUGl9YpQ= go.opentelemetry.io/proto/otlp v0.15.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -2363,10 +2363,10 @@ google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOl google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f h1:387Y+JbxF52bmesc8kq1NyYIp33dnxCw6eiA7JMsTmw= google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:0joYwWwLQh18AOj8zMYeZLjzuqcYTU3/nC5JdCvC3JI= -google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a h1:nwKuGPlUAt+aR+pcrkfFRrTU1BVrSmYyYMxYbUIVHr0= -google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a/go.mod h1:3kWAYMk1I75K4vykHtKt2ycnOgpA6974V7bREqbsenU= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a h1:51aaUVRocpvUOSQKM6Q7VuoaktNIaMCLuhZB6DKksq4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a/go.mod h1:uRxBH1mhmO8PGhU89cMcHaXKZqO+OfakD8QQO0oYwlQ= +google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 h1:hE3bRWtU6uceqlh4fhrSnUyjKHMKB9KrTLLG+bc0ddM= +google.golang.org/genproto/googleapis/api 
v0.0.0-20250324211829-b45e905df463/go.mod h1:U90ffi8eUL9MwPcrJylN5+Mk2v3vuPDptd5yyNUiRR8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463 h1:e0AIkUUhxyBKh6ssZNrAMeqhA7RKUj42346d1y02i2g= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -2408,8 +2408,8 @@ google.golang.org/grpc v1.52.3/go.mod h1:pu6fVzoFb+NBYNAvQL08ic+lvB2IojljRYuun5v google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= google.golang.org/grpc v1.56.3/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= -google.golang.org/grpc v1.72.2 h1:TdbGzwb82ty4OusHWepvFWGLgIbNo1/SUynEN0ssqv8= -google.golang.org/grpc v1.72.2/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= +google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= From a3e096b0c98e3e76bcf153e0eaae4ef7d5184e42 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Tue, 24 Jun 2025 08:30:15 +0100 Subject: [PATCH 31/32] tls: Reset server TLS authenticator when TLS config reloaded. (#26107) The Nomad server uses an authenticator backend for RPC handling which includes TLS verification. This verification setting is configured based on the servers TLS configuration object and is built when a new server is constructed. The bug occurs when a servers TLS configuration is reloaded which can change the desired TLS verification handling. In this case, the authenticator is not updated, meaning the RPC mTLS verification is not modified, even if the configuration indicates it should. This change adds a new function on the authenticator to allow updating its TLS verification rule. This new function is called when a servers TLS configuration is reloaded. 
--- .changelog/26107.txt | 3 ++ nomad/auth/auth.go | 25 +++++++--- nomad/server.go | 3 ++ nomad/server_test.go | 112 ++++++++++++++++++++++++++++++------------- 4 files changed, 105 insertions(+), 38 deletions(-) create mode 100644 .changelog/26107.txt diff --git a/.changelog/26107.txt b/.changelog/26107.txt new file mode 100644 index 000000000..65f50366d --- /dev/null +++ b/.changelog/26107.txt @@ -0,0 +1,3 @@ +```release-note:bug +tls: Fixed a bug where reloading the Nomad server process with an updated `tls.verify_server_hostname` configuration parameter would not apply an update to internal RPC handler verification and require a full server restart +``` diff --git a/nomad/auth/auth.go b/nomad/auth/auth.go index 95a4fed7d..9435a2e61 100644 --- a/nomad/auth/auth.go +++ b/nomad/auth/auth.go @@ -10,6 +10,7 @@ import ( "net" "slices" "strings" + "sync/atomic" "time" "github.com/hashicorp/go-hclog" @@ -40,8 +41,13 @@ type Encrypter interface { } type Authenticator struct { - aclsEnabled bool - verifyTLS bool + aclsEnabled bool + + // verifyTLS is used to determine whether the server should verify TLS and + // is an atomic bool, so that the server TLS reload can update it at runtime + // without a race condition. + verifyTLS *atomic.Bool + logger hclog.Logger getState StateGetter getLeaderACL LeaderACLGetter @@ -69,9 +75,9 @@ type AuthenticatorConfig struct { } func NewAuthenticator(cfg *AuthenticatorConfig) *Authenticator { - return &Authenticator{ + a := Authenticator{ aclsEnabled: cfg.AclsEnabled, - verifyTLS: cfg.VerifyTLS, + verifyTLS: &atomic.Bool{}, logger: cfg.Logger.With("auth"), getState: cfg.StateFn, getLeaderACL: cfg.GetLeaderACLFn, @@ -84,8 +90,15 @@ func NewAuthenticator(cfg *AuthenticatorConfig) *Authenticator { "server." + cfg.Region + ".nomad", }, } + + a.verifyTLS.Store(cfg.VerifyTLS) + return &a } +// SetVerifyTLS is a helper method to set the verifyTLS field. This is used + // when the server TLS configuration is updated. +func (s *Authenticator) SetVerifyTLS(verifyTLS bool) { s.verifyTLS.Store(verifyTLS) } + // Authenticate extracts an AuthenticatedIdentity from the request context or // provided token and sets the identity on the request. The caller can extract // an acl.ACL, WorkloadIdentity, or other identifying tokens to use for @@ -255,7 +268,7 @@ func (s *Authenticator) AuthenticateServerOnly(ctx RPCContext, args structs.Requ identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} defer args.SetIdentity(identity) // always set the identity, even on errors - if s.verifyTLS && !ctx.IsStatic() { + if s.verifyTLS.Load() && !ctx.IsStatic() { tlsCert := ctx.Certificate() if tlsCert == nil { return nil, errors.New("missing certificate information") @@ -298,7 +311,7 @@ func (s *Authenticator) AuthenticateClientOnly(ctx RPCContext, args structs.Requ identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} defer args.SetIdentity(identity) // always set the identity, even on errors - if s.verifyTLS && !ctx.IsStatic() { + if s.verifyTLS.Load() && !ctx.IsStatic() { tlsCert := ctx.Certificate() if tlsCert == nil { return nil, errors.New("missing certificate information") diff --git a/nomad/server.go b/nomad/server.go index ca255783a..c6f7b0611 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -692,6 +692,9 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { // Kill any old listeners s.rpcCancel() + // Update the authenticator, so any changes in TLS verification are applied. 
+ s.auth.SetVerifyTLS(s.config.TLSConfig != nil && s.config.TLSConfig.EnableRPC && s.config.TLSConfig.VerifyServerHostname) + s.rpcTLS = incomingTLS s.connPool.ReloadTLS(tlsWrap) diff --git a/nomad/server_test.go b/nomad/server_test.go index a4422df38..0777fd24a 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -210,7 +210,6 @@ func connectionReset(msg string) bool { // upgrading from plaintext to TLS if the server's TLS configuration changes. func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { ci.Parallel(t) - assert := assert.New(t) const ( cafile = "../helper/tlsutil/testdata/nomad-agent-ca.pem" @@ -224,8 +223,15 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { }) defer cleanupS1() + originalRPCCodec := rpcClient(t, s1) + + // Upsert a node into state, so we can use the Node.GetClientAllocs RPC + // to test the TLS connection. + mockNode := mock.Node() + must.NoError(t, s1.State().UpsertNode(structs.MsgTypeTestSetup, 10, mockNode)) + // assert that the server started in plaintext mode - assert.Equal(s1.config.TLSConfig.CertFile, "") + must.Eq(t, s1.config.TLSConfig.CertFile, "") newTLSConfig := &config.TLSConfig{ EnableHTTP: true, @@ -236,29 +242,48 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { KeyFile: fookey, } - err := s1.reloadTLSConnections(newTLSConfig) - assert.Nil(err) - assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig)) + must.NoError(t, s1.reloadTLSConnections(newTLSConfig)) + + certEq, err := s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig) + must.NoError(t, err) + must.True(t, certEq) codec := rpcClient(t, s1) + tlsCodec := rpcClientWithTLS(t, s1, newTLSConfig) - node := mock.Node() - req := &structs.NodeRegisterRequest{ - Node: node, - WriteRequest: structs.WriteRequest{Region: "global"}, + req := &structs.NodeSpecificRequest{ + NodeID: mockNode.ID, + SecretID: mockNode.SecretID, + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: mockNode.SecretID, + }, } - var resp structs.GenericResponse - err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) - assert.NotNil(err) - assert.True(connectionReset(err.Error())) + var resp structs.NodeClientAllocsResponse + + // Perform a request using the original codec. This should fail with a + // permission denied error, as the server has now switched to TLS and is + // performing TLS verification. + err = msgpackrpc.CallWithCodec(originalRPCCodec, "Node.GetClientAllocs", req, &resp) + must.ErrorContains(t, err, "Permission denied") + + // Perform a request using a non-TLS codec. This should fail with a + // connection reset error, as the server has now switched to TLS. + err = msgpackrpc.CallWithCodec(codec, "Node.GetClientAllocs", req, &resp) + must.Error(t, err) + must.True(t, connectionReset(err.Error())) + + // Perform a request using the new TLS codec. This should succeed, as the + // server is now configured to accept and verify TLS connections. + err = msgpackrpc.CallWithCodec(tlsCodec, "Node.GetClientAllocs", req, &resp) + must.NoError(t, err) } // Tests that the server will successfully reload its network connections, // downgrading from TLS to plaintext if the server's TLS configuration changes. 
func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { ci.Parallel(t) - assert := assert.New(t) const ( cafile = "../helper/tlsutil/testdata/nomad-agent-ca.pem" @@ -268,36 +293,59 @@ func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { dir := t.TempDir() + tlsConfig := config.TLSConfig{ + EnableHTTP: true, + EnableRPC: true, + VerifyServerHostname: true, + CAFile: cafile, + CertFile: foocert, + KeyFile: fookey, + } + s1, cleanupS1 := TestServer(t, func(c *Config) { c.DataDir = path.Join(dir, "nodeB") - c.TLSConfig = &config.TLSConfig{ - EnableHTTP: true, - EnableRPC: true, - VerifyServerHostname: true, - CAFile: cafile, - CertFile: foocert, - KeyFile: fookey, - } + c.TLSConfig = &tlsConfig }) defer cleanupS1() + originalRPCTLSCodec := rpcClientWithTLS(t, s1, &tlsConfig) + + // Upsert a node into state, so we can use the Node.GetClientAllocs RPC + // to test the TLS connection. + mockNode := mock.Node() + must.NoError(t, s1.State().UpsertNode(structs.MsgTypeTestSetup, 10, mockNode)) + newTLSConfig := &config.TLSConfig{} - err := s1.reloadTLSConnections(newTLSConfig) - assert.Nil(err) - assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig)) + must.NoError(t, s1.reloadTLSConnections(newTLSConfig)) + + certEq, err := s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig) + must.NoError(t, err) + must.True(t, certEq) codec := rpcClient(t, s1) - node := mock.Node() - req := &structs.NodeRegisterRequest{ - Node: node, - WriteRequest: structs.WriteRequest{Region: "global"}, + req := &structs.NodeSpecificRequest{ + NodeID: mockNode.ID, + SecretID: mockNode.SecretID, + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: mockNode.SecretID, + }, } - var resp structs.GenericResponse - err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) - assert.Nil(err) + var resp structs.NodeClientAllocsResponse + + // Perform a request using the original TLS codec. This should fail with a + // connection reset error, as the server has now switched to plaintext. + err = msgpackrpc.CallWithCodec(originalRPCTLSCodec, "Node.GetClientAllocs", req, &resp) + must.Error(t, err) + must.True(t, connectionReset(err.Error())) + + // Perform a request using a non-TLS codec. This should succeed, as the + // server is now configured to accept plaintext connections. 
+ err = msgpackrpc.CallWithCodec(codec, "Node.GetClientAllocs", req, &resp) + must.NoError(t, err) } // Tests that the server will successfully reload its network connections, From 27da75044ed07d551adb8d2b3cd597e63dd7f083 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 24 Jun 2025 09:31:10 +0200 Subject: [PATCH 32/32] scheduler: move tests that depend on calling schedulers into `integration` package (#26037) --- ci/test-core.json | 1 + scheduler/feasible/preemption_test.go | 158 ++++++------------ scheduler/integration/README.md | 3 + .../{ => integration}/preemption_test.go | 51 +----- scheduler/{ => integration}/spread_test.go | 9 +- scheduler/tests/testing.go | 55 ++++++ 6 files changed, 120 insertions(+), 157 deletions(-) create mode 100644 scheduler/integration/README.md rename scheduler/{ => integration}/preemption_test.go (72%) rename scheduler/{ => integration}/spread_test.go (98%) diff --git a/ci/test-core.json b/ci/test-core.json index 82d58df37..d117e2a0b 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -47,6 +47,7 @@ "plugins/...", "scheduler/...", "scheduler/feasible/...", + "scheduler/integration/...", "scheduler/reconciler/...", "testutil/..." ] diff --git a/scheduler/feasible/preemption_test.go b/scheduler/feasible/preemption_test.go index 649ab40ab..d3a1c9802 100644 --- a/scheduler/feasible/preemption_test.go +++ b/scheduler/feasible/preemption_test.go @@ -273,7 +273,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "No preemption because existing allocs are not low priority", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 3200, MemoryMB: 7256, DiskMB: 4 * 1024, @@ -305,7 +305,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Preempting low priority allocs not enough to meet resource ask", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 3200, MemoryMB: 7256, DiskMB: 4 * 1024, @@ -337,7 +337,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "preemption impossible - static port needed is used by higher priority alloc", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1200, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -349,7 +349,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], highPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -393,7 +393,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "preempt only from device that has allocation with unused reserved port", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1200, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -405,7 +405,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], highPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -423,7 +423,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, 
&structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -486,7 +486,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Combination of high/low priority allocs, without static ports", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 2800, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -498,7 +498,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAllocWithTaskgroupNetwork(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithTaskgroupNetwork(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -514,7 +514,7 @@ func TestPreemption_Normal(t *testing.T) { IP: "192.168.0.201", MBits: 300, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -526,7 +526,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[3], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[3], lowPrioJob, &structs.Resources{ CPU: 700, MemoryMB: 256, DiskMB: 4 * 1024, @@ -556,12 +556,12 @@ func TestPreemption_Normal(t *testing.T) { { desc: "preempt allocs with network devices", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 2800, MemoryMB: 2256, DiskMB: 4 * 1024, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -596,12 +596,12 @@ func TestPreemption_Normal(t *testing.T) { { desc: "ignore allocs with close enough priority for network devices", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 2800, MemoryMB: 2256, DiskMB: 4 * 1024, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -634,7 +634,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Preemption needed for all resources except network", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 2800, MemoryMB: 2256, DiskMB: 40 * 1024, @@ -646,7 +646,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -658,12 +658,12 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 512, DiskMB: 25 * 1024, }), - createAlloc(allocIDs[3], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[3], lowPrioJob, &structs.Resources{ CPU: 700, MemoryMB: 276, DiskMB: 20 * 1024, @@ -693,7 +693,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Only one low priority alloc needs to be preempted", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1200, MemoryMB: 2256, DiskMB: 4 * 1024, @@ 
-705,7 +705,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -717,7 +717,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -752,7 +752,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "one alloc meets static port need, another meets remaining mbits needed", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1200, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -764,7 +764,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -782,7 +782,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -824,7 +824,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "alloc that meets static port need also meets other needs", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1200, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -836,7 +836,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -854,7 +854,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -895,7 +895,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "alloc from job that has existing evictions not chosen for preemption", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1200, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -907,7 +907,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -919,7 +919,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob2, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob2, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -948,7 +948,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, currentPreemptions: []*structs.Allocation{ - createAlloc(allocIDs[4], lowPrioJob2, &structs.Resources{ + tests.CreateAlloc(allocIDs[4], lowPrioJob2, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -969,7 +969,7 @@ func TestPreemption_Normal(t *testing.T) { desc: "Preemption with one device instance per alloc", // Add allocations that use two device instances currentAllocations: []*structs.Allocation{ - createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ + 
tests.CreateAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 500, MemoryMB: 512, DiskMB: 4 * 1024, @@ -979,7 +979,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "1080ti", DeviceIDs: []string{deviceIDs[0]}, }), - createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1011,7 +1011,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Preemption multiple devices used", currentAllocations: []*structs.Allocation{ - createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 500, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1021,7 +1021,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "1080ti", DeviceIDs: []string{deviceIDs[0], deviceIDs[1], deviceIDs[2], deviceIDs[3]}, }), - createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1055,7 +1055,7 @@ func TestPreemption_Normal(t *testing.T) { // same device should be chosen for preemption desc: "Preemption with allocs across multiple devices that match", currentAllocations: []*structs.Allocation{ - createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 500, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1065,7 +1065,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "1080ti", DeviceIDs: []string{deviceIDs[0], deviceIDs[1]}, }), - createAllocWithDevice(allocIDs[1], highPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[1], highPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 100, DiskMB: 4 * 1024, @@ -1075,7 +1075,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "1080ti", DeviceIDs: []string{deviceIDs[2]}, }), - createAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -1085,7 +1085,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "2080ti", DeviceIDs: []string{deviceIDs[4], deviceIDs[5]}, }), - createAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{ CPU: 100, MemoryMB: 256, DiskMB: 4 * 1024, @@ -1095,7 +1095,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "2080ti", DeviceIDs: []string{deviceIDs[6], deviceIDs[7]}, }), - createAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1130,7 +1130,7 @@ func TestPreemption_Normal(t *testing.T) { // priority are chosen desc: "Preemption with lower/higher priority combinations", currentAllocations: []*structs.Allocation{ - createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 500, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1140,7 +1140,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "1080ti", DeviceIDs: []string{deviceIDs[0], deviceIDs[1]}, }), - createAllocWithDevice(allocIDs[1], lowPrioJob2, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[1], lowPrioJob2, &structs.Resources{ CPU: 200, MemoryMB: 100, DiskMB: 4 * 1024, @@ -1150,7 +1150,7 @@ func TestPreemption_Normal(t *testing.T) 
{ Name: "1080ti", DeviceIDs: []string{deviceIDs[2], deviceIDs[3]}, }), - createAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 256, DiskMB: 4 * 1024, @@ -1160,7 +1160,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "2080ti", DeviceIDs: []string{deviceIDs[4], deviceIDs[5]}, }), - createAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{ CPU: 100, MemoryMB: 256, DiskMB: 4 * 1024, @@ -1170,7 +1170,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "2080ti", DeviceIDs: []string{deviceIDs[6], deviceIDs[7]}, }), - createAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{ CPU: 100, MemoryMB: 256, DiskMB: 4 * 1024, @@ -1180,7 +1180,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "2080ti", DeviceIDs: []string{deviceIDs[8]}, }), - createAllocWithDevice(allocIDs[5], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[5], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1212,7 +1212,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Device preemption not possible due to more instances needed than available", currentAllocations: []*structs.Allocation{ - createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ CPU: 500, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1222,7 +1222,7 @@ func TestPreemption_Normal(t *testing.T) { Name: "1080ti", DeviceIDs: []string{deviceIDs[0], deviceIDs[1], deviceIDs[2], deviceIDs[3]}, }), - createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 200, MemoryMB: 512, DiskMB: 4 * 1024, @@ -1252,7 +1252,7 @@ func TestPreemption_Normal(t *testing.T) { { desc: "Filter out allocs whose resource usage superset is also in the preemption list", currentAllocations: []*structs.Allocation{ - createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[0], highPrioJob, &structs.Resources{ CPU: 1800, MemoryMB: 2256, DiskMB: 4 * 1024, @@ -1264,7 +1264,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ CPU: 1500, MemoryMB: 256, DiskMB: 5 * 1024, @@ -1276,7 +1276,7 @@ func TestPreemption_Normal(t *testing.T) { }, }, }), - createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ + tests.CreateAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ CPU: 600, MemoryMB: 256, DiskMB: 5 * 1024, @@ -1366,57 +1366,3 @@ func TestPreemption_Normal(t *testing.T) { }) } } - -// helper method to create allocations with given jobs and resources -func createAlloc(id string, job *structs.Job, resource *structs.Resources) *structs.Allocation { - return createAllocInner(id, job, resource, nil, nil) -} - -// helper method to create allocation with network at the task group level -func createAllocWithTaskgroupNetwork(id string, job *structs.Job, resource *structs.Resources, tgNet *structs.NetworkResource) *structs.Allocation { - return createAllocInner(id, job, resource, nil, tgNet) -} - -func createAllocWithDevice(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource) *structs.Allocation { - return 
createAllocInner(id, job, resource, allocatedDevices, nil) -} - -func createAllocInner(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource, tgNetwork *structs.NetworkResource) *structs.Allocation { - alloc := &structs.Allocation{ - ID: id, - Job: job, - JobID: job.ID, - TaskResources: map[string]*structs.Resources{ - "web": resource, - }, - Namespace: structs.DefaultNamespace, - EvalID: uuid.Generate(), - DesiredStatus: structs.AllocDesiredStatusRun, - ClientStatus: structs.AllocClientStatusRunning, - TaskGroup: "web", - AllocatedResources: &structs.AllocatedResources{ - Tasks: map[string]*structs.AllocatedTaskResources{ - "web": { - Cpu: structs.AllocatedCpuResources{ - CpuShares: int64(resource.CPU), - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: int64(resource.MemoryMB), - }, - Networks: resource.Networks, - }, - }, - }, - } - - if allocatedDevices != nil { - alloc.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{allocatedDevices} - } - - if tgNetwork != nil { - alloc.AllocatedResources.Shared = structs.AllocatedSharedResources{ - Networks: []*structs.NetworkResource{tgNetwork}, - } - } - return alloc -} diff --git a/scheduler/integration/README.md b/scheduler/integration/README.md new file mode 100644 index 000000000..4c58aa83e --- /dev/null +++ b/scheduler/integration/README.md @@ -0,0 +1,3 @@ +# Integration tests + +This package holds tests that depend on calling different schedulers. \ No newline at end of file diff --git a/scheduler/preemption_test.go b/scheduler/integration/preemption_test.go similarity index 72% rename from scheduler/preemption_test.go rename to scheduler/integration/preemption_test.go index 35bf3a9fd..724fc908a 100644 --- a/scheduler/preemption_test.go +++ b/scheduler/integration/preemption_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. 
// SPDX-License-Identifier: BUSL-1.1 -package scheduler +package integration import ( "fmt" @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" psstructs "github.com/hashicorp/nomad/plugins/shared/structs" + "github.com/hashicorp/nomad/scheduler" "github.com/hashicorp/nomad/scheduler/tests" "github.com/shoenig/test/must" ) @@ -100,7 +101,7 @@ func TestPreemptionMultiple(t *testing.T) { allocs := []*structs.Allocation{} allocIDs := map[string]struct{}{} for i := 0; i < 4; i++ { - alloc := createAllocWithDevice(uuid.Generate(), lowPrioJob, lowPrioJob.TaskGroups[0].Tasks[0].Resources, &structs.AllocatedDeviceResource{ + alloc := tests.CreateAllocWithDevice(uuid.Generate(), lowPrioJob, lowPrioJob.TaskGroups[0].Tasks[0].Resources, &structs.AllocatedDeviceResource{ Type: "gpu", Vendor: "nvidia", Name: "1080ti", @@ -138,7 +139,7 @@ func TestPreemptionMultiple(t *testing.T) { must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) // Process the evaluation - must.NoError(t, h.Process(NewServiceScheduler, eval)) + must.NoError(t, h.Process(scheduler.NewServiceScheduler, eval)) must.Len(t, 1, h.Plans) must.MapContainsKey(t, h.Plans[0].NodePreemptions, node.ID) @@ -148,47 +149,3 @@ func TestPreemptionMultiple(t *testing.T) { } must.Eq(t, allocIDs, preempted) } - -func createAllocWithDevice(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource) *structs.Allocation { - return createAllocInner(id, job, resource, allocatedDevices, nil) -} - -func createAllocInner(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource, tgNetwork *structs.NetworkResource) *structs.Allocation { - alloc := &structs.Allocation{ - ID: id, - Job: job, - JobID: job.ID, - TaskResources: map[string]*structs.Resources{ - "web": resource, - }, - Namespace: structs.DefaultNamespace, - EvalID: uuid.Generate(), - DesiredStatus: structs.AllocDesiredStatusRun, - ClientStatus: structs.AllocClientStatusRunning, - TaskGroup: "web", - AllocatedResources: &structs.AllocatedResources{ - Tasks: map[string]*structs.AllocatedTaskResources{ - "web": { - Cpu: structs.AllocatedCpuResources{ - CpuShares: int64(resource.CPU), - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: int64(resource.MemoryMB), - }, - Networks: resource.Networks, - }, - }, - }, - } - - if allocatedDevices != nil { - alloc.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{allocatedDevices} - } - - if tgNetwork != nil { - alloc.AllocatedResources.Shared = structs.AllocatedSharedResources{ - Networks: []*structs.NetworkResource{tgNetwork}, - } - } - return alloc -} diff --git a/scheduler/spread_test.go b/scheduler/integration/spread_test.go similarity index 98% rename from scheduler/spread_test.go rename to scheduler/integration/spread_test.go index 50d12db76..882410ca8 100644 --- a/scheduler/spread_test.go +++ b/scheduler/integration/spread_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. 
// SPDX-License-Identifier: BUSL-1.1 -package scheduler +package integration import ( "fmt" @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler" "github.com/hashicorp/nomad/scheduler/feasible" "github.com/hashicorp/nomad/scheduler/tests" "github.com/shoenig/test" @@ -97,7 +98,7 @@ func TestSpreadOnLargeCluster(t *testing.T) { must.NoError(t, err) start := time.Now() - err = h.Process(NewServiceScheduler, eval) + err = h.Process(scheduler.NewServiceScheduler, eval) must.NoError(t, err) must.LessEq(t, time.Duration(60*time.Second), time.Since(start), must.Sprint("time to evaluate exceeded EvalNackTimeout")) @@ -352,7 +353,7 @@ func TestSpreadPanicDowngrade(t *testing.T) { h.NextIndex(), []*structs.Evaluation{eval}) must.NoError(t, err) - processErr := h.Process(NewServiceScheduler, eval) + processErr := h.Process(scheduler.NewServiceScheduler, eval) must.NoError(t, processErr, must.Sprintf("...")) must.Len(t, 1, h.Plans) } @@ -467,7 +468,7 @@ func TestSpread_ImplicitTargets(t *testing.T) { h := tests.NewHarness(t) nodesToDcs := setupNodes(h) eval := setupJob(h, tc.spread) - must.NoError(t, h.Process(NewServiceScheduler, eval)) + must.NoError(t, h.Process(scheduler.NewServiceScheduler, eval)) must.Len(t, 1, h.Plans) plan := h.Plans[0] diff --git a/scheduler/tests/testing.go b/scheduler/tests/testing.go index b46bedf6f..21587951f 100644 --- a/scheduler/tests/testing.go +++ b/scheduler/tests/testing.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" sstructs "github.com/hashicorp/nomad/scheduler/structs" @@ -314,3 +315,57 @@ func (h *Harness) AssertEvalStatus(t testing.TB, state string) { func (h *Harness) SetNoSubmit() { h.noSubmit = true } + +// helper method to create allocations with given jobs and resources +func CreateAlloc(id string, job *structs.Job, resource *structs.Resources) *structs.Allocation { + return CreateAllocInner(id, job, resource, nil, nil) +} + +// helper method to create allocation with network at the task group level +func CreateAllocWithTaskgroupNetwork(id string, job *structs.Job, resource *structs.Resources, tgNet *structs.NetworkResource) *structs.Allocation { + return CreateAllocInner(id, job, resource, nil, tgNet) +} + +func CreateAllocWithDevice(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource) *structs.Allocation { + return CreateAllocInner(id, job, resource, allocatedDevices, nil) +} + +func CreateAllocInner(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource, tgNetwork *structs.NetworkResource) *structs.Allocation { + alloc := &structs.Allocation{ + ID: id, + Job: job, + JobID: job.ID, + TaskResources: map[string]*structs.Resources{ + "web": resource, + }, + Namespace: structs.DefaultNamespace, + EvalID: uuid.Generate(), + DesiredStatus: structs.AllocDesiredStatusRun, + ClientStatus: structs.AllocClientStatusRunning, + TaskGroup: "web", + AllocatedResources: &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{ + "web": { + Cpu: structs.AllocatedCpuResources{ + CpuShares: int64(resource.CPU), + }, + Memory: structs.AllocatedMemoryResources{ + MemoryMB: 
int64(resource.MemoryMB), + }, + Networks: resource.Networks, + }, + }, + }, + } + + if allocatedDevices != nil { + alloc.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{allocatedDevices} + } + + if tgNetwork != nil { + alloc.AllocatedResources.Shared = structs.AllocatedSharedResources{ + Networks: []*structs.NetworkResource{tgNetwork}, + } + } + return alloc +}
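
For reference, the Go sketch below (not part of the patch series) shows how a test in the new scheduler/integration package could consume the allocation helpers that this change exports from scheduler/tests. The test name TestSharedAllocHelpers and the resource, vendor, and device values are illustrative assumptions; only the helper signatures (CreateAlloc, CreateAllocWithDevice, CreateAllocWithTaskgroupNetwork), the harness, and the assertion library come from the diff above.

package integration

import (
	"testing"

	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler/tests"
	"github.com/shoenig/test/must"
)

// TestSharedAllocHelpers is a hypothetical example, not part of this patch.
func TestSharedAllocHelpers(t *testing.T) {
	job := mock.Job()

	// CreateAlloc wires the given resources into both TaskResources and
	// AllocatedResources under the hardcoded "web" task group.
	alloc := tests.CreateAlloc(uuid.Generate(), job, &structs.Resources{
		CPU:      500,
		MemoryMB: 256,
		DiskMB:   4 * 1024,
	})
	must.Eq(t, "web", alloc.TaskGroup)
	must.Eq(t, int64(500), alloc.AllocatedResources.Tasks["web"].Cpu.CpuShares)

	// CreateAllocWithDevice additionally attaches an AllocatedDeviceResource,
	// mirroring how TestPreemptionMultiple builds its GPU allocations.
	gpuAlloc := tests.CreateAllocWithDevice(uuid.Generate(), job, &structs.Resources{
		CPU:      500,
		MemoryMB: 256,
		DiskMB:   4 * 1024,
	}, &structs.AllocatedDeviceResource{
		Type:      "gpu",
		Vendor:    "nvidia",
		Name:      "1080ti",
		DeviceIDs: []string{uuid.Generate()},
	})
	must.Len(t, 1, gpuAlloc.AllocatedResources.Tasks["web"].Devices)
}

A full integration test would then seed the harness state and run an evaluation through a concrete scheduler via h.Process(scheduler.NewServiceScheduler, eval), exactly as the relocated TestPreemptionMultiple and spread tests above do.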