diff --git a/scheduler/reconciler/reconcile_cluster_prop_test.go b/scheduler/reconciler/reconcile_cluster_prop_test.go index a0917e637..d8518816f 100644 --- a/scheduler/reconciler/reconcile_cluster_prop_test.go +++ b/scheduler/reconciler/reconcile_cluster_prop_test.go @@ -80,28 +80,111 @@ func TestAllocReconciler_PropTest(t *testing.T) { })) } +func TestAllocReconciler_cancelUnneededCanaries(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + idg := &idGenerator{} + job := genJob( + rapid.SampledFrom([]string{structs.JobTypeService, structs.JobTypeBatch}).Draw(t, "job_type"), + idg, + ).Draw(t, "job") + + clusterState := genClusterState(idg, time.Now()).Draw(t, "cluster_state") + jobState := genReconcilerState(idg, job, clusterState).Draw(t, "reconciler_state") + + logger := testlog.HCLogger(t) + ar := NewAllocReconciler(logger, allocUpdateFnInplace, jobState, clusterState) + + m := newAllocMatrix(job, jobState.ExistingAllocs) + group := job.TaskGroups[0].Name + all := m[group] // <-- allocset of all allocs for tg + all, _ = filterOldTerminalAllocs(jobState, all) + + // runs the method under test + canaries, _, stopAllocs := ar.cancelUnneededCanaries(all, new(structs.DesiredUpdates)) + + expectedStopped := []string{} + if jobState.DeploymentOld != nil { + for _, dstate := range jobState.DeploymentOld.TaskGroups { + if !dstate.Promoted { + expectedStopped = append(expectedStopped, dstate.PlacedCanaries...) + } + } + } + if jobState.DeploymentCurrent != nil && jobState.DeploymentCurrent.Status == structs.DeploymentStatusFailed { + for _, dstate := range jobState.DeploymentCurrent.TaskGroups { + if !dstate.Promoted { + expectedStopped = append(expectedStopped, dstate.PlacedCanaries...) + } + } + } + stopSet := all.fromKeys(expectedStopped) + all = all.difference(stopSet) + + expectedCanaries := []string{} + if jobState.DeploymentCurrent != nil { + for _, dstate := range jobState.DeploymentCurrent.TaskGroups { + expectedCanaries = append(expectedCanaries, dstate.PlacedCanaries...) + } + } + canarySet := all.fromKeys(expectedCanaries) + canariesOnUntaintedNodes, migrate, lost, _, _, _, _ := filterByTainted(canarySet, clusterState) + + stopSet = stopSet.union(migrate, lost) + + must.Eq(t, len(stopAllocs), len(stopSet)) + must.Eq(t, len(canaries), len(canariesOnUntaintedNodes)) + }) +} + func genAllocReconciler(jobType string, idg *idGenerator) *rapid.Generator[*AllocReconciler] { return rapid.Custom(func(t *rapid.T) *AllocReconciler { now := time.Now() // note: you can only use offsets from this + clusterState := genClusterState(idg, now).Draw(t, "cluster_state") + job := genJob(jobType, idg).Draw(t, "job") + + reconcilerState := genReconcilerState(idg, job, clusterState).Draw(t, "reconciler_state") + updateFn := rapid.SampledFrom([]AllocUpdateType{ + allocUpdateFnDestructive, + allocUpdateFnIgnore, + allocUpdateFnInplace, + }).Draw(t, "update_function") + + logger := testlog.HCLogger(t) + ar := NewAllocReconciler(logger, + updateFn, + reconcilerState, + clusterState, + ) + + return ar + }) +} + +func genClusterState(idg *idGenerator, now time.Time) *rapid.Generator[ClusterState] { + return rapid.Custom(func(t *rapid.T) ClusterState { nodes := rapid.SliceOfN(genNode(idg), 0, 5).Draw(t, "nodes") taintedNodes := helper.SliceToMap[map[string]*structs.Node]( nodes, func(n *structs.Node) string { return n.ID }) - clusterState := ClusterState{ + return ClusterState{ TaintedNodes: taintedNodes, SupportsDisconnectedClients: rapid.Bool().Draw(t, "supports_disconnected_clients"), Now: now, } - job := genJob(jobType, idg).Draw(t, "job") + }) +} + +func genReconcilerState(idg *idGenerator, job *structs.Job, clusterState ClusterState) *rapid.Generator[ReconcilerState] { + return rapid.Custom(func(t *rapid.T) ReconcilerState { oldJob := job.Copy() oldJob.Version-- oldJob.CreateIndex = 100 currentAllocs := rapid.SliceOfN( - genExistingAllocMaybeTainted(idg, job, taintedNodes, now), 0, 15).Draw(t, "allocs") + genExistingAllocMaybeTainted(idg, job, clusterState.TaintedNodes, clusterState.Now), 0, 15).Draw(t, "allocs") oldAllocs := rapid.SliceOfN( - genExistingAllocMaybeTainted(idg, oldJob, taintedNodes, now), 0, 15).Draw(t, "old_allocs") + genExistingAllocMaybeTainted(idg, oldJob, clusterState.TaintedNodes, clusterState.Now), 0, 15).Draw(t, "old_allocs") // tie together a subset of allocations so we can exercise reconnection previousAllocID := "" @@ -119,7 +202,7 @@ func genAllocReconciler(jobType string, idg *idGenerator) *rapid.Generator[*Allo oldDeploy := genDeployment(idg, oldJob, oldAllocs).Draw(t, "old_deploy") currentDeploy := genDeployment(idg, job, currentAllocs).Draw(t, "current_deploy") - reconcilerState := ReconcilerState{ + return ReconcilerState{ Job: job, JobID: job.ID, JobIsBatch: job.Type == structs.JobTypeBatch, @@ -130,21 +213,6 @@ func genAllocReconciler(jobType string, idg *idGenerator) *rapid.Generator[*Allo ExistingAllocs: allocs, EvalID: idg.nextID(), } - - updateFn := rapid.SampledFrom([]AllocUpdateType{ - allocUpdateFnDestructive, - allocUpdateFnIgnore, - allocUpdateFnInplace, - }).Draw(t, "update_function") - - logger := testlog.HCLogger(t) - ar := NewAllocReconciler(logger, - updateFn, - reconcilerState, - clusterState, - ) - - return ar }) }