scheduler: property testing of cancelUnneededCanaries (#26204)

In the spirit of #26180

Internal ref: https://hashicorp.atlassian.net/browse/NMD-814
This commit is contained in:
Piotr Kazmierczak
2025-07-09 19:46:13 +02:00
committed by GitHub
parent 7c6c1ed0d3
commit e50db4d1b8

View File

@@ -80,28 +80,111 @@ func TestAllocReconciler_PropTest(t *testing.T) {
}))
}
// TestAllocReconciler_cancelUnneededCanaries is a property test for
// AllocReconciler.cancelUnneededCanaries. For each randomly generated job,
// cluster state and reconciler state it independently recomputes which
// allocations should be stopped and which canaries should remain, and then
// checks that the method under test returns collections of the same sizes.
// Only the sizes are compared, not the individual members.
func TestAllocReconciler_cancelUnneededCanaries(t *testing.T) {
	rapid.Check(t, func(t *rapid.T) {
		idg := &idGenerator{}
		job := genJob(
			rapid.SampledFrom([]string{structs.JobTypeService, structs.JobTypeBatch}).Draw(t, "job_type"),
			idg,
		).Draw(t, "job")

		clusterState := genClusterState(idg, time.Now()).Draw(t, "cluster_state")
		jobState := genReconcilerState(idg, job, clusterState).Draw(t, "reconciler_state")

		logger := testlog.HCLogger(t)
		ar := NewAllocReconciler(logger, allocUpdateFnInplace, jobState, clusterState)

		m := newAllocMatrix(job, jobState.ExistingAllocs)
		// NOTE(review): assumes genJob always yields at least one task group;
		// indexing [0] panics otherwise — confirm against genJob.
		group := job.TaskGroups[0].Name
		all := m[group] // <-- allocset of all allocs for tg
		all, _ = filterOldTerminalAllocs(jobState, all)

		// runs the method under test
		canaries, _, stopAllocs := ar.cancelUnneededCanaries(all, new(structs.DesiredUpdates))

		// Expected stops, part 1: placed canaries of every task group in the
		// old deployment that was not promoted.
		var expectedStopped []string
		if jobState.DeploymentOld != nil {
			for _, dstate := range jobState.DeploymentOld.TaskGroups {
				if !dstate.Promoted {
					expectedStopped = append(expectedStopped, dstate.PlacedCanaries...)
				}
			}
		}

		// Expected stops, part 2: the same rule applied to the current
		// deployment, but only when its status is failed.
		if jobState.DeploymentCurrent != nil && jobState.DeploymentCurrent.Status == structs.DeploymentStatusFailed {
			for _, dstate := range jobState.DeploymentCurrent.TaskGroups {
				if !dstate.Promoted {
					expectedStopped = append(expectedStopped, dstate.PlacedCanaries...)
				}
			}
		}

		// Remove the expected stops from the working set before looking for
		// the canaries that remain.
		stopSet := all.fromKeys(expectedStopped)
		all = all.difference(stopSet)

		// Candidate canaries are the placed canaries of the current
		// deployment, regardless of promotion state.
		var expectedCanaries []string
		if jobState.DeploymentCurrent != nil {
			for _, dstate := range jobState.DeploymentCurrent.TaskGroups {
				expectedCanaries = append(expectedCanaries, dstate.PlacedCanaries...)
			}
		}
		canarySet := all.fromKeys(expectedCanaries)

		// The migrate and lost results of filterByTainted are counted as
		// additional expected stops; the first result is what should be
		// returned as canaries.
		canariesOnUntaintedNodes, migrate, lost, _, _, _, _ := filterByTainted(canarySet, clusterState)
		stopSet = stopSet.union(migrate, lost)

		// must.Eq takes the expected value first and the actual value second.
		must.Eq(t, len(stopSet), len(stopAllocs))
		must.Eq(t, len(canariesOnUntaintedNodes), len(canaries))
	})
}
// genAllocReconciler returns a generator that assembles an AllocReconciler
// for a job of the given type. Each generated reconciler is built from a
// freshly drawn cluster state, job, reconciler state and alloc update
// function, all sharing the supplied ID generator.
func genAllocReconciler(jobType string, idg *idGenerator) *rapid.Generator[*AllocReconciler] {
	build := func(t *rapid.T) *AllocReconciler {
		// Reference timestamp for this draw.
		// note: you can only use offsets from this
		now := time.Now()

		cluster := genClusterState(idg, now).Draw(t, "cluster_state")
		generatedJob := genJob(jobType, idg).Draw(t, "job")
		state := genReconcilerState(idg, generatedJob, cluster).Draw(t, "reconciler_state")

		// Pick one of the available update strategies for this reconciler.
		updateFns := []AllocUpdateType{
			allocUpdateFnDestructive,
			allocUpdateFnIgnore,
			allocUpdateFnInplace,
		}
		chosenUpdateFn := rapid.SampledFrom(updateFns).Draw(t, "update_function")

		return NewAllocReconciler(testlog.HCLogger(t), chosenUpdateFn, state, cluster)
	}

	return rapid.Custom(build)
}
func genClusterState(idg *idGenerator, now time.Time) *rapid.Generator[ClusterState] {
return rapid.Custom(func(t *rapid.T) ClusterState {
nodes := rapid.SliceOfN(genNode(idg), 0, 5).Draw(t, "nodes")
taintedNodes := helper.SliceToMap[map[string]*structs.Node](
nodes, func(n *structs.Node) string { return n.ID })
clusterState := ClusterState{
return ClusterState{
TaintedNodes: taintedNodes,
SupportsDisconnectedClients: rapid.Bool().Draw(t, "supports_disconnected_clients"),
Now: now,
}
job := genJob(jobType, idg).Draw(t, "job")
})
}
func genReconcilerState(idg *idGenerator, job *structs.Job, clusterState ClusterState) *rapid.Generator[ReconcilerState] {
return rapid.Custom(func(t *rapid.T) ReconcilerState {
oldJob := job.Copy()
oldJob.Version--
oldJob.CreateIndex = 100
currentAllocs := rapid.SliceOfN(
genExistingAllocMaybeTainted(idg, job, taintedNodes, now), 0, 15).Draw(t, "allocs")
genExistingAllocMaybeTainted(idg, job, clusterState.TaintedNodes, clusterState.Now), 0, 15).Draw(t, "allocs")
oldAllocs := rapid.SliceOfN(
genExistingAllocMaybeTainted(idg, oldJob, taintedNodes, now), 0, 15).Draw(t, "old_allocs")
genExistingAllocMaybeTainted(idg, oldJob, clusterState.TaintedNodes, clusterState.Now), 0, 15).Draw(t, "old_allocs")
// tie together a subset of allocations so we can exercise reconnection
previousAllocID := ""
@@ -119,7 +202,7 @@ func genAllocReconciler(jobType string, idg *idGenerator) *rapid.Generator[*Allo
oldDeploy := genDeployment(idg, oldJob, oldAllocs).Draw(t, "old_deploy")
currentDeploy := genDeployment(idg, job, currentAllocs).Draw(t, "current_deploy")
reconcilerState := ReconcilerState{
return ReconcilerState{
Job: job,
JobID: job.ID,
JobIsBatch: job.Type == structs.JobTypeBatch,
@@ -130,21 +213,6 @@ func genAllocReconciler(jobType string, idg *idGenerator) *rapid.Generator[*Allo
ExistingAllocs: allocs,
EvalID: idg.nextID(),
}
updateFn := rapid.SampledFrom([]AllocUpdateType{
allocUpdateFnDestructive,
allocUpdateFnIgnore,
allocUpdateFnInplace,
}).Draw(t, "update_function")
logger := testlog.HCLogger(t)
ar := NewAllocReconciler(logger,
updateFn,
reconcilerState,
clusterState,
)
return ar
})
}