scheduler: property testing of reconcile reconnecting (#26180)

To help break down the larger property tests we're doing in #26167 and #26172
into more manageable chunks, pull out a property test for just the
`reconcileReconnecting` method. This method helpfully already defines its
important properties, so we can implement those as test assertions.

Ref: https://hashicorp.atlassian.net/browse/NMD-814
Ref: https://github.com/hashicorp/nomad/pull/26167
Ref: https://github.com/hashicorp/nomad/pull/26172
This commit is contained in:
Tim Gross
2025-07-07 09:40:49 -04:00
committed by GitHub
parent d4ab277154
commit c043d1c850
2 changed files with 93 additions and 1 deletions

View File

@@ -1221,7 +1221,7 @@ func (a *AllocReconciler) computeStop(group *structs.TaskGroup, nameIndex *Alloc
// reconcileReconnecting receives the set of allocations that are reconnecting
// and all other allocations for the same group and determines which ones to
// reconnect which ones or stop.
// reconnect, which ones to stop, and the stop results for the latter.
//
// - Every reconnecting allocation MUST be present in one, and only one, of
// the returned set.

View File

@@ -15,6 +15,7 @@ import (
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"pgregory.net/rapid"
)
@@ -397,3 +398,94 @@ func (idg *idGenerator) nextID() string {
buf[8:10],
buf[10:16])
}
func TestAllocReconciler_ReconnectingProps(t *testing.T) {
rapid.Check(t, func(t *rapid.T) {
now := time.Now()
idg := &idGenerator{}
job := genJob(structs.JobTypeBatch, idg).Draw(t, "job")
tg := job.TaskGroups[0]
tg.Disconnect.Reconcile = rapid.SampledFrom([]string{
"", structs.ReconcileOptionBestScore, structs.ReconcileOptionLongestRunning,
structs.ReconcileOptionKeepOriginal, structs.ReconcileOptionKeepReplacement},
).Draw(t, "strategy")
tg.Tasks = []*structs.Task{{Name: "task"}}
reconnecting, all := allocSet{}, allocSet{}
reconnectingAllocs := rapid.SliceOfN(
genExistingAlloc(idg, job, idg.nextID(), now), 1, 10).Draw(t, "allocs")
for _, alloc := range reconnectingAllocs {
numRestarts := rapid.IntRange(0, 2).Draw(t, "")
startTime := now.Add(-time.Minute * time.Duration(rapid.IntRange(2, 5).Draw(t, "")))
lastRestart := startTime.Add(time.Minute)
alloc.TaskStates = map[string]*structs.TaskState{"task": {
Restarts: uint64(numRestarts),
LastRestart: lastRestart,
StartedAt: startTime,
}}
reconnecting[alloc.ID] = alloc
}
allAllocs := rapid.SliceOfN(
genExistingAlloc(idg, job, idg.nextID(), now), 0, 10).Draw(t, "allocs")
for i, alloc := range allAllocs {
numRestarts := rapid.IntRange(0, 2).Draw(t, "")
startTime := now.Add(-time.Minute * time.Duration(rapid.IntRange(2, 5).Draw(t, "")))
lastRestart := startTime.Add(time.Minute)
alloc.TaskStates = map[string]*structs.TaskState{"task": {
Restarts: uint64(numRestarts),
LastRestart: lastRestart,
StartedAt: startTime,
}}
// wire up the next/previous relationship for a subset
if i%2 == 0 && len(reconnecting) > i {
alloc.PreviousAllocation = reconnectingAllocs[i].ID
reconnecting[alloc.PreviousAllocation].NextAllocation = alloc.ID
}
all[alloc.ID] = alloc
}
logger := testlog.HCLogger(t)
ar := NewAllocReconciler(logger,
allocUpdateFnInplace, // not relevant to function
ReconcilerState{Job: job},
ClusterState{Now: now},
)
keep, stop, stopResults := ar.reconcileReconnecting(reconnecting, all, tg)
for reconnectedID := range reconnecting {
_, isKeep := keep[reconnectedID]
_, isStop := stop[reconnectedID]
if isKeep && isStop {
t.Fatal("reconnecting alloc should not be both kept and stopped")
}
if !(isKeep || isStop) {
t.Fatal("reconnecting alloc must be either kept or stopped")
}
}
for keepID := range keep {
if _, ok := reconnecting[keepID]; !ok {
t.Fatal("only reconnecting allocations are allowed to be present in the returned reconnect set.")
}
}
for stopID := range stop {
if alloc, ok := reconnecting[stopID]; ok {
nextID := alloc.NextAllocation
_, nextIsKeep := keep[nextID]
_, nextIsStop := stop[nextID]
if nextIsKeep || nextIsStop {
t.Fatal("replacements should not be in either set")
}
}
}
must.Eq(t, len(stop), len(stopResults),
must.Sprint("every stop should have a stop result"))
})
}