diff --git a/.changelog/12008.txt b/.changelog/12008.txt new file mode 100644 index 000000000..710fdc465 --- /dev/null +++ b/.changelog/12008.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: Seed node shuffling with the evaluation ID to make the order reproducible +``` diff --git a/scheduler/context_test.go b/scheduler/context_test.go index 75bc7ed36..8187e9cbf 100644 --- a/scheduler/context_test.go +++ b/scheduler/context_test.go @@ -14,6 +14,7 @@ import ( func testContext(t testing.TB) (*state.StateStore, *EvalContext) { state := state.TestStateStore(t) plan := &structs.Plan{ + EvalID: uuid.Generate(), NodeUpdate: make(map[string][]*structs.Allocation), NodeAllocation: make(map[string][]*structs.Allocation), NodePreemptions: make(map[string][]*structs.Allocation), diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 3b10331c5..4bb0a3253 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -122,7 +122,8 @@ func (iter *StaticIterator) SetNodes(nodes []*structs.Node) { // is applied in-place func NewRandomIterator(ctx Context, nodes []*structs.Node) *StaticIterator { // shuffle with the Fisher-Yates algorithm - shuffleNodes(nodes) + idx, _ := ctx.State().LatestIndex() + shuffleNodes(ctx.Plan(), idx, nodes) // Create a static iterator return NewStaticIterator(ctx, nodes) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index b472041e0..11d66a054 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -107,6 +107,9 @@ type State interface { // CSIVolumeByID fetch CSI volumes, containing controller jobs CSIVolumesByNodeID(memdb.WatchSet, string, string) (memdb.ResultIterator, error) + + // LatestIndex returns the greatest index value for all indexes. + LatestIndex() (uint64, error) } // Planner interface is used to submit a task allocation plan. diff --git a/scheduler/stack.go b/scheduler/stack.go index d2b546107..b71916af0 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -70,7 +70,8 @@ type GenericStack struct { func (s *GenericStack) SetNodes(baseNodes []*structs.Node) { // Shuffle base nodes - shuffleNodes(baseNodes) + idx, _ := s.ctx.State().LatestIndex() + shuffleNodes(s.ctx.Plan(), idx, baseNodes) // Update the set of base nodes s.source.SetNodes(baseNodes) diff --git a/scheduler/util.go b/scheduler/util.go index 869442b78..28113afba 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -1,6 +1,7 @@ package scheduler import ( + "encoding/binary" "fmt" "math/rand" "reflect" @@ -376,11 +377,25 @@ func taintedNodes(state State, allocs []*structs.Allocation) (map[string]*struct return out, nil } -// shuffleNodes randomizes the slice order with the Fisher-Yates algorithm -func shuffleNodes(nodes []*structs.Node) { +// shuffleNodes randomizes the slice order with the Fisher-Yates +// algorithm. We seed the random source with the eval ID (which is +// random) to aid in postmortem debugging of specific evaluations and +// state snapshots. +func shuffleNodes(plan *structs.Plan, index uint64, nodes []*structs.Node) { + + // use the last 4 bytes because those are the random bits + // if we have sortable IDs + buf := []byte(plan.EvalID) + seed := binary.BigEndian.Uint64(buf[len(buf)-8:]) + + // for retried plans the index is the plan result's RefreshIndex + // so that we don't retry with the exact same shuffle + seed ^= index + r := rand.New(rand.NewSource(int64(seed >> 2))) + n := len(nodes) for i := n - 1; i > 0; i-- { - j := rand.Intn(i + 1) + j := r.Intn(i + 1) nodes[i], nodes[j] = nodes[j], nodes[i] } } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 1b5ec8d9a..f03114ba7 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -507,8 +507,17 @@ func TestShuffleNodes(t *testing.T) { } orig := make([]*structs.Node, len(nodes)) copy(orig, nodes) - shuffleNodes(nodes) + eval := mock.Eval() // will have random EvalID + plan := eval.MakePlan(mock.Job()) + shuffleNodes(plan, 1000, nodes) require.False(t, reflect.DeepEqual(nodes, orig)) + + nodes2 := make([]*structs.Node, len(nodes)) + copy(nodes2, orig) + shuffleNodes(plan, 1000, nodes2) + + require.True(t, reflect.DeepEqual(nodes, nodes2)) + } func TestTaskUpdatedAffinity(t *testing.T) {