Files
nomad/nomad/structs/cpucompat_linux_test.go
Tim Gross 7d73065066 numa: fix scheduler panic due to topology serialization bug (#23284)
The NUMA topology struct field `NodeIDs` is an `idset.Set`, which has no public
members. As a result, this field is never serialized via msgpack and persisted
in state. When `numa.affinity = "prefer"`, the scheduler dereferences this nil
field and panics the scheduler worker.

Ideally we would fix this by adding a msgpack serialization extension, but
because the field already exists and is always empty, doing so would break RPC wire
compatibility across upgrades. Instead, create a new field that's populated at
the same time we populate the more useful `idset.Set`, and repopulate the set on
demand.

Fixes: https://hashicorp.atlassian.net/browse/NET-9924
2024-06-11 08:55:00 -04:00

90 lines
2.1 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
//go:build linux
package structs
import (
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
"github.com/shoenig/test/must"
)
// TestNUMA_topologyFromLegacy_plain checks the topology that
// topologyFromLegacy builds from legacy CPU resources in which all four
// cores (0-3) are reservable.
func TestNUMA_topologyFromLegacy_plain(t *testing.T) {
	ci.Parallel(t)

	legacy := LegacyNodeCpuResources{
		CpuShares:          12800,
		TotalCpuCores:      4,
		ReservableCpuCores: []uint16{0, 1, 2, 3},
	}

	got := topologyFromLegacy(legacy)

	// One expected core per reservable core ID in the legacy input.
	wantCores := []numalib.Core{
		makeLegacyCore(0),
		makeLegacyCore(1),
		makeLegacyCore(2),
		makeLegacyCore(3),
	}
	want := &numalib.Topology{
		Distances:              numalib.SLIT{{10}},
		Cores:                  wantCores,
		OverrideTotalCompute:   12800,
		OverrideWitholdCompute: 0,
	}
	// The expected node set contains only node ID 0.
	want.SetNodes(idset.From[hw.NodeID]([]hw.NodeID{0}))

	// Per the original note, this equality check only compares total
	// compute, so each field is also verified individually below.
	must.Equal(t, want, got)

	// Field-by-field verification.
	must.Eq(t, want.GetNodes(), got.GetNodes())
	must.Eq(t, want.Distances, got.Distances)
	must.Eq(t, want.Cores, got.Cores)
	must.Eq(t, want.OverrideTotalCompute, got.OverrideTotalCompute)
	must.Eq(t, want.OverrideWitholdCompute, got.OverrideWitholdCompute)
}
// TestNUMA_topologyFromLegacy_reservations checks the topology that
// topologyFromLegacy builds from legacy CPU resources in which core 0 is
// left out of the reservable set, leaving cores 1-3.
func TestNUMA_topologyFromLegacy_reservations(t *testing.T) {
	ci.Parallel(t)

	legacy := LegacyNodeCpuResources{
		CpuShares:          9600,
		TotalCpuCores:      4,
		ReservableCpuCores: []uint16{1, 2, 3}, // core 0 excluded
	}

	got := topologyFromLegacy(legacy)

	// One expected core per reservable core ID; core 0 is absent.
	wantCores := []numalib.Core{
		makeLegacyCore(1),
		makeLegacyCore(2),
		makeLegacyCore(3),
	}
	want := &numalib.Topology{
		Distances:              numalib.SLIT{{10}},
		Cores:                  wantCores,
		OverrideTotalCompute:   9600,
		OverrideWitholdCompute: 3200, // core 0 excluded
	}
	// The expected node set contains only node ID 0.
	want.SetNodes(idset.From[hw.NodeID]([]hw.NodeID{0}))

	// Per the original note, this equality check only compares total
	// compute, so each field is also verified individually below.
	must.Equal(t, want, got)

	// Field-by-field verification.
	must.Eq(t, want.GetNodes(), got.GetNodes())
	must.Eq(t, want.Distances, got.Distances)
	must.Eq(t, want.Cores, got.Cores)
	must.Eq(t, want.OverrideTotalCompute, got.OverrideTotalCompute)
	must.Eq(t, want.OverrideWitholdCompute, got.OverrideWitholdCompute)
}