core: account for linux systems with no reservable cores (#19458)

* core: account for linux systems with no reservable cores

* cl: add cl

* core: remove condition on reservable cores for legacy empty check
This commit is contained in:
Seth Hoenig
2023-12-13 13:06:45 -06:00
committed by GitHub
parent 6e4d57b330
commit 7e43317e37
5 changed files with 62 additions and 40 deletions

3
.changelog/19458.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where Linux nodes with no reservable cores would panic the scheduler
```

View File

@@ -6,9 +6,7 @@
package structs
import (
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
)
// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor
@@ -40,41 +38,5 @@ func (n *NodeResources) Compatibility() {
}
// topologyFromLegacy synthesizes a numalib.Topology from the legacy CPU
// resource fields of old.
//
// The result describes a single node with id 0 and one core entry per
// old.TotalCpuCores. Every core receives the same guessed speed, computed as
// old.CpuShares divided by old.TotalCpuCores, and the topology carries
// overrides for the total and withheld compute.
//
// old.TotalCpuCores must be non-zero: the per-core frequency is an integer
// division by the core count, which panics on zero.
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
	coreCount := old.TotalCpuCores

	// interpret per-core frequency given total compute and total core count
	frequency := hw.MHz(old.CpuShares / (int64(coreCount)))

	// synthesize a set of cores that abstractly matches the legacy cpu specs
	cores := make([]numalib.Core, 0, coreCount)
	for i := 0; i < int(coreCount); i++ {
		cores = append(cores, numalib.Core{
			ID:         hw.CoreID(i),
			SocketID:   0,                   // no numa support on non-linux
			NodeID:     0,                   // no numa support on non-linux
			Grade:      numalib.Performance, // assume P-cores
			Disable:    false,               // no reservable cores on non-linux
			GuessSpeed: frequency,
		})
	}

	// NOTE(review): frequency is a truncated quotient, so for positive inputs
	// frequency*coreCount never exceeds CpuShares and this difference is zero
	// or negative - confirm hw.MHz handles that case as intended.
	withheld := (frequency * hw.MHz(coreCount)) - hw.MHz(old.CpuShares)

	return &numalib.Topology{
		// legacy: assume one node with id 0
		NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}),

		// legacy: with one node the distance matrix is 1-D
		Distances: numalib.SLIT{{10}},

		// legacy: a pseudo representation of each actual core profile
		Cores: cores,

		// legacy: set since we have the value
		OverrideTotalCompute: hw.MHz(old.CpuShares),

		// legacy: set since we can compute the value
		OverrideWitholdCompute: withheld,
	}
}

View File

@@ -0,0 +1,50 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package structs
import (
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
)
// topologyFromLegacyGeneric builds a numalib.Topology out of the legacy CPU
// fields of old, using only old.CpuShares and old.TotalCpuCores (the
// reservable core list is not consulted).
//
// The topology has a single node with id 0 and one synthetic core per legacy
// core, each assigned an equal share of the total compute as its guessed
// speed. The total and withheld compute overrides are filled in from the
// legacy values.
//
// old.TotalCpuCores must be non-zero, since the per-core speed is an integer
// division by the core count.
func topologyFromLegacyGeneric(old LegacyNodeCpuResources) *numalib.Topology {
	numCores := old.TotalCpuCores
	totalCompute := hw.MHz(old.CpuShares)

	// per-core speed: total compute split evenly across the core count
	perCore := hw.MHz(old.CpuShares / int64(numCores))

	// fabricate one identical core entry per legacy core
	synthetic := make([]numalib.Core, 0, numCores)
	for id := 0; id < int(numCores); id++ {
		core := numalib.Core{
			ID:         hw.CoreID(id),
			Grade:      numalib.Performance, // assume P-cores
			GuessSpeed: perCore,
			SocketID:   0,     // no numa support on non-linux
			NodeID:     0,     // no numa support on non-linux
			Disable:    false, // no reservable cores on non-linux
		}
		synthetic = append(synthetic, core)
	}

	topo := new(numalib.Topology)

	// legacy: a lone node, id 0, whose distance matrix is the 1-D {{10}}
	topo.NodeIDs = idset.From[hw.NodeID]([]hw.NodeID{0})
	topo.Distances = numalib.SLIT{{10}}

	// legacy: stand-in representation of the real core profiles
	topo.Cores = synthetic

	// legacy: total is known directly; withheld is derived from it
	topo.OverrideTotalCompute = totalCompute
	topo.OverrideWitholdCompute = perCore*hw.MHz(numCores) - totalCompute

	return topo
}

View File

@@ -45,6 +45,13 @@ func (n *NodeResources) Compatibility() {
}
// topologyFromLegacy chooses how to convert the legacy CPU resources in old
// into a numalib.Topology.
//
// When old lists at least one reservable core the Linux-specific conversion
// is used; otherwise the generic conversion is used, because the Linux
// conversion divides by the number of reservable cores.
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
	if len(old.ReservableCpuCores) > 0 {
		return topologyFromLegacyLinux(old)
	}
	return topologyFromLegacyGeneric(old)
}
func topologyFromLegacyLinux(old LegacyNodeCpuResources) *numalib.Topology {
// interpret per-core frequency given total compute and total core count
frequency := hw.MHz(old.CpuShares / (int64(len(old.ReservableCpuCores))))

View File

@@ -82,7 +82,7 @@ type LegacyNodeCpuResources struct {
// empty reports whether r carries no usable legacy CPU data.
//
// Partial struct serialization / copy / merge sadness means this struct can
// exist with no data, which is a condition we must detect during the upgrade
// path. Only CpuShares and TotalCpuCores are consulted: a node may have no
// reservable cores at all, so an empty ReservableCpuCores does not make r
// empty.
func (r LegacyNodeCpuResources) empty() bool {
	return r.CpuShares == 0 || r.TotalCpuCores == 0
}
// NomadProcessorResources captures the CPU hardware resources of the Nomad node.