mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
core: account for linux systems with no reservable cores (#19458)
* core: account for linux systems with no reservable cores
* cl: add cl
* core: remove condition on reservable cores for legacy empty check
This commit is contained in:
3
.changelog/19458.txt
Normal file
3
.changelog/19458.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:bug
|
||||||
|
core: Fixed a bug where linux nodes with no reservable cores would panic the scheduler
|
||||||
|
```
|
||||||
@@ -6,9 +6,7 @@
|
|||||||
package structs
|
package structs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/hashicorp/nomad/client/lib/idset"
|
|
||||||
"github.com/hashicorp/nomad/client/lib/numalib"
|
"github.com/hashicorp/nomad/client/lib/numalib"
|
||||||
"github.com/hashicorp/nomad/client/lib/numalib/hw"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor
|
// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor
|
||||||
@@ -40,41 +38,5 @@ func (n *NodeResources) Compatibility() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
|
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
|
||||||
coreCount := old.TotalCpuCores
|
return topologyFromLegacyGeneric(old)
|
||||||
|
|
||||||
// interpret per-core frequency given total compute and total core count
|
|
||||||
frequency := hw.MHz(old.CpuShares / (int64(coreCount)))
|
|
||||||
|
|
||||||
// synthesize a set of cores that abstractly matches the legacy cpu specs
|
|
||||||
cores := make([]numalib.Core, 0, coreCount)
|
|
||||||
|
|
||||||
for i := 0; i < int(coreCount); i++ {
|
|
||||||
cores = append(cores, numalib.Core{
|
|
||||||
ID: hw.CoreID(i),
|
|
||||||
SocketID: 0, // no numa support on non-linux
|
|
||||||
NodeID: 0, // no numa support on non-linux
|
|
||||||
Grade: numalib.Performance, // assume P-cores
|
|
||||||
Disable: false, // no reservable cores on non-linux
|
|
||||||
GuessSpeed: frequency,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
withheld := (frequency * hw.MHz(coreCount)) - hw.MHz(old.CpuShares)
|
|
||||||
|
|
||||||
return &numalib.Topology{
|
|
||||||
// legacy: assume one node with id 0
|
|
||||||
NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}),
|
|
||||||
|
|
||||||
// legacy: with one node the distance matrix is 1-D
|
|
||||||
Distances: numalib.SLIT{{10}},
|
|
||||||
|
|
||||||
// legacy: a pseudo representation of each actual core profile
|
|
||||||
Cores: cores,
|
|
||||||
|
|
||||||
// legacy: set since we have the value
|
|
||||||
OverrideTotalCompute: hw.MHz(old.CpuShares),
|
|
||||||
|
|
||||||
// legacy: set since we can compute the value
|
|
||||||
OverrideWitholdCompute: withheld,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
50
nomad/structs/cpucompat_generic.go
Normal file
50
nomad/structs/cpucompat_generic.go
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
// Copyright (c) HashiCorp, Inc.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
|
package structs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/hashicorp/nomad/client/lib/idset"
|
||||||
|
"github.com/hashicorp/nomad/client/lib/numalib"
|
||||||
|
"github.com/hashicorp/nomad/client/lib/numalib/hw"
|
||||||
|
)
|
||||||
|
|
||||||
|
func topologyFromLegacyGeneric(old LegacyNodeCpuResources) *numalib.Topology {
|
||||||
|
coreCount := old.TotalCpuCores
|
||||||
|
|
||||||
|
// interpret per-core frequency given total compute and total core count
|
||||||
|
frequency := hw.MHz(old.CpuShares / (int64(coreCount)))
|
||||||
|
|
||||||
|
// synthesize a set of cores that abstractly matches the legacy cpu specs
|
||||||
|
cores := make([]numalib.Core, 0, coreCount)
|
||||||
|
|
||||||
|
for i := 0; i < int(coreCount); i++ {
|
||||||
|
cores = append(cores, numalib.Core{
|
||||||
|
ID: hw.CoreID(i),
|
||||||
|
SocketID: 0, // no numa support on non-linux
|
||||||
|
NodeID: 0, // no numa support on non-linux
|
||||||
|
Grade: numalib.Performance, // assume P-cores
|
||||||
|
Disable: false, // no reservable cores on non-linux
|
||||||
|
GuessSpeed: frequency,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
withheld := (frequency * hw.MHz(coreCount)) - hw.MHz(old.CpuShares)
|
||||||
|
|
||||||
|
return &numalib.Topology{
|
||||||
|
// legacy: assume one node with id 0
|
||||||
|
NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}),
|
||||||
|
|
||||||
|
// legacy: with one node the distance matrix is 1-D
|
||||||
|
Distances: numalib.SLIT{{10}},
|
||||||
|
|
||||||
|
// legacy: a pseudo representation of each actual core profile
|
||||||
|
Cores: cores,
|
||||||
|
|
||||||
|
// legacy: set since we have the value
|
||||||
|
OverrideTotalCompute: hw.MHz(old.CpuShares),
|
||||||
|
|
||||||
|
// legacy: set since we can compute the value
|
||||||
|
OverrideWitholdCompute: withheld,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -45,6 +45,13 @@ func (n *NodeResources) Compatibility() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
|
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
|
||||||
|
if len(old.ReservableCpuCores) == 0 {
|
||||||
|
return topologyFromLegacyGeneric(old)
|
||||||
|
}
|
||||||
|
return topologyFromLegacyLinux(old)
|
||||||
|
}
|
||||||
|
|
||||||
|
func topologyFromLegacyLinux(old LegacyNodeCpuResources) *numalib.Topology {
|
||||||
// interpret per-core frequency given total compute and total core count
|
// interpret per-core frequency given total compute and total core count
|
||||||
frequency := hw.MHz(old.CpuShares / (int64(len(old.ReservableCpuCores))))
|
frequency := hw.MHz(old.CpuShares / (int64(len(old.ReservableCpuCores))))
|
||||||
|
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ type LegacyNodeCpuResources struct {
|
|||||||
// partial struct serialization / copy / merge sadness means this struct can
|
// partial struct serialization / copy / merge sadness means this struct can
|
||||||
// exist with no data, which is a condition we must detect during the upgrade path
|
// exist with no data, which is a condition we must detect during the upgrade path
|
||||||
func (r LegacyNodeCpuResources) empty() bool {
|
func (r LegacyNodeCpuResources) empty() bool {
|
||||||
return r.CpuShares == 0 || r.TotalCpuCores == 0 || len(r.ReservableCpuCores) == 0
|
return r.CpuShares == 0 || r.TotalCpuCores == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// NomadProcessorResources captures the CPU hardware resources of the Nomad node.
|
// NomadProcessorResources captures the CPU hardware resources of the Nomad node.
|
||||||
|
|||||||
Reference in New Issue
Block a user