mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
client: fixed a bug where AMD CPUs were not correctly fingerprinting base speed (#24415)
Relates to: #19468
This commit is contained in:
3
.changelog/24415.txt
Normal file
3
.changelog/24415.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
client: fixed a bug where AMD CPUs were not correctly fingerprinting base speed
|
||||
```
|
||||
@@ -30,16 +30,18 @@ func PlatformScanners() []SystemScanner {
|
||||
}
|
||||
|
||||
const (
|
||||
sysRoot = "/sys/devices/system"
|
||||
nodeOnline = sysRoot + "/node/online"
|
||||
cpuOnline = sysRoot + "/cpu/online"
|
||||
distanceFile = sysRoot + "/node/node%d/distance"
|
||||
cpulistFile = sysRoot + "/node/node%d/cpulist"
|
||||
cpuMaxFile = sysRoot + "/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
|
||||
cpuBaseFile = sysRoot + "/cpu/cpu%d/cpufreq/base_frequency"
|
||||
cpuSocketFile = sysRoot + "/cpu/cpu%d/topology/physical_package_id"
|
||||
cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list"
|
||||
deviceFiles = "/sys/bus/pci/devices"
|
||||
sysRoot = "/sys/devices/system"
|
||||
nodeOnline = sysRoot + "/node/online"
|
||||
cpuOnline = sysRoot + "/cpu/online"
|
||||
distanceFile = sysRoot + "/node/node%d/distance"
|
||||
cpulistFile = sysRoot + "/node/node%d/cpulist"
|
||||
cpuDriverFile = sysRoot + "/cpu/cpu%d/cpufreq/scaling_driver"
|
||||
cpuMaxFile = sysRoot + "/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
|
||||
cpuCpccNominalFile = sysRoot + "/cpu/cpu%d/acpi_cppc/nominal_freq"
|
||||
cpuIntelBaseFile = sysRoot + "/cpu/cpu%d/cpufreq/base_frequency"
|
||||
cpuSocketFile = sysRoot + "/cpu/cpu%d/topology/physical_package_id"
|
||||
cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list"
|
||||
deviceFiles = "/sys/bus/pci/devices"
|
||||
)
|
||||
|
||||
// pathReaderFn is a path reader function, injected into all value getters to
|
||||
@@ -131,8 +133,8 @@ func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
|
||||
st.nodeIDs = idset.From[hw.NodeID]([]hw.NodeID{0})
|
||||
const node = 0
|
||||
const socket = 0
|
||||
cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, 64, readerFunc, core)
|
||||
base, _ := getNumeric[hw.KHz](cpuBaseFile, 64, readerFunc, core)
|
||||
|
||||
base, cpuMax := discoverCoreSpeeds(core, readerFunc)
|
||||
st.insert(node, socket, core, Performance, cpuMax, base)
|
||||
st.Nodes = st.nodeIDs.Slice()
|
||||
return nil
|
||||
@@ -149,9 +151,8 @@ func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
|
||||
_ = cores.ForEach(func(core hw.CoreID) error {
|
||||
// best effort, zero values are defaults
|
||||
socket, _ := getNumeric[hw.SocketID](cpuSocketFile, 8, readerFunc, core)
|
||||
cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, 64, readerFunc, core)
|
||||
base, _ := getNumeric[hw.KHz](cpuBaseFile, 64, readerFunc, core)
|
||||
siblings, _ := getIDSet[hw.CoreID](cpuSiblingFile, readerFunc, core)
|
||||
base, cpuMax := discoverCoreSpeeds(core, readerFunc)
|
||||
|
||||
// if we get an incorrect core number, this means we're not getting the right
|
||||
// data from SysFS. In this case we bail and set default values.
|
||||
@@ -167,6 +168,28 @@ func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
|
||||
}
|
||||
}
|
||||
|
||||
func discoverCoreSpeeds(core hw.CoreID, readerFunc pathReaderFn) (hw.KHz, hw.KHz) {
|
||||
baseSpeed := hw.KHz(0)
|
||||
maxSpeed := hw.KHz(0)
|
||||
|
||||
driver, _ := getString(cpuDriverFile, readerFunc, core)
|
||||
|
||||
switch driver {
|
||||
case "acpi-cpufreq":
|
||||
// Indicates the highest sustained performance level of the processor
|
||||
baseSpeedMHz, _ := getNumeric[hw.MHz](cpuCpccNominalFile, 64, readerFunc, core)
|
||||
baseSpeed = baseSpeedMHz.KHz()
|
||||
default:
|
||||
// COMPAT(1.9.x): while the `base_frequency` file is specific to the `intel_pstate` scaling driver, we should
|
||||
// preserve the default while we may uncover more scaling driver specific implementations.
|
||||
baseSpeed, _ = getNumeric[hw.KHz](cpuIntelBaseFile, 64, readerFunc, core)
|
||||
}
|
||||
|
||||
maxSpeed, _ = getNumeric[hw.KHz](cpuMaxFile, 64, readerFunc, core)
|
||||
|
||||
return baseSpeed, maxSpeed
|
||||
}
|
||||
|
||||
func getIDSet[T idset.ID](path string, readerFunc pathReaderFn, args ...any) (*idset.Set[T], error) {
|
||||
path = fmt.Sprintf(path, args...)
|
||||
s, err := readerFunc(path)
|
||||
|
||||
@@ -68,6 +68,37 @@ func goodSysData(path string) ([]byte, error) {
|
||||
}[path], nil
|
||||
}
|
||||
|
||||
func goodSysDataAMD(path string) ([]byte, error) {
|
||||
return map[string][]byte{
|
||||
"/sys/devices/system/node/online": []byte("0-1"),
|
||||
"/sys/devices/system/cpu/online": []byte("0-3"),
|
||||
"/sys/devices/system/node/node0/distance": []byte("10"),
|
||||
"/sys/devices/system/node/node0/cpulist": []byte("0-3"),
|
||||
"/sys/devices/system/node/node1/distance": []byte("10"),
|
||||
"/sys/devices/system/node/node1/cpulist": []byte("0-3"),
|
||||
"/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq": []byte("2450"),
|
||||
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq": []byte("3500000"),
|
||||
"/sys/devices/system/cpu/cpu0/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
|
||||
"/sys/devices/system/cpu/cpu0/topology/physical_package_id": []byte("0"),
|
||||
"/sys/devices/system/cpu/cpu0/topology/thread_siblings_list": []byte("0,2"),
|
||||
"/sys/devices/system/cpu/cpu1/acpi_cppc/nominal_freq": []byte("2450"),
|
||||
"/sys/devices/system/cpu/cpu1/cpufreq/cpuinfo_max_freq": []byte("3500000"),
|
||||
"/sys/devices/system/cpu/cpu1/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
|
||||
"/sys/devices/system/cpu/cpu1/topology/physical_package_id": []byte("0"),
|
||||
"/sys/devices/system/cpu/cpu1/topology/thread_siblings_list": []byte("1,3"),
|
||||
"/sys/devices/system/cpu/cpu2/acpi_cppc/nominal_freq": []byte("2450"),
|
||||
"/sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_max_freq": []byte("3500000"),
|
||||
"/sys/devices/system/cpu/cpu2/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
|
||||
"/sys/devices/system/cpu/cpu2/topology/physical_package_id": []byte("0"),
|
||||
"/sys/devices/system/cpu/cpu2/topology/thread_siblings_list": []byte("0,2"),
|
||||
"/sys/devices/system/cpu/cpu3/acpi_cppc/nominal_freq": []byte("2450"),
|
||||
"/sys/devices/system/cpu/cpu3/cpufreq/cpuinfo_max_freq": []byte("3500000"),
|
||||
"/sys/devices/system/cpu/cpu3/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
|
||||
"/sys/devices/system/cpu/cpu3/topology/physical_package_id": []byte("0"),
|
||||
"/sys/devices/system/cpu/cpu3/topology/thread_siblings_list": []byte("1,3"),
|
||||
}[path], nil
|
||||
}
|
||||
|
||||
func TestSysfs_discoverOnline(t *testing.T) {
|
||||
st := MockTopology(&idset.Set[hw.NodeID]{}, SLIT{}, []Core{})
|
||||
goodIDSet := idset.From[hw.NodeID]([]uint8{0, 1})
|
||||
@@ -195,6 +226,44 @@ func TestSysfs_discoverCores(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}},
|
||||
{"two nodes and good sys AMD data", twoNodes, goodSysDataAMD, &Topology{
|
||||
nodeIDs: twoNodes,
|
||||
Nodes: twoNodes.Slice(),
|
||||
Cores: []Core{
|
||||
{
|
||||
SocketID: 1,
|
||||
NodeID: 0,
|
||||
ID: 0,
|
||||
Grade: Performance,
|
||||
BaseSpeed: 2450,
|
||||
MaxSpeed: 3500,
|
||||
},
|
||||
{
|
||||
SocketID: 1,
|
||||
NodeID: 0,
|
||||
ID: 1,
|
||||
Grade: Performance,
|
||||
BaseSpeed: 2450,
|
||||
MaxSpeed: 3500,
|
||||
},
|
||||
{
|
||||
SocketID: 1,
|
||||
NodeID: 0,
|
||||
ID: 2,
|
||||
Grade: Performance,
|
||||
BaseSpeed: 2450,
|
||||
MaxSpeed: 3500,
|
||||
},
|
||||
{
|
||||
SocketID: 1,
|
||||
NodeID: 0,
|
||||
ID: 3,
|
||||
Grade: Performance,
|
||||
BaseSpeed: 2450,
|
||||
MaxSpeed: 3500,
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
||||
@@ -16,6 +16,10 @@ func (khz KHz) MHz() MHz {
|
||||
return MHz(khz / 1000)
|
||||
}
|
||||
|
||||
func (mhz MHz) KHz() KHz {
|
||||
return KHz(mhz * 1000)
|
||||
}
|
||||
|
||||
func (khz KHz) String() string {
|
||||
return strconv.FormatUint(uint64(khz.MHz()), 10)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user