diff --git a/.changelog/19383.txt b/.changelog/19383.txt new file mode 100644 index 000000000..4dd87ff5b --- /dev/null +++ b/.changelog/19383.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fix a panic in building CPU topology when inaccurate CPU data is provided +``` diff --git a/client/lib/numalib/detect_linux.go b/client/lib/numalib/detect_linux.go index 7e78427e7..c4c88d1a8 100644 --- a/client/lib/numalib/detect_linux.go +++ b/client/lib/numalib/detect_linux.go @@ -38,6 +38,10 @@ const ( cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list" ) +// pathReaderFn is a path reader function, injected into all value getters to +// ease testing. +type pathReaderFn func(string) ([]byte, error) + // Sysfs implements SystemScanner for Linux by reading system topology data // from /sys/devices/system. This is the best source of truth on Linux and // should always be used first - additional scanners can provide more context @@ -46,27 +50,27 @@ type Sysfs struct{} func (s *Sysfs) ScanSystem(top *Topology) { // detect the online numa nodes - s.discoverOnline(top) + s.discoverOnline(top, os.ReadFile) // detect cross numa node latency costs - s.discoverCosts(top) + s.discoverCosts(top, os.ReadFile) // detect core performance data - s.discoverCores(top) + s.discoverCores(top, os.ReadFile) } func (*Sysfs) available() bool { return true } -func (*Sysfs) discoverOnline(st *Topology) { - ids, err := getIDSet[hw.NodeID](nodeOnline) +func (*Sysfs) discoverOnline(st *Topology, readerFunc pathReaderFn) { + ids, err := getIDSet[hw.NodeID](nodeOnline, readerFunc) if err == nil { st.NodeIDs = ids } } -func (*Sysfs) discoverCosts(st *Topology) { +func (*Sysfs) discoverCosts(st *Topology, readerFunc pathReaderFn) { if st.NodeIDs.Empty() { return } @@ -78,7 +82,7 @@ func (*Sysfs) discoverCosts(st *Topology) { } _ = st.NodeIDs.ForEach(func(id hw.NodeID) error { - s, err := getString(distanceFile, id) + s, err := getString(distanceFile, readerFunc, id) if err != nil { return err } @@ -91,8 +95,8 @@ func (*Sysfs) discoverCosts(st *Topology) { }) } -func (*Sysfs) discoverCores(st *Topology) { - onlineCores, err := getIDSet[hw.CoreID](cpuOnline) +func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) { + onlineCores, err := getIDSet[hw.CoreID](cpuOnline, readerFunc) if err != nil { return } @@ -105,15 +109,15 @@ func (*Sysfs) discoverCores(st *Topology) { st.NodeIDs = idset.From[hw.NodeID]([]hw.NodeID{0}) const node = 0 const socket = 0 - max, _ := getNumeric[hw.KHz](cpuMaxFile, core) - base, _ := getNumeric[hw.KHz](cpuBaseFile, core) - st.insert(node, socket, core, Performance, max, base) + cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, readerFunc, core) + base, _ := getNumeric[hw.KHz](cpuBaseFile, readerFunc, core) + st.insert(node, socket, core, Performance, cpuMax, base) return nil }) default: // We found node data, associate cores to nodes _ = st.NodeIDs.ForEach(func(node hw.NodeID) error { - s, err := os.ReadFile(fmt.Sprintf(cpulistFile, node)) + s, err := readerFunc(fmt.Sprintf(cpulistFile, node)) if err != nil { return err } @@ -121,11 +125,18 @@ func (*Sysfs) discoverCores(st *Topology) { cores := idset.Parse[hw.CoreID](string(s)) _ = cores.ForEach(func(core hw.CoreID) error { // best effort, zero values are defaults - socket, _ := getNumeric[hw.SocketID](cpuSocketFile, core) - max, _ := getNumeric[hw.KHz](cpuMaxFile, core) - base, _ := getNumeric[hw.KHz](cpuBaseFile, core) - siblings, _ := getIDSet[hw.CoreID](cpuSiblingFile, core) - st.insert(node, socket, core, gradeOf(siblings), max, base) + socket, _ := getNumeric[hw.SocketID](cpuSocketFile, readerFunc, core) + cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, readerFunc, core) + base, _ := getNumeric[hw.KHz](cpuBaseFile, readerFunc, core) + siblings, _ := getIDSet[hw.CoreID](cpuSiblingFile, readerFunc, core) + + // if we get an incorrect core number, this means we're not getting the right + // data from SysFS. In this case we bail and set default values. + if int(core) >= len(st.Cores) { + return nil + } + + st.insert(node, socket, core, gradeOf(siblings), cpuMax, base) return nil }) return nil @@ -133,18 +144,18 @@ func (*Sysfs) discoverCores(st *Topology) { } } -func getIDSet[T idset.ID](path string, args ...any) (*idset.Set[T], error) { +func getIDSet[T idset.ID](path string, readerFunc pathReaderFn, args ...any) (*idset.Set[T], error) { path = fmt.Sprintf(path, args...) - s, err := os.ReadFile(path) + s, err := readerFunc(path) if err != nil { return nil, err } return idset.Parse[T](string(s)), nil } -func getNumeric[T int | idset.ID](path string, args ...any) (T, error) { +func getNumeric[T int | idset.ID](path string, readerFunc pathReaderFn, args ...any) (T, error) { path = fmt.Sprintf(path, args...) - s, err := os.ReadFile(path) + s, err := readerFunc(path) if err != nil { return 0, err } @@ -155,9 +166,9 @@ func getNumeric[T int | idset.ID](path string, args ...any) (T, error) { return T(i), nil } -func getString(path string, args ...any) (string, error) { +func getString(path string, readerFunc pathReaderFn, args ...any) (string, error) { path = fmt.Sprintf(path, args...) - s, err := os.ReadFile(path) + s, err := readerFunc(path) if err != nil { return "", err } diff --git a/client/lib/numalib/detect_linux_test.go b/client/lib/numalib/detect_linux_test.go new file mode 100644 index 000000000..93851499f --- /dev/null +++ b/client/lib/numalib/detect_linux_test.go @@ -0,0 +1,205 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build linux + +package numalib + +import ( + "os" + "testing" + + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/shoenig/test/must" +) + +// badSysData are example values from sysfs on unsupported platforms, e.g., +// containers, virtualization guests +func badSysData(path string) ([]byte, error) { + return map[string][]byte{ + "/sys/devices/system/node/online": []byte("0"), + "/sys/devices/system/cpu/online": []byte("1,3"), // cpuOnline data indicates 2 CPU IDs online: 1 and 3 + "/sys/devices/system/node/node0/distance": []byte("10"), + "/sys/devices/system/node/node0/cpulist": []byte("0-3"), // cpuList data indicates 4 CPU cores available on node0 (can't be true) + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu0/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu0/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu0/topology/thread_siblings_list": []byte("0,2"), + "/sys/devices/system/cpu/cpu1/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu1/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu1/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu1/topology/thread_siblings_list": []byte("1,3"), + "/sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu2/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu2/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu2/topology/thread_siblings_list": []byte("0,2"), + "/sys/devices/system/cpu/cpu3/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu3/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu3/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu3/topology/thread_siblings_list": []byte("1,3"), + }[path], nil +} + +func goodSysData(path string) ([]byte, error) { + return map[string][]byte{ + "/sys/devices/system/node/online": []byte("0-1"), + "/sys/devices/system/cpu/online": []byte("0-3"), + "/sys/devices/system/node/node0/distance": []byte("10"), + "/sys/devices/system/node/node0/cpulist": []byte("0-3"), + "/sys/devices/system/node/node1/distance": []byte("10"), + "/sys/devices/system/node/node1/cpulist": []byte("0-3"), + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu0/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu0/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu0/topology/thread_siblings_list": []byte("0,2"), + "/sys/devices/system/cpu/cpu1/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu1/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu1/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu1/topology/thread_siblings_list": []byte("1,3"), + "/sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu2/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu2/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu2/topology/thread_siblings_list": []byte("0,2"), + "/sys/devices/system/cpu/cpu3/cpufreq/cpuinfo_max_freq": []byte("3500000"), + "/sys/devices/system/cpu/cpu3/cpufreq/base_frequency": []byte("2100000"), + "/sys/devices/system/cpu/cpu3/topology/physical_package_id": []byte("0"), + "/sys/devices/system/cpu/cpu3/topology/thread_siblings_list": []byte("1,3"), + }[path], nil +} + +func TestSysfs_discoverOnline(t *testing.T) { + st := NewTopology(&idset.Set[hw.NodeID]{}, SLIT{}, []Core{}) + goodIDSet := idset.From[hw.NodeID]([]uint8{0, 1}) + oneNode := idset.From[hw.NodeID]([]uint8{0}) + + tests := []struct { + name string + readerFunc pathReaderFn + expectedIDSet *idset.Set[hw.NodeID] + }{ + {"lxc values", badSysData, oneNode}, + {"good values", goodSysData, goodIDSet}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sy := &Sysfs{} + sy.discoverOnline(st, tt.readerFunc) + must.Eq(t, tt.expectedIDSet, st.NodeIDs) + }) + } +} + +func TestSysfs_discoverCosts(t *testing.T) { + st := NewTopology(idset.Empty[hw.NodeID](), SLIT{}, []Core{}) + twoNodes := idset.From[hw.NodeID]([]uint8{1, 3}) + + tests := []struct { + name string + nodeIDs *idset.Set[hw.NodeID] + readerFunc pathReaderFn + expectedDistances SLIT + }{ + {"empty node IDs", idset.Empty[hw.NodeID](), os.ReadFile, SLIT{}}, + {"two nodes and bad sys data", twoNodes, badSysData, SLIT{ + []Cost{0, 0}, + []Cost{0, 0}, + }}, + {"two nodes and good sys data", twoNodes, goodSysData, SLIT{ + []Cost{0, 0}, + []Cost{10, 0}, + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sy := &Sysfs{} + st.NodeIDs = tt.nodeIDs + sy.discoverCosts(st, tt.readerFunc) + must.Eq(t, tt.expectedDistances, st.Distances) + }) + } +} + +func TestSysfs_discoverCores(t *testing.T) { + st := NewTopology(idset.Empty[hw.NodeID](), SLIT{}, []Core{}) + oneNode := idset.From[hw.NodeID]([]uint8{0}) + twoNodes := idset.From[hw.NodeID]([]uint8{1, 3}) + + tests := []struct { + name string + nodeIDs *idset.Set[hw.NodeID] + readerFunc pathReaderFn + expectedTopology *Topology + }{ + {"empty core and node IDs", idset.Empty[hw.NodeID](), os.ReadFile, &Topology{}}, + {"empty node IDs", idset.Empty[hw.NodeID](), goodSysData, &Topology{}}, + + // issue#19372 + {"one node and bad sys data", oneNode, badSysData, &Topology{ + NodeIDs: oneNode, + Cores: []Core{ + { + SocketID: 0, + NodeID: 0, + ID: 0, + Grade: Performance, + BaseSpeed: 2100, + MaxSpeed: 3500, + }, + { + SocketID: 0, + NodeID: 0, + ID: 1, + Grade: Performance, + BaseSpeed: 2100, + MaxSpeed: 3500, + }, + }, + }}, + {"two nodes and good sys data", twoNodes, goodSysData, &Topology{ + NodeIDs: twoNodes, + Cores: []Core{ + { + SocketID: 1, + NodeID: 0, + ID: 0, + Grade: Performance, + BaseSpeed: 2100, + MaxSpeed: 3500, + }, + { + SocketID: 1, + NodeID: 0, + ID: 1, + Grade: Performance, + BaseSpeed: 2100, + MaxSpeed: 3500, + }, + { + SocketID: 1, + NodeID: 0, + ID: 2, + Grade: Performance, + BaseSpeed: 2100, + MaxSpeed: 3500, + }, + { + SocketID: 1, + NodeID: 0, + ID: 3, + Grade: Performance, + BaseSpeed: 2100, + MaxSpeed: 3500, + }, + }, + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sy := &Sysfs{} + st.NodeIDs = tt.nodeIDs + sy.discoverCores(st, tt.readerFunc) + must.Eq(t, tt.expectedTopology, st) + }) + } +} diff --git a/client/lib/numalib/topology.go b/client/lib/numalib/topology.go index 1b32fb7da..d7efd7b09 100644 --- a/client/lib/numalib/topology.go +++ b/client/lib/numalib/topology.go @@ -63,6 +63,12 @@ type Topology struct { OverrideWitholdCompute hw.MHz } +// NewTopology is a constructor for the Topology object, only used in tests for +// mocking. +func NewTopology(nodeIDs *idset.Set[hw.NodeID], distances SLIT, cores []Core) *Topology { + return &Topology{NodeIDs: nodeIDs, Distances: distances, Cores: cores} +} + // A Core represents one logical (vCPU) core on a processor. Basically the slice // of cores detected should match up with the vCPU description in cloud providers. type Core struct {