numa: account for incorrect core number on topology.insert (#19383)

Unsupported environments like containers or guests OSs inside LXD can
incorrectly number of available cores thus leading to numalib having trouble
detecting cores and panicking. This code adds tests for linux sysfs detection
methods and fixes the panic.
This commit is contained in:
Piotr Kazmierczak
2023-12-13 17:40:26 +01:00
committed by GitHub
parent d2fc7cc0c4
commit b6dd376100
4 changed files with 249 additions and 24 deletions

3
.changelog/19383.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
client: Fix a panic in building CPU topology when inaccurate CPU data is provided
```

View File

@@ -38,6 +38,10 @@ const (
cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list"
)
// pathReaderFn is a path reader function, injected into all value getters to
// ease testing.
type pathReaderFn func(string) ([]byte, error)
// Sysfs implements SystemScanner for Linux by reading system topology data
// from /sys/devices/system. This is the best source of truth on Linux and
// should always be used first - additional scanners can provide more context
@@ -46,27 +50,27 @@ type Sysfs struct{}
func (s *Sysfs) ScanSystem(top *Topology) {
// detect the online numa nodes
s.discoverOnline(top)
s.discoverOnline(top, os.ReadFile)
// detect cross numa node latency costs
s.discoverCosts(top)
s.discoverCosts(top, os.ReadFile)
// detect core performance data
s.discoverCores(top)
s.discoverCores(top, os.ReadFile)
}
func (*Sysfs) available() bool {
return true
}
func (*Sysfs) discoverOnline(st *Topology) {
ids, err := getIDSet[hw.NodeID](nodeOnline)
func (*Sysfs) discoverOnline(st *Topology, readerFunc pathReaderFn) {
ids, err := getIDSet[hw.NodeID](nodeOnline, readerFunc)
if err == nil {
st.NodeIDs = ids
}
}
func (*Sysfs) discoverCosts(st *Topology) {
func (*Sysfs) discoverCosts(st *Topology, readerFunc pathReaderFn) {
if st.NodeIDs.Empty() {
return
}
@@ -78,7 +82,7 @@ func (*Sysfs) discoverCosts(st *Topology) {
}
_ = st.NodeIDs.ForEach(func(id hw.NodeID) error {
s, err := getString(distanceFile, id)
s, err := getString(distanceFile, readerFunc, id)
if err != nil {
return err
}
@@ -91,8 +95,8 @@ func (*Sysfs) discoverCosts(st *Topology) {
})
}
func (*Sysfs) discoverCores(st *Topology) {
onlineCores, err := getIDSet[hw.CoreID](cpuOnline)
func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
onlineCores, err := getIDSet[hw.CoreID](cpuOnline, readerFunc)
if err != nil {
return
}
@@ -105,15 +109,15 @@ func (*Sysfs) discoverCores(st *Topology) {
st.NodeIDs = idset.From[hw.NodeID]([]hw.NodeID{0})
const node = 0
const socket = 0
max, _ := getNumeric[hw.KHz](cpuMaxFile, core)
base, _ := getNumeric[hw.KHz](cpuBaseFile, core)
st.insert(node, socket, core, Performance, max, base)
cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, readerFunc, core)
base, _ := getNumeric[hw.KHz](cpuBaseFile, readerFunc, core)
st.insert(node, socket, core, Performance, cpuMax, base)
return nil
})
default:
// We found node data, associate cores to nodes
_ = st.NodeIDs.ForEach(func(node hw.NodeID) error {
s, err := os.ReadFile(fmt.Sprintf(cpulistFile, node))
s, err := readerFunc(fmt.Sprintf(cpulistFile, node))
if err != nil {
return err
}
@@ -121,11 +125,18 @@ func (*Sysfs) discoverCores(st *Topology) {
cores := idset.Parse[hw.CoreID](string(s))
_ = cores.ForEach(func(core hw.CoreID) error {
// best effort, zero values are defaults
socket, _ := getNumeric[hw.SocketID](cpuSocketFile, core)
max, _ := getNumeric[hw.KHz](cpuMaxFile, core)
base, _ := getNumeric[hw.KHz](cpuBaseFile, core)
siblings, _ := getIDSet[hw.CoreID](cpuSiblingFile, core)
st.insert(node, socket, core, gradeOf(siblings), max, base)
socket, _ := getNumeric[hw.SocketID](cpuSocketFile, readerFunc, core)
cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, readerFunc, core)
base, _ := getNumeric[hw.KHz](cpuBaseFile, readerFunc, core)
siblings, _ := getIDSet[hw.CoreID](cpuSiblingFile, readerFunc, core)
// if we get an incorrect core number, this means we're not getting the right
// data from SysFS. In this case we bail and set default values.
if int(core) >= len(st.Cores) {
return nil
}
st.insert(node, socket, core, gradeOf(siblings), cpuMax, base)
return nil
})
return nil
@@ -133,18 +144,18 @@ func (*Sysfs) discoverCores(st *Topology) {
}
}
func getIDSet[T idset.ID](path string, args ...any) (*idset.Set[T], error) {
func getIDSet[T idset.ID](path string, readerFunc pathReaderFn, args ...any) (*idset.Set[T], error) {
path = fmt.Sprintf(path, args...)
s, err := os.ReadFile(path)
s, err := readerFunc(path)
if err != nil {
return nil, err
}
return idset.Parse[T](string(s)), nil
}
func getNumeric[T int | idset.ID](path string, args ...any) (T, error) {
func getNumeric[T int | idset.ID](path string, readerFunc pathReaderFn, args ...any) (T, error) {
path = fmt.Sprintf(path, args...)
s, err := os.ReadFile(path)
s, err := readerFunc(path)
if err != nil {
return 0, err
}
@@ -155,9 +166,9 @@ func getNumeric[T int | idset.ID](path string, args ...any) (T, error) {
return T(i), nil
}
func getString(path string, args ...any) (string, error) {
func getString(path string, readerFunc pathReaderFn, args ...any) (string, error) {
path = fmt.Sprintf(path, args...)
s, err := os.ReadFile(path)
s, err := readerFunc(path)
if err != nil {
return "", err
}

View File

@@ -0,0 +1,205 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
//go:build linux
package numalib
import (
"os"
"testing"
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
"github.com/shoenig/test/must"
)
// badSysData are example values from sysfs on unsupported platforms, e.g.,
// containers, virtualization guests
func badSysData(path string) ([]byte, error) {
return map[string][]byte{
"/sys/devices/system/node/online": []byte("0"),
"/sys/devices/system/cpu/online": []byte("1,3"), // cpuOnline data indicates 2 CPU IDs online: 1 and 3
"/sys/devices/system/node/node0/distance": []byte("10"),
"/sys/devices/system/node/node0/cpulist": []byte("0-3"), // cpuList data indicates 4 CPU cores available on node0 (can't be true)
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu0/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu0/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu0/topology/thread_siblings_list": []byte("0,2"),
"/sys/devices/system/cpu/cpu1/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu1/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu1/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu1/topology/thread_siblings_list": []byte("1,3"),
"/sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu2/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu2/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu2/topology/thread_siblings_list": []byte("0,2"),
"/sys/devices/system/cpu/cpu3/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu3/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu3/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu3/topology/thread_siblings_list": []byte("1,3"),
}[path], nil
}
func goodSysData(path string) ([]byte, error) {
return map[string][]byte{
"/sys/devices/system/node/online": []byte("0-1"),
"/sys/devices/system/cpu/online": []byte("0-3"),
"/sys/devices/system/node/node0/distance": []byte("10"),
"/sys/devices/system/node/node0/cpulist": []byte("0-3"),
"/sys/devices/system/node/node1/distance": []byte("10"),
"/sys/devices/system/node/node1/cpulist": []byte("0-3"),
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu0/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu0/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu0/topology/thread_siblings_list": []byte("0,2"),
"/sys/devices/system/cpu/cpu1/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu1/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu1/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu1/topology/thread_siblings_list": []byte("1,3"),
"/sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu2/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu2/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu2/topology/thread_siblings_list": []byte("0,2"),
"/sys/devices/system/cpu/cpu3/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu3/cpufreq/base_frequency": []byte("2100000"),
"/sys/devices/system/cpu/cpu3/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu3/topology/thread_siblings_list": []byte("1,3"),
}[path], nil
}
func TestSysfs_discoverOnline(t *testing.T) {
st := NewTopology(&idset.Set[hw.NodeID]{}, SLIT{}, []Core{})
goodIDSet := idset.From[hw.NodeID]([]uint8{0, 1})
oneNode := idset.From[hw.NodeID]([]uint8{0})
tests := []struct {
name string
readerFunc pathReaderFn
expectedIDSet *idset.Set[hw.NodeID]
}{
{"lxc values", badSysData, oneNode},
{"good values", goodSysData, goodIDSet},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sy := &Sysfs{}
sy.discoverOnline(st, tt.readerFunc)
must.Eq(t, tt.expectedIDSet, st.NodeIDs)
})
}
}
func TestSysfs_discoverCosts(t *testing.T) {
st := NewTopology(idset.Empty[hw.NodeID](), SLIT{}, []Core{})
twoNodes := idset.From[hw.NodeID]([]uint8{1, 3})
tests := []struct {
name string
nodeIDs *idset.Set[hw.NodeID]
readerFunc pathReaderFn
expectedDistances SLIT
}{
{"empty node IDs", idset.Empty[hw.NodeID](), os.ReadFile, SLIT{}},
{"two nodes and bad sys data", twoNodes, badSysData, SLIT{
[]Cost{0, 0},
[]Cost{0, 0},
}},
{"two nodes and good sys data", twoNodes, goodSysData, SLIT{
[]Cost{0, 0},
[]Cost{10, 0},
}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sy := &Sysfs{}
st.NodeIDs = tt.nodeIDs
sy.discoverCosts(st, tt.readerFunc)
must.Eq(t, tt.expectedDistances, st.Distances)
})
}
}
func TestSysfs_discoverCores(t *testing.T) {
st := NewTopology(idset.Empty[hw.NodeID](), SLIT{}, []Core{})
oneNode := idset.From[hw.NodeID]([]uint8{0})
twoNodes := idset.From[hw.NodeID]([]uint8{1, 3})
tests := []struct {
name string
nodeIDs *idset.Set[hw.NodeID]
readerFunc pathReaderFn
expectedTopology *Topology
}{
{"empty core and node IDs", idset.Empty[hw.NodeID](), os.ReadFile, &Topology{}},
{"empty node IDs", idset.Empty[hw.NodeID](), goodSysData, &Topology{}},
// issue#19372
{"one node and bad sys data", oneNode, badSysData, &Topology{
NodeIDs: oneNode,
Cores: []Core{
{
SocketID: 0,
NodeID: 0,
ID: 0,
Grade: Performance,
BaseSpeed: 2100,
MaxSpeed: 3500,
},
{
SocketID: 0,
NodeID: 0,
ID: 1,
Grade: Performance,
BaseSpeed: 2100,
MaxSpeed: 3500,
},
},
}},
{"two nodes and good sys data", twoNodes, goodSysData, &Topology{
NodeIDs: twoNodes,
Cores: []Core{
{
SocketID: 1,
NodeID: 0,
ID: 0,
Grade: Performance,
BaseSpeed: 2100,
MaxSpeed: 3500,
},
{
SocketID: 1,
NodeID: 0,
ID: 1,
Grade: Performance,
BaseSpeed: 2100,
MaxSpeed: 3500,
},
{
SocketID: 1,
NodeID: 0,
ID: 2,
Grade: Performance,
BaseSpeed: 2100,
MaxSpeed: 3500,
},
{
SocketID: 1,
NodeID: 0,
ID: 3,
Grade: Performance,
BaseSpeed: 2100,
MaxSpeed: 3500,
},
},
}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sy := &Sysfs{}
st.NodeIDs = tt.nodeIDs
sy.discoverCores(st, tt.readerFunc)
must.Eq(t, tt.expectedTopology, st)
})
}
}

View File

@@ -63,6 +63,12 @@ type Topology struct {
OverrideWitholdCompute hw.MHz
}
// NewTopology is a constructor for the Topology object, only used in tests for
// mocking.
func NewTopology(nodeIDs *idset.Set[hw.NodeID], distances SLIT, cores []Core) *Topology {
return &Topology{NodeIDs: nodeIDs, Distances: distances, Cores: cores}
}
// A Core represents one logical (vCPU) core on a processor. Basically the slice
// of cores detected should match up with the vCPU description in cloud providers.
type Core struct {