Files
nomad/client/lib/numalib/topology.go

328 lines
8.5 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
// Package numalib provides information regarding the system NUMA, CPU, and
// device topology of the system.
//
// https://docs.kernel.org/6.2/x86/topology.html
package numalib
import (
"fmt"
"runtime"
"strings"
"github.com/hashicorp/nomad/client/lib/cpustats"
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
)
// CoreGrade describes whether a specific core is a performance or efficiency
// core type. A performance core generally has a higher clockspeed and can do
// more than an efficiency core.
type CoreGrade bool
const (
Performance CoreGrade = true
Efficiency CoreGrade = false
)
func gradeOf(siblings *idset.Set[hw.CoreID]) CoreGrade {
switch siblings.Size() {
case 0, 1:
return Efficiency
default:
return Performance
}
}
func (g CoreGrade) String() string {
switch g {
case Performance:
return "performance"
default:
return "efficiency"
}
}
type (
Cost uint8
)
// A Topology provides a bird-eye view of the system NUMA topology.
//
// The JSON encoding is not used yet but my be part of the gRPC plumbing
// in the future.
type Topology struct {
// COMPAT: idset.Set wasn't being serialized correctly but we can't change
// the encoding of a field once its shipped. Nodes is the wire
// representation
nodeIDs *idset.Set[hw.NodeID]
Nodes []uint8
Distances SLIT
Cores []Core
// BusAssociativity maps the specific bus each PCI device is plugged into
// with its hardware associated numa node
//
// e.g. "0000:03:00.0" -> 1
//
// Note that the key may not exactly match the Locality.PciBusID from the
// fingerprint of the device with regard to the domain value.
//
//
// 0000:03:00.0
// ^ ^ ^ ^
// | | | |-- function (identifies functionality of device)
// | | |-- device (identifies the device number on the bus)
// | |
// | |-- bus (identifies which bus segment the device is connected to)
// |
// |-- domain (basically always 0, may be 0000 or 00000000)
BusAssociativity map[string]hw.NodeID
// explicit overrides from client configuration
OverrideTotalCompute hw.MHz
OverrideWitholdCompute hw.MHz
}
func (st *Topology) SetNodes(nodes *idset.Set[hw.NodeID]) {
st.nodeIDs = nodes
if !nodes.Empty() {
st.Nodes = nodes.Slice()
} else {
st.Nodes = []uint8{}
}
}
func (st *Topology) SetNodesFrom(nodes []uint8) {
st.nodeIDs = idset.From[hw.NodeID](nodes)
st.Nodes = nodes
}
// A Core represents one logical (vCPU) core on a processor. Basically the slice
// of cores detected should match up with the vCPU description in cloud providers.
type Core struct {
SocketID hw.SocketID
NodeID hw.NodeID
ID hw.CoreID
Grade CoreGrade
Disable bool // indicates whether Nomad must not use this core
BaseSpeed hw.MHz // cpuinfo_base_freq (primary choice)
MaxSpeed hw.MHz // cpuinfo_max_freq (second choice)
GuessSpeed hw.MHz // best effort (fallback)
}
func (c Core) String() string {
return fmt.Sprintf(
"(%d %d %d %s %d %d)",
c.NodeID, c.SocketID, c.ID, c.Grade, c.MaxSpeed, c.BaseSpeed,
)
}
func (c Core) MHz() hw.MHz {
switch {
case c.BaseSpeed > 0:
return c.BaseSpeed
case c.MaxSpeed > 0:
return c.MaxSpeed
}
return c.GuessSpeed
}
// SLIT (system locality information table) describes the relative cost for
// accessing memory across each combination of NUMA node boundary.
type SLIT [][]Cost
func (d SLIT) cost(a, b hw.NodeID) Cost {
return d[a][b]
}
func (st *Topology) NodeDistance(node hw.NodeID, core Core) Cost {
// todo(shoenig) we should memoize these values - they never change but
// they get used a lot in numa scheduling
// fast path, core is on node
if core.NodeID == node {
// return the distance to itself (100%)
return st.Distances.cost(node, node)
}
// find a core on node to compare with
for _, target := range st.Cores {
if target.NodeID == node {
return st.Distances.cost(target.NodeID, core.NodeID)
}
}
// should not be possible
panic("topology: no node distance")
}
// SupportsNUMA returns whether Nomad supports NUMA detection on the client's
// operating system. Currently only supported on Linux.
func (st *Topology) SupportsNUMA() bool {
switch runtime.GOOS {
case "linux":
return true
default:
return false
}
}
// GetNodes returns the set of NUMA Node IDs.
func (st *Topology) GetNodes() *idset.Set[hw.NodeID] {
if st.nodeIDs.Empty() {
st.nodeIDs = idset.From[hw.NodeID](st.Nodes)
}
return st.nodeIDs
}
// NodeCores returns the set of Core IDs for the given NUMA Node ID.
func (st *Topology) NodeCores(node hw.NodeID) *idset.Set[hw.CoreID] {
result := idset.Empty[hw.CoreID]()
for _, cpu := range st.Cores {
if cpu.NodeID == node {
result.Insert(cpu.ID)
}
}
return result
}
func (st *Topology) insert(node hw.NodeID, socket hw.SocketID, core hw.CoreID, grade CoreGrade, max, base hw.KHz) {
st.Cores[core] = Core{
NodeID: node,
SocketID: socket,
ID: core,
Grade: grade,
MaxSpeed: max.MHz(),
BaseSpeed: base.MHz(),
}
}
func (st *Topology) String() string {
var sb strings.Builder
for _, cpu := range st.Cores {
sb.WriteString(cpu.String())
}
return sb.String()
}
// TotalCompute returns the amount of compute in MHz the detected hardware is
// ultimately capable of delivering. The UsableCompute will be equal to or
// less than this value.
//
// If the client configuration includes an override for total compute, that
// value is used instead even if it violates the above invariant.
func (st *Topology) TotalCompute() hw.MHz {
if st.OverrideTotalCompute > 0 {
// TODO(shoenig) Starting in Nomad 1.7 we should warn about setting
// cpu_total_compute override, and suggeset users who think they still
// need this to file a bug so we can understand what is not detectable.
return st.OverrideTotalCompute
}
var total hw.MHz
for _, cpu := range st.Cores {
total += cpu.MHz()
}
return total
}
// UsableCompute returns the amount of compute in MHz the Nomad client is able
// to make use of for running tasks. This value will be less than or equal to
// the TotalCompute of the system. Nomad must subtract off any reserved compute
// (reserved.cpu or reserved.cores) from the total hardware compute.
func (st *Topology) UsableCompute() hw.MHz {
if st.OverrideTotalCompute > 0 {
// TODO(shoenig) Starting in Nomad 1.7 we should warn about setting
// cpu_total_compute override, and suggeset users who think they still
// need this to file a bug so we can understand what is not detectable.
return st.OverrideTotalCompute
}
var total hw.MHz
for _, cpu := range st.Cores {
// only use cores allowable by config
if !cpu.Disable {
total += cpu.MHz()
}
}
// only use compute allowable by config
return total - st.OverrideWitholdCompute
}
// NumCores returns the number of logical cores detected. This includes both
// power and efficiency cores.
func (st *Topology) NumCores() int {
return len(st.Cores)
}
// NumPCores returns the number of logical performance cores detected.
func (st *Topology) NumPCores() int {
var total int
for _, cpu := range st.Cores {
if cpu.Grade == Performance {
total++
}
}
return total
}
// NumECores returns the number of logical efficiency cores detected.
func (st *Topology) NumECores() int {
var total int
for _, cpu := range st.Cores {
if cpu.Grade == Efficiency {
total++
}
}
return total
}
// UsableCores returns the number of logical cores usable by the Nomad client
// for running tasks. Nomad must subtract off any reserved cores (reserved.cores)
// and/or must mask the cpuset to the one set in config (config.reservable_cores).
func (st *Topology) UsableCores() *idset.Set[hw.CoreID] {
result := idset.Empty[hw.CoreID]()
for _, cpu := range st.Cores {
if !cpu.Disable {
result.Insert(cpu.ID)
}
}
return result
}
// CoreSpeeds returns the frequency in MHz of the performance and efficiency
// core types. If the CPU does not have effiency cores that value will be zero.
func (st *Topology) CoreSpeeds() (hw.MHz, hw.MHz) {
var pCore, eCore hw.MHz
for _, cpu := range st.Cores {
switch cpu.Grade {
case Performance:
pCore = cpu.MHz()
case Efficiency:
eCore = cpu.MHz()
}
}
return pCore, eCore
}
func (st *Topology) Compute() cpustats.Compute {
return cpustats.Compute{
TotalCompute: st.TotalCompute(),
NumCores: st.NumCores(),
}
}
func (st *Topology) Equal(o *Topology) bool {
if st == nil || o == nil {
return st == o
}
// simply iterates each core; the topology never changes for a node once
// it has been created at agent startup
return st.TotalCompute() == o.TotalCompute()
}