mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
328 lines
8.5 KiB
Go
328 lines
8.5 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
// Package numalib provides information regarding the NUMA, CPU, and
// device topology of the system.
|
|
//
|
|
// https://docs.kernel.org/6.2/x86/topology.html
|
|
package numalib
|
|
|
|
import (
|
|
"fmt"
|
|
"runtime"
|
|
"strings"
|
|
|
|
"github.com/hashicorp/nomad/client/lib/cpustats"
|
|
"github.com/hashicorp/nomad/client/lib/idset"
|
|
"github.com/hashicorp/nomad/client/lib/numalib/hw"
|
|
)
|
|
|
|
// CoreGrade classifies a core as either a performance core or an efficiency
// core. A performance core generally runs at a higher clock speed and can do
// more work than an efficiency core.
type CoreGrade bool
|
|
|
|
const (
|
|
Performance CoreGrade = true
|
|
Efficiency CoreGrade = false
|
|
)
|
|
|
|
func gradeOf(siblings *idset.Set[hw.CoreID]) CoreGrade {
|
|
switch siblings.Size() {
|
|
case 0, 1:
|
|
return Efficiency
|
|
default:
|
|
return Performance
|
|
}
|
|
}
|
|
|
|
func (g CoreGrade) String() string {
|
|
switch g {
|
|
case Performance:
|
|
return "performance"
|
|
default:
|
|
return "efficiency"
|
|
}
|
|
}
|
|
|
|
// Cost is a single entry of a SLIT: the relative cost of accessing memory
// between a pair of NUMA nodes.
type Cost uint8
|
|
|
|
// A Topology provides a bird-eye view of the system NUMA topology.
|
|
//
|
|
// The JSON encoding is not used yet but my be part of the gRPC plumbing
|
|
// in the future.
|
|
type Topology struct {
|
|
// COMPAT: idset.Set wasn't being serialized correctly but we can't change
|
|
// the encoding of a field once its shipped. Nodes is the wire
|
|
// representation
|
|
nodeIDs *idset.Set[hw.NodeID]
|
|
Nodes []uint8
|
|
|
|
Distances SLIT
|
|
Cores []Core
|
|
|
|
// BusAssociativity maps the specific bus each PCI device is plugged into
|
|
// with its hardware associated numa node
|
|
//
|
|
// e.g. "0000:03:00.0" -> 1
|
|
//
|
|
// Note that the key may not exactly match the Locality.PciBusID from the
|
|
// fingerprint of the device with regard to the domain value.
|
|
//
|
|
//
|
|
// 0000:03:00.0
|
|
// ^ ^ ^ ^
|
|
// | | | |-- function (identifies functionality of device)
|
|
// | | |-- device (identifies the device number on the bus)
|
|
// | |
|
|
// | |-- bus (identifies which bus segment the device is connected to)
|
|
// |
|
|
// |-- domain (basically always 0, may be 0000 or 00000000)
|
|
BusAssociativity map[string]hw.NodeID
|
|
|
|
// explicit overrides from client configuration
|
|
OverrideTotalCompute hw.MHz
|
|
OverrideWitholdCompute hw.MHz
|
|
}
|
|
|
|
func (st *Topology) SetNodes(nodes *idset.Set[hw.NodeID]) {
|
|
st.nodeIDs = nodes
|
|
if !nodes.Empty() {
|
|
st.Nodes = nodes.Slice()
|
|
} else {
|
|
st.Nodes = []uint8{}
|
|
}
|
|
}
|
|
|
|
func (st *Topology) SetNodesFrom(nodes []uint8) {
|
|
st.nodeIDs = idset.From[hw.NodeID](nodes)
|
|
st.Nodes = nodes
|
|
}
|
|
|
|
// A Core represents one logical (vCPU) core on a processor. Basically the slice
|
|
// of cores detected should match up with the vCPU description in cloud providers.
|
|
type Core struct {
|
|
SocketID hw.SocketID
|
|
NodeID hw.NodeID
|
|
ID hw.CoreID
|
|
Grade CoreGrade
|
|
Disable bool // indicates whether Nomad must not use this core
|
|
BaseSpeed hw.MHz // cpuinfo_base_freq (primary choice)
|
|
MaxSpeed hw.MHz // cpuinfo_max_freq (second choice)
|
|
GuessSpeed hw.MHz // best effort (fallback)
|
|
}
|
|
|
|
func (c Core) String() string {
|
|
return fmt.Sprintf(
|
|
"(%d %d %d %s %d %d)",
|
|
c.NodeID, c.SocketID, c.ID, c.Grade, c.MaxSpeed, c.BaseSpeed,
|
|
)
|
|
}
|
|
|
|
func (c Core) MHz() hw.MHz {
|
|
switch {
|
|
case c.BaseSpeed > 0:
|
|
return c.BaseSpeed
|
|
case c.MaxSpeed > 0:
|
|
return c.MaxSpeed
|
|
}
|
|
return c.GuessSpeed
|
|
}
|
|
|
|
// SLIT (system locality information table) describes the relative cost for
|
|
// accessing memory across each combination of NUMA node boundary.
|
|
type SLIT [][]Cost
|
|
|
|
func (d SLIT) cost(a, b hw.NodeID) Cost {
|
|
return d[a][b]
|
|
}
|
|
|
|
func (st *Topology) NodeDistance(node hw.NodeID, core Core) Cost {
|
|
// todo(shoenig) we should memoize these values - they never change but
|
|
// they get used a lot in numa scheduling
|
|
|
|
// fast path, core is on node
|
|
if core.NodeID == node {
|
|
// return the distance to itself (100%)
|
|
return st.Distances.cost(node, node)
|
|
}
|
|
|
|
// find a core on node to compare with
|
|
for _, target := range st.Cores {
|
|
if target.NodeID == node {
|
|
return st.Distances.cost(target.NodeID, core.NodeID)
|
|
}
|
|
}
|
|
|
|
// should not be possible
|
|
panic("topology: no node distance")
|
|
}
|
|
|
|
// SupportsNUMA returns whether Nomad supports NUMA detection on the client's
|
|
// operating system. Currently only supported on Linux.
|
|
func (st *Topology) SupportsNUMA() bool {
|
|
switch runtime.GOOS {
|
|
case "linux":
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// GetNodes returns the set of NUMA Node IDs.
|
|
func (st *Topology) GetNodes() *idset.Set[hw.NodeID] {
|
|
if st.nodeIDs.Empty() {
|
|
st.nodeIDs = idset.From[hw.NodeID](st.Nodes)
|
|
}
|
|
return st.nodeIDs
|
|
}
|
|
|
|
// NodeCores returns the set of Core IDs for the given NUMA Node ID.
|
|
func (st *Topology) NodeCores(node hw.NodeID) *idset.Set[hw.CoreID] {
|
|
result := idset.Empty[hw.CoreID]()
|
|
for _, cpu := range st.Cores {
|
|
if cpu.NodeID == node {
|
|
result.Insert(cpu.ID)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (st *Topology) insert(node hw.NodeID, socket hw.SocketID, core hw.CoreID, grade CoreGrade, max, base hw.KHz) {
|
|
st.Cores[core] = Core{
|
|
NodeID: node,
|
|
SocketID: socket,
|
|
ID: core,
|
|
Grade: grade,
|
|
MaxSpeed: max.MHz(),
|
|
BaseSpeed: base.MHz(),
|
|
}
|
|
}
|
|
|
|
func (st *Topology) String() string {
|
|
var sb strings.Builder
|
|
for _, cpu := range st.Cores {
|
|
sb.WriteString(cpu.String())
|
|
}
|
|
return sb.String()
|
|
}
|
|
|
|
// TotalCompute returns the amount of compute in MHz the detected hardware is
|
|
// ultimately capable of delivering. The UsableCompute will be equal to or
|
|
// less than this value.
|
|
//
|
|
// If the client configuration includes an override for total compute, that
|
|
// value is used instead even if it violates the above invariant.
|
|
func (st *Topology) TotalCompute() hw.MHz {
|
|
if st.OverrideTotalCompute > 0 {
|
|
// TODO(shoenig) Starting in Nomad 1.7 we should warn about setting
|
|
// cpu_total_compute override, and suggeset users who think they still
|
|
// need this to file a bug so we can understand what is not detectable.
|
|
return st.OverrideTotalCompute
|
|
}
|
|
|
|
var total hw.MHz
|
|
for _, cpu := range st.Cores {
|
|
total += cpu.MHz()
|
|
}
|
|
return total
|
|
}
|
|
|
|
// UsableCompute returns the amount of compute in MHz the Nomad client is able
|
|
// to make use of for running tasks. This value will be less than or equal to
|
|
// the TotalCompute of the system. Nomad must subtract off any reserved compute
|
|
// (reserved.cpu or reserved.cores) from the total hardware compute.
|
|
func (st *Topology) UsableCompute() hw.MHz {
|
|
if st.OverrideTotalCompute > 0 {
|
|
// TODO(shoenig) Starting in Nomad 1.7 we should warn about setting
|
|
// cpu_total_compute override, and suggeset users who think they still
|
|
// need this to file a bug so we can understand what is not detectable.
|
|
return st.OverrideTotalCompute
|
|
}
|
|
|
|
var total hw.MHz
|
|
for _, cpu := range st.Cores {
|
|
// only use cores allowable by config
|
|
if !cpu.Disable {
|
|
total += cpu.MHz()
|
|
}
|
|
}
|
|
|
|
// only use compute allowable by config
|
|
return total - st.OverrideWitholdCompute
|
|
}
|
|
|
|
// NumCores returns the number of logical cores detected. This includes both
|
|
// power and efficiency cores.
|
|
func (st *Topology) NumCores() int {
|
|
return len(st.Cores)
|
|
}
|
|
|
|
// NumPCores returns the number of logical performance cores detected.
|
|
func (st *Topology) NumPCores() int {
|
|
var total int
|
|
for _, cpu := range st.Cores {
|
|
if cpu.Grade == Performance {
|
|
total++
|
|
}
|
|
}
|
|
return total
|
|
}
|
|
|
|
// NumECores returns the number of logical efficiency cores detected.
|
|
func (st *Topology) NumECores() int {
|
|
var total int
|
|
for _, cpu := range st.Cores {
|
|
if cpu.Grade == Efficiency {
|
|
total++
|
|
}
|
|
}
|
|
return total
|
|
}
|
|
|
|
// UsableCores returns the number of logical cores usable by the Nomad client
|
|
// for running tasks. Nomad must subtract off any reserved cores (reserved.cores)
|
|
// and/or must mask the cpuset to the one set in config (config.reservable_cores).
|
|
func (st *Topology) UsableCores() *idset.Set[hw.CoreID] {
|
|
result := idset.Empty[hw.CoreID]()
|
|
for _, cpu := range st.Cores {
|
|
if !cpu.Disable {
|
|
result.Insert(cpu.ID)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// CoreSpeeds returns the frequency in MHz of the performance and efficiency
|
|
// core types. If the CPU does not have effiency cores that value will be zero.
|
|
func (st *Topology) CoreSpeeds() (hw.MHz, hw.MHz) {
|
|
var pCore, eCore hw.MHz
|
|
for _, cpu := range st.Cores {
|
|
switch cpu.Grade {
|
|
case Performance:
|
|
pCore = cpu.MHz()
|
|
case Efficiency:
|
|
eCore = cpu.MHz()
|
|
}
|
|
}
|
|
return pCore, eCore
|
|
}
|
|
|
|
func (st *Topology) Compute() cpustats.Compute {
|
|
return cpustats.Compute{
|
|
TotalCompute: st.TotalCompute(),
|
|
NumCores: st.NumCores(),
|
|
}
|
|
}
|
|
|
|
func (st *Topology) Equal(o *Topology) bool {
|
|
if st == nil || o == nil {
|
|
return st == o
|
|
}
|
|
// simply iterates each core; the topology never changes for a node once
|
|
// it has been created at agent startup
|
|
return st.TotalCompute() == o.TotalCompute()
|
|
}
|