fingerprint: implement client fingerprinting of reservable cores

on Linux systems this is derived from the configure cpuset cgroup parent (defaults to /nomad)
for non Linux systems and Linux systems where cgroups are not enabled, the client defaults to using all cores
This commit is contained in:
Nick Ethier
2021-03-24 22:09:37 -04:00
parent 83164f21d2
commit b8397a712d
9 changed files with 233 additions and 12 deletions

View File

@@ -33,6 +33,7 @@ import (
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/client/dynamicplugins"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/client/lib/cgutil"
"github.com/hashicorp/nomad/client/pluginmanager"
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
@@ -632,6 +633,14 @@ func (c *Client) init() error {
}
c.logger.Info("using alloc directory", "alloc_dir", c.config.AllocDir)
// Ensure cgroups are created on linux platform
if err := cgutil.InitCpusetParent(c.config.CgroupParent); err != nil {
// if the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager
// will be disabled. this is common when running in dev mode under a non-root user for example
c.logger.Warn("could not initialize cpuset cgroup subsystem, cpuset management disabled", "error", err)
c.config.DisableCgroupManagement = true
}
return nil
}

View File

@@ -8,6 +8,8 @@ import (
"strings"
"time"
"github.com/hashicorp/nomad/client/lib/cgutil"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/helper"
@@ -265,6 +267,14 @@ type Config struct {
//
// This configuration is only considered if no host networks are defined.
BindWildcardDefaultHostNetwork bool
// CgroupParent is the parent cgroup Nomad should use when managing any cgroup subsystems.
// Currently this only includes the 'cpuset' cgroup subsystem
CgroupParent string
// DisableCgroupManagement if true disables all management of cgroup subsystems by the Nomad client. It does
// not prevent individual drivers from manging their own cgroups.
DisableCgroupManagement bool
}
type ClientTemplateConfig struct {
@@ -323,6 +333,7 @@ func DefaultConfig() *Config {
CNIConfigDir: "/opt/cni/config",
CNIInterfacePrefix: "eth",
HostNetworks: map[string]*structs.ClientHostNetworkConfig{},
CgroupParent: cgutil.DefaultCgroupParent,
}
}

View File

@@ -3,6 +3,8 @@ package fingerprint
import (
"fmt"
"github.com/hashicorp/nomad/lib/cpuset"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/nomad/structs"
@@ -30,7 +32,7 @@ func NewCPUFingerprint(logger log.Logger) Fingerprint {
func (f *CPUFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error {
cfg := req.Config
setResourcesCPU := func(totalCompute int) {
setResourcesCPU := func(totalCompute int, totalCores uint16, reservableCores []uint16) {
// COMPAT(0.10): Remove in 0.10
resp.Resources = &structs.Resources{
CPU: totalCompute,
@@ -38,7 +40,9 @@ func (f *CPUFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR
resp.NodeResources = &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: int64(totalCompute),
CpuShares: int64(totalCompute),
TotalCpuCores: totalCores,
ReservableCpuCores: reservableCores,
},
}
}
@@ -56,11 +60,20 @@ func (f *CPUFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR
f.logger.Debug("detected cpu frequency", "MHz", log.Fmt("%.0f", mhz))
}
if numCores := stats.CPUNumCores(); numCores > 0 {
var numCores int
if numCores = stats.CPUNumCores(); numCores > 0 {
resp.AddAttribute("cpu.numcores", fmt.Sprintf("%d", numCores))
f.logger.Debug("detected core count", "cores", numCores)
}
var reservableCores []uint16
if cores, err := f.deriveReservableCores(req, numCores); err != nil {
f.logger.Warn("failed to detect set of reservable cores", "error", err)
} else {
reservableCores = cpuset.New(cores...).Difference(cpuset.New(req.Node.ReservedResources.Cpu.ReservedCpuCores...)).ToSlice()
f.logger.Debug("detected reservable cores", "cpuset", reservableCores)
}
tt := int(stats.TotalTicksAvailable())
if cfg.CpuCompute > 0 {
f.logger.Debug("using user specified cpu compute", "cpu_compute", cfg.CpuCompute)
@@ -77,8 +90,16 @@ func (f *CPUFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR
}
resp.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", tt))
setResourcesCPU(tt)
setResourcesCPU(tt, uint16(numCores), reservableCores)
resp.Detected = true
return nil
}
func defaultReservableCores(totalCores int) []uint16 {
cores := make([]uint16, totalCores)
for i := range cores {
cores[i] = uint16(i)
}
return cores
}

View File

@@ -0,0 +1,7 @@
//+build !linux
package fingerprint
func (f *CPUFingerprint) deriveReservableCores(req *FingerprintRequest, totalCores int) ([]uint16, error) {
return defaultReservableCores(totalCores), nil
}

View File

@@ -0,0 +1,13 @@
package fingerprint
import (
"github.com/hashicorp/nomad/client/lib/cgutil"
)
func (f *CPUFingerprint) deriveReservableCores(req *FingerprintRequest, totalCores int) ([]uint16, error) {
if req.Config.DisableCgroupManagement {
return defaultReservableCores(totalCores), nil
}
return cgutil.GetCPUsFromCgroup(req.Config.CgroupParent)
}

View File

@@ -0,0 +1,9 @@
// +build !linux
package cgutil
const (
DefaultCgroupParent = ""
)
func InitCpusetParent(string) error { return nil }

View File

@@ -0,0 +1,140 @@
package cgutil
import (
"os"
"path/filepath"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
const (
DefaultCgroupParent = "/nomad"
SharedCpusetCgroupName = "shared"
)
// InitCpusetParent checks that the cgroup parent and expected child cgroups have been created
// If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared
// cgroup is initialized. The /nomad/reserved cgroup will be lazily created when a workload
// with reserved cores is created
func InitCpusetParent(cgroupParent string) error {
if cgroupParent == "" {
cgroupParent = DefaultCgroupParent
}
var err error
if cgroupParent, err = getCgroupPathHelper("cpuset", cgroupParent); err != nil {
return err
}
// 'ensureParent' start with parent because we don't want to
// explicitly inherit from parent, it could conflict with
// 'cpuset.cpu_exclusive'.
if err := cpusetEnsureParent(cgroupParent); err != nil {
return err
}
if err := os.Mkdir(filepath.Join(cgroupParent, SharedCpusetCgroupName), 0755); err != nil && !os.IsExist(err) {
return err
}
return nil
}
func GetCPUsFromCgroup(group string) ([]uint16, error) {
cgroupPath, err := getCgroupPathHelper("cpuset", group)
if err != nil {
return nil, err
}
man := cgroupFs.NewManager(&configs.Cgroup{Path: group}, map[string]string{"cpuset": cgroupPath}, false)
stats, err := man.GetStats()
if err != nil {
return nil, err
}
return stats.CPUSetStats.CPUs, nil
}
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
if cpus, err = fscommon.ReadFile(parent, "cpuset.cpus"); err != nil {
return
}
if mems, err = fscommon.ReadFile(parent, "cpuset.mems"); err != nil {
return
}
return cpus, mems, nil
}
// cpusetEnsureParent makes sure that the parent directories of current
// are created and populated with the proper cpus and mems files copied
// from their respective parent. It does that recursively, starting from
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
func cpusetEnsureParent(current string) error {
var st unix.Statfs_t
parent := filepath.Dir(current)
err := unix.Statfs(parent, &st)
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
return nil
}
// Treat non-existing directory as cgroupfs as it will be created,
// and the root cpuset directory obviously exists.
if err != nil && err != unix.ENOENT {
return &os.PathError{Op: "statfs", Path: parent, Err: err}
}
if err := cpusetEnsureParent(parent); err != nil {
return err
}
if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return cpusetCopyIfNeeded(current, parent)
}
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
// directory to the current directory if the file's contents are 0
func cpusetCopyIfNeeded(current, parent string) error {
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
if err != nil {
return err
}
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
if err != nil {
return err
}
if isEmptyCpuset(currentCpus) {
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
return err
}
}
if isEmptyCpuset(currentMems) {
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
return err
}
}
return nil
}
func isEmptyCpuset(str string) bool {
return str == "" || str == "\n"
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}

View File

@@ -14,6 +14,8 @@ import (
"sync"
"time"
"github.com/hashicorp/nomad/lib/cpuset"
metrics "github.com/armon/go-metrics"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
@@ -613,6 +615,13 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB)
res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB)
res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts
if agentConfig.Client.Reserved.Cores != "" {
cores, err := cpuset.Parse(agentConfig.Client.Reserved.Cores)
if err != nil {
return nil, fmt.Errorf("failed to parse client > reserved > cores value %q: %v", agentConfig.Client.Reserved.Cores, err)
}
res.Cpu.ReservedCpuCores = cores.ToSlice()
}
conf.Version = agentConfig.Version
@@ -661,6 +670,8 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
}
conf.BindWildcardDefaultHostNetwork = agentConfig.Client.BindWildcardDefaultHostNetwork
conf.CgroupParent = agentConfig.Client.CgroupParent
return conf, nil
}

View File

@@ -299,6 +299,10 @@ type ClientConfig struct {
// matching any destination address (true). Defaults to true
BindWildcardDefaultHostNetwork bool `hcl:"bind_wildcard_default_host_network"`
// CgroupParent sets the parent cgroup for subsystems managed by Nomad. If the cgroup
// doest not exist Nomad will attempt to create it during startup. Defaults to '/nomad'
CgroupParent string `hcl:"cgroup_parent"`
// ExtraKeysHCL is used by hcl to surface unexpected keys
ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
}
@@ -720,18 +724,11 @@ type Resources struct {
MemoryMB int `hcl:"memory"`
DiskMB int `hcl:"disk"`
ReservedPorts string `hcl:"reserved_ports"`
Cores string `hcl:"cores"`
// ExtraKeysHCL is used by hcl to surface unexpected keys
ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
}
// CanParseReserved returns if the reserved ports specification is parsable.
// The supported syntax is comma separated integers or ranges separated by
// hyphens. For example, "80,120-150,160"
func (r *Resources) CanParseReserved() error {
_, err := structs.ParsePortRanges(r.ReservedPorts)
return err
}
// devModeConfig holds the config for the -dev and -dev-connect flags
type devModeConfig struct {
// mode flags are set at the command line via -dev and -dev-connect
@@ -1741,6 +1738,9 @@ func (r *Resources) Merge(b *Resources) *Resources {
if b.ReservedPorts != "" {
result.ReservedPorts = b.ReservedPorts
}
if b.Cores != "" {
result.Cores = b.Cores
}
return &result
}