From a4cc76bd3e4c7d4f7e623721caa8a716b5a0151f Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 10 Aug 2023 17:05:30 -0500 Subject: [PATCH] numa: enable numa topology detection (#18146) * client: refactor cgroups management in client * client: fingerprint numa topology * client: plumb numa and cgroups changes to drivers * client: cleanup task resource accounting * client: numa client and config plumbing * lib: add a stack implementation * tools: remove ec2info tool * plugins: fixup testing for cgroups / numa changes * build: update makefile and package tests and cl --- .changelog/18146.txt | 3 + GNUmakefile | 5 - ci/test-core.json | 2 +- client/alloc_endpoint_test.go | 7 + client/allocrunner/alloc_runner.go | 21 +- client/allocrunner/alloc_runner_hooks.go | 1 - client/allocrunner/alloc_runner_test.go | 19 +- client/allocrunner/cgroup_hook.go | 35 - client/allocrunner/taskrunner/task_runner.go | 108 ++- .../taskrunner/task_runner_hooks.go | 1 + .../taskrunner/task_runner_test.go | 37 +- .../allocrunner/taskrunner/wrangler_hook.go | 52 ++ client/allocrunner/testing.go | 5 +- client/client.go | 73 +- client/client_test.go | 16 +- client/config/arconfig.go | 8 +- client/config/config.go | 11 +- client/fingerprint/cgroup.go | 93 +-- client/fingerprint/cgroup_default.go | 10 - client/fingerprint/cgroup_linux.go | 47 -- client/fingerprint/cgroup_test.go | 159 ---- client/fingerprint/cpu.go | 183 +++-- client/fingerprint/cpu_default.go | 10 - client/fingerprint/cpu_default_test.go | 59 +- client/fingerprint/cpu_linux.go | 20 - client/fingerprint/env_aws.go | 41 +- client/fingerprint/env_aws_cpu.go | 688 ------------------ client/fingerprint/env_aws_test.go | 36 - client/fingerprint/env_azure.go | 3 +- client/fingerprint/env_gce.go | 2 +- client/fingerprint/fingerprint_linux.go | 2 +- client/fingerprint/structs.go | 11 + client/fingerprint_manager.go | 15 +- client/fingerprint_manager_test.go | 57 +- client/gc.go | 11 +- client/gc_test.go | 8 +- client/{stats => 
hoststats}/host.go | 52 +- client/{stats => hoststats}/host_test.go | 2 +- client/interfaces/client.go | 9 +- client/lib/cgroupslib/default.go | 16 + client/lib/cgroupslib/editor.go | 248 +++++++ client/lib/cgroupslib/init.go | 109 +++ client/lib/cgroupslib/memory.go | 40 + client/lib/cgroupslib/memory_test.go | 18 + client/lib/cgroupslib/mode.go | 14 + client/lib/cgroupslib/mount.go | 42 ++ client/lib/cgroupslib/mount_test.go | 67 ++ client/lib/cgroupslib/switch_default.go | 11 + client/lib/cgroupslib/switch_linux.go | 40 + client/lib/cgutil/cgutil_linux.go | 174 ----- client/lib/cgutil/cgutil_linux_test.go | 141 ---- client/lib/cgutil/cgutil_noop.go | 45 -- client/lib/cgutil/cpuset_manager.go | 75 -- client/lib/cgutil/cpuset_manager_test.go | 31 - client/lib/cgutil/cpuset_manager_v1.go | 429 ----------- client/lib/cgutil/cpuset_manager_v1_test.go | 167 ----- client/lib/cgutil/cpuset_manager_v2.go | 371 ---------- client/lib/cgutil/cpuset_manager_v2_test.go | 95 --- client/lib/cgutil/editor.go | 30 - client/lib/cgutil/editor_test.go | 42 -- client/lib/cgutil/group_killer.go | 182 ----- client/lib/cpustats/stats.go | 74 ++ client/lib/idset/idset.go | 165 +++++ client/lib/idset/idset_test.go | 81 +++ client/lib/numalib/detect.go | 83 +++ client/lib/numalib/detect_darwin.go | 74 ++ client/lib/numalib/detect_default.go | 60 ++ client/lib/numalib/detect_linux.go | 191 +++++ client/lib/numalib/detect_noimpl.go | 18 + client/lib/numalib/detect_noimpl_test.go | 23 + client/lib/numalib/detect_smbios.go | 81 +++ client/lib/numalib/detect_smbios_test.go | 44 ++ client/lib/numalib/detect_test.go | 19 + client/lib/numalib/topology.go | 252 +++++++ client/lib/proclib/config.go | 15 + client/lib/proclib/wrangler.go | 85 +++ client/lib/proclib/wrangler_cg1_linux.go | 73 ++ client/lib/proclib/wrangler_cg2_linux.go | 48 ++ client/lib/proclib/wrangler_cg2_linux_test.go | 8 + client/lib/proclib/wrangler_default.go | 40 + client/lib/proclib/wrangler_linux.go | 28 + 
client/lib/resources/containment.go | 16 - client/lib/resources/containment_linux.go | 110 --- client/lib/resources/pid.go | 28 - client/state/upgrade_int_test.go | 2 + client/stats/cpu.go | 146 ---- client/stats/cpu_darwin_test.go | 38 - client/stats/cpu_test.go | 54 -- client/structs/structs.go | 4 +- command/agent/agent.go | 10 - command/agent/agent_test.go | 20 +- command/agent/consul/int_test.go | 2 + drivers/docker/cmd/main.go | 4 +- drivers/docker/config.go | 4 +- drivers/docker/driver.go | 41 +- drivers/docker/driver_darwin.go | 10 - drivers/docker/driver_linux.go | 22 - drivers/docker/driver_test.go | 42 +- drivers/docker/driver_windows.go | 4 - drivers/docker/fingerprint.go | 6 +- drivers/docker/fingerprint_test.go | 4 +- drivers/docker/reconcile_cpuset.go | 128 ---- drivers/docker/reconcile_cpuset_noop.go | 22 - drivers/docker/reconcile_cpuset_test.go | 39 - drivers/docker/stats.go | 13 +- drivers/docker/stats_test.go | 2 +- drivers/docker/util/stats_posix.go | 20 +- drivers/docker/util/stats_windows.go | 15 +- drivers/exec/driver.go | 50 +- drivers/exec/driver_test.go | 28 +- drivers/exec/driver_unix_test.go | 41 +- drivers/java/driver.go | 15 +- drivers/java/driver_test.go | 8 +- drivers/rawexec/driver.go | 16 +- drivers/rawexec/driver_test.go | 206 ++++-- drivers/rawexec/driver_unix_test.go | 133 ++-- drivers/shared/executor/executor.go | 141 ++-- drivers/shared/executor/executor_basic.go | 24 +- drivers/shared/executor/executor_linux.go | 201 ++--- .../shared/executor/executor_linux_test.go | 162 +---- drivers/shared/executor/executor_plugin.go | 13 +- drivers/shared/executor/executor_test.go | 36 +- .../executor/executor_universal_linux.go | 233 ++++-- drivers/shared/executor/executor_unix.go | 2 +- drivers/shared/executor/pid_collector.go | 214 ------ drivers/shared/executor/pid_collector_test.go | 49 -- drivers/shared/executor/plugins.go | 11 +- drivers/shared/executor/procstats/getstats.go | 135 ++++ .../shared/executor/procstats/list_default.go 
| 51 ++ .../shared/executor/procstats/list_linux.go | 32 + .../shared/executor/procstats/procstats.go | 82 +++ drivers/shared/executor/utils.go | 16 +- drivers/shared/executor/z_executor_cmd.go | 2 - go.mod | 1 + go.sum | 3 + helper/{stats => goruntime}/runtime.go | 3 +- helper/stats/cpu.go | 83 --- helper/stats/cpu_test.go | 24 - lib/lang/stack.go | 43 ++ lib/lang/stack_test.go | 32 + nomad/server.go | 4 +- plugins/drivers/testutils/exec_testing.go | 15 +- plugins/drivers/testutils/testing.go | 42 -- plugins/drivers/testutils/testing_default.go | 10 + plugins/drivers/testutils/testing_linux.go | 25 + tools/ec2info/cpu_table.go.template | 48 -- tools/ec2info/main.go | 213 ------ tools/go.mod | 6 +- tools/go.sum | 42 -- 149 files changed, 3677 insertions(+), 5395 deletions(-) create mode 100644 .changelog/18146.txt delete mode 100644 client/allocrunner/cgroup_hook.go create mode 100644 client/allocrunner/taskrunner/wrangler_hook.go delete mode 100644 client/fingerprint/cgroup_default.go delete mode 100644 client/fingerprint/cgroup_linux.go delete mode 100644 client/fingerprint/cgroup_test.go delete mode 100644 client/fingerprint/cpu_default.go delete mode 100644 client/fingerprint/cpu_linux.go delete mode 100644 client/fingerprint/env_aws_cpu.go rename client/{stats => hoststats}/host.go (85%) rename client/{stats => hoststats}/host_test.go (97%) create mode 100644 client/lib/cgroupslib/default.go create mode 100644 client/lib/cgroupslib/editor.go create mode 100644 client/lib/cgroupslib/init.go create mode 100644 client/lib/cgroupslib/memory.go create mode 100644 client/lib/cgroupslib/memory_test.go create mode 100644 client/lib/cgroupslib/mode.go create mode 100644 client/lib/cgroupslib/mount.go create mode 100644 client/lib/cgroupslib/mount_test.go create mode 100644 client/lib/cgroupslib/switch_default.go create mode 100644 client/lib/cgroupslib/switch_linux.go delete mode 100644 client/lib/cgutil/cgutil_linux.go delete mode 100644 
client/lib/cgutil/cgutil_linux_test.go delete mode 100644 client/lib/cgutil/cgutil_noop.go delete mode 100644 client/lib/cgutil/cpuset_manager.go delete mode 100644 client/lib/cgutil/cpuset_manager_test.go delete mode 100644 client/lib/cgutil/cpuset_manager_v1.go delete mode 100644 client/lib/cgutil/cpuset_manager_v1_test.go delete mode 100644 client/lib/cgutil/cpuset_manager_v2.go delete mode 100644 client/lib/cgutil/cpuset_manager_v2_test.go delete mode 100644 client/lib/cgutil/editor.go delete mode 100644 client/lib/cgutil/editor_test.go delete mode 100644 client/lib/cgutil/group_killer.go create mode 100644 client/lib/cpustats/stats.go create mode 100644 client/lib/idset/idset.go create mode 100644 client/lib/idset/idset_test.go create mode 100644 client/lib/numalib/detect.go create mode 100644 client/lib/numalib/detect_darwin.go create mode 100644 client/lib/numalib/detect_default.go create mode 100644 client/lib/numalib/detect_linux.go create mode 100644 client/lib/numalib/detect_noimpl.go create mode 100644 client/lib/numalib/detect_noimpl_test.go create mode 100644 client/lib/numalib/detect_smbios.go create mode 100644 client/lib/numalib/detect_smbios_test.go create mode 100644 client/lib/numalib/detect_test.go create mode 100644 client/lib/numalib/topology.go create mode 100644 client/lib/proclib/config.go create mode 100644 client/lib/proclib/wrangler.go create mode 100644 client/lib/proclib/wrangler_cg1_linux.go create mode 100644 client/lib/proclib/wrangler_cg2_linux.go create mode 100644 client/lib/proclib/wrangler_cg2_linux_test.go create mode 100644 client/lib/proclib/wrangler_default.go create mode 100644 client/lib/proclib/wrangler_linux.go delete mode 100644 client/lib/resources/containment.go delete mode 100644 client/lib/resources/containment_linux.go delete mode 100644 client/lib/resources/pid.go delete mode 100644 client/stats/cpu.go delete mode 100644 client/stats/cpu_darwin_test.go delete mode 100644 client/stats/cpu_test.go delete mode 
100644 drivers/docker/driver_darwin.go delete mode 100644 drivers/docker/driver_linux.go delete mode 100644 drivers/docker/reconcile_cpuset.go delete mode 100644 drivers/docker/reconcile_cpuset_noop.go delete mode 100644 drivers/docker/reconcile_cpuset_test.go delete mode 100644 drivers/shared/executor/pid_collector.go delete mode 100644 drivers/shared/executor/pid_collector_test.go create mode 100644 drivers/shared/executor/procstats/getstats.go create mode 100644 drivers/shared/executor/procstats/list_default.go create mode 100644 drivers/shared/executor/procstats/list_linux.go create mode 100644 drivers/shared/executor/procstats/procstats.go rename helper/{stats => goruntime}/runtime.go (85%) delete mode 100644 helper/stats/cpu.go delete mode 100644 helper/stats/cpu_test.go create mode 100644 lib/lang/stack.go create mode 100644 lib/lang/stack_test.go create mode 100644 plugins/drivers/testutils/testing_default.go create mode 100644 plugins/drivers/testutils/testing_linux.go delete mode 100644 tools/ec2info/cpu_table.go.template delete mode 100644 tools/ec2info/main.go diff --git a/.changelog/18146.txt b/.changelog/18146.txt new file mode 100644 index 000000000..25668bf8b --- /dev/null +++ b/.changelog/18146.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: enable detection of numa topology +``` diff --git a/GNUmakefile b/GNUmakefile index 499b25f4e..33a6757c5 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -407,11 +407,6 @@ missing: ## Check for packages not being tested @echo "==> Checking for packages not being tested ..." @go run -modfile tools/go.mod tools/missing/main.go ci/test-core.json -.PHONY: ec2info -ec2info: ## Generate AWS EC2 CPU specification table - @echo "==> Generating AWS EC2 specifications ..." 
- @go run -modfile tools/go.mod tools/ec2info/main.go - .PHONY: cl cl: ## Create a new Changelog entry @go run -modfile tools/go.mod tools/cl-entry/main.go diff --git a/ci/test-core.json b/ci/test-core.json index 281589f6f..740b8fde9 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -23,7 +23,7 @@ "client/servers/...", "client/serviceregistration/...", "client/state/...", - "client/stats/...", + "client/hoststats/...", "client/structs/...", "client/taskenv/...", "command/agent/...", diff --git a/client/alloc_endpoint_test.go b/client/alloc_endpoint_test.go index cf59d6ccb..4c560d385 100644 --- a/client/alloc_endpoint_test.go +++ b/client/alloc_endpoint_test.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/lib/proclib" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/pluginutils/catalog" "github.com/hashicorp/nomad/helper/uuid" @@ -81,6 +82,12 @@ func TestAllocations_RestartAllTasks(t *testing.T) { alloc := mock.LifecycleAlloc() require.Nil(client.addAlloc(alloc, "")) + // setup process wranglers for our tasks to make sure they work with restart + client.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "web"}) + client.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "init"}) + client.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "side"}) + client.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "poststart"}) + // Can't restart all tasks while specifying a task name. 
req := &nstructs.AllocRestartRequest{ AllocID: alloc.ID, diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 19474faae..3d54fc635 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -11,8 +11,6 @@ import ( log "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" - "golang.org/x/exp/maps" - "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/allocrunner/state" @@ -23,7 +21,7 @@ import ( "github.com/hashicorp/nomad/client/devicemanager" "github.com/hashicorp/nomad/client/dynamicplugins" cinterfaces "github.com/hashicorp/nomad/client/interfaces" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" "github.com/hashicorp/nomad/client/serviceregistration" @@ -36,6 +34,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/drivers" + "golang.org/x/exp/maps" ) // allocRunner is used to run all the tasks in a given allocation @@ -164,9 +163,6 @@ type allocRunner struct { // runner to manage their mounting csiManager csimanager.Manager - // cpusetManager is responsible for configuring task cgroups if supported by the platform - cpusetManager cgutil.CpusetManager - // devicemanager is used to mount devices as well as lookup device // statistics devicemanager devicemanager.Manager @@ -200,6 +196,9 @@ type allocRunner struct { // getter is an interface for retrieving artifacts. getter cinterfaces.ArtifactGetter + + // wranglers is an interface for managing unix/windows processes. + wranglers cinterfaces.ProcessWranglers } // NewAllocRunner returns a new allocation runner. 
@@ -233,7 +232,6 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e prevAllocMigrator: config.PrevAllocMigrator, dynamicRegistry: config.DynamicRegistry, csiManager: config.CSIManager, - cpusetManager: config.CpusetManager, devicemanager: config.DeviceManager, driverManager: config.DriverManager, serversContactedCh: config.ServersContactedCh, @@ -241,6 +239,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e serviceRegWrapper: config.ServiceRegWrapper, checkStore: config.CheckStore, getter: config.Getter, + wranglers: config.Wranglers, hookResources: cstructs.NewAllocHookResources(), } @@ -297,13 +296,10 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { ShutdownDelayCtx: ar.shutdownDelayCtx, ServiceRegWrapper: ar.serviceRegWrapper, Getter: ar.getter, + Wranglers: ar.wranglers, AllocHookResources: ar.hookResources, } - if ar.cpusetManager != nil { - trConfig.CpusetCgroupPathGetter = ar.cpusetManager.CgroupPathFor(ar.id, task.Name) - } - // Create, but do not Run, the task runner tr, err := taskrunner.NewTaskRunner(trConfig) if err != nil { @@ -450,6 +446,9 @@ func (ar *allocRunner) Restore() error { return err } states[tr.Task().Name] = tr.TaskState() + + // restore process wrangler for task + ar.wranglers.Setup(proclib.Task{AllocID: tr.Alloc().ID, Task: tr.Task().Name}) } ar.taskCoordinator.Restore(states) diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index b03f72a6a..a2cd9611d 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -119,7 +119,6 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { alloc := ar.Alloc() ar.runnerHooks = []interfaces.RunnerHook{ newAllocDirHook(hookLogger, ar.allocDir), - newCgroupHook(ar.Alloc(), ar.cpusetManager), newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher), newDiskMigrationHook(hookLogger, 
ar.prevAllocMigrator, ar.allocDir), newAllocHealthWatcherHook(hookLogger, alloc, newEnvBuilder, hs, ar.Listener(), ar.consulClient, ar.checkStore), diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 775b1d5ff..1e34bc85c 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -14,10 +14,6 @@ import ( "github.com/hashicorp/consul/api" multierror "github.com/hashicorp/go-multierror" - "github.com/shoenig/test/must" - "github.com/shoenig/test/wait" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allochealth" "github.com/hashicorp/nomad/client/allocrunner/interfaces" @@ -25,6 +21,7 @@ import ( "github.com/hashicorp/nomad/client/allocrunner/tasklifecycle" "github.com/hashicorp/nomad/client/allocrunner/taskrunner" "github.com/hashicorp/nomad/client/allocwatcher" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/serviceregistration" regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" "github.com/hashicorp/nomad/client/state" @@ -33,6 +30,9 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" + "github.com/stretchr/testify/require" ) // destroy does a blocking destroy on an alloc runner @@ -1220,6 +1220,10 @@ func TestAllocRunner_TaskLeader_StopRestoredTG(t *testing.T) { ar, err := NewAllocRunner(conf) must.NoError(t, err) + // setup process wranglers for these tasks + ar.(*allocRunner).wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: task.Name}) + ar.(*allocRunner).wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: task2.Name}) + // Mimic Nomad exiting before the leader stopping is able to stop other tasks. 
ar.(*allocRunner).tasks["leader"].UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled)) ar.(*allocRunner).tasks["follower1"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) @@ -1274,6 +1278,13 @@ func TestAllocRunner_Restore_LifecycleHooks(t *testing.T) { must.NoError(t, err) ar := arIface.(*allocRunner) + // Setup the process wranglers for the initial alloc runner, these should + // be recovered by the second alloc runner. + ar.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "init"}) + ar.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "side"}) + ar.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "web"}) + ar.wranglers.Setup(proclib.Task{AllocID: alloc.ID, Task: "poststart"}) + go ar.Run() defer destroy(ar) diff --git a/client/allocrunner/cgroup_hook.go b/client/allocrunner/cgroup_hook.go deleted file mode 100644 index 8b14ce848..000000000 --- a/client/allocrunner/cgroup_hook.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -package allocrunner - -import ( - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/hashicorp/nomad/nomad/structs" -) - -func newCgroupHook(alloc *structs.Allocation, man cgutil.CpusetManager) *cgroupHook { - return &cgroupHook{ - alloc: alloc, - cpusetManager: man, - } -} - -type cgroupHook struct { - alloc *structs.Allocation - cpusetManager cgutil.CpusetManager -} - -func (c *cgroupHook) Name() string { - return "cgroup" -} - -func (c *cgroupHook) Prerun() error { - c.cpusetManager.AddAlloc(c.alloc) - return nil -} - -func (c *cgroupHook) Postrun() error { - c.cpusetManager.RemoveAlloc(c.alloc.ID) - return nil -} diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 3f64299ed..fc57ea229 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -11,13 +11,10 @@ import ( "sync" "time" - "golang.org/x/exp/slices" - metrics "github.com/armon/go-metrics" log "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/hcl/v2/hcldec" - "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts" @@ -27,7 +24,7 @@ import ( "github.com/hashicorp/nomad/client/devicemanager" "github.com/hashicorp/nomad/client/dynamicplugins" cinterfaces "github.com/hashicorp/nomad/client/interfaces" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" "github.com/hashicorp/nomad/client/serviceregistration" @@ -43,6 +40,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" bstructs "github.com/hashicorp/nomad/plugins/base/structs" "github.com/hashicorp/nomad/plugins/drivers" + "golang.org/x/exp/slices" ) 
const ( @@ -231,9 +229,6 @@ type TaskRunner struct { // statistics devicemanager devicemanager.Manager - // cpusetCgroupPathGetter is used to lookup the cgroup path if supported by the platform - cpusetCgroupPathGetter cgutil.CgroupPathGetter - // driverManager is used to dispense driver plugins and register event // handlers driverManager drivermanager.Manager @@ -266,6 +261,10 @@ type TaskRunner struct { // getter is an interface for retrieving artifacts. getter cinterfaces.ArtifactGetter + + // wranglers manage unix/windows processes leveraging operating + // system features like cgroups + wranglers cinterfaces.ProcessWranglers } type Config struct { @@ -303,9 +302,6 @@ type Config struct { // CSIManager is used to manage the mounting of CSI volumes into tasks CSIManager csimanager.Manager - // CpusetCgroupPathGetter is used to lookup the cgroup path if supported by the platform - CpusetCgroupPathGetter cgutil.CgroupPathGetter - // DeviceManager is used to mount devices as well as lookup device // statistics DeviceManager devicemanager.Manager @@ -335,6 +331,9 @@ type Config struct { // Getter is an interface for retrieving artifacts. Getter cinterfaces.ArtifactGetter + // Wranglers is an interface for managing OS processes. 
+ Wranglers cinterfaces.ProcessWranglers + // AllocHookResources is how taskrunner hooks can get state written by // allocrunner hooks AllocHookResources *cstructs.AllocHookResources @@ -362,43 +361,43 @@ func NewTaskRunner(config *Config) (*TaskRunner, error) { } tr := &TaskRunner{ - alloc: config.Alloc, - allocID: config.Alloc.ID, - clientConfig: config.ClientConfig, - task: config.Task, - taskDir: config.TaskDir, - taskName: config.Task.Name, - taskLeader: config.Task.Leader, - envBuilder: envBuilder, - dynamicRegistry: config.DynamicRegistry, - consulServiceClient: config.Consul, - consulProxiesClient: config.ConsulProxies, - siClient: config.ConsulSI, - vaultClient: config.Vault, - state: tstate, - localState: state.NewLocalState(), - allocHookResources: config.AllocHookResources, - stateDB: config.StateDB, - stateUpdater: config.StateUpdater, - deviceStatsReporter: config.DeviceStatsReporter, - killCtx: killCtx, - killCtxCancel: killCancel, - shutdownCtx: trCtx, - shutdownCtxCancel: trCancel, - triggerUpdateCh: make(chan struct{}, triggerUpdateChCap), - restartCh: make(chan struct{}, restartChCap), - waitCh: make(chan struct{}), - csiManager: config.CSIManager, - cpusetCgroupPathGetter: config.CpusetCgroupPathGetter, - devicemanager: config.DeviceManager, - driverManager: config.DriverManager, - maxEvents: defaultMaxEvents, - serversContactedCh: config.ServersContactedCh, - startConditionMetCh: config.StartConditionMetCh, - shutdownDelayCtx: config.ShutdownDelayCtx, - shutdownDelayCancelFn: config.ShutdownDelayCancelFn, - serviceRegWrapper: config.ServiceRegWrapper, - getter: config.Getter, + alloc: config.Alloc, + allocID: config.Alloc.ID, + clientConfig: config.ClientConfig, + task: config.Task, + taskDir: config.TaskDir, + taskName: config.Task.Name, + taskLeader: config.Task.Leader, + envBuilder: envBuilder, + dynamicRegistry: config.DynamicRegistry, + consulServiceClient: config.Consul, + consulProxiesClient: config.ConsulProxies, + siClient: 
config.ConsulSI, + vaultClient: config.Vault, + state: tstate, + localState: state.NewLocalState(), + allocHookResources: config.AllocHookResources, + stateDB: config.StateDB, + stateUpdater: config.StateUpdater, + deviceStatsReporter: config.DeviceStatsReporter, + killCtx: killCtx, + killCtxCancel: killCancel, + shutdownCtx: trCtx, + shutdownCtxCancel: trCancel, + triggerUpdateCh: make(chan struct{}, triggerUpdateChCap), + restartCh: make(chan struct{}, restartChCap), + waitCh: make(chan struct{}), + csiManager: config.CSIManager, + devicemanager: config.DeviceManager, + driverManager: config.DriverManager, + maxEvents: defaultMaxEvents, + serversContactedCh: config.ServersContactedCh, + startConditionMetCh: config.StartConditionMetCh, + shutdownDelayCtx: config.ShutdownDelayCtx, + shutdownDelayCancelFn: config.ShutdownDelayCancelFn, + serviceRegWrapper: config.ServiceRegWrapper, + getter: config.Getter, + wranglers: config.Wranglers, } // Create the logger based on the allocation ID @@ -841,19 +840,16 @@ func (tr *TaskRunner) shouldRestart() (bool, time.Duration) { } } +func (tr *TaskRunner) assignCgroup(taskConfig *drivers.TaskConfig) { + p := cgroupslib.LinuxResourcesPath(taskConfig.AllocID, taskConfig.Name) + taskConfig.Resources.LinuxResources.CpusetCgroupPath = p +} + // runDriver runs the driver and waits for it to exit // runDriver emits an appropriate task event on success/failure func (tr *TaskRunner) runDriver() error { - taskConfig := tr.buildTaskConfig() - if tr.cpusetCgroupPathGetter != nil { - tr.logger.Trace("waiting for cgroup to exist for", "allocID", tr.allocID, "task", tr.task) - cpusetCgroupPath, err := tr.cpusetCgroupPathGetter(tr.killCtx) - if err != nil { - return err - } - taskConfig.Resources.LinuxResources.CpusetCgroupPath = cpusetCgroupPath - } + tr.assignCgroup(taskConfig) // Build hcl context variables vars, errs, err := tr.envBuilder.Build().AllValues() diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go 
b/client/allocrunner/taskrunner/task_runner_hooks.go index 22fd09066..64845edaf 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -72,6 +72,7 @@ func (tr *TaskRunner) initHooks() { newStatsHook(tr, tr.clientConfig.StatsCollectionInterval, hookLogger), newDeviceHook(tr.devicemanager, hookLogger), newAPIHook(tr.shutdownCtx, tr.clientConfig.APIListenerRegistrar, hookLogger), + newWranglerHook(tr.wranglers, task.Name, alloc.ID, hookLogger), } // If the task has a CSI block, add the hook. diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index 24757b6ce..c419b37db 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -16,12 +16,6 @@ import ( "time" "github.com/golang/snappy" - "github.com/kr/pretty" - "github.com/shoenig/test" - "github.com/shoenig/test/must" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" @@ -29,7 +23,7 @@ import ( "github.com/hashicorp/nomad/client/config" consulapi "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/devicemanager" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" "github.com/hashicorp/nomad/client/serviceregistration/wrapper" @@ -47,6 +41,11 @@ import ( "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/drivers" "github.com/hashicorp/nomad/testutil" + "github.com/kr/pretty" + "github.com/shoenig/test" + "github.com/shoenig/test/must" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type 
MockTaskStateUpdater struct { @@ -98,26 +97,10 @@ func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName stri } taskDir := allocDir.NewTaskDir(taskName) - // Compute the name of the v2 cgroup in case we need it in creation, configuration, and cleanup - cgroup := filepath.Join(cgutil.CgroupRoot, "testing.slice", cgutil.CgroupScope(alloc.ID, taskName)) - - // Create the cgroup if we are in v2 mode - if cgutil.UseV2 { - if err := os.MkdirAll(cgroup, 0755); err != nil { - t.Fatalf("failed to setup v2 cgroup for test: %v:", err) - } - } - trCleanup := func() { if err := allocDir.Destroy(); err != nil { t.Logf("error destroying alloc dir: %v", err) } - - // Cleanup the cgroup if we are in v2 mode - if cgutil.UseV2 { - _ = os.RemoveAll(cgroup) - } - cleanup() } @@ -152,13 +135,7 @@ func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName stri ShutdownDelayCancelFn: shutdownDelayCancelFn, ServiceRegWrapper: wrapperMock, Getter: getter.TestSandbox(t), - } - - // Set the cgroup path getter if we are in v2 mode - if cgutil.UseV2 { - conf.CpusetCgroupPathGetter = func(context.Context) (string, error) { - return filepath.Join(cgutil.CgroupRoot, "testing.slice", alloc.ID, thisTask.Name), nil - } + Wranglers: proclib.New(&proclib.Configs{Logger: testlog.HCLogger(t)}), } return conf, trCleanup diff --git a/client/allocrunner/taskrunner/wrangler_hook.go b/client/allocrunner/taskrunner/wrangler_hook.go new file mode 100644 index 000000000..ddd1c1f18 --- /dev/null +++ b/client/allocrunner/taskrunner/wrangler_hook.go @@ -0,0 +1,52 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +package taskrunner + +import ( + "context" + + "github.com/hashicorp/go-hclog" + ifs "github.com/hashicorp/nomad/client/allocrunner/interfaces" + cifs "github.com/hashicorp/nomad/client/interfaces" + "github.com/hashicorp/nomad/client/lib/proclib" +) + +const ( + wranglerHookName = "wrangler" +) + +// A wranglerHook provides a mechanism through which the Client can be sure any +// processes spawned by a task forcefully get killed when the task is stopped. +// +// Currently only does anything on Linux with cgroups. +type wranglerHook struct { + wranglers cifs.ProcessWranglers + task proclib.Task + log hclog.Logger +} + +func newWranglerHook(wranglers cifs.ProcessWranglers, task, allocID string, log hclog.Logger) *wranglerHook { + return &wranglerHook{ + log: log.Named(wranglerHookName), + wranglers: wranglers, + task: proclib.Task{ + AllocID: allocID, + Task: task, + }, + } +} + +func (*wranglerHook) Name() string { + return wranglerHookName +} + +func (wh *wranglerHook) Prestart(_ context.Context, request *ifs.TaskPrestartRequest, _ *ifs.TaskPrestartResponse) error { + wh.log.Trace("setting up client process management", "task", wh.task) + return wh.wranglers.Setup(wh.task) +} + +func (wh *wranglerHook) Stop(_ context.Context, request *ifs.TaskStopRequest, _ *ifs.TaskStopResponse) error { + wh.log.Trace("stopping client process management", "task", wh.task) + return wh.wranglers.Destroy(wh.task) +} diff --git a/client/allocrunner/testing.go b/client/allocrunner/testing.go index 679f24f7a..f755fb559 100644 --- a/client/allocrunner/testing.go +++ b/client/allocrunner/testing.go @@ -18,13 +18,14 @@ import ( clientconfig "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/devicemanager" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" 
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore" "github.com/hashicorp/nomad/client/serviceregistration/mock" "github.com/hashicorp/nomad/client/serviceregistration/wrapper" "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/client/vaultclient" + "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/require" @@ -91,11 +92,11 @@ func testAllocRunnerConfig(t *testing.T, alloc *structs.Allocation) (*config.All PrevAllocMigrator: allocwatcher.NoopPrevAlloc{}, DeviceManager: devicemanager.NoopMockManager(), DriverManager: drivermanager.TestDriverManager(t), - CpusetManager: new(cgutil.NoopCpusetManager), ServersContactedCh: make(chan struct{}), ServiceRegWrapper: wrapper.NewHandlerWrapper(clientConf.Logger, consulRegMock, nomadRegMock), CheckStore: checkstore.NewStore(clientConf.Logger, stateDB), Getter: getter.TestSandbox(t), + Wranglers: proclib.New(&proclib.Configs{Logger: testlog.HCLogger(t)}), } return conf, cleanup diff --git a/client/client.go b/client/client.go index 8ce000dcf..6476f08c3 100644 --- a/client/client.go +++ b/client/client.go @@ -31,8 +31,10 @@ import ( "github.com/hashicorp/nomad/client/devicemanager" "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/hoststats" cinterfaces "github.com/hashicorp/nomad/client/interfaces" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/pluginmanager" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" @@ -42,15 +44,14 @@ import ( "github.com/hashicorp/nomad/client/serviceregistration/nsd" "github.com/hashicorp/nomad/client/serviceregistration/wrapper" 
"github.com/hashicorp/nomad/client/state" - "github.com/hashicorp/nomad/client/stats" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/vaultclient" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/envoy" + "github.com/hashicorp/nomad/helper/goruntime" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/pool" - hstats "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" @@ -134,7 +135,7 @@ type ClientStatsReporter interface { GetAllocStats(allocID string) (interfaces.AllocStatsReporter, error) // LatestHostStats returns the latest resource usage stats for the host - LatestHostStats() *stats.HostStats + LatestHostStats() *hoststats.HostStats } // Client is used to implement the client interaction with Nomad. Clients @@ -242,7 +243,7 @@ type Client struct { consulCatalog consul.CatalogAPI // HostStatsCollector collects host resource usage stats - hostStatsCollector *stats.HostStatsCollector + hostStatsCollector *hoststats.HostStatsCollector // shutdown is true when the Client has been shutdown. Must hold // shutdownLock to access. @@ -315,14 +316,19 @@ type Client struct { // with a nomad client. Currently only used for CSI. dynamicRegistry dynamicplugins.Registry - // cpusetManager configures cpusets on supported platforms - cpusetManager cgutil.CpusetManager - // EnterpriseClient is used to set and check enterprise features for clients EnterpriseClient *EnterpriseClient // getter is an interface for retrieving artifacts. 
getter cinterfaces.ArtifactGetter + + // wranglers is used to keep track of processes and manage their interaction + // with drivers and stuff + wranglers *proclib.Wranglers + + // topology represents the system memory / cpu topology detected via + // fingerprinting + topology *numalib.Topology } var ( @@ -382,7 +388,6 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie serversContactedOnce: sync.Once{}, registeredCh: make(chan struct{}), registeredOnce: sync.Once{}, - cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger), getter: getter.New(cfg.Artifact, logger), EnterpriseClient: newEnterpriseClient(logger), allocrunnerFactory: cfg.AllocRunnerFactory, @@ -433,16 +438,28 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie } c.fingerprintManager = NewFingerprintManager( - cfg.PluginSingletonLoader, c.GetConfig, cfg.Node, - c.shutdownCh, c.updateNodeFromFingerprint, c.logger) - + cfg.PluginSingletonLoader, + c.GetConfig, + cfg.Node, + c.shutdownCh, + c.updateNodeFromFingerprint, + c.logger, + ) c.pluginManagers = pluginmanager.New(c.logger) // Fingerprint the node and scan for drivers - if err := c.fingerprintManager.Run(); err != nil { + if ir, err := c.fingerprintManager.Run(); err != nil { return nil, fmt.Errorf("fingerprinting failed: %v", err) + } else { + c.topology = numalib.NoImpl(ir.Topology) } + // Create the process wranglers + wranglers := proclib.New(&proclib.Configs{ + Logger: c.logger.Named("proclib"), + }) + c.wranglers = wranglers + // Build the allow/denylists of drivers. // COMPAT(1.0) uses inclusive language. white/blacklist are there for backward compatible reasons only. 
allowlistDrivers := cfg.ReadStringListToMap("driver.allowlist", "driver.whitelist") @@ -506,7 +523,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie go c.heartbeatStop.watch() // Add the stats collector - statsCollector := stats.NewHostStatsCollector(c.logger, c.GetConfig().AllocDir, c.devicemanager.AllStats) + statsCollector := hoststats.NewHostStatsCollector(c.logger, c.topology, c.GetConfig().AllocDir, c.devicemanager.AllStats) c.hostStatsCollector = statsCollector // Add the garbage collector @@ -680,9 +697,6 @@ func (c *Client) init() error { "reserved", reserved, ) - // startup the CPUSet manager - c.cpusetManager.Init() - // setup the nsd check store c.checkStore = checkstore.NewStore(c.logger, c.stateDB) @@ -893,7 +907,7 @@ func (c *Client) Stats() map[string]map[string]string { "last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat())), "heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL), }, - "runtime": hstats.RuntimeStats(), + "runtime": goruntime.RuntimeStats(), } return stats } @@ -993,7 +1007,7 @@ func (c *Client) GetAllocStats(allocID string) (interfaces.AllocStatsReporter, e } // LatestHostStats returns all the stats related to a Nomad client. 
-func (c *Client) LatestHostStats() *stats.HostStats { +func (c *Client) LatestHostStats() *hoststats.HostStats { return c.hostStatsCollector.Stats() } @@ -1225,7 +1239,6 @@ func (c *Client) restoreState() error { PrevAllocMigrator: prevAllocMigrator, DynamicRegistry: c.dynamicRegistry, CSIManager: c.csimanager, - CpusetManager: c.cpusetManager, DeviceManager: c.devicemanager, DriverManager: c.drivermanager, ServersContactedCh: c.serversContactedCh, @@ -1233,6 +1246,7 @@ func (c *Client) restoreState() error { CheckStore: c.checkStore, RPCClient: c, Getter: c.getter, + Wranglers: c.wranglers, } ar, err := c.allocrunnerFactory(arConf) @@ -1488,6 +1502,7 @@ func (c *Client) setupNode() error { node.NodeResources = &structs.NodeResources{} node.NodeResources.MinDynamicPort = newConfig.MinDynamicPort node.NodeResources.MaxDynamicPort = newConfig.MaxDynamicPort + node.NodeResources.Cpu = newConfig.Node.NodeResources.Cpu } if node.ReservedResources == nil { node.ReservedResources = &structs.NodeReservedResources{} @@ -1640,9 +1655,7 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp // update the response networks with the config // if we still have node changes, merge them if response.NodeResources != nil { - response.NodeResources.Networks = updateNetworks( - response.NodeResources.Networks, - newConfig) + response.NodeResources.Networks = updateNetworks(response.NodeResources.Networks, newConfig) if !newConfig.Node.NodeResources.Equal(response.NodeResources) { newConfig.Node.NodeResources.Merge(response.NodeResources) nodeHasChanged = true @@ -1655,6 +1668,10 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp nodeHasChanged = true } + // update config with total cpu compute if it was detected + if cpu := int(response.NodeResources.Cpu.CpuShares); cpu > 0 { + newConfig.CpuCompute = cpu + } } if nodeHasChanged { @@ -2701,13 +2718,13 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken 
string) error PrevAllocMigrator: prevAllocMigrator, DynamicRegistry: c.dynamicRegistry, CSIManager: c.csimanager, - CpusetManager: c.cpusetManager, DeviceManager: c.devicemanager, DriverManager: c.drivermanager, ServiceRegWrapper: c.serviceRegWrapper, CheckStore: c.checkStore, RPCClient: c, Getter: c.getter, + Wranglers: c.wranglers, } ar, err := c.allocrunnerFactory(arConf) @@ -3095,7 +3112,7 @@ func (c *Client) emitStats() { } // setGaugeForMemoryStats proxies metrics for memory specific statistics -func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { +func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *hoststats.HostStats, baseLabels []metrics.Label) { metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels) metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels) metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels) @@ -3103,7 +3120,7 @@ func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, } // setGaugeForCPUStats proxies metrics for CPU specific statistics -func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { +func (c *Client) setGaugeForCPUStats(nodeID string, hStats *hoststats.HostStats, baseLabels []metrics.Label) { labels := make([]metrics.Label, len(baseLabels)) copy(labels, baseLabels) @@ -3126,7 +3143,7 @@ func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, bas } // setGaugeForDiskStats proxies metrics for disk specific statistics -func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { +func (c *Client) setGaugeForDiskStats(nodeID string, hStats *hoststats.HostStats, baseLabels []metrics.Label) { labels := make([]metrics.Label, len(baseLabels)) 
copy(labels, baseLabels) @@ -3194,7 +3211,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics. } // No labels are required so we emit with only a key/value syntax -func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics.Label) { +func (c *Client) setGaugeForUptime(hStats *hoststats.HostStats, baseLabels []metrics.Label) { metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels) } diff --git a/client/client_test.go b/client/client_test.go index 53d801d52..2d835f07f 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -26,7 +26,6 @@ import ( trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/fingerprint" - "github.com/hashicorp/nomad/client/lib/cgutil" regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" cstate "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/command/agent/consul" @@ -1008,18 +1007,13 @@ func TestClient_Init(t *testing.T) { config.Node = mock.Node() client := &Client{ - config: config, - logger: testlog.HCLogger(t), - cpusetManager: new(cgutil.NoopCpusetManager), + config: config, + logger: testlog.HCLogger(t), } - if err := client.init(); err != nil { - t.Fatalf("err: %s", err) - } - - if _, err := os.Stat(allocDir); err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, client.init()) + _, err := os.Stat(allocDir) + must.NoError(t, err) } func TestClient_BlockedAllocations(t *testing.T) { diff --git a/client/config/arconfig.go b/client/config/arconfig.go index 7496bc266..0441e164d 100644 --- a/client/config/arconfig.go +++ b/client/config/arconfig.go @@ -7,14 +7,12 @@ import ( "context" log "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/allocdir" arinterfaces "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/consul" 
"github.com/hashicorp/nomad/client/devicemanager" "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/interfaces" - "github.com/hashicorp/nomad/client/lib/cgutil" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" "github.com/hashicorp/nomad/client/serviceregistration" @@ -90,9 +88,6 @@ type AllocRunnerConfig struct { // DriverManager handles dispensing of driver plugins DriverManager drivermanager.Manager - // CpusetManager configures the cpuset cgroup if supported by the platform - CpusetManager cgutil.CpusetManager - // ServersContactedCh is closed when the first GetClientAllocs call to // servers succeeds and allocs are synced. ServersContactedCh chan struct{} @@ -110,6 +105,9 @@ type AllocRunnerConfig struct { // Getter is an interface for retrieving artifacts. Getter interfaces.ArtifactGetter + + // Wranglers is an interface for managing unix/windows processes. + Wranglers interfaces.ProcessWranglers } // PrevAllocWatcher allows AllocRunners to wait for a previous allocation to diff --git a/client/config/config.go b/client/config/config.go index d4a1c474a..f494a9c29 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -15,11 +15,8 @@ import ( "github.com/hashicorp/consul-template/config" log "github.com/hashicorp/go-hclog" - "golang.org/x/exp/maps" - "golang.org/x/exp/slices" - "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/command/agent/host" "github.com/hashicorp/nomad/helper/bufconndialer" @@ -29,6 +26,8 @@ import ( structsc "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/plugins/base" "github.com/hashicorp/nomad/version" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" ) var ( @@ -299,7 +298,7 @@ type Config struct { 
CgroupParent string // ReservableCores if set overrides the set of reservable cores reported in fingerprinting. - ReservableCores []uint16 + ReservableCores []numalib.CoreID // NomadServiceDiscovery determines whether the Nomad native service // discovery client functionality is enabled. @@ -795,7 +794,7 @@ func DefaultConfig() *Config { CNIConfigDir: "/opt/cni/config", CNIInterfacePrefix: "eth", HostNetworks: map[string]*structs.ClientHostNetworkConfig{}, - CgroupParent: cgutil.GetCgroupParent(""), + CgroupParent: "nomad.slice", // SETH todo MaxDynamicPort: structs.DefaultMinDynamicPort, MinDynamicPort: structs.DefaultMaxDynamicPort, } diff --git a/client/fingerprint/cgroup.go b/client/fingerprint/cgroup.go index d3bdd72d3..17ff27384 100644 --- a/client/fingerprint/cgroup.go +++ b/client/fingerprint/cgroup.go @@ -4,87 +4,30 @@ package fingerprint import ( - "time" - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" ) -const ( - cgroupUnavailable = "unavailable" // "available" is over in cgroup_linux - - cgroupMountPointAttribute = "unique.cgroup.mountpoint" - cgroupVersionAttribute = "unique.cgroup.version" - - cgroupDetectInterval = 15 * time.Second -) - -type CGroupFingerprint struct { - logger hclog.Logger - lastState string - mountPointDetector MountPointDetector - versionDetector CgroupVersionDetector +type CgroupFingerprint struct { + StaticFingerprinter + logger hclog.Logger } -// MountPointDetector isolates calls to the cgroup library. -// -// This facilitates testing where we can implement fake mount points to test -// various code paths. -type MountPointDetector interface { - // MountPoint returns a cgroup mount-point. - // - // In v1, this is one arbitrary subsystem (e.g. /sys/fs/cgroup/cpu). - // - // In v2, this is the actual root mount point (i.e. /sys/fs/cgroup). 
- MountPoint() (string, error) -} - -// DefaultMountPointDetector implements the interface detector which calls the cgroups -// library directly -type DefaultMountPointDetector struct { -} - -// MountPoint calls out to the default cgroup library. -func (*DefaultMountPointDetector) MountPoint() (string, error) { - return cgutil.FindCgroupMountpointDir() -} - -// CgroupVersionDetector isolates calls to the cgroup library. -type CgroupVersionDetector interface { - // CgroupVersion returns v1 or v2 depending on the cgroups version in use. - CgroupVersion() string -} - -// DefaultCgroupVersionDetector implements the version detector which calls the -// cgroups library directly. -type DefaultCgroupVersionDetector struct { -} - -func (*DefaultCgroupVersionDetector) CgroupVersion() string { - if cgutil.UseV2 { - return "v2" - } - return "v1" -} - -// NewCGroupFingerprint returns a new cgroup fingerprinter -func NewCGroupFingerprint(logger hclog.Logger) Fingerprint { - return &CGroupFingerprint{ - logger: logger.Named("cgroup"), - lastState: cgroupUnavailable, - mountPointDetector: new(DefaultMountPointDetector), - versionDetector: new(DefaultCgroupVersionDetector), +func NewCgroupFingerprint(logger hclog.Logger) Fingerprint { + return &CgroupFingerprint{ + logger: logger.Named("cgroup"), } } -// clearCGroupAttributes clears any node attributes related to cgroups that might -// have been set in a previous fingerprint run. -func (f *CGroupFingerprint) clearCGroupAttributes(r *FingerprintResponse) { - r.RemoveAttribute(cgroupMountPointAttribute) - r.RemoveAttribute(cgroupVersionAttribute) -} - -// Periodic determines the interval at which the periodic fingerprinter will run. 
-func (f *CGroupFingerprint) Periodic() (bool, time.Duration) { - return true, cgroupDetectInterval +func (f *CgroupFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error { + const versionKey = "os.cgroups.version" + switch cgroupslib.GetMode() { + case cgroupslib.CG1: + response.AddAttribute(versionKey, "1") + f.logger.Debug("detected cgroups", "version", "1") + case cgroupslib.CG2: + response.AddAttribute(versionKey, "2") + f.logger.Debug("detected cgroups", "version", "2") + } + return nil } diff --git a/client/fingerprint/cgroup_default.go b/client/fingerprint/cgroup_default.go deleted file mode 100644 index 4d13af63b..000000000 --- a/client/fingerprint/cgroup_default.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build !linux - -package fingerprint - -func (f *CGroupFingerprint) Fingerprint(*FingerprintRequest, *FingerprintResponse) error { - return nil -} diff --git a/client/fingerprint/cgroup_linux.go b/client/fingerprint/cgroup_linux.go deleted file mode 100644 index 8a805b69a..000000000 --- a/client/fingerprint/cgroup_linux.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package fingerprint - -import ( - "fmt" -) - -const ( - cgroupAvailable = "available" -) - -// Fingerprint tries to find a valid cgroup mount point and the version of cgroups -// if a mount-point is present. -func (f *CGroupFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error { - mount, err := f.mountPointDetector.MountPoint() - if err != nil { - f.clearCGroupAttributes(resp) - return fmt.Errorf("failed to discover cgroup mount point: %s", err) - } - - // Check if a cgroup mount point was found. 
- if mount == "" { - f.clearCGroupAttributes(resp) - if f.lastState == cgroupAvailable { - f.logger.Warn("cgroups are now unavailable") - } - f.lastState = cgroupUnavailable - return nil - } - - // Check the version in use. - version := f.versionDetector.CgroupVersion() - - resp.AddAttribute(cgroupMountPointAttribute, mount) - resp.AddAttribute(cgroupVersionAttribute, version) - resp.Detected = true - - if f.lastState == cgroupUnavailable { - f.logger.Info("cgroups are available") - } - f.lastState = cgroupAvailable - return nil -} diff --git a/client/fingerprint/cgroup_test.go b/client/fingerprint/cgroup_test.go deleted file mode 100644 index 4039f195a..000000000 --- a/client/fingerprint/cgroup_test.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package fingerprint - -import ( - "fmt" - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/stretchr/testify/require" -) - -// A fake mount point detector that returns an empty path -type MountPointDetectorNoMountPoint struct{} - -func (m *MountPointDetectorNoMountPoint) MountPoint() (string, error) { - return "", nil -} - -// A fake mount point detector that returns an error -type MountPointDetectorMountPointFail struct{} - -func (m *MountPointDetectorMountPointFail) MountPoint() (string, error) { - return "", fmt.Errorf("cgroup mountpoint discovery failed") -} - -// A fake mount point detector that returns a valid path -type MountPointDetectorValidMountPoint struct{} - -func (m *MountPointDetectorValidMountPoint) MountPoint() (string, error) { - return "/sys/fs/cgroup", nil -} - -// A fake mount point detector that returns an empty path -type MountPointDetectorEmptyMountPoint struct{} - -func (m *MountPointDetectorEmptyMountPoint) MountPoint() (string, error) { - return "", nil -} - -// A fake 
version detector that returns the set version. -type FakeVersionDetector struct { - version string -} - -func (f *FakeVersionDetector) CgroupVersion() string { - return f.version -} - -func newRequest(node *structs.Node) *FingerprintRequest { - return &FingerprintRequest{ - Config: new(config.Config), - Node: node, - } -} - -func newNode() *structs.Node { - return &structs.Node{ - Attributes: make(map[string]string), - } -} - -func TestCgroup_MountPoint(t *testing.T) { - ci.Parallel(t) - - t.Run("mount-point fail", func(t *testing.T) { - f := &CGroupFingerprint{ - logger: testlog.HCLogger(t), - lastState: cgroupUnavailable, - mountPointDetector: new(MountPointDetectorMountPointFail), - versionDetector: new(DefaultCgroupVersionDetector), - } - - request := newRequest(newNode()) - var response FingerprintResponse - err := f.Fingerprint(request, &response) - require.EqualError(t, err, "failed to discover cgroup mount point: cgroup mountpoint discovery failed") - require.Empty(t, response.Attributes[cgroupMountPointAttribute]) - }) - - t.Run("mount-point available", func(t *testing.T) { - f := &CGroupFingerprint{ - logger: testlog.HCLogger(t), - lastState: cgroupUnavailable, - mountPointDetector: new(MountPointDetectorValidMountPoint), - versionDetector: new(DefaultCgroupVersionDetector), - } - - request := newRequest(newNode()) - var response FingerprintResponse - err := f.Fingerprint(request, &response) - require.NoError(t, err) - require.Equal(t, "/sys/fs/cgroup", response.Attributes[cgroupMountPointAttribute]) - }) - - t.Run("mount-point empty", func(t *testing.T) { - f := &CGroupFingerprint{ - logger: testlog.HCLogger(t), - lastState: cgroupUnavailable, - mountPointDetector: new(MountPointDetectorEmptyMountPoint), - versionDetector: new(DefaultCgroupVersionDetector), - } - - var response FingerprintResponse - err := f.Fingerprint(newRequest(newNode()), &response) - require.NoError(t, err) - require.Empty(t, response.Attributes[cgroupMountPointAttribute]) - }) - - 
t.Run("mount-point already present", func(t *testing.T) { - f := &CGroupFingerprint{ - logger: testlog.HCLogger(t), - lastState: cgroupAvailable, - mountPointDetector: new(MountPointDetectorValidMountPoint), - versionDetector: new(DefaultCgroupVersionDetector), - } - - var response FingerprintResponse - err := f.Fingerprint(newRequest(newNode()), &response) - require.NoError(t, err) - require.Equal(t, "/sys/fs/cgroup", response.Attributes[cgroupMountPointAttribute]) - }) -} - -func TestCgroup_Version(t *testing.T) { - t.Run("version v1", func(t *testing.T) { - f := &CGroupFingerprint{ - logger: testlog.HCLogger(t), - lastState: cgroupUnavailable, - mountPointDetector: new(MountPointDetectorValidMountPoint), - versionDetector: &FakeVersionDetector{version: "v1"}, - } - - var response FingerprintResponse - err := f.Fingerprint(newRequest(newNode()), &response) - require.NoError(t, err) - require.Equal(t, "v1", response.Attributes[cgroupVersionAttribute]) - }) - - t.Run("without mount-point", func(t *testing.T) { - f := &CGroupFingerprint{ - logger: testlog.HCLogger(t), - lastState: cgroupUnavailable, - mountPointDetector: new(MountPointDetectorEmptyMountPoint), - versionDetector: &FakeVersionDetector{version: "v1"}, - } - - var response FingerprintResponse - err := f.Fingerprint(newRequest(newNode()), &response) - require.NoError(t, err) - require.Empty(t, response.Attributes[cgroupMountPointAttribute]) - }) -} diff --git a/client/fingerprint/cpu.go b/client/fingerprint/cpu.go index ca3a9099c..e11fc8c03 100644 --- a/client/fingerprint/cpu.go +++ b/client/fingerprint/cpu.go @@ -5,35 +5,29 @@ package fingerprint import ( "fmt" + "runtime" "strconv" - "github.com/hashicorp/nomad/lib/cpuset" - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/stats" - shelpers "github.com/hashicorp/nomad/helper/stats" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/helper" 
"github.com/hashicorp/nomad/nomad/structs" -) - -const ( - // defaultCPUTicks is the default amount of CPU resources assumed to be - // available if the CPU performance data is unable to be detected. This is - // common on EC2 instances, where the env_aws fingerprinter will follow up, - // setting an accurate value. - defaultCPUTicks = 1000 // 1 core * 1 GHz + "github.com/klauspost/cpuid/v2" ) // CPUFingerprint is used to fingerprint the CPU type CPUFingerprint struct { StaticFingerprinter logger hclog.Logger + top *numalib.Topology // accumulates result in these resource structs resources *structs.Resources nodeResources *structs.NodeResources } -// NewCPUFingerprint is used to create a CPU fingerprint +// NewCPUFingerprint is used to create a CPU fingerprint. func NewCPUFingerprint(logger hclog.Logger) Fingerprint { return &CPUFingerprint{ logger: logger.Named("cpu"), @@ -51,45 +45,81 @@ func (f *CPUFingerprint) Fingerprint(request *FingerprintRequest, response *Fing f.setCoreCount(response) - f.setReservableCores(request, response) + f.setReservableCores(response) - f.setTotalCompute(request, response) + f.setTotalCompute(response) + + f.setNUMA(response) f.setResponseResources(response) + // indicate we successfully detected the system cpu / memory configuration response.Detected = true + // pass the topology back up to the client + response.UpdateInitialResult = func(ir *InitialResult) { + ir.Topology = f.top + } + return nil } -func (f *CPUFingerprint) initialize(request *FingerprintRequest) { - if err := stats.Init(uint64(request.Config.CpuCompute)); err != nil { - f.logger.Warn("failed initializing stats collector", "error", err) +func (*CPUFingerprint) reservedCompute(request *FingerprintRequest) structs.NodeReservedCpuResources { + switch { + case request.Config.Node == nil: + return structs.NodeReservedCpuResources{} + case request.Config.Node.ReservedResources == nil: + return structs.NodeReservedCpuResources{} + default: + return 
request.Config.Node.ReservedResources.Cpu } } +func (f *CPUFingerprint) initialize(request *FingerprintRequest) { + var ( + reservableCores *idset.Set[numalib.CoreID] + totalCompute = request.Config.CpuCompute + reservedCompute = f.reservedCompute(request) + reservedCores = idset.From[numalib.CoreID](reservedCompute.ReservedCpuCores) + ) + + if rc := request.Config.ReservableCores; rc != nil { + reservableCores = idset.From[numalib.CoreID](rc) + } + + f.top = numalib.Scan(append( + numalib.PlatformScanners(), + &numalib.ConfigScanner{ + ReservableCores: reservableCores, + ReservedCores: reservedCores, + TotalCompute: numalib.MHz(totalCompute), + ReservedCompute: numalib.MHz(reservedCompute.CpuShares), + }, + )) +} + func (f *CPUFingerprint) setModelName(response *FingerprintResponse) { - if modelName := stats.CPUModelName(); modelName != "" { - response.AddAttribute("cpu.modelname", modelName) - f.logger.Debug("detected CPU model", "name", modelName) + if model := cpuid.CPU.BrandName; model != "" { + response.AddAttribute("cpu.modelname", model) + f.logger.Debug("detected CPU model", "name", model) } } -func (*CPUFingerprint) frequency(mhz uint64) string { - return fmt.Sprintf("%.0f", float64(mhz)) +func (*CPUFingerprint) frequency(mhz numalib.MHz) string { + return strconv.FormatUint(uint64(mhz), 10) } func (f *CPUFingerprint) setFrequency(response *FingerprintResponse) { - power, efficiency := stats.CPUMHzPerCore() + performance, efficiency := f.top.CoreSpeeds() switch { case efficiency > 0: response.AddAttribute("cpu.frequency.efficiency", f.frequency(efficiency)) - response.AddAttribute("cpu.frequency.power", f.frequency(power)) + response.AddAttribute("cpu.frequency.performance", f.frequency(performance)) f.logger.Debug("detected CPU efficiency core speed", "mhz", efficiency) - f.logger.Debug("detected CPU power core speed", "mhz", power) - case power > 0: - response.AddAttribute("cpu.frequency", f.frequency(power)) - f.logger.Debug("detected CPU frequency", 
"mhz", power) + f.logger.Debug("detected CPU performance core speed", "mhz", performance) + case performance > 0: + response.AddAttribute("cpu.frequency", f.frequency(performance)) + f.logger.Debug("detected CPU frequency", "mhz", performance) } } @@ -97,55 +127,72 @@ func (*CPUFingerprint) cores(count int) string { return strconv.Itoa(count) } +func (*CPUFingerprint) nodes(count int) string { + return strconv.Itoa(count) +} + func (f *CPUFingerprint) setCoreCount(response *FingerprintResponse) { - power, efficiency := stats.CPUNumCores() + total := f.top.NumCores() + performance := f.top.NumPCores() + efficiency := f.top.NumECores() switch { case efficiency > 0: response.AddAttribute("cpu.numcores.efficiency", f.cores(efficiency)) - response.AddAttribute("cpu.numcores.power", f.cores(power)) + response.AddAttribute("cpu.numcores.performance", f.cores(performance)) + response.AddAttribute("cpu.numcores", f.cores(total)) f.logger.Debug("detected CPU efficiency core count", "cores", efficiency) - f.logger.Debug("detected CPU power core count", "cores", power) - case power > 0: - response.AddAttribute("cpu.numcores", f.cores(power)) - f.logger.Debug("detected CPU core count", power) - } - f.nodeResources.Cpu.TotalCpuCores = uint16(power + efficiency) -} - -func (f *CPUFingerprint) setReservableCores(request *FingerprintRequest, response *FingerprintResponse) { - reservable := request.Config.ReservableCores - if len(reservable) > 0 { - f.logger.Debug("reservable cores set by config", "cpuset", reservable) - } else { - cgroupParent := request.Config.CgroupParent - if reservable = f.deriveReservableCores(cgroupParent); reservable != nil { - if request.Node.ReservedResources != nil { - forNode := request.Node.ReservedResources.Cpu.ReservedCpuCores - reservable = cpuset.New(reservable...).Difference(cpuset.New(forNode...)).ToSlice() - f.logger.Debug("client configuration reserves these cores for node", "cores", forNode) - } - f.logger.Debug("set of reservable cores 
available for tasks", "cores", reservable) - } - } - - response.AddAttribute("cpu.reservablecores", strconv.Itoa(len(reservable))) - f.nodeResources.Cpu.ReservableCpuCores = reservable -} - -func (f *CPUFingerprint) setTotalCompute(request *FingerprintRequest, response *FingerprintResponse) { - var ticks uint64 - switch { - case shelpers.CpuTotalTicks() > 0: - ticks = shelpers.CpuTotalTicks() + f.logger.Debug("detected CPU performance core count", "cores", performance) + f.logger.Debug("detected CPU core count", "cores", total) default: - ticks = defaultCPUTicks + response.AddAttribute("cpu.numcores", f.cores(total)) + f.logger.Debug("detected CPU core count", "cores", total) } - response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks)) - f.resources.CPU = int(ticks) - f.nodeResources.Cpu.CpuShares = int64(ticks) + f.nodeResources.Cpu.TotalCpuCores = uint16(total) +} + +func (f *CPUFingerprint) setReservableCores(response *FingerprintResponse) { + switch runtime.GOOS { + case "linux": + // topology has already reduced to the intersection of usable cores + usable := f.top.UsableCores() + response.AddAttribute("cpu.reservablecores", f.cores(usable.Size())) + f.nodeResources.Cpu.ReservableCpuCores = helper.ConvertSlice( + usable.Slice(), func(id numalib.CoreID) uint16 { + return uint16(id) + }) + default: + response.AddAttribute("cpu.reservablecores", "0") + } +} + +func (f *CPUFingerprint) setTotalCompute(response *FingerprintResponse) { + totalCompute := f.top.TotalCompute() + usableCompute := f.top.UsableCompute() + + response.AddAttribute("cpu.totalcompute", f.frequency(totalCompute)) + response.AddAttribute("cpu.usablecompute", f.frequency(usableCompute)) + + f.resources.CPU = int(totalCompute) + f.nodeResources.Cpu.CpuShares = int64(totalCompute) } func (f *CPUFingerprint) setResponseResources(response *FingerprintResponse) { response.Resources = f.resources response.NodeResources = f.nodeResources } + +func (f *CPUFingerprint) setNUMA(response 
*FingerprintResponse) { + if !f.top.SupportsNUMA() { + return + } + + nodes := f.top.Nodes() + response.AddAttribute("numa.node.count", f.nodes(nodes.Size())) + + nodes.ForEach(func(id numalib.NodeID) error { + key := fmt.Sprintf("numa.node%d.cores", id) + cores := f.top.NodeCores(id) + response.AddAttribute(key, cores.String()) + return nil + }) +} diff --git a/client/fingerprint/cpu_default.go b/client/fingerprint/cpu_default.go deleted file mode 100644 index 118fb8eb4..000000000 --- a/client/fingerprint/cpu_default.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build !linux - -package fingerprint - -func (_ *CPUFingerprint) deriveReservableCores(string) []uint16 { - return nil -} diff --git a/client/fingerprint/cpu_default_test.go b/client/fingerprint/cpu_default_test.go index e1a9593f5..8616f9236 100644 --- a/client/fingerprint/cpu_default_test.go +++ b/client/fingerprint/cpu_default_test.go @@ -11,7 +11,8 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" @@ -22,10 +23,6 @@ func TestCPUFingerprint_Classic(t *testing.T) { logger := testlog.HCLogger(t) - // create cpuset manager so we can ensure cgroup tree is correct - mgr := cgutil.CreateCPUSetManager("", nil, logger) - mgr.Init() - // create the fingerprinter f := NewCPUFingerprint(logger) node := &structs.Node{Attributes: make(map[string]string)} @@ -42,29 +39,31 @@ func TestCPUFingerprint_Classic(t *testing.T) { must.NotNil(t, attributes) must.MapContainsKey(t, attributes, "cpu.numcores") must.MapContainsKey(t, attributes, "cpu.modelname") - must.MapContainsKey(t, attributes, "cpu.frequency") must.MapContainsKey(t, attributes, "cpu.totalcompute") 
must.Positive(t, response.Resources.CPU) must.Positive(t, response.NodeResources.Cpu.CpuShares) must.Positive(t, response.NodeResources.Cpu.SharesPerCore()) must.SliceNotEmpty(t, response.NodeResources.Cpu.ReservableCpuCores) - // asymetric core detection currently only works with apple silicon - must.MapNotContainsKey(t, attributes, "cpu.numcores.power") - must.MapNotContainsKey(t, attributes, "cpu.numcores.efficiency") + _, frequencyPresent := attributes["cpu.frequency"] + _, performancePresent := attributes["cpu.frequency.performance"] + _, efficiencyPresent := attributes["cpu.frequency.efficiency"] + ok := frequencyPresent || (performancePresent && efficiencyPresent) + must.True(t, ok, must.Sprint("expected cpu.frequency or cpu.frequency.performance and cpu.frequency.efficiency")) } // TestCPUFingerprint_OverrideCompute asserts that setting cpu_total_compute in // the client config overrides the detected CPU freq (if any). func TestCPUFingerprint_OverrideCompute(t *testing.T) { ci.Parallel(t) + testutil.MinimumCores(t, 4) f := NewCPUFingerprint(testlog.HCLogger(t)) node := &structs.Node{ Attributes: make(map[string]string), } cfg := &config.Config{ - ReservableCores: []uint16{0, 1, 2}, + ReservableCores: []numalib.CoreID{0, 1, 2}, } var originalCPU int @@ -72,22 +71,11 @@ func TestCPUFingerprint_OverrideCompute(t *testing.T) { request := &FingerprintRequest{Config: cfg, Node: node} var response FingerprintResponse err := f.Fingerprint(request, &response) - if err != nil { - t.Fatalf("err: %v", err) - } - - if !response.Detected { - t.Fatalf("expected response to be applicable") - } - - if attr := response.Attributes["cpu.reservablecores"]; attr != "3" { - t.Fatalf("expected cpu.reservablecores == 3 but found %s", attr) - } - - if response.Resources.CPU == 0 { - t.Fatalf("expected fingerprint of cpu of but found 0") - } + must.NoError(t, err) + must.True(t, response.Detected) + must.Eq(t, "3", response.Attributes["cpu.reservablecores"], must.Sprint("override of 
cpu.reservablecores is incorrect")) + must.Positive(t, response.Resources.CPU) originalCPU = response.Resources.CPU } @@ -99,23 +87,12 @@ func TestCPUFingerprint_OverrideCompute(t *testing.T) { request := &FingerprintRequest{Config: cfg, Node: node} var response FingerprintResponse err := f.Fingerprint(request, &response) - if err != nil { - t.Fatalf("err: %v", err) - } + must.NoError(t, err) // COMPAT(0.10): Remove in 0.10 - if response.Resources.CPU != cfg.CpuCompute { - t.Fatalf("expected override cpu of %d but found %d", cfg.CpuCompute, response.Resources.CPU) - } - if response.NodeResources.Cpu.CpuShares != int64(cfg.CpuCompute) { - t.Fatalf("expected override cpu of %d but found %d", cfg.CpuCompute, response.NodeResources.Cpu.CpuShares) - } - if response.Attributes["cpu.totalcompute"] != strconv.Itoa(cfg.CpuCompute) { - t.Fatalf("expected override cpu.totalcompute of %d but found %s", cfg.CpuCompute, response.Attributes["cpu.totalcompute"]) - } - - if attr := response.Attributes["cpu.reservablecores"]; attr != "3" { - t.Fatalf("expected cpu.reservablecores == 3 but found %s", attr) - } + must.Eq(t, cfg.CpuCompute, response.Resources.CPU, must.Sprint("cpu override did not take effect")) + must.Eq(t, int64(cfg.CpuCompute), response.NodeResources.Cpu.CpuShares, must.Sprint("cpu override did not take effect")) + must.Eq(t, strconv.Itoa(cfg.CpuCompute), response.Attributes["cpu.totalcompute"], must.Sprint("cpu override did not take effect")) + must.Eq(t, "3", response.Attributes["cpu.reservablecores"], must.Sprint("cpu override did not take effect")) } } diff --git a/client/fingerprint/cpu_linux.go b/client/fingerprint/cpu_linux.go deleted file mode 100644 index b7795693a..000000000 --- a/client/fingerprint/cpu_linux.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) HashiCorp, Inc.
-// SPDX-License-Identifier: MPL-2.0 - -package fingerprint - -import ( - "github.com/hashicorp/nomad/client/lib/cgutil" -) - -func (f *CPUFingerprint) deriveReservableCores(cgroupParent string) []uint16 { - // The cpuset cgroup manager is initialized (on linux), but not accessible - // from the finger-printer. So we reach in and grab the information manually. - // We may assume the hierarchy is already setup. - cpuset, err := cgutil.GetCPUsFromCgroup(cgroupParent) - if err != nil { - f.logger.Warn("failed to detect set of reservable cores", "error", err) - return nil - } - return cpuset -} diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 9b75eff7b..9382dfd82 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -16,10 +16,8 @@ import ( "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" - log "github.com/hashicorp/go-hclog" - cleanhttp "github.com/hashicorp/go-cleanhttp" - "github.com/hashicorp/nomad/helper/stats" + log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" ) @@ -135,13 +133,11 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F } // accumulate resource information, then assign to response - var resources *structs.Resources - var nodeResources *structs.NodeResources + nodeResources := new(structs.NodeResources) // copy over network specific information if val, ok := response.Attributes["unique.platform.aws.local-ipv4"]; ok && val != "" { response.AddAttribute("unique.network.ip-address", val) - nodeResources = new(structs.NodeResources) nodeResources.Networks = []*structs.NetworkResource{ { Mode: "host", @@ -177,30 +173,6 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F } } - // copy over CPU speed information - if specs := f.lookupCPU(ec2meta); specs != nil { - response.AddAttribute("cpu.frequency", fmt.Sprintf("%d", specs.MHz)) - 
response.AddAttribute("cpu.numcores", fmt.Sprintf("%d", specs.Cores)) - f.logger.Debug("lookup ec2 cpu", "cores", specs.Cores, "ghz", log.Fmt("%.1f", specs.GHz())) - - if ticks := specs.Ticks(); request.Config.CpuCompute <= 0 { - response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks)) - f.logger.Debug("setting ec2 cpu", "ticks", ticks) - stats.SetCpuTotalTicks(uint64(ticks)) - resources = new(structs.Resources) - resources.CPU = ticks - if nodeResources == nil { - nodeResources = new(structs.NodeResources) - } - nodeResources.Cpu = structs.NodeCpuResources{CpuShares: int64(ticks)} - } else { - response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", request.Config.CpuCompute)) - } - } else { - f.logger.Warn("failed to find the cpu specification for this instance type") - } - - response.Resources = resources response.NodeResources = nodeResources // populate Links @@ -220,15 +192,6 @@ func (f *EnvAWSFingerprint) instanceType(ec2meta *ec2metadata.EC2Metadata) (stri return strings.TrimSpace(response), nil } -func (f *EnvAWSFingerprint) lookupCPU(ec2meta *ec2metadata.EC2Metadata) *CPU { - instanceType, err := f.instanceType(ec2meta) - if err != nil { - f.logger.Warn("failed to read EC2 metadata instance-type", "error", err) - return nil - } - return LookupEC2CPU(instanceType) -} - func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2metadata.EC2Metadata, ip string) int { throughput := request.Config.NetworkSpeed if throughput != 0 { diff --git a/client/fingerprint/env_aws_cpu.go b/client/fingerprint/env_aws_cpu.go deleted file mode 100644 index e76f4b572..000000000 --- a/client/fingerprint/env_aws_cpu.go +++ /dev/null @@ -1,688 +0,0 @@ -// Code generated from hashicorp/nomad/tools/ec2info; DO NOT EDIT. - -package fingerprint - -// CPU contains virtual core count and processor baseline performance. 
-type CPU struct { - // use small units to reduce size of the embedded table - Cores uint32 // good for 4 billion cores - MHz uint32 // good for 4 billion MHz -} - -// Ticks computes the total number of cycles available across the virtual -// cores of a CPU. -func (c CPU) Ticks() int { - return int(c.MHz) * int(c.Cores) -} - -// GHz returns the speed of CPU in ghz. -func (c CPU) GHz() float64 { - return float64(c.MHz) / 1000.0 -} - -// newCPU create a CPUSpecs from the given virtual core count and core speed. -func newCPU(cores uint32, ghz float64) CPU { - return CPU{ - Cores: cores, - MHz: uint32(ghz * 1000), - } -} - -// LookupEC2CPU returns the virtual core count and core speed information from a -// lookup table generated from the Amazon EC2 API. -// -// If the instance type does not exist, nil is returned. -func LookupEC2CPU(instanceType string) *CPU { - specs, exists := instanceTypeCPU[instanceType] - if !exists { - return nil - } - return &specs -} - -var instanceTypeCPU = map[string]CPU{ - - "a1.2xlarge": newCPU(8, 2.3), - "a1.4xlarge": newCPU(16, 2.3), - "a1.large": newCPU(2, 2.3), - "a1.medium": newCPU(1, 2.3), - "a1.metal": newCPU(16, 2.3), - "a1.xlarge": newCPU(4, 2.3), - "c3.2xlarge": newCPU(8, 2.8), - "c3.4xlarge": newCPU(16, 2.8), - "c3.8xlarge": newCPU(32, 2.8), - "c3.large": newCPU(2, 2.8), - "c3.xlarge": newCPU(4, 2.8), - "c4.2xlarge": newCPU(8, 2.9), - "c4.4xlarge": newCPU(16, 2.9), - "c4.8xlarge": newCPU(36, 2.9), - "c4.large": newCPU(2, 2.9), - "c4.xlarge": newCPU(4, 2.9), - "c5.12xlarge": newCPU(48, 3.6), - "c5.18xlarge": newCPU(72, 3.4), - "c5.24xlarge": newCPU(96, 3.6), - "c5.2xlarge": newCPU(8, 3.4), - "c5.4xlarge": newCPU(16, 3.4), - "c5.9xlarge": newCPU(36, 3.4), - "c5.large": newCPU(2, 3.4), - "c5.metal": newCPU(96, 3.6), - "c5.xlarge": newCPU(4, 3.4), - "c5a.12xlarge": newCPU(48, 3.3), - "c5a.16xlarge": newCPU(64, 3.3), - "c5a.24xlarge": newCPU(96, 3.3), - "c5a.2xlarge": newCPU(8, 3.3), - "c5a.4xlarge": newCPU(16, 3.3), - "c5a.8xlarge": 
newCPU(32, 3.3), - "c5a.large": newCPU(2, 3.3), - "c5a.xlarge": newCPU(4, 3.3), - "c5ad.12xlarge": newCPU(48, 3.3), - "c5ad.16xlarge": newCPU(64, 3.3), - "c5ad.24xlarge": newCPU(96, 3.3), - "c5ad.2xlarge": newCPU(8, 3.3), - "c5ad.4xlarge": newCPU(16, 3.3), - "c5ad.8xlarge": newCPU(32, 3.3), - "c5ad.large": newCPU(2, 3.3), - "c5ad.xlarge": newCPU(4, 3.3), - "c5d.12xlarge": newCPU(48, 3.6), - "c5d.18xlarge": newCPU(72, 3.4), - "c5d.24xlarge": newCPU(96, 3.6), - "c5d.2xlarge": newCPU(8, 3.4), - "c5d.4xlarge": newCPU(16, 3.4), - "c5d.9xlarge": newCPU(36, 3.4), - "c5d.large": newCPU(2, 3.4), - "c5d.metal": newCPU(96, 3.6), - "c5d.xlarge": newCPU(4, 3.4), - "c5n.18xlarge": newCPU(72, 3.4), - "c5n.2xlarge": newCPU(8, 3.4), - "c5n.4xlarge": newCPU(16, 3.4), - "c5n.9xlarge": newCPU(36, 3.4), - "c5n.large": newCPU(2, 3.4), - "c5n.metal": newCPU(72, 3.4), - "c5n.xlarge": newCPU(4, 3.4), - "c6a.12xlarge": newCPU(48, 3.6), - "c6a.16xlarge": newCPU(64, 3.6), - "c6a.24xlarge": newCPU(96, 3.6), - "c6a.2xlarge": newCPU(8, 3.6), - "c6a.32xlarge": newCPU(128, 3.6), - "c6a.48xlarge": newCPU(192, 3.6), - "c6a.4xlarge": newCPU(16, 3.6), - "c6a.8xlarge": newCPU(32, 3.6), - "c6a.large": newCPU(2, 3.6), - "c6a.metal": newCPU(192, 3.6), - "c6a.xlarge": newCPU(4, 3.6), - "c6g.12xlarge": newCPU(48, 2.5), - "c6g.16xlarge": newCPU(64, 2.5), - "c6g.2xlarge": newCPU(8, 2.5), - "c6g.4xlarge": newCPU(16, 2.5), - "c6g.8xlarge": newCPU(32, 2.5), - "c6g.large": newCPU(2, 2.5), - "c6g.medium": newCPU(1, 2.5), - "c6g.metal": newCPU(64, 2.5), - "c6g.xlarge": newCPU(4, 2.5), - "c6gd.12xlarge": newCPU(48, 2.5), - "c6gd.16xlarge": newCPU(64, 2.5), - "c6gd.2xlarge": newCPU(8, 2.5), - "c6gd.4xlarge": newCPU(16, 2.5), - "c6gd.8xlarge": newCPU(32, 2.5), - "c6gd.large": newCPU(2, 2.5), - "c6gd.medium": newCPU(1, 2.5), - "c6gd.metal": newCPU(64, 2.5), - "c6gd.xlarge": newCPU(4, 2.5), - "c6gn.12xlarge": newCPU(48, 2.5), - "c6gn.16xlarge": newCPU(64, 2.5), - "c6gn.2xlarge": newCPU(8, 2.5), - "c6gn.4xlarge": 
newCPU(16, 2.5), - "c6gn.8xlarge": newCPU(32, 2.5), - "c6gn.large": newCPU(2, 2.5), - "c6gn.medium": newCPU(1, 2.5), - "c6gn.xlarge": newCPU(4, 2.5), - "c6i.12xlarge": newCPU(48, 3.5), - "c6i.16xlarge": newCPU(64, 3.5), - "c6i.24xlarge": newCPU(96, 3.5), - "c6i.2xlarge": newCPU(8, 3.5), - "c6i.32xlarge": newCPU(128, 3.5), - "c6i.4xlarge": newCPU(16, 3.5), - "c6i.8xlarge": newCPU(32, 3.5), - "c6i.large": newCPU(2, 3.5), - "c6i.metal": newCPU(128, 3.5), - "c6i.xlarge": newCPU(4, 3.5), - "c6id.12xlarge": newCPU(48, 3.5), - "c6id.16xlarge": newCPU(64, 3.5), - "c6id.24xlarge": newCPU(96, 3.5), - "c6id.2xlarge": newCPU(8, 3.5), - "c6id.32xlarge": newCPU(128, 3.5), - "c6id.4xlarge": newCPU(16, 3.5), - "c6id.8xlarge": newCPU(32, 3.5), - "c6id.large": newCPU(2, 3.5), - "c6id.metal": newCPU(128, 3.5), - "c6id.xlarge": newCPU(4, 3.5), - "c6in.12xlarge": newCPU(48, 3.5), - "c6in.16xlarge": newCPU(64, 3.5), - "c6in.24xlarge": newCPU(96, 3.5), - "c6in.2xlarge": newCPU(8, 3.5), - "c6in.32xlarge": newCPU(128, 3.5), - "c6in.4xlarge": newCPU(16, 3.5), - "c6in.8xlarge": newCPU(32, 3.5), - "c6in.large": newCPU(2, 3.5), - "c6in.metal": newCPU(128, 3.5), - "c6in.xlarge": newCPU(4, 3.5), - "c7g.12xlarge": newCPU(48, 2.6), - "c7g.16xlarge": newCPU(64, 2.6), - "c7g.2xlarge": newCPU(8, 2.6), - "c7g.4xlarge": newCPU(16, 2.6), - "c7g.8xlarge": newCPU(32, 2.6), - "c7g.large": newCPU(2, 2.6), - "c7g.medium": newCPU(1, 2.6), - "c7g.metal": newCPU(64, 2.6), - "c7g.xlarge": newCPU(4, 2.6), - "c7gn.12xlarge": newCPU(48, 2.6), - "c7gn.16xlarge": newCPU(64, 2.6), - "c7gn.2xlarge": newCPU(8, 2.6), - "c7gn.4xlarge": newCPU(16, 2.6), - "c7gn.8xlarge": newCPU(32, 2.6), - "c7gn.large": newCPU(2, 2.6), - "c7gn.medium": newCPU(1, 2.6), - "c7gn.xlarge": newCPU(4, 2.6), - "d2.2xlarge": newCPU(8, 2.4), - "d2.4xlarge": newCPU(16, 2.4), - "d2.8xlarge": newCPU(36, 2.4), - "d2.xlarge": newCPU(4, 2.4), - "d3.2xlarge": newCPU(8, 3.1), - "d3.4xlarge": newCPU(16, 3.1), - "d3.8xlarge": newCPU(32, 3.1), - "d3.xlarge": 
newCPU(4, 3.1), - "d3en.12xlarge": newCPU(48, 3.1), - "d3en.2xlarge": newCPU(8, 3.1), - "d3en.4xlarge": newCPU(16, 3.1), - "d3en.6xlarge": newCPU(24, 3.1), - "d3en.8xlarge": newCPU(32, 3.1), - "d3en.xlarge": newCPU(4, 3.1), - "dl1.24xlarge": newCPU(96, 3), - "f1.16xlarge": newCPU(64, 2.3), - "f1.2xlarge": newCPU(8, 2.3), - "f1.4xlarge": newCPU(16, 2.3), - "g2.2xlarge": newCPU(8, 2.6), - "g2.8xlarge": newCPU(32, 2.6), - "g3.16xlarge": newCPU(64, 2.3), - "g3.4xlarge": newCPU(16, 2.7), - "g3.8xlarge": newCPU(32, 2.7), - "g3s.xlarge": newCPU(4, 2.7), - "g4ad.16xlarge": newCPU(64, 3), - "g4ad.2xlarge": newCPU(8, 3), - "g4ad.4xlarge": newCPU(16, 3), - "g4ad.8xlarge": newCPU(32, 3), - "g4ad.xlarge": newCPU(4, 3), - "g4dn.12xlarge": newCPU(48, 2.5), - "g4dn.16xlarge": newCPU(64, 2.5), - "g4dn.2xlarge": newCPU(8, 2.5), - "g4dn.4xlarge": newCPU(16, 2.5), - "g4dn.8xlarge": newCPU(32, 2.5), - "g4dn.metal": newCPU(96, 2.5), - "g4dn.xlarge": newCPU(4, 2.5), - "g5.12xlarge": newCPU(48, 3.3), - "g5.16xlarge": newCPU(64, 3.3), - "g5.24xlarge": newCPU(96, 3.3), - "g5.2xlarge": newCPU(8, 3.3), - "g5.48xlarge": newCPU(192, 3.3), - "g5.4xlarge": newCPU(16, 3.3), - "g5.8xlarge": newCPU(32, 3.3), - "g5.xlarge": newCPU(4, 3.3), - "g5g.16xlarge": newCPU(64, 2.5), - "g5g.2xlarge": newCPU(8, 2.5), - "g5g.4xlarge": newCPU(16, 2.5), - "g5g.8xlarge": newCPU(32, 2.5), - "g5g.metal": newCPU(64, 2.5), - "g5g.xlarge": newCPU(4, 2.5), - "h1.16xlarge": newCPU(64, 2.3), - "h1.2xlarge": newCPU(8, 2.3), - "h1.4xlarge": newCPU(16, 2.3), - "h1.8xlarge": newCPU(32, 2.3), - "hpc6a.48xlarge": newCPU(96, 3.6), - "hpc6id.32xlarge": newCPU(64, 3.5), - "hpc7g.16xlarge": newCPU(64, 2.6), - "hpc7g.4xlarge": newCPU(16, 2.6), - "hpc7g.8xlarge": newCPU(32, 2.6), - "i2.2xlarge": newCPU(8, 2.5), - "i2.4xlarge": newCPU(16, 2.5), - "i2.8xlarge": newCPU(32, 2.5), - "i2.xlarge": newCPU(4, 2.5), - "i3.16xlarge": newCPU(64, 2.3), - "i3.2xlarge": newCPU(8, 2.3), - "i3.4xlarge": newCPU(16, 2.3), - "i3.8xlarge": newCPU(32, 
2.3), - "i3.large": newCPU(2, 2.3), - "i3.metal": newCPU(72, 2.3), - "i3.xlarge": newCPU(4, 2.3), - "i3en.12xlarge": newCPU(48, 3.1), - "i3en.24xlarge": newCPU(96, 3.1), - "i3en.2xlarge": newCPU(8, 3.1), - "i3en.3xlarge": newCPU(12, 3.1), - "i3en.6xlarge": newCPU(24, 3.1), - "i3en.large": newCPU(2, 3.1), - "i3en.metal": newCPU(96, 3.1), - "i3en.xlarge": newCPU(4, 3.1), - "i4g.16xlarge": newCPU(64, 2.5), - "i4g.2xlarge": newCPU(8, 2.5), - "i4g.4xlarge": newCPU(16, 2.5), - "i4g.8xlarge": newCPU(32, 2.5), - "i4g.large": newCPU(2, 2.5), - "i4g.xlarge": newCPU(4, 2.5), - "i4i.16xlarge": newCPU(64, 3.5), - "i4i.2xlarge": newCPU(8, 3.5), - "i4i.32xlarge": newCPU(128, 3.5), - "i4i.4xlarge": newCPU(16, 3.5), - "i4i.8xlarge": newCPU(32, 3.5), - "i4i.large": newCPU(2, 3.5), - "i4i.metal": newCPU(128, 3.5), - "i4i.xlarge": newCPU(4, 3.5), - "im4gn.16xlarge": newCPU(64, 2.5), - "im4gn.2xlarge": newCPU(8, 2.5), - "im4gn.4xlarge": newCPU(16, 2.5), - "im4gn.8xlarge": newCPU(32, 2.5), - "im4gn.large": newCPU(2, 2.5), - "im4gn.xlarge": newCPU(4, 2.5), - "inf1.24xlarge": newCPU(96, 2.5), - "inf1.2xlarge": newCPU(8, 2.5), - "inf1.6xlarge": newCPU(24, 2.5), - "inf1.xlarge": newCPU(4, 2.5), - "inf2.24xlarge": newCPU(96, 3.6), - "inf2.48xlarge": newCPU(192, 3.6), - "inf2.8xlarge": newCPU(32, 3.6), - "inf2.xlarge": newCPU(4, 3.6), - "is4gen.2xlarge": newCPU(8, 2.5), - "is4gen.4xlarge": newCPU(16, 2.5), - "is4gen.8xlarge": newCPU(32, 2.5), - "is4gen.large": newCPU(2, 2.5), - "is4gen.medium": newCPU(1, 2.5), - "is4gen.xlarge": newCPU(4, 2.5), - "m3.2xlarge": newCPU(8, 2.5), - "m3.large": newCPU(2, 2.5), - "m3.medium": newCPU(1, 2.5), - "m3.xlarge": newCPU(4, 2.5), - "m4.10xlarge": newCPU(40, 2.4), - "m4.16xlarge": newCPU(64, 2.3), - "m4.2xlarge": newCPU(8, 2.4), - "m4.4xlarge": newCPU(16, 2.4), - "m4.large": newCPU(2, 2.4), - "m4.xlarge": newCPU(4, 2.4), - "m5.12xlarge": newCPU(48, 3.1), - "m5.16xlarge": newCPU(64, 3.1), - "m5.24xlarge": newCPU(96, 3.1), - "m5.2xlarge": newCPU(8, 3.1), - 
"m5.4xlarge": newCPU(16, 3.1), - "m5.8xlarge": newCPU(32, 3.1), - "m5.large": newCPU(2, 3.1), - "m5.metal": newCPU(96, 3.1), - "m5.xlarge": newCPU(4, 3.1), - "m5a.12xlarge": newCPU(48, 2.5), - "m5a.16xlarge": newCPU(64, 2.5), - "m5a.24xlarge": newCPU(96, 2.5), - "m5a.2xlarge": newCPU(8, 2.5), - "m5a.4xlarge": newCPU(16, 2.5), - "m5a.8xlarge": newCPU(32, 2.5), - "m5a.large": newCPU(2, 2.5), - "m5a.xlarge": newCPU(4, 2.5), - "m5ad.12xlarge": newCPU(48, 2.2), - "m5ad.16xlarge": newCPU(64, 2.5), - "m5ad.24xlarge": newCPU(96, 2.2), - "m5ad.2xlarge": newCPU(8, 2.2), - "m5ad.4xlarge": newCPU(16, 2.2), - "m5ad.8xlarge": newCPU(32, 2.5), - "m5ad.large": newCPU(2, 2.2), - "m5ad.xlarge": newCPU(4, 2.2), - "m5d.12xlarge": newCPU(48, 3.1), - "m5d.16xlarge": newCPU(64, 3.1), - "m5d.24xlarge": newCPU(96, 3.1), - "m5d.2xlarge": newCPU(8, 3.1), - "m5d.4xlarge": newCPU(16, 3.1), - "m5d.8xlarge": newCPU(32, 3.1), - "m5d.large": newCPU(2, 3.1), - "m5d.metal": newCPU(96, 3.1), - "m5d.xlarge": newCPU(4, 3.1), - "m5dn.12xlarge": newCPU(48, 3.1), - "m5dn.16xlarge": newCPU(64, 3.1), - "m5dn.24xlarge": newCPU(96, 3.1), - "m5dn.2xlarge": newCPU(8, 3.1), - "m5dn.4xlarge": newCPU(16, 3.1), - "m5dn.8xlarge": newCPU(32, 3.1), - "m5dn.large": newCPU(2, 3.1), - "m5dn.metal": newCPU(96, 3.1), - "m5dn.xlarge": newCPU(4, 3.1), - "m5n.12xlarge": newCPU(48, 3.1), - "m5n.16xlarge": newCPU(64, 3.1), - "m5n.24xlarge": newCPU(96, 3.1), - "m5n.2xlarge": newCPU(8, 3.1), - "m5n.4xlarge": newCPU(16, 3.1), - "m5n.8xlarge": newCPU(32, 3.1), - "m5n.large": newCPU(2, 3.1), - "m5n.metal": newCPU(96, 3.1), - "m5n.xlarge": newCPU(4, 3.1), - "m5zn.12xlarge": newCPU(48, 4.5), - "m5zn.2xlarge": newCPU(8, 4.5), - "m5zn.3xlarge": newCPU(12, 4.5), - "m5zn.6xlarge": newCPU(24, 4.5), - "m5zn.large": newCPU(2, 4.5), - "m5zn.metal": newCPU(48, 4.5), - "m5zn.xlarge": newCPU(4, 4.5), - "m6a.12xlarge": newCPU(48, 3.6), - "m6a.16xlarge": newCPU(64, 3.6), - "m6a.24xlarge": newCPU(96, 3.6), - "m6a.2xlarge": newCPU(8, 3.6), - 
"m6a.32xlarge": newCPU(128, 3.6), - "m6a.48xlarge": newCPU(192, 3.6), - "m6a.4xlarge": newCPU(16, 3.6), - "m6a.8xlarge": newCPU(32, 3.6), - "m6a.large": newCPU(2, 3.6), - "m6a.metal": newCPU(192, 3.6), - "m6a.xlarge": newCPU(4, 3.6), - "m6g.12xlarge": newCPU(48, 2.5), - "m6g.16xlarge": newCPU(64, 2.5), - "m6g.2xlarge": newCPU(8, 2.5), - "m6g.4xlarge": newCPU(16, 2.5), - "m6g.8xlarge": newCPU(32, 2.5), - "m6g.large": newCPU(2, 2.5), - "m6g.medium": newCPU(1, 2.5), - "m6g.metal": newCPU(64, 2.5), - "m6g.xlarge": newCPU(4, 2.5), - "m6gd.12xlarge": newCPU(48, 2.5), - "m6gd.16xlarge": newCPU(64, 2.5), - "m6gd.2xlarge": newCPU(8, 2.5), - "m6gd.4xlarge": newCPU(16, 2.5), - "m6gd.8xlarge": newCPU(32, 2.5), - "m6gd.large": newCPU(2, 2.5), - "m6gd.medium": newCPU(1, 2.5), - "m6gd.metal": newCPU(64, 2.5), - "m6gd.xlarge": newCPU(4, 2.5), - "m6i.12xlarge": newCPU(48, 3.5), - "m6i.16xlarge": newCPU(64, 3.5), - "m6i.24xlarge": newCPU(96, 3.5), - "m6i.2xlarge": newCPU(8, 3.5), - "m6i.32xlarge": newCPU(128, 3.5), - "m6i.4xlarge": newCPU(16, 3.5), - "m6i.8xlarge": newCPU(32, 3.5), - "m6i.large": newCPU(2, 3.5), - "m6i.metal": newCPU(128, 3.5), - "m6i.xlarge": newCPU(4, 3.5), - "m6id.12xlarge": newCPU(48, 3.5), - "m6id.16xlarge": newCPU(64, 3.5), - "m6id.24xlarge": newCPU(96, 3.5), - "m6id.2xlarge": newCPU(8, 3.5), - "m6id.32xlarge": newCPU(128, 3.5), - "m6id.4xlarge": newCPU(16, 3.5), - "m6id.8xlarge": newCPU(32, 3.5), - "m6id.large": newCPU(2, 3.5), - "m6id.metal": newCPU(128, 3.5), - "m6id.xlarge": newCPU(4, 3.5), - "m6idn.12xlarge": newCPU(48, 3.5), - "m6idn.16xlarge": newCPU(64, 3.5), - "m6idn.24xlarge": newCPU(96, 3.5), - "m6idn.2xlarge": newCPU(8, 3.5), - "m6idn.32xlarge": newCPU(128, 3.5), - "m6idn.4xlarge": newCPU(16, 3.5), - "m6idn.8xlarge": newCPU(32, 3.5), - "m6idn.large": newCPU(2, 3.5), - "m6idn.metal": newCPU(128, 3.5), - "m6idn.xlarge": newCPU(4, 3.5), - "m6in.12xlarge": newCPU(48, 3.5), - "m6in.16xlarge": newCPU(64, 3.5), - "m6in.24xlarge": newCPU(96, 3.5), - 
"m6in.2xlarge": newCPU(8, 3.5), - "m6in.32xlarge": newCPU(128, 3.5), - "m6in.4xlarge": newCPU(16, 3.5), - "m6in.8xlarge": newCPU(32, 3.5), - "m6in.large": newCPU(2, 3.5), - "m6in.metal": newCPU(128, 3.5), - "m6in.xlarge": newCPU(4, 3.5), - "m7g.12xlarge": newCPU(48, 2.6), - "m7g.16xlarge": newCPU(64, 2.6), - "m7g.2xlarge": newCPU(8, 2.6), - "m7g.4xlarge": newCPU(16, 2.6), - "m7g.8xlarge": newCPU(32, 2.6), - "m7g.large": newCPU(2, 2.6), - "m7g.medium": newCPU(1, 2.6), - "m7g.metal": newCPU(64, 2.6), - "m7g.xlarge": newCPU(4, 2.6), - "mac1.metal": newCPU(12, 3.2), - "mac2.metal": newCPU(8, 3.2), - "p2.16xlarge": newCPU(64, 2.3), - "p2.8xlarge": newCPU(32, 2.7), - "p2.xlarge": newCPU(4, 2.7), - "p3.16xlarge": newCPU(64, 2.7), - "p3.2xlarge": newCPU(8, 2.7), - "p3.8xlarge": newCPU(32, 2.7), - "p3dn.24xlarge": newCPU(96, 2.5), - "p4d.24xlarge": newCPU(96, 3), - "r3.2xlarge": newCPU(8, 2.5), - "r3.4xlarge": newCPU(16, 2.5), - "r3.8xlarge": newCPU(32, 2.5), - "r3.large": newCPU(2, 2.5), - "r3.xlarge": newCPU(4, 2.5), - "r4.16xlarge": newCPU(64, 2.3), - "r4.2xlarge": newCPU(8, 2.3), - "r4.4xlarge": newCPU(16, 2.3), - "r4.8xlarge": newCPU(32, 2.3), - "r4.large": newCPU(2, 2.3), - "r4.xlarge": newCPU(4, 2.3), - "r5.12xlarge": newCPU(48, 3.1), - "r5.16xlarge": newCPU(64, 3.1), - "r5.24xlarge": newCPU(96, 3.1), - "r5.2xlarge": newCPU(8, 3.1), - "r5.4xlarge": newCPU(16, 3.1), - "r5.8xlarge": newCPU(32, 3.1), - "r5.large": newCPU(2, 3.1), - "r5.metal": newCPU(96, 3.1), - "r5.xlarge": newCPU(4, 3.1), - "r5a.12xlarge": newCPU(48, 2.5), - "r5a.16xlarge": newCPU(64, 2.5), - "r5a.24xlarge": newCPU(96, 2.5), - "r5a.2xlarge": newCPU(8, 2.5), - "r5a.4xlarge": newCPU(16, 2.5), - "r5a.8xlarge": newCPU(32, 2.5), - "r5a.large": newCPU(2, 2.5), - "r5a.xlarge": newCPU(4, 2.5), - "r5ad.12xlarge": newCPU(48, 2.2), - "r5ad.16xlarge": newCPU(64, 2.5), - "r5ad.24xlarge": newCPU(96, 2.2), - "r5ad.2xlarge": newCPU(8, 2.2), - "r5ad.4xlarge": newCPU(16, 2.2), - "r5ad.8xlarge": newCPU(32, 2.5), - 
"r5ad.large": newCPU(2, 2.2), - "r5ad.xlarge": newCPU(4, 2.2), - "r5b.12xlarge": newCPU(48, 3.1), - "r5b.16xlarge": newCPU(64, 3.1), - "r5b.24xlarge": newCPU(96, 3.1), - "r5b.2xlarge": newCPU(8, 3.1), - "r5b.4xlarge": newCPU(16, 3.1), - "r5b.8xlarge": newCPU(32, 3.1), - "r5b.large": newCPU(2, 3.1), - "r5b.metal": newCPU(96, 3.1), - "r5b.xlarge": newCPU(4, 3.1), - "r5d.12xlarge": newCPU(48, 3.1), - "r5d.16xlarge": newCPU(64, 3.1), - "r5d.24xlarge": newCPU(96, 3.1), - "r5d.2xlarge": newCPU(8, 3.1), - "r5d.4xlarge": newCPU(16, 3.1), - "r5d.8xlarge": newCPU(32, 3.1), - "r5d.large": newCPU(2, 3.1), - "r5d.metal": newCPU(96, 3.1), - "r5d.xlarge": newCPU(4, 3.1), - "r5dn.12xlarge": newCPU(48, 3.1), - "r5dn.16xlarge": newCPU(64, 3.1), - "r5dn.24xlarge": newCPU(96, 3.1), - "r5dn.2xlarge": newCPU(8, 3.1), - "r5dn.4xlarge": newCPU(16, 3.1), - "r5dn.8xlarge": newCPU(32, 3.1), - "r5dn.large": newCPU(2, 3.1), - "r5dn.metal": newCPU(96, 3.1), - "r5dn.xlarge": newCPU(4, 3.1), - "r5n.12xlarge": newCPU(48, 3.1), - "r5n.16xlarge": newCPU(64, 3.1), - "r5n.24xlarge": newCPU(96, 3.1), - "r5n.2xlarge": newCPU(8, 3.1), - "r5n.4xlarge": newCPU(16, 3.1), - "r5n.8xlarge": newCPU(32, 3.1), - "r5n.large": newCPU(2, 3.1), - "r5n.metal": newCPU(96, 3.1), - "r5n.xlarge": newCPU(4, 3.1), - "r6a.12xlarge": newCPU(48, 3.6), - "r6a.16xlarge": newCPU(64, 3.6), - "r6a.24xlarge": newCPU(96, 3.6), - "r6a.2xlarge": newCPU(8, 3.6), - "r6a.32xlarge": newCPU(128, 3.6), - "r6a.48xlarge": newCPU(192, 3.6), - "r6a.4xlarge": newCPU(16, 3.6), - "r6a.8xlarge": newCPU(32, 3.6), - "r6a.large": newCPU(2, 3.6), - "r6a.metal": newCPU(192, 3.6), - "r6a.xlarge": newCPU(4, 3.6), - "r6g.12xlarge": newCPU(48, 2.5), - "r6g.16xlarge": newCPU(64, 2.5), - "r6g.2xlarge": newCPU(8, 2.5), - "r6g.4xlarge": newCPU(16, 2.5), - "r6g.8xlarge": newCPU(32, 2.5), - "r6g.large": newCPU(2, 2.5), - "r6g.medium": newCPU(1, 2.5), - "r6g.metal": newCPU(64, 2.5), - "r6g.xlarge": newCPU(4, 2.5), - "r6gd.12xlarge": newCPU(48, 2.5), - 
"r6gd.16xlarge": newCPU(64, 2.5), - "r6gd.2xlarge": newCPU(8, 2.5), - "r6gd.4xlarge": newCPU(16, 2.5), - "r6gd.8xlarge": newCPU(32, 2.5), - "r6gd.large": newCPU(2, 2.5), - "r6gd.medium": newCPU(1, 2.5), - "r6gd.metal": newCPU(64, 2.5), - "r6gd.xlarge": newCPU(4, 2.5), - "r6i.12xlarge": newCPU(48, 3.5), - "r6i.16xlarge": newCPU(64, 3.5), - "r6i.24xlarge": newCPU(96, 3.5), - "r6i.2xlarge": newCPU(8, 3.5), - "r6i.32xlarge": newCPU(128, 3.5), - "r6i.4xlarge": newCPU(16, 3.5), - "r6i.8xlarge": newCPU(32, 3.5), - "r6i.large": newCPU(2, 3.5), - "r6i.metal": newCPU(128, 3.5), - "r6i.xlarge": newCPU(4, 3.5), - "r6id.12xlarge": newCPU(48, 3.5), - "r6id.16xlarge": newCPU(64, 3.5), - "r6id.24xlarge": newCPU(96, 3.5), - "r6id.2xlarge": newCPU(8, 3.5), - "r6id.32xlarge": newCPU(128, 3.5), - "r6id.4xlarge": newCPU(16, 3.5), - "r6id.8xlarge": newCPU(32, 3.5), - "r6id.large": newCPU(2, 3.5), - "r6id.metal": newCPU(128, 3.5), - "r6id.xlarge": newCPU(4, 3.5), - "r6idn.12xlarge": newCPU(48, 3.5), - "r6idn.16xlarge": newCPU(64, 3.5), - "r6idn.24xlarge": newCPU(96, 3.5), - "r6idn.2xlarge": newCPU(8, 3.5), - "r6idn.32xlarge": newCPU(128, 3.5), - "r6idn.4xlarge": newCPU(16, 3.5), - "r6idn.8xlarge": newCPU(32, 3.5), - "r6idn.large": newCPU(2, 3.5), - "r6idn.metal": newCPU(128, 3.5), - "r6idn.xlarge": newCPU(4, 3.5), - "r6in.12xlarge": newCPU(48, 3.5), - "r6in.16xlarge": newCPU(64, 3.5), - "r6in.24xlarge": newCPU(96, 3.5), - "r6in.2xlarge": newCPU(8, 3.5), - "r6in.32xlarge": newCPU(128, 3.5), - "r6in.4xlarge": newCPU(16, 3.5), - "r6in.8xlarge": newCPU(32, 3.5), - "r6in.large": newCPU(2, 3.5), - "r6in.metal": newCPU(128, 3.5), - "r6in.xlarge": newCPU(4, 3.5), - "r7g.12xlarge": newCPU(48, 2.6), - "r7g.16xlarge": newCPU(64, 2.6), - "r7g.2xlarge": newCPU(8, 2.6), - "r7g.4xlarge": newCPU(16, 2.6), - "r7g.8xlarge": newCPU(32, 2.6), - "r7g.large": newCPU(2, 2.6), - "r7g.medium": newCPU(1, 2.6), - "r7g.metal": newCPU(64, 2.6), - "r7g.xlarge": newCPU(4, 2.6), - "t2.2xlarge": newCPU(8, 2.3), - 
"t2.large": newCPU(2, 2.3), - "t2.medium": newCPU(2, 2.3), - "t2.micro": newCPU(1, 2.5), - "t2.nano": newCPU(1, 2.4), - "t2.small": newCPU(1, 2.5), - "t2.xlarge": newCPU(4, 2.3), - "t3.2xlarge": newCPU(8, 2.5), - "t3.large": newCPU(2, 2.5), - "t3.medium": newCPU(2, 2.5), - "t3.micro": newCPU(2, 2.5), - "t3.nano": newCPU(2, 2.5), - "t3.small": newCPU(2, 2.5), - "t3.xlarge": newCPU(4, 2.5), - "t3a.2xlarge": newCPU(8, 2.2), - "t3a.large": newCPU(2, 2.2), - "t3a.medium": newCPU(2, 2.2), - "t3a.micro": newCPU(2, 2.2), - "t3a.nano": newCPU(2, 2.2), - "t3a.small": newCPU(2, 2.2), - "t3a.xlarge": newCPU(4, 2.2), - "t4g.2xlarge": newCPU(8, 2.5), - "t4g.large": newCPU(2, 2.5), - "t4g.medium": newCPU(2, 2.5), - "t4g.micro": newCPU(2, 2.5), - "t4g.nano": newCPU(2, 2.5), - "t4g.small": newCPU(2, 2.5), - "t4g.xlarge": newCPU(4, 2.5), - "trn1.2xlarge": newCPU(8, 3.5), - "trn1.32xlarge": newCPU(128, 3.5), - "trn1n.32xlarge": newCPU(128, 3.5), - "u-12tb1.112xlarge": newCPU(448, 2.1), - "u-18tb1.112xlarge": newCPU(448, 2.7), - "u-24tb1.112xlarge": newCPU(448, 2.7), - "u-3tb1.56xlarge": newCPU(224, 2.1), - "u-6tb1.112xlarge": newCPU(448, 2.1), - "u-6tb1.56xlarge": newCPU(224, 2.1), - "u-9tb1.112xlarge": newCPU(448, 2.1), - "vt1.24xlarge": newCPU(96, 2.5), - "vt1.3xlarge": newCPU(12, 2.5), - "vt1.6xlarge": newCPU(24, 2.5), - "x1.16xlarge": newCPU(64, 2.3), - "x1.32xlarge": newCPU(128, 2.3), - "x1e.16xlarge": newCPU(64, 2.3), - "x1e.2xlarge": newCPU(8, 2.3), - "x1e.32xlarge": newCPU(128, 2.3), - "x1e.4xlarge": newCPU(16, 2.3), - "x1e.8xlarge": newCPU(32, 2.3), - "x1e.xlarge": newCPU(4, 2.3), - "x2gd.12xlarge": newCPU(48, 2.5), - "x2gd.16xlarge": newCPU(64, 2.5), - "x2gd.2xlarge": newCPU(8, 2.5), - "x2gd.4xlarge": newCPU(16, 2.5), - "x2gd.8xlarge": newCPU(32, 2.5), - "x2gd.large": newCPU(2, 2.5), - "x2gd.medium": newCPU(1, 2.5), - "x2gd.metal": newCPU(64, 2.5), - "x2gd.xlarge": newCPU(4, 2.5), - "x2idn.16xlarge": newCPU(64, 3.5), - "x2idn.24xlarge": newCPU(96, 3.5), - "x2idn.32xlarge": 
newCPU(128, 3.5), - "x2idn.metal": newCPU(128, 3.5), - "x2iedn.16xlarge": newCPU(64, 3.5), - "x2iedn.24xlarge": newCPU(96, 3.5), - "x2iedn.2xlarge": newCPU(8, 3.5), - "x2iedn.32xlarge": newCPU(128, 3.5), - "x2iedn.4xlarge": newCPU(16, 3.5), - "x2iedn.8xlarge": newCPU(32, 3.5), - "x2iedn.metal": newCPU(128, 3.5), - "x2iedn.xlarge": newCPU(4, 3.5), - "x2iezn.12xlarge": newCPU(48, 4.5), - "x2iezn.2xlarge": newCPU(8, 4.5), - "x2iezn.4xlarge": newCPU(16, 4.5), - "x2iezn.6xlarge": newCPU(24, 4.5), - "x2iezn.8xlarge": newCPU(32, 4.5), - "x2iezn.metal": newCPU(48, 4.5), - "z1d.12xlarge": newCPU(48, 4), - "z1d.2xlarge": newCPU(8, 4), - "z1d.3xlarge": newCPU(12, 4), - "z1d.6xlarge": newCPU(24, 4), - "z1d.large": newCPU(2, 4), - "z1d.metal": newCPU(48, 4), - "z1d.xlarge": newCPU(4, 4), -} diff --git a/client/fingerprint/env_aws_test.go b/client/fingerprint/env_aws_test.go index 35f756bb2..ad59c073e 100644 --- a/client/fingerprint/env_aws_test.go +++ b/client/fingerprint/env_aws_test.go @@ -235,36 +235,6 @@ func TestCPUFingerprint_AWS_InstanceFound(t *testing.T) { err := f.Fingerprint(request, &response) require.NoError(t, err) require.True(t, response.Detected) - require.Equal(t, "2200", response.Attributes["cpu.frequency"]) - require.Equal(t, "8", response.Attributes["cpu.numcores"]) - require.Equal(t, "17600", response.Attributes["cpu.totalcompute"]) - require.Equal(t, 17600, response.Resources.CPU) - require.Equal(t, int64(17600), response.NodeResources.Cpu.CpuShares) -} - -func TestCPUFingerprint_AWS_OverrideCompute(t *testing.T) { - ci.Parallel(t) - - endpoint, cleanup := startFakeEC2Metadata(t, awsStubs) - defer cleanup() - - f := NewEnvAWSFingerprint(testlog.HCLogger(t)) - f.(*EnvAWSFingerprint).endpoint = endpoint - - node := &structs.Node{Attributes: make(map[string]string)} - - request := &FingerprintRequest{Config: &config.Config{ - CpuCompute: 99999, - }, Node: node} - var response FingerprintResponse - err := f.Fingerprint(request, &response) - require.NoError(t, 
err) - require.True(t, response.Detected) - require.Equal(t, "2200", response.Attributes["cpu.frequency"]) - require.Equal(t, "8", response.Attributes["cpu.numcores"]) - require.Equal(t, "99999", response.Attributes["cpu.totalcompute"]) - require.Nil(t, response.Resources) // defaults in cpu fingerprinter - require.Zero(t, response.NodeResources.Cpu) // defaults in cpu fingerprinter } func TestCPUFingerprint_AWS_InstanceNotFound(t *testing.T) { @@ -283,12 +253,6 @@ func TestCPUFingerprint_AWS_InstanceNotFound(t *testing.T) { err := f.Fingerprint(request, &response) require.NoError(t, err) require.True(t, response.Detected) - require.NotContains(t, response.Attributes, "cpu.modelname") - require.NotContains(t, response.Attributes, "cpu.frequency") - require.NotContains(t, response.Attributes, "cpu.numcores") - require.NotContains(t, response.Attributes, "cpu.totalcompute") - require.Nil(t, response.Resources) - require.Nil(t, response.NodeResources) } /// Utility functions for tests diff --git a/client/fingerprint/env_azure.go b/client/fingerprint/env_azure.go index 28d579d65..24ddf1dea 100644 --- a/client/fingerprint/env_azure.go +++ b/client/fingerprint/env_azure.go @@ -15,7 +15,6 @@ import ( cleanhttp "github.com/hashicorp/go-cleanhttp" log "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/useragent" "github.com/hashicorp/nomad/nomad/structs" ) @@ -106,7 +105,7 @@ func (f *EnvAzureFingerprint) Get(attribute string, format string) (string, erro return "", err } - if res.StatusCode >= 400 { + if res.StatusCode >= http.StatusBadRequest { return "", ReqError{res.StatusCode} } diff --git a/client/fingerprint/env_gce.go b/client/fingerprint/env_gce.go index 10936c179..d1b59b607 100644 --- a/client/fingerprint/env_gce.go +++ b/client/fingerprint/env_gce.go @@ -121,7 +121,7 @@ func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error return "", err } - if res.StatusCode >= 400 { + if res.StatusCode >= http.StatusBadRequest { 
return "", ReqError{res.StatusCode} } diff --git a/client/fingerprint/fingerprint_linux.go b/client/fingerprint/fingerprint_linux.go index 874575a73..a7a349cf1 100644 --- a/client/fingerprint/fingerprint_linux.go +++ b/client/fingerprint/fingerprint_linux.go @@ -4,6 +4,6 @@ package fingerprint func initPlatformFingerprints(fps map[string]Factory) { - fps["cgroup"] = NewCGroupFingerprint + fps["cgroup"] = NewCgroupFingerprint fps["bridge"] = NewBridgeFingerprint } diff --git a/client/fingerprint/structs.go b/client/fingerprint/structs.go index 27ecba06c..35d857504 100644 --- a/client/fingerprint/structs.go +++ b/client/fingerprint/structs.go @@ -5,6 +5,7 @@ package fingerprint import ( "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/nomad/structs" ) @@ -26,6 +27,16 @@ type FingerprintResponse struct { // Detected is a boolean indicating whether the fingerprinter detected // if the resource was available Detected bool + + // UpdateInitialResult can be used by a fingerprinter to pass information it + // discovers back up to the client. + UpdateInitialResult func(*InitialResult) +} + +// InitialResult can be updated by individual fingerprinters to bubble information +// it detected back up to the client after the first pass. 
+type InitialResult struct { + Topology *numalib.Topology } // AddAttribute adds the name and value for a node attribute to the fingerprint diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index 82a01442b..821641b7b 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -29,6 +29,10 @@ type FingerprintManager struct { reloadableFps map[string]fingerprint.ReloadableFingerprint + // initialResult is used to pass information detected during the first pass + // of fingerprinting back to the client + initialResult *fingerprint.InitialResult + logger log.Logger } @@ -50,6 +54,7 @@ func NewFingerprintManager( shutdownCh: shutdownCh, logger: logger.Named("fingerprint_mgr"), reloadableFps: make(map[string]fingerprint.ReloadableFingerprint), + initialResult: new(fingerprint.InitialResult), } } @@ -71,7 +76,7 @@ func (fm *FingerprintManager) getNode() *structs.Node { // identifying allowlisted and denylisted fingerprints/drivers. Then, for // those which require periotic checking, it starts a periodic process for // each. -func (fm *FingerprintManager) Run() error { +func (fm *FingerprintManager) Run() (*fingerprint.InitialResult, error) { // First, set up all fingerprints cfg := fm.getConfig() // COMPAT(1.0) using inclusive language, whitelist is kept for backward compatibility. 
@@ -100,7 +105,7 @@ func (fm *FingerprintManager) Run() error { } if err := fm.setupFingerprinters(availableFingerprints); err != nil { - return err + return nil, err } if len(skippedFingerprints) != 0 { @@ -108,7 +113,7 @@ func (fm *FingerprintManager) Run() error { "skipped_fingerprinters", skippedFingerprints) } - return nil + return fm.initialResult, nil } // Reload will reload any registered ReloadableFingerprinters and immediately call Fingerprint @@ -203,5 +208,9 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint fm.setNode(node) } + if response.UpdateInitialResult != nil { + response.UpdateInitialResult(fm.initialResult) + } + return response.Detected, nil } diff --git a/client/fingerprint_manager_test.go b/client/fingerprint_manager_test.go index c46df756a..ae85c8836 100644 --- a/client/fingerprint_manager_test.go +++ b/client/fingerprint_manager_test.go @@ -8,12 +8,11 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/config" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestFingerprintManager_Run_ResourcesFingerprint(t *testing.T) { ci.Parallel(t) - require := require.New(t) testClient, cleanup := TestClient(t, nil) defer cleanup() @@ -27,19 +26,18 @@ func TestFingerprintManager_Run_ResourcesFingerprint(t *testing.T) { testClient.logger, ) - err := fm.Run() - require.Nil(err) + _, err := fm.Run() + must.NoError(t, err) node := testClient.config.Node - require.NotEqual(0, node.Resources.CPU) - require.NotEqual(0, node.Resources.MemoryMB) - require.NotZero(node.Resources.DiskMB) + must.Positive(t, node.Resources.CPU) + must.Positive(t, node.Resources.MemoryMB) + must.Positive(t, node.Resources.DiskMB) } func TestFimgerprintManager_Run_InWhitelist(t *testing.T) { ci.Parallel(t) - require := require.New(t) testClient, cleanup := TestClient(t, func(c *config.Config) { c.Options = map[string]string{ @@ -58,17 +56,15 @@ func TestFimgerprintManager_Run_InWhitelist(t 
*testing.T) { testClient.logger, ) - err := fm.Run() - require.Nil(err) + _, err := fm.Run() + must.NoError(t, err) node := testClient.config.Node - - require.NotEqual(node.Attributes["cpu.frequency"], "") + must.NotEq(t, "", node.Attributes["cpu.numcores"]) } func TestFingerprintManager_Run_InDenylist(t *testing.T) { ci.Parallel(t) - require := require.New(t) testClient, cleanup := TestClient(t, func(c *config.Config) { c.Options = map[string]string{ @@ -87,18 +83,17 @@ func TestFingerprintManager_Run_InDenylist(t *testing.T) { testClient.logger, ) - err := fm.Run() - require.Nil(err) + _, err := fm.Run() + must.NoError(t, err) node := testClient.config.Node - require.NotContains(node.Attributes, "cpu.frequency") - require.NotEqual(node.Attributes["memory.totalbytes"], "") + must.MapNotContainsKey(t, node.Attributes, "cpu.frequency") + must.NotEq(t, node.Attributes["memory.totalbytes"], "") } func TestFingerprintManager_Run_Combination(t *testing.T) { ci.Parallel(t) - require := require.New(t) testClient, cleanup := TestClient(t, func(c *config.Config) { c.Options = map[string]string{ @@ -117,20 +112,19 @@ func TestFingerprintManager_Run_Combination(t *testing.T) { testClient.logger, ) - err := fm.Run() - require.Nil(err) + _, err := fm.Run() + must.NoError(t, err) node := testClient.config.Node - require.NotEqual(node.Attributes["cpu.frequency"], "") - require.NotEqual(node.Attributes["cpu.arch"], "") - require.NotContains(node.Attributes, "memory.totalbytes") - require.NotContains(node.Attributes, "os.name") + must.NotEq(t, "", node.Attributes["cpu.numcores"]) + must.NotEq(t, "", node.Attributes["cpu.arch"]) + must.MapNotContainsKey(t, node.Attributes, "memory.totalbytes") + must.MapNotContainsKey(t, node.Attributes, "os.name") } func TestFingerprintManager_Run_CombinationLegacyNames(t *testing.T) { ci.Parallel(t) - require := require.New(t) testClient, cleanup := TestClient(t, func(c *config.Config) { c.Options = map[string]string{ @@ -149,13 +143,12 @@ func 
TestFingerprintManager_Run_CombinationLegacyNames(t *testing.T) { testClient.logger, ) - err := fm.Run() - require.Nil(err) + _, err := fm.Run() + must.NoError(t, err) node := testClient.config.Node - - require.NotEqual(node.Attributes["cpu.frequency"], "") - require.NotEqual(node.Attributes["cpu.arch"], "") - require.NotContains(node.Attributes, "memory.totalbytes") - require.NotContains(node.Attributes, "os.name") + must.NotEq(t, "", node.Attributes["cpu.numcores"]) + must.NotEq(t, "", node.Attributes["cpu.arch"]) + must.MapNotContainsKey(t, node.Attributes, "memory.totalbytes") + must.MapNotContainsKey(t, node.Attributes, "os.name") } diff --git a/client/gc.go b/client/gc.go index 4bf319b44..a7ade1155 100644 --- a/client/gc.go +++ b/client/gc.go @@ -9,10 +9,9 @@ import ( "sync" "time" - hclog "github.com/hashicorp/go-hclog" - + "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/client/stats" + "github.com/hashicorp/nomad/client/hoststats" "github.com/hashicorp/nomad/nomad/structs" ) @@ -47,7 +46,7 @@ type AllocGarbageCollector struct { allocRunners *IndexedGCAllocPQ // statsCollector for node based thresholds (eg disk) - statsCollector stats.NodeStatsCollector + statsCollector hoststats.NodeStatsCollector // allocCounter return the number of un-GC'd allocs on this node allocCounter AllocCounter @@ -68,7 +67,7 @@ type AllocGarbageCollector struct { // NewAllocGarbageCollector returns a garbage collector for terminated // allocations on a node. Must call Run() in a goroutine enable periodic // garbage collection. 
-func NewAllocGarbageCollector(logger hclog.Logger, statsCollector stats.NodeStatsCollector, ac AllocCounter, config *GCConfig) *AllocGarbageCollector { +func NewAllocGarbageCollector(logger hclog.Logger, statsCollector hoststats.NodeStatsCollector, ac AllocCounter, config *GCConfig) *AllocGarbageCollector { logger = logger.Named("gc") // Require at least 1 to make progress if config.ParallelDestroys <= 0 { @@ -296,7 +295,7 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e // Collect host stats and see if we still need to remove older // allocations - var allocDirStats *stats.DiskStats + var allocDirStats *hoststats.DiskStats if err := a.statsCollector.Collect(); err == nil { if hostStats := a.statsCollector.Stats(); hostStats != nil { allocDirStats = hostStats.AllocDirStats diff --git a/client/gc_test.go b/client/gc_test.go index c5c820c44..a92721b3e 100644 --- a/client/gc_test.go +++ b/client/gc_test.go @@ -12,7 +12,7 @@ import ( "github.com/hashicorp/nomad/client/allocrunner" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/stats" + "github.com/hashicorp/nomad/client/hoststats" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad" "github.com/hashicorp/nomad/nomad/mock" @@ -106,7 +106,7 @@ func (m *MockStatsCollector) Collect() error { return nil } -func (m *MockStatsCollector) Stats() *stats.HostStats { +func (m *MockStatsCollector) Stats() *hoststats.HostStats { if len(m.availableValues) == 0 { return nil } @@ -118,8 +118,8 @@ func (m *MockStatsCollector) Stats() *stats.HostStats { if m.index < len(m.availableValues)-1 { m.index = m.index + 1 } - return &stats.HostStats{ - AllocDirStats: &stats.DiskStats{ + return &hoststats.HostStats{ + AllocDirStats: &hoststats.DiskStats{ Available: available, UsedPercent: usedPercent, InodesUsedPercent: inodePercent, diff --git a/client/stats/host.go b/client/hoststats/host.go 
similarity index 85% rename from client/stats/host.go rename to client/hoststats/host.go index bd6edb32d..9ff686961 100644 --- a/client/stats/host.go +++ b/client/hoststats/host.go @@ -1,15 +1,15 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: MPL-2.0 -package stats +package hoststats import ( "math" - "runtime" "sync" "time" - hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/plugins/device" "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/disk" @@ -73,7 +73,7 @@ type NodeStatsCollector interface { // HostStatsCollector collects host resource usage stats type HostStatsCollector struct { - numCores int + top *numalib.Topology statsCalculator map[string]*HostCpuStatsCalculator hostStats *HostStats hostStatsLock sync.RWMutex @@ -90,19 +90,15 @@ type HostStatsCollector struct { // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in // so that we can present the disk related statistics for the mountpoint where // the allocation directory lives -func NewHostStatsCollector(logger hclog.Logger, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector { - logger = logger.Named("host_stats") - numCores := runtime.NumCPU() - statsCalculator := make(map[string]*HostCpuStatsCalculator) - collector := &HostStatsCollector{ - statsCalculator: statsCalculator, - numCores: numCores, - logger: logger, +func NewHostStatsCollector(logger hclog.Logger, top *numalib.Topology, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector { + return &HostStatsCollector{ + logger: logger.Named("host_stats"), + top: top, + statsCalculator: make(map[string]*HostCpuStatsCalculator), allocDir: allocDir, badParts: make(map[string]struct{}), deviceStatsCollector: deviceStatsCollector, } - return collector } // Collect collects stats related to resource usage of a host @@ -308,3 +304,33 @@ func (h 
*HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u h.prevBusy = currentBusy return } + +func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) { + ticksConsumed := 0.0 + cpuStats, err := cpu.Times(true) + if err != nil { + return nil, 0.0, err + } + cs := make([]*CPUStats, len(cpuStats)) + for idx, cpuStat := range cpuStats { + percentCalculator, ok := h.statsCalculator[cpuStat.CPU] + if !ok { + percentCalculator = NewHostCpuStatsCalculator() + h.statsCalculator[cpuStat.CPU] = percentCalculator + } + idle, user, system, total := percentCalculator.Calculate(cpuStat) + totalCompute := h.top.TotalCompute() + ticks := (total / 100.0) * (float64(totalCompute) / float64(len(cpuStats))) + cs[idx] = &CPUStats{ + CPU: cpuStat.CPU, + User: user, + System: system, + Idle: idle, + TotalPercent: total, + TotalTicks: ticks, + } + ticksConsumed += ticks + } + + return cs, ticksConsumed, nil +} diff --git a/client/stats/host_test.go b/client/hoststats/host_test.go similarity index 97% rename from client/stats/host_test.go rename to client/hoststats/host_test.go index 3d939cd10..aae34e755 100644 --- a/client/stats/host_test.go +++ b/client/hoststats/host_test.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: MPL-2.0 -package stats +package hoststats import ( "testing" diff --git a/client/interfaces/client.go b/client/interfaces/client.go index 99af32ce9..822860a55 100644 --- a/client/interfaces/client.go +++ b/client/interfaces/client.go @@ -4,6 +4,7 @@ package interfaces import ( + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/device" ) @@ -38,5 +39,11 @@ type EnvReplacer interface { // ArtifactGetter is an interface satisfied by the getter package. type ArtifactGetter interface { // Get artifact and put it in the task directory. 
- Get(taskEnv EnvReplacer, artifact *structs.TaskArtifact) error + Get(EnvReplacer, *structs.TaskArtifact) error +} + +// ProcessWranglers is an interface satisfied by the proclib package. +type ProcessWranglers interface { + Setup(proclib.Task) error + Destroy(proclib.Task) error } diff --git a/client/lib/cgroupslib/default.go b/client/lib/cgroupslib/default.go new file mode 100644 index 000000000..ef84307bb --- /dev/null +++ b/client/lib/cgroupslib/default.go @@ -0,0 +1,16 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !linux + +package cgroupslib + +// LinuxResourcesPath does nothing on non-Linux systems +func LinuxResourcesPath(string, string) string { + return "" +} + +// MaybeDisableMemorySwappiness does nothing on non-Linux systems +func MaybeDisableMemorySwappiness() *int { + return nil +} diff --git a/client/lib/cgroupslib/editor.go b/client/lib/cgroupslib/editor.go new file mode 100644 index 000000000..98c741c60 --- /dev/null +++ b/client/lib/cgroupslib/editor.go @@ -0,0 +1,248 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/hashicorp/go-set" + "golang.org/x/sys/unix" +) + +const ( + root = "/sys/fs/cgroup" +) + +// OpenPath creates a handle for modifying cgroup interface files under +// the given directory. +// +// In cgroups v1 this will be like, "///". +// In cgroups v2 this will be like, "//". +func OpenPath(dir string) Interface { + return &editor{ + dpath: dir, + } +} + +// OpenFromCpusetCG1 creates a handle for modifying cgroup interface files of +// the given interface, given a path to the cpuset interface. +// +// This is useful because a Nomad task resources struct only keeps track of +// the cpuset cgroup directory in the cgroups v1 regime, but nowadays we want +// to modify more than the cpuset in some cases. 
+func OpenFromCpusetCG1(dir, iface string) Interface { + return &editor{ + dpath: strings.Replace(dir, "cpuset", iface, 1), + } +} + +// An Interface can be used to read and write the interface files of a cgroup. +type Interface interface { + // Read the content of filename. + Read(filename string) (string, error) + + // Write content to filename. + Write(filename, content string) error + + // PIDs returns the set of process IDs listed in the cgroup.procs + // interface file. We use a set here because the kernel recommends doing + // so. + // + // This list is not guaranteed to be sorted or free of duplicate TGIDs, + // and userspace should sort/uniquify the list if this property is required. + // + // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/cgroups.html + PIDs() (*set.Set[int], error) +} + +type editor struct { + dpath string +} + +func (e *editor) Read(filename string) (string, error) { + path := filepath.Join(e.dpath, filename) + b, err := os.ReadFile(path) + if err != nil { + return "", err + } + return string(bytes.TrimSpace(b)), nil +} + +func (e *editor) PIDs() (*set.Set[int], error) { + path := filepath.Join(e.dpath, "cgroup.procs") + return getPIDs(path) +} + +func (e *editor) Write(filename, content string) error { + path := filepath.Join(e.dpath, filename) + return os.WriteFile(path, []byte(content), 0644) +} + +// A Factory creates a Lifecycle which is an abstraction over the setup and +// teardown routines used for creating and destroying cgroups used for +// constraining Nomad tasks. +func Factory(allocID, task string) Lifecycle { + switch GetMode() { + case CG1: + return &lifeCG1{ + allocID: allocID, + task: task, + } + default: + return &lifeCG2{ + dpath: pathCG2(allocID, task), + } + } +} + +// A Lifecycle manages the lifecycle of the cgroup(s) of a task from the +// perspective of the Nomad client. 
That is, it creates and deletes the cgroups +// for a task, as well as provides last effort kill semantics for ensuring a +// process cannot stay alive beyond the intent of the client. +type Lifecycle interface { + Setup() error + Kill() error + Teardown() error +} + +// -------- cgroups v1 --------- + +type lifeCG1 struct { + allocID string + task string +} + +func (l *lifeCG1) Setup() error { + paths := l.paths() + for _, p := range paths { + err := os.MkdirAll(p, 0755) + if err != nil { + return err + } + } + return nil +} + +func (l *lifeCG1) Teardown() error { + paths := l.paths() + for _, p := range paths { + err := os.RemoveAll(p) + if err != nil { + return err + } + } + return nil +} + +func (l *lifeCG1) Kill() error { + if err := l.freeze(); err != nil { + return err + } + + pids, err := l.pids() + if err != nil { + return err + } + + signal := unix.SignalNum("SIGKILL") + pids.ForEach(func(pid int) bool { + _ = unix.Kill(pid, signal) + return true + }) + + return l.thaw() +} + +func (l *lifeCG1) edit(iface string) *editor { + scope := scopeCG1(l.allocID, l.task) + return &editor{ + dpath: filepath.Join(root, iface, NomadCgroupParent, scope), + } +} + +func (l *lifeCG1) freeze() error { + ed := l.edit("freezer") + return ed.Write("freezer.state", "FROZEN") +} + +func (l *lifeCG1) pids() (*set.Set[int], error) { + ed := l.edit("freezer") + return ed.PIDs() +} + +func (l *lifeCG1) thaw() error { + ed := l.edit("freezer") + return ed.Write("freezer.state", "THAWED") +} + +func (l *lifeCG1) paths() []string { + scope := scopeCG1(l.allocID, l.task) + ifaces := []string{"freezer", "cpu", "memory", "cpuset"} + paths := make([]string, 0, len(ifaces)) + for _, iface := range ifaces { + paths = append(paths, filepath.Join( + root, iface, NomadCgroupParent, scope, + )) + } + return paths +} + +// -------- cgroups v2 -------- + +type lifeCG2 struct { + dpath string +} + +func (l *lifeCG2) edit() *editor { + return &editor{dpath: l.dpath} +} + +func (l *lifeCG2) Setup() 
error { + return os.MkdirAll(l.dpath, 0755) +} + +func (l *lifeCG2) Teardown() error { + return os.RemoveAll(l.dpath) +} + +func (l *lifeCG2) Kill() error { + ed := l.edit() + return ed.Write("cgroup.kill", "1") +} + +// -------- helpers --------- + +func getPIDs(file string) (*set.Set[int], error) { + b, err := os.ReadFile(file) + if err != nil { + return nil, err + } + tokens := bytes.Fields(bytes.TrimSpace(b)) + result := set.New[int](len(tokens)) + for _, token := range tokens { + if i, err := strconv.Atoi(string(token)); err == nil { + result.Insert(i) + } + } + return result, nil +} + +func scopeCG1(allocID, task string) string { + return fmt.Sprintf("%s.%s", allocID, task) +} + +func scopeCG2(allocID, task string) string { + return fmt.Sprintf("%s.%s.scope", allocID, task) +} + +func pathCG2(allocID, task string) string { + return filepath.Join(root, NomadCgroupParent, scopeCG2(allocID, task)) +} diff --git a/client/lib/cgroupslib/init.go b/client/lib/cgroupslib/init.go new file mode 100644 index 000000000..01241ee6f --- /dev/null +++ b/client/lib/cgroupslib/init.go @@ -0,0 +1,109 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "bytes" + "os" + "path/filepath" + "strings" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-set" +) + +func Init(log hclog.Logger) { + switch GetMode() { + case CG1: + // create the /nomad cgroup (or whatever the name is configured to be) + // for each cgroup controller we are going to use + controllers := []string{"freezer", "memory", "cpu", "cpuset"} + for _, ctrl := range controllers { + p := filepath.Join(root, ctrl, NomadCgroupParent) + if err := os.MkdirAll(p, 0755); err != nil { + log.Error("failed to create nomad cgroup", "controller", ctrl, "error", err) + } + } + case CG2: + // minimum controllers must be set first + s, err := readRootCG2("cgroup.subtree_control") + if err != nil { + log.Error("failed to create nomad cgroup", "error", err) + return + } + + required := set.From([]string{"cpuset", "cpu", "io", "memory", "pids"}) + enabled := set.From(strings.Fields(s)) + needed := required.Difference(enabled) + + if needed.Size() == 0 { + log.Debug("top level nomad.slice cgroup already exists") + return // already setup + } + + sb := new(strings.Builder) + for _, controller := range needed.List() { + sb.WriteString("+" + controller + " ") + } + + activation := strings.TrimSpace(sb.String()) + if err = writeRootCG2("cgroup.subtree_control", activation); err != nil { + log.Error("failed to create nomad cgroup", "error", err) + return + } + + nomadSlice := filepath.Join("/sys/fs/cgroup", NomadCgroupParent) + if err := os.MkdirAll(nomadSlice, 0755); err != nil { + log.Error("failed to create nomad cgroup", "error", err) + return + } + + log.Debug("top level nomad.slice cgroup initialized", "controllers", needed) + } +} + +func readRootCG2(filename string) (string, error) { + p := filepath.Join(root, filename) + b, err := os.ReadFile(p) + return string(bytes.TrimSpace(b)), err +} + +func writeRootCG2(filename, content string) error { + p := 
filepath.Join(root, filename) + return os.WriteFile(p, []byte(content), 0644) +} + +// ReadNomadCG2 reads an interface file under the nomad.slice parent cgroup +// (or whatever its name is configured to be) +func ReadNomadCG2(filename string) (string, error) { + p := filepath.Join(root, NomadCgroupParent, filename) + b, err := os.ReadFile(p) + return string(bytes.TrimSpace(b)), err +} + +// ReadNomadCG1 reads an interface file under the /nomad cgroup of the given +// cgroup interface. +func ReadNomadCG1(iface, filename string) (string, error) { + p := filepath.Join(root, iface, NomadCgroupParent, filename) + b, err := os.ReadFile(p) + return string(bytes.TrimSpace(b)), err +} + +func WriteNomadCG1(iface, filename, content string) error { + p := filepath.Join(root, iface, NomadCgroupParent, filename) + return os.WriteFile(p, []byte(content), 0644) +} + +// LinuxResourcesPath returns the filepath to the directory that the field +// x.Resources.LinuxResources.CpusetCgroupPath is expected to hold on to +func LinuxResourcesPath(allocID, task string) string { + switch GetMode() { + case CG1: + return filepath.Join(root, "cpuset", NomadCgroupParent, scopeCG1(allocID, task)) + default: + return filepath.Join(root, NomadCgroupParent, scopeCG2(allocID, task)) + } +} diff --git a/client/lib/cgroupslib/memory.go b/client/lib/cgroupslib/memory.go new file mode 100644 index 000000000..5f8b29374 --- /dev/null +++ b/client/lib/cgroupslib/memory.go @@ -0,0 +1,40 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "sync" + + "github.com/hashicorp/nomad/helper/pointer" +) + +var ( + disableMemorySwapOnce sync.Once + disableMemorySwap *uint64 +) + +// MaybeDisableMemorySwappiness will disable memory swappiness, if that controller +// is available. Always the case for cgroups v2, but is not always the case on +// very old kernels with cgroups v1. 
+func MaybeDisableMemorySwappiness() *uint64 { + disableMemorySwapOnce.Do(func() { + disableMemorySwap = detectMemorySwap() + }) + return disableMemorySwap +} + +func detectMemorySwap() *uint64 { + switch GetMode() { + case CG1: + err := WriteNomadCG1("memory", "memory.swappiness", "0") + if err == nil { + return pointer.Of[uint64](0) + } + return nil + default: + return pointer.Of[uint64](0) + } +} diff --git a/client/lib/cgroupslib/memory_test.go b/client/lib/cgroupslib/memory_test.go new file mode 100644 index 000000000..2ce649a5a --- /dev/null +++ b/client/lib/cgroupslib/memory_test.go @@ -0,0 +1,18 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "testing" + + "github.com/shoenig/test/must" +) + +func Test_MaybeDisableMemorySwappiness(t *testing.T) { + disable := MaybeDisableMemorySwappiness() + var zero = uint64(0) + must.Eq(t, &zero, disable) +} diff --git a/client/lib/cgroupslib/mode.go b/client/lib/cgroupslib/mode.go new file mode 100644 index 000000000..d30baa81c --- /dev/null +++ b/client/lib/cgroupslib/mode.go @@ -0,0 +1,14 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package cgroupslib + +// Mode indicates whether the Client node is configured with cgroups v1 or v2, +// or is not configured with cgroups enabled. +type Mode byte + +const ( + OFF = iota + CG1 + CG2 +) diff --git a/client/lib/cgroupslib/mount.go b/client/lib/cgroupslib/mount.go new file mode 100644 index 000000000..990f4bd5d --- /dev/null +++ b/client/lib/cgroupslib/mount.go @@ -0,0 +1,42 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "bufio" + "io" + "os" + "strings" + + "github.com/hashicorp/go-set" +) + +func detect() Mode { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return OFF + } + defer func() { + _ = f.Close() + }() + return scan(f) +} + +func scan(in io.Reader) Mode { + scanner := bufio.NewScanner(in) + for scanner.Scan() { + tokens := set.From(strings.Fields(scanner.Text())) + if tokens.Contains("/sys/fs/cgroup") { + if tokens.Contains("tmpfs") { + return CG1 + } + if tokens.Contains("cgroup2") { + return CG2 + } + } + } + return OFF +} diff --git a/client/lib/cgroupslib/mount_test.go b/client/lib/cgroupslib/mount_test.go new file mode 100644 index 000000000..07951a1a7 --- /dev/null +++ b/client/lib/cgroupslib/mount_test.go @@ -0,0 +1,67 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "strings" + "testing" + + "github.com/shoenig/test/must" +) + +const ( + cg1 = ` +33 29 0:27 / /run/lock rw,nosuid,nodev,noexec,relatime shared:6 - tmpfs tmpfs rw,size=5120k +34 25 0:28 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:9 - tmpfs tmpfs ro,mode=755 +35 34 0:29 / /sys/fs/cgroup/unified rw,nosuid,nodev,noexec,relatime shared:10 - cgroup2 cgroup2 rw,nsdelegate +36 34 0:30 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,xattr,name=systemd +` + + cg2 = ` +34 28 0:29 / /run/lock rw,nosuid,nodev,noexec,relatime shared:6 - tmpfs tmpfs rw,size=5120k,inode64 +35 24 0:30 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:9 - cgroup2 cgroup2 rw,nsdelegate,memory_recursiveprot +36 24 0:31 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:10 - pstore pstore rw +37 24 0:32 / /sys/firmware/efi/efivars rw,nosuid,nodev,noexec,relatime shared:11 - efivarfs efivarfs rw +` +) + +func Test_scan(t *testing.T) { + cases := []struct { + name string + input string + exp Mode + }{ 
+ { + name: "v1", + input: cg1, + exp: CG1, + }, + { + name: "v2", + input: cg2, + exp: CG2, + }, + { + name: "empty", + input: "", + exp: OFF, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + in := strings.NewReader(tc.input) + result := scan(in) + must.Eq(t, tc.exp, result) + }) + } +} + +func TestGetMode(t *testing.T) { + mode := GetMode() + ok := mode == CG1 || mode == CG2 + must.True(t, ok) +} diff --git a/client/lib/cgroupslib/switch_default.go b/client/lib/cgroupslib/switch_default.go new file mode 100644 index 000000000..ce8b1c301 --- /dev/null +++ b/client/lib/cgroupslib/switch_default.go @@ -0,0 +1,11 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !linux + +package cgroupslib + +// GetMode returns OFF on non-Linux systems. +func GetMode() Mode { + return OFF +} diff --git a/client/lib/cgroupslib/switch_linux.go b/client/lib/cgroupslib/switch_linux.go new file mode 100644 index 000000000..fb42631fa --- /dev/null +++ b/client/lib/cgroupslib/switch_linux.go @@ -0,0 +1,40 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package cgroupslib + +import ( + "sync" +) + +var ( + // NomadCgroupParent is a global variable because setting this value + // from the Nomad client initialization is much less painful than trying to + // plumb it through in every place we need to reference it. This value will + // be written to only once, during init, and after that it's only reads. + NomadCgroupParent = defaultParent() +) + +func defaultParent() string { + switch GetMode() { + case CG1: + return "/nomad" + default: + return "nomad.slice" + } +} + +var ( + mode Mode + detection sync.Once +) + +// GetMode returns the cgroups Mode of operation. 
+func GetMode() Mode { + detection.Do(func() { + mode = detect() + }) + return mode +} diff --git a/client/lib/cgutil/cgutil_linux.go b/client/lib/cgutil/cgutil_linux.go deleted file mode 100644 index ffb3f4c18..000000000 --- a/client/lib/cgutil/cgutil_linux.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/pointer" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/opencontainers/runc/libcontainer/cgroups" - lcc "github.com/opencontainers/runc/libcontainer/configs" -) - -// UseV2 indicates whether only cgroups.v2 is enabled. If cgroups.v2 is not -// enabled or is running in hybrid mode with cgroups.v1, Nomad will make use of -// cgroups.v1 -// -// This is a read-only value. -var UseV2 = safelyDetectUnifiedMode() - -// Currently it is possible for the runc utility function to panic -// https://github.com/opencontainers/runc/pull/3745 -func safelyDetectUnifiedMode() (result bool) { - defer func() { - if r := recover(); r != nil { - result = false - } - }() - result = cgroups.IsCgroup2UnifiedMode() - return -} - -// GetCgroupParent returns the mount point under the root cgroup in which Nomad -// will create cgroups. If parent is not set, an appropriate name for the version -// of cgroups will be used. -func GetCgroupParent(parent string) string { - switch { - case parent != "": - return parent - case UseV2: - return DefaultCgroupParentV2 - default: - return DefaultCgroupV1Parent - } -} - -// CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration. 
-func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { - parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config - switch { - case UseV2: - return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2")) - default: - return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1")) - } -} - -// GetCPUsFromCgroup gets the effective cpuset value for the given cgroup. -func GetCPUsFromCgroup(group string) ([]uint16, error) { - group = GetCgroupParent(group) - if UseV2 { - return getCPUsFromCgroupV2(group) - } - return getCPUsFromCgroupV1(group) -} - -// CgroupScope returns the name of the scope for Nomad's managed cgroups for -// the given allocID and task. -// -// e.g. "..scope" -// -// Only useful for v2. -func CgroupScope(allocID, task string) string { - return fmt.Sprintf("%s.%s.scope", allocID, task) -} - -// ConfigureBasicCgroups will initialize a cgroup and modify config to contain -// a reference to its path. -// -// v1: creates a random "freezer" cgroup which can later be used for cleanup of processes. -// v2: does nothing. -func ConfigureBasicCgroups(config *lcc.Config) error { - if UseV2 { - return nil - } - - id := uuid.Generate() - // In v1 we must setup the freezer cgroup ourselves. - subsystem := "freezer" - path, err := GetCgroupPathHelperV1(subsystem, filepath.Join(DefaultCgroupV1Parent, id)) - if err != nil { - return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err) - } - if err = os.MkdirAll(path, 0755); err != nil { - return err - } - config.Cgroups.Path = path - return nil -} - -// FindCgroupMountpointDir is used to find the cgroup mount point on a Linux -// system. -// -// Note that in cgroups.v1, this returns one of many subsystems that are mounted. -// e.g. 
a return value of "/sys/fs/cgroup/systemd" really implies the root is -// "/sys/fs/cgroup", which is interesting on hybrid systems where the 'unified' -// subsystem is mounted as if it were a subsystem, but the actual root is different. -// (i.e. /sys/fs/cgroup/unified). -// -// As far as Nomad is concerned, UseV2 is the source of truth for which hierarchy -// to use, and that will only be a true value if cgroups.v2 is mounted on -// /sys/fs/cgroup (i.e. system is not in v1 or hybrid mode). -// -// âžœ mount -l | grep cgroup -// tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755,inode64) -// cgroup2 on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate) -// cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd) -// cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory) -// (etc.) -func FindCgroupMountpointDir() (string, error) { - mount, err := cgroups.GetCgroupMounts(false) - if err != nil { - return "", err - } - // It's okay if the mount point is not discovered - if len(mount) == 0 { - return "", nil - } - return mount[0].Mountpoint, nil -} - -// CopyCpuset copies the cpuset.cpus value from source into destination. -func CopyCpuset(source, destination string) error { - correct, err := cgroups.ReadFile(source, "cpuset.cpus") - if err != nil { - return err - } - - err = cgroups.WriteFile(destination, "cpuset.cpus", correct) - if err != nil { - return err - } - - return nil -} - -// MaybeDisableMemorySwappiness will disable memory swappiness, if that controller -// is available. Always the case for cgroups v2, but is not always the case on -// very old kernels with cgroups v1. 
-func MaybeDisableMemorySwappiness() *uint64 { - bypass := (*uint64)(nil) - zero := pointer.Of[uint64](0) - - // cgroups v2 always set zero - if UseV2 { - return zero - } - - // cgroups v1 detect if swappiness is supported by attempting to write to - // the nomad parent cgroup swappiness interface - e := &editor{fromRoot: "memory/nomad"} - err := e.write("memory.swappiness", "0") - if err != nil { - return bypass - } - - return zero -} diff --git a/client/lib/cgutil/cgutil_linux_test.go b/client/lib/cgutil/cgutil_linux_test.go deleted file mode 100644 index 9c51e3ef3..000000000 --- a/client/lib/cgutil/cgutil_linux_test.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "path/filepath" - "strings" - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/testutil" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/shoenig/test/must" - "github.com/stretchr/testify/require" -) - -func TestUtil_GetCgroupParent(t *testing.T) { - ci.Parallel(t) - - t.Run("v1", func(t *testing.T) { - testutil.CgroupsCompatibleV1(t) - t.Run("default", func(t *testing.T) { - exp := "/nomad" - parent := GetCgroupParent("") - require.Equal(t, exp, parent) - }) - - t.Run("configured", func(t *testing.T) { - exp := "/bar" - parent := GetCgroupParent("/bar") - require.Equal(t, exp, parent) - }) - }) - - t.Run("v2", func(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - t.Run("default", func(t *testing.T) { - exp := "nomad.slice" - parent := GetCgroupParent("") - require.Equal(t, exp, parent) - }) - - t.Run("configured", func(t *testing.T) { - exp := "abc.slice" - parent := GetCgroupParent("abc.slice") - require.Equal(t, exp, parent) - }) - }) -} - -func TestUtil_CreateCPUSetManager(t *testing.T) { 
- ci.Parallel(t) - - logger := testlog.HCLogger(t) - - t.Run("v1", func(t *testing.T) { - testutil.CgroupsCompatibleV1(t) - parent := "/" + uuid.Short() - manager := CreateCPUSetManager(parent, []uint16{0}, logger) - manager.Init() - _, ok := manager.(*cpusetManagerV1) - must.True(t, ok) - must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) - }) - - t.Run("v2", func(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - parent := uuid.Short() + ".slice" - manager := CreateCPUSetManager(parent, []uint16{0}, logger) - manager.Init() - _, ok := manager.(*cpusetManagerV2) - must.True(t, ok) - must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) - }) -} - -func TestUtil_GetCPUsFromCgroup(t *testing.T) { - ci.Parallel(t) - - t.Run("v2", func(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - cpus, err := GetCPUsFromCgroup("system.slice") // thanks, systemd! - require.NoError(t, err) - require.NotEmpty(t, cpus) - }) -} - -func create(t *testing.T, name string) { - mgr, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, name)) - require.NoError(t, err) - if err = mgr.Apply(CreationPID); err != nil { - _ = cgroups.RemovePath(name) - t.Fatal("failed to create cgroup for test") - } -} - -func cleanup(t *testing.T, name string) { - err := cgroups.RemovePath(name) - require.NoError(t, err) -} - -func TestUtil_CopyCpuset(t *testing.T) { - ci.Parallel(t) - - t.Run("v2", func(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - source := uuid.Short() + ".scope" - create(t, source) - defer cleanup(t, source) - require.NoError(t, cgroups.WriteFile(filepath.Join(CgroupRoot, source), "cpuset.cpus", "0-1")) - - destination := uuid.Short() + ".scope" - create(t, destination) - defer cleanup(t, destination) - - err := CopyCpuset( - filepath.Join(CgroupRoot, source), - filepath.Join(CgroupRoot, destination), - ) - require.NoError(t, err) - - value, readErr := cgroups.ReadFile(filepath.Join(CgroupRoot, destination), "cpuset.cpus") - require.NoError(t, 
readErr) - require.Equal(t, "0-1", strings.TrimSpace(value)) - }) -} - -func TestUtil_MaybeDisableMemorySwappiness(t *testing.T) { - ci.Parallel(t) - - // will return 0 on any reasonable kernel (both cgroups v1 and v2) - value := MaybeDisableMemorySwappiness() - must.NotNil(t, value) - must.Eq(t, 0, *value) -} diff --git a/client/lib/cgutil/cgutil_noop.go b/client/lib/cgutil/cgutil_noop.go deleted file mode 100644 index f9b9373d5..000000000 --- a/client/lib/cgutil/cgutil_noop.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build !linux - -package cgutil - -import ( - "github.com/hashicorp/go-hclog" -) - -const ( - // DefaultCgroupParent does not apply to non-Linux operating systems. - DefaultCgroupParent = "" -) - -// UseV2 is always false on non-Linux systems. -// -// This is a read-only value. -var UseV2 = false - -// CreateCPUSetManager creates a no-op CpusetManager for non-Linux operating systems. -func CreateCPUSetManager(string, []uint16, hclog.Logger) CpusetManager { - return new(NoopCpusetManager) -} - -// FindCgroupMountpointDir returns nothing for non-Linux operating systems. -func FindCgroupMountpointDir() (string, error) { - return "", nil -} - -// GetCgroupParent returns nothing for non-Linux operating systems. -func GetCgroupParent(string) string { - return DefaultCgroupParent -} - -// GetCPUsFromCgroup returns nothing for non-Linux operating systems. -func GetCPUsFromCgroup(string) ([]uint16, error) { - return nil, nil -} - -// CgroupScope returns nothing for non-Linux operating systems. -func CgroupScope(allocID, task string) string { - return "" -} diff --git a/client/lib/cgutil/cpuset_manager.go b/client/lib/cgutil/cpuset_manager.go deleted file mode 100644 index eceb6c7be..000000000 --- a/client/lib/cgutil/cpuset_manager.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -package cgutil - -import ( - "context" - "path/filepath" - "strings" - - "github.com/hashicorp/nomad/lib/cpuset" - "github.com/hashicorp/nomad/nomad/structs" -) - -const ( - // CgroupRoot is hard-coded in the cgroups specification. - // It only applies to linux but helpers have references to it in driver(s). - CgroupRoot = "/sys/fs/cgroup" -) - -// CpusetManager is used to setup cpuset cgroups for each task. -type CpusetManager interface { - // Init should be called before the client starts running allocations. This - // is where the cpuset manager should start doing background operations. - Init() - - // AddAlloc adds an allocation to the manager - AddAlloc(alloc *structs.Allocation) - - // RemoveAlloc removes an alloc by ID from the manager - RemoveAlloc(allocID string) - - // CgroupPathFor returns a callback for getting the cgroup path and any error that may have occurred during - // cgroup initialization. The callback will block if the cgroup has not been created - CgroupPathFor(allocID, taskName string) CgroupPathGetter -} - -type NoopCpusetManager struct{} - -func (n NoopCpusetManager) Init() { -} - -func (n NoopCpusetManager) AddAlloc(alloc *structs.Allocation) { -} - -func (n NoopCpusetManager) RemoveAlloc(allocID string) { -} - -func (n NoopCpusetManager) CgroupPathFor(allocID, task string) CgroupPathGetter { - return func(context.Context) (string, error) { return "", nil } -} - -// CgroupPathGetter is a function which returns the cgroup path and any error which -// occurred during cgroup initialization. -// -// It should block until the cgroup has been created or an error is reported. -type CgroupPathGetter func(context.Context) (path string, err error) - -type TaskCgroupInfo struct { - CgroupPath string - RelativeCgroupPath string - Cpuset cpuset.CPUSet - Error error -} - -// SplitPath determines the parent and cgroup from p. -// p must contain at least 2 elements (parent + cgroup). 
-// -// Handles the cgroup root if present. -func SplitPath(p string) (string, string) { - p = strings.TrimPrefix(p, CgroupRoot) - p = strings.Trim(p, "/") - parts := strings.Split(p, "/") - return parts[0], "/" + filepath.Join(parts[1:]...) -} diff --git a/client/lib/cgutil/cpuset_manager_test.go b/client/lib/cgutil/cpuset_manager_test.go deleted file mode 100644 index 805c1306c..000000000 --- a/client/lib/cgutil/cpuset_manager_test.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package cgutil - -import ( - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/stretchr/testify/require" -) - -func TestUtil_SplitPath(t *testing.T) { - ci.Parallel(t) - - try := func(input, expParent, expCgroup string) { - parent, cgroup := SplitPath(input) - require.Equal(t, expParent, parent) - require.Equal(t, expCgroup, cgroup) - } - - // foo, /bar - try("foo/bar", "foo", "/bar") - try("/foo/bar/", "foo", "/bar") - try("/sys/fs/cgroup/foo/bar", "foo", "/bar") - - // foo, /bar/baz - try("/foo/bar/baz/", "foo", "/bar/baz") - try("foo/bar/baz", "foo", "/bar/baz") - try("/sys/fs/cgroup/foo/bar/baz", "foo", "/bar/baz") -} diff --git a/client/lib/cgutil/cpuset_manager_v1.go b/client/lib/cgutil/cpuset_manager_v1.go deleted file mode 100644 index bd2c69607..000000000 --- a/client/lib/cgutil/cpuset_manager_v1.go +++ /dev/null @@ -1,429 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "context" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "time" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/lib/cpuset" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/configs" - "golang.org/x/sys/unix" -) - -const ( - DefaultCgroupV1Parent = "/nomad" - SharedCpusetCgroupName = "shared" - ReservedCpusetCgroupName = "reserved" -) - -// NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1 -func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager { - if cgroupParent == "" { - cgroupParent = DefaultCgroupV1Parent - } - - cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent) - if err != nil { - logger.Warn("failed to get cgroup path; disable cpuset management", "error", err) - return new(NoopCpusetManager) - } - - // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created - if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { - logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err) - return new(NoopCpusetManager) - } - - parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) - if err != nil { - logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err) - return new(NoopCpusetManager) - } - - parentCpuset, err := cpuset.Parse(parentCpus) - if err != nil { - logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err) - return new(NoopCpusetManager) - } - - // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup - 
if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil { - if !errors.Is(err, os.ErrExist) { - logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err) - return new(NoopCpusetManager) - } - } - - if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { - logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err) - return new(NoopCpusetManager) - } - - return &cpusetManagerV1{ - parentCpuset: parentCpuset, - cgroupParent: cgroupParent, - cgroupParentPath: cgroupParentPath, - cgroupInfo: map[string]allocTaskCgroupInfo{}, - logger: logger, - } -} - -var ( - cpusetReconcileInterval = 30 * time.Second -) - -type cpusetManagerV1 struct { - // cgroupParent relative to the cgroup root. ex. '/nomad' - cgroupParent string - // cgroupParentPath is the absolute path to the cgroup parent. - cgroupParentPath string - - parentCpuset cpuset.CPUSet - - // all exported functions are synchronized - mu sync.Mutex - - cgroupInfo map[string]allocTaskCgroupInfo - - doneCh chan struct{} - signalCh chan struct{} - logger hclog.Logger -} - -func (c *cpusetManagerV1) AddAlloc(alloc *structs.Allocation) { - if alloc == nil || alloc.AllocatedResources == nil { - return - } - allocInfo := allocTaskCgroupInfo{} - for task, resources := range alloc.AllocatedResources.Tasks { - taskCpuset := cpuset.New(resources.Cpu.ReservedCores...) 
- cgroupPath := filepath.Join(c.cgroupParentPath, SharedCpusetCgroupName) - relativeCgroupPath := filepath.Join(c.cgroupParent, SharedCpusetCgroupName) - if taskCpuset.Size() > 0 { - cgroupPath, relativeCgroupPath = c.getCgroupPathsForTask(alloc.ID, task) - } - allocInfo[task] = &TaskCgroupInfo{ - CgroupPath: cgroupPath, - RelativeCgroupPath: relativeCgroupPath, - Cpuset: taskCpuset, - } - } - c.mu.Lock() - c.cgroupInfo[alloc.ID] = allocInfo - c.mu.Unlock() - go c.signalReconcile() -} - -func (c *cpusetManagerV1) RemoveAlloc(allocID string) { - c.mu.Lock() - delete(c.cgroupInfo, allocID) - c.mu.Unlock() - go c.signalReconcile() -} - -func (c *cpusetManagerV1) CgroupPathFor(allocID, task string) CgroupPathGetter { - return func(ctx context.Context) (string, error) { - c.mu.Lock() - allocInfo, ok := c.cgroupInfo[allocID] - if !ok { - c.mu.Unlock() - return "", fmt.Errorf("alloc not found for id %q", allocID) - } - - taskInfo, ok := allocInfo[task] - c.mu.Unlock() - if !ok { - return "", fmt.Errorf("task %q not found", task) - } - - timer, stop := helper.NewSafeTimer(0) - defer stop() - - for { - - if taskInfo.Error != nil { - break - } - - if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) { - select { - case <-ctx.Done(): - return taskInfo.CgroupPath, ctx.Err() - case <-timer.C: - timer.Reset(100 * time.Millisecond) - continue - } - } - break - } - - return taskInfo.CgroupPath, taskInfo.Error - } - -} - -// task name -> task cgroup info -type allocTaskCgroupInfo map[string]*TaskCgroupInfo - -// Init checks that the cgroup parent and expected child cgroups have been created -// If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared -// cgroup is initialized. 
-func (c *cpusetManagerV1) Init() { - c.doneCh = make(chan struct{}) - c.signalCh = make(chan struct{}) - c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String()) - go c.reconcileLoop() -} - -func (c *cpusetManagerV1) reconcileLoop() { - timer := time.NewTimer(0) - if !timer.Stop() { - <-timer.C - } - defer timer.Stop() - - for { - select { - case <-c.doneCh: - c.logger.Debug("shutting down reconcile loop") - return - case <-c.signalCh: - timer.Reset(500 * time.Millisecond) - case <-timer.C: - c.reconcileCpusets() - timer.Reset(cpusetReconcileInterval) - } - } -} - -func (c *cpusetManagerV1) reconcileCpusets() { - c.mu.Lock() - defer c.mu.Unlock() - sharedCpuset := cpuset.New(c.parentCpuset.ToSlice()...) - reservedCpuset := cpuset.New() - taskCpusets := map[string]*TaskCgroupInfo{} - for _, alloc := range c.cgroupInfo { - for _, task := range alloc { - if task.Cpuset.Size() == 0 { - continue - } - sharedCpuset = sharedCpuset.Difference(task.Cpuset) - reservedCpuset = reservedCpuset.Union(task.Cpuset) - taskCpusets[task.CgroupPath] = task - } - } - - // look for reserved cpusets which we don't know about and remove - files, err := os.ReadDir(c.reservedCpusetPath()) - if err != nil { - c.logger.Error("failed to list files in reserved cgroup path during reconciliation", "path", c.reservedCpusetPath(), "error", err) - } - for _, f := range files { - if !f.IsDir() { - continue - } - path := filepath.Join(c.reservedCpusetPath(), f.Name()) - if _, ok := taskCpusets[path]; ok { - continue - } - c.logger.Debug("removing reserved cpuset cgroup", "path", path) - err := cgroups.RemovePaths(map[string]string{"cpuset": path}) - if err != nil { - c.logger.Error("removal of existing cpuset cgroup failed", "path", path, "error", err) - } - } - - if err := c.setCgroupCpusetCPUs(c.sharedCpusetPath(), sharedCpuset.String()); err != nil { - c.logger.Error("could not write shared cpuset.cpus", "path", c.sharedCpusetPath(), 
"cpuset.cpus", sharedCpuset.String(), "error", err) - } - if err := c.setCgroupCpusetCPUs(c.reservedCpusetPath(), reservedCpuset.String()); err != nil { - c.logger.Error("could not write reserved cpuset.cpus", "path", c.reservedCpusetPath(), "cpuset.cpus", reservedCpuset.String(), "error", err) - } - for _, info := range taskCpusets { - if err := os.Mkdir(info.CgroupPath, 0755); err != nil && !os.IsExist(err) { - c.logger.Error("failed to create new cgroup path for task", "path", info.CgroupPath, "error", err) - info.Error = err - continue - } - - // copy cpuset.mems from parent - _, parentMems, err := getCpusetSubsystemSettingsV1(filepath.Dir(info.CgroupPath)) - if err != nil { - c.logger.Error("failed to read parent cgroup settings for task", "path", info.CgroupPath, "error", err) - info.Error = err - continue - } - if err := cgroups.WriteFile(info.CgroupPath, "cpuset.mems", parentMems); err != nil { - c.logger.Error("failed to write cgroup cpuset.mems setting for task", "path", info.CgroupPath, "mems", parentMems, "error", err) - info.Error = err - continue - } - if err := c.setCgroupCpusetCPUs(info.CgroupPath, info.Cpuset.String()); err != nil { - c.logger.Error("failed to write cgroup cpuset.cpus settings for task", "path", info.CgroupPath, "cpus", info.Cpuset.String(), "error", err) - info.Error = err - continue - } - } -} - -// setCgroupCpusetCPUs will compare an existing cpuset.cpus value with an expected value, overwriting the existing if different -// must hold a lock on cpusetManagerV1.mu before calling -func (_ *cpusetManagerV1) setCgroupCpusetCPUs(path, cpus string) error { - currentCpusRaw, err := cgroups.ReadFile(path, "cpuset.cpus") - if err != nil { - return err - } - - if cpus != strings.TrimSpace(currentCpusRaw) { - if err := cgroups.WriteFile(path, "cpuset.cpus", cpus); err != nil { - return err - } - } - return nil -} - -func (c *cpusetManagerV1) signalReconcile() { - select { - case c.signalCh <- struct{}{}: - case <-c.doneCh: - } -} - -func 
(c *cpusetManagerV1) getCgroupPathsForTask(allocID, task string) (absolute, relative string) { - return filepath.Join(c.reservedCpusetPath(), fmt.Sprintf("%s-%s", allocID, task)), - filepath.Join(c.cgroupParent, ReservedCpusetCgroupName, fmt.Sprintf("%s-%s", allocID, task)) -} - -func (c *cpusetManagerV1) sharedCpusetPath() string { - return filepath.Join(c.cgroupParentPath, SharedCpusetCgroupName) -} - -func (c *cpusetManagerV1) reservedCpusetPath() string { - return filepath.Join(c.cgroupParentPath, ReservedCpusetCgroupName) -} - -func getCPUsFromCgroupV1(group string) ([]uint16, error) { - cgroupPath, err := GetCgroupPathHelperV1("cpuset", group) - if err != nil { - return nil, err - } - - cgroup := &configs.Cgroup{ - Path: group, - Resources: new(configs.Resources), - } - - paths := map[string]string{ - "cpuset": cgroupPath, - } - - man, err := fs.NewManager(cgroup, paths) - if err != nil { - return nil, err - } - - stats, err := man.GetStats() - if err != nil { - return nil, err - } - - return stats.CPUSetStats.CPUs, nil -} - -// cpusetEnsureParentV1 makes sure that the parent directories of current -// are created and populated with the proper cpus and mems files copied -// from their respective parent. It does that recursively, starting from -// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point). -func cpusetEnsureParentV1(current string) error { - var st unix.Statfs_t - - parent := filepath.Dir(current) - err := unix.Statfs(parent, &st) - if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC { - return nil - } - // Treat non-existing directory as cgroupfs as it will be created, - // and the root cpuset directory obviously exists. 
- if err != nil && err != unix.ENOENT { - return &os.PathError{Op: "statfs", Path: parent, Err: err} - } - - if err := cpusetEnsureParentV1(parent); err != nil { - return err - } - if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) { - return err - } - return cpusetCopyIfNeededV1(current, parent) -} - -// cpusetCopyIfNeededV1 copies the cpuset.cpus and cpuset.mems from the parent -// directory to the current directory if the file's contents are 0 -func cpusetCopyIfNeededV1(current, parent string) error { - currentCpus, currentMems, err := getCpusetSubsystemSettingsV1(current) - if err != nil { - return err - } - parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(parent) - if err != nil { - return err - } - - if isEmptyCpusetV1(currentCpus) { - if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil { - return err - } - } - if isEmptyCpusetV1(currentMems) { - if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil { - return err - } - } - return nil -} - -func getCpusetSubsystemSettingsV1(parent string) (cpus, mems string, err error) { - if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil { - return - } - if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil { - return - } - return cpus, mems, nil -} - -func isEmptyCpusetV1(str string) bool { - return str == "" || str == "\n" -} - -func GetCgroupPathHelperV1(subsystem, cgroup string) (string, error) { - mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem) - if err != nil { - return "", err - } - - // This is needed for nested containers, because in /proc/self/cgroup we - // see paths from host, which don't exist in container. 
- relCgroup, err := filepath.Rel(root, cgroup) - if err != nil { - return "", err - } - - result := filepath.Join(mnt, relCgroup) - return result, nil -} diff --git a/client/lib/cgutil/cpuset_manager_v1_test.go b/client/lib/cgutil/cpuset_manager_v1_test.go deleted file mode 100644 index c982bb531..000000000 --- a/client/lib/cgutil/cpuset_manager_v1_test.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "os" - "path/filepath" - "testing" - - "github.com/hashicorp/nomad/client/testutil" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/lib/cpuset" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/stretchr/testify/require" -) - -func tmpCpusetManagerV1(t *testing.T) (*cpusetManagerV1, func()) { - mount, err := FindCgroupMountpointDir() - if err != nil || mount == "" { - t.Skipf("Failed to find cgroup mount: %v %v", mount, err) - } - - parent := "/gotest-" + uuid.Short() - require.NoError(t, cpusetEnsureParentV1(parent)) - - parentPath, err := GetCgroupPathHelperV1("cpuset", parent) - require.NoError(t, err) - - manager := NewCpusetManagerV1(parent, nil, testlog.HCLogger(t)).(*cpusetManagerV1) - return manager, func() { require.NoError(t, cgroups.RemovePaths(map[string]string{"cpuset": parentPath})) } -} - -func TestCpusetManager_V1_Init(t *testing.T) { - testutil.CgroupsCompatibleV1(t) - - manager, cleanup := tmpCpusetManagerV1(t) - defer cleanup() - manager.Init() - - require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) - require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - sharedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - sharedCpus, err := 
cpuset.Parse(string(sharedCpusRaw)) - require.NoError(t, err) - require.Exactly(t, manager.parentCpuset.ToSlice(), sharedCpus.ToSlice()) - require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) -} - -func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { - testutil.CgroupsCompatibleV1(t) - - manager, cleanup := tmpCpusetManagerV1(t) - defer cleanup() - manager.Init() - - alloc := mock.Alloc() - // reserve just one core (the 0th core, which probably exists) - alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(0).ToSlice() - manager.AddAlloc(alloc) - - // force reconcile - manager.reconcileCpusets() - - // check that the 0th core is no longer available in the shared group - // actual contents of shared group depends on machine core count - require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) - require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - sharedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) - require.NoError(t, err) - require.NotEmpty(t, sharedCpus.ToSlice()) - require.NotContains(t, sharedCpus.ToSlice(), uint16(0)) - - // check that the 0th core is allocated to reserved cgroup - require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) - reservedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - reservedCpus, err := cpuset.Parse(string(reservedCpusRaw)) - require.NoError(t, err) - require.Exactly(t, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores, reservedCpus.ToSlice()) - - // check that task cgroup exists and cpuset matches expected reserved cores - allocInfo, ok := manager.cgroupInfo[alloc.ID] - require.True(t, ok) - require.Len(t, allocInfo, 1) - taskInfo, 
ok := allocInfo["web"] - require.True(t, ok) - - require.DirExists(t, taskInfo.CgroupPath) - taskCpusRaw, err := os.ReadFile(filepath.Join(taskInfo.CgroupPath, "cpuset.cpus")) - require.NoError(t, err) - taskCpus, err := cpuset.Parse(string(taskCpusRaw)) - require.NoError(t, err) - require.Exactly(t, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores, taskCpus.ToSlice()) -} - -func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { - testutil.CgroupsCompatibleV1(t) - - // This case tests adding 2 allocations, reconciling then removing 1 alloc. - // It requires the system to have at least 3 cpu cores (one for each alloc), - // BUT plus another one because writing an empty cpuset causes the cgroup to - // inherit the parent. - testutil.MinimumCores(t, 3) - - manager, cleanup := tmpCpusetManagerV1(t) - defer cleanup() - manager.Init() - - alloc1 := mock.Alloc() - alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0]) - alloc1.AllocatedResources.Tasks["web"].Cpu.ReservedCores = alloc1Cpuset.ToSlice() - manager.AddAlloc(alloc1) - - alloc2 := mock.Alloc() - alloc2Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[1]) - alloc2.AllocatedResources.Tasks["web"].Cpu.ReservedCores = alloc2Cpuset.ToSlice() - manager.AddAlloc(alloc2) - - //force reconcile - manager.reconcileCpusets() - - // shared cpuset should not include any expected cores - sharedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) - require.NoError(t, err) - require.False(t, sharedCpus.ContainsAny(alloc1Cpuset.Union(alloc2Cpuset))) - - // reserved cpuset should equal the expected cpus - reservedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - reservedCpus, err := cpuset.Parse(string(reservedCpusRaw)) - require.NoError(t, err) - require.True(t, 
reservedCpus.Equal(alloc1Cpuset.Union(alloc2Cpuset))) - - // remove first allocation - alloc1TaskPath := manager.cgroupInfo[alloc1.ID]["web"].CgroupPath - manager.RemoveAlloc(alloc1.ID) - manager.reconcileCpusets() - - // alloc1's task reserved cgroup should be removed - require.NoDirExists(t, alloc1TaskPath) - - // shared cpuset should now include alloc1's cores - sharedCpusRaw, err = os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - sharedCpus, err = cpuset.Parse(string(sharedCpusRaw)) - require.NoError(t, err) - require.False(t, sharedCpus.ContainsAny(alloc2Cpuset)) - require.True(t, sharedCpus.IsSupersetOf(alloc1Cpuset)) - - // reserved cpuset should only include alloc2's cores - reservedCpusRaw, err = os.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) - require.NoError(t, err) - reservedCpus, err = cpuset.Parse(string(reservedCpusRaw)) - require.NoError(t, err) - require.True(t, reservedCpus.Equal(alloc2Cpuset)) - -} diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go deleted file mode 100644 index 5dd103f5d..000000000 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "context" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "time" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/go-set" - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/lib/cpuset" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/configs" -) - -const ( - // CreationPID is a special PID in libcontainer used to denote a cgroup - // should be created, but with no process added. 
- // - // https://github.com/opencontainers/runc/blob/v1.0.3/libcontainer/cgroups/utils.go#L372 - CreationPID = -1 - - // DefaultCgroupParentV2 is the name of Nomad's default parent cgroup, under which - // all other cgroups are managed. This can be changed with client configuration - // in case for e.g. Nomad tasks should be further constrained by an externally - // configured systemd cgroup. - DefaultCgroupParentV2 = "nomad.slice" -) - -// nothing is used for treating a map like a set with no values -type nothing struct{} - -// present indicates something exists -var present = nothing{} - -type cpusetManagerV2 struct { - logger hclog.Logger - - parent string // relative to cgroup root (e.g. "nomad.slice") - parentAbs string // absolute path (e.g. "/sys/fs/cgroup/nomad.slice") - initial cpuset.CPUSet // set of initial cores (never changes) - - lock sync.Mutex // hold this when managing pool / sharing / isolating - pool cpuset.CPUSet // pool of cores being shared among all tasks - sharing map[identity]nothing // sharing tasks using cores only from the pool - isolating map[identity]cpuset.CPUSet // isolating tasks using cores from the pool + reserved cores -} - -func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { - if err := minimumRootControllers(); err != nil { - logger.Error("failed to enabled minimum set of cgroup controllers; disabling cpuset management", "error", err) - return new(NoopCpusetManager) - } - - parentAbs := filepath.Join(CgroupRoot, parent) - if err := os.MkdirAll(parentAbs, 0o755); err != nil { - logger.Error("failed to ensure nomad parent cgroup exists; disabling cpuset management", "error", err) - return new(NoopCpusetManager) - } - - if len(reservable) == 0 { - // read from group - if cpus, err := GetCPUsFromCgroup(parent); err != nil { - logger.Error("failed to lookup cpus from parent cgroup; disabling cpuset management", "error", err) - return new(NoopCpusetManager) - } else { - reservable = cpus 
- } - } - - return &cpusetManagerV2{ - initial: cpuset.New(reservable...), - parent: parent, - parentAbs: parentAbs, - logger: logger, - sharing: make(map[identity]nothing), - isolating: make(map[identity]cpuset.CPUSet), - } -} - -// minimumControllers sets the minimum set of required controllers on the -// /sys/fs/cgroup/cgroup.subtree_control file - ensuring [cpuset, cpu, io, memory, pids] -// are enabled. -func minimumRootControllers() error { - e := new(editor) - s, err := e.read("cgroup.subtree_control") - if err != nil { - return err - } - - required := set.From[string]([]string{"cpuset", "cpu", "io", "memory", "pids"}) - enabled := set.From[string](strings.Fields(s)) - needed := required.Difference(enabled) - - if needed.Size() == 0 { - return nil // already sufficient - } - - sb := new(strings.Builder) - for _, controller := range needed.List() { - sb.WriteString("+" + controller + " ") - } - - activation := strings.TrimSpace(sb.String()) - return e.write("cgroup.subtree_control", activation) -} - -func (c *cpusetManagerV2) Init() { - c.logger.Debug("initializing with", "cores", c.initial) -} - -func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) { - if alloc == nil || alloc.AllocatedResources == nil { - return - } - c.logger.Trace("add allocation", "name", alloc.Name, "id", alloc.ID) - - // grab write lock while we recompute and apply changes - c.lock.Lock() - defer c.lock.Unlock() - - // first update our tracking of isolating and sharing tasks - for task, resources := range alloc.AllocatedResources.Tasks { - id := makeID(alloc.ID, task) - if len(resources.Cpu.ReservedCores) > 0 { - c.isolating[id] = cpuset.New(resources.Cpu.ReservedCores...) 
- } else { - c.sharing[id] = present - } - } - - // recompute the available sharable cpu cores - c.recalculate() - - // now write out the entire cgroups space - c.reconcile() - - // no need to cleanup on adds, we did not remove a task -} - -func (c *cpusetManagerV2) RemoveAlloc(allocID string) { - c.logger.Trace("remove allocation", "id", allocID) - - // grab write lock while we recompute and apply changes. - c.lock.Lock() - defer c.lock.Unlock() - - // remove tasks of allocID from the sharing set - for id := range c.sharing { - if strings.HasPrefix(string(id), allocID) { - delete(c.sharing, id) - } - } - - // remove tasks of allocID from the isolating set - for id := range c.isolating { - if strings.HasPrefix(string(id), allocID) { - delete(c.isolating, id) - } - } - - // recompute available sharable cpu cores - c.recalculate() - - // now write out the entire cgroups space - c.reconcile() - - // now remove any tasks no longer running - c.cleanup() -} - -func (c *cpusetManagerV2) CgroupPathFor(allocID, task string) CgroupPathGetter { - // The CgroupPathFor implementation must block until cgroup for allocID.task - // exists [and can accept a PID]. - return func(ctx context.Context) (string, error) { - ticks, cancel := helper.NewSafeTimer(100 * time.Millisecond) - defer cancel() - - for { - path := c.pathOf(makeID(allocID, task)) - mgr, err := fs2.NewManager(nil, path) - if err != nil { - return "", err - } - - if mgr.Exists() { - return path, nil - } - - select { - case <-ctx.Done(): - return "", ctx.Err() - case <-ticks.C: - continue - } - } - } -} - -// recalculate the number of cores sharable by non-isolating tasks (and isolating tasks) -// -// must be called while holding c.lock -func (c *cpusetManagerV2) recalculate() { - remaining := c.initial.Copy() - for _, set := range c.isolating { - remaining = remaining.Difference(set) - } - c.pool = remaining -} - -// reconcile will actually write the cpuset values for all tracked tasks. 
-// -// must be called while holding c.lock -func (c *cpusetManagerV2) reconcile() { - for id := range c.sharing { - c.write(id, c.pool) - } - - for id, set := range c.isolating { - c.write(id, c.pool.Union(set)) - } -} - -// cleanup will remove any cgroups for allocations no longer being tracked -// -// must be called while holding c.lock -func (c *cpusetManagerV2) cleanup() { - // create a map to lookup ids we know about - size := len(c.sharing) + len(c.isolating) - ids := make(map[identity]nothing, size) - for id := range c.sharing { - ids[id] = present - } - for id := range c.isolating { - ids[id] = present - } - - if err := filepath.WalkDir(c.parentAbs, func(path string, entry os.DirEntry, err error) error { - // skip anything we cannot read - if err != nil { - return nil - } - - // a cgroup is a directory - if !entry.IsDir() { - return nil - } - - dir := filepath.Dir(path) - base := filepath.Base(path) - - // only manage scopes directly under nomad.slice - if dir != c.parentAbs || !strings.HasSuffix(base, ".scope") { - return nil - } - - // only remove the scope if we do not track it - id := identity(strings.TrimSuffix(base, ".scope")) - _, exists := ids[id] - if !exists { - c.remove(path) - } - - return nil - }); err != nil { - c.logger.Error("failed to cleanup cgroup", "error", err) - } -} - -// pathOf returns the absolute path to a task with identity id. -func (c *cpusetManagerV2) pathOf(id identity) string { - return filepath.Join(c.parentAbs, makeScope(id)) -} - -// remove does the actual fs delete of the cgroup -// -// We avoid removing a cgroup if it still contains a PID, as the cpuset manager -// may be initially empty on a Nomad client restart. 
-func (c *cpusetManagerV2) remove(path string) { - mgr, err := fs2.NewManager(nil, path) - if err != nil { - c.logger.Warn("failed to create manager", "path", path, "error", err) - return - } - - // get the list of pids managed by this scope (should be 0 or 1) - pids, _ := mgr.GetPids() - - // do not destroy the scope if a PID is still present - // this is a normal condition when an agent restarts with running tasks - // and the v2 manager is still rebuilding its tracked tasks - if len(pids) > 0 { - return - } - - // remove the cgroup - if err3 := mgr.Destroy(); err3 != nil { - c.logger.Warn("failed to cleanup cgroup", "path", path, "error", err) - return - } -} - -// write does the actual write of cpuset set for cgroup id -func (c *cpusetManagerV2) write(id identity, set cpuset.CPUSet) { - path := c.pathOf(id) - - // make a manager for the cgroup - m, err := fs2.NewManager(new(configs.Cgroup), path) - if err != nil { - c.logger.Error("failed to manage cgroup", "path", path, "error", err) - return - } - - // create the cgroup - if err = m.Apply(CreationPID); err != nil { - c.logger.Error("failed to apply cgroup", "path", path, "error", err) - return - } - - // set the cpuset value for the cgroup - if err = m.Set(&configs.Resources{ - CpusetCpus: set.String(), - SkipDevices: true, - }); err != nil { - c.logger.Error("failed to set cgroup", "path", path, "error", err) - return - } -} - -// fromRoot returns the joined filepath of group on the CgroupRoot -func fromRoot(group string) string { - return filepath.Join(CgroupRoot, group) -} - -// getCPUsFromCgroupV2 retrieves the effective cpuset for the group, which must -// be directly under the cgroup root (i.e. the parent, like nomad.slice). 
-func getCPUsFromCgroupV2(group string) ([]uint16, error) { - path := fromRoot(group) - effective, err := cgroups.ReadFile(path, "cpuset.cpus.effective") - if err != nil { - return nil, err - } - set, err := cpuset.Parse(effective) - if err != nil { - return nil, err - } - return set.ToSlice(), nil -} - -// identity is the "." string that uniquely identifies an -// individual instance of a task within the flat cgroup namespace -type identity string - -func makeID(allocID, task string) identity { - return identity(fmt.Sprintf("%s.%s", allocID, task)) -} - -func makeScope(id identity) string { - return string(id) + ".scope" -} diff --git a/client/lib/cgutil/cpuset_manager_v2_test.go b/client/lib/cgutil/cpuset_manager_v2_test.go deleted file mode 100644 index fd3ee6f18..000000000 --- a/client/lib/cgutil/cpuset_manager_v2_test.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "path/filepath" - "strings" - "testing" - - "github.com/hashicorp/nomad/client/testutil" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/lib/cpuset" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/stretchr/testify/require" -) - -// Note: these tests need to run on GitHub Actions runners with only 2 cores. -// It is not possible to write more cores to a cpuset than are actually available, -// so make sure tests account for that by setting systemCores as the full set of -// usable cores. 
-var systemCores = []uint16{0, 1} - -func TestCpusetManager_V2_AddAlloc(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - testutil.MinimumCores(t, 2) - - logger := testlog.HCLogger(t) - parent := uuid.Short() + ".scope" - create(t, parent) - cleanup(t, parent) - - // setup the cpuset manager - manager := NewCpusetManagerV2(parent, systemCores, logger) - manager.Init() - - // add our first alloc, isolating 1 core - t.Run("first", func(t *testing.T) { - alloc := mock.Alloc() - alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(0).ToSlice() - manager.AddAlloc(alloc) - cpusetIs(t, "0-1", parent, alloc.ID, "web") - }) - - // add second alloc, isolating 1 core - t.Run("second", func(t *testing.T) { - alloc := mock.Alloc() - alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(1).ToSlice() - manager.AddAlloc(alloc) - cpusetIs(t, "1", parent, alloc.ID, "web") - }) - - // note that the scheduler, not the cpuset manager, is what prevents over-subscription - // and as such no logic exists here to prevent that -} - -func cpusetIs(t *testing.T, exp, parent, allocID, task string) { - scope := makeScope(makeID(allocID, task)) - value, err := cgroups.ReadFile(filepath.Join(CgroupRoot, parent, scope), "cpuset.cpus") - require.NoError(t, err) - require.Equal(t, exp, strings.TrimSpace(value)) -} - -func TestCpusetManager_V2_RemoveAlloc(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - testutil.MinimumCores(t, 2) - - logger := testlog.HCLogger(t) - parent := uuid.Short() + ".scope" - create(t, parent) - cleanup(t, parent) - - // setup the cpuset manager - manager := NewCpusetManagerV2(parent, systemCores, logger) - manager.Init() - - // alloc1 gets core 0 - alloc1 := mock.Alloc() - alloc1.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(0).ToSlice() - manager.AddAlloc(alloc1) - - // alloc2 gets core 1 - alloc2 := mock.Alloc() - alloc2.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(1).ToSlice() - 
manager.AddAlloc(alloc2) - cpusetIs(t, "1", parent, alloc2.ID, "web") - - // with alloc1 gone, alloc2 gets the now shared core - manager.RemoveAlloc(alloc1.ID) - cpusetIs(t, "0-1", parent, alloc2.ID, "web") -} diff --git a/client/lib/cgutil/editor.go b/client/lib/cgutil/editor.go deleted file mode 100644 index a366f4211..000000000 --- a/client/lib/cgutil/editor.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "os" - "path/filepath" - "strings" -) - -// editor provides a simple mechanism for reading and writing cgroup files. -type editor struct { - fromRoot string -} - -func (e *editor) path(file string) string { - return filepath.Join(CgroupRoot, e.fromRoot, file) -} - -func (e *editor) write(file, content string) error { - return os.WriteFile(e.path(file), []byte(content), 0o644) -} - -func (e *editor) read(file string) (string, error) { - b, err := os.ReadFile(e.path(file)) - return strings.TrimSpace(string(b)), err -} diff --git a/client/lib/cgutil/editor_test.go b/client/lib/cgutil/editor_test.go deleted file mode 100644 index ce8dc32be..000000000 --- a/client/lib/cgutil/editor_test.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "os" - "path/filepath" - "testing" - - "github.com/hashicorp/nomad/client/testutil" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/shoenig/test/must" -) - -func createCG(t *testing.T) (string, func()) { - name := uuid.Short() + ".scope" - path := filepath.Join(CgroupRoot, name) - err := os.Mkdir(path, 0o755) - must.NoError(t, err) - - return name, func() { - _ = os.Remove(path) - } -} - -func TestCG_editor(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - - cg, rm := createCG(t) - t.Cleanup(rm) - - edits := &editor{cg} - writeErr := edits.write("cpu.weight.nice", "13") - must.NoError(t, writeErr) - - b, readErr := edits.read("cpu.weight.nice") - must.NoError(t, readErr) - must.Eq(t, "13", b) -} diff --git a/client/lib/cgutil/group_killer.go b/client/lib/cgutil/group_killer.go deleted file mode 100644 index ef299e9d5..000000000 --- a/client/lib/cgutil/group_killer.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package cgutil - -import ( - "errors" - "fmt" - "os" - "strconv" - "time" - - "github.com/hashicorp/go-hclog" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/configs" -) - -// freezer is the name of the cgroup subsystem used for stopping / starting -// a group of processes -const freezer = "freezer" - -// thawed and frozen are the two states we put a cgroup in when trying to remove it -var ( - thawed = &configs.Resources{Freezer: configs.Thawed} - frozen = &configs.Resources{Freezer: configs.Frozen} -) - -// GroupKiller is used for SIGKILL-ing the process tree[s] of a cgroup by leveraging -// the freezer cgroup subsystem. -type GroupKiller interface { - KillGroup(cgroup *configs.Cgroup) error -} - -// NewGroupKiller creates a GroupKiller with executor PID pid. 
-func NewGroupKiller(logger hclog.Logger, pid int) GroupKiller { - return &killer{ - logger: logger.Named("group_killer"), - pid: pid, - } -} - -type killer struct { - logger hclog.Logger - pid int -} - -// KillGroup will SIGKILL the process tree present in cgroup, using the freezer -// subsystem to prevent further forking, etc. -func (d *killer) KillGroup(cgroup *configs.Cgroup) error { - if UseV2 { - return d.v2(cgroup) - } - return d.v1(cgroup) -} - -func (d *killer) v1(cgroup *configs.Cgroup) error { - if cgroup == nil { - return errors.New("missing cgroup") - } - - // the actual path to our tasks freezer cgroup - path := cgroup.Path - - d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v1", "executor_pid", d.pid) - - // move executor PID into the init freezer cgroup so we can kill the task - // pids without killing the executor (which is the process running this code, - // doing the killing) - initPath, err := cgroups.GetInitCgroupPath(freezer) - if err != nil { - return fmt.Errorf("failed to find init cgroup: %w", err) - } - m := map[string]string{freezer: initPath} - if err = cgroups.EnterPid(m, d.pid); err != nil { - return fmt.Errorf("failed to add executor pid to init cgroup: %w", err) - } - - // ability to freeze the cgroup - freeze := func() { - _ = new(fs.FreezerGroup).Set(path, frozen) - } - - // ability to thaw the cgroup - thaw := func() { - _ = new(fs.FreezerGroup).Set(path, thawed) - } - - // do the common kill logic - if err = d.kill(path, freeze, thaw); err != nil { - return err - } - - // remove the cgroup from disk - return cgroups.RemovePath(path) -} - -func (d *killer) v2(cgroup *configs.Cgroup) error { - if cgroup == nil || cgroup.Path == "" { - return errors.New("missing cgroup") - } - - // move executor (d.PID) into init.scope - editSelf := &editor{"init.scope"} - if err := editSelf.write("cgroup.procs", strconv.Itoa(d.pid)); err != nil { - return err - } - - // write "1" to cgroup.kill - editTask := 
&editor{cgroup.Path} - if err := editTask.write("cgroup.kill", "1"); err != nil { - return err - } - - // note: do NOT remove the cgroup from disk; leave that to the Client, at - // least until #14375 is implemented. - return nil -} - -// kill is used to SIGKILL all processes in cgroup -// -// The order of operations is -// 0. before calling this method, the executor pid has been moved outside of cgroup -// 1. freeze cgroup (so processes cannot fork further) -// 2. scan the cgroup to collect all pids -// 3. issue SIGKILL to each pid found -// 4. thaw the cgroup so processes can go die -// 5. wait on each processes until it is confirmed dead -func (d *killer) kill(cgroup string, freeze func(), thaw func()) error { - // freeze the cgroup stopping further forking - freeze() - - d.logger.Trace("search for pids in", "cgroup", cgroup) - - // find all the pids we intend to kill - pids, err := cgroups.GetPids(cgroup) - if err != nil { - // if we fail to get pids, re-thaw before bailing so there is at least - // a chance the processes can go die out of band - thaw() - return fmt.Errorf("failed to find pids: %w", err) - } - - d.logger.Trace("send sigkill to frozen processes", "cgroup", cgroup, "pids", pids) - - var processes []*os.Process - - // kill the processes in cgroup - for _, pid := range pids { - p, findErr := os.FindProcess(pid) - if findErr != nil { - d.logger.Trace("failed to find process of pid to kill", "pid", pid, "error", findErr) - continue - } - processes = append(processes, p) - if killErr := p.Kill(); killErr != nil { - d.logger.Trace("failed to kill process", "pid", pid, "error", killErr) - continue - } - } - - // thawed the cgroup so we can wait on each process - thaw() - - // wait on each process - for _, p := range processes { - // do not capture error; errors are normal here - pState, _ := p.Wait() - d.logger.Trace("return from wait on process", "pid", p.Pid, "state", pState) - } - - // cgroups are not atomic, the OS takes a moment to un-mark the 
cgroup as in-use; - // a tiny sleep here goes a long way for not creating noisy (but functionally benign) - // errors about removing busy cgroup - // - // alternatively we could do the removal in a loop and silence the interim errors, but meh - time.Sleep(50 * time.Millisecond) - - return nil -} diff --git a/client/lib/cpustats/stats.go b/client/lib/cpustats/stats.go new file mode 100644 index 000000000..027394cd6 --- /dev/null +++ b/client/lib/cpustats/stats.go @@ -0,0 +1,74 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +// Package cpustats provides utilities for tracking CPU usage statistics. +package cpustats + +import ( + "time" + + "github.com/hashicorp/nomad/client/lib/numalib" + "oss.indeed.com/go/libtime" +) + +// Topology is an interface of what is needed from numalib.Topology for computing +// the CPU resource utilization of a process. +type Topology interface { + TotalCompute() numalib.MHz + NumCores() int +} + +// A Tracker keeps track of one aspect of CPU utilization (i.e. one of system, +// user, or total time). +type Tracker struct { + prevCPUTime float64 + prevTime time.Time + + totalCompute numalib.MHz + numCPUs int + + clock libtime.Clock +} + +// New creates a fresh Tracker with no data. +func New(top Topology) *Tracker { + return &Tracker{ + totalCompute: top.TotalCompute(), + numCPUs: top.NumCores(), + clock: libtime.SystemClock(), + } +} + +// Percent calculates the CPU usage percentage based on the current CPU usage +// and the previous CPU usage where usage is given as a time in nanoseconds +// spent using the CPU. 
+func (t *Tracker) Percent(cpuTime float64) float64 { + now := t.clock.Now() + + if t.prevCPUTime == 0.0 { + t.prevCPUTime = cpuTime + t.prevTime = now + return 0.0 + } + + timeDelta := now.Sub(t.prevTime).Nanoseconds() + ret := t.calculatePercent(t.prevCPUTime, cpuTime, timeDelta) + t.prevCPUTime = cpuTime + t.prevTime = now + return ret +} + +func (t *Tracker) calculatePercent(t1, t2 float64, timeDelta int64) float64 { + vDelta := t2 - t1 + if timeDelta <= 0 || vDelta <= 0 { + return 0.0 + } + return (vDelta / float64(timeDelta)) * 100.0 +} + +// TicksConsumed calculates the total bandwidth consumed by the process across +// all system CPU cores (not just the ones available to Nomad or this particular +// process. +func (t *Tracker) TicksConsumed(percent float64) float64 { + return (percent / 100) * float64(t.totalCompute) / float64(t.numCPUs) +} diff --git a/client/lib/idset/idset.go b/client/lib/idset/idset.go new file mode 100644 index 000000000..3ecffa66e --- /dev/null +++ b/client/lib/idset/idset.go @@ -0,0 +1,165 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +// Package idset provides a Set implementation for keeping track of various +// types of numeric IDs (e.g. CoreID, ProcessID, etc.). +package idset + +import ( + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/hashicorp/go-set" + "golang.org/x/exp/slices" +) + +// An ID is representative of a non-negative identifier of something like +// a CPU core ID, a NUMA node ID, etc. +type ID interface { + ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uint +} + +// A Set contains some IDs. +// +// See the List Format section of +// https://www.man7.org/linux/man-pages/man7/cpuset.7.html +// for more information on the syntax and utility of these sets. +type Set[T ID] struct { + items *set.Set[T] +} + +// Empty creates a fresh Set with no elements. 
+func Empty[T ID]() *Set[T] { + return &Set[T]{ + items: set.New[T](0), + } +} + +var ( + numberRe = regexp.MustCompile(`^\d+$`) + spanRe = regexp.MustCompile(`^(\d+)-(\d+)$`) +) + +func atoi[T ID](s string) T { + i, _ := strconv.Atoi(s) + return T(i) +} + +func order[T ID](a, b T) (T, T) { + if a < b { + return a, b + } + return b, a +} + +// Parse the given cpuset into a set. +// +// The input is assumed to be valid. +func Parse[T ID](list string) *Set[T] { + result := Empty[T]() + + add := func(s string) { + s = strings.TrimSpace(s) + switch { + case numberRe.MatchString(s): + result.items.Insert(atoi[T](s)) + case spanRe.MatchString(s): + values := spanRe.FindStringSubmatch(s) + low, high := order(atoi[T](values[1]), atoi[T](values[2])) + for i := low; i <= high; i++ { + result.items.Insert(i) + } + } + } + + pieces := strings.Split(list, ",") + for _, piece := range pieces { + add(piece) + } + + return result +} + +// From returns Set created from the given slice. +func From[T, U ID](slice []U) *Set[T] { + result := Empty[T]() + for _, item := range slice { + result.items.Insert(T(item)) + } + return result +} + +// Contains returns whether the Set contains item. +func (s *Set[T]) Contains(item T) bool { + return s.items.Contains(item) +} + +// Insert item into the Set. +func (s *Set[T]) Insert(item T) { + s.items.Insert(item) +} + +// Slice returns a slice copy of the Set. +func (s *Set[T]) Slice() []T { + items := s.items.Slice() + slices.Sort(items) + return items +} + +// String creates a well-formed cpuset string representation of the Set. 
+func (s *Set[T]) String() string { + if s.items.Empty() { + return "" + } + + var parts []string + ids := s.Slice() + + low, high := ids[0], ids[0] + for i := 1; i < len(ids); i++ { + switch { + case ids[i] == high+1: + high = ids[i] + continue + case low == high: + parts = append(parts, fmt.Sprintf("%d", low)) + default: + parts = append(parts, fmt.Sprintf("%d-%d", low, high)) + } + low, high = ids[i], ids[i] // new range + } + + if low == high { + parts = append(parts, fmt.Sprintf("%d", low)) + } else { + parts = append(parts, fmt.Sprintf("%d-%d", low, high)) + } + + return strings.Join(parts, ",") +} + +// ForEach iterates the elements in the set and applies f. Iteration stops +// if the result of f is a non-nil error. +func (s *Set[T]) ForEach(f func(id T) error) error { + for _, id := range s.items.Slice() { + if err := f(id); err != nil { + return err + } + } + return nil +} + +// Size returns the number of elements in the Set. +func (s *Set[T]) Size() int { + return s.items.Size() +} + +// Empty returns whether the set is empty. +func (s *Set[T]) Empty() bool { + if s == nil { + return true + } + return s.items.Empty() +} diff --git a/client/lib/idset/idset_test.go b/client/lib/idset/idset_test.go new file mode 100644 index 000000000..5eb35b7dc --- /dev/null +++ b/client/lib/idset/idset_test.go @@ -0,0 +1,81 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +package idset + +import ( + "testing" + + "github.com/shoenig/test/must" +) + +func Test_Parse(t *testing.T) { + cases := []struct { + input string + exp []uint16 + }{ + { + input: "0", + exp: []uint16{0}, + }, + { + input: "1,3,5,9", + exp: []uint16{1, 3, 5, 9}, + }, + { + input: "1-2", + exp: []uint16{1, 2}, + }, + { + input: "3-6", + exp: []uint16{3, 4, 5, 6}, + }, + { + input: "1,3-5,9,11-14", + exp: []uint16{1, 3, 4, 5, 9, 11, 12, 13, 14}, + }, + { + input: " 4-2 , 9-9 , 11-7\n", + exp: []uint16{2, 3, 4, 7, 8, 9, 10, 11}, + }, + } + + for _, tc := range cases { + t.Run("("+tc.input+")", func(t *testing.T) { + result := Parse[uint16](tc.input).Slice() + must.SliceContainsAll(t, tc.exp, result, must.Sprint("got", result)) + }) + } +} + +func Test_String(t *testing.T) { + cases := []struct { + input string + exp string + }{ + { + input: "0", + exp: "0", + }, + { + input: "1-3", + exp: "1-3", + }, + { + input: "1, 2, 3", + exp: "1-3", + }, + { + input: "7, 1-3, 12-9", + exp: "1-3,7,9-12", + }, + } + + for _, tc := range cases { + t.Run("("+tc.input+")", func(t *testing.T) { + result := Parse[uint16](tc.input) + str := result.String() + must.Eq(t, tc.exp, str, must.Sprint("slice", result.Slice())) + }) + } +} diff --git a/client/lib/numalib/detect.go b/client/lib/numalib/detect.go new file mode 100644 index 000000000..e67c9654e --- /dev/null +++ b/client/lib/numalib/detect.go @@ -0,0 +1,83 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package numalib + +import ( + "github.com/hashicorp/nomad/client/lib/idset" +) + +// A SystemScanner represents one methodology of detecting CPU hardware on a +// system. Detectable information is accumulated into a given Topology. 
+type SystemScanner interface { + ScanSystem(*Topology) +} + +// Scan each of the given scanners in order and accumulate the results into +// a single Topology, which can then be used to answer questions about the CPU +// topology of the system. +func Scan(scanners []SystemScanner) *Topology { + top := new(Topology) + for _, scanner := range scanners { + scanner.ScanSystem(top) + } + return top +} + +// ConfigScanner provides override values coming from Nomad Client configuration. +// This scanner must run last as the client configuration has the final say if +// values there are set by an operator. +type ConfigScanner struct { + // ReservableCores comes from client.reservable_cores. + // + // Not yet documented as of 1.6. + // + // Only meaningful on Linux, this value can be used to override the set of + // CPU core IDs we may make use of. Normally these are detected by reading + // Nomad parent cgroup cpuset interface file. + ReservableCores *idset.Set[CoreID] + + // TotalCompute comes from client.cpu_total_compute. + // + // Used to set the total MHz of available CPU bandwidth on a system. This + // value is used by the scheduler for fitment, and by the client for computing + // task / alloc / client resource utilization. Therefor this value: + // - Should NOT be set if Nomad was able to fingerprint a value. + // - Should NOT be used to over/under provision compute resources. + TotalCompute MHz + + // ReservedCores comes from client.reserved.cores. + // + // Used to withhold a set of cores from being used by Nomad for scheduling. + ReservedCores *idset.Set[CoreID] + + // ReservedCompute comes from client.reserved.cpu. + // + // Used to withhold an amount of MHz of CPU bandwidth from being used by + // Nomad for scheduling. + ReservedCompute MHz +} + +func (cs *ConfigScanner) ScanSystem(top *Topology) { + // disable cores that are not reservable (i.e. 
override effective cpuset) + if cs.ReservableCores != nil { + for i := 0; i < len(top.Cores); i++ { + if !cs.ReservableCores.Contains(top.Cores[i].ID) { + top.Cores[i].Disable = true + } + } + } + + // disable cores that are not usable (i.e. hide from scheduler) + for i := 0; i < len(top.Cores); i++ { + if cs.ReservedCores.Contains(top.Cores[i].ID) { + top.Cores[i].Disable = true + } + } + + // set total compute from client configuration + top.OverrideTotalCompute = cs.TotalCompute + + // set the reserved compute from client configuration + top.OverrideWitholdCompute = cs.ReservedCompute +} diff --git a/client/lib/numalib/detect_darwin.go b/client/lib/numalib/detect_darwin.go new file mode 100644 index 000000000..e13303bd1 --- /dev/null +++ b/client/lib/numalib/detect_darwin.go @@ -0,0 +1,74 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build darwin + +package numalib + +import ( + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/shoenig/go-m1cpu" + "golang.org/x/sys/unix" +) + +// PlatformScanners returns the set of SystemScanner for macOS. +func PlatformScanners() []SystemScanner { + return []SystemScanner{ + new(MacOS), + } +} + +const ( + nodeID = NodeID(0) + socketID = SocketID(0) + maxSpeed = KHz(0) +) + +// MacOS implements SystemScanner for macOS systems (both arm64 and x86). 
+type MacOS struct{} + +func (m *MacOS) ScanSystem(top *Topology) { + // all apple hardware is non-numa; just assume as much + top.NodeIDs = idset.Empty[NodeID]() + top.NodeIDs.Insert(nodeID) + + // arch specific detection + switch m1cpu.IsAppleSilicon() { + case true: + m.scanAppleSilicon(top) + case false: + m.scanLegacyX86(top) + } +} + +func (m *MacOS) scanAppleSilicon(top *Topology) { + pCoreCount := m1cpu.PCoreCount() + pCoreSpeed := KHz(m1cpu.PCoreHz() / 1000) + + eCoreCount := m1cpu.ECoreCount() + eCoreSpeed := KHz(m1cpu.ECoreHz() / 1000) + + top.Cores = make([]Core, pCoreCount+eCoreCount) + nthCore := CoreID(0) + + for i := 0; i < pCoreCount; i++ { + top.insert(nodeID, socketID, nthCore, performance, maxSpeed, pCoreSpeed) + nthCore++ + } + + for i := 0; i < eCoreCount; i++ { + top.insert(nodeID, socketID, nthCore, efficiency, maxSpeed, eCoreSpeed) + nthCore++ + } +} + +func (m *MacOS) scanLegacyX86(top *Topology) { + coreCount, _ := unix.SysctlUint32("machdep.cpu.core_count") + hz, _ := unix.SysctlUint64("hw.cpufrequency") + coreSpeed := KHz(hz / 1_000_000) + + top.Cores = make([]Core, coreCount) + for i := 0; i < int(coreCount); i++ { + top.insert(nodeID, socketID, CoreID(i), performance, maxSpeed, coreSpeed) + } +} diff --git a/client/lib/numalib/detect_default.go b/client/lib/numalib/detect_default.go new file mode 100644 index 000000000..a3e2095c2 --- /dev/null +++ b/client/lib/numalib/detect_default.go @@ -0,0 +1,60 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !linux && !darwin + +package numalib + +import ( + "context" + "time" + + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/shirou/gopsutil/v3/cpu" +) + +// PlatformScanners returns the set of SystemScanner for systems without a +// specific implementation. 
+func PlatformScanners() []SystemScanner { + return []SystemScanner{ + new(Generic), + } +} + +const ( + nodeID = NodeID(0) + socketID = SocketID(0) + maxSpeed = KHz(0) +) + +// Generic implements SystemScanner as a fallback for operating systems without +// a specific implementation. +type Generic struct{} + +func (g *Generic) ScanSystem(top *Topology) { + // hardware may or may not be NUMA, but for now we only + // detect such topology on linux systems + top.NodeIDs = idset.Empty[NodeID]() + top.NodeIDs.Insert(nodeID) + + // cores + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + count, err := cpu.CountsWithContext(ctx, true) + if err != nil { + return + } + top.Cores = make([]Core, count) + + infos, err := cpu.InfoWithContext(ctx) + if err != nil || len(infos) == 0 { + return + } + + for i := 0; i < count; i++ { + info := infos[0] + speed := KHz(MHz(info.Mhz) * 1000) + top.insert(nodeID, socketID, CoreID(i), performance, maxSpeed, speed) + } +} diff --git a/client/lib/numalib/detect_linux.go b/client/lib/numalib/detect_linux.go new file mode 100644 index 000000000..ea3dd8d12 --- /dev/null +++ b/client/lib/numalib/detect_linux.go @@ -0,0 +1,191 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package numalib + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/client/lib/idset" +) + +// PlatformScanners returns the set of SystemScanner for Linux. 
+func PlatformScanners() []SystemScanner { + return []SystemScanner{ + new(Sysfs), + new(Smbios), + new(Cgroups1), + new(Cgroups2), + } +} + +const ( + sysRoot = "/sys/devices/system" + nodeOnline = sysRoot + "/node/online" + cpuOnline = sysRoot + "/cpu/online" + distanceFile = sysRoot + "/node/node%d/distance" + cpulistFile = sysRoot + "/node/node%d/cpulist" + cpuMaxFile = sysRoot + "/cpu/cpu%d/cpufreq/cpuinfo_max_freq" + cpuBaseFile = sysRoot + "/cpu/cpu%d/cpufreq/base_frequency" + cpuSocketFile = sysRoot + "/cpu/cpu%d/topology/physical_package_id" + cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list" +) + +// Sysfs implements SystemScanner for Linux by reading system topology data +// from /sys/devices/system. This is the best source of truth on Linux and +// should always be used first - additional scanners can provide more context +// on top of what is initiallly detected here. +type Sysfs struct{} + +func (s *Sysfs) ScanSystem(top *Topology) { + // detect the online numa nodes + s.discoverOnline(top) + + // detect cross numa node latency costs + s.discoverCosts(top) + + // detect core performance data + s.discoverCores(top) +} + +func (*Sysfs) available() bool { + return true +} + +func (*Sysfs) discoverOnline(st *Topology) { + ids, err := getIDSet[NodeID](nodeOnline) + if err == nil { + st.NodeIDs = ids + } +} + +func (*Sysfs) discoverCosts(st *Topology) { + dimension := st.NodeIDs.Size() + st.Distances = make(SLIT, st.NodeIDs.Size()) + for i := 0; i < dimension; i++ { + st.Distances[i] = make([]Cost, dimension) + } + + _ = st.NodeIDs.ForEach(func(id NodeID) error { + s, err := getString(distanceFile, id) + if err != nil { + return err + } + + for i, c := range strings.Fields(s) { + cost, _ := strconv.Atoi(c) + st.Distances[id][i] = Cost(cost) + } + return nil + }) +} + +func (*Sysfs) discoverCores(st *Topology) { + onlineCores, err := getIDSet[CoreID](cpuOnline) + if err != nil { + return + } + st.Cores = make([]Core, onlineCores.Size()) + 
+ _ = st.NodeIDs.ForEach(func(node NodeID) error { + s, err := os.ReadFile(fmt.Sprintf(cpulistFile, node)) + if err != nil { + return err + } + + cores := idset.Parse[CoreID](string(s)) + _ = cores.ForEach(func(core CoreID) error { + // best effort, zero values are defaults + socket, _ := getNumeric[SocketID](cpuSocketFile, core) + max, _ := getNumeric[KHz](cpuMaxFile, core) + base, _ := getNumeric[KHz](cpuBaseFile, core) + siblings, _ := getIDSet[CoreID](cpuSiblingFile, core) + st.insert(node, socket, core, gradeOf(siblings), max, base) + return nil + }) + return nil + }) +} + +func getIDSet[T idset.ID](path string, args ...any) (*idset.Set[T], error) { + path = fmt.Sprintf(path, args...) + s, err := os.ReadFile(path) + if err != nil { + return nil, err + } + return idset.Parse[T](string(s)), nil +} + +func getNumeric[T int | idset.ID](path string, args ...any) (T, error) { + path = fmt.Sprintf(path, args...) + s, err := os.ReadFile(path) + if err != nil { + return 0, err + } + i, err := strconv.Atoi(strings.TrimSpace(string(s))) + if err != nil { + return 0, err + } + return T(i), nil +} + +func getString(path string, args ...any) (string, error) { + path = fmt.Sprintf(path, args...) 
+ s, err := os.ReadFile(path) + if err != nil { + return "", err + } + return strings.TrimSpace(string(s)), nil +} + +// Cgroups1 reads effective cores information from cgroups v1 +type Cgroups1 struct{} + +func (s *Cgroups1) ScanSystem(top *Topology) { + if cgroupslib.GetMode() != cgroupslib.CG1 { + return + } + + // detect effective cores in the cpuset/nomad cgroup + content, err := cgroupslib.ReadNomadCG1("cpuset", "cpuset.effective_cpus") + if err != nil { + return + } + + // extract IDs from file of ids + scanIDs(top, content) +} + +// Cgroups2 reads effective cores information from cgroups v2 +type Cgroups2 struct{} + +func (s *Cgroups2) ScanSystem(top *Topology) { + if cgroupslib.GetMode() != cgroupslib.CG2 { + return + } + + // detect effective cores in the nomad.slice cgroup + content, err := cgroupslib.ReadNomadCG2("cpuset.cpus.effective") + if err != nil { + return + } + + // extract IDs from file of ids + scanIDs(top, content) +} + +// combine scanCgroups +func scanIDs(top *Topology, content string) { + ids := idset.Parse[CoreID](content) + for _, cpu := range top.Cores { + if !ids.Contains(cpu.ID) { + cpu.Disable = true + } + } +} diff --git a/client/lib/numalib/detect_noimpl.go b/client/lib/numalib/detect_noimpl.go new file mode 100644 index 000000000..7394573a0 --- /dev/null +++ b/client/lib/numalib/detect_noimpl.go @@ -0,0 +1,18 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package numalib + +// NoImpl will check that the topology has been set, otherwise set a default +// value of 1 core @ 1 ghz. This should only be activated in tests that +// disable the cpu fingerprinter. 
+func NoImpl(top *Topology) *Topology { + if top == nil || len(top.Cores) == 0 { + return &Topology{ + Cores: []Core{ + {GuessSpeed: 1000}, + }, + } + } + return top +} diff --git a/client/lib/numalib/detect_noimpl_test.go b/client/lib/numalib/detect_noimpl_test.go new file mode 100644 index 000000000..56e730bdd --- /dev/null +++ b/client/lib/numalib/detect_noimpl_test.go @@ -0,0 +1,23 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package numalib + +import ( + "testing" + + "github.com/shoenig/test/must" +) + +func Test_NoImpl_yes(t *testing.T) { + original := new(Topology) + fallback := NoImpl(original) + must.NotEqOp(t, original, fallback) // pointer is replaced + must.Len(t, 1, fallback.Cores) +} + +func Test_NoImpl_no(t *testing.T) { + original := Scan(PlatformScanners()) + fallback := NoImpl(original) + must.EqOp(t, original, fallback) // pointer is same +} diff --git a/client/lib/numalib/detect_smbios.go b/client/lib/numalib/detect_smbios.go new file mode 100644 index 000000000..7ad1840f1 --- /dev/null +++ b/client/lib/numalib/detect_smbios.go @@ -0,0 +1,81 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package numalib + +import ( + "context" + "os/exec" + "regexp" + "strconv" + "time" +) + +const ( + dmidecodeCmd = "dmidecode" +) + +var ( + dmiCurSpeedRe = regexp.MustCompile(`Current Speed:\s+(\d+)\s+MHz`) +) + +type Smbios struct { + data string +} + +func (s *Smbios) ScanSystem(top *Topology) { + if !s.available() { + return + } + + // sysfs should work on ec2 for detecting numa nodes + // and so we skip those steps here at least for now, because reading + // smbios is very platform specific + + // detect guess-level core performance data + s.discoverCores(top) +} + +func (s *Smbios) available() bool { + path, err := exec.LookPath(dmidecodeCmd) + if err != nil { + return false + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, path, "-t", "4") + b, err := cmd.CombinedOutput() + if err != nil { + return false + } + + s.data = string(b) + return true +} + +func (s *Smbios) discoverCores(top *Topology) { + curSpeeds := dmiCurSpeedRe.FindStringSubmatch(s.data) + + if len(curSpeeds) < 2 { + return + } + + maxCurSpeed := 0 + for i := 1; i < len(curSpeeds); i++ { + curSpeed, err := strconv.Atoi(curSpeeds[i]) + if err == nil { + if curSpeed > maxCurSpeed { + maxCurSpeed = curSpeed + } + } + } + + // set the guess speed to the highest detected current speed + for i := 0; i < len(top.Cores); i++ { + top.Cores[i].GuessSpeed = MHz(maxCurSpeed) + } +} diff --git a/client/lib/numalib/detect_smbios_test.go b/client/lib/numalib/detect_smbios_test.go new file mode 100644 index 000000000..67e32254b --- /dev/null +++ b/client/lib/numalib/detect_smbios_test.go @@ -0,0 +1,44 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package numalib + +import ( + "os" + "os/exec" + "testing" + + "github.com/shoenig/test/must" +) + +func requiresSMBIOS(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("requires root") + } + + p, err := exec.LookPath("dmidecode") + if err != nil { + t.Skip("requires dmidecode package") + } + + if p == "" { + t.Skip("requires dmidecode on path") + } +} + +func TestSmbios_detectSpeed(t *testing.T) { + requiresSMBIOS(t) + + top := new(Topology) + sysfs := new(Sysfs) + smbios := new(Smbios) + + sysfs.ScanSystem(top) + smbios.ScanSystem(top) + + for _, core := range top.Cores { + must.Positive(t, core.GuessSpeed) + } +} diff --git a/client/lib/numalib/detect_test.go b/client/lib/numalib/detect_test.go new file mode 100644 index 000000000..b9ef8714c --- /dev/null +++ b/client/lib/numalib/detect_test.go @@ -0,0 +1,19 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package numalib + +import ( + "testing" + + "github.com/shoenig/test/must" +) + +// TestScanTopology is going to be different on every machine; even the CI +// systems change sometimes so it's hard to make good assertions here. +func TestScanTopology(t *testing.T) { + top := Scan(PlatformScanners()) + must.Positive(t, top.UsableCompute()) + must.Positive(t, top.TotalCompute()) + must.Positive(t, top.NumCores()) +} diff --git a/client/lib/numalib/topology.go b/client/lib/numalib/topology.go new file mode 100644 index 000000000..031f4b620 --- /dev/null +++ b/client/lib/numalib/topology.go @@ -0,0 +1,252 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +// Package numalib provides information regarding the system NUMA, CPU, and +// device topology of the system. 
+// +// https://docs.kernel.org/6.2/x86/topology.html +package numalib + +import ( + "fmt" + "runtime" + "strconv" + "strings" + + "github.com/hashicorp/nomad/client/lib/idset" +) + +// CoreGrade describes whether a specific core is a performance or efficiency +// core type. A performance core generally has a higher clockspeed and can do +// more than an efficiency core. +type CoreGrade bool + +const ( + performance CoreGrade = true + efficiency CoreGrade = false +) + +func gradeOf(siblings *idset.Set[CoreID]) CoreGrade { + switch siblings.Size() { + case 0, 1: + return efficiency + default: + return performance + } +} + +func (g CoreGrade) String() string { + switch g { + case performance: + return "performance" + default: + return "efficiency" + } +} + +type ( + NodeID uint8 + SocketID uint8 + CoreID uint16 + KHz uint64 + MHz uint64 + GHz float64 + Cost uint8 +) + +func (hz KHz) MHz() MHz { + return MHz(hz / 1000) +} + +func (hz KHz) String() string { + return strconv.FormatUint(uint64(hz.MHz()), 10) +} + +// A Topology provides a bird-eye view of the system NUMA topology. +// +// The JSON encoding is not used yet but my be part of the gRPC plumbing +// in the future. +type Topology struct { + NodeIDs *idset.Set[NodeID] `json:"node_ids"` + Distances SLIT `json:"distances"` + Cores []Core `json:"cores"` + + // explicit overrides from client configuration + OverrideTotalCompute MHz `json:"override_total_compute"` + OverrideWitholdCompute MHz `json:"override_withhold_compute"` +} + +// A Core represents one logical (vCPU) core on a processor. Basically the slice +// of cores detected should match up with the vCPU description in cloud providers. 
+type Core struct { + NodeID NodeID `json:"node_id"` + SocketID SocketID `json:"socket_id"` + ID CoreID `json:"id"` + Grade CoreGrade `json:"grade"` + Disable bool `json:"disable"` // indicates whether Nomad must not use this core + BaseSpeed MHz `json:"base_speed"` // cpuinfo_base_freq (primary choice) + MaxSpeed MHz `json:"max_speed"` // cpuinfo_max_freq (second choice) + GuessSpeed MHz `json:"guess_speed"` // best effort (fallback) +} + +func (c Core) String() string { + return fmt.Sprintf( + "(%d %d %d %s %d %d)", + c.NodeID, c.SocketID, c.ID, c.Grade, c.MaxSpeed, c.BaseSpeed, + ) +} + +func (c Core) MHz() MHz { + switch { + case c.BaseSpeed > 0: + return c.BaseSpeed + case c.MaxSpeed > 0: + return c.MaxSpeed + } + return c.GuessSpeed +} + +// SLIT (system locality information table) describes the relative cost for +// accessing memory across each combination of NUMA boundary. +type SLIT [][]Cost + +func (d SLIT) cost(a, b NodeID) Cost { + return d[a][b] +} + +// SupportsNUMA returns whether Nomad supports NUMA detection on the client's +// operating system. Currently only supported on Linux. +func (st *Topology) SupportsNUMA() bool { + switch runtime.GOOS { + case "linux": + return true + default: + return false + } +} + +// Nodes returns the set of NUMA Node IDs. +func (st *Topology) Nodes() *idset.Set[NodeID] { + if !st.SupportsNUMA() { + return nil + } + return st.NodeIDs +} + +// NodeCores returns the set of Core IDs for the given NUMA Node ID. 
+func (st *Topology) NodeCores(node NodeID) *idset.Set[CoreID] { + result := idset.Empty[CoreID]() + for _, cpu := range st.Cores { + if cpu.NodeID == node { + result.Insert(cpu.ID) + } + } + return result +} + +func (st *Topology) insert(node NodeID, socket SocketID, core CoreID, grade CoreGrade, max, base KHz) { + st.Cores[core] = Core{ + NodeID: node, + SocketID: socket, + ID: core, + Grade: grade, + MaxSpeed: max.MHz(), + BaseSpeed: base.MHz(), + } +} + +func (st *Topology) String() string { + var sb strings.Builder + for _, cpu := range st.Cores { + sb.WriteString(cpu.String()) + } + return sb.String() +} + +// TotalCompute returns the amount of compute in MHz the detected hardware is +// ultimately capable of delivering. The UsableCompute will be equal to or +// less than this value. +// +// If the client configuration includes an override for total compute, that +// value is used instead even if it violates the above invariant. +func (st *Topology) TotalCompute() MHz { + if st.OverrideTotalCompute > 0 { + return st.OverrideTotalCompute + } + + var total MHz + for _, cpu := range st.Cores { + total += cpu.MHz() + } + return total +} + +// UsableCompute returns the amount of compute in MHz the Nomad client is able +// to make use of for running tasks. This value will be less than or equal to +// the TotalCompute of the system. Nomad must subtract off any reserved compute +// (reserved.cpu or reserved.cores) from the total hardware compute. +func (st *Topology) UsableCompute() MHz { + var total MHz + for _, cpu := range st.Cores { + if !cpu.Disable { + total += cpu.MHz() + } + } + return total +} + +// NumCores returns the number of logical cores detected. This includes both +// power and efficiency cores. +func (st *Topology) NumCores() int { + return len(st.Cores) +} + +// NumPCores returns the number of logical performance cores detected. 
+func (st *Topology) NumPCores() int { + var total int + for _, cpu := range st.Cores { + if cpu.Grade == performance { + total++ + } + } + return total +} + +// NumECores returns the number of logical efficiency cores detected. +func (st *Topology) NumECores() int { + var total int + for _, cpu := range st.Cores { + if cpu.Grade == efficiency { + total++ + } + } + return total +} + +// UsableCores returns the number of logical cores usable by the Nomad client +// for running tasks. Nomad must subtract off any reserved cores (reserved.cores) +// and/or must mask the cpuset to the one set in config (config.reservable_cores). +func (st *Topology) UsableCores() *idset.Set[CoreID] { + result := idset.Empty[CoreID]() + for _, cpu := range st.Cores { + if !cpu.Disable { + result.Insert(cpu.ID) + } + } + return result +} + +// CoreSpeeds returns the frequency in MHz of the performance and efficiency +// core types. If the CPU does not have effiency cores that value will be zero. +func (st *Topology) CoreSpeeds() (MHz, MHz) { + var pCore, eCore MHz + for _, cpu := range st.Cores { + switch cpu.Grade { + case performance: + pCore = cpu.MHz() + case efficiency: + eCore = cpu.MHz() + } + } + return pCore, eCore +} diff --git a/client/lib/proclib/config.go b/client/lib/proclib/config.go new file mode 100644 index 000000000..2f7383be6 --- /dev/null +++ b/client/lib/proclib/config.go @@ -0,0 +1,15 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package proclib + +import ( + "github.com/hashicorp/go-hclog" +) + +// Configs is used to pass along values from client configuration that are +// build-tag specific. These are not the final representative values, just what +// was set in agent configuration. 
+type Configs struct { + Logger hclog.Logger +} diff --git a/client/lib/proclib/wrangler.go b/client/lib/proclib/wrangler.go new file mode 100644 index 000000000..2d3807784 --- /dev/null +++ b/client/lib/proclib/wrangler.go @@ -0,0 +1,85 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package proclib + +import ( + "fmt" + "sync" +) + +// Task records the unique coordinates of a task from the perspective of a Nomad +// client running the task, that is to say (alloc_id, task_name). +type Task struct { + AllocID string + Task string +} + +func (task Task) String() string { + return fmt.Sprintf("%s/%s", task.AllocID[0:8], task.Task) +} + +type create func(Task) ProcessWrangler + +// Wranglers keeps track of the ProcessWrangler created for each task. Some +// operating systems may implement ProcessWranglers to ensure that all of the +// processes created by a Task are killed, going a step beyond trusting the +// task drivers to properly clean things up. (Well, on Linux anyway.) +// +// This state must be restored on Client agent startup. +type Wranglers struct { + configs *Configs + create create + + lock sync.Mutex + m map[Task]ProcessWrangler +} + +// Setup any process management technique relevant to the operating system and +// its particular configuration. +func (w *Wranglers) Setup(task Task) error { + w.configs.Logger.Trace("setup client process management", "task", task) + + // create process wrangler for task + pw := w.create(task) + + // perform any initialization if necessary + pw.Initialize() + + w.lock.Lock() + defer w.lock.Unlock() + + // keep track of the process wrangler for task + w.m[task] = pw + + return nil +} + +// Destroy any processes still running that were spawned by task. Ideally the +// task driver should be implemented well enough for this to not be necessary, +// but we protect the Client as best we can regardless. +// +// Note that this is called from a TR.Stop which must be idempotent. 
+func (w *Wranglers) Destroy(task Task) error { + w.configs.Logger.Trace("destroy and cleanup remnant task processes", "task", task) + + w.lock.Lock() + defer w.lock.Unlock() + + if pw, exists := w.m[task]; exists { + pw.Kill() + pw.Cleanup() + delete(w.m, task) + } + + return nil +} + +// A ProcessWrangler "owns" a particular Task on a client, enabling the client +// to kill and cleanup processes created by that Task, without help from the +// task driver. Currently we have implementations only for Linux (via cgroups). +type ProcessWrangler interface { + Initialize() error + Kill() error + Cleanup() error +} diff --git a/client/lib/proclib/wrangler_cg1_linux.go b/client/lib/proclib/wrangler_cg1_linux.go new file mode 100644 index 000000000..faedc1062 --- /dev/null +++ b/client/lib/proclib/wrangler_cg1_linux.go @@ -0,0 +1,73 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package proclib + +import ( + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "oss.indeed.com/go/libtime/decay" +) + +// LinuxWranglerCG1 is an implementation of ProcessWrangler that leverages +// cgroups v1 on older Linux systems. +// +// e.g. Ubuntu 20.04 / RHEL 8 and previous versions. 
+type LinuxWranglerCG1 struct { + task Task + log hclog.Logger + cg cgroupslib.Lifecycle +} + +func newCG1(c *Configs) create { + logger := c.Logger.Named("cg1") + cgroupslib.Init(logger) + return func(task Task) ProcessWrangler { + return &LinuxWranglerCG1{ + task: task, + log: logger, + cg: cgroupslib.Factory(task.AllocID, task.Task), + } + } +} + +func (w *LinuxWranglerCG1) Initialize() error { + w.log.Trace("initialize cgroups", "task", w.task) + return w.cg.Setup() +} + +func (w *LinuxWranglerCG1) Kill() error { + w.log.Trace("force kill processes in cgroup", "task", w.task) + return w.cg.Kill() +} + +func (w *LinuxWranglerCG1) Cleanup() error { + w.log.Trace("remove cgroups", "task", w.task) + + // need to give the kernel an opportunity to cleanup procs; which could + // take some time while the procs wake from being thawed only to find they + // have been issued a kill signal and need to be reaped + + rm := func() (bool, error) { + err := w.cg.Teardown() + if err != nil { + return true, err + } + return false, nil + } + + go func() { + if err := decay.Backoff(rm, decay.BackoffOptions{ + MaxSleepTime: 30 * time.Second, + InitialGapSize: 1 * time.Second, + }); err != nil { + w.log.Debug("failed to cleanup cgroups", "alloc", w.task.AllocID, "task", w.task.Task, "error", err) + } + }() + + return nil +} diff --git a/client/lib/proclib/wrangler_cg2_linux.go b/client/lib/proclib/wrangler_cg2_linux.go new file mode 100644 index 000000000..40f58927c --- /dev/null +++ b/client/lib/proclib/wrangler_cg2_linux.go @@ -0,0 +1,48 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package proclib + +import ( + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/lib/cgroupslib" +) + +// LinuxWranglerCG2 is an implementation of ProcessWrangler that leverages +// cgroups v2 on modern Linux systems. +// +// e.g. Ubuntu 22.04 / RHEL 9 and later versions. 
+type LinuxWranglerCG2 struct { + task Task + log hclog.Logger + cg cgroupslib.Lifecycle +} + +func newCG2(c *Configs) create { + logger := c.Logger.Named("cg2") + cgroupslib.Init(logger) + return func(task Task) ProcessWrangler { + return &LinuxWranglerCG2{ + task: task, + log: c.Logger, + cg: cgroupslib.Factory(task.AllocID, task.Task), + } + } +} + +func (w LinuxWranglerCG2) Initialize() error { + w.log.Trace("initialize cgroup", "task", w.task) + return w.cg.Setup() +} + +func (w *LinuxWranglerCG2) Kill() error { + w.log.Trace("force kill processes in cgroup", "task", w.task) + return w.cg.Kill() +} + +func (w *LinuxWranglerCG2) Cleanup() error { + w.log.Trace("remove cgroup", "task", w.task) + return w.cg.Teardown() +} diff --git a/client/lib/proclib/wrangler_cg2_linux_test.go b/client/lib/proclib/wrangler_cg2_linux_test.go new file mode 100644 index 000000000..a9b88ee4d --- /dev/null +++ b/client/lib/proclib/wrangler_cg2_linux_test.go @@ -0,0 +1,8 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package proclib + +var _ ProcessWrangler = (*LinuxWranglerCG2)(nil) diff --git a/client/lib/proclib/wrangler_default.go b/client/lib/proclib/wrangler_default.go new file mode 100644 index 000000000..dcb8eb24a --- /dev/null +++ b/client/lib/proclib/wrangler_default.go @@ -0,0 +1,40 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !linux + +package proclib + +// New creates a Wranglers backed by the DefaultWrangler implementation, which +// does not do anything. +func New(configs *Configs) *Wranglers { + w := &Wranglers{ + configs: configs, + m: make(map[Task]ProcessWrangler), + create: doNothing(configs), + } + + return w +} + +func doNothing(*Configs) create { + return func(Task) ProcessWrangler { + return new(DefaultWrangler) + } +} + +// A DefaultWrangler has a no-op implementation. In the task drivers +// we trust for cleaning themselves up. 
+type DefaultWrangler struct{} + +func (w *DefaultWrangler) Initialize() error { + return nil +} + +func (w *DefaultWrangler) Kill() error { + return nil +} + +func (w *DefaultWrangler) Cleanup() error { + return nil +} diff --git a/client/lib/proclib/wrangler_linux.go b/client/lib/proclib/wrangler_linux.go new file mode 100644 index 000000000..1edf07692 --- /dev/null +++ b/client/lib/proclib/wrangler_linux.go @@ -0,0 +1,28 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package proclib + +import ( + "github.com/hashicorp/nomad/client/lib/cgroupslib" +) + +// New creates a Wranglers factory for creating ProcessWrangler's appropriate +// for the given system (i.e. cgroups v1 or cgroups v2). +func New(configs *Configs) *Wranglers { + w := &Wranglers{ + configs: configs, + m: make(map[Task]ProcessWrangler), + } + + switch cgroupslib.GetMode() { + case cgroupslib.CG1: + w.create = newCG1(configs) + default: + w.create = newCG2(configs) + } + + return w +} diff --git a/client/lib/resources/containment.go b/client/lib/resources/containment.go deleted file mode 100644 index 8df561c72..000000000 --- a/client/lib/resources/containment.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package resources - -// A Containment will cleanup resources created by an executor. -type Containment interface { - // Apply enables containment on pid. - Apply(pid int) error - - // Cleanup will purge executor resources like cgroups. - Cleanup() error - - // GetPIDs will return the processes overseen by the Containment - GetPIDs() PIDs -} diff --git a/client/lib/resources/containment_linux.go b/client/lib/resources/containment_linux.go deleted file mode 100644 index 73002dd90..000000000 --- a/client/lib/resources/containment_linux.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package resources - -import ( - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type containment struct { - lock sync.RWMutex - cgroup *configs.Cgroup - logger hclog.Logger -} - -func Contain(logger hclog.Logger, cgroup *configs.Cgroup) *containment { - return &containment{ - cgroup: cgroup, - logger: logger.Named("containment"), - } -} - -func (c *containment) Apply(pid int) error { - c.lock.Lock() - defer c.lock.Unlock() - - c.logger.Trace("create containment for", "cgroup", c.cgroup, "pid", pid) - - // for v2 use manager to create and enter the cgroup - if cgutil.UseV2 { - mgr, err := fs2.NewManager(c.cgroup, "") - if err != nil { - return fmt.Errorf("failed to create v2 cgroup manager for containment: %w", err) - } - - // add the pid to the cgroup - if err = mgr.Apply(pid); err != nil { - return fmt.Errorf("failed to apply v2 cgroup containment: %w", err) - } - - // in v2 it is important to set the device resource configuration - if err = mgr.Set(c.cgroup.Resources); err != nil { - return fmt.Errorf("failed to set v2 cgroup resources: %w", err) - } - - return nil - } - - // for v1 a random cgroup was created already; just enter it - if err := cgroups.EnterPid(map[string]string{"freezer": c.cgroup.Path}, pid); err != nil { - return fmt.Errorf("failed to add pid to v1 cgroup: %w", err) - } - - return nil -} - -func (c *containment) Cleanup() error { - c.lock.Lock() - defer c.lock.Unlock() - - // the current pid is of the executor, who manages the task process cleanup - executorPID := os.Getpid() - c.logger.Trace("cleanup on", "cgroup", c.cgroup, "executor_pid", executorPID) - - // destroy the task processes - destroyer := cgutil.NewGroupKiller(c.logger, 
executorPID) - return destroyer.KillGroup(c.cgroup) -} - -func (c *containment) GetPIDs() PIDs { - c.lock.Lock() - defer c.lock.Unlock() - - m := make(PIDs) - if c.cgroup == nil { - return m - } - - // get the cgroup path under containment - var path string - if cgutil.UseV2 { - path = filepath.Join(cgutil.CgroupRoot, c.cgroup.Path) - } else { - path = c.cgroup.Path - } - - // find the pids in the cgroup under containment - pids, err := cgroups.GetAllPids(path) - if err != nil { - c.logger.Debug("failed to get pids", "cgroup", c.cgroup, "error", err) - return m - } - - for _, pid := range pids { - m[pid] = NewPID(pid) - } - - return m -} diff --git a/client/lib/resources/pid.go b/client/lib/resources/pid.go deleted file mode 100644 index 8365f1671..000000000 --- a/client/lib/resources/pid.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package resources - -import ( - "github.com/hashicorp/nomad/helper/stats" -) - -// PIDs holds all of a task's pids and their cpu percentage calculators -type PIDs map[int]*PID - -// PID holds one task's pid and it's cpu percentage calculator -type PID struct { - PID int - StatsTotalCPU *stats.CpuStats - StatsUserCPU *stats.CpuStats - StatsSysCPU *stats.CpuStats -} - -func NewPID(pid int) *PID { - return &PID{ - PID: pid, - StatsTotalCPU: stats.NewCpuStats(), - StatsUserCPU: stats.NewCpuStats(), - StatsSysCPU: stats.NewCpuStats(), - } -} diff --git a/client/state/upgrade_int_test.go b/client/state/upgrade_int_test.go index 8832424b4..5972deab9 100644 --- a/client/state/upgrade_int_test.go +++ b/client/state/upgrade_int_test.go @@ -20,6 +20,7 @@ import ( clientconfig "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/devicemanager" dmstate "github.com/hashicorp/nomad/client/devicemanager/state" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" regMock 
"github.com/hashicorp/nomad/client/serviceregistration/mock" . "github.com/hashicorp/nomad/client/state" @@ -213,6 +214,7 @@ func checkUpgradedAlloc(t *testing.T, path string, db StateDB, alloc *structs.Al PrevAllocMigrator: allocwatcher.NoopPrevAlloc{}, DeviceManager: devicemanager.NoopMockManager(), DriverManager: drivermanager.TestDriverManager(t), + Wranglers: proclib.New(&proclib.Configs{Logger: testlog.HCLogger(t)}), } ar, err := allocrunner.NewAllocRunner(conf) require.NoError(t, err) diff --git a/client/stats/cpu.go b/client/stats/cpu.go deleted file mode 100644 index b193cb6f5..000000000 --- a/client/stats/cpu.go +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package stats - -import ( - "context" - "errors" - "fmt" - "sync" - "time" - - "github.com/hashicorp/nomad/helper/stats" - "github.com/shirou/gopsutil/v3/cpu" - "github.com/shoenig/go-m1cpu" -) - -const ( - // cpuInfoTimeout is the timeout used when gathering CPU info. This is used - // to override the default timeout in gopsutil which has a tendency to - // timeout on Windows. 
- cpuInfoTimeout = 60 * time.Second -) - -var ( - cpuPowerCoreCount int - cpuPowerCoreMHz uint64 - cpuEfficiencyCoreCount int - cpuEfficiencyCoreMHz uint64 - cpuModelName string -) - -var ( - detectedCpuTotalTicks uint64 - initErr error - onceLer sync.Once -) - -func Init(configCpuTotalCompute uint64) error { - onceLer.Do(func() { - switch { - case m1cpu.IsAppleSilicon(): - cpuModelName = m1cpu.ModelName() - cpuPowerCoreCount = m1cpu.PCoreCount() - cpuPowerCoreMHz = m1cpu.PCoreHz() / 1_000_000 - cpuEfficiencyCoreCount = m1cpu.ECoreCount() - cpuEfficiencyCoreMHz = m1cpu.ECoreHz() / 1_000_000 - bigTicks := uint64(cpuPowerCoreCount) * cpuPowerCoreMHz - littleTicks := uint64(cpuEfficiencyCoreCount) * cpuEfficiencyCoreMHz - detectedCpuTotalTicks = bigTicks + littleTicks - default: - // for now, all other cpu types assume only power cores - // todo: this is already not true for Intel 13th generation - - var err error - if cpuPowerCoreCount, err = cpu.Counts(true); err != nil { - initErr = errors.Join(initErr, fmt.Errorf("failed to detect number of CPU cores: %w", err)) - } - - ctx, cancel := context.WithTimeout(context.Background(), cpuInfoTimeout) - defer cancel() - - var cpuInfoStats []cpu.InfoStat - if cpuInfoStats, err = cpu.InfoWithContext(ctx); err != nil { - initErr = errors.Join(initErr, fmt.Errorf("Unable to obtain CPU information: %w", err)) - } - - for _, infoStat := range cpuInfoStats { - cpuModelName = infoStat.ModelName - if uint64(infoStat.Mhz) > cpuPowerCoreMHz { - cpuPowerCoreMHz = uint64(infoStat.Mhz) - } - } - - // compute ticks using only power core, until we add support for - // detecting little cores on non-apple platforms - detectedCpuTotalTicks = uint64(cpuPowerCoreCount) * cpuPowerCoreMHz - - initErr = err - } - - stats.SetCpuTotalTicks(detectedCpuTotalTicks) - }) - - // override the computed value with the config value if it is set - if configCpuTotalCompute > 0 { - stats.SetCpuTotalTicks(configCpuTotalCompute) - } - - return initErr -} - -// 
CPUNumCores returns the number of CPU cores available. -// -// This is represented with two values - (Power (P), Efficiency (E)) so we can -// correctly compute total compute for processors with asymetric cores such as -// Apple Silicon. -// -// For platforms with symetric cores (or where we do not correcly detect asymetric -// cores), all cores are presented as P cores. -func CPUNumCores() (int, int) { - return cpuPowerCoreCount, cpuEfficiencyCoreCount -} - -// CPUMHzPerCore returns the MHz per CPU (P, E) core type. -// -// As with CPUNumCores, asymetric core detection currently only works with -// Apple Silicon CPUs. -func CPUMHzPerCore() (uint64, uint64) { - return cpuPowerCoreMHz, cpuEfficiencyCoreMHz -} - -// CPUModelName returns the model name of the CPU. -func CPUModelName() string { - return cpuModelName -} - -func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) { - - ticksConsumed := 0.0 - cpuStats, err := cpu.Times(true) - if err != nil { - return nil, 0.0, err - } - cs := make([]*CPUStats, len(cpuStats)) - for idx, cpuStat := range cpuStats { - percentCalculator, ok := h.statsCalculator[cpuStat.CPU] - if !ok { - percentCalculator = NewHostCpuStatsCalculator() - h.statsCalculator[cpuStat.CPU] = percentCalculator - } - idle, user, system, total := percentCalculator.Calculate(cpuStat) - ticks := (total / 100.0) * (float64(stats.CpuTotalTicks()) / float64(len(cpuStats))) - cs[idx] = &CPUStats{ - CPU: cpuStat.CPU, - User: user, - System: system, - Idle: idle, - TotalPercent: total, - TotalTicks: ticks, - } - ticksConsumed += ticks - } - - return cs, ticksConsumed, nil -} diff --git a/client/stats/cpu_darwin_test.go b/client/stats/cpu_darwin_test.go deleted file mode 100644 index f5b21b4f9..000000000 --- a/client/stats/cpu_darwin_test.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -//go:build darwin && arm64 && cgo - -package stats - -import ( - "testing" - - "github.com/shoenig/test/must" -) - -func TestCPU_Init(t *testing.T) { - must.NoError(t, Init()) -} - -func TestCPU_CPUNumCores(t *testing.T) { - big, little := CPUNumCores() - must.Between(t, 4, big, 32) - must.Between(t, 2, little, 8) -} - -func TestCPU_CPUMHzPerCore(t *testing.T) { - big, little := CPUMHzPerCore() - must.Between(t, 3_000, big, 6_000) - must.Between(t, 2_000, little, 4_000) -} - -func TestCPU_CPUModelName(t *testing.T) { - name := CPUModelName() - must.NotEq(t, "", name) -} - -func TestCPU_CPUCpuTotalTicks(t *testing.T) { - ticks := CpuTotalTicks() - must.Positive(t, ticks) -} diff --git a/client/stats/cpu_test.go b/client/stats/cpu_test.go deleted file mode 100644 index 8bbea2be7..000000000 --- a/client/stats/cpu_test.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package stats - -import ( - "math" - "os" - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/stretchr/testify/assert" -) - -func TestHostStats_CPU(t *testing.T) { - ci.Parallel(t) - - assert := assert.New(t) - assert.Nil(Init(0)) - - logger := testlog.HCLogger(t) - cwd, err := os.Getwd() - assert.Nil(err) - hs := NewHostStatsCollector(logger, cwd, nil) - - // Collect twice so we can calculate percents we need to generate some work - // so that the cpu values change - assert.Nil(hs.Collect()) - total := 0 - for i := 1; i < 1000000000; i++ { - total *= i - total = total % i - } - assert.Nil(hs.Collect()) - stats := hs.Stats() - - assert.NotZero(stats.CPUTicksConsumed) - assert.NotZero(len(stats.CPU)) - - for _, cpu := range stats.CPU { - assert.False(math.IsNaN(cpu.Idle)) - assert.False(math.IsNaN(cpu.TotalPercent)) - assert.False(math.IsNaN(cpu.TotalTicks)) - assert.False(math.IsNaN(cpu.System)) - assert.False(math.IsNaN(cpu.User)) - - 
assert.False(math.IsInf(cpu.Idle, 0)) - assert.False(math.IsInf(cpu.TotalPercent, 0)) - assert.False(math.IsInf(cpu.TotalTicks, 0)) - assert.False(math.IsInf(cpu.System, 0)) - assert.False(math.IsInf(cpu.User, 0)) - } -} diff --git a/client/structs/structs.go b/client/structs/structs.go index 3e0340c32..21883e192 100644 --- a/client/structs/structs.go +++ b/client/structs/structs.go @@ -9,7 +9,7 @@ import ( "errors" "time" - "github.com/hashicorp/nomad/client/stats" + "github.com/hashicorp/nomad/client/hoststats" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/device" ) @@ -33,7 +33,7 @@ func (r *RpcError) Error() string { // ClientStatsResponse is used to return statistics about a node. type ClientStatsResponse struct { - HostStats *stats.HostStats + HostStats *hoststats.HostStats structs.QueryMeta } diff --git a/command/agent/agent.go b/command/agent/agent.go index 89242ca44..51d4803d9 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -23,7 +23,6 @@ import ( uuidparse "github.com/hashicorp/go-uuid" "github.com/hashicorp/nomad/client" clientconfig "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/lib/cgutil" "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/command/agent/event" @@ -842,15 +841,6 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { } conf.BindWildcardDefaultHostNetwork = agentConfig.Client.BindWildcardDefaultHostNetwork - conf.CgroupParent = cgutil.GetCgroupParent(agentConfig.Client.CgroupParent) - if agentConfig.Client.ReserveableCores != "" { - cores, err := cpuset.Parse(agentConfig.Client.ReserveableCores) - if err != nil { - return nil, fmt.Errorf("failed to parse 'reservable_cores': %v", err) - } - conf.ReservableCores = cores.ToSlice() - } - if agentConfig.Client.NomadServiceDiscovery != nil { conf.NomadServiceDiscovery = *agentConfig.Client.NomadServiceDiscovery } 
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index 29f5e2452..b60f724f8 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -12,10 +12,6 @@ import ( "testing" "time" - "github.com/shoenig/test/must" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/pointer" @@ -24,6 +20,9 @@ import ( "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/raft" + "github.com/shoenig/test/must" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestAgent_RPC_Ping(t *testing.T) { @@ -745,19 +744,6 @@ func TestAgent_ClientConfig_JobMaxSourceSize(t *testing.T) { must.Eq(t, 1e6, serverConf.JobMaxSourceSize) } -func TestAgent_ClientConfig_ReservedCores(t *testing.T) { - ci.Parallel(t) - conf := DefaultConfig() - conf.Client.Enabled = true - conf.Client.ReserveableCores = "0-7" - conf.Client.Reserved.Cores = "0,2-3" - a := &Agent{config: conf} - c, err := a.clientConfig() - must.NoError(t, err) - must.Eq(t, []uint16{0, 1, 2, 3, 4, 5, 6, 7}, c.ReservableCores) - must.Eq(t, []uint16{0, 2, 3}, c.Node.ReservedResources.Cpu.ReservedCpuCores) -} - // Clients should inherit telemetry configuration func TestAgent_Client_TelemetryConfiguration(t *testing.T) { ci.Parallel(t) diff --git a/command/agent/consul/int_test.go b/command/agent/consul/int_test.go index 48efe019c..2bb575c87 100644 --- a/command/agent/consul/int_test.go +++ b/command/agent/consul/int_test.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/client/allocrunner/taskrunner" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/devicemanager" + "github.com/hashicorp/nomad/client/lib/proclib" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" regMock 
"github.com/hashicorp/nomad/client/serviceregistration/mock" "github.com/hashicorp/nomad/client/serviceregistration/wrapper" @@ -169,6 +170,7 @@ func TestConsul_Integration(t *testing.T) { DriverManager: drivermanager.TestDriverManager(t), StartConditionMetCh: closedCh, ServiceRegWrapper: wrapper.NewHandlerWrapper(logger, serviceClient, regMock.NewServiceRegistrationHandler(logger)), + Wranglers: proclib.New(&proclib.Configs{Logger: testlog.HCLogger(t)}), } tr, err := taskrunner.NewTaskRunner(config) diff --git a/drivers/docker/cmd/main.go b/drivers/docker/cmd/main.go index 350956d05..1ed812882 100644 --- a/drivers/docker/cmd/main.go +++ b/drivers/docker/cmd/main.go @@ -14,6 +14,7 @@ import ( log "github.com/hashicorp/go-hclog" plugin "github.com/hashicorp/go-plugin" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/drivers/docker" "github.com/hashicorp/nomad/drivers/docker/docklog" "github.com/hashicorp/nomad/plugins" @@ -51,5 +52,6 @@ func main() { // factory returns a new instance of the docker driver plugin func factory(ctx context.Context, log log.Logger) interface{} { - return docker.NewDockerDriver(ctx, log) + top := numalib.Scan(numalib.PlatformScanners()) + return docker.NewDockerDriver(ctx, top, log) } diff --git a/drivers/docker/config.go b/drivers/docker/config.go index 22f4d1436..945a32e01 100644 --- a/drivers/docker/config.go +++ b/drivers/docker/config.go @@ -124,7 +124,7 @@ var ( // PluginConfig is the docker config factory function registered in the plugin catalog. PluginConfig = &loader.InternalPluginConfig{ Config: map[string]interface{}{}, - Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewDockerDriver(ctx, l) }, + Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewDockerDriver(ctx, nil, l) }, } // pluginInfo is the response returned for the PluginInfo RPC. 
@@ -781,8 +781,6 @@ func (d *Driver) SetConfig(c *base.Config) error { d.danglingReconciler = newReconciler(d) - d.cpusetFixer = newCpusetFixer(d) - go d.recoverPauseContainers(d.ctx) return nil diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index 49624a53a..984f19fd9 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -23,7 +23,9 @@ import ( multierror "github.com/hashicorp/go-multierror" plugin "github.com/hashicorp/go-plugin" "github.com/hashicorp/go-set" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/client/lib/cpustats" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/drivers/docker/docklog" "github.com/hashicorp/nomad/drivers/shared/capabilities" @@ -138,6 +140,9 @@ type Driver struct { // gpuRuntime indicates nvidia-docker runtime availability gpuRuntime bool + // top contains information about the system topology + top cpustats.Topology + // A tri-state boolean to know if the fingerprinting has happened and // whether it has been successful fingerprintSuccess *bool @@ -153,11 +158,10 @@ type Driver struct { infinityClient *docker.Client // for wait and stop calls (use getInfinityClient()) danglingReconciler *containerReconciler - cpusetFixer CpusetFixer } // NewDockerDriver returns a docker implementation of a driver plugin -func NewDockerDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { +func NewDockerDriver(ctx context.Context, top cpustats.Topology, logger hclog.Logger) drivers.DriverPlugin { logger = logger.Named(pluginName) driver := &Driver{ eventer: eventer.NewEventer(ctx, logger), @@ -166,8 +170,8 @@ func NewDockerDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlu pauseContainers: newPauseContainerStore(), ctx: ctx, logger: logger, + top: numalib.Scan(numalib.PlatformScanners()), // TODO(shoenig) grpc plumbing } - 
return driver } @@ -403,17 +407,6 @@ CREATE: container.ID, "container_state", container.State.String()) } - if !cgutil.UseV2 { - // This does not apply to cgroups.v2, which only allows setting the PID - // into exactly 1 group. For cgroups.v2, we use the cpuset fixer to reconcile - // the cpuset value into the cgroups created by docker in the background. - if containerCfg.HostConfig.CPUSet == "" && cfg.Resources.LinuxResources.CpusetCgroupPath != "" { - if err := setCPUSetCgroup(cfg.Resources.LinuxResources.CpusetCgroupPath, container.State.Pid); err != nil { - return nil, nil, fmt.Errorf("failed to set the cpuset cgroup for container: %v", err) - } - } - } - collectingLogs := loggingIsEnabled(d.config, cfg) var dlogger docklog.DockerLogger @@ -911,15 +904,6 @@ func memoryLimits(driverHardLimitMB int64, taskMemory drivers.MemoryResources) ( return hard * 1024 * 1024, softBytes } -// Extract the cgroup parent from the nomad cgroup (only for linux/v2) -func cgroupParent(resources *drivers.Resources) string { - var parent string - if cgutil.UseV2 && resources != nil && resources.LinuxResources != nil { - parent, _ = cgutil.SplitPath(resources.LinuxResources.CpusetCgroupPath) - } - return parent -} - func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig, imageID string) (docker.CreateContainerOptions, error) { @@ -992,7 +976,7 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T } hostConfig := &docker.HostConfig{ - CgroupParent: cgroupParent(task.Resources), // if applicable + // TODO(shoenig) set cgroup parent when we do partitioning Memory: memory, // hard limit MemoryReservation: memoryReservation, // soft limit @@ -1051,9 +1035,8 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T hostConfig.MemorySwap = memory // disable swap explicitly in non-Windows environments - var swapiness int64 = 0 - hostConfig.MemorySwappiness = &swapiness - + swappiness := 
int64(*(cgroupslib.MaybeDisableMemorySwappiness())) + hostConfig.MemorySwappiness = &swappiness } loggingDriver := driverConfig.Logging.Type @@ -1683,7 +1666,7 @@ func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Dur return nil, drivers.ErrTaskNotFound } - return h.Stats(ctx, interval) + return h.Stats(ctx, interval, d.top) } func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { diff --git a/drivers/docker/driver_darwin.go b/drivers/docker/driver_darwin.go deleted file mode 100644 index 5e3052dba..000000000 --- a/drivers/docker/driver_darwin.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build darwin - -package docker - -func setCPUSetCgroup(path string, pid int) error { - return nil -} diff --git a/drivers/docker/driver_linux.go b/drivers/docker/driver_linux.go deleted file mode 100644 index a11cff490..000000000 --- a/drivers/docker/driver_linux.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package docker - -import ( - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" -) - -func setCPUSetCgroup(path string, pid int) error { - // Sometimes the container exits before we can write the - // cgroup resulting in an error which can be ignored. 
- err := cgroups.WriteCgroupProc(path, pid) - if err != nil && strings.Contains(err.Error(), "no such process") { - return nil - } - return err -} diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go index 92e0bc373..ffd9272fa 100644 --- a/drivers/docker/driver_test.go +++ b/drivers/docker/driver_test.go @@ -20,11 +20,8 @@ import ( docker "github.com/fsouza/go-dockerclient" hclog "github.com/hashicorp/go-hclog" - "github.com/shoenig/test/must" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/drivers/shared/capabilities" @@ -38,6 +35,9 @@ import ( "github.com/hashicorp/nomad/plugins/drivers" dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils" tu "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var ( @@ -57,6 +57,10 @@ var ( } ) +var ( + top = numalib.Scan(numalib.PlatformScanners()) +) + func dockerIsRemote(t *testing.T) bool { client, err := docker.NewClientFromEnv() if err != nil { @@ -183,7 +187,7 @@ func dockerDriverHarness(t *testing.T, cfg map[string]interface{}) *dtestutil.Dr logger := testlog.HCLogger(t) ctx, cancel := context.WithCancel(context.Background()) t.Cleanup(func() { cancel() }) - harness := dtestutil.NewDriverHarness(t, NewDockerDriver(ctx, logger)) + harness := dtestutil.NewDriverHarness(t, NewDockerDriver(ctx, top, logger)) if cfg == nil { cfg = map[string]interface{}{ "gc": map[string]interface{}{ @@ -2086,7 +2090,7 @@ func TestDockerDriver_Stats(t *testing.T) { defer d.DestroyTask(task.ID, true) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - ch, err := handle.Stats(ctx, 1*time.Second) + ch, err := handle.Stats(ctx, 1*time.Second, top) assert.NoError(t, 
err) select { case ru := <-ch: @@ -2906,32 +2910,6 @@ func TestDockerDriver_memoryLimits(t *testing.T) { } } -func TestDockerDriver_cgroupParent(t *testing.T) { - ci.Parallel(t) - - t.Run("v1", func(t *testing.T) { - testutil.CgroupsCompatibleV1(t) - - parent := cgroupParent(&drivers.Resources{ - LinuxResources: &drivers.LinuxResources{ - CpusetCgroupPath: "/sys/fs/cgroup/cpuset/nomad", - }, - }) - require.Equal(t, "", parent) - }) - - t.Run("v2", func(t *testing.T) { - testutil.CgroupsCompatibleV2(t) - - parent := cgroupParent(&drivers.Resources{ - LinuxResources: &drivers.LinuxResources{ - CpusetCgroupPath: "/sys/fs/cgroup/nomad.slice", - }, - }) - require.Equal(t, "nomad.slice", parent) - }) -} - func TestDockerDriver_parseSignal(t *testing.T) { ci.Parallel(t) diff --git a/drivers/docker/driver_windows.go b/drivers/docker/driver_windows.go index 61275c4ca..65bf0c273 100644 --- a/drivers/docker/driver_windows.go +++ b/drivers/docker/driver_windows.go @@ -15,7 +15,3 @@ func getPortBinding(ip string, port string) docker.PortBinding { func tweakCapabilities(basics, adds, drops []string) ([]string, error) { return nil, nil } - -func setCPUSetCgroup(path string, pid int) error { - return nil -} diff --git a/drivers/docker/fingerprint.go b/drivers/docker/fingerprint.go index 0ee93d645..dbff7ef7f 100644 --- a/drivers/docker/fingerprint.go +++ b/drivers/docker/fingerprint.go @@ -10,7 +10,6 @@ import ( "strings" "time" - "github.com/hashicorp/nomad/client/lib/cgutil" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/plugins/drivers" "github.com/hashicorp/nomad/plugins/drivers/utils" @@ -21,7 +20,6 @@ func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, // Start docker reconcilers when we start fingerprinting, a workaround for // task drivers not having a kind of post-setup hook. 
d.danglingReconciler.Start() - d.cpusetFixer.Start() ch := make(chan *drivers.Fingerprint) go d.handleFingerprint(ctx, ch) @@ -90,8 +88,8 @@ func (d *Driver) buildFingerprint() *drivers.Fingerprint { HealthDescription: drivers.DriverHealthy, } - // disable if cgv2 && non-root - if cgutil.UseV2 && !utils.IsUnixRoot() { + // disable if non-root on linux systems + if runtime.GOOS == "linux" && !utils.IsUnixRoot() { fp.Health = drivers.HealthStateUndetected fp.HealthDescription = drivers.DriverRequiresRootMessage d.setFingerprintFailure() diff --git a/drivers/docker/fingerprint_test.go b/drivers/docker/fingerprint_test.go index 8a1d444b2..7f6c7b83d 100644 --- a/drivers/docker/fingerprint_test.go +++ b/drivers/docker/fingerprint_test.go @@ -25,7 +25,7 @@ func TestDockerDriver_FingerprintHealth(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - d := NewDockerDriver(ctx, testlog.HCLogger(t)).(*Driver) + d := NewDockerDriver(ctx, top, testlog.HCLogger(t)).(*Driver) fp := d.buildFingerprint() must.Eq(t, drivers.HealthStateHealthy, fp.Health) @@ -42,7 +42,7 @@ func TestDockerDriver_NonRoot_CGV2(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - d := NewDockerDriver(ctx, testlog.HCLogger(t)).(*Driver) + d := NewDockerDriver(ctx, top, testlog.HCLogger(t)).(*Driver) fp := d.buildFingerprint() must.Eq(t, drivers.HealthStateUndetected, fp.Health) diff --git a/drivers/docker/reconcile_cpuset.go b/drivers/docker/reconcile_cpuset.go deleted file mode 100644 index 347daf221..000000000 --- a/drivers/docker/reconcile_cpuset.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package docker - -import ( - "context" - "fmt" - "path/filepath" - "sync" - "time" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/hashicorp/nomad/helper" -) - -const ( - cpusetReconcileInterval = 1 * time.Second -) - -type CpusetFixer interface { - Start() -} - -// cpusetFixer adjusts the cpuset.cpus cgroup value to the assigned value by Nomad. -// -// Due to Docker not allowing the configuration of the full cgroup path, we must -// manually fix the cpuset values for all docker containers continuously, as the -// values will change as tasks of any driver using reserved cores are started and -// stopped, changing the size of the remaining shared cpu pool. -// -// The exec/java, podman, and containerd runtimes let you specify the cgroup path, -// making use of the cgroup Nomad creates and manages on behalf of the task. -type cpusetFixer struct { - ctx context.Context - logger hclog.Logger - interval time.Duration - once sync.Once - tasks func() map[coordinate]struct{} -} - -func newCpusetFixer(d *Driver) CpusetFixer { - return &cpusetFixer{ - interval: cpusetReconcileInterval, - ctx: d.ctx, - logger: d.logger, - tasks: d.trackedTasks, - } -} - -// Start will start the background cpuset reconciliation until the cf context is -// cancelled for shutdown. -// -// Only runs if cgroups.v2 is in use. 
-func (cf *cpusetFixer) Start() { - cf.once.Do(func() { - if cgutil.UseV2 { - go cf.loop() - } - }) -} - -func (cf *cpusetFixer) loop() { - timer, cancel := helper.NewSafeTimer(0) - defer cancel() - - for { - select { - case <-cf.ctx.Done(): - return - case <-timer.C: - timer.Stop() - cf.apply() - timer.Reset(cf.interval) - } - } -} - -func (cf *cpusetFixer) apply() { - coordinates := cf.tasks() - for c := range coordinates { - cf.fix(c) - } -} - -func (cf *cpusetFixer) fix(c coordinate) { - source := c.NomadCgroup() - destination := c.DockerCgroup() - if err := cgutil.CopyCpuset(source, destination); err != nil { - cf.logger.Debug("failed to copy cpuset", "error", err) - } -} - -type coordinate struct { - containerID string - allocID string - task string - path string -} - -func (c coordinate) NomadCgroup() string { - parent, _ := cgutil.SplitPath(c.path) - return filepath.Join(cgutil.CgroupRoot, parent, cgutil.CgroupScope(c.allocID, c.task)) -} - -func (c coordinate) DockerCgroup() string { - parent, _ := cgutil.SplitPath(c.path) - return filepath.Join(cgutil.CgroupRoot, parent, fmt.Sprintf("docker-%s.scope", c.containerID)) -} - -func (d *Driver) trackedTasks() map[coordinate]struct{} { - d.tasks.lock.RLock() - defer d.tasks.lock.RUnlock() - - m := make(map[coordinate]struct{}, len(d.tasks.store)) - for _, h := range d.tasks.store { - m[coordinate{ - containerID: h.containerID, - allocID: h.task.AllocID, - task: h.task.Name, - path: h.task.Resources.LinuxResources.CpusetCgroupPath, - }] = struct{}{} - } - return m -} diff --git a/drivers/docker/reconcile_cpuset_noop.go b/drivers/docker/reconcile_cpuset_noop.go deleted file mode 100644 index e8e8c77d0..000000000 --- a/drivers/docker/reconcile_cpuset_noop.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: MPL-2.0 - -//go:build !linux - -package docker - -type CpusetFixer interface { - Start() -} - -func newCpusetFixer(*Driver) CpusetFixer { - return new(noop) -} - -type noop struct { - // empty -} - -func (*noop) Start() { - // empty -} diff --git a/drivers/docker/reconcile_cpuset_test.go b/drivers/docker/reconcile_cpuset_test.go deleted file mode 100644 index 857f19103..000000000 --- a/drivers/docker/reconcile_cpuset_test.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -//go:build linux - -package docker - -import ( - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/stretchr/testify/require" -) - -func TestCoordinate_NomadCgroup(t *testing.T) { - ci.Parallel(t) - - result := (coordinate{ - containerID: "c6d05b36f4f56619ca59fbce921115e87dda1661860a4670e3e35ecfa3571ba1", - allocID: "27ee5321-28d6-22d7-9426-4e1888da8e7d", - task: "redis", - path: "/nomad.scope/27ee5321-28d6-22d7-9426-4e1888da8e7d.redis.scope", - }).NomadCgroup() - exp := "/sys/fs/cgroup/nomad.scope/27ee5321-28d6-22d7-9426-4e1888da8e7d.redis.scope" - require.Equal(t, exp, result) -} - -func TestCoordinate_DockerCgroup(t *testing.T) { - ci.Parallel(t) - - result := (coordinate{ - containerID: "c6d05b36f4f56619ca59fbce921115e87dda1661860a4670e3e35ecfa3571ba1", - allocID: "27ee5321-28d6-22d7-9426-4e1888da8e7d", - task: "redis", - path: "/nomad.scope/27ee5321-28d6-22d7-9426-4e1888da8e7d.redis.scope", - }).DockerCgroup() - exp := "/sys/fs/cgroup/nomad.scope/docker-c6d05b36f4f56619ca59fbce921115e87dda1661860a4670e3e35ecfa3571ba1.scope" - require.Equal(t, exp, result) -} diff --git a/drivers/docker/stats.go b/drivers/docker/stats.go index 23097ee99..dc6854b0a 100644 --- a/drivers/docker/stats.go +++ b/drivers/docker/stats.go @@ -11,6 +11,7 @@ import ( "time" docker "github.com/fsouza/go-dockerclient" + "github.com/hashicorp/nomad/client/lib/cpustats" cstructs "github.com/hashicorp/nomad/client/structs" 
"github.com/hashicorp/nomad/drivers/docker/util" "github.com/hashicorp/nomad/helper" @@ -78,7 +79,7 @@ func (u *usageSender) close() { // Stats starts collecting stats from the docker daemon and sends them on the // returned channel. -func (h *taskHandle) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { +func (h *taskHandle) Stats(ctx context.Context, interval time.Duration, top cpustats.Topology) (<-chan *cstructs.TaskResourceUsage, error) { select { case <-h.doneCh: return nil, nstructs.NewRecoverableError(fmt.Errorf("container stopped"), false) @@ -86,12 +87,12 @@ func (h *taskHandle) Stats(ctx context.Context, interval time.Duration) (<-chan } destCh, recvCh := newStatsChanPipe() - go h.collectStats(ctx, destCh, interval) + go h.collectStats(ctx, destCh, interval, top) return recvCh, nil } // collectStats starts collecting resource usage stats of a docker container -func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, interval time.Duration) { +func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, interval time.Duration, top cpustats.Topology) { defer destCh.close() // backoff and retry used if the docker stats API returns an error @@ -120,7 +121,7 @@ func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, inte // receive stats from docker and emit nomad stats // statsCh will always be closed by docker client. 
statsCh := make(chan *docker.Stats) - go dockerStatsCollector(destCh, statsCh, interval) + go dockerStatsCollector(destCh, statsCh, interval, top) statsOpts := docker.StatsOptions{ ID: h.containerID, @@ -150,7 +151,7 @@ func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, inte } } -func dockerStatsCollector(destCh *usageSender, statsCh <-chan *docker.Stats, interval time.Duration) { +func dockerStatsCollector(destCh *usageSender, statsCh <-chan *docker.Stats, interval time.Duration, top cpustats.Topology) { var resourceUsage *cstructs.TaskResourceUsage // hasSentInitialStats is used so as to emit the first stats received from @@ -180,7 +181,7 @@ func dockerStatsCollector(destCh *usageSender, statsCh <-chan *docker.Stats, int } // s should always be set, but check and skip just in case if s != nil { - resourceUsage = util.DockerStatsToTaskResourceUsage(s) + resourceUsage = util.DockerStatsToTaskResourceUsage(s, top) // send stats next interation if this is the first time received // from docker if !hasSentInitialStats { diff --git a/drivers/docker/stats_test.go b/drivers/docker/stats_test.go index 65b511687..3cbdc77a3 100644 --- a/drivers/docker/stats_test.go +++ b/drivers/docker/stats_test.go @@ -38,7 +38,7 @@ func TestDriver_DockerStatsCollector(t *testing.T) { stats.MemoryStats.CommitPeak = 321323 stats.MemoryStats.PrivateWorkingSet = 62222 - go dockerStatsCollector(dst, src, time.Second) + go dockerStatsCollector(dst, src, time.Second, top) select { case src <- stats: diff --git a/drivers/docker/util/stats_posix.go b/drivers/docker/util/stats_posix.go index f3951fc91..3b854b44d 100644 --- a/drivers/docker/util/stats_posix.go +++ b/drivers/docker/util/stats_posix.go @@ -6,11 +6,9 @@ package util import ( - "runtime" - docker "github.com/fsouza/go-dockerclient" + "github.com/hashicorp/nomad/client/lib/cpustats" cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/helper/stats" ) var ( @@ -21,7 +19,12 @@ var ( 
DockerCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} ) -func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage { +func DockerStatsToTaskResourceUsage(s *docker.Stats, top cpustats.Topology) *cstructs.TaskResourceUsage { + var ( + totalCompute = top.TotalCompute() + totalCores = top.NumCores() + ) + measuredMems := DockerCgroupV1MeasuredMemStats // use a simple heuristic to check if cgroup-v2 is used. @@ -49,14 +52,15 @@ func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage // Calculate percentage cs.Percent = CalculateCPUPercent( s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, - s.CPUStats.SystemCPUUsage, s.PreCPUStats.SystemCPUUsage, runtime.NumCPU()) + s.CPUStats.SystemCPUUsage, s.PreCPUStats.SystemCPUUsage, totalCores) cs.SystemMode = CalculateCPUPercent( s.CPUStats.CPUUsage.UsageInKernelmode, s.PreCPUStats.CPUUsage.UsageInKernelmode, - s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, runtime.NumCPU()) + s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, totalCores) cs.UserMode = CalculateCPUPercent( s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode, - s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, runtime.NumCPU()) - cs.TotalTicks = (cs.Percent / 100) * float64(stats.CpuTotalTicks()) / float64(runtime.NumCPU()) + s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, totalCores) + + cs.TotalTicks = (cs.Percent / 100) * float64(totalCompute) / float64(totalCores) return &cstructs.TaskResourceUsage{ ResourceUsage: &cstructs.ResourceUsage{ diff --git a/drivers/docker/util/stats_windows.go b/drivers/docker/util/stats_windows.go index ae4d2dbc0..15d6326d2 100644 --- a/drivers/docker/util/stats_windows.go +++ b/drivers/docker/util/stats_windows.go @@ -1,14 +1,14 @@ // Copyright (c) HashiCorp, Inc. 
// SPDX-License-Identifier: MPL-2.0 +//go:build windows + package util import ( - "runtime" - docker "github.com/fsouza/go-dockerclient" + "github.com/hashicorp/nomad/client/lib/cpustats" cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/helper/stats" ) var ( @@ -17,7 +17,12 @@ var ( DockerMeasuredMemStats = []string{"RSS", "Usage", "Max Usage"} ) -func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage { +func DockerStatsToTaskResourceUsage(s *docker.Stats, top cpustats.Topology) *cstructs.TaskResourceUsage { + var ( + totalCompute = top.TotalCompute() + totalCores = top.NumCores() + ) + ms := &cstructs.MemoryStats{ RSS: s.MemoryStats.PrivateWorkingSet, Usage: s.MemoryStats.Commit, @@ -45,7 +50,7 @@ func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods, ThrottledTime: s.CPUStats.ThrottlingData.ThrottledTime, Percent: cpuPercent, - TotalTicks: (cpuPercent / 100) * float64(stats.CpuTotalTicks()) / float64(runtime.NumCPU()), + TotalTicks: (cpuPercent / 100) * float64(totalCompute) / float64(totalCores), Measured: DockerMeasuredCPUStats, } diff --git a/drivers/exec/driver.go b/drivers/exec/driver.go index c8c8d594b..ea01bb4ef 100644 --- a/drivers/exec/driver.go +++ b/drivers/exec/driver.go @@ -14,7 +14,8 @@ import ( "github.com/hashicorp/consul-template/signals" hclog "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/drivers/shared/capabilities" "github.com/hashicorp/nomad/drivers/shared/eventer" "github.com/hashicorp/nomad/drivers/shared/executor" @@ -135,6 +136,9 @@ type Driver struct { // whether it has been successful fingerprintSuccess *bool fingerprintLock sync.Mutex + + // topology contains the system cpu / memory topology + topology *numalib.Topology } // 
Config is the driver configuration set by the SetConfig RPC call @@ -240,10 +244,11 @@ type TaskState struct { func NewExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { logger = logger.Named(pluginName) return &Driver{ - eventer: eventer.NewEventer(ctx, logger), - tasks: newTaskStore(), - ctx: ctx, - logger: logger, + eventer: eventer.NewEventer(ctx, logger), + tasks: newTaskStore(), + ctx: ctx, + logger: logger, + topology: numalib.Scan(numalib.PlatformScanners()), } } @@ -350,20 +355,9 @@ func (d *Driver) buildFingerprint() *drivers.Fingerprint { return fp } - mount, err := cgutil.FindCgroupMountpointDir() - if err != nil { + if cgroupslib.GetMode() == cgroupslib.OFF { fp.Health = drivers.HealthStateUnhealthy fp.HealthDescription = drivers.NoCgroupMountMessage - if d.fingerprintSuccessful() { - d.logger.Warn(fp.HealthDescription, "error", err) - } - d.setFingerprintFailure() - return fp - } - - if mount == "" { - fp.Health = drivers.HealthStateUnhealthy - fp.HealthDescription = drivers.CgroupMountEmpty d.setFingerprintFailure() return fp } @@ -584,25 +578,6 @@ func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) e return nil } -// resetCgroup will re-create the v2 cgroup for the task after the task has been -// destroyed by libcontainer. In the case of a task restart we call DestroyTask -// which removes the cgroup - but we still need it! -// -// Ideally the cgroup management would be more unified - and we could do the creation -// on a task runner pre-start hook, eliminating the need for this hack. 
-func (d *Driver) resetCgroup(handle *taskHandle) { - if cgutil.UseV2 { - if handle.taskConfig.Resources != nil && - handle.taskConfig.Resources.LinuxResources != nil && - handle.taskConfig.Resources.LinuxResources.CpusetCgroupPath != "" { - err := os.Mkdir(handle.taskConfig.Resources.LinuxResources.CpusetCgroupPath, 0755) - if err != nil { - d.logger.Trace("failed to reset cgroup", "path", handle.taskConfig.Resources.LinuxResources.CpusetCgroupPath) - } - } - } -} - func (d *Driver) DestroyTask(taskID string, force bool) error { handle, ok := d.tasks.Get(taskID) if !ok { @@ -621,9 +596,6 @@ func (d *Driver) DestroyTask(taskID string, force bool) error { handle.pluginClient.Kill() } - // workaround for the case where DestroyTask was issued on task restart - d.resetCgroup(handle) - d.tasks.Delete(taskID) return nil } diff --git a/drivers/exec/driver_test.go b/drivers/exec/driver_test.go index 6225de5d0..c21e5db94 100644 --- a/drivers/exec/driver_test.go +++ b/drivers/exec/driver_test.go @@ -19,7 +19,7 @@ import ( "time" "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" ctestutils "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/drivers/shared/executor" "github.com/hashicorp/nomad/helper/pluginutils/hclutils" @@ -34,10 +34,6 @@ import ( "github.com/stretchr/testify/require" ) -const ( - cgroupParent = "testing.slice" -) - func TestMain(m *testing.M) { if !testtask.Run() { os.Exit(m.Run()) @@ -61,13 +57,10 @@ func testResources(allocID, task string) *drivers.Resources { LinuxResources: &drivers.LinuxResources{ MemoryLimitBytes: 134217728, CPUShares: 100, + CpusetCgroupPath: cgroupslib.LinuxResourcesPath(allocID, task), }, } - if cgutil.UseV2 { - r.LinuxResources.CpusetCgroupPath = filepath.Join(cgutil.CgroupRoot, cgroupParent, cgutil.CgroupScope(allocID, task)) - } - return r } @@ -319,14 +312,12 @@ func TestExecDriver_NoOrphans(t *testing.T) { 
require.NoError(t, harness.SetConfig(baseConfig)) allocID := uuid.Generate() + taskName := "test" task := &drivers.TaskConfig{ - AllocID: allocID, - ID: uuid.Generate(), - Name: "test", - } - - if cgutil.UseV2 { - task.Resources = testResources(allocID, "test") + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, true) @@ -590,7 +581,8 @@ func TestExecDriver_HandlerExec(t *testing.T) { require.NoError(t, err) require.True(t, res.ExitResult.Successful()) stdout := strings.TrimSpace(string(res.Stdout)) - if !cgutil.UseV2 { + switch cgroupslib.GetMode() { + case cgroupslib.CG1: for _, line := range strings.Split(stdout, "\n") { // skip empty lines if line == "" { @@ -605,7 +597,7 @@ func TestExecDriver_HandlerExec(t *testing.T) { t.Fatalf("not a member of the allocs nomad cgroup: %q", line) } } - } else { + default: require.True(t, strings.HasSuffix(stdout, ".scope"), "actual stdout %q", stdout) } diff --git a/drivers/exec/driver_unix_test.go b/drivers/exec/driver_unix_test.go index 5035c30be..7707b666f 100644 --- a/drivers/exec/driver_unix_test.go +++ b/drivers/exec/driver_unix_test.go @@ -13,7 +13,6 @@ import ( "time" "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/lib/cgutil" ctestutils "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/drivers/shared/capabilities" "github.com/hashicorp/nomad/drivers/shared/executor" @@ -101,15 +100,12 @@ func TestExec_ExecTaskStreaming(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - ID: uuid.Generate(), - Name: "sleep", - } - - if cgutil.UseV2 { - allocID := uuid.Generate() - task.AllocID = allocID - task.Resources = testResources(allocID, "sleep") + ID: allocID, + Name: taskName, + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) @@ -165,15 
+161,13 @@ func TestExec_dnsConfig(t *testing.T) { } for _, c := range cases { + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - ID: uuid.Generate(), - Name: "sleep", - DNS: c.cfg, - } - - if cgutil.UseV2 { - allocID := uuid.Generate() - task.Resources = testResources(allocID, "sleep") + ID: allocID, + Name: taskName, + DNS: c.cfg, + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) @@ -197,15 +191,12 @@ func TestExecDriver_Capabilities(t *testing.T) { ci.Parallel(t) ctestutils.ExecCompatible(t) + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - ID: uuid.Generate(), - Name: "sleep", - } - - if cgutil.UseV2 { - allocID := uuid.Generate() - task.AllocID = allocID - task.Resources = testResources(allocID, "sleep") + ID: allocID, + Name: taskName, + Resources: testResources(allocID, taskName), } for _, tc := range []struct { diff --git a/drivers/java/driver.go b/drivers/java/driver.go index 7ff471e5a..47566aef9 100644 --- a/drivers/java/driver.go +++ b/drivers/java/driver.go @@ -12,11 +12,10 @@ import ( "runtime" "time" - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/hashicorp/nomad/drivers/shared/capabilities" - "github.com/hashicorp/consul-template/signals" hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/drivers/shared/capabilities" "github.com/hashicorp/nomad/drivers/shared/eventer" "github.com/hashicorp/nomad/drivers/shared/executor" "github.com/hashicorp/nomad/drivers/shared/resolvconf" @@ -332,17 +331,9 @@ func (d *Driver) buildFingerprint() *drivers.Fingerprint { return fp } - mount, err := cgutil.FindCgroupMountpointDir() - if err != nil { + if cgroupslib.GetMode() == cgroupslib.OFF { fp.Health = drivers.HealthStateUnhealthy fp.HealthDescription = drivers.NoCgroupMountMessage - d.logger.Warn(fp.HealthDescription, "error", err) - return fp - } - - if mount == "" { - 
fp.Health = drivers.HealthStateUnhealthy - fp.HealthDescription = drivers.CgroupMountEmpty return fp } } diff --git a/drivers/java/driver_test.go b/drivers/java/driver_test.go index ecd7482c5..9a713c13e 100644 --- a/drivers/java/driver_test.go +++ b/drivers/java/driver_test.go @@ -13,9 +13,8 @@ import ( "testing" "time" - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/cgroupslib" ctestutil "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/pluginutils/hclutils" "github.com/hashicorp/nomad/helper/testlog" @@ -300,14 +299,11 @@ func basicTask(t *testing.T, name string, taskConfig *TaskConfig) *drivers.TaskC LinuxResources: &drivers.LinuxResources{ MemoryLimitBytes: 134217728, CPUShares: 100, + CpusetCgroupPath: cgroupslib.LinuxResourcesPath(allocID, name), }, }, } - if cgutil.UseV2 { - task.Resources.LinuxResources.CpusetCgroupPath = filepath.Join(cgutil.CgroupRoot, "testing.slice", cgutil.CgroupScope(allocID, name)) - } - require.NoError(t, task.EncodeConcreteDriverConfig(&taskConfig)) return task } diff --git a/drivers/rawexec/driver.go b/drivers/rawexec/driver.go index 631d4230d..4e325ce97 100644 --- a/drivers/rawexec/driver.go +++ b/drivers/rawexec/driver.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/consul-template/signals" "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/drivers/shared/eventer" "github.com/hashicorp/nomad/drivers/shared/executor" "github.com/hashicorp/nomad/helper/pluginutils/loader" @@ -131,6 +132,9 @@ type Driver struct { // logger will log to the Nomad agent logger hclog.Logger + + // topology contains cpu / memory info + topology *numalib.Topology } // Config is the driver configuration set by the SetConfig RPC call @@ -163,11 +167,12 @@ type TaskState struct { func NewRawExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { logger = 
logger.Named(pluginName) return &Driver{ - eventer: eventer.NewEventer(ctx, logger), - config: &Config{}, - tasks: newTaskStore(), - ctx: ctx, - logger: logger, + eventer: eventer.NewEventer(ctx, logger), + config: &Config{}, + tasks: newTaskStore(), + ctx: ctx, + logger: logger, + topology: numalib.Scan(numalib.PlatformScanners()), } } @@ -341,6 +346,7 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive StdoutPath: cfg.StdoutPath, StderrPath: cfg.StderrPath, NetworkIsolation: cfg.NetworkIsolation, + Resources: cfg.Resources.Copy(), } ps, err := exec.Launch(execCmd) diff --git a/drivers/rawexec/driver_test.go b/drivers/rawexec/driver_test.go index 5536c7feb..a28bb54f7 100644 --- a/drivers/rawexec/driver_test.go +++ b/drivers/rawexec/driver_test.go @@ -4,7 +4,9 @@ package rawexec import ( + "bytes" "context" + "errors" "fmt" "os" "path/filepath" @@ -16,32 +18,53 @@ import ( "time" "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" ctestutil "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/pluginutils/hclutils" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/testtask" "github.com/hashicorp/nomad/helper/uuid" + nstructs "github.com/hashicorp/nomad/nomad/structs" basePlug "github.com/hashicorp/nomad/plugins/base" "github.com/hashicorp/nomad/plugins/drivers" dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils" pstructs "github.com/hashicorp/nomad/plugins/shared/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" "github.com/stretchr/testify/require" ) // defaultEnv creates the default environment for raw exec tasks func defaultEnv() map[string]string { m := make(map[string]string) - if cgutil.UseV2 { - // normally the taskenv.Builder will set this automatically from the - // Node object which gets created via Client 
configuration, but none of - // that exists in the test harness so just set it here. - m["NOMAD_PARENT_CGROUP"] = "nomad.slice" - } return m } +func testResources(allocID, task string) *drivers.Resources { + if allocID == "" || task == "" { + panic("must be set") + } + + r := &drivers.Resources{ + NomadResources: &nstructs.AllocatedTaskResources{ + Memory: nstructs.AllocatedMemoryResources{ + MemoryMB: 128, + }, + Cpu: nstructs.AllocatedCpuResources{ + CpuShares: 100, + }, + }, + LinuxResources: &drivers.LinuxResources{ + MemoryLimitBytes: 134217728, + CPUShares: 100, + CpusetCgroupPath: cgroupslib.LinuxResourcesPath(allocID, task), + }, + } + + return r +} + func TestMain(m *testing.M) { if !testtask.Run() { os.Exit(m.Run()) @@ -171,11 +194,15 @@ func TestRawExecDriver_StartWait(t *testing.T) { d := newEnabledRawExecDriver(t) harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + + allocID := uuid.Generate() + taskName := "test" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "test", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } tc := &TaskConfig{ @@ -188,6 +215,8 @@ func TestRawExecDriver_StartWait(t *testing.T) { cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + handle, _, err := harness.StartTask(task) require.NoError(err) @@ -223,11 +252,14 @@ func TestRawExecDriver_StartWaitRecoverWaitStop(t *testing.T) { bconfig := &basePlug.Config{PluginConfig: data} require.NoError(harness.SetConfig(bconfig)) + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "sleep", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } tc := &TaskConfig{ Command: testtask.Path(), @@ -236,9 
+268,12 @@ func TestRawExecDriver_StartWaitRecoverWaitStop(t *testing.T) { require.NoError(task.EncodeConcreteDriverConfig(&tc)) testtask.SetTaskConfigEnv(task) + cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + handle, _, err := harness.StartTask(task) require.NoError(err) @@ -301,16 +336,21 @@ func TestRawExecDriver_Start_Wait_AllocDir(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "sleep", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + exp := []byte("win") file := "output.txt" outPath := fmt.Sprintf(`%s/%s`, task.TaskDir().SharedAllocDir, file) @@ -352,98 +392,92 @@ func TestRawExecDriver_Start_Kill_Wait_Cgroup(t *testing.T) { ci.Parallel(t) ctestutil.ExecCompatible(t) - require := require.New(t) pidFile := "pid" d := newEnabledRawExecDriver(t) harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "sleep", - User: "root", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + User: "root", + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + tc := &TaskConfig{ Command: testtask.Path(), - Args: []string{"fork/exec", pidFile, "pgrp", "0", "sleep", "20s"}, + Args: []string{"fork/exec", pidFile, "pgrp", "0", "sleep", "7s"}, } - require.NoError(task.EncodeConcreteDriverConfig(&tc)) + must.NoError(t, task.EncodeConcreteDriverConfig(&tc)) 
testtask.SetTaskConfigEnv(task) _, _, err := harness.StartTask(task) - require.NoError(err) + must.NoError(t, err) // Find the process var pidData []byte - testutil.WaitForResult(func() (bool, error) { - var err error - pidData, err = os.ReadFile(filepath.Join(task.TaskDir().Dir, pidFile)) - if err != nil { - return false, err - } - if len(pidData) == 0 { - return false, fmt.Errorf("pidFile empty") - } - - return true, nil - }, func(err error) { - require.NoError(err) - }) + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(func() error { + data, err := os.ReadFile(filepath.Join(task.TaskDir().Dir, pidFile)) + if err != nil { + return err + } + if len(bytes.TrimSpace(data)) == 0 { + return errors.New("pidFile empty") + } + pidData = data + return nil + }), + wait.Gap(1*time.Second), + wait.Timeout(3*time.Second), + )) pid, err := strconv.Atoi(string(pidData)) - require.NoError(err, "failed to read pidData: %s", string(pidData)) + must.NoError(t, err) // Check the pid is up process, err := os.FindProcess(pid) - require.NoError(err) - require.NoError(process.Signal(syscall.Signal(0))) + must.NoError(t, err) + must.NoError(t, process.Signal(syscall.Signal(0))) - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - time.Sleep(1 * time.Second) - err := harness.StopTask(task.ID, 0, "") - - // Can't rely on the ordering between wait and kill on CI/travis... 
- if !testutil.IsCI() { - require.NoError(err) - } - }() + // Stop the task + must.NoError(t, harness.StopTask(task.ID, 0, "")) // Task should terminate quickly waitCh, err := harness.WaitTask(context.Background(), task.ID) - require.NoError(err) + must.NoError(t, err) select { case res := <-waitCh: - require.False(res.Successful()) - case <-time.After(time.Duration(testutil.TestMultiplier()*5) * time.Second): - require.Fail("WaitTask timeout") + must.False(t, res.Successful()) + case <-time.After(10 * time.Second): + must.Unreachable(t, must.Sprint("exceeded wait timeout")) } - testutil.WaitForResult(func() (bool, error) { - if err := process.Signal(syscall.Signal(0)); err == nil { - return false, fmt.Errorf("process should not exist: %v", pid) - } + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(func() bool { + return process.Signal(syscall.Signal(0)) == nil + }), + wait.Gap(1*time.Second), + wait.Timeout(3*time.Second), + )) - return true, nil - }, func(err error) { - require.NoError(err) - }) - - wg.Wait() - require.NoError(harness.DestroyTask(task.ID, true)) + must.NoError(t, harness.DestroyTask(task.ID, true)) } func TestRawExecDriver_ParentCgroup(t *testing.T) { + t.Skip("TODO: seth will fix this during the cpuset partitioning work") + ci.Parallel(t) ctestutil.ExecCompatible(t) ctestutil.CgroupsCompatibleV2(t) @@ -452,10 +486,12 @@ func TestRawExecDriver_ParentCgroup(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), + AllocID: allocID, ID: uuid.Generate(), - Name: "sleep", + Name: taskName, Env: map[string]string{ "NOMAD_PARENT_CGROUP": "custom.slice", }, @@ -464,6 +500,8 @@ func TestRawExecDriver_ParentCgroup(t *testing.T) { cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + // run sleep task tc := &TaskConfig{ Command: testtask.Path(), @@ -500,16 +538,21 @@ func 
TestRawExecDriver_Exec(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "sleep", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + tc := &TaskConfig{ Command: testtask.Path(), Args: []string{"sleep", "9000s"}, @@ -578,10 +621,13 @@ func TestRawExecDriver_Disabled(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + + allocID := uuid.Generate() + taskName := "test" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), + AllocID: allocID, ID: uuid.Generate(), - Name: "test", + Name: taskName, Env: defaultEnv(), } diff --git a/drivers/rawexec/driver_unix_test.go b/drivers/rawexec/driver_unix_test.go index 1bd5de764..87012b163 100644 --- a/drivers/rawexec/driver_unix_test.go +++ b/drivers/rawexec/driver_unix_test.go @@ -69,16 +69,21 @@ func TestRawExecDriver_Signal(t *testing.T) { d := newEnabledRawExecDriver(t) harness := dtestutil.NewDriverHarness(t, d) + allocID := uuid.Generate() + taskName := "signal" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "signal", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, true) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + tc := &TaskConfig{ Command: "/bin/bash", Args: []string{"test.sh"}, @@ -143,16 +148,19 @@ func TestRawExecDriver_StartWaitStop(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() - // Disable cgroups so test works without root - config := &Config{NoCgroups: true, Enabled: true} + config 
:= &Config{NoCgroups: false, Enabled: true} var data []byte require.NoError(basePlug.MsgPackEncode(&data, config)) bconfig := &basePlug.Config{PluginConfig: data} require.NoError(harness.SetConfig(bconfig)) + allocID := uuid.Generate() + taskName := "test" task := &drivers.TaskConfig{ - ID: uuid.Generate(), - Name: "test", + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Resources: testResources(allocID, taskName), } taskConfig := map[string]interface{}{} @@ -164,6 +172,8 @@ func TestRawExecDriver_StartWaitStop(t *testing.T) { cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + handle, _, err := harness.StartTask(task) require.NoError(err) @@ -212,16 +222,21 @@ func TestRawExecDriver_DestroyKillsAll(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "test" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "test", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, true) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + taskConfig := map[string]interface{}{} taskConfig["command"] = "/bin/sh" taskConfig["args"] = []string{"-c", fmt.Sprintf(`sleep 3600 & echo "SLEEP_PID=$!"`)} @@ -314,16 +329,21 @@ func TestRawExec_ExecTaskStreaming(t *testing.T) { harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "sleep", - Env: defaultEnv(), + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + Env: defaultEnv(), + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + tc := &TaskConfig{ Command: 
testtask.Path(), Args: []string{"sleep", "9000s"}, @@ -340,33 +360,30 @@ func TestRawExec_ExecTaskStreaming(t *testing.T) { } func TestRawExec_ExecTaskStreaming_User(t *testing.T) { + t.Skip("todo(shoenig): this test has always been broken, now we skip instead of paving over it") ci.Parallel(t) clienttestutil.RequireLinux(t) d := newEnabledRawExecDriver(t) - // because we cannot set AllocID, see below - d.config.NoCgroups = true - harness := dtestutil.NewDriverHarness(t, d) defer harness.Kill() + allocID := uuid.Generate() + taskName := "sleep" task := &drivers.TaskConfig{ - // todo(shoenig) - Setting AllocID causes test to fail - with or without - // cgroups, and with or without chroot. It has to do with MkAllocDir - // creating the directory structure, but the actual root cause is still - // TBD. The symptom is that any command you try to execute will result - // in "permission denied" coming from os/exec. This test is imperfect, - // the actual feature of running commands as another user works fine. 
- // AllocID: uuid.Generate() - ID: uuid.Generate(), - Name: "sleep", - User: "nobody", + AllocID: allocID, + ID: uuid.Generate(), + Name: taskName, + User: "nobody", + Resources: testResources(allocID, taskName), } cleanup := harness.MkAllocDir(task, false) defer cleanup() + harness.MakeTaskCgroup(allocID, taskName) + err := os.Chmod(task.AllocDir, 0777) require.NoError(t, err) @@ -386,61 +403,3 @@ func TestRawExec_ExecTaskStreaming_User(t *testing.T) { require.Empty(t, stderr) require.Contains(t, stdout, "nobody") } - -func TestRawExecDriver_NoCgroup(t *testing.T) { - ci.Parallel(t) - clienttestutil.RequireLinux(t) - - expectedBytes, err := os.ReadFile("/proc/self/cgroup") - require.NoError(t, err) - expected := strings.TrimSpace(string(expectedBytes)) - - d := newEnabledRawExecDriver(t) - d.config.NoCgroups = true - harness := dtestutil.NewDriverHarness(t, d) - - task := &drivers.TaskConfig{ - AllocID: uuid.Generate(), - ID: uuid.Generate(), - Name: "nocgroup", - } - - cleanup := harness.MkAllocDir(task, true) - defer cleanup() - - tc := &TaskConfig{ - Command: "/bin/cat", - Args: []string{"/proc/self/cgroup"}, - } - require.NoError(t, task.EncodeConcreteDriverConfig(&tc)) - testtask.SetTaskConfigEnv(task) - - _, _, err = harness.StartTask(task) - require.NoError(t, err) - - // Task should terminate quickly - waitCh, err := harness.WaitTask(context.Background(), task.ID) - require.NoError(t, err) - select { - case res := <-waitCh: - require.True(t, res.Successful()) - require.Zero(t, res.ExitCode) - case <-time.After(time.Duration(testutil.TestMultiplier()*6) * time.Second): - require.Fail(t, "WaitTask timeout") - } - - // Check the log file to see it exited because of the signal - outputFile := filepath.Join(task.TaskDir().LogDir, "nocgroup.stdout.0") - testutil.WaitForResult(func() (bool, error) { - act, err := os.ReadFile(outputFile) - if err != nil { - return false, fmt.Errorf("Couldn't read expected output: %v", err) - } - - if 
strings.TrimSpace(string(act)) != expected { - t.Logf("Read from %v", outputFile) - return false, fmt.Errorf("Command outputted\n%v; want\n%v", string(act), expected) - } - return true, nil - }, func(err error) { require.NoError(t, err) }) -} diff --git a/drivers/shared/executor/executor.go b/drivers/shared/executor/executor.go index aa2cc73c9..cfc730b2f 100644 --- a/drivers/shared/executor/executor.go +++ b/drivers/shared/executor/executor.go @@ -21,10 +21,11 @@ import ( hclog "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/client/lib/cpustats" "github.com/hashicorp/nomad/client/lib/fifo" - "github.com/hashicorp/nomad/client/lib/resources" + "github.com/hashicorp/nomad/client/lib/numalib" cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/helper/stats" + "github.com/hashicorp/nomad/drivers/shared/executor/procstats" "github.com/hashicorp/nomad/plugins/drivers" "github.com/syndtr/gocapability/capability" ) @@ -157,6 +158,21 @@ type ExecCommand struct { Capabilities []string } +// Cgroup returns the path to the cgroup the Nomad client is managing for the +// task that is about to be run. +// +// On cgroups v1 systems this returns the path to the cpuset cgroup specifically. +// +// On cgroups v2 systems this returns the path to the task's scope. +// +// On non-Linux systems this returns the empty string and has no meaning. +func (c *ExecCommand) Cgroup() string { + if c == nil || c.Resources == nil || c.Resources.LinuxResources == nil { + return "" + } + return c.Resources.LinuxResources.CpusetCgroupPath +} + // SetWriters sets the writer for the process stdout and stderr. This should // not be used if writing to a file path such as a fifo file. SetStdoutWriter // is mainly used for unit testing purposes. @@ -239,37 +255,34 @@ func (v *ExecutorVersion) GoString() string { // supervises processes. 
In addition to process supervision it provides resource // and file system isolation type UniversalExecutor struct { - childCmd exec.Cmd - commandCfg *ExecCommand + childCmd exec.Cmd + command *ExecCommand exitState *ProcessState processExited chan interface{} - // containment is used to cleanup resources created by the executor - // currently only used for killing pids via freezer cgroup on linux - containment resources.Containment - - totalCpuStats *stats.CpuStats - userCpuStats *stats.CpuStats - systemCpuStats *stats.CpuStats - pidCollector *pidCollector + top cpustats.Topology + totalCpuStats *cpustats.Tracker + userCpuStats *cpustats.Tracker + systemCpuStats *cpustats.Tracker + processStats procstats.ProcessStats logger hclog.Logger } // NewExecutor returns an Executor -func NewExecutor(logger hclog.Logger, cpuTotalTicks uint64) Executor { - logger = logger.Named("executor") - stats.SetCpuTotalTicks(cpuTotalTicks) - - return &UniversalExecutor{ - logger: logger, +func NewExecutor(logger hclog.Logger) Executor { + top := numalib.Scan(numalib.PlatformScanners()) // TODO(shoenig) grpc plumbing + ue := &UniversalExecutor{ + logger: logger.Named("executor"), + top: top, processExited: make(chan interface{}), - totalCpuStats: stats.NewCpuStats(), - userCpuStats: stats.NewCpuStats(), - systemCpuStats: stats.NewCpuStats(), - pidCollector: newPidCollector(logger), + totalCpuStats: cpustats.New(top), + userCpuStats: cpustats.New(top), + systemCpuStats: cpustats.New(top), } + ue.processStats = procstats.New(top, ue) + return ue } // Version returns the api version of the executor @@ -282,7 +295,7 @@ func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) { e.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " ")) - e.commandCfg = command + e.command = command // setting the user of the process if command.User != "" { @@ -293,27 
+306,26 @@ func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) } // set the task dir as the working directory for the command - e.childCmd.Dir = e.commandCfg.TaskDir + e.childCmd.Dir = e.command.TaskDir // start command in separate process group if err := e.setNewProcessGroup(); err != nil { return nil, err } - // Maybe setup containment (for now, cgroups only only on linux) - if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup { - pid := os.Getpid() - if err := e.configureResourceContainer(pid); err != nil { - e.logger.Error("failed to configure resource container", "pid", pid, "error", err) - return nil, err - } + // setup containment (i.e. cgroups on linux) + if cleanup, err := e.configureResourceContainer(command, os.Getpid()); err != nil { + e.logger.Error("failed to configure resource container", "error", err) + return nil, err + } else { + defer cleanup() } - stdout, err := e.commandCfg.Stdout() + stdout, err := e.command.Stdout() if err != nil { return nil, err } - stderr, err := e.commandCfg.Stderr() + stderr, err := e.command.Stderr() if err != nil { return nil, err } @@ -336,14 +348,13 @@ func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) // Set the commands arguments e.childCmd.Path = path e.childCmd.Args = append([]string{e.childCmd.Path}, command.Args...) 
- e.childCmd.Env = e.commandCfg.Env + e.childCmd.Env = e.command.Env // Start the process if err = withNetworkIsolation(e.childCmd.Start, command.NetworkIsolation); err != nil { return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err) } - go e.pidCollector.collectPids(e.processExited, e.getAllPids) go e.wait() return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil } @@ -352,7 +363,14 @@ func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) { ctx, cancel := context.WithDeadline(context.Background(), deadline) defer cancel() - return ExecScript(ctx, e.childCmd.Dir, e.commandCfg.Env, e.childCmd.SysProcAttr, e.commandCfg.NetworkIsolation, name, args) + + if cleanup, err := e.setSubCmdCgroup(&e.childCmd, e.command.Cgroup()); err != nil { + return nil, 0, err + } else { + defer cleanup() + } + + return ExecScript(ctx, e.childCmd.Dir, e.command.Env, e.childCmd.SysProcAttr, e.command.NetworkIsolation, name, args) } // ExecScript executes cmd with args and returns the output, exit code, and @@ -364,6 +382,7 @@ func ExecScript(ctx context.Context, dir string, env []string, attrs *syscall.Sy // Copy runtime environment from the main command cmd.SysProcAttr = attrs + cmd.Dir = dir cmd.Env = env @@ -431,13 +450,18 @@ func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string, return nil }, processStart: func() error { - if u := e.commandCfg.User; u != "" { + if u := e.command.User; u != "" { if err := setCmdUser(cmd, u); err != nil { return err } } - - return withNetworkIsolation(cmd.Start, e.commandCfg.NetworkIsolation) + cgroup := e.command.Cgroup() + if cleanup, err := e.setSubCmdCgroup(cmd, cgroup); err != nil { + return err + } else { + defer cleanup() + } + return withNetworkIsolation(cmd.Start, e.command.NetworkIsolation) }, processWait: func() 
(*os.ProcessState, error) { err := cmd.Wait() @@ -464,7 +488,7 @@ func (e *UniversalExecutor) UpdateResources(resources *drivers.Resources) error func (e *UniversalExecutor) wait() { defer close(e.processExited) - defer e.commandCfg.Close() + defer e.command.Close() pid := e.childCmd.Process.Pid err := e.childCmd.Wait() if err == nil { @@ -514,7 +538,7 @@ func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error { var merr multierror.Error // If the executor did not launch a process, return. - if e.commandCfg == nil { + if e.command == nil { return nil } @@ -559,6 +583,11 @@ func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error { proc.Kill() } + // Issue sigkill to the process group (if possible) + if err = e.killProcessTree(proc); err != nil { + e.logger.Warn("failed to shutdown process group", "pid", proc.Pid, "error", err) + } + // Wait for process to exit select { case <-e.processExited: @@ -567,26 +596,10 @@ func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error { merr.Errors = append(merr.Errors, fmt.Errorf("process did not exit after 15 seconds")) } - // prefer killing the process via platform-dependent resource containment - killByContainment := e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup - - if !killByContainment { - // there is no containment, so kill the group the old fashioned way by sending - // SIGKILL to the negative pid - if cleanupChildrenErr := e.killProcessTree(proc); cleanupChildrenErr != nil && cleanupChildrenErr.Error() != finishedErr { - merr.Errors = append(merr.Errors, - fmt.Errorf("can't kill process with pid %d: %v", e.childCmd.Process.Pid, cleanupChildrenErr)) - } - } else { - // there is containment available (e.g. 
cgroups) so defer to that implementation - // for killing the processes - if cleanupErr := e.containment.Cleanup(); cleanupErr != nil { - e.logger.Warn("containment cleanup failed", "error", cleanupErr) - merr.Errors = append(merr.Errors, cleanupErr) - } - } - if err = merr.ErrorOrNil(); err != nil { + // Note that proclib in the TR shutdown may also dispatch a final platform + // cleanup technique (e.g. cgroup kill), but if we get to the point where + // that matters the Task was doing something naughty. e.logger.Warn("failed to shutdown due to some error", "error", err.Error()) return err } @@ -628,16 +641,12 @@ func (e *UniversalExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx timer.Reset(interval) } - pidStats, err := e.pidCollector.pidStats() - if err != nil { - e.logger.Warn("error collecting stats", "error", err) - return - } + stats := e.processStats.StatProcesses() select { case <-ctx.Done(): return - case ch <- aggregatedResourceUsage(e.systemCpuStats, pidStats): + case ch <- procstats.Aggregate(e.systemCpuStats, stats): } } } diff --git a/drivers/shared/executor/executor_basic.go b/drivers/shared/executor/executor_basic.go index ae0c8fab9..2240e6357 100644 --- a/drivers/shared/executor/executor_basic.go +++ b/drivers/shared/executor/executor_basic.go @@ -8,21 +8,21 @@ package executor import ( "os/exec" - hclog "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/lib/resources" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-set" + "github.com/hashicorp/nomad/drivers/shared/executor/procstats" "github.com/hashicorp/nomad/plugins/drivers" ) -func NewExecutorWithIsolation(logger hclog.Logger, cpuTotalTicks uint64) Executor { +func NewExecutorWithIsolation(logger hclog.Logger) Executor { logger = logger.Named("executor") logger.Error("isolation executor is not supported on this platform, using default") - return NewExecutor(logger, cpuTotalTicks) + return NewExecutor(logger) } -func (e *UniversalExecutor) 
configureResourceContainer(_ int) error { return nil } - -func (e *UniversalExecutor) getAllPids() (resources.PIDs, error) { - return getAllPidsByScanning() +func (e *UniversalExecutor) configureResourceContainer(_ *ExecCommand, _ int) (func(), error) { + nothing := func() {} + return nothing, nil } func (e *UniversalExecutor) start(command *ExecCommand) error { @@ -34,3 +34,11 @@ func withNetworkIsolation(f func() error, _ *drivers.NetworkIsolationSpec) error } func setCmdUser(*exec.Cmd, string) error { return nil } + +func (e *UniversalExecutor) ListProcesses() *set.Set[int] { + return procstats.List(e.childCmd.Process.Pid) +} + +func (e *UniversalExecutor) setSubCmdCgroup(*exec.Cmd, string) (func(), error) { + return func() {}, nil +} diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 0227fc80a..a509426ac 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -21,12 +21,14 @@ import ( "github.com/armon/circbuf" "github.com/hashicorp/consul-template/signals" hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-set" "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/hashicorp/nomad/client/lib/resources" + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/client/lib/cpustats" + "github.com/hashicorp/nomad/client/lib/numalib" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/drivers/shared/capabilities" - "github.com/hashicorp/nomad/helper/stats" + "github.com/hashicorp/nomad/drivers/shared/executor/procstats" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -59,10 +61,11 @@ type LibcontainerExecutor struct { logger hclog.Logger - totalCpuStats *stats.CpuStats - userCpuStats *stats.CpuStats - systemCpuStats *stats.CpuStats - pidCollector *pidCollector + 
top cpustats.Topology + totalCpuStats *cpustats.Tracker + userCpuStats *cpustats.Tracker + systemCpuStats *cpustats.Tracker + processStats procstats.ProcessStats container libcontainer.Container userProc *libcontainer.Process @@ -70,18 +73,22 @@ type LibcontainerExecutor struct { exitState *ProcessState } -func NewExecutorWithIsolation(logger hclog.Logger, cpuTotalTicks uint64) Executor { - logger = logger.Named("isolated_executor") - stats.SetCpuTotalTicks(cpuTotalTicks) - - return &LibcontainerExecutor{ +func NewExecutorWithIsolation(logger hclog.Logger) Executor { + top := numalib.Scan(numalib.PlatformScanners()) // TODO(shoenig) grpc plumbing + le := &LibcontainerExecutor{ id: strings.ReplaceAll(uuid.Generate(), "-", "_"), - logger: logger, - totalCpuStats: stats.NewCpuStats(), - userCpuStats: stats.NewCpuStats(), - systemCpuStats: stats.NewCpuStats(), - pidCollector: newPidCollector(logger), + logger: logger.Named("isolated_executor"), + totalCpuStats: cpustats.New(top), + userCpuStats: cpustats.New(top), + systemCpuStats: cpustats.New(top), + top: top, } + le.processStats = procstats.New(top, le) + return le +} + +func (l *LibcontainerExecutor) ListProcesses() *set.Set[int] { + return procstats.List(l.command) } // Launch creates a new container in libcontainer and starts a new process with it @@ -109,7 +116,7 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro } // A container groups processes under the same isolation enforcement - containerCfg, err := newLibcontainerConfig(command) + containerCfg, err := l.newLibcontainerConfig(command) if err != nil { return nil, fmt.Errorf("failed to configure container(%s): %v", l.id, err) } @@ -155,9 +162,9 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro } l.userProc = process - l.totalCpuStats = stats.NewCpuStats() - l.userCpuStats = stats.NewCpuStats() - l.systemCpuStats = stats.NewCpuStats() + l.totalCpuStats = cpustats.New(l.top) + l.userCpuStats = 
cpustats.New(l.top) + l.systemCpuStats = cpustats.New(l.top) // Starts the task if err := container.Run(process); err != nil { @@ -174,7 +181,7 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro // start a goroutine to wait on the process to complete, so Wait calls can // be multiplexed l.userProcExited = make(chan interface{}) - go l.pidCollector.collectPids(l.userProcExited, l.getAllPids) + go l.wait() return &ProcessState{ @@ -184,18 +191,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro }, nil } -func (l *LibcontainerExecutor) getAllPids() (resources.PIDs, error) { - pids, err := l.container.Processes() - if err != nil { - return nil, err - } - m := make(resources.PIDs, 1) - for _, pid := range pids { - m[pid] = resources.NewPID(pid) - } - return m, nil -} - // Wait waits until a process has exited and returns it's exitcode and errors func (l *LibcontainerExecutor) Wait(ctx context.Context) (*ProcessState, error) { select { @@ -291,6 +286,7 @@ func (l *LibcontainerExecutor) Shutdown(signal string, grace time.Duration) erro } else { err := l.container.Signal(os.Kill, true) if err != nil { + l.logger.Info("no grace fail", "error", err) return err } } @@ -325,9 +321,12 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, defer close(ch) timer := time.NewTimer(0) - measuredMemStats := ExecutorCgroupV1MeasuredMemStats - if cgroups.IsCgroup2UnifiedMode() { - measuredMemStats = ExecutorCgroupV2MeasuredMemStats + var measurableMemStats []string + switch cgroupslib.GetMode() { + case cgroupslib.CG1: + measurableMemStats = ExecutorCgroupV1MeasuredMemStats + case cgroupslib.CG2: + measurableMemStats = ExecutorCgroupV2MeasuredMemStats } for { @@ -339,21 +338,20 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, timer.Reset(interval) } + // the moment we collect this round of stats + ts := time.Now() + + // get actual stats from the container 
lstats, err := l.container.Stats() if err != nil { l.logger.Warn("error collecting stats", "error", err) return } - - pidStats, err := l.pidCollector.pidStats() - if err != nil { - l.logger.Warn("error collecting stats", "error", err) - return - } - - ts := time.Now() stats := lstats.CgroupStats + // get the map of process pids in this container + pstats := l.processStats.StatProcesses() + // Memory Related Stats swap := stats.MemoryStats.SwapUsage maxUsage := stats.MemoryStats.Usage.MaxUsage @@ -369,7 +367,7 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, MaxUsage: maxUsage, KernelUsage: stats.MemoryStats.KernelUsage.Usage, KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage, - Measured: measuredMemStats, + Measured: measurableMemStats, } // CPU Related Stats @@ -393,7 +391,7 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, CpuStats: cs, }, Timestamp: ts.UTC().UnixNano(), - Pids: pidStats, + Pids: pstats, } select { @@ -648,33 +646,42 @@ func configureIsolation(cfg *lconfigs.Config, command *ExecCommand) error { return nil } -func configureCgroups(cfg *lconfigs.Config, command *ExecCommand) error { - // If resources are not limited then manually create cgroups needed +func (l *LibcontainerExecutor) configureCgroups(cfg *lconfigs.Config, command *ExecCommand) error { + // note: an alloc TR hook pre-creates the cgroup(s) in both v1 and v2 + if !command.ResourceLimits { - return cgutil.ConfigureBasicCgroups(cfg) - } - - // set cgroups path - if cgutil.UseV2 { - // in v2, the cgroup must have been created by the client already, - // which breaks a lot of existing tests that run drivers without a client - if command.Resources == nil || command.Resources.LinuxResources == nil || command.Resources.LinuxResources.CpusetCgroupPath == "" { - return errors.New("cgroup path must be set") - } - parent, cgroup := cgutil.SplitPath(command.Resources.LinuxResources.CpusetCgroupPath) - cfg.Cgroups.Path = 
filepath.Join("/", parent, cgroup) - } else { - // in v1, the cgroup is created using /nomad, which is a bug because it - // does not respect the cgroup_parent client configuration - // (but makes testing easy) - id := uuid.Generate() - cfg.Cgroups.Path = filepath.Join("/", cgutil.DefaultCgroupV1Parent, id) - } - - if command.Resources == nil || command.Resources.NomadResources == nil { return nil } + cg := command.Cgroup() + if cg == "" { + return errors.New("cgroup must be set") + } + + // set the libcontainer hook for writing the PID to cgroup.procs file + l.configureCgroupHook(cfg, command) + + // set the libcontainer memory limits + l.configureCgroupMemory(cfg, command) + + // set cgroup v1/v2 specific attributes (cpu, path) + switch cgroupslib.GetMode() { + case cgroupslib.CG1: + return l.configureCG1(cfg, command, cg) + default: + return l.configureCG2(cfg, command, cg) + } +} + +func (*LibcontainerExecutor) configureCgroupHook(cfg *lconfigs.Config, command *ExecCommand) { + cfg.Hooks = lconfigs.Hooks{ + lconfigs.CreateRuntime: lconfigs.HookList{ + newSetCPUSetCgroupHook(command.Resources.LinuxResources.CpusetCgroupPath), + }, + } +} + +func (l *LibcontainerExecutor) configureCgroupMemory(cfg *lconfigs.Config, command *ExecCommand) { // Total amount of memory allowed to consume res := command.Resources.NomadResources memHard, memSoft := res.Memory.MemoryMaxMB, res.Memory.MemoryMB @@ -683,35 +690,41 @@ func configureCgroups(cfg *lconfigs.Config, command *ExecCommand) error { memSoft = 0 } - if memHard > 0 { - cfg.Cgroups.Resources.Memory = memHard * 1024 * 1024 - cfg.Cgroups.Resources.MemoryReservation = memSoft * 1024 * 1024 + cfg.Cgroups.Resources.Memory = memHard * 1024 * 1024 + cfg.Cgroups.Resources.MemoryReservation = memSoft * 1024 * 1024 - // Disable swap if possible, to avoid issues on the machine - cfg.Cgroups.Resources.MemorySwappiness = cgutil.MaybeDisableMemorySwappiness() - } + // Disable swap if possible, to avoid issues on the machine + 
cfg.Cgroups.Resources.MemorySwappiness = cgroupslib.MaybeDisableMemorySwappiness() +} - cpuShares := res.Cpu.CpuShares - if cpuShares < 2 { - return fmt.Errorf("resources.Cpu.CpuShares must be equal to or greater than 2: %v", cpuShares) - } - - // Set the relative CPU shares for this cgroup, and convert for cgroupv2 - cfg.Cgroups.Resources.CpuShares = uint64(cpuShares) - cfg.Cgroups.Resources.CpuWeight = cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares)) - - if command.Resources.LinuxResources != nil && command.Resources.LinuxResources.CpusetCgroupPath != "" { - cfg.Hooks = lconfigs.Hooks{ - lconfigs.CreateRuntime: lconfigs.HookList{ - newSetCPUSetCgroupHook(command.Resources.LinuxResources.CpusetCgroupPath), - }, - } - } +func (*LibcontainerExecutor) configureCG1(cfg *lconfigs.Config, command *ExecCommand, cg string) error { + // Set the v1 parent relative path (i.e. /nomad/) + scope := filepath.Base(cg) + cfg.Cgroups.Path = filepath.Join("/", cgroupslib.NomadCgroupParent, scope) + // set cpu.shares + res := command.Resources.NomadResources + cfg.Cgroups.CpuShares = uint64(res.Cpu.CpuShares) return nil } -func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { +func (l *LibcontainerExecutor) configureCG2(cfg *lconfigs.Config, command *ExecCommand, cg string) error { + // Set the v2 specific unified path + scope := filepath.Base(cg) + cfg.Cgroups.Path = filepath.Join("/", cgroupslib.NomadCgroupParent, scope) + + res := command.Resources.NomadResources + cpuShares := res.Cpu.CpuShares // a cgroups v1 concept + cpuWeight := cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares)) + // sets cpu.weight, which the kernel also translates to cpu.weight.nice + // despite what the libcontainer docs say, this sets priority not bandwidth + cfg.Cgroups.Resources.CpuWeight = cpuWeight + + // todo: we will also want to set cpu bandwidth (i.e. 
cpu_hard_limit) + return nil +} + +func (l *LibcontainerExecutor) newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { cfg := &lconfigs.Config{ Cgroups: &lconfigs.Cgroup{ Resources: &lconfigs.Resources{ @@ -735,7 +748,7 @@ func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { return nil, err } - if err := configureCgroups(cfg, command); err != nil { + if err := l.configureCgroups(cfg, command); err != nil { return nil, err } diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index a26a90a5d..8c02c1d19 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -16,7 +16,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/drivers/shared/capabilities" @@ -24,7 +24,6 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/plugins/drivers" tu "github.com/hashicorp/nomad/testutil" - "github.com/opencontainers/runc/libcontainer/cgroups" lconfigs "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" "github.com/shoenig/test" @@ -84,15 +83,12 @@ func testExecutorCommandWithChroot(t *testing.T) *testExecCmd { TaskDir: td.Dir, Resources: &drivers.Resources{ NomadResources: alloc.AllocatedResources.Tasks[task.Name], + LinuxResources: &drivers.LinuxResources{ + CpusetCgroupPath: cgroupslib.LinuxResourcesPath(alloc.ID, task.Name), + }, }, } - if cgutil.UseV2 { - cmd.Resources.LinuxResources = &drivers.LinuxResources{ - CpusetCgroupPath: filepath.Join(cgutil.CgroupRoot, "testing.scope", cgutil.CgroupScope(alloc.ID, task.Name)), - } - } - testCmd := &testExecCmd{ command: cmd, allocDir: allocDir, 
@@ -147,7 +143,7 @@ func TestExecutor_Isolation_PID_and_IPC_hostMode(t *testing.T) { execCmd.ModePID = "host" // disable PID namespace execCmd.ModeIPC = "host" // disable IPC namespace - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) ps, err := executor.Launch(execCmd) @@ -190,7 +186,7 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) { execCmd.ModePID = "private" execCmd.ModeIPC = "private" - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) ps, err := executor.Launch(execCmd) @@ -282,7 +278,7 @@ func TestExecutor_CgroupPaths(t *testing.T) { execCmd.ResourceLimits = true - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) ps, err := executor.Launch(execCmd) @@ -295,11 +291,11 @@ func TestExecutor_CgroupPaths(t *testing.T) { tu.WaitForResult(func() (bool, error) { output := strings.TrimSpace(testExecCmd.stdout.String()) - switch cgutil.UseV2 { - case true: + switch cgroupslib.GetMode() { + case cgroupslib.CG2: isScope := strings.HasSuffix(output, ".scope") require.True(isScope) - case false: + default: // Verify that we got some cgroups if !strings.Contains(output, ":devices:") { return false, fmt.Errorf("was expected cgroup files but found:\n%v", output) @@ -328,93 +324,6 @@ func TestExecutor_CgroupPaths(t *testing.T) { }, func(err error) { t.Error(err) }) } -// TestExecutor_CgroupPaths asserts that all cgroups created for a task -// are destroyed on shutdown -func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) { - ci.Parallel(t) - testutil.ExecCompatible(t) - - require := require.New(t) - - testExecCmd := testExecutorCommandWithChroot(t) - execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir - execCmd.Cmd = 
"/bin/bash" - execCmd.Args = []string{"-c", "sleep 0.2; cat /proc/self/cgroup"} - defer allocDir.Destroy() - - execCmd.ResourceLimits = true - - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) - defer executor.Shutdown("SIGKILL", 0) - - ps, err := executor.Launch(execCmd) - require.NoError(err) - require.NotZero(ps.Pid) - - state, err := executor.Wait(context.Background()) - require.NoError(err) - require.Zero(state.ExitCode) - - var cgroupsPaths string - tu.WaitForResult(func() (bool, error) { - output := strings.TrimSpace(testExecCmd.stdout.String()) - - switch cgutil.UseV2 { - case true: - isScope := strings.HasSuffix(output, ".scope") - require.True(isScope) - case false: - // Verify that we got some cgroups - if !strings.Contains(output, ":devices:") { - return false, fmt.Errorf("was expected cgroup files but found:\n%v", output) - } - lines := strings.Split(output, "\n") - for _, line := range lines { - // Every cgroup entry should be /nomad/$ALLOC_ID - if line == "" { - continue - } - - // Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker - // don't isolate it by default. And also misc. - if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") || strings.Contains(line, ":misc:") { - continue - } - - if !strings.Contains(line, ":/nomad/") { - return false, fmt.Errorf("Not a member of the alloc's cgroup: expected=...:/nomad/... 
-- found=%q", line) - } - } - } - cgroupsPaths = output - return true, nil - }, func(err error) { t.Error(err) }) - - // shutdown executor and test that cgroups are destroyed - executor.Shutdown("SIGKILL", 0) - - // test that the cgroup paths are not visible - tmpFile, err := os.CreateTemp("", "") - require.NoError(err) - defer os.Remove(tmpFile.Name()) - - _, err = tmpFile.WriteString(cgroupsPaths) - require.NoError(err) - tmpFile.Close() - - subsystems, err := cgroups.ParseCgroupFile(tmpFile.Name()) - require.NoError(err) - - for subsystem, cgroup := range subsystems { - if subsystem == "" || !strings.Contains(cgroup, "nomad/") { - continue - } - p, err := cgutil.GetCgroupPathHelperV1(subsystem, cgroup) - require.NoError(err) - require.Falsef(cgroups.PathExists(p), "cgroup for %s %s still exists", subsystem, cgroup) - } -} - func TestExecutor_LookupTaskBin(t *testing.T) { ci.Parallel(t) @@ -547,7 +456,7 @@ func TestExecutor_EscapeContainer(t *testing.T) { execCmd.ResourceLimits = true - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) _, err := executor.Launch(execCmd) @@ -597,7 +506,7 @@ func TestExecutor_DoesNotInheritOomScoreAdj(t *testing.T) { execCmd.Cmd = "/bin/bash" execCmd.Args = []string{"-c", "cat /proc/self/oom_score_adj"} - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) _, err = executor.Launch(execCmd) @@ -691,7 +600,7 @@ CapAmb: 0000000000000400`, execCmd.Capabilities = capsAllowed } - executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) _, err := executor.Launch(execCmd) @@ -739,7 +648,7 @@ func TestExecutor_ClientCleanup(t *testing.T) { execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir defer allocDir.Destroy() - 
executor := NewExecutorWithIsolation(testlog.HCLogger(t), 0) + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) defer executor.Shutdown("", 0) // Need to run a command which will produce continuous output but not @@ -843,46 +752,3 @@ func TestExecutor_cmdMounts(t *testing.T) { require.EqualValues(t, expected, cmdMounts(input)) } - -// TestUniversalExecutor_NoCgroup asserts that commands are executed in the -// same cgroup as parent process -func TestUniversalExecutor_NoCgroup(t *testing.T) { - ci.Parallel(t) - testutil.ExecCompatible(t) - - expectedBytes, err := os.ReadFile("/proc/self/cgroup") - require.NoError(t, err) - - expected := strings.TrimSpace(string(expectedBytes)) - - testExecCmd := testExecutorCommand(t) - execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir - execCmd.Cmd = "/bin/cat" - execCmd.Args = []string{"/proc/self/cgroup"} - defer allocDir.Destroy() - - execCmd.BasicProcessCgroup = false - execCmd.ResourceLimits = false - - executor := NewExecutor(testlog.HCLogger(t), 0) - defer executor.Shutdown("SIGKILL", 0) - - _, err = executor.Launch(execCmd) - require.NoError(t, err) - - _, err = executor.Wait(context.Background()) - require.NoError(t, err) - - tu.WaitForResult(func() (bool, error) { - act := strings.TrimSpace(string(testExecCmd.stdout.String())) - if expected != act { - return false, fmt.Errorf("expected:\n%s actual:\n%s", expected, act) - } - return true, nil - }, func(err error) { - stderr := strings.TrimSpace(string(testExecCmd.stderr.String())) - t.Logf("stderr: %v", stderr) - require.NoError(t, err) - }) - -} diff --git a/drivers/shared/executor/executor_plugin.go b/drivers/shared/executor/executor_plugin.go index 05be90b7c..830da0e1d 100644 --- a/drivers/shared/executor/executor_plugin.go +++ b/drivers/shared/executor/executor_plugin.go @@ -6,8 +6,8 @@ package executor import ( "context" - hclog "github.com/hashicorp/go-hclog" - plugin "github.com/hashicorp/go-plugin" + "github.com/hashicorp/go-hclog" + 
"github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/drivers/shared/executor/proto" "google.golang.org/grpc" ) @@ -15,16 +15,15 @@ import ( type ExecutorPlugin struct { // TODO: support backwards compatibility with pre 0.9 NetRPC plugin plugin.NetRPCUnsupportedPlugin - logger hclog.Logger - fsIsolation bool - cpuTotalTicks uint64 + logger hclog.Logger + fsIsolation bool } func (p *ExecutorPlugin) GRPCServer(broker *plugin.GRPCBroker, s *grpc.Server) error { if p.fsIsolation { - proto.RegisterExecutorServer(s, &grpcExecutorServer{impl: NewExecutorWithIsolation(p.logger, p.cpuTotalTicks)}) + proto.RegisterExecutorServer(s, &grpcExecutorServer{impl: NewExecutorWithIsolation(p.logger)}) } else { - proto.RegisterExecutorServer(s, &grpcExecutorServer{impl: NewExecutor(p.logger, p.cpuTotalTicks)}) + proto.RegisterExecutorServer(s, &grpcExecutorServer{impl: NewExecutor(p.logger)}) } return nil } diff --git a/drivers/shared/executor/executor_test.go b/drivers/shared/executor/executor_test.go index a02261fad..368915690 100644 --- a/drivers/shared/executor/executor_test.go +++ b/drivers/shared/executor/executor_test.go @@ -20,7 +20,7 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/testlog" @@ -29,6 +29,7 @@ import ( "github.com/hashicorp/nomad/plugins/drivers" tu "github.com/hashicorp/nomad/testutil" ps "github.com/mitchellh/go-ps" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -36,7 +37,7 @@ import ( var executorFactories = map[string]executorFactory{} type executorFactory struct { - new func(hclog.Logger, uint64) Executor + new func(hclog.Logger) Executor configureExecCmd func(*testing.T, *ExecCommand) } @@ 
-90,13 +91,20 @@ func testExecutorCommand(t *testing.T) *testExecCmd { LinuxResources: &drivers.LinuxResources{ CPUShares: 500, MemoryLimitBytes: 256 * 1024 * 1024, + CpusetCgroupPath: cgroupslib.LinuxResourcesPath(alloc.ID, task.Name), }, }, } - if cgutil.UseV2 { - cmd.Resources.LinuxResources.CpusetCgroupPath = filepath.Join(cgutil.CgroupRoot, "testing.scope", cgutil.CgroupScope(alloc.ID, task.Name)) - } + // create cgroup for our task (because we aren't using task runners) + f := cgroupslib.Factory(alloc.ID, task.Name) + must.NoError(t, f.Setup()) + + // cleanup cgroup once test is done (because no task runners) + t.Cleanup(func() { + _ = f.Kill() + _ = f.Teardown() + }) testCmd := &testExecCmd{ command: cmd, @@ -150,7 +158,7 @@ func TestExecutor_Start_Invalid(t *testing.T) { execCmd.Args = []string{"1"} factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) _, err := executor.Launch(execCmd) @@ -170,7 +178,7 @@ func TestExecutor_Start_Wait_Failure_Code(t *testing.T) { execCmd.Args = []string{"-c", "sleep 1; /bin/date fail"} factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) ps, err := executor.Launch(execCmd) @@ -195,7 +203,7 @@ func TestExecutor_Start_Wait(t *testing.T) { factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) ps, err := executor.Launch(execCmd) @@ -232,7 +240,7 @@ func TestExecutor_Start_Wait_Children(t *testing.T) { factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("SIGKILL", 0) ps, err := 
executor.Launch(execCmd) @@ -273,7 +281,7 @@ func TestExecutor_WaitExitSignal(t *testing.T) { factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) pState, err := executor.Launch(execCmd) @@ -331,7 +339,7 @@ func TestExecutor_Start_Kill(t *testing.T) { factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) ps, err := executor.Launch(execCmd) @@ -536,7 +544,7 @@ func TestExecutor_Start_Kill_Immediately_NoGrace(t *testing.T) { execCmd.Args = []string{"100"} factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) ps, err := executor.Launch(execCmd) @@ -572,7 +580,7 @@ func TestExecutor_Start_Kill_Immediately_WithGrace(t *testing.T) { execCmd.Args = []string{"100"} factory.configureExecCmd(t, execCmd) defer allocDir.Destroy() - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) ps, err := executor.Launch(execCmd) @@ -618,7 +626,7 @@ func TestExecutor_Start_NonExecutableBinaries(t *testing.T) { execCmd.Cmd = nonExecutablePath factory.configureExecCmd(t, execCmd) - executor := factory.new(testlog.HCLogger(t), 0) + executor := factory.new(testlog.HCLogger(t)) defer executor.Shutdown("", 0) // need to configure path in chroot with that file if using isolation executor diff --git a/drivers/shared/executor/executor_universal_linux.go b/drivers/shared/executor/executor_universal_linux.go index ce4267063..9fc72bbed 100644 --- a/drivers/shared/executor/executor_universal_linux.go +++ b/drivers/shared/executor/executor_universal_linux.go @@ -6,19 +6,17 @@ package executor import ( "fmt" "os/exec" - 
"path/filepath" "strconv" - "strings" "syscall" "github.com/containernetworking/plugins/pkg/ns" - "github.com/hashicorp/nomad/client/lib/cgutil" - "github.com/hashicorp/nomad/client/lib/resources" - "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/go-set" + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/drivers/shared/executor/procstats" "github.com/hashicorp/nomad/helper/users" "github.com/hashicorp/nomad/plugins/drivers" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/specconv" + "github.com/opencontainers/runc/libcontainer/cgroups" + "golang.org/x/sys/unix" ) // setCmdUser takes a user id as a string and looks up the user, and sets the command @@ -69,74 +67,177 @@ func setCmdUser(cmd *exec.Cmd, userid string) error { return nil } -// configureResourceContainer configured the cgroups to be used to track pids -// created by the executor -func (e *UniversalExecutor) configureResourceContainer(pid int) error { - cfg := &configs.Config{ - Cgroups: &configs.Cgroup{ - Resources: &configs.Resources{}, - }, +// setSubCmdCgroup sets the cgroup for non-Task child processes of the +// executor.Executor (since in cg2 it lives outside the task's cgroup) +func (e *UniversalExecutor) setSubCmdCgroup(cmd *exec.Cmd, cgroup string) (func(), error) { + if cgroup == "" { + panic("cgroup must be set") } - // note: this was always here, but not used until cgroups v2 support - for _, device := range specconv.AllowedDevices { - cfg.Cgroups.Resources.Devices = append(cfg.Cgroups.Resources.Devices, &device.Rule) + // make sure attrs struct has been set + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = new(syscall.SysProcAttr) } - lookup := func(env []string, name string) (result string) { - for _, s := range env { - if strings.HasPrefix(s, name+"=") { - result = strings.TrimLeft(s, name+"=") - return - } + switch cgroupslib.GetMode() { + case cgroupslib.CG2: + fd, cleanup, err := 
e.statCG(cgroup) + if err != nil { + return nil, err } - return - } - - if cgutil.UseV2 { - // in v2 we have the definitive cgroup; create and enter it - - // use the task environment variables for determining the cgroup path - - // not ideal but plumbing the values directly requires grpc protobuf changes - parent := lookup(e.commandCfg.Env, taskenv.CgroupParent) - allocID := lookup(e.commandCfg.Env, taskenv.AllocID) - task := lookup(e.commandCfg.Env, taskenv.TaskName) - if parent == "" || allocID == "" || task == "" { - return fmt.Errorf( - "environment variables %s must be set", - strings.Join([]string{taskenv.CgroupParent, taskenv.AllocID, taskenv.TaskName}, ","), - ) - } - scope := cgutil.CgroupScope(allocID, task) - path := filepath.Join("/", cgutil.GetCgroupParent(parent), scope) - cfg.Cgroups.Path = path - e.containment = resources.Contain(e.logger, cfg.Cgroups) - return e.containment.Apply(pid) - - } else { - // in v1 create a freezer cgroup for use by containment - - if err := cgutil.ConfigureBasicCgroups(cfg); err != nil { - // Log this error to help diagnose cases where nomad is run with too few - // permissions, but don't return an error. There is no separate check for - // cgroup creation permissions, so this may be the happy path. 
- e.logger.Warn("failed to create cgroup", - "docs", "https://www.nomadproject.io/docs/drivers/raw_exec.html#no_cgroups", - "error", err) - return nil - } - path := cfg.Cgroups.Path - e.logger.Trace("cgroup created, now need to apply", "path", path) - e.containment = resources.Contain(e.logger, cfg.Cgroups) - return e.containment.Apply(pid) + cmd.SysProcAttr.UseCgroupFD = true + cmd.SysProcAttr.CgroupFD = fd + return cleanup, nil + default: + return func() {}, nil } } -func (e *UniversalExecutor) getAllPids() (resources.PIDs, error) { - if e.containment == nil { - return getAllPidsByScanning() +func (e *UniversalExecutor) ListProcesses() *set.Set[procstats.ProcessID] { + return procstats.List(e.command) +} + +func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) { + fd, err := unix.Open(cgroup, unix.O_PATH, 0) + cleanup := func() { + _ = unix.Close(fd) } - return e.containment.GetPIDs(), nil + return fd, cleanup, err +} + +// configureResourceContainer on Linux configures the cgroups to be used to track +// pids created by the executor +func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) { + + // get our cgroup reference (cpuset in v1) + cgroup := command.Cgroup() + + // cgCleanup will be called after the task has been launched + // v1: remove the executor process from the task's cgroups + // v2: let go of the file descriptor of the task's cgroup + var cgCleanup func() + + // manually configure cgroup for cpu / memory constraints + switch cgroupslib.GetMode() { + case cgroupslib.CG1: + e.configureCG1(cgroup, command) + cgCleanup = e.enterCG1(cgroup) + default: + e.configureCG2(cgroup, command) + // configure child process to spawn in the cgroup + // get file descriptor of the cgroup made for this task + fd, cleanup, err := e.statCG(cgroup) + if err != nil { + return nil, err + } + e.childCmd.SysProcAttr.UseCgroupFD = true + e.childCmd.SysProcAttr.CgroupFD = fd + cgCleanup = cleanup + } + + 
e.logger.Info("configured cgroup for executor", "pid", pid) + + return cgCleanup, nil +} + +// enterCG1 will write the executor PID (i.e. itself) into the cgroups we +// created for the task - so that the task and its children will spawn in +// those cgroups. The cleanup function moves the executor out of the task's +// cgroups and into the nomad/ parent cgroups. +func (e *UniversalExecutor) enterCG1(cgroup string) func() { + pid := strconv.Itoa(unix.Getpid()) + + // write pid to all the groups + ifaces := []string{"freezer", "cpu", "memory"} // todo: cpuset + for _, iface := range ifaces { + ed := cgroupslib.OpenFromCpusetCG1(cgroup, iface) + err := ed.Write("cgroup.procs", pid) + if err != nil { + e.logger.Warn("failed to write cgroup", "interface", iface, "error", err) + } + } + + // cleanup func that moves executor back up to nomad cgroup + return func() { + for _, iface := range ifaces { + err := cgroupslib.WriteNomadCG1(iface, "cgroup.procs", pid) + if err != nil { + e.logger.Warn("failed to move executor cgroup", "interface", iface, "error", err) + } + } + } +} + +func (e *UniversalExecutor) configureCG1(cgroup string, command *ExecCommand) { + memHard, memSoft := e.computeMemory(command) + ed := cgroupslib.OpenFromCpusetCG1(cgroup, "memory") + _ = ed.Write("memory.limit_in_bytes", strconv.FormatInt(memHard, 10)) + if memSoft > 0 { + ed = cgroupslib.OpenFromCpusetCG1(cgroup, "memory") + _ = ed.Write("memory.soft_limit_in_bytes", strconv.FormatInt(memSoft, 10)) + } + + // set memory swappiness + swappiness := cgroupslib.MaybeDisableMemorySwappiness() + if swappiness != nil { + ed := cgroupslib.OpenFromCpusetCG1(cgroup, "memory") + value := int64(*swappiness) + _ = ed.Write("memory.swappiness", strconv.FormatInt(value, 10)) + } + + // write cpu shares file + cpuShares := strconv.FormatInt(command.Resources.LinuxResources.CPUShares, 10) + ed = cgroupslib.OpenFromCpusetCG1(cgroup, "cpu") + _ = ed.Write("cpu.shares", cpuShares) + + // TODO(shoenig) manage cpuset 
+ e.logger.Info("TODO CORES", "cpuset", command.Resources.LinuxResources.CpusetCpus) +} + +func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) { + // write memory cgroup files + memHard, memSoft := e.computeMemory(command) + ed := cgroupslib.OpenPath(cgroup) + _ = ed.Write("memory.max", strconv.FormatInt(memHard, 10)) + if memSoft > 0 { + ed = cgroupslib.OpenPath(cgroup) + _ = ed.Write("memory.low", strconv.FormatInt(memSoft, 10)) + } + + // set memory swappiness + swappiness := cgroupslib.MaybeDisableMemorySwappiness() + if swappiness != nil { + ed := cgroupslib.OpenPath(cgroup) + value := int64(*swappiness) + _ = ed.Write("memory.swappiness", strconv.FormatInt(value, 10)) + } + + // write cpu cgroup files + cpuWeight := e.computeCPU(command) + ed = cgroupslib.OpenPath(cgroup) + _ = ed.Write("cpu.weight", strconv.FormatUint(cpuWeight, 10)) + + // TODO(shoenig) manage cpuset + e.logger.Info("TODO CORES", "cpuset", command.Resources.LinuxResources.CpusetCpus) +} + +func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 { + cpuShares := command.Resources.LinuxResources.CPUShares + cpuWeight := cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares)) + return cpuWeight +} + +// computeMemory returns the hard and soft memory limits for the task +func (*UniversalExecutor) computeMemory(command *ExecCommand) (int64, int64) { + mem := command.Resources.NomadResources.Memory + memHard, memSoft := mem.MemoryMaxMB, mem.MemoryMB + if memHard <= 0 { + memHard = mem.MemoryMB + memSoft = 0 + } + memHardBytes := memHard * 1024 * 1024 + memSoftBytes := memSoft * 1024 * 1024 + return memHardBytes, memSoftBytes } // withNetworkIsolation calls the passed function the network namespace `spec` diff --git a/drivers/shared/executor/executor_unix.go b/drivers/shared/executor/executor_unix.go index 42490f221..0164d8c51 100644 --- a/drivers/shared/executor/executor_unix.go +++ b/drivers/shared/executor/executor_unix.go @@ -1,7 +1,7 @@ // Copyright (c) 
HashiCorp, Inc. // SPDX-License-Identifier: MPL-2.0 -//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris +//go:build unix package executor diff --git a/drivers/shared/executor/pid_collector.go b/drivers/shared/executor/pid_collector.go deleted file mode 100644 index c3184a83d..000000000 --- a/drivers/shared/executor/pid_collector.go +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package executor - -import ( - "os" - "strconv" - "sync" - "time" - - hclog "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/lib/resources" - "github.com/hashicorp/nomad/helper/stats" - "github.com/hashicorp/nomad/plugins/drivers" - ps "github.com/mitchellh/go-ps" - "github.com/shirou/gopsutil/v3/process" -) - -var ( - // pidScanInterval is the interval at which the executor scans the process - // tree for finding out the pids that the executor and it's child processes - // have forked - pidScanInterval = 5 * time.Second -) - -// pidCollector is a utility that can be embedded in an executor to collect pid -// stats -type pidCollector struct { - pids map[int]*resources.PID - pidLock sync.RWMutex - logger hclog.Logger -} - -// allPidGetter is a func which is used by the pid collector to gather -// stats on -type allPidGetter func() (resources.PIDs, error) - -func newPidCollector(logger hclog.Logger) *pidCollector { - return &pidCollector{ - pids: make(map[int]*resources.PID), - logger: logger.Named("pid_collector"), - } -} - -// collectPids collects the pids of the child processes that the executor is -// running every 5 seconds -func (c *pidCollector) collectPids(stopCh chan interface{}, pidGetter allPidGetter) { - // Fire the timer right away when the executor starts from there on the pids - // are collected every scan interval - timer := time.NewTimer(0) - defer timer.Stop() - for { - select { - case <-timer.C: - pids, err := pidGetter() - if err != nil { - c.logger.Debug("error 
collecting pids", "error", err) - } - c.pidLock.Lock() - - // Adding pids which are not being tracked - for pid, np := range pids { - if _, ok := c.pids[pid]; !ok { - c.pids[pid] = np - } - } - // Removing pids which are no longer present - for pid := range c.pids { - if _, ok := pids[pid]; !ok { - delete(c.pids, pid) - } - } - c.pidLock.Unlock() - timer.Reset(pidScanInterval) - case <-stopCh: - return - } - } -} - -// scanPids scans all the pids on the machine running the current executor and -// returns the child processes of the executor. -func scanPids(parentPid int, allPids []ps.Process) (map[int]*resources.PID, error) { - processFamily := make(map[int]struct{}) - processFamily[parentPid] = struct{}{} - - // A mapping of pids to their parent pids. It is used to build the process - // tree of the executing task - pidsRemaining := make(map[int]int, len(allPids)) - for _, pid := range allPids { - pidsRemaining[pid.Pid()] = pid.PPid() - } - - for { - // flag to indicate if we have found a match - foundNewPid := false - - for pid, ppid := range pidsRemaining { - _, childPid := processFamily[ppid] - - // checking if the pid is a child of any of the parents - if childPid { - processFamily[pid] = struct{}{} - delete(pidsRemaining, pid) - foundNewPid = true - } - } - - // not scanning anymore if we couldn't find a single match - if !foundNewPid { - break - } - } - - res := make(map[int]*resources.PID) - for pid := range processFamily { - res[pid] = &resources.PID{ - PID: pid, - StatsTotalCPU: stats.NewCpuStats(), - StatsUserCPU: stats.NewCpuStats(), - StatsSysCPU: stats.NewCpuStats(), - } - } - return res, nil -} - -// pidStats returns the resource usage stats per pid -func (c *pidCollector) pidStats() (map[string]*drivers.ResourceUsage, error) { - stats := make(map[string]*drivers.ResourceUsage) - c.pidLock.RLock() - pids := make(map[int]*resources.PID, len(c.pids)) - for k, v := range c.pids { - pids[k] = v - } - c.pidLock.RUnlock() - for pid, np := range pids { - p, 
err := process.NewProcess(int32(pid)) - if err != nil { - c.logger.Trace("unable to create new process", "pid", pid, "error", err) - continue - } - ms := &drivers.MemoryStats{} - if memInfo, err := p.MemoryInfo(); err == nil { - ms.RSS = memInfo.RSS - ms.Swap = memInfo.Swap - ms.Measured = ExecutorBasicMeasuredMemStats - } - - cs := &drivers.CpuStats{} - if cpuStats, err := p.Times(); err == nil { - cs.SystemMode = np.StatsSysCPU.Percent(cpuStats.System * float64(time.Second)) - cs.UserMode = np.StatsUserCPU.Percent(cpuStats.User * float64(time.Second)) - cs.Measured = ExecutorBasicMeasuredCpuStats - - // calculate cpu usage percent - cs.Percent = np.StatsTotalCPU.Percent(cpuStats.Total() * float64(time.Second)) - } - stats[strconv.Itoa(pid)] = &drivers.ResourceUsage{MemoryStats: ms, CpuStats: cs} - } - - return stats, nil -} - -// aggregatedResourceUsage aggregates the resource usage of all the pids and -// returns a TaskResourceUsage data point -func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string]*drivers.ResourceUsage) *drivers.TaskResourceUsage { - ts := time.Now().UTC().UnixNano() - var ( - systemModeCPU, userModeCPU, percent float64 - totalRSS, totalSwap uint64 - ) - - for _, pidStat := range pidStats { - systemModeCPU += pidStat.CpuStats.SystemMode - userModeCPU += pidStat.CpuStats.UserMode - percent += pidStat.CpuStats.Percent - - totalRSS += pidStat.MemoryStats.RSS - totalSwap += pidStat.MemoryStats.Swap - } - - totalCPU := &drivers.CpuStats{ - SystemMode: systemModeCPU, - UserMode: userModeCPU, - Percent: percent, - Measured: ExecutorBasicMeasuredCpuStats, - TotalTicks: systemCpuStats.TicksConsumed(percent), - } - - totalMemory := &drivers.MemoryStats{ - RSS: totalRSS, - Swap: totalSwap, - Measured: ExecutorBasicMeasuredMemStats, - } - - resourceUsage := drivers.ResourceUsage{ - MemoryStats: totalMemory, - CpuStats: totalCPU, - } - return &drivers.TaskResourceUsage{ - ResourceUsage: &resourceUsage, - Timestamp: ts, - Pids: 
pidStats, - } -} - -func getAllPidsByScanning() (resources.PIDs, error) { - allProcesses, err := ps.Processes() - if err != nil { - return nil, err - } - return scanPids(os.Getpid(), allProcesses) -} diff --git a/drivers/shared/executor/pid_collector_test.go b/drivers/shared/executor/pid_collector_test.go deleted file mode 100644 index 9040c75b2..000000000 --- a/drivers/shared/executor/pid_collector_test.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package executor - -import ( - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/mitchellh/go-ps" -) - -func TestScanPids(t *testing.T) { - ci.Parallel(t) - p1 := NewFakeProcess(2, 5) - p2 := NewFakeProcess(10, 2) - p3 := NewFakeProcess(15, 6) - p4 := NewFakeProcess(3, 10) - p5 := NewFakeProcess(20, 18) - - nomadPids, err := scanPids(5, []ps.Process{p1, p2, p3, p4, p5}) - if err != nil { - t.Fatalf("error: %v", err) - } - if len(nomadPids) != 4 { - t.Fatalf("expected: 4, actual: %v", len(nomadPids)) - } -} - -type FakeProcess struct { - pid int - ppid int -} - -func (f FakeProcess) Pid() int { - return f.pid -} - -func (f FakeProcess) PPid() int { - return f.ppid -} - -func (f FakeProcess) Executable() string { - return "fake" -} - -func NewFakeProcess(pid int, ppid int) ps.Process { - return FakeProcess{pid: pid, ppid: ppid} -} diff --git a/drivers/shared/executor/plugins.go b/drivers/shared/executor/plugins.go index 97aa65187..c35479346 100644 --- a/drivers/shared/executor/plugins.go +++ b/drivers/shared/executor/plugins.go @@ -22,18 +22,13 @@ type ExecutorConfig struct { // FSIsolation if set will use an executor implementation that support // filesystem isolation FSIsolation bool - - // cpuTotalTicks is the total CPU compute. 
It should be given as Cores * MHz - // (2 Cores * 2 Ghz = 4000) - CpuTotalTicks uint64 } -func GetPluginMap(logger hclog.Logger, fsIsolation bool, cpuTotalTicks uint64) map[string]plugin.Plugin { +func GetPluginMap(logger hclog.Logger, fsIsolation bool) map[string]plugin.Plugin { return map[string]plugin.Plugin{ "executor": &ExecutorPlugin{ - logger: logger, - fsIsolation: fsIsolation, - cpuTotalTicks: cpuTotalTicks, + logger: logger, + fsIsolation: fsIsolation, }, } } diff --git a/drivers/shared/executor/procstats/getstats.go b/drivers/shared/executor/procstats/getstats.go new file mode 100644 index 000000000..ad9054070 --- /dev/null +++ b/drivers/shared/executor/procstats/getstats.go @@ -0,0 +1,135 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package procstats + +import ( + "context" + "strconv" + "sync" + "time" + + "github.com/hashicorp/nomad/client/lib/cpustats" + "github.com/hashicorp/nomad/plugins/drivers" + "github.com/shirou/gopsutil/v3/process" + "oss.indeed.com/go/libtime" +) + +func New(top cpustats.Topology, pl ProcessList) ProcessStats { + const cacheTTL = 5 * time.Second + return &linuxProcStats{ + cacheTTL: cacheTTL, + procList: pl, + top: top, + clock: libtime.SystemClock(), + latest: make(map[ProcessID]*stats), + cache: make(ProcUsages), + } +} + +type stats struct { + TotalCPU *cpustats.Tracker + UserCPU *cpustats.Tracker + SystemCPU *cpustats.Tracker +} + +type linuxProcStats struct { + cacheTTL time.Duration + procList ProcessList + clock libtime.Clock + top cpustats.Topology + + lock sync.Mutex + latest map[ProcessID]*stats + cache ProcUsages + at time.Time +} + +func (lps *linuxProcStats) expired() bool { + age := lps.clock.Since(lps.at) + return age > lps.cacheTTL +} + +// scanPIDs will update lps.latest with the set of detected live pids that make +// up the task process tree / are in the tasks cgroup +func (lps *linuxProcStats) scanPIDs() { + currentPIDs := lps.procList.ListProcesses() + + // remove old pids 
no longer present + for pid := range lps.latest { + if !currentPIDs.Contains(pid) { + delete(lps.latest, pid) + } + } + + // insert trackers for new pids not yet present + for _, pid := range currentPIDs.Slice() { + if _, exists := lps.latest[pid]; !exists { + lps.latest[pid] = &stats{ + TotalCPU: cpustats.New(lps.top), + UserCPU: cpustats.New(lps.top), + SystemCPU: cpustats.New(lps.top), + } + } + } +} + +func (lps *linuxProcStats) cached() ProcUsages { + return lps.cache +} + +func (lps *linuxProcStats) StatProcesses() ProcUsages { + lps.lock.Lock() + defer lps.lock.Unlock() + + if !lps.expired() { + return lps.cache + } + + // the stats are expired, scan for new information + lps.scanPIDs() + + // create the response resource usage map + var result = make(ProcUsages) + for pid, s := range lps.latest { + p, err := process.NewProcess(int32(pid)) + if err != nil { + continue + } + + getMemory := func() *drivers.MemoryStats { + ms := new(drivers.MemoryStats) + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + if memInfo, err := p.MemoryInfoWithContext(ctx); err == nil { + ms.RSS = memInfo.RSS + ms.Swap = memInfo.Swap + ms.Measured = ExecutorBasicMeasuredMemStats + } + return ms + } + + getCPU := func() *drivers.CpuStats { + cs := new(drivers.CpuStats) + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + if cpuInfo, err := p.TimesWithContext(ctx); err == nil { + const second = float64(time.Second) + cs.SystemMode = s.SystemCPU.Percent(cpuInfo.System * second) + cs.UserMode = s.UserCPU.Percent(cpuInfo.User * second) + cs.Percent = s.TotalCPU.Percent(cpuInfo.Total() * second) + cs.Measured = ExecutorBasicMeasuredCpuStats + } + return cs + } + + spid := strconv.Itoa(pid) + result[spid] = &drivers.ResourceUsage{ + MemoryStats: getMemory(), + CpuStats: getCPU(), + } + } + + lps.cache = result + return result +} diff --git a/drivers/shared/executor/procstats/list_default.go 
b/drivers/shared/executor/procstats/list_default.go new file mode 100644 index 000000000..35655fb9b --- /dev/null +++ b/drivers/shared/executor/procstats/list_default.go @@ -0,0 +1,51 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !linux + +package procstats + +import ( + "context" + "time" + + "github.com/hashicorp/go-set" + "github.com/hashicorp/nomad/lib/lang" + "github.com/shirou/gopsutil/v3/process" +) + +// List the process tree starting at the given executorPID +func List(executorPID int) *set.Set[ProcessID] { + result := set.New[ProcessID](10) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + stack := lang.NewStack[int32]() + stack.Push(int32(executorPID)) + + for { + if stack.Empty() { + break + } + + nextPPID := stack.Pop() + result.Insert(ProcessID(nextPPID)) + + p, err := process.NewProcessWithContext(ctx, int32(nextPPID)) + if err != nil { + continue + } + + children, err := p.ChildrenWithContext(ctx) + if err != nil { + continue + } + + for _, child := range children { + stack.Push(child.Pid) + } + } + + return result +} diff --git a/drivers/shared/executor/procstats/list_linux.go b/drivers/shared/executor/procstats/list_linux.go new file mode 100644 index 000000000..5570ecd1d --- /dev/null +++ b/drivers/shared/executor/procstats/list_linux.go @@ -0,0 +1,32 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package procstats + +import ( + "github.com/hashicorp/go-set" + "github.com/hashicorp/nomad/client/lib/cgroupslib" +) + +type Cgrouper interface { + Cgroup() string +} + +func List(cg Cgrouper) *set.Set[ProcessID] { + cgroup := cg.Cgroup() + var ed cgroupslib.Interface + switch cgroupslib.GetMode() { + case cgroupslib.CG1: + ed = cgroupslib.OpenFromCpusetCG1(cgroup, "freezer") + default: + ed = cgroupslib.OpenPath(cgroup) + } + + s, err := ed.PIDs() + if err != nil { + return set.New[ProcessID](0) + } + return s +} diff --git a/drivers/shared/executor/procstats/procstats.go b/drivers/shared/executor/procstats/procstats.go new file mode 100644 index 000000000..6e1c8d3c1 --- /dev/null +++ b/drivers/shared/executor/procstats/procstats.go @@ -0,0 +1,82 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package procstats + +import ( + "time" + + "github.com/hashicorp/go-set" + "github.com/hashicorp/nomad/client/lib/cpustats" + "github.com/hashicorp/nomad/plugins/drivers" +) + +var ( + // The statistics the basic executor exposes + ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"} + ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"} +) + +// ProcessID is an alias for int; it just helps us identify where PIDs from +// the kernel are being used. +type ProcessID = int + +// ProcUsages is a map from PID to the resources that process is consuming. +// +// The pid type is a string because that's how Nomad wants it. +type ProcUsages map[string]*drivers.ResourceUsage + +// A ProcessStats is anything (i.e. a task driver) that implements StatProcesses +// for gathering CPU and memory process stats for all processes associated with +// a task. +type ProcessStats interface { + StatProcesses() ProcUsages +} + +// A ProcessList is anything (i.e. a task driver) that implements ListProcesses +// for gathering the list of process IDs associated with a task. 
+type ProcessList interface { + ListProcesses() *set.Set[ProcessID] +} + +// Aggregate combines a given ProcUsages with the Tracker for the Client. +func Aggregate(systemStats *cpustats.Tracker, procStats ProcUsages) *drivers.TaskResourceUsage { + ts := time.Now().UTC().UnixNano() + var ( + systemModeCPU, userModeCPU, percent float64 + totalRSS, totalSwap uint64 + ) + + for _, pidStat := range procStats { + systemModeCPU += pidStat.CpuStats.SystemMode + userModeCPU += pidStat.CpuStats.UserMode + percent += pidStat.CpuStats.Percent + + totalRSS += pidStat.MemoryStats.RSS + totalSwap += pidStat.MemoryStats.Swap + } + + totalCPU := &drivers.CpuStats{ + SystemMode: systemModeCPU, + UserMode: userModeCPU, + Percent: percent, + Measured: ExecutorBasicMeasuredCpuStats, + TotalTicks: systemStats.TicksConsumed(percent), + } + + totalMemory := &drivers.MemoryStats{ + RSS: totalRSS, + Swap: totalSwap, + Measured: ExecutorBasicMeasuredMemStats, + } + + resourceUsage := drivers.ResourceUsage{ + MemoryStats: totalMemory, + CpuStats: totalCPU, + } + return &drivers.TaskResourceUsage{ + ResourceUsage: &resourceUsage, + Timestamp: ts, + Pids: procStats, + } +} diff --git a/drivers/shared/executor/utils.go b/drivers/shared/executor/utils.go index ff1c84c52..fe85bf67d 100644 --- a/drivers/shared/executor/utils.go +++ b/drivers/shared/executor/utils.go @@ -13,7 +13,6 @@ import ( hclog "github.com/hashicorp/go-hclog" plugin "github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/drivers/shared/executor/proto" - "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/plugins/base" ) @@ -29,10 +28,12 @@ const ( // CreateExecutor launches an executor plugin and returns an instance of the // Executor interface -func CreateExecutor(logger hclog.Logger, driverConfig *base.ClientDriverConfig, - executorConfig *ExecutorConfig) (Executor, *plugin.Client, error) { +func CreateExecutor( + logger hclog.Logger, + driverConfig *base.ClientDriverConfig, + executorConfig 
*ExecutorConfig, +) (Executor, *plugin.Client, error) { - executorConfig.CpuTotalTicks = stats.CpuTotalTicks() c, err := json.Marshal(executorConfig) if err != nil { return nil, nil, fmt.Errorf("unable to create executor config: %v", err) @@ -43,9 +44,8 @@ func CreateExecutor(logger hclog.Logger, driverConfig *base.ClientDriverConfig, } p := &ExecutorPlugin{ - logger: logger, - fsIsolation: executorConfig.FSIsolation, - cpuTotalTicks: executorConfig.CpuTotalTicks, + logger: logger, + fsIsolation: executorConfig.FSIsolation, } config := &plugin.ClientConfig{ @@ -78,7 +78,7 @@ func ReattachToExecutor(reattachConfig *plugin.ReattachConfig, logger hclog.Logg config := &plugin.ClientConfig{ HandshakeConfig: base.Handshake, Reattach: reattachConfig, - Plugins: GetPluginMap(logger, false, stats.CpuTotalTicks()), + Plugins: GetPluginMap(logger, false), AllowedProtocols: []plugin.Protocol{plugin.ProtocolGRPC}, Logger: logger.Named("executor"), } diff --git a/drivers/shared/executor/z_executor_cmd.go b/drivers/shared/executor/z_executor_cmd.go index 8125e4fab..cfd1385cd 100644 --- a/drivers/shared/executor/z_executor_cmd.go +++ b/drivers/shared/executor/z_executor_cmd.go @@ -9,7 +9,6 @@ import ( hclog "github.com/hashicorp/go-hclog" plugin "github.com/hashicorp/go-plugin" - "github.com/hashicorp/nomad/plugins/base" ) @@ -49,7 +48,6 @@ func init() { Plugins: GetPluginMap( logger, executorConfig.FSIsolation, - executorConfig.CpuTotalTicks, ), GRPCServer: plugin.DefaultGRPCServer, Logger: logger, diff --git a/go.mod b/go.mod index 2efc10a1e..f70f288f0 100644 --- a/go.mod +++ b/go.mod @@ -85,6 +85,7 @@ require ( github.com/hashicorp/vault/sdk v0.9.0 github.com/hashicorp/yamux v0.1.1 github.com/hpcloud/tail v1.0.1-0.20170814160653-37f427138745 + github.com/klauspost/cpuid/v2 v2.2.5 github.com/kr/pretty v0.3.1 github.com/kr/text v0.2.0 github.com/mattn/go-colorable v0.1.13 diff --git a/go.sum b/go.sum index c7b9e1253..9ddfe7efd 100644 --- a/go.sum +++ b/go.sum @@ -999,6 +999,8 @@ 
github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYs github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.15.11 h1:Lcadnb3RKGin4FYM/orgq0qde+nc15E5Cbqg4B9Sx9c= github.com/klauspost/compress v1.15.11/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -1758,6 +1760,7 @@ golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/helper/stats/runtime.go b/helper/goruntime/runtime.go similarity index 85% rename from helper/stats/runtime.go rename to helper/goruntime/runtime.go index 704da63f2..d00d17527 100644 --- a/helper/stats/runtime.go +++ b/helper/goruntime/runtime.go @@ -1,7 +1,8 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: MPL-2.0 -package stats +// Package goruntime contains helper functions related to the Go runtime. 
+package goruntime import ( "runtime" diff --git a/helper/stats/cpu.go b/helper/stats/cpu.go deleted file mode 100644 index f038469ca..000000000 --- a/helper/stats/cpu.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package stats - -import ( - "runtime" - "time" -) - -var ( - cpuTotalTicks uint64 -) - -// CpuStats calculates cpu usage percentage -type CpuStats struct { - prevCpuTime float64 - prevTime time.Time - - totalCpus int -} - -// NewCpuStats returns a cpu stats calculator -func NewCpuStats() *CpuStats { - numCpus := runtime.NumCPU() - cpuStats := &CpuStats{ - totalCpus: numCpus, - } - return cpuStats -} - -// Percent calculates the cpu usage percentage based on the current cpu usage -// and the previous cpu usage where usage is given as time in nanoseconds spend -// in the cpu -func (c *CpuStats) Percent(cpuTime float64) float64 { - now := time.Now() - - if c.prevCpuTime == 0.0 { - // invoked first time - c.prevCpuTime = cpuTime - c.prevTime = now - return 0.0 - } - - timeDelta := now.Sub(c.prevTime).Nanoseconds() - ret := c.calculatePercent(c.prevCpuTime, cpuTime, timeDelta) - c.prevCpuTime = cpuTime - c.prevTime = now - return ret -} - -// TicksConsumed calculates the total ticks consumes by the process across all -// cpu cores -func (c *CpuStats) TicksConsumed(percent float64) float64 { - return (percent / 100) * float64(CpuTotalTicks()) / float64(c.totalCpus) -} - -func (c *CpuStats) calculatePercent(t1, t2 float64, timeDelta int64) float64 { - vDelta := t2 - t1 - if timeDelta <= 0 || vDelta <= 0.0 { - return 0.0 - } - - overall_percent := (vDelta / float64(timeDelta)) * 100.0 - return overall_percent -} - -// Set the total ticks available across all cores. -func SetCpuTotalTicks(newCpuTotalTicks uint64) { - cpuTotalTicks = newCpuTotalTicks -} - -// CpuTotalTicks calculates the total MHz available across all cores. 
-// -// Where asymetric cores are correctly detected, the total ticks is the sum of -// the performance across both core types. -// -// Where asymetric cores are not correctly detected (such as Intel 13th gen), -// the total ticks available is over-estimated, as we assume all cores are P -// cores. -func CpuTotalTicks() uint64 { - return cpuTotalTicks -} diff --git a/helper/stats/cpu_test.go b/helper/stats/cpu_test.go deleted file mode 100644 index c26cd0d1c..000000000 --- a/helper/stats/cpu_test.go +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package stats - -import ( - "testing" - "time" - - "github.com/hashicorp/nomad/ci" -) - -func TestCpuStatsPercent(t *testing.T) { - ci.Parallel(t) - - cs := NewCpuStats() - cs.Percent(79.7) - time.Sleep(1 * time.Second) - percent := cs.Percent(80.69) - expectedPercent := 98.00 - if percent < expectedPercent && percent > (expectedPercent+1.00) { - t.Fatalf("expected: %v, actual: %v", expectedPercent, percent) - } -} diff --git a/lib/lang/stack.go b/lib/lang/stack.go new file mode 100644 index 000000000..aaecf1dd4 --- /dev/null +++ b/lib/lang/stack.go @@ -0,0 +1,43 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package lang + +// A Stack is a simple LIFO datastructure. +type Stack[T any] struct { + top *object[T] +} + +type object[T any] struct { + item T + next *object[T] +} + +// NewStack creates a Stack with no elements. +func NewStack[T any]() *Stack[T] { + return new(Stack[T]) +} + +// Push pushes item onto the stack. +func (s *Stack[T]) Push(item T) { + obj := &object[T]{ + item: item, + next: s.top, + } + s.top = obj +} + +// Pop pops the most recently pushed item from the Stack. +// +// It is a logic bug to Pop an Empty stack. +func (s *Stack[T]) Pop() T { + obj := s.top + s.top = obj.next + obj.next = nil + return obj.item +} + +// Empty returns true if there are no elements on the Stack. 
+func (s *Stack[T]) Empty() bool { + return s.top == nil +} diff --git a/lib/lang/stack_test.go b/lib/lang/stack_test.go new file mode 100644 index 000000000..9e465ed6f --- /dev/null +++ b/lib/lang/stack_test.go @@ -0,0 +1,32 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package lang + +import ( + "testing" + + "github.com/shoenig/test/must" +) + +func Test_Stack(t *testing.T) { + s := NewStack[int]() + + must.True(t, s.Empty()) + + s.Push(1) + s.Push(2) + s.Push(3) + must.NotEmpty(t, s) + + must.Eq(t, 3, s.Pop()) + must.Eq(t, 2, s.Pop()) + + s.Push(4) + s.Push(5) + + must.Eq(t, 5, s.Pop()) + must.Eq(t, 4, s.Pop()) + must.Eq(t, 1, s.Pop()) + must.Empty(t, s) +} diff --git a/nomad/server.go b/nomad/server.go index a7c9f8163..b6f66463b 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -35,8 +35,8 @@ import ( "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/codec" + "github.com/hashicorp/nomad/helper/goruntime" "github.com/hashicorp/nomad/helper/pool" - "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/lib/auth/oidc" "github.com/hashicorp/nomad/nomad/deploymentwatcher" @@ -1958,7 +1958,7 @@ func (s *Server) Stats() map[string]map[string]string { }, "raft": s.raft.Stats(), "serf": s.serf.Stats(), - "runtime": stats.RuntimeStats(), + "runtime": goruntime.RuntimeStats(), "vault": s.vault.Stats(), } diff --git a/plugins/drivers/testutils/exec_testing.go b/plugins/drivers/testutils/exec_testing.go index a9f3c3c5d..309a186fd 100644 --- a/plugins/drivers/testutils/exec_testing.go +++ b/plugins/drivers/testutils/exec_testing.go @@ -16,7 +16,7 @@ import ( "testing" "time" - "github.com/hashicorp/nomad/client/lib/cgutil" + "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/plugins/drivers" dproto "github.com/hashicorp/nomad/plugins/drivers/proto" 
"github.com/hashicorp/nomad/testutil" @@ -193,13 +193,14 @@ func TestExecFSIsolation(t *testing.T, driver *DriverHarness, taskID string) { // we always run in a cgroup - testing freezer cgroup r = execTask(t, driver, taskID, "cat /proc/self/cgroup", - false, "") + false, "", + ) require.Zero(t, r.exitCode) - if !cgutil.UseV2 { - acceptable := []string{ - ":freezer:/nomad", ":freezer:/docker", - } + switch cgroupslib.GetMode() { + + case cgroupslib.CG1: + acceptable := []string{":freezer:/nomad", ":freezer:/docker"} if testutil.IsCI() { // github actions freezer cgroup acceptable = append(acceptable, ":freezer:/actions_job") @@ -215,7 +216,7 @@ func TestExecFSIsolation(t *testing.T, driver *DriverHarness, taskID string) { if !ok { require.Fail(t, "unexpected freezer cgroup", "expected freezer to be /nomad/ or /docker/, but found:\n%s", r.stdout) } - } else { + case cgroupslib.CG2: info, _ := driver.PluginInfo() if info.Name == "docker" { // Note: docker on cgroups v2 now returns nothing diff --git a/plugins/drivers/testutils/testing.go b/plugins/drivers/testutils/testing.go index 11855a9b9..e2db11132 100644 --- a/plugins/drivers/testutils/testing.go +++ b/plugins/drivers/testutils/testing.go @@ -15,7 +15,6 @@ import ( plugin "github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/lib/cgutil" "github.com/hashicorp/nomad/client/logmon" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper/testlog" @@ -68,46 +67,9 @@ func NewDriverHarness(t testing.T, d drivers.DriverPlugin) *DriverHarness { } } -// setupCgroupV2 creates a v2 cgroup for the task, as if a Client were initialized -// and managing the cgroup as it normally would via the cpuset manager. -// -// Note that we are being lazy and trying to avoid importing cgutil because -// currently plugins/drivers/testutils is platform agnostic-ish. 
-// -// Some drivers (raw_exec) setup their own cgroup, while others (exec, java, docker) -// would otherwise depend on the Nomad cpuset manager (and docker daemon) to create -// one, which isn't available here in testing, and so we create one via the harness. -// Plumbing such metadata through to the harness is a mind bender, so we just always -// create the cgroup, but at least put it under 'testing.slice'. -// -// tl;dr raw_exec tests should ignore this cgroup. -func (h *DriverHarness) setupCgroupV2(allocID, task string) { - if cgutil.UseV2 { - h.cgroup = filepath.Join(cgutil.CgroupRoot, "testing.slice", cgutil.CgroupScope(allocID, task)) - h.logger.Trace("create cgroup for test", "parent", "testing.slice", "id", allocID, "task", task, "path", h.cgroup) - if err := os.MkdirAll(h.cgroup, 0755); err != nil { - panic(err) - } - } -} - func (h *DriverHarness) Kill() { _ = h.client.Close() h.server.Stop() - h.cleanupCgroup() -} - -// cleanupCgroup might cleanup a cgroup that may or may not be tricked by DriverHarness. -func (h *DriverHarness) cleanupCgroup() { - // some [non-exec] tests don't bother with MkAllocDir which is what would create - // the cgroup, but then do call Kill, so in that case skip the cgroup cleanup - if cgutil.UseV2 && h.cgroup != "" { - if err := os.Remove(h.cgroup); err != nil && !os.IsNotExist(err) { - // in some cases the driver will cleanup the cgroup itself, in which - // case we do not care about the cgroup not existing at cleanup time - h.t.Fatalf("failed to cleanup cgroup: %v", err) - } - } } // MkAllocDir creates a temporary directory and allocdir structure. 
@@ -159,9 +121,6 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func( } } - // setup a v2 cgroup for test cases that assume one exists - h.setupCgroupV2(alloc.ID, task.Name) - //logmon if enableLogs { lm := logmon.NewLogMon(h.logger.Named("logmon")) @@ -194,7 +153,6 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func( return func() { h.client.Close() allocDir.Destroy() - h.cleanupCgroup() } } diff --git a/plugins/drivers/testutils/testing_default.go b/plugins/drivers/testutils/testing_default.go new file mode 100644 index 000000000..6bf0b7bfa --- /dev/null +++ b/plugins/drivers/testutils/testing_default.go @@ -0,0 +1,10 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !linux + +package testutils + +func (*DriverHarness) MakeTaskCgroup(string, string) { + // nothing +} diff --git a/plugins/drivers/testutils/testing_linux.go b/plugins/drivers/testutils/testing_linux.go new file mode 100644 index 000000000..7ed280858 --- /dev/null +++ b/plugins/drivers/testutils/testing_linux.go @@ -0,0 +1,25 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +package testutils + +import ( + "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/shoenig/test/must" +) + +// MakeTaskCgroup creates the cgroup that the task driver might assume already +// exists, since Nomad client creates them. Why do we write tests that directly +// invoke task drivers without any context of the Nomad client? Who knows. 
+func (h *DriverHarness) MakeTaskCgroup(allocID, taskName string) { + f := cgroupslib.Factory(allocID, taskName) + must.NoError(h.t, f.Setup()) + + // ensure child procs are dead and remove the cgroup when the test is done + h.t.Cleanup(func() { + _ = f.Kill() + _ = f.Teardown() + }) +} diff --git a/tools/ec2info/cpu_table.go.template b/tools/ec2info/cpu_table.go.template deleted file mode 100644 index 87137f87f..000000000 --- a/tools/ec2info/cpu_table.go.template +++ /dev/null @@ -1,48 +0,0 @@ -// Code generated from hashicorp/nomad/tools/ec2info; DO NOT EDIT. - -package {{.Package}} - -// CPU contains virtual core count and processor baseline performance. -type CPU struct { - // use small units to reduce size of the embedded table - Cores uint32 // good for 4 billion cores - MHz uint32 // good for 4 billion MHz -} - -// Ticks computes the total number of cycles available across the virtual -// cores of a CPU. -func (c CPU) Ticks() int { - return int(c.MHz) * int(c.Cores) -} - -// GHz returns the speed of CPU in ghz. -func (c CPU) GHz() float64 { - return float64(c.MHz) / 1000.0 -} - -// newCPU create a CPUSpecs from the given virtual core count and core speed. -func newCPU(cores uint32, ghz float64) CPU { - return CPU{ - Cores: cores, - MHz: uint32(ghz * 1000), - } -} - -// LookupEC2CPU returns the virtual core count and core speed information from a -// lookup table generated from the Amazon EC2 API. -// -// If the instance type does not exist, nil is returned. -func LookupEC2CPU(instanceType string) *CPU { - specs, exists := instanceTypeCPU[instanceType] - if !exists { - return nil - } - return &specs -} - -{{with .Data}} -var instanceTypeCPU = map[string]CPU { - {{ range $key, $value := . 
}} - "{{ $key }}": newCPU({{$value.Cores}}, {{$value.Speed}}), {{ end }} -} -{{end}} diff --git a/tools/ec2info/main.go b/tools/ec2info/main.go deleted file mode 100644 index 3e2bd53eb..000000000 --- a/tools/ec2info/main.go +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -// Command ec2info provides a tool for generating a CPU performance lookup -// table indexed by EC2 instance types. -// -// By default the generated file will overwrite `env_aws_cpu.go` in Nomad's -// client/fingerprint package, when run from this directory. -// -// Requires AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN. -// -// Usage (invoke from Nomad's makefile) -// -// make ec2info -package main - -import ( - "fmt" - "io" - "log" - "os" - "os/exec" - "sort" - "text/template" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/ec2" -) - -func check(err error) { - if err != nil { - log.Fatal(err) - } -} - -func main() { - pkg, region, output := "fingerprint", "us-west-1", "client/fingerprint/env_aws_cpu.go" - - client, err := clientForRegion(region) - check(err) - - regions, err := getRegions(client) - check(err) - - data, err := getData(regions) - check(err) - - flat := flatten(data) - - f, err := open(output) - check(err) - defer func() { - check(f.Close()) - }() - - check(write(f, flat, pkg)) - check(format(output)) -} - -func clientForRegion(region string) (*ec2.EC2, error) { - sess, err := session.NewSession(&aws.Config{ - Region: ®ion, - }) - if err != nil { - return nil, err - } - return ec2.New(sess), nil -} - -func getRegions(client *ec2.EC2) ([]*ec2.Region, error) { - all := false // beyond account access - regions, err := client.DescribeRegions(&ec2.DescribeRegionsInput{ - AllRegions: &all, - }) - if err != nil { - log.Println("failed to create AWS session; make sure environment is setup") - log.Println("must have environment variables 
AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN") - log.Println("or ~/.aws/credentials configured properly") - return nil, err - } - return regions.Regions, nil -} - -type specs struct { - Cores int - Speed float64 -} - -func (s specs) String() string { - return fmt.Sprintf("(%d %.2f)", s.Cores, s.Speed) -} - -func getData(regions []*ec2.Region) (map[string]map[string]specs, error) { - data := make(map[string]map[string]specs) - - for _, region := range regions { - rData, rProblems, err := getDataForRegion(*region.RegionName) - if err != nil { - return nil, err - } - data[*region.RegionName] = rData - - log.Println("region", *region.RegionName, "got data for", len(rData), "instance types", len(rProblems), "incomplete") - instanceProblems(rProblems) - } - - return data, nil -} - -func instanceProblems(problems map[string]string) { - types := make([]string, 0, len(problems)) - for k := range problems { - types = append(types, k) - } - sort.Strings(types) - for _, iType := range types { - log.Println(" ->", iType, problems[iType]) - } -} - -func getDataForRegion(region string) (map[string]specs, map[string]string, error) { - client, err := clientForRegion(region) - if err != nil { - return nil, nil, err - } - - data := make(map[string]specs) - problems := make(map[string]string) - regionInfoPage(client, true, region, nil, data, problems) - return data, problems, nil -} - -func regionInfoPage(client *ec2.EC2, first bool, region string, token *string, data map[string]specs, problems map[string]string) { - if first || token != nil { - output, err := client.DescribeInstanceTypes(&ec2.DescribeInstanceTypesInput{ - NextToken: token, - }) - if err != nil { - log.Fatal(err) - } - - // recursively accumulate each page of data - regionInfoAccumulate(output, data, problems) - regionInfoPage(client, false, region, output.NextToken, data, problems) - } -} - -func regionInfoAccumulate(output *ec2.DescribeInstanceTypesOutput, data map[string]specs, problems 
map[string]string) { - for _, iType := range output.InstanceTypes { - switch { - - case iType.ProcessorInfo == nil: - fallthrough - case iType.ProcessorInfo.SustainedClockSpeedInGhz == nil: - problems[*iType.InstanceType] = "missing clock Speed" - continue - - case iType.VCpuInfo == nil: - fallthrough - case iType.VCpuInfo.DefaultVCpus == nil: - problems[*iType.InstanceType] = "missing virtual cpu Cores" - continue - - default: - data[*iType.InstanceType] = specs{ - Speed: *iType.ProcessorInfo.SustainedClockSpeedInGhz, - Cores: int(*iType.VCpuInfo.DefaultVCpus), - } - continue - } - } -} - -// open the output file for writing. -func open(output string) (io.ReadWriteCloser, error) { - return os.Create(output) -} - -// flatten region data, assuming instance type is the same across regions. -func flatten(data map[string]map[string]specs) map[string]specs { - result := make(map[string]specs) - for _, m := range data { - for iType, specifications := range m { - result[iType] = specifications - } - } - return result -} - -type Template struct { - Package string - Data map[string]specs -} - -// write the data using the cpu_table.go.template to w. -func write(w io.Writer, data map[string]specs, pkg string) error { - tmpl, err := template.ParseFiles("tools/ec2info/cpu_table.go.template") - if err != nil { - return err - } - return tmpl.Execute(w, Template{ - Package: pkg, - Data: data, - }) -} - -// format the file using gofmt. 
-func format(file string) error { - cmd := exec.Command("gofmt", "-w", file) - _, err := cmd.CombinedOutput() - return err -} diff --git a/tools/go.mod b/tools/go.mod index 9a08e9490..1666895a5 100644 --- a/tools/go.mod +++ b/tools/go.mod @@ -3,12 +3,8 @@ module github.com/hashicorp/nomad/tools go 1.20 require ( - github.com/aws/aws-sdk-go v1.44.249 github.com/hashicorp/go-set v0.1.11 github.com/shoenig/test v0.6.4 ) -require ( - github.com/google/go-cmp v0.5.9 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect -) +require github.com/google/go-cmp v0.5.9 // indirect diff --git a/tools/go.sum b/tools/go.sum index 187c6c38a..928fd681e 100644 --- a/tools/go.sum +++ b/tools/go.sum @@ -1,48 +1,6 @@ -github.com/aws/aws-sdk-go v1.44.249 h1:UbUvh/oYHdAD3vZjNi316M0NIupJsrqAcJckVuhaCB8= -github.com/aws/aws-sdk-go v1.44.249/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/hashicorp/go-set v0.1.11 h1:EZU3AzhNfCcMHtU0hCo2j4FAp7OHFDe/KTfmsbu1QIM= github.com/hashicorp/go-set v0.1.11/go.mod h1:BaYYjrI6m7H3D0j+N5Z0rZkCbBXOgNtuoDHrMJfORsk= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= -github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= -github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.1.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=