From c27af79add16a32cb4fd0a6cc1190af8424b9ffc Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 24 Mar 2022 13:40:42 -0500 Subject: [PATCH] client: cgroups v2 code review followup --- client/lib/cgutil/cgutil_linux.go | 12 ++++---- client/lib/cgutil/cgutil_linux_test.go | 6 ++-- client/lib/cgutil/cpuset_manager_v1.go | 2 +- client/lib/cgutil/cpuset_manager_v1_test.go | 14 ++++------ client/lib/cgutil/cpuset_manager_v2.go | 15 +++++----- client/lib/cgutil/cpuset_manager_v2_test.go | 2 ++ client/testutil/driver_compatible.go | 9 ++++++ client/testutil/driver_compatible_linux.go | 5 +--- .../shared/executor/executor_linux_test.go | 4 +-- .../content/docs/upgrade/upgrade-specific.mdx | 28 ++++++++++++------- 10 files changed, 57 insertions(+), 40 deletions(-) diff --git a/client/lib/cgutil/cgutil_linux.go b/client/lib/cgutil/cgutil_linux.go index 349660ec3..33b2917ec 100644 --- a/client/lib/cgutil/cgutil_linux.go +++ b/client/lib/cgutil/cgutil_linux.go @@ -32,24 +32,26 @@ func GetCgroupParent(parent string) string { // CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration. func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager { + parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config if UseV2 { - return NewCpusetManagerV2(getParentV2(parent), logger.Named("cpuset.v2")) + return NewCpusetManagerV2(parent, logger.Named("cpuset.v2")) } - return NewCpusetManagerV1(getParentV1(parent), logger.Named("cpuset.v1")) + return NewCpusetManagerV1(parent, logger.Named("cpuset.v1")) } // GetCPUsFromCgroup gets the effective cpuset value for the given cgroup. func GetCPUsFromCgroup(group string) ([]uint16, error) { + group = GetCgroupParent(group) if UseV2 { - return getCPUsFromCgroupV2(getParentV2(group)) + return getCPUsFromCgroupV2(group) } - return getCPUsFromCgroupV1(getParentV1(group)) + return getCPUsFromCgroupV1(group) } // CgroupScope returns the name of the scope for Nomad's managed cgroups for // the given allocID and task. // -// e.g. "-.scope" +// e.g. "..scope" // // Only useful for v2. func CgroupScope(allocID, task string) string { diff --git a/client/lib/cgutil/cgutil_linux_test.go b/client/lib/cgutil/cgutil_linux_test.go index bbc18ef8c..ed3ae87bd 100644 --- a/client/lib/cgutil/cgutil_linux_test.go +++ b/client/lib/cgutil/cgutil_linux_test.go @@ -88,8 +88,10 @@ func TestUtil_GetCPUsFromCgroup(t *testing.T) { func create(t *testing.T, name string) { mgr, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, name), rootless) require.NoError(t, err) - err = mgr.Apply(CreationPID) - require.NoError(t, err) + if err = mgr.Apply(CreationPID); err != nil { + _ = cgroups.RemovePath(name) + t.Fatal("failed to create cgroup for test") + } } func cleanup(t *testing.T, name string) { diff --git a/client/lib/cgutil/cpuset_manager_v1.go b/client/lib/cgutil/cpuset_manager_v1.go index 0e9107052..f0fa32527 100644 --- a/client/lib/cgutil/cpuset_manager_v1.go +++ b/client/lib/cgutil/cpuset_manager_v1.go @@ -117,12 +117,12 @@ func (c *cpusetManagerV1) CgroupPathFor(allocID, task string) CgroupPathGetter { break } - timer.Reset(100 * time.Millisecond) if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) { select { case <-ctx.Done(): return taskInfo.CgroupPath, ctx.Err() case <-timer.C: + timer.Reset(100 * time.Millisecond) continue } } diff --git a/client/lib/cgutil/cpuset_manager_v1_test.go b/client/lib/cgutil/cpuset_manager_v1_test.go index 3ada7b551..9537f2f87 100644 --- a/client/lib/cgutil/cpuset_manager_v1_test.go +++ b/client/lib/cgutil/cpuset_manager_v1_test.go @@ -106,18 +106,16 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { testutil.CgroupsCompatibleV1(t) + // This case tests adding 2 allocations, reconciling then removing 1 alloc. + // It requires the system to have at least 3 cpu cores (one for each alloc), + // BUT plus another one because writing an empty cpuset causes the cgroup to + // inherit the parent. + testutil.MinimumCores(t, 3) + manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() require.NoError(t, manager.Init(nil)) - // This case tests adding 2 allocs, reconciling then removing 1 alloc - // it requires the system to have at least 3 cpu cores (one for each alloc), - // BUT plus another one because writing an empty cpuset causes the cgroup to - // inherit the parent. - if manager.parentCpuset.Size() < 3 { - t.Skip("test requires at least 3 cpu cores") - } - alloc1 := mock.Alloc() alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0]) alloc1.AllocatedResources.Tasks["web"].Cpu.ReservedCores = alloc1Cpuset.ToSlice() diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 00162ca52..74a8a4f4f 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -40,8 +40,8 @@ const ( // nothing is used for treating a map like a set with no values type nothing struct{} -// null represents nothing -var null = nothing{} +// present indicates something exists +var present = nothing{} type cpusetManagerV2 struct { logger hclog.Logger @@ -57,10 +57,9 @@ type cpusetManagerV2 struct { } func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager { - cgroupParent := getParentV2(parent) return &cpusetManagerV2{ - parent: cgroupParent, - parentAbs: filepath.Join(CgroupRoot, cgroupParent), + parent: parent, + parentAbs: filepath.Join(CgroupRoot, parent), logger: logger, sharing: make(map[identity]nothing), isolating: make(map[identity]cpuset.CPUSet), @@ -93,7 +92,7 @@ func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) { if len(resources.Cpu.ReservedCores) > 0 { c.isolating[id] = cpuset.New(resources.Cpu.ReservedCores...) } else { - c.sharing[id] = null + c.sharing[id] = present } } @@ -197,10 +196,10 @@ func (c *cpusetManagerV2) cleanup() { size := len(c.sharing) + len(c.isolating) ids := make(map[identity]nothing, size) for id := range c.sharing { - ids[id] = null + ids[id] = present } for id := range c.isolating { - ids[id] = null + ids[id] = present } if err := filepath.WalkDir(c.parentAbs, func(path string, entry os.DirEntry, err error) error { diff --git a/client/lib/cgutil/cpuset_manager_v2_test.go b/client/lib/cgutil/cpuset_manager_v2_test.go index e1d658c82..a6acc50e7 100644 --- a/client/lib/cgutil/cpuset_manager_v2_test.go +++ b/client/lib/cgutil/cpuset_manager_v2_test.go @@ -24,6 +24,7 @@ var systemCores = []uint16{0, 1} func TestCpusetManager_V2_AddAlloc(t *testing.T) { testutil.CgroupsCompatibleV2(t) + testutil.MinimumCores(t, 2) logger := testlog.HCLogger(t) parent := uuid.Short() + ".scope" @@ -63,6 +64,7 @@ func cpusetIs(t *testing.T, exp, parent, allocID, task string) { func TestCpusetManager_V2_RemoveAlloc(t *testing.T) { testutil.CgroupsCompatibleV2(t) + testutil.MinimumCores(t, 2) logger := testlog.HCLogger(t) parent := uuid.Short() + ".scope" diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index 2459f8aa6..f0f84d5e5 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -92,3 +92,12 @@ func MountCompatible(t *testing.T) { t.Skip("Test requires root") } } + +// MinimumCores skips tests unless: +// - system has at least cores available CPU cores +func MinimumCores(t *testing.T, cores int) { + available := runtime.NumCPU() + if available < cores { + t.Skipf("Test requires at least %d cores, only %d available", cores, available) + } +} diff --git a/client/testutil/driver_compatible_linux.go b/client/testutil/driver_compatible_linux.go index 84ace6015..f30aba7ef 100644 --- a/client/testutil/driver_compatible_linux.go +++ b/client/testutil/driver_compatible_linux.go @@ -3,7 +3,6 @@ package testutil import ( - "runtime" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -23,9 +22,7 @@ func CgroupsCompatibleV1(t *testing.T) { } func cgroupsCompatibleV1(t *testing.T) bool { - if runtime.GOOS != "linux" { - return false - } + // build tags mean this will never run outside of linux if cgroupsCompatibleV2(t) { t.Log("No cgroup.v1 mount point: running in cgroup.v2 mode") diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index d38072ad5..585bf2d17 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -371,8 +371,8 @@ func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) { } // Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker - // don't isolate it by default. - if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") { + // don't isolate it by default. And also misc. + if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") || strings.Contains(line, ":misc:") { continue } diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index 874d2158e..73b6d63e8 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -96,21 +96,28 @@ The previous `Protocol` value can be viewed using the `-verbose` flag. #### Linux Control Groups Version 2 -Starting with Nomad 1.3.0, Linux systems configured to use [cgroups v2][cgroups2] are -now supported. A Nomad client will only activate its v2 control groups manager if the -system is configured with the cgroups2 controller mounted at `/sys/fs/cgroup`. This implies -Nomad will continue to fallback to the v1 control groups manager on systems -configured to run in hybrid mode, where the cgroups2 controller is typically mounted -at `/sys/fs/cgroup/unified`. Systems that do not support cgroups v2 are not affected. A -new client attribute `unique.cgroup.version` indicates which version of control groups -Nomad is using. +Starting with Nomad 1.3.0, Linux systems configured to use [cgroups v2][cgroups2] +are now supported. A Nomad client will only activate its v2 control groups manager +if the system is configured with the cgroups2 controller mounted at `/sys/fs/cgroup`. + * Systems that do not support cgroups v2 are not affected. + * Systems configured in hybrid mode typically mount the cgroups2 + controller at `/sys/fs/cgroup/unified`, so Nomad will continue to + use cgroups v1 for these hosts. + * Systems configured with only cgroups v2 now correctly support setting cpu [cores]. + +Nomad will preserve the existing cgroup for tasks when a client is +upgraded, so there will be no disruption to tasks. A new client +attribute `unique.cgroup.version` indicates which version of control +groups Nomad is using. When cgroups v2 are in use, Nomad uses `nomad.slice` as the [default parent][cgroup_parent] for cgroups -created on behalf of tasks. The cgroup created for a task is named in the form `-.scope`. +created on behalf of tasks. The cgroup created for a task is named in the form `..scope`. These cgroups are created by Nomad before a task starts. External task drivers that support containerization should be updated to make use of the new cgroup locations. -``` +The new cgroup file system layout will look like the following: + +```shell-session ➜ tree -d /sys/fs/cgroup/nomad.slice /sys/fs/cgroup/nomad.slice ├── 8b8da4cf-8ebf-b578-0bcf-77190749abf3.redis.scope @@ -1287,6 +1294,7 @@ deleted and then Nomad 0.3.0 can be launched. [api_jobs_parse]: /api-docs/jobs#parse-job [cgroups2]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html [cgroup_parent]: /docs/configuration/client#cgroup_parent +[cores]: /docs/job-specification/resources#cores [dangling-containers]: /docs/drivers/docker#dangling-containers [drain-api]: /api-docs/nodes#drain-node [drain-cli]: /docs/commands/node/drain