client: cgroups v2 code review followup

This commit is contained in:
Seth Hoenig
2022-03-24 13:40:42 -05:00
parent 5da1a31e94
commit c27af79add
10 changed files with 57 additions and 40 deletions

View File

@@ -32,24 +32,26 @@ func GetCgroupParent(parent string) string {
// CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration.
func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager {
parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config
if UseV2 {
return NewCpusetManagerV2(getParentV2(parent), logger.Named("cpuset.v2"))
return NewCpusetManagerV2(parent, logger.Named("cpuset.v2"))
}
return NewCpusetManagerV1(getParentV1(parent), logger.Named("cpuset.v1"))
return NewCpusetManagerV1(parent, logger.Named("cpuset.v1"))
}
// GetCPUsFromCgroup gets the effective cpuset value for the given cgroup.
func GetCPUsFromCgroup(group string) ([]uint16, error) {
group = GetCgroupParent(group)
if UseV2 {
return getCPUsFromCgroupV2(getParentV2(group))
return getCPUsFromCgroupV2(group)
}
return getCPUsFromCgroupV1(getParentV1(group))
return getCPUsFromCgroupV1(group)
}
// CgroupScope returns the name of the scope for Nomad's managed cgroups for
// the given allocID and task.
//
// e.g. "<allocID>-<task>.scope"
// e.g. "<allocID>.<task>.scope"
//
// Only useful for v2.
func CgroupScope(allocID, task string) string {

View File

@@ -88,8 +88,10 @@ func TestUtil_GetCPUsFromCgroup(t *testing.T) {
func create(t *testing.T, name string) {
mgr, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, name), rootless)
require.NoError(t, err)
err = mgr.Apply(CreationPID)
require.NoError(t, err)
if err = mgr.Apply(CreationPID); err != nil {
_ = cgroups.RemovePath(name)
t.Fatal("failed to create cgroup for test")
}
}
func cleanup(t *testing.T, name string) {

View File

@@ -117,12 +117,12 @@ func (c *cpusetManagerV1) CgroupPathFor(allocID, task string) CgroupPathGetter {
break
}
timer.Reset(100 * time.Millisecond)
if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) {
select {
case <-ctx.Done():
return taskInfo.CgroupPath, ctx.Err()
case <-timer.C:
timer.Reset(100 * time.Millisecond)
continue
}
}

View File

@@ -106,18 +106,16 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) {
func TestCpusetManager_V1_RemoveAlloc(t *testing.T) {
testutil.CgroupsCompatibleV1(t)
// This case tests adding 2 allocations, reconciling then removing 1 alloc.
// It requires the system to have at least 3 cpu cores (one for each alloc),
// plus one extra, because writing an empty cpuset causes the cgroup to
// inherit the parent's cpuset.
testutil.MinimumCores(t, 3)
manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
// This case tests adding 2 allocs, reconciling then removing 1 alloc
// it requires the system to have at least 3 cpu cores (one for each alloc),
// BUT plus another one because writing an empty cpuset causes the cgroup to
// inherit the parent.
if manager.parentCpuset.Size() < 3 {
t.Skip("test requires at least 3 cpu cores")
}
alloc1 := mock.Alloc()
alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0])
alloc1.AllocatedResources.Tasks["web"].Cpu.ReservedCores = alloc1Cpuset.ToSlice()

View File

@@ -40,8 +40,8 @@ const (
// nothing is used for treating a map like a set with no values
type nothing struct{}
// null represents nothing
var null = nothing{}
// present indicates something exists
var present = nothing{}
type cpusetManagerV2 struct {
logger hclog.Logger
@@ -57,10 +57,9 @@ type cpusetManagerV2 struct {
}
func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager {
cgroupParent := getParentV2(parent)
return &cpusetManagerV2{
parent: cgroupParent,
parentAbs: filepath.Join(CgroupRoot, cgroupParent),
parent: parent,
parentAbs: filepath.Join(CgroupRoot, parent),
logger: logger,
sharing: make(map[identity]nothing),
isolating: make(map[identity]cpuset.CPUSet),
@@ -93,7 +92,7 @@ func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) {
if len(resources.Cpu.ReservedCores) > 0 {
c.isolating[id] = cpuset.New(resources.Cpu.ReservedCores...)
} else {
c.sharing[id] = null
c.sharing[id] = present
}
}
@@ -197,10 +196,10 @@ func (c *cpusetManagerV2) cleanup() {
size := len(c.sharing) + len(c.isolating)
ids := make(map[identity]nothing, size)
for id := range c.sharing {
ids[id] = null
ids[id] = present
}
for id := range c.isolating {
ids[id] = null
ids[id] = present
}
if err := filepath.WalkDir(c.parentAbs, func(path string, entry os.DirEntry, err error) error {

View File

@@ -24,6 +24,7 @@ var systemCores = []uint16{0, 1}
func TestCpusetManager_V2_AddAlloc(t *testing.T) {
testutil.CgroupsCompatibleV2(t)
testutil.MinimumCores(t, 2)
logger := testlog.HCLogger(t)
parent := uuid.Short() + ".scope"
@@ -63,6 +64,7 @@ func cpusetIs(t *testing.T, exp, parent, allocID, task string) {
func TestCpusetManager_V2_RemoveAlloc(t *testing.T) {
testutil.CgroupsCompatibleV2(t)
testutil.MinimumCores(t, 2)
logger := testlog.HCLogger(t)
parent := uuid.Short() + ".scope"

View File

@@ -92,3 +92,12 @@ func MountCompatible(t *testing.T) {
t.Skip("Test requires root")
}
}
// MinimumCores skips tests unless:
// - the system has at least the given number of CPU cores available
func MinimumCores(t *testing.T, cores int) {
available := runtime.NumCPU()
if available < cores {
t.Skipf("Test requires at least %d cores, only %d available", cores, available)
}
}

View File

@@ -3,7 +3,6 @@
package testutil
import (
"runtime"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -23,9 +22,7 @@ func CgroupsCompatibleV1(t *testing.T) {
}
func cgroupsCompatibleV1(t *testing.T) bool {
if runtime.GOOS != "linux" {
return false
}
// build tags mean this will never run outside of linux
if cgroupsCompatibleV2(t) {
t.Log("No cgroup.v1 mount point: running in cgroup.v2 mode")

View File

@@ -371,8 +371,8 @@ func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) {
}
// Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker
// don't isolate it by default.
if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") {
// don't isolate it by default. The misc subsystem is skipped for the same reason.
if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") || strings.Contains(line, ":misc:") {
continue
}

View File

@@ -96,21 +96,28 @@ The previous `Protocol` value can be viewed using the `-verbose` flag.
#### Linux Control Groups Version 2
Starting with Nomad 1.3.0, Linux systems configured to use [cgroups v2][cgroups2] are
now supported. A Nomad client will only activate its v2 control groups manager if the
system is configured with the cgroups2 controller mounted at `/sys/fs/cgroup`. This implies
Nomad will continue to fallback to the v1 control groups manager on systems
configured to run in hybrid mode, where the cgroups2 controller is typically mounted
at `/sys/fs/cgroup/unified`. Systems that do not support cgroups v2 are not affected. A
new client attribute `unique.cgroup.version` indicates which version of control groups
Nomad is using.
Starting with Nomad 1.3.0, Linux systems configured to use [cgroups v2][cgroups2]
are now supported. A Nomad client will only activate its v2 control groups manager
if the system is configured with the cgroups2 controller mounted at `/sys/fs/cgroup`.
* Systems that do not support cgroups v2 are not affected.
* Systems configured in hybrid mode typically mount the cgroups2
controller at `/sys/fs/cgroup/unified`, so Nomad will continue to
use cgroups v1 for these hosts.
* Systems configured with only cgroups v2 now correctly support setting cpu [cores].
Nomad will preserve the existing cgroup for each task when a client is
upgraded, so running tasks are not disrupted. A new client
attribute `unique.cgroup.version` indicates which version of control
groups Nomad is using.
When cgroups v2 are in use, Nomad uses `nomad.slice` as the [default parent][cgroup_parent] for cgroups
created on behalf of tasks. The cgroup created for a task is named in the form `<allocID>-<task>.scope`.
created on behalf of tasks. The cgroup created for a task is named in the form `<allocID>.<task>.scope`.
These cgroups are created by Nomad before a task starts. External task drivers that support
containerization should be updated to make use of the new cgroup locations.
```
The new cgroup file system layout will look like the following:
```shell-session
➜ tree -d /sys/fs/cgroup/nomad.slice
/sys/fs/cgroup/nomad.slice
├── 8b8da4cf-8ebf-b578-0bcf-77190749abf3.redis.scope
@@ -1287,6 +1294,7 @@ deleted and then Nomad 0.3.0 can be launched.
[api_jobs_parse]: /api-docs/jobs#parse-job
[cgroups2]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
[cgroup_parent]: /docs/configuration/client#cgroup_parent
[cores]: /docs/job-specification/resources#cores
[dangling-containers]: /docs/drivers/docker#dangling-containers
[drain-api]: /api-docs/nodes#drain-node
[drain-cli]: /docs/commands/node/drain