client: cgroups v2 code review followup
@@ -32,24 +32,26 @@ func GetCgroupParent(parent string) string {

 // CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration.
 func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager {
+	parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config
 	if UseV2 {
-		return NewCpusetManagerV2(getParentV2(parent), logger.Named("cpuset.v2"))
+		return NewCpusetManagerV2(parent, logger.Named("cpuset.v2"))
 	}
-	return NewCpusetManagerV1(getParentV1(parent), logger.Named("cpuset.v1"))
+	return NewCpusetManagerV1(parent, logger.Named("cpuset.v1"))
 }

 // GetCPUsFromCgroup gets the effective cpuset value for the given cgroup.
 func GetCPUsFromCgroup(group string) ([]uint16, error) {
+	group = GetCgroupParent(group)
 	if UseV2 {
-		return getCPUsFromCgroupV2(getParentV2(group))
+		return getCPUsFromCgroupV2(group)
 	}
-	return getCPUsFromCgroupV1(getParentV1(group))
+	return getCPUsFromCgroupV1(group)
 }

 // CgroupScope returns the name of the scope for Nomad's managed cgroups for
 // the given allocID and task.
 //
-// e.g. "<allocID>-<task>.scope"
+// e.g. "<allocID>.<task>.scope"
 //
 // Only useful for v2.
 func CgroupScope(allocID, task string) string {
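The scope-name fix above is just the separator: task scopes are now named `<allocID>.<task>.scope`. The function body is elided from this hunk, so here is a minimal sketch of what `CgroupScope` plausibly returns (assuming `fmt` is imported):

```go
// Sketch of CgroupScope after this change: only the separator between
// allocID and task changed from "-" to ".". The real body is not shown
// in this hunk.
func CgroupScope(allocID, task string) string {
	return fmt.Sprintf("%s.%s.scope", allocID, task)
}
```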
@@ -88,8 +88,10 @@ func TestUtil_GetCPUsFromCgroup(t *testing.T) {
 func create(t *testing.T, name string) {
 	mgr, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, name), rootless)
 	require.NoError(t, err)
-	err = mgr.Apply(CreationPID)
-	require.NoError(t, err)
+	if err = mgr.Apply(CreationPID); err != nil {
+		_ = cgroups.RemovePath(name)
+		t.Fatal("failed to create cgroup for test")
+	}
 }

 func cleanup(t *testing.T, name string) {
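The new error path tears down the half-created cgroup before failing the test. The matching `cleanup` helper is elided from this hunk; a sketch, assuming it mirrors `create` by removing the same path with the `cgroups.RemovePath` call seen above:

```go
// cleanup deletes the cgroup created by create above. Sketch only; the
// actual helper body is not part of this hunk.
func cleanup(t *testing.T, name string) {
	require.NoError(t, cgroups.RemovePath(name))
}
```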
@@ -117,12 +117,12 @@ func (c *cpusetManagerV1) CgroupPathFor(allocID, task string) CgroupPathGetter {
 			break
 		}

-		timer.Reset(100 * time.Millisecond)
 		if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) {
 			select {
 			case <-ctx.Done():
 				return taskInfo.CgroupPath, ctx.Err()
 			case <-timer.C:
+				timer.Reset(100 * time.Millisecond)
 				continue
 			}
 		}
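The change moves `timer.Reset` inside the `<-timer.C` case, so the timer is re-armed only after it has fired instead of on every pass through the loop. A sketch of the resulting wait loop; `ctx`, `timer` (a `*time.Timer`), and `taskInfo` are assumed from the surrounding code:

```go
// Sketch of the polling loop inside the CgroupPathFor getter: wait for the
// task's cgroup directory to appear, giving up when the context ends.
for {
	if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) {
		select {
		case <-ctx.Done():
			// give up: deadline exceeded or caller cancelled
			return taskInfo.CgroupPath, ctx.Err()
		case <-timer.C:
			// timer fired; re-arm it only now, then poll again
			timer.Reset(100 * time.Millisecond)
			continue
		}
	}
	break // cgroup path exists
}
```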
@@ -106,18 +106,16 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) {
 func TestCpusetManager_V1_RemoveAlloc(t *testing.T) {
 	testutil.CgroupsCompatibleV1(t)

+	// This case tests adding 2 allocations, reconciling then removing 1 alloc.
+	// It requires the system to have at least 3 cpu cores (one for each alloc),
+	// BUT plus another one because writing an empty cpuset causes the cgroup to
+	// inherit the parent.
+	testutil.MinimumCores(t, 3)
+
 	manager, cleanup := tmpCpusetManagerV1(t)
 	defer cleanup()
 	require.NoError(t, manager.Init(nil))

-	// This case tests adding 2 allocs, reconciling then removing 1 alloc
-	// it requires the system to have at least 3 cpu cores (one for each alloc),
-	// BUT plus another one because writing an empty cpuset causes the cgroup to
-	// inherit the parent.
-	if manager.parentCpuset.Size() < 3 {
-		t.Skip("test requires at least 3 cpu cores")
-	}
-
 	alloc1 := mock.Alloc()
 	alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0])
 	alloc1.AllocatedResources.Tasks["web"].Cpu.ReservedCores = alloc1Cpuset.ToSlice()
@@ -40,8 +40,8 @@ const (
 // nothing is used for treating a map like a set with no values
 type nothing struct{}

-// null represents nothing
-var null = nothing{}
+// present indicates something exists
+var present = nothing{}

 type cpusetManagerV2 struct {
 	logger hclog.Logger
@@ -57,10 +57,9 @@ type cpusetManagerV2 struct {
 }

 func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager {
-	cgroupParent := getParentV2(parent)
 	return &cpusetManagerV2{
-		parent:    cgroupParent,
-		parentAbs: filepath.Join(CgroupRoot, cgroupParent),
+		parent:    parent,
+		parentAbs: filepath.Join(CgroupRoot, parent),
 		logger:    logger,
 		sharing:   make(map[identity]nothing),
 		isolating: make(map[identity]cpuset.CPUSet),
@@ -93,7 +92,7 @@ func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) {
 		if len(resources.Cpu.ReservedCores) > 0 {
 			c.isolating[id] = cpuset.New(resources.Cpu.ReservedCores...)
 		} else {
-			c.sharing[id] = null
+			c.sharing[id] = present
 		}
 	}

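The `null` → `present` rename is purely cosmetic: `nothing` is a zero-byte struct used to make a map behave like a set, and `c.sharing[id] = present` reads more naturally than `= null`. The idiom in isolation, as a runnable sketch:

```go
package main

import "fmt"

// nothing is a zero-byte value type; a map[K]nothing is a set of K.
type nothing struct{}

var present = nothing{}

func main() {
	set := make(map[string]nothing)
	set["abc123.web"] = present // add a member

	_, ok := set["abc123.web"] // membership test
	fmt.Println(ok)            // true
}
```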
@@ -197,10 +196,10 @@ func (c *cpusetManagerV2) cleanup() {
 	size := len(c.sharing) + len(c.isolating)
 	ids := make(map[identity]nothing, size)
 	for id := range c.sharing {
-		ids[id] = null
+		ids[id] = present
 	}
 	for id := range c.isolating {
-		ids[id] = null
+		ids[id] = present
 	}

 	if err := filepath.WalkDir(c.parentAbs, func(path string, entry os.DirEntry, err error) error {
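`cleanup` first collects every identity the manager still tracks, then walks the parent cgroup directory; any `*.scope` entry not in the set is an orphan from a previous client run. A sketch of the walk, where `identityFromPath` and the removal step are hypothetical stand-ins for what the real callback does:

```go
// removeOrphans walks the parent cgroup directory and flags *.scope entries
// whose identity is not in ids. Sketch only, assuming "os", "path/filepath",
// and "strings" are imported.
func removeOrphans(parentAbs string, ids map[identity]nothing) error {
	return filepath.WalkDir(parentAbs, func(path string, entry os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !entry.IsDir() || !strings.HasSuffix(entry.Name(), ".scope") {
			return nil // only task scope directories are candidates
		}
		if _, exists := ids[identityFromPath(path)]; !exists { // hypothetical helper
			// orphaned cgroup from a previous client run; remove it here
		}
		return nil
	})
}
```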
@@ -24,6 +24,7 @@ var systemCores = []uint16{0, 1}

 func TestCpusetManager_V2_AddAlloc(t *testing.T) {
 	testutil.CgroupsCompatibleV2(t)
+	testutil.MinimumCores(t, 2)

 	logger := testlog.HCLogger(t)
 	parent := uuid.Short() + ".scope"
@@ -63,6 +64,7 @@ func cpusetIs(t *testing.T, exp, parent, allocID, task string) {

 func TestCpusetManager_V2_RemoveAlloc(t *testing.T) {
 	testutil.CgroupsCompatibleV2(t)
+	testutil.MinimumCores(t, 2)

 	logger := testlog.HCLogger(t)
 	parent := uuid.Short() + ".scope"
@@ -92,3 +92,12 @@ func MountCompatible(t *testing.T) {
 		t.Skip("Test requires root")
 	}
 }
+
+// MinimumCores skips tests unless:
+//   - system has at least `cores` CPU cores available
+func MinimumCores(t *testing.T, cores int) {
+	available := runtime.NumCPU()
+	if available < cores {
+		t.Skipf("Test requires at least %d cores, only %d available", cores, available)
+	}
+}
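Callers compose this new guard with the existing cgroup-version guards, as the cpuset test hunks above show. A hypothetical usage sketch:

```go
// Hypothetical test name and body, shown only to illustrate composing the
// two skip guards.
func TestSomethingCpuset(t *testing.T) {
	testutil.CgroupsCompatibleV2(t) // skip unless host runs pure cgroups v2
	testutil.MinimumCores(t, 2)     // skip unless at least 2 CPU cores exist

	// ... test body elided ...
}
```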
@@ -3,7 +3,6 @@
 package testutil

 import (
-	"runtime"
 	"testing"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -23,9 +22,7 @@ func CgroupsCompatibleV1(t *testing.T) {
 }

 func cgroupsCompatibleV1(t *testing.T) bool {
-	if runtime.GOOS != "linux" {
-		return false
-	}
+	// build tags mean this will never run outside of linux

 	if cgroupsCompatibleV2(t) {
 		t.Log("No cgroup.v1 mount point: running in cgroup.v2 mode")
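The removed `runtime.GOOS` check was dead code: the file only compiles on Linux. The file header sits above this hunk and is not shown, but presumably carries a constraint like:

```go
//go:build linux
// +build linux
```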
@@ -371,8 +371,8 @@ func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) {
 		}

 		// Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker
-		// don't isolate it by default.
-		if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") {
+		// don't isolate it by default. And also misc.
+		if strings.Contains(line, ":rdma:") || strings.Contains(line, "::") || strings.Contains(line, ":misc:") {
 			continue
 		}

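Each `line` filtered here comes from a `/proc/<pid>/cgroup` file, whose entries have the form `hierarchy-ID:controller-list:path`. That is why `"::"` matches the cgroup v2 entry (its controller list is empty) while `":rdma:"` and `":misc:"` match v1 controllers that libcontainer/docker do not isolate. Illustrative contents (the IDs and paths are assumed):

```shell-session
$ cat /proc/self/cgroup
12:rdma:/
11:cpuset:/nomad/shared
0::/system.slice/example.service
```

Here the `rdma` line and the `0::` (v2) line are skipped by the check; the `cpuset` line is the kind the test asserts on.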
@@ -96,21 +96,28 @@ The previous `Protocol` value can be viewed using the `-verbose` flag.

 #### Linux Control Groups Version 2

-Starting with Nomad 1.3.0, Linux systems configured to use [cgroups v2][cgroups2] are
-now supported. A Nomad client will only activate its v2 control groups manager if the
-system is configured with the cgroups2 controller mounted at `/sys/fs/cgroup`. This implies
-Nomad will continue to fallback to the v1 control groups manager on systems
-configured to run in hybrid mode, where the cgroups2 controller is typically mounted
-at `/sys/fs/cgroup/unified`. Systems that do not support cgroups v2 are not affected. A
-new client attribute `unique.cgroup.version` indicates which version of control groups
-Nomad is using.
+Starting with Nomad 1.3.0, Linux systems configured to use [cgroups v2][cgroups2]
+are now supported. A Nomad client will only activate its v2 control groups manager
+if the system is configured with the cgroups2 controller mounted at `/sys/fs/cgroup`.
+
+* Systems that do not support cgroups v2 are not affected.
+* Systems configured in hybrid mode typically mount the cgroups2
+  controller at `/sys/fs/cgroup/unified`, so Nomad will continue to
+  use cgroups v1 for these hosts.
+* Systems configured with only cgroups v2 now correctly support setting cpu [cores].
+
+Nomad will preserve the existing cgroup for tasks when a client is
+upgraded, so there will be no disruption to tasks. A new client
+attribute `unique.cgroup.version` indicates which version of control
+groups Nomad is using.

 When cgroups v2 are in use, Nomad uses `nomad.slice` as the [default parent][cgroup_parent] for cgroups
-created on behalf of tasks. The cgroup created for a task is named in the form `<allocID>-<task>.scope`.
+created on behalf of tasks. The cgroup created for a task is named in the form `<allocID>.<task>.scope`.
 These cgroups are created by Nomad before a task starts. External task drivers that support
 containerization should be updated to make use of the new cgroup locations.

-```
+The new cgroup file system layout will look like the following:
+
+```shell-session
 ➜ tree -d /sys/fs/cgroup/nomad.slice
 /sys/fs/cgroup/nomad.slice
 ├── 8b8da4cf-8ebf-b578-0bcf-77190749abf3.redis.scope
@@ -1287,6 +1294,7 @@ deleted and then Nomad 0.3.0 can be launched.
 [api_jobs_parse]: /api-docs/jobs#parse-job
 [cgroups2]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
 [cgroup_parent]: /docs/configuration/client#cgroup_parent
+[cores]: /docs/job-specification/resources#cores
 [dangling-containers]: /docs/drivers/docker#dangling-containers
 [drain-api]: /api-docs/nodes#drain-node
 [drain-cli]: /docs/commands/node/drain