cpuset fixer: use correct cgroup path for updates (#20276)

* cpuset fixer: use correct cgroup path for updates

fixes #20275

* docker: flatten switch statement and add test cases

* cl: add cl

---------

Co-authored-by: Seth Hoenig <shoenig@duck.com>
This commit is contained in:
Yorick Gersie
2024-04-04 22:54:10 +02:00
committed by GitHub
parent a71632e3a4
commit 6124ee8afb
4 changed files with 92 additions and 15 deletions

3
.changelog/20276.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
docker: Fixed a bug where cpuset would not be updated on cgroup v2 systems where Docker uses the cgroupfs cgroup driver
```

View File

@@ -230,6 +230,11 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
return fmt.Errorf("failed to get docker client: %w", err)
}
dockerInfo, err := dockerClient.Info()
if err != nil {
return fmt.Errorf("failed to fetch docker daemon info: %v", err)
}
infinityClient, err := d.getInfinityClient()
if err != nil {
return fmt.Errorf("failed to get docker long operations client: %w", err)
@@ -244,6 +249,7 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
h := &taskHandle{
dockerClient: dockerClient,
dockerCGroupDriver: dockerInfo.CgroupDriver,
infinityClient: infinityClient,
logger: d.logger.With("container_id", container.ID),
task: handle.Config,
@@ -323,6 +329,11 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
return nil, nil, fmt.Errorf("Failed to create docker client: %v", err)
}
dockerInfo, err := dockerClient.Info()
if err != nil {
return nil, nil, fmt.Errorf("failed to fetch docker daemon info: %v", err)
}
// and also the long operations client
infinityClient, err := d.getInfinityClient()
if err != nil {
@@ -433,6 +444,7 @@ CREATE:
// Return a driver handle
h := &taskHandle{
dockerClient: dockerClient,
dockerCGroupDriver: dockerInfo.CgroupDriver,
infinityClient: infinityClient,
dlogger: dlogger,
dloggerPluginClient: pluginClient,

View File

@@ -29,6 +29,8 @@ type taskHandle struct {
// for all calls that aren't Wait() or Stop() (and their variations).
dockerClient *docker.Client
dockerCGroupDriver string
// infinityClient is useful for
// - the Wait docker API call(s) (no limit on container lifetime)
// - the Stop docker API call(s) (context with task kill_timeout required)
@@ -254,27 +256,35 @@ func (h *taskHandle) startCpusetFixer() {
return
}
cgroup := h.containerCgroup
if cgroup == "" {
// The api does not actually set this value, so we are left to compute it ourselves.
// Luckily this is documented,
// https://docs.docker.com/config/containers/runmetrics/#find-the-cgroup-for-a-given-container
switch cgroupslib.GetMode() {
case cgroupslib.CG1:
cgroup = "/sys/fs/cgroup/cpuset/docker/" + h.containerID
default:
// systemd driver; not sure if we need to consider cgroupfs driver
cgroup = "/sys/fs/cgroup/system.slice/docker-" + h.containerID + ".scope"
}
}
go (&cpuset{
doneCh: h.doneCh,
source: h.task.Resources.LinuxResources.CpusetCgroupPath,
destination: cgroup,
destination: h.dockerCgroup(),
}).watch()
}
// dockerCgroup reports the cgroup path docker is expected to use for the
// container of this task.
//
// The docker api does not hand us this path, so unless a container cgroup is
// already recorded on the handle it is derived from the host cgroups mode and
// the cgroup driver stored on the handle, following the documented layout.
//
// https://docs.docker.com/config/containers/runmetrics/#find-the-cgroup-for-a-given-container
func (h *taskHandle) dockerCgroup() string {
	// A cgroup already recorded on the handle is returned untouched.
	if h.containerCgroup != "" {
		return h.containerCgroup
	}

	id := h.containerID
	mode := cgroupslib.GetMode()

	if mode == cgroupslib.CG1 {
		return "/sys/fs/cgroup/cpuset/docker/" + id
	}
	if mode == cgroupslib.CG2 && h.dockerCGroupDriver == "cgroupfs" {
		return "/sys/fs/cgroup/docker/" + id
	}

	// Every remaining combination falls back to the systemd scope layout.
	return "/sys/fs/cgroup/system.slice/docker-" + id + ".scope"
}
func (h *taskHandle) run() {
defer h.shutdownLogger()

View File

@@ -0,0 +1,52 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package docker
import (
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/testutil"
"github.com/shoenig/test/must"
)
// Test_dockerCgroup checks the path returned by taskHandle.dockerCgroup for a
// preset container cgroup and for the v1, v2-systemd and v2-cgroupfs cases.
// Each case first runs its own cgroups compatibility helper from testutil.
func Test_dockerCgroup(t *testing.T) {
	testutil.RequireRoot(t)
	ci.Parallel(t)

	cases := []struct {
		name    string
		require func(t *testing.T)
		handle  *taskHandle
		exp     string
	}{
		{
			name:    "preset",
			require: func(t *testing.T) { testutil.CgroupsCompatible(t) },
			handle:  &taskHandle{containerCgroup: "/some/preset"},
			exp:     "/some/preset",
		},
		{
			name:    "v1",
			require: func(t *testing.T) { testutil.CgroupsCompatibleV1(t) },
			handle:  &taskHandle{containerID: "abc123"},
			exp:     "/sys/fs/cgroup/cpuset/docker/abc123",
		},
		{
			name:    "v2-systemd",
			require: func(t *testing.T) { testutil.CgroupsCompatibleV2(t) },
			handle:  &taskHandle{containerID: "abc123"},
			exp:     "/sys/fs/cgroup/system.slice/docker-abc123.scope",
		},
		{
			name:    "v2-cgroupfs",
			require: func(t *testing.T) { testutil.CgroupsCompatibleV2(t) },
			handle:  &taskHandle{containerID: "abc123", dockerCGroupDriver: "cgroupfs"},
			exp:     "/sys/fs/cgroup/docker/abc123",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Run the compatibility helper before touching the handle, as
			// each original subtest did.
			tc.require(t)

			result := tc.handle.dockerCgroup()
			must.Eq(t, tc.exp, result)
		})
	}
}