mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
cpuset fixer: use correct cgroup path for updates (#20276)
* cpuset fixer: use correct cgroup path for updates fixes #20275 * docker: flatten switch statement and add test cases * cl: add cl --------- Co-authored-by: Seth Hoenig <shoenig@duck.com>
This commit is contained in:
3
.changelog/20276.txt
Normal file
3
.changelog/20276.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
docker: Fixed a bug where cpuset would not be updated on cgroup v2 systems using cgroupfs
|
||||
```
|
||||
@@ -230,6 +230,11 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
|
||||
return fmt.Errorf("failed to get docker client: %w", err)
|
||||
}
|
||||
|
||||
dockerInfo, err := dockerClient.Info()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch docker daemon info: %v", err)
|
||||
}
|
||||
|
||||
infinityClient, err := d.getInfinityClient()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get docker long operations client: %w", err)
|
||||
@@ -244,6 +249,7 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
|
||||
|
||||
h := &taskHandle{
|
||||
dockerClient: dockerClient,
|
||||
dockerCGroupDriver: dockerInfo.CgroupDriver,
|
||||
infinityClient: infinityClient,
|
||||
logger: d.logger.With("container_id", container.ID),
|
||||
task: handle.Config,
|
||||
@@ -323,6 +329,11 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
|
||||
return nil, nil, fmt.Errorf("Failed to create docker client: %v", err)
|
||||
}
|
||||
|
||||
dockerInfo, err := dockerClient.Info()
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to fetch docker daemon info: %v", err)
|
||||
}
|
||||
|
||||
// and also the long operations client
|
||||
infinityClient, err := d.getInfinityClient()
|
||||
if err != nil {
|
||||
@@ -433,6 +444,7 @@ CREATE:
|
||||
// Return a driver handle
|
||||
h := &taskHandle{
|
||||
dockerClient: dockerClient,
|
||||
dockerCGroupDriver: dockerInfo.CgroupDriver,
|
||||
infinityClient: infinityClient,
|
||||
dlogger: dlogger,
|
||||
dloggerPluginClient: pluginClient,
|
||||
|
||||
@@ -29,6 +29,8 @@ type taskHandle struct {
|
||||
// for all calls that aren't Wait() or Stop() (and their variations).
|
||||
dockerClient *docker.Client
|
||||
|
||||
dockerCGroupDriver string
|
||||
|
||||
// infinityClient is useful for
|
||||
// - the Wait docker API call(s) (no limit on container lifetime)
|
||||
// - the Stop docker API call(s) (context with task kill_timeout required)
|
||||
@@ -254,27 +256,35 @@ func (h *taskHandle) startCpusetFixer() {
|
||||
return
|
||||
}
|
||||
|
||||
cgroup := h.containerCgroup
|
||||
if cgroup == "" {
|
||||
// The api does not actually set this value, so we are left to compute it ourselves.
|
||||
// Luckily this is documented,
|
||||
// https://docs.docker.com/config/containers/runmetrics/#find-the-cgroup-for-a-given-container
|
||||
switch cgroupslib.GetMode() {
|
||||
case cgroupslib.CG1:
|
||||
cgroup = "/sys/fs/cgroup/cpuset/docker/" + h.containerID
|
||||
default:
|
||||
// systemd driver; not sure if we need to consider cgroupfs driver
|
||||
cgroup = "/sys/fs/cgroup/system.slice/docker-" + h.containerID + ".scope"
|
||||
}
|
||||
}
|
||||
|
||||
go (&cpuset{
|
||||
doneCh: h.doneCh,
|
||||
source: h.task.Resources.LinuxResources.CpusetCgroupPath,
|
||||
destination: cgroup,
|
||||
destination: h.dockerCgroup(),
|
||||
}).watch()
|
||||
}
|
||||
|
||||
// dockerCgroup returns the path to the cgroup docker will use for the container.
|
||||
//
|
||||
// The api does not provide this value, so we are left to compute it ourselves.
|
||||
//
|
||||
// https://docs.docker.com/config/containers/runmetrics/#find-the-cgroup-for-a-given-container
|
||||
func (h *taskHandle) dockerCgroup() string {
|
||||
cgroup := h.containerCgroup
|
||||
if cgroup == "" {
|
||||
mode := cgroupslib.GetMode()
|
||||
usingCgroupfs := h.dockerCGroupDriver == "cgroupfs"
|
||||
switch {
|
||||
case mode == cgroupslib.CG1:
|
||||
cgroup = "/sys/fs/cgroup/cpuset/docker/" + h.containerID
|
||||
case mode == cgroupslib.CG2 && usingCgroupfs:
|
||||
cgroup = "/sys/fs/cgroup/docker/" + h.containerID
|
||||
default:
|
||||
cgroup = "/sys/fs/cgroup/system.slice/docker-" + h.containerID + ".scope"
|
||||
}
|
||||
}
|
||||
return cgroup
|
||||
}
|
||||
|
||||
func (h *taskHandle) run() {
|
||||
defer h.shutdownLogger()
|
||||
|
||||
|
||||
52
drivers/docker/handle_test.go
Normal file
52
drivers/docker/handle_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/ci"
|
||||
"github.com/hashicorp/nomad/client/testutil"
|
||||
"github.com/shoenig/test/must"
|
||||
)
|
||||
|
||||
func Test_dockerCgroup(t *testing.T) {
|
||||
testutil.RequireRoot(t)
|
||||
|
||||
ci.Parallel(t)
|
||||
|
||||
t.Run("preset", func(t *testing.T) {
|
||||
testutil.CgroupsCompatible(t)
|
||||
|
||||
h := new(taskHandle)
|
||||
h.containerCgroup = "/some/preset"
|
||||
result := h.dockerCgroup()
|
||||
must.Eq(t, "/some/preset", result)
|
||||
})
|
||||
|
||||
t.Run("v1", func(t *testing.T) {
|
||||
testutil.CgroupsCompatibleV1(t)
|
||||
h := new(taskHandle)
|
||||
h.containerID = "abc123"
|
||||
result := h.dockerCgroup()
|
||||
must.Eq(t, "/sys/fs/cgroup/cpuset/docker/abc123", result)
|
||||
})
|
||||
|
||||
t.Run("v2-systemd", func(t *testing.T) {
|
||||
testutil.CgroupsCompatibleV2(t)
|
||||
h := new(taskHandle)
|
||||
h.containerID = "abc123"
|
||||
result := h.dockerCgroup()
|
||||
must.Eq(t, "/sys/fs/cgroup/system.slice/docker-abc123.scope", result)
|
||||
})
|
||||
|
||||
t.Run("v2-cgroupfs", func(t *testing.T) {
|
||||
testutil.CgroupsCompatibleV2(t)
|
||||
h := new(taskHandle)
|
||||
h.containerID = "abc123"
|
||||
h.dockerCGroupDriver = "cgroupfs"
|
||||
result := h.dockerCgroup()
|
||||
must.Eq(t, "/sys/fs/cgroup/docker/abc123", result)
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user