From c8dcd3c2dbee57543e5f30667a99085fd35653e7 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 19 Jun 2025 13:48:06 -0400 Subject: [PATCH] docker: clamp CPU shares to minimum of 2 (#26081) In #25963 we added normalization of CPU shares for large hosts where the total compute was larger than the maximum CPU shares. But if the result after normalization is less than 2, runc will have an integer overflow. We prevent this in the shared executor for the `exec`/`rawexec` driver by clamping to the safe minimum value. Do this for the `docker` driver as well and add test coverage of it for the shared executor too. Fixes: https://github.com/hashicorp/nomad/issues/26080 Ref: https://github.com/hashicorp/nomad/pull/25963 --- .changelog/26081.txt | 3 +++ drivers/docker/driver.go | 10 +++++++++- drivers/docker/driver_linux_test.go | 6 ++++++ drivers/shared/executor/executor_linux_test.go | 3 +++ 4 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 .changelog/26081.txt diff --git a/.changelog/26081.txt b/.changelog/26081.txt new file mode 100644 index 000000000..16259f821 --- /dev/null +++ b/.changelog/26081.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: Fixed a bug where very low resources.cpu values could generate invalid cpu weights on hosts with very large client.cpu_total_compute values +``` diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index 102d0947d..6fb6a6413 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -951,17 +951,25 @@ func memoryLimits(driverHardLimitMB int64, taskMemory drivers.MemoryResources) ( // maxCPUShares is the maximum value for cpu_shares in cgroups v1 // https://github.com/torvalds/linux/blob/v6.15/kernel/sched/sched.h#L503 const maxCPUShares = 262_144 +const minCPUShares = 2 // cpuResources normalizes the requested CPU shares when the total compute // available on the node is larger than the largest share value allowed by the // kernel. On cgroups v2, Docker will re-normalize this to be within the // acceptable range for cpu.weight [1-10000]. func (d *Driver) cpuResources(requested int64) int64 { + if requested < minCPUShares { + return minCPUShares + } if d.compute.TotalCompute < maxCPUShares { return requested } - return int64(float64(requested) / float64(d.compute.TotalCompute) * maxCPUShares) + result := int64(float64(requested) / float64(d.compute.TotalCompute) * maxCPUShares) + if result < minCPUShares { + return minCPUShares + } + return result } func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig, diff --git a/drivers/docker/driver_linux_test.go b/drivers/docker/driver_linux_test.go index fe79fece5..3f6d3bd19 100644 --- a/drivers/docker/driver_linux_test.go +++ b/drivers/docker/driver_linux_test.go @@ -129,6 +129,12 @@ func TestDockerDriver_NormalizeCPUShares(t *testing.T) { driver.compute.TotalCompute = maxCPUShares + 1 must.Eq(t, 262143, driver.cpuResources(maxCPUShares)) + driver.compute.TotalCompute = maxCPUShares + 1 + must.Eq(t, 2, driver.cpuResources(2)) + + driver.compute.TotalCompute = maxCPUShares + 1 + must.Eq(t, 2, driver.cpuResources(1)) + driver.compute.TotalCompute = maxCPUShares * 2 must.Eq(t, 500, driver.cpuResources(1000)) must.Eq(t, maxCPUShares/2, driver.cpuResources(maxCPUShares)) diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index 438311f67..9dc94487f 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -1089,6 +1089,9 @@ func TestExecutor_clampCPUShares(t *testing.T) { le.compute.TotalCompute = MaxCPUShares + 1 must.Eq(t, 262143, le.clampCpuShares(MaxCPUShares)) + le.compute.TotalCompute = MaxCPUShares + 1 + must.Eq(t, 2, le.clampCpuShares(1)) + le.compute = cpustats.Compute{TotalCompute: MaxCPUShares * 2} must.Eq(t, 500, le.clampCpuShares(1000)) must.Eq(t, MaxCPUShares/2, le.clampCpuShares(MaxCPUShares))