diff --git a/.changelog/25963.txt b/.changelog/25963.txt new file mode 100644 index 000000000..11ba5e3fd --- /dev/null +++ b/.changelog/25963.txt @@ -0,0 +1,3 @@ +```release-note:bug +driver: Allow resources.cpu values above the maximum cpu.shares value on Linux +``` diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index 6f912df32..ab57420ff 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -948,6 +948,22 @@ func memoryLimits(driverHardLimitMB int64, taskMemory drivers.MemoryResources) ( return hard * 1024 * 1024, softBytes } +// maxCPUShares is the maximum value for cpu_shares in cgroups v1 +// https://github.com/torvalds/linux/blob/v6.15/kernel/sched/sched.h#L503 +const maxCPUShares = 262_144 + +// cpuResources normalizes the requested CPU shares when the total compute +// available on the node is larger than the largest share value allowed by the +// kernel. On cgroups v2, Docker will re-normalize this to be within the +// acceptable range for cpu.weight [1-10000]. 
+func (d *Driver) cpuResources(requested int64) int64 { + if d.compute.TotalCompute < maxCPUShares { + return requested + } + + return int64(float64(requested) / float64(d.compute.TotalCompute) * maxCPUShares) +} + func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig, imageID string) (createContainerOptions, error) { @@ -1027,6 +1043,8 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T pidsLimit = driverConfig.PidsLimit } + cpuShares := d.cpuResources(task.Resources.LinuxResources.CPUShares) + hostConfig := &containerapi.HostConfig{ // do not set cgroup parent anymore @@ -1048,7 +1066,7 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T hostConfig.Resources = containerapi.Resources{ Memory: memory, // hard limit MemoryReservation: memoryReservation, // soft limit - CPUShares: task.Resources.LinuxResources.CPUShares, + CPUShares: cpuShares, CpusetCpus: task.Resources.LinuxResources.CpusetCpus, PidsLimit: &pidsLimit, } diff --git a/drivers/docker/driver_linux_test.go b/drivers/docker/driver_linux_test.go index cd2ba26f3..fe79fece5 100644 --- a/drivers/docker/driver_linux_test.go +++ b/drivers/docker/driver_linux_test.go @@ -114,3 +114,22 @@ func TestDockerDriver_PidsLimit(t *testing.T) { wait.Gap(50*time.Millisecond), )) } + +func TestDockerDriver_NormalizeCPUShares(t *testing.T) { + dh := dockerDriverHarness(t, nil) + driver := dh.Impl().(*Driver) + driver.compute.TotalCompute = 12000 + + must.Eq(t, maxCPUShares, driver.cpuResources(maxCPUShares)) + must.Eq(t, 1000, driver.cpuResources(1000)) + + driver.compute.TotalCompute = maxCPUShares + must.Eq(t, maxCPUShares, driver.cpuResources(maxCPUShares)) + + driver.compute.TotalCompute = maxCPUShares + 1 + must.Eq(t, 262143, driver.cpuResources(maxCPUShares)) + + driver.compute.TotalCompute = maxCPUShares * 2 + must.Eq(t, 500, driver.cpuResources(1000)) + must.Eq(t, maxCPUShares/2, driver.cpuResources(maxCPUShares)) +} 
diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 215c5224c..fffc18b53 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -900,13 +900,15 @@ func (l *LibcontainerExecutor) clampCpuShares(shares int64) int64 { ) return MinCPUShares } - if shares > MaxCPUShares { - l.logger.Warn( - "task CPU is greater than maximum allowed, using maximum value instead", - "task_cpu", shares, "max", MaxCPUShares, - ) - return MaxCPUShares + + // Normalize the requested CPU shares when the total compute available on + // the node is larger than the largest share value allowed by the kernel. On + // cgroups v2 we'll later re-normalize this to be within the acceptable + // range for cpu.weight [1-10000]. + if l.compute.TotalCompute >= MaxCPUShares { + return int64(float64(shares) / float64(l.compute.TotalCompute) * MaxCPUShares) } + return shares } diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index 8e997670b..438311f67 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -20,6 +20,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/lib/cgroupslib" + "github.com/hashicorp/nomad/client/lib/cpustats" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/drivers/shared/capabilities" @@ -1071,3 +1072,24 @@ func TestCgroupDeviceRules(t *testing.T) { Allow: true, }) } + +func TestExecutor_clampCPUShares(t *testing.T) { + + le := &LibcontainerExecutor{ + logger: testlog.HCLogger(t), + compute: cpustats.Compute{TotalCompute: 12000}, + } + + must.Eq(t, MaxCPUShares, le.clampCpuShares(MaxCPUShares)) + must.Eq(t, 1000, le.clampCpuShares(1000)) + + le.compute.TotalCompute = MaxCPUShares + must.Eq(t, MaxCPUShares, 
le.clampCpuShares(MaxCPUShares)) + + le.compute.TotalCompute = MaxCPUShares + 1 + must.Eq(t, 262143, le.clampCpuShares(MaxCPUShares)) + + le.compute = cpustats.Compute{TotalCompute: MaxCPUShares * 2} + must.Eq(t, 500, le.clampCpuShares(1000)) + must.Eq(t, MaxCPUShares/2, le.clampCpuShares(MaxCPUShares)) +}