mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 18:35:44 +03:00
drivers/executor: set oom_score_adj for raw_exec (#19515)
* drivers/executor: set oom_score_adj for raw_exec

  This might not be wholly true since I don't know all configurations of Nomad, but in our use cases we run some of our tasks as `raw_exec` for reasons. We observed that our tasks were running with `oom_score_adj = -1000`, which prevents them from being OOM-killed. This value is inherited from the Nomad agent parent process, as configured by systemd.

  Similar to #10698, we were also shocked to find this value inherited by every child process, and believe that we should explicitly set it to 0 here as well.

  I have no idea if there are other paths that might leverage this, or other ways that `raw_exec` can manifest, but this is how I was able to observe and fix the issue in one of our configurations.

  In production we have been running our tasks wrapped in a script that does `echo 0 > /proc/self/oom_score_adj` to avoid this issue.

* drivers/executor: minor cleanup of setting oom adjustment
* e2e: add test for raw_exec oom adjust score
* e2e: set oom score adjust to -999
* cl: add cl

---------

Co-authored-by: Seth Hoenig <shoenig@duck.com>
This commit is contained in:
@@ -5,6 +5,7 @@ package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"syscall"
|
||||
@@ -112,6 +113,11 @@ func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) {
|
||||
func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) {
|
||||
cgroup := command.StatsCgroup()
|
||||
|
||||
// ensure tasks do not inherit Nomad agent oom_score_adj value
|
||||
if err := e.setOomAdj(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// cgCleanup will be called after the task has been launched
|
||||
// v1: remove the executor process from the task's cgroups
|
||||
// v2: let go of the file descriptor of the task's cgroup
|
||||
@@ -244,6 +250,14 @@ func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) {
|
||||
_ = ed.Write("cpuset.cpus", cpusetCpus)
|
||||
}
|
||||
|
||||
func (e *UniversalExecutor) setOomAdj() error {
|
||||
// children should not inherit Nomad agent oom_score_adj value
|
||||
//
|
||||
// /proc/self/oom_score_adj should work on both cgroups v1 and v2 systems
|
||||
// range is -1000 to 1000; 0 is the default
|
||||
return os.WriteFile("/proc/self/oom_score_adj", []byte("0"), 0644)
|
||||
}
|
||||
|
||||
func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 {
|
||||
cpuShares := command.Resources.LinuxResources.CPUShares
|
||||
cpuWeight := cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares))
|
||||
|
||||
Reference in New Issue
Block a user