drivers/executor: set oom_score_adj for raw_exec (#19515)

* drivers/executor: set oom_score_adj for raw_exec

This might not be wholly true since I don't know all configurations of
Nomad, but in our use cases, we run some of our tasks as `raw_exec` for
reasons.

We observed that our tasks were running with `oom_score_adj = -1000`,
which prevents them from being OOM'd. This value is being inherited from
the nomad agent parent process, as configured by systemd.

Similar to #10698, we were surprised to find this value inherited by
every child process, and we believe it should be explicitly set to 0
here as well.

I have no idea if there are other paths that might leverage this or
other ways that `raw_exec` can manifest, but this is how I was able to
observe and fix it in one of our configurations.

In production, we have been working around this by wrapping our tasks in
a script that does: `echo 0 > /proc/self/oom_score_adj`.

* drivers/executor: minor cleanup of setting oom adjustment

* e2e: add test for raw_exec oom adjust score

* e2e: set oom score adjust to -999

* cl: add cl

---------

Co-authored-by: Seth Hoenig <shoenig@duck.com>
This commit is contained in:
Matt Robenolt
2024-01-02 11:35:09 -08:00
committed by GitHub
parent c06f804cea
commit 656bb5cafa
6 changed files with 84 additions and 0 deletions

3
.changelog/19515.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
rawexec: Fixed a bug where oom_score_adj would be inherited from the Nomad client
```

View File

@@ -5,6 +5,7 @@ package executor
import (
"fmt"
"os"
"os/exec"
"strconv"
"syscall"
@@ -112,6 +113,11 @@ func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) {
func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) {
cgroup := command.StatsCgroup()
// ensure tasks do not inherit Nomad agent oom_score_adj value
if err := e.setOomAdj(); err != nil {
return nil, err
}
// cgCleanup will be called after the task has been launched
// v1: remove the executor process from the task's cgroups
// v2: let go of the file descriptor of the task's cgroup
@@ -244,6 +250,14 @@ func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) {
_ = ed.Write("cpuset.cpus", cpusetCpus)
}
// setOomAdj resets the OOM score adjustment of the calling process to "0" by
// writing to /proc/self/oom_score_adj, so that processes started afterwards do
// not inherit whatever value the Nomad agent was launched with.
//
// The procfs path is expected to work on both cgroups v1 and v2 systems. The
// accepted range is -1000 to 1000, with 0 being the default.
//
// Any error from the write is returned unchanged.
func (e *UniversalExecutor) setOomAdj() error {
	const (
		oomScoreAdjPath    = "/proc/self/oom_score_adj"
		defaultOomScoreAdj = "0"
	)

	if err := os.WriteFile(oomScoreAdjPath, []byte(defaultOomScoreAdj), 0644); err != nil {
		return err
	}
	return nil
}
func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 {
cpuShares := command.Resources.LinuxResources.CPUShares
cpuWeight := cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares))

5
e2e/rawexec/doc.go Normal file
View File

@@ -0,0 +1,5 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
// Package rawexec tests the raw_exec task driver.
package rawexec

View File

@@ -0,0 +1,32 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
# Batch job whose single raw_exec task prints its own oom_score_adj value to
# stdout, so a test can read the task logs and inspect the value the task
# actually ran with.
job "oomadj" {
type = "batch"
# /proc/self/oom_score_adj only exists on Linux, so only place on Linux nodes.
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}
group "group" {
# Never reschedule: the task is meant to run exactly once.
reschedule {
attempts = 0
unlimited = false
}
# Never restart: a failed run should fail the allocation immediately.
restart {
attempts = 0
mode = "fail"
}
task "cat" {
driver = "raw_exec"
config {
# "self" resolves to the cat process itself, i.e. the task's own value.
command = "cat"
args = ["/proc/self/oom_score_adj"]
}
}
}
}

View File

@@ -0,0 +1,29 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package rawexec
import (
"testing"
"github.com/hashicorp/nomad/e2e/v3/cluster3"
"github.com/hashicorp/nomad/e2e/v3/jobs3"
"github.com/shoenig/test/must"
)
// TestRawExec is the entry point for the raw_exec end-to-end tests. It
// declares the cluster requirements (a leader and one Linux client) via
// cluster3.Establish and then runs each case as a named subtest.
func TestRawExec(t *testing.T) {
// NOTE(review): presumably Establish blocks or fails the test until the
// listed requirements are met - confirm against the cluster3 package.
cluster3.Establish(t,
cluster3.Leader(),
cluster3.LinuxClients(1),
)
t.Run("testOomAdj", testOomAdj)
}
// testOomAdj submits the oomadj job, whose raw_exec task runs
// `cat /proc/self/oom_score_adj`, and asserts that the task observed an
// oom_score_adj of 0 rather than a value inherited from the Nomad agent.
func testOomAdj(t *testing.T) {
	job, cleanup := jobs3.Submit(t, "./input/oomadj.hcl")
	t.Cleanup(cleanup)

	logs := job.TaskLogs("group", "cat")

	// A substring match on "0" would also accept inherited values such as
	// "-1000" or "100". Within the valid range of -1000 to 1000 the only value
	// whose decimal form begins with "0" is 0 itself, so a prefix check pins
	// the value while still tolerating any trailing newline in the output.
	must.StrHasPrefix(t, "0", logs.Stdout)
}

View File

@@ -17,6 +17,7 @@ LimitNPROC=infinity
TasksMax=infinity
Restart=on-failure
RestartSec=2
OOMScoreAdjust=-999
[Install]
WantedBy=multi-user.target