mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 18:35:44 +03:00
Add OOM detection for exec driver (#19563)
* Add OomKilled field to executor proto format
* Teach linux executor to detect and report OOMs
* Teach exec driver to propagate OOMKill information
* Fix data race
* use tail /dev/zero to create oom condition
* use new test framework
* minor tweaks to executor test
* add cl entry
* remove type conversion

---------

Co-authored-by: Marvin Chin <marvinchin@users.noreply.github.com>
Co-authored-by: Seth Hoenig <shoenig@duck.com>
This commit is contained in:
@@ -552,8 +552,9 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
|
||||
}
|
||||
} else {
|
||||
result = &drivers.ExitResult{
|
||||
ExitCode: ps.ExitCode,
|
||||
Signal: ps.Signal,
|
||||
ExitCode: ps.ExitCode,
|
||||
Signal: ps.Signal,
|
||||
OOMKilled: ps.OOMKilled,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ import (
|
||||
"github.com/hashicorp/nomad/plugins/drivers"
|
||||
dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/shoenig/test/must"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -788,6 +789,48 @@ func TestExecDriver_NoPivotRoot(t *testing.T) {
|
||||
require.NoError(t, harness.DestroyTask(task.ID, true))
|
||||
}
|
||||
|
||||
func TestExecDriver_OOMKilled(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
ctestutils.ExecCompatible(t)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
d := newExecDriverTest(t, ctx)
|
||||
harness := dtestutil.NewDriverHarness(t, d)
|
||||
allocID := uuid.Generate()
|
||||
name := "oom-killed"
|
||||
task := &drivers.TaskConfig{
|
||||
AllocID: allocID,
|
||||
ID: uuid.Generate(),
|
||||
Name: name,
|
||||
Resources: testResources(allocID, name),
|
||||
}
|
||||
task.Resources.LinuxResources.MemoryLimitBytes = 10 * 1024 * 1024
|
||||
task.Resources.NomadResources.Memory.MemoryMB = 10
|
||||
|
||||
tc := &TaskConfig{
|
||||
Command: "/bin/tail",
|
||||
Args: []string{"/dev/zero"},
|
||||
}
|
||||
must.NoError(t, task.EncodeConcreteDriverConfig(&tc))
|
||||
|
||||
cleanup := harness.MkAllocDir(task, false)
|
||||
defer cleanup()
|
||||
|
||||
handle, _, err := harness.StartTask(task)
|
||||
must.NoError(t, err)
|
||||
|
||||
ch, err := harness.WaitTask(context.Background(), handle.Config.ID)
|
||||
must.NoError(t, err)
|
||||
result := <-ch
|
||||
must.False(t, result.Successful(), must.Sprint("container should OOM"))
|
||||
must.True(t, result.OOMKilled, must.Sprintf("got non-OOM error, code: %d, err: %v", result.ExitCode, result.Err))
|
||||
|
||||
t.Logf("Successfully killed by OOM killer")
|
||||
must.NoError(t, harness.DestroyTask(task.ID, true))
|
||||
}
|
||||
|
||||
func TestDriver_Config_validate(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
t.Run("pid/ipc", func(t *testing.T) {
|
||||
|
||||
@@ -76,7 +76,6 @@ func (h *taskHandle) run() {
|
||||
h.procState = drivers.TaskStateExited
|
||||
h.exitResult.ExitCode = ps.ExitCode
|
||||
h.exitResult.Signal = ps.Signal
|
||||
h.exitResult.OOMKilled = ps.OOMKilled
|
||||
h.completedAt = ps.Time
|
||||
|
||||
// TODO: detect if the task OOMed
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user