drivers: set -1 exit code in case executor gets killed (#25453)

Nomad task drivers incorrectly reported exit code 0 when the executor failed
(for example, when the executor process was killed). This change makes them
report exit code -1 in that case.

---------

Co-authored-by: Tim Gross <tgross@hashicorp.com>
This commit is contained in:
Piotr Kazmierczak
2025-03-20 15:06:39 +01:00
committed by GitHub
parent b3f28f9387
commit cb8f4ea452
6 changed files with 83 additions and 0 deletions

3
.changelog/25453.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
drivers: set -1 exit code in case of executor failure for the exec, raw_exec, java, and qemu task drivers
```

View File

@@ -595,6 +595,12 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
result = &drivers.ExitResult{
Err: fmt.Errorf("executor: error waiting on process: %v", err),
}
// if process state is nil, we've probably been killed, so return a reasonable
// exit state to the handlers
if ps == nil {
result.ExitCode = -1
result.OOMKilled = false
}
} else {
result = &drivers.ExitResult{
ExitCode: ps.ExitCode,

View File

@@ -604,6 +604,12 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
result = &drivers.ExitResult{
Err: fmt.Errorf("executor: error waiting on process: %v", err),
}
// if process state is nil, we've probably been killed, so return a reasonable
// exit state to the handlers
if ps == nil {
result.ExitCode = -1
result.OOMKilled = false
}
} else {
result = &drivers.ExitResult{
ExitCode: ps.ExitCode,

View File

@@ -743,6 +743,12 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
result = &drivers.ExitResult{
Err: fmt.Errorf("executor: error waiting on process: %v", err),
}
// if process state is nil, we've probably been killed, so return a reasonable
// exit state to the handlers
if ps == nil {
result.ExitCode = -1
result.OOMKilled = false
}
} else {
result = &drivers.ExitResult{
ExitCode: ps.ExitCode,

View File

@@ -471,6 +471,12 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
result = &drivers.ExitResult{
Err: fmt.Errorf("executor: error waiting on process: %v", err),
}
// if process state is nil, we've probably been killed, so return a reasonable
// exit state to the handlers
if ps == nil {
result.ExitCode = -1
result.OOMKilled = false
}
} else {
result = &drivers.ExitResult{
ExitCode: ps.ExitCode,

View File

@@ -580,3 +580,59 @@ func TestRawExec_Validate(t *testing.T) {
must.Eq(t, tc.exp, d.Validate(tc.driverConfig))
}
}
// TestRawExecDriver_ExecutorKilled_ExitCode starts a task that sleeps for 10s,
// sends SIGKILL to the PID recorded in the task's reattach config, and asserts
// that waiting on the task yields an unsuccessful result with exit code -1 and
// OOMKilled unset.
func TestRawExecDriver_ExecutorKilled_ExitCode(t *testing.T) {
	ci.Parallel(t)
	clienttestutil.ExecCompatible(t)

	d := newEnabledRawExecDriver(t)
	harness := dtestutil.NewDriverHarness(t, d)
	defer harness.Kill()

	allocID := uuid.Generate()
	taskName := "sleep"
	task := &drivers.TaskConfig{
		AllocID:   allocID,
		ID:        uuid.Generate(),
		Name:      taskName,
		Env:       defaultEnv(),
		Resources: testResources(allocID, taskName),
	}

	cleanup := harness.MkAllocDir(task, false)
	defer cleanup()

	tc := &TaskConfig{
		Command: testtask.Path(),
		Args:    []string{"sleep", "10s"},
	}
	must.NoError(t, task.EncodeConcreteDriverConfig(&tc))
	testtask.SetTaskConfigEnv(task)

	harness.MakeTaskCgroup(allocID, taskName)
	handle, _, err := harness.StartTask(task)
	must.NoError(t, err)

	// Decode driver state to get executor PID
	var driverState TaskState
	must.NoError(t, handle.GetDriverState(&driverState))

	// Fail cleanly instead of panicking on a nil dereference if the driver
	// state carries no reattach config.
	must.NotNil(t, driverState.ReattachConfig)

	// Kill the executor; the wait below observes the resulting exit state.
	pid := driverState.ReattachConfig.Pid
	must.NoError(t, syscall.Kill(pid, syscall.SIGKILL))

	// Make sure the right exit code is set
	waitCh, err := harness.WaitTask(context.Background(), task.ID)
	must.NoError(t, err)

	select {
	case res := <-waitCh:
		must.False(t, res.Successful())
		must.Eq(t, -1, res.ExitCode)
		must.False(t, res.OOMKilled)
	case <-time.After(10 * time.Second):
		must.Unreachable(t, must.Sprint("exceeded wait timeout"))
	}

	must.NoError(t, harness.DestroyTask(task.ID, true))
}