exec: don't panic on rootless raw_exec tasks (#26401)

the executor dies, leaving an orphaned process still running.

the panic fix:
 * don't `panic()`
 * and return an empty, but non-nil, func on cgroup error

feature fix:
 * allow non-root agent to proceed with exec when cgroups are off
This commit is contained in:
Daniel Bennett
2025-08-04 13:58:35 -04:00
committed by GitHub
parent 9859f4a140
commit 7c633f8109
4 changed files with 38 additions and 24 deletions

3
.changelog/26401.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
alloc exec: Fixed executor panic when exec-ing a rootless raw_exec task
```

View File

@@ -5,6 +5,7 @@ package executor
import (
"context"
"errors"
"fmt"
"io"
"os"
@@ -49,6 +50,9 @@ var (
// The statistics the basic executor exposes
ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
// ErrCgroupMustBeSet occurs if a cgroup is not provided when expected
ErrCgroupMustBeSet = errors.New("cgroup must be set")
)
// Executor is the interface which allows a driver to launch and supervise
@@ -441,7 +445,7 @@ func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string)
defer cancel()
if cleanup, err := e.setSubCmdCgroup(&e.childCmd, e.command.StatsCgroup()); err != nil {
return nil, 0, err
return nil, 0, fmt.Errorf("Exec: %w", err)
} else {
defer cleanup()
}
@@ -533,7 +537,7 @@ func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string,
}
cgroup := e.command.StatsCgroup()
if cleanup, err := e.setSubCmdCgroup(cmd, cgroup); err != nil {
return err
return fmt.Errorf("ExecStreaming: %w", err)
} else {
defer cleanup()
}

View File

@@ -7,7 +7,6 @@ package executor
import (
"context"
"errors"
"fmt"
"io"
"os"
@@ -766,7 +765,7 @@ func (l *LibcontainerExecutor) configureCgroups(cfg *runc.Config, command *ExecC
cg := command.StatsCgroup()
if cg == "" {
return errors.New("cgroup must be set")
return fmt.Errorf("configureCgroups: %w", ErrCgroupMustBeSet)
}
// // set the libcontainer hook for writing the PID to cgroup.procs file

View File

@@ -30,27 +30,32 @@ const (
// setSubCmdCgroup sets the cgroup for non-Task child processes of the
// executor.Executor (since in cg2 it lives outside the task's cgroup)
func (e *UniversalExecutor) setSubCmdCgroup(cmd *exec.Cmd, cgroup string) (func(), error) {
// no extra setup needed for cg v1 or when cgroups are "off"
switch cgroupslib.GetMode() {
case cgroupslib.OFF, cgroupslib.CG1:
return func() {}, nil
default:
// continue for cg v2
}
if cgroup == "" {
panic("cgroup must be set")
return nil, fmt.Errorf("error setting up exec subcommand: %w", ErrCgroupMustBeSet)
}
fd, cleanup, err := e.statCG(cgroup)
if err != nil {
return nil, err
}
// make sure attrs struct has been set
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = new(syscall.SysProcAttr)
}
cmd.SysProcAttr.UseCgroupFD = true
cmd.SysProcAttr.CgroupFD = fd
switch cgroupslib.GetMode() {
case cgroupslib.CG2:
fd, cleanup, err := e.statCG(cgroup)
if err != nil {
return nil, err
}
cmd.SysProcAttr.UseCgroupFD = true
cmd.SysProcAttr.CgroupFD = fd
return cleanup, nil
default:
return func() {}, nil
}
return cleanup, nil
}
func (e *UniversalExecutor) ListProcesses() set.Collection[procstats.ProcessID] {
@@ -93,11 +98,8 @@ func (e *UniversalExecutor) configureResourceContainer(
) (runningFunc, cleanupFunc, error) {
cgroup := command.StatsCgroup()
// ensure tasks get the desired oom_score_adj value set
if err := e.setOomAdj(command.OOMScoreAdj); err != nil {
return nil, nil, err
}
// we specify these return funcs as empty but non-nil,
// because callers may call them even if this function errors.
// deleteCgroup will be called after the task has been launched
// v1: remove the executor process from the task's cgroups
// v2: let go of the file descriptor of the task's cgroup
@@ -106,6 +108,11 @@ func (e *UniversalExecutor) configureResourceContainer(
moveProcess = func() error { return nil }
)
// ensure tasks get the desired oom_score_adj value set
if err := e.setOomAdj(command.OOMScoreAdj); err != nil {
return moveProcess, deleteCgroup, err
}
// manually configure cgroup for cpu / memory constraints
switch cgroupslib.GetMode() {
case cgroupslib.CG1:
@@ -113,9 +120,10 @@ func (e *UniversalExecutor) configureResourceContainer(
return moveProcess, deleteCgroup, err
}
moveProcess, deleteCgroup = e.enterCG1(cgroup, command.CpusetCgroup())
case cgroupslib.OFF:
deleteCgroup = func() {}
moveProcess = func() error { return nil }
// do nothing
default:
e.configureCG2(cgroup, command)
// configure child process to spawn in the cgroup