mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
exec: don't panic on rootless raw_exec tasks (#26401)
the executor dies, leaving an orphaned process still running. the panic fix: * don't `panic()` * and return an empty, but non-nil, func on cgroup error feature fix: * allow non-root agent to proceed with exec when cgroups are off
This commit is contained in:
3
.changelog/26401.txt
Normal file
3
.changelog/26401.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
alloc exec: Fixed executor panic when exec-ing a rootless raw_exec task
|
||||
```
|
||||
@@ -5,6 +5,7 @@ package executor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -49,6 +50,9 @@ var (
|
||||
// The statistics the basic executor exposes
|
||||
ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
|
||||
ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
|
||||
|
||||
// ErrCgroupMustBeSet occurs if a cgroup is not provided when expected
|
||||
ErrCgroupMustBeSet = errors.New("cgroup must be set")
|
||||
)
|
||||
|
||||
// Executor is the interface which allows a driver to launch and supervise
|
||||
@@ -441,7 +445,7 @@ func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string)
|
||||
defer cancel()
|
||||
|
||||
if cleanup, err := e.setSubCmdCgroup(&e.childCmd, e.command.StatsCgroup()); err != nil {
|
||||
return nil, 0, err
|
||||
return nil, 0, fmt.Errorf("Exec: %w", err)
|
||||
} else {
|
||||
defer cleanup()
|
||||
}
|
||||
@@ -533,7 +537,7 @@ func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string,
|
||||
}
|
||||
cgroup := e.command.StatsCgroup()
|
||||
if cleanup, err := e.setSubCmdCgroup(cmd, cgroup); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("ExecStreaming: %w", err)
|
||||
} else {
|
||||
defer cleanup()
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ package executor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -766,7 +765,7 @@ func (l *LibcontainerExecutor) configureCgroups(cfg *runc.Config, command *ExecC
|
||||
|
||||
cg := command.StatsCgroup()
|
||||
if cg == "" {
|
||||
return errors.New("cgroup must be set")
|
||||
return fmt.Errorf("configureCgroups: %w", ErrCgroupMustBeSet)
|
||||
}
|
||||
|
||||
// // set the libcontainer hook for writing the PID to cgroup.procs file
|
||||
|
||||
@@ -30,27 +30,32 @@ const (
|
||||
// setSubCmdCgroup sets the cgroup for non-Task child processes of the
|
||||
// executor.Executor (since in cg2 it lives outside the task's cgroup)
|
||||
func (e *UniversalExecutor) setSubCmdCgroup(cmd *exec.Cmd, cgroup string) (func(), error) {
|
||||
|
||||
// no extra setup needed for cg v1 or when cgroups are "off"
|
||||
switch cgroupslib.GetMode() {
|
||||
case cgroupslib.OFF, cgroupslib.CG1:
|
||||
return func() {}, nil
|
||||
default:
|
||||
// continue for cg v2
|
||||
}
|
||||
|
||||
if cgroup == "" {
|
||||
panic("cgroup must be set")
|
||||
return nil, fmt.Errorf("error setting up exec subcommand: %w", ErrCgroupMustBeSet)
|
||||
}
|
||||
|
||||
fd, cleanup, err := e.statCG(cgroup)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// make sure attrs struct has been set
|
||||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = new(syscall.SysProcAttr)
|
||||
}
|
||||
cmd.SysProcAttr.UseCgroupFD = true
|
||||
cmd.SysProcAttr.CgroupFD = fd
|
||||
|
||||
switch cgroupslib.GetMode() {
|
||||
case cgroupslib.CG2:
|
||||
fd, cleanup, err := e.statCG(cgroup)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmd.SysProcAttr.UseCgroupFD = true
|
||||
cmd.SysProcAttr.CgroupFD = fd
|
||||
return cleanup, nil
|
||||
default:
|
||||
return func() {}, nil
|
||||
}
|
||||
return cleanup, nil
|
||||
}
|
||||
|
||||
func (e *UniversalExecutor) ListProcesses() set.Collection[procstats.ProcessID] {
|
||||
@@ -93,11 +98,8 @@ func (e *UniversalExecutor) configureResourceContainer(
|
||||
) (runningFunc, cleanupFunc, error) {
|
||||
cgroup := command.StatsCgroup()
|
||||
|
||||
// ensure tasks get the desired oom_score_adj value set
|
||||
if err := e.setOomAdj(command.OOMScoreAdj); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// we specify these return funcs as empty but non-nil,
|
||||
// because callers may call them even if this function errors.
|
||||
// deleteCgroup will be called after the task has been launched
|
||||
// v1: remove the executor process from the task's cgroups
|
||||
// v2: let go of the file descriptor of the task's cgroup
|
||||
@@ -106,6 +108,11 @@ func (e *UniversalExecutor) configureResourceContainer(
|
||||
moveProcess = func() error { return nil }
|
||||
)
|
||||
|
||||
// ensure tasks get the desired oom_score_adj value set
|
||||
if err := e.setOomAdj(command.OOMScoreAdj); err != nil {
|
||||
return moveProcess, deleteCgroup, err
|
||||
}
|
||||
|
||||
// manually configure cgroup for cpu / memory constraints
|
||||
switch cgroupslib.GetMode() {
|
||||
case cgroupslib.CG1:
|
||||
@@ -113,9 +120,10 @@ func (e *UniversalExecutor) configureResourceContainer(
|
||||
return moveProcess, deleteCgroup, err
|
||||
}
|
||||
moveProcess, deleteCgroup = e.enterCG1(cgroup, command.CpusetCgroup())
|
||||
|
||||
case cgroupslib.OFF:
|
||||
deleteCgroup = func() {}
|
||||
moveProcess = func() error { return nil }
|
||||
// do nothing
|
||||
|
||||
default:
|
||||
e.configureCG2(cgroup, command)
|
||||
// configure child process to spawn in the cgroup
|
||||
|
||||
Reference in New Issue
Block a user