Mirror of https://github.com/kemko/nomad.git, synced 2026-01-06 18:35:44 +03:00
[gh-6980] Client: clean up old allocs before running new ones using the exec task driver. (#20500)
Whenever the "exec" task driver is used, Nomad runs a plugin (the executor) that in turn runs the task in a container under the hood. If for any reason the executor is killed, the task is reparented to the init process and won't be stopped by Nomad when the job is updated or stopped. This commit introduces two mechanisms to avoid this behaviour: * Adds signal catching and handling to the executor, so that a SIGTERM received by the executor is also passed on to the task. * Adds a pre-start cleanup of the processes in the container, ensuring only the ones the executor runs are present at any given time.
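As a rough illustration of the first mechanism (this is not the actual Nomad executor code; the child command, channel name, and signal set are assumptions), signal catching and forwarding in Go typically looks like the following sketch, where the supervising process relays SIGTERM/SIGINT to its child so the child is never left running unmanaged:

// signal_forwarding_sketch.go
//
// Hypothetical sketch: catch termination signals in a supervising process
// and forward them to a child process (stand-in for the task).
package main

import (
	"os"
	"os/exec"
	"os/signal"
	"syscall"
)

func main() {
	// Start the supervised child process.
	cmd := exec.Command("/bin/sleep", "100")
	if err := cmd.Start(); err != nil {
		panic(err)
	}

	// Catch termination signals sent to the supervisor.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT)

	go func() {
		for sig := range sigChan {
			// Forward the signal to the child so it shuts down too.
			_ = cmd.Process.Signal(sig)
		}
	}()

	// Wait for the child to exit.
	_ = cmd.Wait()
}

In the actual change the forwarded signal ultimately reaches the containerized task rather than a plain child process, as exercised by TestExecutor_SignalCatching in the diff below.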
Committed by: GitHub
Parent: 5b328d9adc
Commit: 169818b1bd
@@ -7,10 +7,12 @@ import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"syscall"
	"testing"
	"time"

@@ -27,6 +29,7 @@ import (
	tu "github.com/hashicorp/nomad/testutil"
	lconfigs "github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/devices"
	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/shoenig/test"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"
@@ -856,3 +859,117 @@ func TestExecCommand_getCgroupOr_v1_relative(t *testing.T) {
	result2 := ec.getCgroupOr("cpuset", "/sys/fs/cgroup/cpuset/nomad/abc123")
	must.Eq(t, result2, "/sys/fs/cgroup/cpuset/custom/path")
}

func createCGroup(fullpath string) (cgroupslib.Interface, error) {
	if err := os.MkdirAll(fullpath, 0755); err != nil {
		return nil, err
	}

	return cgroupslib.OpenPath(fullpath), nil
}

func TestExecutor_CleanOldProcessesInCGroup(t *testing.T) {
	ci.Parallel(t)

	testutil.ExecCompatible(t)
	testutil.CgroupsCompatible(t)

	testExecCmd := testExecutorCommandWithChroot(t)

	allocDir := testExecCmd.allocDir
	defer allocDir.Destroy()

	fullCGroupPath := testExecCmd.command.Resources.LinuxResources.CpusetCgroupPath

	execCmd := testExecCmd.command
	execCmd.Cmd = "/bin/sleep"
	execCmd.Args = []string{"1"}
	execCmd.ResourceLimits = true
	execCmd.ModePID = "private"
	execCmd.ModeIPC = "private"

	// Create the CGroup the executor's command will run in and populate it with one process
	cgInterface, err := createCGroup(fullCGroupPath)
	must.NoError(t, err)

	cmd := exec.Command("/bin/sleep", "3000")
	err = cmd.Start()
	must.NoError(t, err)

	go func() {
		err := cmd.Wait()
		// This process will be killed by the executor as a prerequisite to run
		// the executor's command.
		must.Error(t, err)
	}()

	pid := cmd.Process.Pid
	must.Positive(t, pid)

	err = cgInterface.Write("cgroup.procs", strconv.Itoa(pid))
	must.NoError(t, err)

	pids, err := cgInterface.PIDs()
	must.NoError(t, err)
	must.One(t, pids.Size())

	// Run the executor normally and make sure the process that was originally running
	// as part of the CGroup was killed, and only the executor's process is running.
	execInterface := NewExecutorWithIsolation(testlog.HCLogger(t), compute)
	executor := execInterface.(*LibcontainerExecutor)
	defer executor.Shutdown("SIGKILL", 0)

	ps, err := executor.Launch(execCmd)
	must.NoError(t, err)
	must.Positive(t, ps.Pid)

	pids, err = cgInterface.PIDs()
	must.NoError(t, err)
	must.One(t, pids.Size())
	must.True(t, pids.Contains(ps.Pid))
	must.False(t, pids.Contains(pid))

	estate, err := executor.Wait(context.Background())
	must.NoError(t, err)
	must.Zero(t, estate.ExitCode)

	must.NoError(t, executor.Shutdown("", 0))
	executor.Wait(context.Background())
}

func TestExecutor_SignalCatching(t *testing.T) {
	ci.Parallel(t)

	testutil.ExecCompatible(t)
	testutil.CgroupsCompatible(t)

	testExecCmd := testExecutorCommandWithChroot(t)

	allocDir := testExecCmd.allocDir
	defer allocDir.Destroy()

	execCmd := testExecCmd.command
	execCmd.Cmd = "/bin/sleep"
	execCmd.Args = []string{"100"}
	execCmd.ResourceLimits = true
	execCmd.ModePID = "private"
	execCmd.ModeIPC = "private"

	execInterface := NewExecutorWithIsolation(testlog.HCLogger(t), compute)

	ps, err := execInterface.Launch(execCmd)
	must.NoError(t, err)
	must.Positive(t, ps.Pid)

	executor := execInterface.(*LibcontainerExecutor)
	status, err := executor.container.OCIState()
	must.NoError(t, err)
	must.Eq(t, specs.StateRunning, status.Status)

	executor.sigChan <- syscall.SIGTERM
	time.Sleep(1 * time.Second)

	status, err = executor.container.OCIState()
	must.NoError(t, err)
	must.Eq(t, specs.StateStopped, status.Status)
}