mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 18:35:44 +03:00
Merge pull request #6839 from hashicorp/b-cgroup-cleanup
executor: stop joining executor to container cgroup
This commit is contained in:
@@ -17,7 +17,6 @@ import (
|
||||
"github.com/armon/circbuf"
|
||||
"github.com/hashicorp/consul-template/signals"
|
||||
hclog "github.com/hashicorp/go-hclog"
|
||||
multierror "github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/stats"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
@@ -27,7 +26,6 @@ import (
|
||||
"github.com/hashicorp/nomad/plugins/drivers"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
lconfigs "github.com/opencontainers/runc/libcontainer/configs"
|
||||
ldevices "github.com/opencontainers/runc/libcontainer/devices"
|
||||
lutils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
@@ -36,7 +34,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
defaultCgroupParent = "nomad"
|
||||
defaultCgroupParent = "/nomad"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -92,15 +90,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro
|
||||
|
||||
l.command = command
|
||||
|
||||
// Move to the root cgroup until process is started
|
||||
subsystems, err := cgroups.GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := JoinRootCgroup(subsystems); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// create a new factory which will store the container state in the allocDir
|
||||
factory, err := libcontainer.New(
|
||||
path.Join(command.TaskDir, "../alloc/container"),
|
||||
@@ -194,15 +183,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Join process cgroups
|
||||
containerState, err := container.State()
|
||||
if err != nil {
|
||||
l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err)
|
||||
}
|
||||
if err := cgroups.EnterPid(containerState.CgroupPaths, os.Getpid()); err != nil {
|
||||
l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err)
|
||||
}
|
||||
|
||||
// start a goroutine to wait on the process to complete, so Wait calls can
|
||||
// be multiplexed
|
||||
l.userProcExited = make(chan interface{})
|
||||
@@ -287,15 +267,6 @@ func (l *LibcontainerExecutor) Shutdown(signal string, grace time.Duration) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
// move executor to root cgroup
|
||||
subsystems, err := cgroups.GetAllSubsystems()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := JoinRootCgroup(subsystems); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
status, err := l.container.Status()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -720,33 +691,40 @@ func configureBasicCgroups(cfg *lconfigs.Config) error {
|
||||
id := uuid.Generate()
|
||||
|
||||
// Manually create freezer cgroup
|
||||
cfg.Cgroups.Paths = map[string]string{}
|
||||
root, err := cgroups.FindCgroupMountpointDir()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return err
|
||||
}
|
||||
subsystem := "freezer"
|
||||
|
||||
freezer := cgroupFs.FreezerGroup{}
|
||||
subsystem := freezer.Name()
|
||||
path, err := cgroups.FindCgroupMountpoint("", subsystem)
|
||||
path, err := getCgroupPathHelper(subsystem, filepath.Join(defaultCgroupParent, id))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err)
|
||||
}
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
path = filepath.Join(root, filepath.Base(path), defaultCgroupParent, id)
|
||||
|
||||
if err = os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg.Cgroups.Paths[subsystem] = path
|
||||
cfg.Cgroups.Paths = map[string]string{
|
||||
subsystem: path,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) {
|
||||
cfg := &lconfigs.Config{
|
||||
Cgroups: &lconfigs.Cgroup{
|
||||
@@ -771,28 +749,6 @@ func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// JoinRootCgroup moves the current process to the cgroups of the init process
|
||||
func JoinRootCgroup(subsystems []string) error {
|
||||
mErrs := new(multierror.Error)
|
||||
paths := map[string]string{}
|
||||
for _, s := range subsystems {
|
||||
mnt, _, err := cgroups.FindCgroupMountpointAndRoot("", s)
|
||||
if err != nil {
|
||||
multierror.Append(mErrs, fmt.Errorf("error getting cgroup path for subsystem: %s", s))
|
||||
continue
|
||||
}
|
||||
|
||||
paths[s] = mnt
|
||||
}
|
||||
|
||||
err := cgroups.EnterPid(paths, os.Getpid())
|
||||
if err != nil {
|
||||
multierror.Append(mErrs, err)
|
||||
}
|
||||
|
||||
return mErrs.ErrorOrNil()
|
||||
}
|
||||
|
||||
// cmdDevices converts a list of driver.DeviceConfigs into excutor.Devices.
|
||||
func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) {
|
||||
if len(devices) == 0 {
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/plugins/drivers"
|
||||
tu "github.com/hashicorp/nomad/testutil"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
lconfigs "github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -217,6 +218,88 @@ func TestExecutor_CgroupPaths(t *testing.T) {
|
||||
}, func(err error) { t.Error(err) })
|
||||
}
|
||||
|
||||
// TestExecutor_CgroupPaths asserts that all cgroups created for a task
|
||||
// are destroyed on shutdown
|
||||
func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
testutil.ExecCompatible(t)
|
||||
|
||||
testExecCmd := testExecutorCommandWithChroot(t)
|
||||
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
|
||||
execCmd.Cmd = "/bin/bash"
|
||||
execCmd.Args = []string{"-c", "sleep 0.2; cat /proc/self/cgroup"}
|
||||
defer allocDir.Destroy()
|
||||
|
||||
execCmd.ResourceLimits = true
|
||||
|
||||
executor := NewExecutorWithIsolation(testlog.HCLogger(t))
|
||||
defer executor.Shutdown("SIGKILL", 0)
|
||||
|
||||
ps, err := executor.Launch(execCmd)
|
||||
require.NoError(err)
|
||||
require.NotZero(ps.Pid)
|
||||
|
||||
state, err := executor.Wait(context.Background())
|
||||
require.NoError(err)
|
||||
require.Zero(state.ExitCode)
|
||||
|
||||
var cgroupsPaths string
|
||||
tu.WaitForResult(func() (bool, error) {
|
||||
output := strings.TrimSpace(testExecCmd.stdout.String())
|
||||
// sanity check that we got some cgroups
|
||||
if !strings.Contains(output, ":devices:") {
|
||||
return false, fmt.Errorf("was expected cgroup files but found:\n%v", output)
|
||||
}
|
||||
lines := strings.Split(output, "\n")
|
||||
for _, line := range lines {
|
||||
// Every cgroup entry should be /nomad/$ALLOC_ID
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker
|
||||
// don't isolate it by default.
|
||||
if strings.Contains(line, ":rdma:") {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.Contains(line, ":/nomad/") {
|
||||
return false, fmt.Errorf("Not a member of the alloc's cgroup: expected=...:/nomad/... -- found=%q", line)
|
||||
}
|
||||
}
|
||||
|
||||
cgroupsPaths = output
|
||||
return true, nil
|
||||
}, func(err error) { t.Error(err) })
|
||||
|
||||
// shutdown executor and test that cgroups are destroyed
|
||||
executor.Shutdown("SIGKILL", 0)
|
||||
|
||||
// test that the cgroup paths are not visible
|
||||
tmpFile, err := ioutil.TempFile("", "")
|
||||
require.NoError(err)
|
||||
defer os.Remove(tmpFile.Name())
|
||||
|
||||
_, err = tmpFile.WriteString(cgroupsPaths)
|
||||
require.NoError(err)
|
||||
tmpFile.Close()
|
||||
|
||||
subsystems, err := cgroups.ParseCgroupFile(tmpFile.Name())
|
||||
require.NoError(err)
|
||||
|
||||
for subsystem, cgroup := range subsystems {
|
||||
if !strings.Contains(cgroup, "nomad/") {
|
||||
// this should only be rdma at this point
|
||||
continue
|
||||
}
|
||||
|
||||
p, err := getCgroupPathHelper(subsystem, cgroup)
|
||||
require.NoError(err)
|
||||
require.Falsef(cgroups.PathExists(p), "cgroup for %s %s still exists", subsystem, cgroup)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUniversalExecutor_LookupTaskBin(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
Reference in New Issue
Block a user