mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
volumes: Add support for mount propagation
This commit introduces support for configuring mount propagation when
mounting volumes with the `volume_mount` stanza on Linux targets.
Similar to Kubernetes, we expose 3 options for configuring mount
propagation:
- private, which is equivalent to `rprivate` on Linux, which does not allow the
container to see any new nested mounts after the chroot was created.
- host-to-task, which is equivalent to `rslave` on Linux, which allows new mounts
that have been created _outside of the container_ to be visible
inside the container after the chroot is created.
- bidirectional, which is equivalent to `rshared` on Linux, which allows
the container to see new mounts created on the host, and
importantly _also allows the container to create mounts that are
visible in other containers and on the host_
private and host-to-task are safe, but bidirectional mounts can be
dangerous, as if the code inside a container creates a mount, and does
not clean it up before tearing down the container, it can cause bad
things to happen inside the kernel.
To add a layer of safety here, we require that the user has ReadWrite
permissions on the volume before allowing bidirectional mounts, as a
defense in depth / validation case, although creating mounts should also require
a privileged execution environment inside the container.
This commit is contained in:
25
api/tasks.go
25
api/tasks.go
@@ -370,12 +370,28 @@ type VolumeRequest struct {
|
||||
ReadOnly bool `mapstructure:"read_only"`
|
||||
}
|
||||
|
||||
// Values accepted for VolumeMount.PropagationMode. VolumeMountPropagationPrivate
// is the default applied by VolumeMount.Canonicalize when no mode is set.
const (
|
||||
VolumeMountPropagationPrivate = "private"
|
||||
VolumeMountPropagationHostToTask = "host-to-task"
|
||||
VolumeMountPropagationBidirectional = "bidirectional"
|
||||
)
|
||||
|
||||
// VolumeMount represents the relationship between a destination path in a task
|
||||
// and the task group volume that should be mounted there.
|
||||
type VolumeMount struct {
|
||||
Volume string
|
||||
Destination string
|
||||
ReadOnly bool `mapstructure:"read_only"`
|
||||
Volume *string
|
||||
Destination *string
|
||||
ReadOnly *bool `mapstructure:"read_only"`
|
||||
PropagationMode *string `mapstructure:"propagation_mode"`
|
||||
}
|
||||
|
||||
// Canonicalize fills in defaults for optional fields that were left unset:
// PropagationMode becomes VolumeMountPropagationPrivate and ReadOnly becomes
// false. Fields that are already set are left untouched. Volume and
// Destination are not defaulted here and may still be nil afterwards.
func (vm *VolumeMount) Canonicalize() {
|
||||
if vm.PropagationMode == nil {
|
||||
vm.PropagationMode = stringToPtr(VolumeMountPropagationPrivate)
|
||||
}
|
||||
if vm.ReadOnly == nil {
|
||||
vm.ReadOnly = boolToPtr(false)
|
||||
}
|
||||
}
|
||||
|
||||
// TaskGroup is the unit of scheduling.
|
||||
@@ -632,6 +648,9 @@ func (t *Task) Canonicalize(tg *TaskGroup, job *Job) {
|
||||
for _, a := range t.Affinities {
|
||||
a.Canonicalize()
|
||||
}
|
||||
for _, vm := range t.VolumeMounts {
|
||||
vm.Canonicalize()
|
||||
}
|
||||
}
|
||||
|
||||
// TaskArtifact is used to download artifacts before running a task.
|
||||
|
||||
@@ -368,6 +368,14 @@ func TestTask_Artifact(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTask_VolumeMount checks that canonicalizing an empty VolumeMount sets
// PropagationMode to a non-nil pointer holding "private".
func TestTask_VolumeMount(t *testing.T) {
|
||||
t.Parallel()
|
||||
vm := &VolumeMount{}
|
||||
vm.Canonicalize()
|
||||
require.NotNil(t, vm.PropagationMode)
|
||||
require.Equal(t, *vm.PropagationMode, "private")
|
||||
}
|
||||
|
||||
// Ensures no regression on https://github.com/hashicorp/nomad/issues/3132
|
||||
func TestTaskGroup_Canonicalize_Update(t *testing.T) {
|
||||
// Job with an Empty() Update
|
||||
|
||||
@@ -812,9 +812,10 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
|
||||
structsTask.VolumeMounts = make([]*structs.VolumeMount, l)
|
||||
for i, mount := range apiTask.VolumeMounts {
|
||||
structsTask.VolumeMounts[i] = &structs.VolumeMount{
|
||||
Volume: mount.Volume,
|
||||
Destination: mount.Destination,
|
||||
ReadOnly: mount.ReadOnly,
|
||||
Volume: *mount.Volume,
|
||||
Destination: *mount.Destination,
|
||||
ReadOnly: *mount.ReadOnly,
|
||||
PropagationMode: *mount.PropagationMode,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -642,6 +642,15 @@ func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConf
|
||||
return binds, nil
|
||||
}
|
||||
|
||||
// userMountToUnixMount translates a mount's user-facing propagation mode into
// the propagation string set on docker.BindOptions.Propagation. A mode that is
// not a key in this map yields the empty string on lookup.
var userMountToUnixMount = map[string]string{
|
||||
// Empty string maps to `rprivate` for backwards compatibility in restored
|
||||
// older tasks, where mount propagation will not be present.
|
||||
"": "rprivate",
|
||||
nstructs.VolumeMountPropagationPrivate: "rprivate",
|
||||
nstructs.VolumeMountPropagationHostToTask: "rslave",
|
||||
nstructs.VolumeMountPropagationBidirectional: "rshared",
|
||||
}
|
||||
|
||||
func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
|
||||
imageID string) (docker.CreateContainerOptions, error) {
|
||||
|
||||
@@ -833,13 +842,24 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
|
||||
|
||||
hostConfig.Mounts = append(hostConfig.Mounts, hm)
|
||||
}
|
||||
|
||||
for _, m := range task.Mounts {
|
||||
hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{
|
||||
hm := docker.HostMount{
|
||||
Type: "bind",
|
||||
Target: m.TaskPath,
|
||||
Source: m.HostPath,
|
||||
ReadOnly: m.Readonly,
|
||||
})
|
||||
}
|
||||
|
||||
// MountPropagation is only supported by Docker on Linux:
|
||||
// https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
|
||||
if runtime.GOOS == "linux" {
|
||||
hm.BindOptions = &docker.BindOptions{
|
||||
Propagation: userMountToUnixMount[m.PropagationMode],
|
||||
}
|
||||
}
|
||||
|
||||
hostConfig.Mounts = append(hostConfig.Mounts, hm)
|
||||
}
|
||||
|
||||
// set DNS search domains and extra hosts
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -598,22 +599,32 @@ func TestDockerDriver_CreateContainerConfig_MountsCombined(t *testing.T) {
|
||||
|
||||
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedMounts := []docker.HostMount{
|
||||
{
|
||||
Type: "bind",
|
||||
Source: "/tmp/cfg-mount",
|
||||
Target: "/container/tmp/cfg-mount",
|
||||
ReadOnly: false,
|
||||
BindOptions: &docker.BindOptions{},
|
||||
Type: "bind",
|
||||
Source: "/tmp/cfg-mount",
|
||||
Target: "/container/tmp/cfg-mount",
|
||||
ReadOnly: false,
|
||||
BindOptions: &docker.BindOptions{
|
||||
Propagation: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "bind",
|
||||
Source: "/tmp/task-mount",
|
||||
Target: "/container/tmp/task-mount",
|
||||
ReadOnly: true,
|
||||
BindOptions: &docker.BindOptions{
|
||||
Propagation: "rprivate",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if runtime.GOOS != "linux" {
|
||||
expectedMounts[0].BindOptions = &docker.BindOptions{}
|
||||
expectedMounts[1].BindOptions = &docker.BindOptions{}
|
||||
}
|
||||
|
||||
foundMounts := c.HostConfig.Mounts
|
||||
sort.Slice(foundMounts, func(i, j int) bool {
|
||||
return foundMounts[i].Target < foundMounts[j].Target
|
||||
|
||||
@@ -813,6 +813,15 @@ func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// userMountToUnixMount translates a mount's user-facing propagation mode into
// the unix mount flag bits placed in lconfigs.Mount.PropagationFlags. Every
// entry ORs in unix.MS_REC, i.e. the recursive ("r"-prefixed) variant of the
// propagation type. A mode that is not a key in this map yields 0 on lookup.
var userMountToUnixMount = map[string]int{
|
||||
// Empty string maps to `rprivate` for backwards compatibility in restored
|
||||
// older tasks, where mount propagation will not be present.
|
||||
"": unix.MS_PRIVATE | unix.MS_REC, // rprivate
|
||||
structs.VolumeMountPropagationPrivate: unix.MS_PRIVATE | unix.MS_REC, // rprivate
|
||||
structs.VolumeMountPropagationHostToTask: unix.MS_SLAVE | unix.MS_REC, // rslave
|
||||
structs.VolumeMountPropagationBidirectional: unix.MS_SHARED | unix.MS_REC, // rshared
|
||||
}
|
||||
|
||||
// cmdMounts converts a list of drivers.MountConfigs into executor mounts (lconfigs.Mount).
|
||||
func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
|
||||
if len(mounts) == 0 {
|
||||
@@ -826,11 +835,13 @@ func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
|
||||
if m.Readonly {
|
||||
flags |= unix.MS_RDONLY
|
||||
}
|
||||
|
||||
r[i] = &lconfigs.Mount{
|
||||
Source: m.HostPath,
|
||||
Destination: m.TaskPath,
|
||||
Device: "bind",
|
||||
Flags: flags,
|
||||
Source: m.HostPath,
|
||||
Destination: m.TaskPath,
|
||||
Device: "bind",
|
||||
Flags: flags,
|
||||
PropagationFlags: []int{userMountToUnixMount[m.PropagationMode]},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -467,16 +467,18 @@ func TestExecutor_cmdMounts(t *testing.T) {
|
||||
|
||||
expected := []*lconfigs.Mount{
|
||||
{
|
||||
Source: "/host/path-ro",
|
||||
Destination: "/task/path-ro",
|
||||
Flags: unix.MS_BIND | unix.MS_RDONLY,
|
||||
Device: "bind",
|
||||
Source: "/host/path-ro",
|
||||
Destination: "/task/path-ro",
|
||||
Flags: unix.MS_BIND | unix.MS_RDONLY,
|
||||
Device: "bind",
|
||||
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
|
||||
},
|
||||
{
|
||||
Source: "/host/path-rw",
|
||||
Destination: "/task/path-rw",
|
||||
Flags: unix.MS_BIND,
|
||||
Device: "bind",
|
||||
Source: "/host/path-rw",
|
||||
Destination: "/task/path-rw",
|
||||
Flags: unix.MS_BIND,
|
||||
Device: "bind",
|
||||
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -522,6 +522,7 @@ func parseVolumeMounts(out *[]*api.VolumeMount, list *ast.ObjectList) error {
|
||||
"volume",
|
||||
"read_only",
|
||||
"destination",
|
||||
"propagation_mode",
|
||||
}
|
||||
if err := helper.CheckHCLKeys(item.Val, valid); err != nil {
|
||||
return err
|
||||
|
||||
@@ -197,8 +197,8 @@ func TestParse(t *testing.T) {
|
||||
},
|
||||
VolumeMounts: []*api.VolumeMount{
|
||||
{
|
||||
Volume: "foo",
|
||||
Destination: "/mnt/foo",
|
||||
Volume: helper.StringToPtr("foo"),
|
||||
Destination: helper.StringToPtr("/mnt/foo"),
|
||||
},
|
||||
},
|
||||
Affinities: []*api.Affinity{
|
||||
|
||||
@@ -110,6 +110,7 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
|
||||
if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// Validate Volume Permissions
|
||||
for _, tg := range args.Job.TaskGroups {
|
||||
for _, vol := range tg.Volumes {
|
||||
@@ -131,6 +132,16 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, t := range tg.Tasks {
|
||||
for _, vm := range t.VolumeMounts {
|
||||
vol := tg.Volumes[vm.Volume]
|
||||
if vm.PropagationMode == structs.VolumeMountPropagationBidirectional &&
|
||||
!aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if override is set and we do not have permissions
|
||||
|
||||
@@ -5525,6 +5525,14 @@ func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices
|
||||
mErr.Errors = append(mErr.Errors, serviceErr)
|
||||
}
|
||||
}
|
||||
|
||||
// Validation for volumes
|
||||
for idx, vm := range t.VolumeMounts {
|
||||
if !MountPropagationModeIsValid(vm.PropagationMode) {
|
||||
mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode))
|
||||
}
|
||||
}
|
||||
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,21 @@ const (
|
||||
VolumeTypeHost = "host"
|
||||
)
|
||||
|
||||
// Named mount propagation modes for VolumeMount.PropagationMode. These are the
// non-empty values that MountPropagationModeIsValid accepts.
const (
|
||||
VolumeMountPropagationPrivate = "private"
|
||||
VolumeMountPropagationHostToTask = "host-to-task"
|
||||
VolumeMountPropagationBidirectional = "bidirectional"
|
||||
)
|
||||
|
||||
// MountPropagationModeIsValid reports whether propagationMode is one of the
// known VolumeMountPropagation* values. The empty string is accepted as well,
// so a mount with no mode set passes validation. Any other value returns false.
func MountPropagationModeIsValid(propagationMode string) bool {
|
||||
switch propagationMode {
|
||||
case "", VolumeMountPropagationPrivate, VolumeMountPropagationHostToTask, VolumeMountPropagationBidirectional:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// ClientHostVolumeConfig is used to configure access to host paths on a Nomad Client
|
||||
type ClientHostVolumeConfig struct {
|
||||
Name string `hcl:",key"`
|
||||
@@ -103,9 +118,10 @@ func CopyMapVolumeRequest(s map[string]*VolumeRequest) map[string]*VolumeRequest
|
||||
// VolumeMount represents the relationship between a destination path in a task
|
||||
// and the task group volume that should be mounted there.
|
||||
type VolumeMount struct {
|
||||
Volume string
|
||||
Destination string
|
||||
ReadOnly bool
|
||||
Volume string
|
||||
Destination string
|
||||
ReadOnly bool
|
||||
PropagationMode string
|
||||
}
|
||||
|
||||
func (v *VolumeMount) Copy() *VolumeMount {
|
||||
|
||||
@@ -357,15 +357,17 @@ func (d *DeviceConfig) Copy() *DeviceConfig {
|
||||
}
|
||||
|
||||
type MountConfig struct {
|
||||
TaskPath string
|
||||
HostPath string
|
||||
Readonly bool
|
||||
TaskPath string
|
||||
HostPath string
|
||||
Readonly bool
|
||||
PropagationMode string
|
||||
}
|
||||
|
||||
func (m *MountConfig) IsEqual(o *MountConfig) bool {
|
||||
return m.TaskPath == o.TaskPath &&
|
||||
m.HostPath == o.HostPath &&
|
||||
m.Readonly == o.Readonly
|
||||
m.Readonly == o.Readonly &&
|
||||
m.PropagationMode == o.PropagationMode
|
||||
}
|
||||
|
||||
func (m *MountConfig) Copy() *MountConfig {
|
||||
|
||||
Reference in New Issue
Block a user