volumes: Add support for mount propagation

This commit introduces support for configuring mount propagation when
mounting volumes with the `volume_mount` stanza on Linux targets.

Similar to Kubernetes, we expose 3 options for configuring mount
propagation:

- private, which is equivalent to `rprivate` on Linux, which does not allow the
           container to see any new nested mounts after the chroot was created.

- host-to-task, which is equivalent to `rslave` on Linux, which allows new mounts
                that have been created _outside of the container_ to be visible
                inside the container after the chroot is created.

- bidirectional, which is equivalent to `rshared` on Linux, which allows both
                 the container to see new mounts created on the host, but
                 importantly _allows the container to create mounts that are
                 visible in other containers an don the host_

private and host-to-task are safe, but bidirectional mounts can be
dangerous, as if the code inside a container creates a mount, and does
not clean it up before tearing down the container, it can cause bad
things to happen inside the kernel.

To add a layer of safety here, we require that the user has ReadWrite
permissions on the volume before allowing bidirectional mounts, as a
defense in depth / validation case, although creating mounts should also require
a priviliged execution environment inside the container.
This commit is contained in:
Danielle Lancashire
2019-09-13 23:13:20 +02:00
parent 7b08e19f72
commit afb59bedf5
13 changed files with 145 additions and 35 deletions

View File

@@ -370,12 +370,28 @@ type VolumeRequest struct {
ReadOnly bool `mapstructure:"read_only"`
}
const (
VolumeMountPropagationPrivate = "private"
VolumeMountPropagationHostToTask = "host-to-task"
VolumeMountPropagationBidirectional = "bidirectional"
)
// VolumeMount represents the relationship between a destination path in a task
// and the task group volume that should be mounted there.
type VolumeMount struct {
Volume string
Destination string
ReadOnly bool `mapstructure:"read_only"`
Volume *string
Destination *string
ReadOnly *bool `mapstructure:"read_only"`
PropagationMode *string `mapstructure:"propagation_mode"`
}
func (vm *VolumeMount) Canonicalize() {
if vm.PropagationMode == nil {
vm.PropagationMode = stringToPtr(VolumeMountPropagationPrivate)
}
if vm.ReadOnly == nil {
vm.ReadOnly = boolToPtr(false)
}
}
// TaskGroup is the unit of scheduling.
@@ -632,6 +648,9 @@ func (t *Task) Canonicalize(tg *TaskGroup, job *Job) {
for _, a := range t.Affinities {
a.Canonicalize()
}
for _, vm := range t.VolumeMounts {
vm.Canonicalize()
}
}
// TaskArtifact is used to download artifacts before running a task.

View File

@@ -368,6 +368,14 @@ func TestTask_Artifact(t *testing.T) {
}
}
func TestTask_VolumeMount(t *testing.T) {
t.Parallel()
vm := &VolumeMount{}
vm.Canonicalize()
require.NotNil(t, vm.PropagationMode)
require.Equal(t, *vm.PropagationMode, "private")
}
// Ensures no regression on https://github.com/hashicorp/nomad/issues/3132
func TestTaskGroup_Canonicalize_Update(t *testing.T) {
// Job with an Empty() Update

View File

@@ -812,9 +812,10 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
structsTask.VolumeMounts = make([]*structs.VolumeMount, l)
for i, mount := range apiTask.VolumeMounts {
structsTask.VolumeMounts[i] = &structs.VolumeMount{
Volume: mount.Volume,
Destination: mount.Destination,
ReadOnly: mount.ReadOnly,
Volume: *mount.Volume,
Destination: *mount.Destination,
ReadOnly: *mount.ReadOnly,
PropagationMode: *mount.PropagationMode,
}
}
}

View File

@@ -642,6 +642,15 @@ func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConf
return binds, nil
}
var userMountToUnixMount = map[string]string{
// Empty string maps to `rprivate` for backwards compatibility in restored
// older tasks, where mount propagation will not be present.
"": "rprivate",
nstructs.VolumeMountPropagationPrivate: "rprivate",
nstructs.VolumeMountPropagationHostToTask: "rslave",
nstructs.VolumeMountPropagationBidirectional: "rshared",
}
func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
imageID string) (docker.CreateContainerOptions, error) {
@@ -833,13 +842,24 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
hostConfig.Mounts = append(hostConfig.Mounts, hm)
}
for _, m := range task.Mounts {
hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{
hm := docker.HostMount{
Type: "bind",
Target: m.TaskPath,
Source: m.HostPath,
ReadOnly: m.Readonly,
})
}
// MountPropagation is only supported by Docker on Linux:
// https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
if runtime.GOOS == "linux" {
hm.BindOptions = &docker.BindOptions{
Propagation: userMountToUnixMount[m.PropagationMode],
}
}
hostConfig.Mounts = append(hostConfig.Mounts, hm)
}
// set DNS search domains and extra hosts

View File

@@ -7,6 +7,7 @@ import (
"io"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
@@ -598,22 +599,32 @@ func TestDockerDriver_CreateContainerConfig_MountsCombined(t *testing.T) {
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)
expectedMounts := []docker.HostMount{
{
Type: "bind",
Source: "/tmp/cfg-mount",
Target: "/container/tmp/cfg-mount",
ReadOnly: false,
BindOptions: &docker.BindOptions{},
Type: "bind",
Source: "/tmp/cfg-mount",
Target: "/container/tmp/cfg-mount",
ReadOnly: false,
BindOptions: &docker.BindOptions{
Propagation: "",
},
},
{
Type: "bind",
Source: "/tmp/task-mount",
Target: "/container/tmp/task-mount",
ReadOnly: true,
BindOptions: &docker.BindOptions{
Propagation: "rprivate",
},
},
}
if runtime.GOOS != "linux" {
expectedMounts[0].BindOptions = &docker.BindOptions{}
expectedMounts[1].BindOptions = &docker.BindOptions{}
}
foundMounts := c.HostConfig.Mounts
sort.Slice(foundMounts, func(i, j int) bool {
return foundMounts[i].Target < foundMounts[j].Target

View File

@@ -813,6 +813,15 @@ func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) {
return r, nil
}
var userMountToUnixMount = map[string]int{
// Empty string maps to `rprivate` for backwards compatibility in restored
// older tasks, where mount propagation will not be present.
"": unix.MS_PRIVATE | unix.MS_REC, // rprivate
structs.VolumeMountPropagationPrivate: unix.MS_PRIVATE | unix.MS_REC, // rprivate
structs.VolumeMountPropagationHostToTask: unix.MS_SLAVE | unix.MS_REC, // rslave
structs.VolumeMountPropagationBidirectional: unix.MS_SHARED | unix.MS_REC, // rshared
}
// cmdMounts converts a list of driver.MountConfigs into excutor.Mounts.
func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
if len(mounts) == 0 {
@@ -826,11 +835,13 @@ func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
if m.Readonly {
flags |= unix.MS_RDONLY
}
r[i] = &lconfigs.Mount{
Source: m.HostPath,
Destination: m.TaskPath,
Device: "bind",
Flags: flags,
Source: m.HostPath,
Destination: m.TaskPath,
Device: "bind",
Flags: flags,
PropagationFlags: []int{userMountToUnixMount[m.PropagationMode]},
}
}

View File

@@ -467,16 +467,18 @@ func TestExecutor_cmdMounts(t *testing.T) {
expected := []*lconfigs.Mount{
{
Source: "/host/path-ro",
Destination: "/task/path-ro",
Flags: unix.MS_BIND | unix.MS_RDONLY,
Device: "bind",
Source: "/host/path-ro",
Destination: "/task/path-ro",
Flags: unix.MS_BIND | unix.MS_RDONLY,
Device: "bind",
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
},
{
Source: "/host/path-rw",
Destination: "/task/path-rw",
Flags: unix.MS_BIND,
Device: "bind",
Source: "/host/path-rw",
Destination: "/task/path-rw",
Flags: unix.MS_BIND,
Device: "bind",
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
},
}

View File

@@ -522,6 +522,7 @@ func parseVolumeMounts(out *[]*api.VolumeMount, list *ast.ObjectList) error {
"volume",
"read_only",
"destination",
"propagation_mode",
}
if err := helper.CheckHCLKeys(item.Val, valid); err != nil {
return err

View File

@@ -197,8 +197,8 @@ func TestParse(t *testing.T) {
},
VolumeMounts: []*api.VolumeMount{
{
Volume: "foo",
Destination: "/mnt/foo",
Volume: helper.StringToPtr("foo"),
Destination: helper.StringToPtr("/mnt/foo"),
},
},
Affinities: []*api.Affinity{

View File

@@ -110,6 +110,7 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
return structs.ErrPermissionDenied
}
// Validate Volume Permsissions
for _, tg := range args.Job.TaskGroups {
for _, vol := range tg.Volumes {
@@ -131,6 +132,16 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
}
}
}
for _, t := range tg.Tasks {
for _, vm := range t.VolumeMounts {
vol := tg.Volumes[vm.Volume]
if vm.PropagationMode == structs.VolumeMountPropagationBidirectional &&
!aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) {
return structs.ErrPermissionDenied
}
}
}
}
// Check if override is set and we do not have permissions

View File

@@ -5525,6 +5525,14 @@ func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices
mErr.Errors = append(mErr.Errors, serviceErr)
}
}
// Validation for volumes
for idx, vm := range t.VolumeMounts {
if !MountPropagationModeIsValid(vm.PropagationMode) {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode))
}
}
return mErr.ErrorOrNil()
}

View File

@@ -4,6 +4,21 @@ const (
VolumeTypeHost = "host"
)
const (
VolumeMountPropagationPrivate = "private"
VolumeMountPropagationHostToTask = "host-to-task"
VolumeMountPropagationBidirectional = "bidirectional"
)
func MountPropagationModeIsValid(propagationMode string) bool {
switch propagationMode {
case "", VolumeMountPropagationPrivate, VolumeMountPropagationHostToTask, VolumeMountPropagationBidirectional:
return true
default:
return false
}
}
// ClientHostVolumeConfig is used to configure access to host paths on a Nomad Client
type ClientHostVolumeConfig struct {
Name string `hcl:",key"`
@@ -103,9 +118,10 @@ func CopyMapVolumeRequest(s map[string]*VolumeRequest) map[string]*VolumeRequest
// VolumeMount represents the relationship between a destination path in a task
// and the task group volume that should be mounted there.
type VolumeMount struct {
Volume string
Destination string
ReadOnly bool
Volume string
Destination string
ReadOnly bool
PropagationMode string
}
func (v *VolumeMount) Copy() *VolumeMount {

View File

@@ -357,15 +357,17 @@ func (d *DeviceConfig) Copy() *DeviceConfig {
}
type MountConfig struct {
TaskPath string
HostPath string
Readonly bool
TaskPath string
HostPath string
Readonly bool
PropagationMode string
}
func (m *MountConfig) IsEqual(o *MountConfig) bool {
return m.TaskPath == o.TaskPath &&
m.HostPath == o.HostPath &&
m.Readonly == o.Readonly
m.Readonly == o.Readonly &&
m.PropagationMode == o.PropagationMode
}
func (m *MountConfig) Copy() *MountConfig {