diff --git a/e2e/.gitignore b/e2e/.gitignore
index cfc151d21..adad33a7b 100644
--- a/e2e/.gitignore
+++ b/e2e/.gitignore
@@ -1 +1,2 @@
 provisioning.json
+csi/input/volumes.json
diff --git a/e2e/csi/csi.go b/e2e/csi/csi.go
new file mode 100644
index 000000000..4029e103d
--- /dev/null
+++ b/e2e/csi/csi.go
@@ -0,0 +1,251 @@
+package csi
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io/ioutil"
+	"os"
+	"time"
+
+	"github.com/hashicorp/nomad/api"
+	"github.com/hashicorp/nomad/e2e/e2eutil"
+	"github.com/hashicorp/nomad/e2e/framework"
+	"github.com/hashicorp/nomad/helper/uuid"
+	"github.com/stretchr/testify/require"
+)
+
+type CSIVolumesTest struct {
+	framework.TC
+	jobIds    []string
+	volumeIDs *volumeConfig
+}
+
+func init() {
+	framework.AddSuites(&framework.TestSuite{
+		Component:   "CSI",
+		CanRunLocal: true,
+		Consul:      false,
+		Cases: []framework.TestCase{
+			new(CSIVolumesTest),
+		},
+	})
+}
+
+type volumeConfig struct {
+	EBSVolumeID string `json:"ebs_volume"`
+	EFSVolumeID string `json:"efs_volume"`
+}
+
+func (tc *CSIVolumesTest) BeforeAll(f *framework.F) {
+	t := f.T()
+	// The volume IDs come from the external provider, so we need
+	// to read the configuration out of our Terraform output.
+	rawjson, err := ioutil.ReadFile("csi/input/volumes.json")
+	if err != nil {
+		t.Skip("volume ID configuration not found, try running 'terraform output volumes > ../csi/input/volumes.json'")
+	}
+	volumeIDs := &volumeConfig{}
+	err = json.Unmarshal(rawjson, volumeIDs)
+	if err != nil {
+		t.Fatal("volume ID configuration could not be read")
+	}
+
+	tc.volumeIDs = volumeIDs
+
+	// Ensure cluster has leader and at least two client
+	// nodes in a ready state before running tests
+	e2eutil.WaitForLeader(t, tc.Nomad())
+	e2eutil.WaitForNodesReady(t, tc.Nomad(), 2)
+}
+
+// TestEBSVolumeClaim launches AWS EBS plugins and registers an EBS volume
+// as a Nomad CSI volume. We then deploy a job that writes to the volume,
+// stop that job, and reuse the volume for another job which should be able
+// to read the data written by the first job.
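+// The volume is registered with the "single-node-writer" access mode, so
+// the reader job can only claim it after the writer's claim is released.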
+func (tc *CSIVolumesTest) TestEBSVolumeClaim(f *framework.F) {
+	t := f.T()
+	require := require.New(t)
+	nomadClient := tc.Nomad()
+	uuid := uuid.Generate()
+
+	// deploy the controller plugin job
+	controllerJobID := "aws-ebs-plugin-controller-" + uuid[0:8]
+	tc.jobIds = append(tc.jobIds, controllerJobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient,
+		"csi/input/plugin-aws-ebs-controller.nomad", controllerJobID, "")
+
+	// deploy the node plugins job
+	nodesJobID := "aws-ebs-plugin-nodes-" + uuid[0:8]
+	tc.jobIds = append(tc.jobIds, nodesJobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient,
+		"csi/input/plugin-aws-ebs-nodes.nomad", nodesJobID, "")
+
+	// wait for plugin to become healthy
+	require.Eventually(func() bool {
+		plugin, _, err := nomadClient.CSIPlugins().Info("aws-ebs0", nil)
+		if err != nil {
+			return false
+		}
+		if plugin.ControllersHealthy != 1 || plugin.NodesHealthy < 2 {
+			return false
+		}
+		return true
+		// TODO(tgross): cut down this time after fixing
+		// https://github.com/hashicorp/nomad/issues/7296
+	}, 90*time.Second, 5*time.Second)
+
+	// register a volume
+	volID := "ebs-vol0"
+	vol := &api.CSIVolume{
+		ID:             volID,
+		Name:           volID,
+		ExternalID:     tc.volumeIDs.EBSVolumeID,
+		AccessMode:     "single-node-writer",
+		AttachmentMode: "file-system",
+		PluginID:       "aws-ebs0",
+	}
+	_, err := nomadClient.CSIVolumes().Register(vol, nil)
+	require.NoError(err)
+	defer nomadClient.CSIVolumes().Deregister(volID, nil)
+
+	// deploy a job that writes to the volume
+	writeJobID := "write-ebs-" + uuid[0:8]
+	tc.jobIds = append(tc.jobIds, writeJobID)
+	writeAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient,
+		"csi/input/use-ebs-volume.nomad", writeJobID, "")
+	writeAllocID := writeAllocs[0].ID
+	e2eutil.WaitForAllocRunning(t, nomadClient, writeAllocID)
+
+	// read data from volume and assert the writer wrote a file to it
+	writeAlloc, _, err := nomadClient.Allocations().Info(writeAllocID, nil)
+	require.NoError(err)
+	expectedPath := "/local/test/" + writeAllocID
+	_, err = readFile(nomadClient, writeAlloc, expectedPath)
+	require.NoError(err)
+
+	// Shutdown the writer so we can run a reader.
+	// we could mount the EBS volume with multi-attach, but we
+	// want this test to exercise the unpublish workflow.
+	nomadClient.Jobs().Deregister(writeJobID, true, nil)
+
+	// deploy a job so we can read from the volume
+	readJobID := "read-ebs-" + uuid[0:8]
+	tc.jobIds = append(tc.jobIds, readJobID)
+	readAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient,
+		"csi/input/use-ebs-volume.nomad", readJobID, "")
+	readAllocID := readAllocs[0].ID
+	e2eutil.WaitForAllocRunning(t, nomadClient, readAllocID)
+
+	// ensure we clean up claim before we deregister volumes
+	defer nomadClient.Jobs().Deregister(readJobID, true, nil)
+
+	// read data from volume and assert the writer wrote a file to it
+	readAlloc, _, err := nomadClient.Allocations().Info(readAllocID, nil)
+	require.NoError(err)
+	_, err = readFile(nomadClient, readAlloc, expectedPath)
+	require.NoError(err)
+}
+
+// TestEFSVolumeClaim launches AWS EFS plugins and registers an EFS volume
+// as a Nomad CSI volume. We then deploy a job that writes to the volume,
+// and share the volume with another job which should be able to read the
+// data written by the first job.
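+// EFS does not require a controller plugin, so only the node plugin job
+// is deployed for this test.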
+func (tc *CSIVolumesTest) TestEFSVolumeClaim(f *framework.F) {
+	t := f.T()
+	require := require.New(t)
+	nomadClient := tc.Nomad()
+	uuid := uuid.Generate()
+
+	// deploy the node plugins job (no need for a controller for EFS)
+	nodesJobID := "aws-efs-plugin-nodes-" + uuid[0:8]
+	tc.jobIds = append(tc.jobIds, nodesJobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient,
+		"csi/input/plugin-aws-efs-nodes.nomad", nodesJobID, "")
+
+	// wait for plugin to become healthy
+	require.Eventually(func() bool {
+		plugin, _, err := nomadClient.CSIPlugins().Info("aws-efs0", nil)
+		if err != nil {
+			return false
+		}
+		if plugin.NodesHealthy < 2 {
+			return false
+		}
+		return true
+		// TODO(tgross): cut down this time after fixing
+		// https://github.com/hashicorp/nomad/issues/7296
+	}, 90*time.Second, 5*time.Second)
+
+	// register a volume
+	volID := "efs-vol0"
+	vol := &api.CSIVolume{
+		ID:             volID,
+		Name:           volID,
+		ExternalID:     tc.volumeIDs.EFSVolumeID,
+		AccessMode:     "single-node-writer",
+		AttachmentMode: "file-system",
+		PluginID:       "aws-efs0",
+	}
+	_, err := nomadClient.CSIVolumes().Register(vol, nil)
+	require.NoError(err)
+	defer nomadClient.CSIVolumes().Deregister(volID, nil)
+
+	// deploy a job that writes to the volume
+	writeJobID := "write-efs-" + uuid[0:8]
+	writeAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient,
+		"csi/input/use-efs-volume-write.nomad", writeJobID, "")
+	writeAllocID := writeAllocs[0].ID
+	e2eutil.WaitForAllocRunning(t, nomadClient, writeAllocID)
+
+	// read data from volume and assert the writer wrote a file to it
+	writeAlloc, _, err := nomadClient.Allocations().Info(writeAllocID, nil)
+	require.NoError(err)
+	expectedPath := "/local/test/" + writeAllocID
+	_, err = readFile(nomadClient, writeAlloc, expectedPath)
+	require.NoError(err)
+
+	// Shutdown the writer so we can run a reader.
+	// although EFS should support multiple readers, the plugin
+	// does not.
+	nomadClient.Jobs().Deregister(writeJobID, true, nil)
+
+	// deploy a job that reads from the volume.
+ readJobID := "read-efs-" + uuid[0:8] + readAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, + "csi/input/use-efs-volume-read.nomad", readJobID, "") + defer nomadClient.Jobs().Deregister(readJobID, true, nil) + e2eutil.WaitForAllocRunning(t, nomadClient, readAllocs[0].ID) + + // read data from volume and assert the writer wrote a file to it + readAlloc, _, err := nomadClient.Allocations().Info(readAllocs[0].ID, nil) + require.NoError(err) + _, err = readFile(nomadClient, readAlloc, expectedPath) + require.NoError(err) +} + +func (tc *CSIVolumesTest) AfterEach(f *framework.F) { + nomadClient := tc.Nomad() + jobs := nomadClient.Jobs() + // Stop all jobs in test + for _, id := range tc.jobIds { + jobs.Deregister(id, true, nil) + } + // Garbage collect + nomadClient.System().GarbageCollect() +} + +// TODO(tgross): replace this w/ AllocFS().Stat() after +// https://github.com/hashicorp/nomad/issues/7365 is fixed +func readFile(client *api.Client, alloc *api.Allocation, path string) (bytes.Buffer, error) { + ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelFn() + + var stdout, stderr bytes.Buffer + _, err := client.Allocations().Exec(ctx, + alloc, "task", false, + []string{"cat", path}, + os.Stdin, &stdout, &stderr, + make(chan api.TerminalSize), nil) + return stdout, err +} diff --git a/e2e/csi/input/plugin-aws-ebs-controller.nomad b/e2e/csi/input/plugin-aws-ebs-controller.nomad new file mode 100644 index 000000000..e5caa730c --- /dev/null +++ b/e2e/csi/input/plugin-aws-ebs-controller.nomad @@ -0,0 +1,40 @@ +# jobspec for running CSI plugin for AWS EBS, derived from +# the kubernetes manifests found at +# https://github.com/kubernetes-sigs/aws-ebs-csi-driver/tree/master/deploy/kubernetes + +job "plugin-aws-ebs-controller" { + datacenters = ["dc1"] + + group "controller" { + task "plugin" { + driver = "docker" + + config { + image = "amazon/aws-ebs-csi-driver:latest" + + args = [ + "controller", + "--endpoint=unix://csi/csi.sock", + "--logtostderr", + "--v=5", + ] + + # note: plugins running as controllers don't + # need to run as privileged tasks + } + + csi_plugin { + id = "aws-ebs0" + type = "controller" + mount_dir = "/csi" + } + + # note: there's no upstream guidance on resource usage so + # this is a best guess until we profile it in heavy use + resources { + cpu = 500 + memory = 256 + } + } + } +} diff --git a/e2e/csi/input/plugin-aws-ebs-nodes.nomad b/e2e/csi/input/plugin-aws-ebs-nodes.nomad new file mode 100644 index 000000000..303b2a8e8 --- /dev/null +++ b/e2e/csi/input/plugin-aws-ebs-nodes.nomad @@ -0,0 +1,43 @@ +# jobspec for running CSI plugin for AWS EBS, derived from +# the kubernetes manifests found at +# https://github.com/kubernetes-sigs/aws-ebs-csi-driver/tree/master/deploy/kubernetes + +job "plugin-aws-ebs-nodes" { + datacenters = ["dc1"] + + # you can run node plugins as service jobs as well, but this ensures + # that all nodes in the DC have a copy. 
+ type = "system" + + group "nodes" { + task "plugin" { + driver = "docker" + + config { + image = "amazon/aws-ebs-csi-driver:latest" + + args = [ + "node", + "--endpoint=unix://csi/csi.sock", + "--logtostderr", + "--v=5", + ] + + privileged = true + } + + csi_plugin { + id = "aws-ebs0" + type = "node" + mount_dir = "/csi" + } + + # note: there's no upstream guidance on resource usage so + # this is a best guess until we profile it in heavy use + resources { + cpu = 500 + memory = 256 + } + } + } +} diff --git a/e2e/csi/input/plugin-aws-efs-nodes.nomad b/e2e/csi/input/plugin-aws-efs-nodes.nomad new file mode 100644 index 000000000..8e1429e7a --- /dev/null +++ b/e2e/csi/input/plugin-aws-efs-nodes.nomad @@ -0,0 +1,45 @@ +# jobspec for running CSI plugin for AWS EFS, derived from +# the kubernetes manifests found at +# https://github.com/kubernetes-sigs/aws-efs-csi-driver/tree/master/deploy/kubernetes + +job "plugin-aws-efs-nodes" { + datacenters = ["dc1"] + + # you can run node plugins as service jobs as well, but this ensures + # that all nodes in the DC have a copy. + type = "system" + + group "nodes" { + task "plugin" { + driver = "docker" + + config { + image = "amazon/aws-efs-csi-driver:latest" + + # note: the EFS driver doesn't seem to respect the --endpoint + # flag and always sets up the listener at '/tmp/csi.sock' + args = [ + "node", + "--endpoint=unix://tmp/csi.sock", + "--logtostderr", + "--v=5", + ] + + privileged = true + } + + csi_plugin { + id = "aws-efs0" + type = "node" + mount_dir = "/tmp" + } + + # note: there's no upstream guidance on resource usage so + # this is a best guess until we profile it in heavy use + resources { + cpu = 500 + memory = 256 + } + } + } +} diff --git a/e2e/csi/input/use-ebs-volume.nomad b/e2e/csi/input/use-ebs-volume.nomad new file mode 100644 index 000000000..866a6a4dc --- /dev/null +++ b/e2e/csi/input/use-ebs-volume.nomad @@ -0,0 +1,32 @@ +# a job that mounts an EBS volume and writes its job ID as a file +job "use-ebs-volume" { + datacenters = ["dc1"] + + group "group" { + volume "test" { + type = "csi" + source = "ebs-vol0" + } + + task "task" { + driver = "docker" + + config { + image = "busybox:1" + command = "/bin/sh" + args = ["-c", "touch /local/test/${NOMAD_ALLOC_ID}; sleep 3600"] + } + + volume_mount { + volume = "test" + destination = "${NOMAD_TASK_DIR}/test" + read_only = false + } + + resources { + cpu = 500 + memory = 128 + } + } + } +} diff --git a/e2e/csi/input/use-efs-volume-read.nomad b/e2e/csi/input/use-efs-volume-read.nomad new file mode 100644 index 000000000..12b5f56b2 --- /dev/null +++ b/e2e/csi/input/use-efs-volume-read.nomad @@ -0,0 +1,33 @@ +# a job that mounts the EFS volume and sleeps, so that we can +# read its mounted file system remotely +job "use-efs-volume" { + datacenters = ["dc1"] + + group "group" { + volume "test" { + type = "csi" + source = "efs-vol0" + } + + task "task" { + driver = "docker" + + config { + image = "busybox:1" + command = "/bin/sh" + args = ["-c", "sleep 3600"] + } + + volume_mount { + volume = "test" + destination = "${NOMAD_TASK_DIR}/test" + read_only = true + } + + resources { + cpu = 500 + memory = 128 + } + } + } +} diff --git a/e2e/csi/input/use-efs-volume-write.nomad b/e2e/csi/input/use-efs-volume-write.nomad new file mode 100644 index 000000000..912fa734f --- /dev/null +++ b/e2e/csi/input/use-efs-volume-write.nomad @@ -0,0 +1,32 @@ +# a job that mounts an EFS volume and writes its job ID as a file +job "use-efs-volume" { + datacenters = ["dc1"] + + group "group" { + volume "test" { + 
type = "csi" + source = "efs-vol0" + } + + task "task" { + driver = "docker" + + config { + image = "busybox:1" + command = "/bin/sh" + args = ["-c", "touch /local/test/${NOMAD_ALLOC_ID}; sleep 3600"] + } + + volume_mount { + volume = "test" + destination = "${NOMAD_TASK_DIR}/test" + read_only = false + } + + resources { + cpu = 500 + memory = 128 + } + } + } +} diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 8e3d0bf75..8b63e1b5b 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -13,6 +13,7 @@ import ( _ "github.com/hashicorp/nomad/e2e/connect" _ "github.com/hashicorp/nomad/e2e/consul" _ "github.com/hashicorp/nomad/e2e/consultemplate" + _ "github.com/hashicorp/nomad/e2e/csi" _ "github.com/hashicorp/nomad/e2e/deployment" _ "github.com/hashicorp/nomad/e2e/example" _ "github.com/hashicorp/nomad/e2e/hostvolumes" diff --git a/e2e/terraform/iam.tf b/e2e/terraform/iam.tf index 484d0c1ce..8cf30ed3c 100644 --- a/e2e/terraform/iam.tf +++ b/e2e/terraform/iam.tf @@ -48,6 +48,7 @@ data "aws_iam_policy_document" "auto_discover_cluster" { "ec2:DescribeTags", "ec2:DescribeVolume*", "ec2:AttachVolume", + "ec2:DetachVolume", "autoscaling:DescribeAutoScalingGroups", ] resources = ["*"] diff --git a/e2e/terraform/provisioning.tf b/e2e/terraform/provisioning.tf index 5e68d22ca..5d69b13b4 100644 --- a/e2e/terraform/provisioning.tf +++ b/e2e/terraform/provisioning.tf @@ -9,6 +9,15 @@ export NOMAD_E2E=1 EOM } +output "volumes" { + description = "get volume IDs needed to register volumes for CSI testing." + value = jsonencode( + { + "ebs_volume" : aws_ebs_volume.csi.id, + "efs_volume" : aws_efs_file_system.csi.id, + }) +} + output "provisioning" { description = "output to a file to be use w/ E2E framework -provision.terraform" value = jsonencode(