E2E: test exercising node drain behavior for CSI volumes (#12384)
e2e/csi/ebs.go (124 changed lines)
@@ -2,10 +2,12 @@ package csi

 import (
 	"fmt"
+	"time"

-	e2e "github.com/hashicorp/nomad/e2e/e2eutil"
+	"github.com/hashicorp/nomad/e2e/e2eutil"
 	"github.com/hashicorp/nomad/e2e/framework"
 	"github.com/hashicorp/nomad/helper/uuid"
+	"github.com/hashicorp/nomad/testutil"
 )

 // CSIControllerPluginEBSTest exercises the AWS EBS plugin, which is an
@@ -16,6 +18,7 @@ type CSIControllerPluginEBSTest struct {
 	testJobIDs   []string
 	volumeIDs    []string
 	pluginJobIDs []string
+	nodeIDs      []string
 }

 const ebsPluginID = "aws-ebs0"
@@ -23,27 +26,27 @@ const ebsPluginID = "aws-ebs0"
 // BeforeAll waits for the cluster to be ready, deploys the CSI plugins, and
 // creates two EBS volumes for use in the test.
 func (tc *CSIControllerPluginEBSTest) BeforeAll(f *framework.F) {
-	e2e.WaitForLeader(f.T(), tc.Nomad())
-	e2e.WaitForNodesReady(f.T(), tc.Nomad(), 2)
+	e2eutil.WaitForLeader(f.T(), tc.Nomad())
+	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 2)

 	tc.uuid = uuid.Generate()[0:8]

 	// deploy the controller plugin job
 	controllerJobID := "aws-ebs-plugin-controller-" + tc.uuid
-	f.NoError(e2e.Register(controllerJobID, "csi/input/plugin-aws-ebs-controller.nomad"))
+	f.NoError(e2eutil.Register(controllerJobID, "csi/input/plugin-aws-ebs-controller.nomad"))
 	tc.pluginJobIDs = append(tc.pluginJobIDs, controllerJobID)
 	expected := []string{"running", "running"}
 	f.NoError(
-		e2e.WaitForAllocStatusExpected(controllerJobID, ns, expected),
+		e2eutil.WaitForAllocStatusExpected(controllerJobID, ns, expected),
 		"job should be running")

 	// deploy the node plugins job
 	nodesJobID := "aws-ebs-plugin-nodes-" + tc.uuid
-	f.NoError(e2e.Register(nodesJobID, "csi/input/plugin-aws-ebs-nodes.nomad"))
+	f.NoError(e2eutil.Register(nodesJobID, "csi/input/plugin-aws-ebs-nodes.nomad"))
 	tc.pluginJobIDs = append(tc.pluginJobIDs, nodesJobID)

-	f.NoError(e2e.WaitForAllocStatusComparison(
-		func() ([]string, error) { return e2e.AllocStatuses(nodesJobID, ns) },
+	f.NoError(e2eutil.WaitForAllocStatusComparison(
+		func() ([]string, error) { return e2eutil.AllocStatuses(nodesJobID, ns) },
 		func(got []string) bool {
 			for _, status := range got {
 				if status != "running" {
@@ -78,7 +81,7 @@ func (tc *CSIControllerPluginEBSTest) AfterAll(f *framework.F) {

 	// Stop all jobs in test
 	for _, id := range tc.testJobIDs {
-		err := e2e.StopJob(id, "-purge")
+		err := e2eutil.StopJob(id, "-purge")
 		f.Assert().NoError(err)
 	}
 	tc.testJobIDs = []string{}
@@ -87,20 +90,28 @@ func (tc *CSIControllerPluginEBSTest) AfterAll(f *framework.F) {
 		err := waitForVolumeClaimRelease(volID, reapWait)
 		f.Assert().NoError(err, "volume claims were not released")

-		out, err := e2e.Command("nomad", "volume", "delete", volID)
+		out, err := e2eutil.Command("nomad", "volume", "delete", volID)
 		assertNoErrorElseDump(f, err,
 			fmt.Sprintf("could not delete volume:\n%v", out), tc.pluginJobIDs)
 	}

 	// Deregister all plugin jobs in test
 	for _, id := range tc.pluginJobIDs {
-		err := e2e.StopJob(id, "-purge")
+		err := e2eutil.StopJob(id, "-purge")
 		f.Assert().NoError(err)
 	}
 	tc.pluginJobIDs = []string{}

+	for _, id := range tc.nodeIDs {
+		_, err := e2eutil.Command("nomad", "node", "drain", "-disable", "-yes", id)
+		f.Assert().NoError(err)
+		_, err = e2eutil.Command("nomad", "node", "eligibility", "-enable", id)
+		f.Assert().NoError(err)
+	}
+	tc.nodeIDs = []string{}
+
 	// Garbage collect
-	out, err := e2e.Command("nomad", "system", "gc")
+	out, err := e2eutil.Command("nomad", "system", "gc")
 	f.Assert().NoError(err, out)

 }
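Note on the cleanup above: once a drain runs to completion the node stays ineligible for scheduling, and "nomad node drain -disable" only cancels an in-progress drain, so the new tc.nodeIDs loop also re-enables eligibility explicitly. That keeps a failed or interrupted TestNodeDrain run from leaving a client node out of service for later tests.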
@@ -112,12 +123,12 @@ func (tc *CSIControllerPluginEBSTest) TestVolumeClaim(f *framework.F) {

 	// deploy a job that writes to the volume
 	writeJobID := "write-ebs-" + tc.uuid
-	f.NoError(e2e.Register(writeJobID, "csi/input/use-ebs-volume.nomad"))
+	f.NoError(e2eutil.Register(writeJobID, "csi/input/use-ebs-volume.nomad"))
 	f.NoError(
-		e2e.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
+		e2eutil.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
 		"job should be running")

-	allocs, err := e2e.AllocsForJob(writeJobID, ns)
+	allocs, err := e2eutil.AllocsForJob(writeJobID, ns)
 	f.NoError(err, "could not get allocs for write job")
 	f.Len(allocs, 1, "could not get allocs for write job")
 	writeAllocID := allocs[0]["ID"]
@@ -130,7 +141,7 @@ func (tc *CSIControllerPluginEBSTest) TestVolumeClaim(f *framework.F) {
 	// Shutdown (and purge) the writer so we can run a reader.
 	// we could mount the EBS volume with multi-attach, but we
 	// want this test to exercise the unpublish workflow.
-	err = e2e.StopJob(writeJobID, "-purge")
+	err = e2eutil.StopJob(writeJobID, "-purge")
 	f.NoError(err)

 	// wait for the volume unpublish workflow to complete
@@ -142,12 +153,12 @@ func (tc *CSIControllerPluginEBSTest) TestVolumeClaim(f *framework.F) {
 	// deploy a job so we can read from the volume
 	readJobID := "read-ebs-" + tc.uuid
 	tc.testJobIDs = append(tc.testJobIDs, readJobID) // ensure failed tests clean up
-	f.NoError(e2e.Register(readJobID, "csi/input/use-ebs-volume.nomad"))
+	f.NoError(e2eutil.Register(readJobID, "csi/input/use-ebs-volume.nomad"))
 	f.NoError(
-		e2e.WaitForAllocStatusExpected(readJobID, ns, []string{"running"}),
+		e2eutil.WaitForAllocStatusExpected(readJobID, ns, []string{"running"}),
 		"job should be running")

-	allocs, err = e2e.AllocsForJob(readJobID, ns)
+	allocs, err = e2eutil.AllocsForJob(readJobID, ns)
 	f.NoError(err, "could not get allocs for read job")
 	f.Len(allocs, 1, "could not get allocs for read job")
 	readAllocID := allocs[0]["ID"]
@@ -161,14 +172,14 @@ func (tc *CSIControllerPluginEBSTest) TestVolumeClaim(f *framework.F) {
 // TestSnapshot exercises the snapshot commands.
 func (tc *CSIControllerPluginEBSTest) TestSnapshot(f *framework.F) {

-	out, err := e2e.Command("nomad", "volume", "snapshot", "create",
+	out, err := e2eutil.Command("nomad", "volume", "snapshot", "create",
 		tc.volumeIDs[0], "snap-"+tc.uuid)
 	requireNoErrorElseDump(f, err, "could not create volume snapshot", tc.pluginJobIDs)

-	snaps, err := e2e.ParseColumns(out)
+	snaps, err := e2eutil.ParseColumns(out)

 	defer func() {
-		_, err := e2e.Command("nomad", "volume", "snapshot", "delete",
+		_, err := e2eutil.Command("nomad", "volume", "snapshot", "delete",
 			ebsPluginID, snaps[0]["Snapshot ID"])
 		requireNoErrorElseDump(f, err, "could not delete volume snapshot", tc.pluginJobIDs)
 	}()
@@ -179,9 +190,76 @@ func (tc *CSIControllerPluginEBSTest) TestSnapshot(f *framework.F) {
 	// the snapshot we're looking for should be the first one because
 	// we just created it, but give us some breathing room to allow
 	// for concurrent test runs
-	out, err = e2e.Command("nomad", "volume", "snapshot", "list",
+	out, err = e2eutil.Command("nomad", "volume", "snapshot", "list",
 		"-plugin", ebsPluginID, "-per-page", "10")
 	requireNoErrorElseDump(f, err, "could not list volume snapshots", tc.pluginJobIDs)
 	f.Contains(out, snaps[0]["ID"],
 		fmt.Sprintf("volume snapshot list did not include expected snapshot:\n%v", out))
 }
+
+// TestNodeDrain exercises the remounting behavior in the face of a node drain
+func (tc *CSIControllerPluginEBSTest) TestNodeDrain(f *framework.F) {
+
+	nomadClient := tc.Nomad()
+
+	nodesJobID := "aws-ebs-plugin-nodes-" + tc.uuid
+	pluginAllocs, err := e2eutil.AllocsForJob(nodesJobID, ns)
+	f.NoError(err)
+	expectedHealthyNodePlugins := len(pluginAllocs)
+
+	// deploy a job that writes to the volume
+	writeJobID := "write-ebs-" + tc.uuid
+	f.NoError(e2eutil.Register(writeJobID, "csi/input/use-ebs-volume.nomad"))
+	f.NoError(
+		e2eutil.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
+		"job should be running")
+	tc.testJobIDs = append(tc.testJobIDs, writeJobID) // ensure failed tests clean up
+
+	allocs, err := e2eutil.AllocsForJob(writeJobID, ns)
+	f.NoError(err, "could not get allocs for write job")
+	f.Len(allocs, 1, "could not get allocs for write job")
+	writeAllocID := allocs[0]["ID"]
+
+	// read data from volume and assert the writer wrote a file to it
+	expectedPath := "/task/test/" + writeAllocID
+	_, err = readFile(nomadClient, writeAllocID, expectedPath)
+	f.NoError(err)
+
+	// intentionally set a long deadline so we can check the plugins
+	// haven't been moved
+	nodeID := allocs[0]["Node ID"]
+	out, err := e2eutil.Command("nomad", "node",
+		"drain", "-enable",
+		"-deadline", "10m",
+		"-yes", "-detach", nodeID)
+	f.NoError(err, fmt.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
+	tc.nodeIDs = append(tc.nodeIDs, nodeID)
+
+	wc := &e2eutil.WaitConfig{}
+	interval, retries := wc.OrDefault()
+	testutil.WaitForResultRetries(retries, func() (bool, error) {
+		time.Sleep(interval)
+		allocs, err := e2eutil.AllocsForJob(writeJobID, ns)
+		if err != nil {
+			return false, err
+		}
+		for _, alloc := range allocs {
+			if alloc["ID"] != writeAllocID {
+				if alloc["Status"] == "running" {
+					return true, nil
+				}
+				if alloc["Status"] == "failed" {
+					// no point in waiting anymore if we hit this case
+					f.T().Fatal("expected replacement alloc not to fail")
+				}
+			}
+		}
+		return false, fmt.Errorf("expected replacement alloc to be running")
+	}, func(e error) {
+		err = e
+	})
+
+	pluginAllocs, err = e2eutil.AllocsForJob(nodesJobID, ns)
+	f.Lenf(pluginAllocs, expectedHealthyNodePlugins,
+		"expected node plugins to be unchanged, got: %v", pluginAllocs)
+}
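For context on the AfterAll changes above: waitForVolumeClaimRelease is defined elsewhere in this package and is not part of this diff. Below is a minimal sketch of the pattern it is assumed to follow (the sketch's name, its wc parameter, and the exact status text are assumptions, not the package's real code): poll "nomad volume status" with the same e2eutil/testutil retry helpers this diff uses, until no allocation claims the volume. TestNodeDrain above reuses the same WaitForResultRetries shape to poll for the replacement allocation after the drain.

	// hypothetical sketch, not the package's actual helper; assumes the
	// imports already present in ebs.go plus "strings"
	func waitForVolumeClaimReleaseSketch(volID string, wc *e2eutil.WaitConfig) error {
		var err error
		interval, retries := wc.OrDefault()
		testutil.WaitForResultRetries(retries, func() (bool, error) {
			time.Sleep(interval)
			out, cmdErr := e2eutil.Command("nomad", "volume", "status", volID)
			if cmdErr != nil {
				return false, cmdErr
			}
			// released once the status output no longer lists claiming
			// allocations (the exact text is an assumption)
			return strings.Contains(out, "No allocations placed"), nil
		}, func(e error) {
			err = e
		})
		return err
	}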

e2e/terraform/Makefile
@@ -7,14 +7,14 @@ plan:
 	terraform plan \
 		-var="nomad_local_binary=$(PKG_PATH)" \
 		-var="volumes=false" \
-		-var="client_count_ubuntu_bionic_amd64=2" \
+		-var="client_count_ubuntu_bionic_amd64=3" \
 		-var="client_count_windows_2016_amd64=0"

 apply:
 	terraform apply -auto-approve \
 		-var="nomad_local_binary=$(PKG_PATH)" \
 		-var="volumes=false" \
-		-var="client_count_ubuntu_bionic_amd64=2" \
+		-var="client_count_ubuntu_bionic_amd64=3" \
 		-var="client_count_windows_2016_amd64=0"

 clean: destroy tidy
@@ -22,7 +22,7 @@ clean: destroy tidy
 destroy:
 	terraform destroy -auto-approve \
 		-var="nomad_local_binary=$(PKG_PATH)" \
-		-var="client_count_ubuntu_bionic_amd64=2" \
+		-var="client_count_ubuntu_bionic_amd64=3" \
 		-var="client_count_windows_2016_amd64=0"

 # deploy what's in E2E nightly
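The bump from 2 to 3 Ubuntu clients presumably gives the new drain test the headroom it needs: with only two clients, draining the node that holds the write allocation would leave a single eligible node, while three clients leave room for the replacement allocation to be rescheduled while the CSI node plugins keep running on the remaining nodes.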