From 612dd36d2f0ab60a540de50f66c4805de94f1a47 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Tue, 26 May 2020 18:18:59 -0400
Subject: [PATCH 1/8] tests: eval may be processed quickly

---
 nomad/node_endpoint_test.go | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go
index 8d56bf2c9..0324627ae 100644
--- a/nomad/node_endpoint_test.go
+++ b/nomad/node_endpoint_test.go
@@ -2509,7 +2509,12 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) {
 	require.Equal(t, structs.EvalTriggerNodeUpdate, eval.TriggeredBy)
 	require.Equal(t, alloc.NodeID, eval.NodeID)
 	require.Equal(t, uint64(1), eval.NodeModifyIndex)
-	require.Equal(t, structs.EvalStatusPending, eval.Status)
+	switch eval.Status {
+	case structs.EvalStatusPending, structs.EvalStatusComplete:
+		// success
+	default:
+		require.Equal(t, structs.EvalStatusPending, eval.Status)
+	}
 	require.Equal(t, expPriority, eval.Priority)
 	require.Equal(t, expJobID, eval.JobID)
 	require.NotZero(t, eval.CreateTime)
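
The tolerance added here generalizes: a test that creates an evaluation and immediately asserts on its status is racing the scheduler workers, which may already have processed the eval. A reusable helper for this could look like the sketch below; requireEvalStatusIn is a hypothetical name, not something this series adds:

    // requireEvalStatusIn passes if the eval is in one of the accepted
    // states and fails the test otherwise, for tests that race
    // scheduler workers.
    func requireEvalStatusIn(t *testing.T, eval *structs.Evaluation, accepted ...string) {
        t.Helper()
        for _, status := range accepted {
            if eval.Status == status {
                return
            }
        }
        t.Fatalf("eval status %q not in accepted set %v", eval.Status, accepted)
    }

    // equivalent to the switch in the hunk above:
    // requireEvalStatusIn(t, eval, structs.EvalStatusPending, structs.EvalStatusComplete)
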
From 5de673e6f2c4b97b6b3d504c1e194bc1dbfe9691 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Tue, 26 May 2020 18:19:18 -0400
Subject: [PATCH 2/8] tests: don't delete images after tests complete

Fix some Docker test flakiness caused by the image cleanup process
contaminating other tests: a cleanup may attempt to delete an image
while it is still in use by another test.
---
 drivers/docker/config.go           |  1 +
 drivers/docker/coordinator.go      |  4 +-
 drivers/docker/coordinator_test.go | 70 +++++++++++++++++++++++++++---
 3 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/drivers/docker/config.go b/drivers/docker/config.go
index 08be91b8e..44abc6a9a 100644
--- a/drivers/docker/config.go
+++ b/drivers/docker/config.go
@@ -682,6 +682,7 @@ func (d *Driver) SetConfig(c *base.Config) error {
 		return fmt.Errorf("failed to get docker client: %v", err)
 	}
 	coordinatorConfig := &dockerCoordinatorConfig{
+		ctx:     d.ctx,
 		client:  dockerClient,
 		cleanup: d.config.GC.Image,
 		logger:  d.logger,

diff --git a/drivers/docker/coordinator.go b/drivers/docker/coordinator.go
index 2d68e3b69..23a0ed257 100644
--- a/drivers/docker/coordinator.go
+++ b/drivers/docker/coordinator.go
@@ -70,6 +70,8 @@ func noopLogEventFn(string, map[string]string) {}

 // dockerCoordinatorConfig is used to configure the Docker coordinator.
 type dockerCoordinatorConfig struct {
+	ctx context.Context
+
 	// logger is the logger the coordinator should use
 	logger hclog.Logger

@@ -283,7 +285,7 @@ func (d *dockerCoordinator) RemoveImage(imageID, callerID string) {
 	}

 	// Setup a future to delete the image
-	ctx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithCancel(d.ctx)
 	d.deleteFuture[imageID] = cancel
 	go d.removeImageImpl(imageID, ctx)
diff --git a/drivers/docker/coordinator_test.go b/drivers/docker/coordinator_test.go
index 6ada8cb78..c28e4efc9 100644
--- a/drivers/docker/coordinator_test.go
+++ b/drivers/docker/coordinator_test.go
@@ -1,6 +1,7 @@
 package docker

 import (
+	"context"
 	"fmt"
 	"sync"
 	"testing"
@@ -10,6 +11,7 @@ import (
 	"github.com/hashicorp/nomad/helper/testlog"
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/testutil"
+	"github.com/stretchr/testify/require"
 )

 type mockImageClient struct {
@@ -61,6 +63,7 @@ func TestDockerCoordinator_ConcurrentPulls(t *testing.T) {
 	// Add a delay so we can get multiple queued up
 	mock := newMockImageClient(mapping, 10*time.Millisecond)
 	config := &dockerCoordinatorConfig{
+		ctx:     context.Background(),
 		logger:  testlog.HCLogger(t),
 		cleanup: true,
 		client:  mock,
@@ -70,10 +73,10 @@ func TestDockerCoordinator_ConcurrentPulls(t *testing.T) {

 	// Create a coordinator
 	coordinator := newDockerCoordinator(config)
-	id, _ := coordinator.PullImage(image, nil, uuid.Generate(), nil, 2 * time.Minute)
+	id, _ := coordinator.PullImage(image, nil, uuid.Generate(), nil, 2*time.Minute)
 	for i := 0; i < 9; i++ {
 		go func() {
-			coordinator.PullImage(image, nil, uuid.Generate(), nil, 2 * time.Minute)
+			coordinator.PullImage(image, nil, uuid.Generate(), nil, 2*time.Minute)
 		}()
 	}

@@ -112,6 +115,7 @@ func TestDockerCoordinator_Pull_Remove(t *testing.T) {
 	// Add a delay so we can get multiple queued up
 	mock := newMockImageClient(mapping, 10*time.Millisecond)
 	config := &dockerCoordinatorConfig{
+		ctx:     context.Background(),
 		logger:  testlog.HCLogger(t),
 		cleanup: true,
 		client:  mock,
@@ -125,7 +129,7 @@ func TestDockerCoordinator_Pull_Remove(t *testing.T) {
 	callerIDs := make([]string, 10, 10)
 	for i := 0; i < 10; i++ {
 		callerIDs[i] = uuid.Generate()
-		id, _ = coordinator.PullImage(image, nil, callerIDs[i], nil, 2 * time.Minute)
+		id, _ = coordinator.PullImage(image, nil, callerIDs[i], nil, 2*time.Minute)
 	}

 	// Check the reference count
@@ -179,6 +183,7 @@ func TestDockerCoordinator_Remove_Cancel(t *testing.T) {

 	mock := newMockImageClient(mapping, 1*time.Millisecond)
 	config := &dockerCoordinatorConfig{
+		ctx:     context.Background(),
 		logger:  testlog.HCLogger(t),
 		cleanup: true,
 		client:  mock,
@@ -190,7 +195,7 @@ func TestDockerCoordinator_Remove_Cancel(t *testing.T) {
 	callerID := uuid.Generate()

 	// Pull image
-	id, _ := coordinator.PullImage(image, nil, callerID, nil, 2 * time.Minute)
+	id, _ := coordinator.PullImage(image, nil, callerID, nil, 2*time.Minute)

 	// Check the reference count
 	if references := coordinator.imageRefCount[id]; len(references) != 1 {
@@ -206,7 +211,7 @@ func TestDockerCoordinator_Remove_Cancel(t *testing.T) {
 	}

 	// Pull image again within delay
-	id, _ = coordinator.PullImage(image, nil, callerID, nil, 2 * time.Minute)
+	id, _ = coordinator.PullImage(image, nil, callerID, nil, 2*time.Minute)

 	// Check the reference count
 	if references := coordinator.imageRefCount[id]; len(references) != 1 {
@@ -227,6 +232,7 @@ func TestDockerCoordinator_No_Cleanup(t *testing.T) {

 	mock := newMockImageClient(mapping, 1*time.Millisecond)
 	config := &dockerCoordinatorConfig{
+		ctx:     context.Background(),
 		logger:  testlog.HCLogger(t),
 		cleanup: false,
 		client:  mock,
@@ -238,7 +244,7 @@ func TestDockerCoordinator_No_Cleanup(t *testing.T) {
 	callerID := uuid.Generate()

 	// Pull image
-	id, _ := coordinator.PullImage(image, nil, callerID, nil, 2 * time.Minute)
+	id, _ := coordinator.PullImage(image, nil, callerID, nil, 2*time.Minute)

 	// Check the reference count
 	if references := coordinator.imageRefCount[id]; len(references) != 0 {
@@ -253,3 +259,55 @@ func TestDockerCoordinator_No_Cleanup(t *testing.T) {
 		t.Fatalf("Image deleted when it shouldn't have")
 	}
 }
+
+func TestDockerCoordinator_Cleanup_HonorsCtx(t *testing.T) {
+	image1ID := uuid.Generate()
+	image2ID := uuid.Generate()
+
+	mapping := map[string]string{image1ID: "foo", image2ID: "bar"}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	mock := newMockImageClient(mapping, 1*time.Millisecond)
+	config := &dockerCoordinatorConfig{
+		ctx:         ctx,
+		logger:      testlog.HCLogger(t),
+		cleanup:     true,
+		client:      mock,
+		removeDelay: 1 * time.Millisecond,
+	}
+
+	// Create a coordinator
+	coordinator := newDockerCoordinator(config)
+	callerID := uuid.Generate()
+
+	// Pull both images
+	id1, _ := coordinator.PullImage(image1ID, nil, callerID, nil, 2*time.Minute)
+	require.Len(t, coordinator.imageRefCount[id1], 1, "image reference count")
+
+	id2, _ := coordinator.PullImage(image2ID, nil, callerID, nil, 2*time.Minute)
+	require.Len(t, coordinator.imageRefCount[id2], 1, "image reference count")
+
+	// Remove one image, cancel the context, remove the second, and
+	// assert that only the first image is cleaned up
+	coordinator.RemoveImage(id1, callerID)
+	testutil.WaitForResult(func() (bool, error) {
+		if _, ok := mock.removed[id1]; ok {
+			return true, nil
+		}
+		return false, fmt.Errorf("expected image to be deleted, found %v", mock.removed)
+	}, func(err error) {
+		require.NoError(t, err)
+	})
+
+	cancel()
+	coordinator.RemoveImage(id2, callerID)
+
+	// Deletions happen in the background; wait long enough that the
+	// second deletion would have run if the context were ignored
+	time.Sleep(10 * time.Millisecond)
+
+	// Check that only the first deletion happened
+	require.Equal(t, map[string]int{id1: 1}, mock.removed, "removed images")
+}
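
The fix works because the coordinator's delete futures are now scoped to the driver's context rather than context.Background(), so shutting down the driver (or finishing a test) cancels any deletion that is still pending. Stripped of reference counting and logging, the behavior the HonorsCtx test exercises reduces to roughly the sketch below; it assumes go-dockerclient imported as docker, and the real removeImageImpl differs in detail:

    // deleteAfterDelay removes an image once delay elapses, unless ctx
    // is cancelled first, in which case the image is left in place.
    func deleteAfterDelay(ctx context.Context, client *docker.Client, imageID string, delay time.Duration) {
        select {
        case <-time.After(delay):
            // delay elapsed without cancellation; delete for real
            if err := client.RemoveImage(imageID); err != nil {
                // best-effort: the real coordinator logs failures
            }
        case <-ctx.Done():
            // driver shut down or test finished; skip the deletion
        }
    }
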
From 50c9a0a07f5ea9c6be5f0c667ee4e3707e47b8e0 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Tue, 26 May 2020 18:53:04 -0400
Subject: [PATCH 3/8] tests: wait until clients are in the state store

---
 nomad/client_csi_endpoint_test.go | 58 ++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/nomad/client_csi_endpoint_test.go b/nomad/client_csi_endpoint_test.go
index 7aecc47b0..a3345613e 100644
--- a/nomad/client_csi_endpoint_test.go
+++ b/nomad/client_csi_endpoint_test.go
@@ -1,6 +1,7 @@
 package nomad

 import (
+	"fmt"
 	"testing"

 	memdb "github.com/hashicorp/go-memdb"
@@ -30,12 +31,7 @@ func TestClientCSIController_AttachVolume_Local(t *testing.T) {
 	})
 	defer cleanupC()

-	testutil.WaitForResult(func() (bool, error) {
-		nodes := s.connectedNodes()
-		return len(nodes) == 1, nil
-	}, func(err error) {
-		require.Fail("should have a client")
-	})
+	waitForNodes(t, s, 1)

 	req := &cstructs.ClientCSIControllerAttachVolumeRequest{
 		CSIControllerQuery: cstructs.CSIControllerQuery{ControllerNodeID: c.NodeID()},
@@ -69,12 +65,7 @@ func TestClientCSIController_AttachVolume_Forwarded(t *testing.T) {
 	})
 	defer cleanupC()

-	testutil.WaitForResult(func() (bool, error) {
-		nodes := s2.connectedNodes()
-		return len(nodes) == 1, nil
-	}, func(err error) {
-		require.Fail("should have a client")
-	})
+	waitForNodes(t, s2, 1)

 	// Force remove the connection locally in case it exists
 	s1.nodeConnsLock.Lock()
@@ -108,12 +99,7 @@ func TestClientCSIController_DetachVolume_Local(t *testing.T) {
 	})
 	defer cleanupC()

-	testutil.WaitForResult(func() (bool, error) {
-		nodes := s.connectedNodes()
-		return len(nodes) == 1, nil
-	}, func(err error) {
-		require.Fail("should have a client")
-	})
+	waitForNodes(t, s, 1)

 	req := &cstructs.ClientCSIControllerDetachVolumeRequest{
 		CSIControllerQuery: cstructs.CSIControllerQuery{ControllerNodeID: c.NodeID()},
@@ -147,12 +133,7 @@ func TestClientCSIController_DetachVolume_Forwarded(t *testing.T) {
 	})
 	defer cleanupC()

-	testutil.WaitForResult(func() (bool, error) {
-		nodes := s2.connectedNodes()
-		return len(nodes) == 1, nil
-	}, func(err error) {
-		require.Fail("should have a client")
-	})
+	waitForNodes(t, s2, 1)

 	// Force remove the connection locally in case it exists
 	s1.nodeConnsLock.Lock()
@@ -212,3 +193,32 @@ func TestClientCSI_NodeForControllerPlugin(t *testing.T) {
 	// only node1 has both the controller and a recent Nomad version
 	require.Equal(t, nodeID, node1.ID)
 }
+
+// waitForNodes waits until the server is connected to expectedNodes
+// clients and they are in the state store
+func waitForNodes(t *testing.T, s *Server, expectedNodes int) {
+	codec := rpcClient(t, s)
+
+	testutil.WaitForResult(func() (bool, error) {
+		connNodes := s.connectedNodes()
+		if len(connNodes) != expectedNodes {
+			return false, fmt.Errorf("expected %d connected nodes but found %d", expectedNodes, len(connNodes))
+		}
+
+		get := &structs.NodeListRequest{
+			QueryOptions: structs.QueryOptions{Region: "global"},
+		}
+		var resp structs.NodeListResponse
+		err := msgpackrpc.CallWithCodec(codec, "Node.List", get, &resp)
+		if err != nil {
+			return false, fmt.Errorf("failed to list nodes: %v", err)
+		}
+		if len(resp.Nodes) != expectedNodes {
+			return false, fmt.Errorf("expected %d nodes in state store but found %d", expectedNodes, len(resp.Nodes))
+		}
+
+		return true, nil
+	}, func(err error) {
+		require.NoError(t, err)
+	})
+}
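
waitForNodes closes two distinct races: the client must have an open RPC connection to the server, and its registration must have been applied to the state store, and either one can lag the other. A call site follows the shape of the tests above; this skeleton is hypothetical and assumes the package's TestServer helper as used elsewhere in these tests:

    func TestClientCSIController_Example(t *testing.T) {
        t.Parallel()

        s, cleanupS := TestServer(t, nil)
        defer cleanupS()
        // ... start a client agent registered against s ...

        // Gate on both connectivity and state-store registration
        // before issuing node-targeted RPCs.
        waitForNodes(t, s, 1)
    }
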
From 76324a1511104b2d0ca2a64befef118c4342fbb8 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Tue, 26 May 2020 21:08:25 -0400
Subject: [PATCH 4/8] don't GC images in tests by default

---
 drivers/docker/driver_linux_test.go |  2 +-
 drivers/docker/driver_test.go       | 39 +++++++++++++++--------------
 drivers/docker/driver_unix_test.go  | 11 +++++---
 drivers/docker/reconciler_test.go   |  2 +-
 4 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/docker/driver_linux_test.go b/drivers/docker/driver_linux_test.go
index d9ba710a9..7f7f3e534 100644
--- a/drivers/docker/driver_linux_test.go
+++ b/drivers/docker/driver_linux_test.go
@@ -59,7 +59,7 @@ func TestDockerDriver_PidsLimit(t *testing.T) {
 	cfg.Args = []string{"-c", "sleep 5 & sleep 5 & sleep 5"}
 	require.NoError(task.EncodeConcreteDriverConfig(cfg))

-	_, driver, _, cleanup := dockerSetup(t, task)
+	_, driver, _, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	driver.WaitUntilStarted(task.ID, time.Duration(tu.TestMultiplier()*5)*time.Second)

diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go
index 16141bbeb..81ec35757 100644
--- a/drivers/docker/driver_test.go
+++ b/drivers/docker/driver_test.go
@@ -120,15 +120,15 @@ func dockerTask(t *testing.T) (*drivers.TaskConfig, *TaskConfig, []int) {
 //
 //	task := taskTemplate()
 //	// do custom task configuration
-//	client, handle, cleanup := dockerSetup(t, task)
+//	client, handle, cleanup := dockerSetup(t, task, nil)
 //	defer cleanup()
 //	// do test stuff
 //
 // If there is a problem during setup this function will abort or skip the test
 // and indicate the reason.
-func dockerSetup(t *testing.T, task *drivers.TaskConfig) (*docker.Client, *dtestutil.DriverHarness, *taskHandle, func()) {
+func dockerSetup(t *testing.T, task *drivers.TaskConfig, driverCfg map[string]interface{}) (*docker.Client, *dtestutil.DriverHarness, *taskHandle, func()) {
 	client := newTestDockerClient(t)
-	driver := dockerDriverHarness(t, nil)
+	driver := dockerDriverHarness(t, driverCfg)
 	cleanup := driver.MkAllocDir(task, true)
 	copyImage(t, task.TaskDir(), "busybox.tar")

@@ -181,6 +181,7 @@ func dockerDriverHarness(t *testing.T, cfg map[string]interface{}) *dtestutil.DriverHarness {
 	if cfg == nil {
 		cfg = map[string]interface{}{
 			"gc": map[string]interface{}{
+				"image":       false,
 				"image_delay": "1s",
 			},
 		}
@@ -912,7 +913,7 @@ func TestDockerDriver_Labels(t *testing.T) {
 	}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -940,7 +941,7 @@ func TestDockerDriver_ForcePull(t *testing.T) {
 	cfg.ForcePull = true
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -971,7 +972,7 @@ func TestDockerDriver_ForcePull_RepoDigest(t *testing.T) {
 	cfg.Args = busyboxLongRunningCmd[1:]
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -994,7 +995,7 @@ func TestDockerDriver_SecurityOptUnconfined(t *testing.T) {
 	cfg.SecurityOpt = []string{"seccomp=unconfined"}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1021,7 +1022,7 @@ func TestDockerDriver_SecurityOptFromFile(t *testing.T) {
 	cfg.SecurityOpt = []string{"seccomp=./test-resources/docker/seccomp.json"}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1042,7 +1043,7 @@ func TestDockerDriver_Runtime(t *testing.T) {
 	cfg.Runtime = "runc"
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1470,7 +1471,7 @@ func TestDockerDriver_DNS(t *testing.T) {
 	cfg.DNSOptions = []string{"ndots:1"}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1497,7 +1498,7 @@ func TestDockerDriver_MACAddress(t *testing.T) {
 	cfg.MacAddress = "00:16:3e:00:00:00"
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1518,7 +1519,7 @@ func TestDockerWorkDir(t *testing.T) {
 	cfg.WorkDir = "/some/path"
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1547,7 +1548,7 @@ func TestDockerDriver_PortsNoMap(t *testing.T) {
 	res := ports[0]
 	dyn := ports[1]

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1596,7 +1597,7 @@ func TestDockerDriver_PortsMapping(t *testing.T) {
 	}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -1680,7 +1681,7 @@ func TestDockerDriver_CleanupContainer(t *testing.T) {
 	cfg.Args = []string{"hello"}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	waitCh, err := d.WaitTask(context.Background(), task.ID)
@@ -1914,7 +1915,7 @@ func TestDockerDriver_Stats(t *testing.T) {
 	cfg.Args = []string{"1000"}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	_, d, handle, cleanup := dockerSetup(t, task)
+	_, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -2379,7 +2380,7 @@ func TestDockerDriver_Device_Success(t *testing.T) {
 	cfg.Devices = []DockerDevice{config}
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, driver, handle, cleanup := dockerSetup(t, task)
+	client, driver, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, driver.WaitUntilStarted(task.ID, 5*time.Second))
@@ -2405,7 +2406,7 @@ func TestDockerDriver_Entrypoint(t *testing.T) {

 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, driver, handle, cleanup := dockerSetup(t, task)
+	client, driver, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, driver.WaitUntilStarted(task.ID, 5*time.Second))
@@ -2432,7 +2433,7 @@ func TestDockerDriver_ReadonlyRootfs(t *testing.T) {
 	cfg.ReadonlyRootfs = true
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, driver, handle, cleanup := dockerSetup(t, task)
+	client, driver, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, driver.WaitUntilStarted(task.ID, 5*time.Second))

diff --git a/drivers/docker/driver_unix_test.go b/drivers/docker/driver_unix_test.go
index 690a69e73..39beed994 100644
--- a/drivers/docker/driver_unix_test.go
+++ b/drivers/docker/driver_unix_test.go
@@ -159,7 +159,7 @@ func TestDockerDriver_CPUCFSPeriod(t *testing.T) {
 	cfg.CPUCFSPeriod = 1000000
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, _, handle, cleanup := dockerSetup(t, task)
+	client, _, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	waitForExist(t, client, handle.containerID)
@@ -184,7 +184,7 @@ func TestDockerDriver_Sysctl_Ulimit(t *testing.T) {
 	cfg.Ulimit = expectedUlimits
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
@@ -678,7 +678,12 @@ func TestDockerDriver_Cleanup(t *testing.T) {

 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, driver, handle, cleanup := dockerSetup(t, task)
+	client, driver, handle, cleanup := dockerSetup(t, task, map[string]interface{}{
+		"gc": map[string]interface{}{
+			"image":       true,
+			"image_delay": "1ms",
+		},
+	})
 	defer cleanup()

 	require.NoError(t, driver.WaitUntilStarted(task.ID, 5*time.Second))

diff --git a/drivers/docker/reconciler_test.go b/drivers/docker/reconciler_test.go
index 53c7eaadc..ff5284cf1 100644
--- a/drivers/docker/reconciler_test.go
+++ b/drivers/docker/reconciler_test.go
@@ -61,7 +61,7 @@ func TestDanglingContainerRemoval(t *testing.T) {
 	defer freeport.Return(ports)
 	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

-	client, d, handle, cleanup := dockerSetup(t, task)
+	client, d, handle, cleanup := dockerSetup(t, task, nil)
 	defer cleanup()

 	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
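
With image GC now off by default in the harness, a test that wants GC behavior opts in explicitly, exactly as TestDockerDriver_Cleanup does above. A hypothetical GC-focused test takes the same shape:

    func TestDockerDriver_ImageGC_Example(t *testing.T) {
        task, cfg, _ := dockerTask(t)
        require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

        // Opt in to image GC with a short delay; unrelated tests pass
        // nil and keep images cached, so no cleanup goroutine can race
        // them.
        _, d, _, cleanup := dockerSetup(t, task, map[string]interface{}{
            "gc": map[string]interface{}{
                "image":       true,
                "image_delay": "1ms",
            },
        })
        defer cleanup()

        require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
    }
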
From 41a450c8d871fe7437783988956d9bf3152be531 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Wed, 27 May 2020 07:36:35 -0400
Subject: [PATCH 5/8] tests: node drain events may be duplicated

---
 nomad/drainer_int_test.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nomad/drainer_int_test.go b/nomad/drainer_int_test.go
index a6777e90f..0ff06280e 100644
--- a/nomad/drainer_int_test.go
+++ b/nomad/drainer_int_test.go
@@ -696,7 +696,11 @@ func TestDrainer_AllTypes_NoDeadline(t *testing.T) {
 	// Check we got the right events
 	node, err := state.NodeByID(nil, n1.ID)
 	require.NoError(err)
-	require.Len(node.Events, 3)
+
+	// sometimes test gets a duplicate node drain complete event
+	if len(node.Events) < 3 {
+		require.Len(node.Events, 3, "expected at least 3 events")
+	}
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 }
From bcc4ec910d7147f86f133a859c62352fc0168a97 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Wed, 27 May 2020 08:37:12 -0400
Subject: [PATCH 6/8] gracefully shut down test server

---
 testutil/server.go | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/testutil/server.go b/testutil/server.go
index 08522807e..ab259235c 100644
--- a/testutil/server.go
+++ b/testutil/server.go
@@ -20,6 +20,7 @@ import (
 	"net/http"
 	"os"
 	"os/exec"
+	"time"

 	cleanhttp "github.com/hashicorp/go-cleanhttp"
 	"github.com/hashicorp/nomad/helper/discover"
@@ -239,13 +240,35 @@ func (s *TestServer) Stop() {

 	defer os.RemoveAll(s.Config.DataDir)

-	if err := s.cmd.Process.Kill(); err != nil {
+	// wait for the process to exit to be sure that the data dir can be
+	// deleted on all platforms.
+	done := make(chan struct{})
+	go func() {
+		defer close(done)

+		s.cmd.Wait()
+	}()
+
+	// attempt a graceful shutdown first, then escalate to a hard kill
+	if err := s.cmd.Process.Signal(os.Interrupt); err != nil {
 		s.t.Errorf("err: %s", err)
 	}

-	// wait for the process to exit to be sure that the data dir can be
-	// deleted on all platforms.
-	s.cmd.Wait()
+	select {
+	case <-done:
+		return
+	case <-time.After(5 * time.Second):
+		s.t.Logf("timed out waiting for process to gracefully terminate")
+	}
+
+	if err := s.cmd.Process.Kill(); err != nil {
+		s.t.Errorf("err: %s", err)
+	}
+	select {
+	case <-done:
+	case <-time.After(5 * time.Second):
+		s.t.Logf("timed out waiting for process to be killed")
+	}
 }

 // waitForAPI waits for only the agent HTTP endpoint to start
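
The interrupt-then-kill sequence is reusable in any test harness that manages a subprocess. A generic version might look like the sketch below (a hypothetical helper, simplified from Stop above; note that os.Interrupt signaling is not supported on Windows):

    // stopGracefully asks cmd to exit via SIGINT, then escalates to
    // SIGKILL if it has not exited within timeout.
    func stopGracefully(cmd *exec.Cmd, timeout time.Duration) error {
        done := make(chan error, 1)
        go func() { done <- cmd.Wait() }()

        if err := cmd.Process.Signal(os.Interrupt); err != nil {
            return err
        }
        select {
        case err := <-done:
            return err
        case <-time.After(timeout):
            // graceful window elapsed; fall through to a hard kill
        }

        if err := cmd.Process.Kill(); err != nil {
            return err
        }
        return <-done
    }
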
From 35d039e5832267954d154210e438446a729e857e Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Wed, 27 May 2020 10:09:56 -0400
Subject: [PATCH 7/8] tests: use t.Fatalf when it's clearer

---
 nomad/node_endpoint_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go
index 0324627ae..7b0c13b49 100644
--- a/nomad/node_endpoint_test.go
+++ b/nomad/node_endpoint_test.go
@@ -2513,7 +2513,7 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) {
 	case structs.EvalStatusPending, structs.EvalStatusComplete:
 		// success
 	default:
-		require.Equal(t, structs.EvalStatusPending, eval.Status)
+		t.Fatalf("expected pending or complete, found %v", eval.Status)
 	}
 	require.Equal(t, expPriority, eval.Priority)
 	require.Equal(t, expJobID, eval.JobID)
From 74af591a70b8d47ca4c0883c8a857514ee8c4934 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Wed, 27 May 2020 10:10:13 -0400
Subject: [PATCH 8/8] tests: use GreaterOrEqual and apply change to other tests

---
 nomad/drainer_int_test.go | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/nomad/drainer_int_test.go b/nomad/drainer_int_test.go
index 0ff06280e..ce480bd33 100644
--- a/nomad/drainer_int_test.go
+++ b/nomad/drainer_int_test.go
@@ -219,7 +219,8 @@ func TestDrainer_Simple_ServiceOnly(t *testing.T) {
 	// Check we got the right events
 	node, err := state.NodeByID(nil, n1.ID)
 	require.NoError(err)
-	require.Len(node.Events, 3)
+	// sometimes test gets a duplicate node drain complete event
+	require.GreaterOrEqualf(len(node.Events), 3, "unexpected number of events: %v", node.Events)
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 }

@@ -314,7 +315,8 @@ func TestDrainer_Simple_ServiceOnly_Deadline(t *testing.T) {
 	// Check we got the right events
 	node, err := state.NodeByID(nil, n1.ID)
 	require.NoError(err)
-	require.Len(node.Events, 3)
+	// sometimes test gets a duplicate node drain complete event
+	require.GreaterOrEqualf(len(node.Events), 3, "unexpected number of events: %v", node.Events)
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 	require.Contains(node.Events[2].Details, drainer.NodeDrainEventDetailDeadlined)
 }

@@ -365,7 +367,8 @@ func TestDrainer_DrainEmptyNode(t *testing.T) {
 	// Check we got the right events
 	node, err := state.NodeByID(nil, n1.ID)
 	require.NoError(err)
-	require.Len(node.Events, 3)
+	// sometimes test gets a duplicate node drain complete event
+	require.GreaterOrEqualf(len(node.Events), 3, "unexpected number of events: %v", node.Events)
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 }

@@ -529,7 +532,8 @@ func TestDrainer_AllTypes_Deadline(t *testing.T) {
 	// Check we got the right events
 	node, err := state.NodeByID(nil, n1.ID)
 	require.NoError(err)
-	require.Len(node.Events, 3)
+	// sometimes test gets a duplicate node drain complete event
+	require.GreaterOrEqualf(len(node.Events), 3, "unexpected number of events: %v", node.Events)
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 	require.Contains(node.Events[2].Details, drainer.NodeDrainEventDetailDeadlined)
 }

@@ -698,9 +702,7 @@ func TestDrainer_AllTypes_NoDeadline(t *testing.T) {
 	require.NoError(err)

 	// sometimes test gets a duplicate node drain complete event
-	if len(node.Events) < 3 {
-		require.Len(node.Events, 3, "expected at least 3 events")
-	}
+	require.GreaterOrEqualf(len(node.Events), 3, "unexpected number of events: %v", node.Events)
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 }

@@ -872,7 +874,8 @@ func TestDrainer_AllTypes_Deadline_GarbageCollectedNode(t *testing.T) {
 	// Check we got the right events
 	node, err := state.NodeByID(nil, n1.ID)
 	require.NoError(err)
-	require.Len(node.Events, 3)
+	// sometimes test gets a duplicate node drain complete event
+	require.GreaterOrEqualf(len(node.Events), 3, "unexpected number of events: %v", node.Events)
 	require.Equal(drainer.NodeDrainEventComplete, node.Events[2].Message)
 	require.Contains(node.Events[2].Details, drainer.NodeDrainEventDetailDeadlined)
 }

@@ -1017,7 +1020,8 @@ func TestDrainer_Batch_TransitionToForce(t *testing.T) {
 		// Check we got the right events
 		node, err := state.NodeByID(nil, n1.ID)
 		require.NoError(err)
-		require.Len(node.Events, 4)
+		// sometimes test gets a duplicate node drain complete event
+		require.GreaterOrEqualf(len(node.Events), 4, "unexpected number of events: %v", node.Events)
 		require.Equal(drainer.NodeDrainEventComplete, node.Events[3].Message)
 		require.Contains(node.Events[3].Details, drainer.NodeDrainEventDetailDeadlined)
 	})
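
These drainer tests call the assertion without a *testing.T argument, which suggests a require.Assertions value created with require.New(t). With testify's package-level API the same lower-bound assertion reads as follows; the test and its event data are illustrative only:

    func TestNodeEventsLowerBound(t *testing.T) {
        events := []string{"Node registered", "Drain strategy set", "Node drain complete", "Node drain complete"}

        // Assert a lower bound rather than an exact length, since the
        // drainer may emit a duplicate drain-complete event.
        require.GreaterOrEqualf(t, len(events), 3, "unexpected number of events: %v", events)
    }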