From 2f47a6d86c8f707406f9f9d3394baa70d64d5a19 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Fri, 13 Sep 2019 12:59:14 -0400
Subject: [PATCH] docker: defensive against failed starts

This handles a bug where we may start a container successfully, yet
fail the task anyway because the start is retried and startContainer
is not an idempotent call.

Here, we ensure that when starting a container fails with a 500 error
but the container was in fact started, the retry treats it as a
success.
---
 drivers/docker/driver.go      |  2 +-
 drivers/docker/driver_test.go | 71 +++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go
index ac83f376d..dac2dbdd6 100644
--- a/drivers/docker/driver.go
+++ b/drivers/docker/driver.go
@@ -468,7 +468,7 @@ func (d *Driver) startContainer(c *docker.Container) error {
 	attempted := 0
 START:
 	startErr := client.StartContainer(c.ID, c.HostConfig)
-	if startErr == nil {
+	if startErr == nil || strings.Contains(startErr.Error(), "Container already running") {
 		return nil
 	}
 
diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go
index 997ce1d9c..a100aa743 100644
--- a/drivers/docker/driver_test.go
+++ b/drivers/docker/driver_test.go
@@ -2376,3 +2376,74 @@ func waitForExist(t *testing.T, client *docker.Client, containerID string) {
 		require.NoError(t, err)
 	})
 }
+
+// TestDockerDriver_CreationIdempotent asserts that the createContainer
+// and startContainer functions are idempotent, as we have some retry
+// logic there without ensuring we delete/destroy containers
+func TestDockerDriver_CreationIdempotent(t *testing.T) {
+	if !tu.IsCI() {
+		t.Parallel()
+	}
+	testutil.DockerCompatible(t)
+
+	task, cfg, _ := dockerTask(t)
+	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
+
+	client := newTestDockerClient(t)
+	driver := dockerDriverHarness(t, nil)
+	cleanup := driver.MkAllocDir(task, true)
+	defer cleanup()
+
+	copyImage(t, task.TaskDir(), "busybox.tar")
+
+	d, ok := driver.Impl().(*Driver)
+	require.True(t, ok)
+
+	_, err := d.createImage(task, cfg, client)
+	require.NoError(t, err)
+
+	containerCfg, err := d.createContainerConfig(task, cfg, cfg.Image)
+	require.NoError(t, err)
+
+	c, err := d.createContainer(client, containerCfg, cfg.Image)
+	require.NoError(t, err)
+	defer client.RemoveContainer(docker.RemoveContainerOptions{
+		ID:    c.ID,
+		Force: true,
+	})
+
+	// calling createContainer again creates a new container and removes the old one
+	c2, err := d.createContainer(client, containerCfg, cfg.Image)
+	require.NoError(t, err)
+	defer client.RemoveContainer(docker.RemoveContainerOptions{
+		ID:    c2.ID,
+		Force: true,
+	})
+
+	require.NotEqual(t, c.ID, c2.ID)
+	// old container was destroyed
+	{
+		_, err := client.InspectContainer(c.ID)
+		require.Error(t, err)
+		require.Contains(t, err.Error(), NoSuchContainerError)
+	}
+
+	// now start container twice
+	require.NoError(t, d.startContainer(c2))
+	require.NoError(t, d.startContainer(c2))
+
+	tu.WaitForResult(func() (bool, error) {
+		c, err := client.InspectContainer(c2.ID)
+		if err != nil {
+			return false, fmt.Errorf("failed to get container status: %v", err)
+		}
+
+		if !c.State.Running {
+			return false, fmt.Errorf("container is not running, state: %v", c.State)
+		}
+
+		return true, nil
+	}, func(err error) {
+		require.NoError(t, err)
+	})
+}
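
For reviewers: a minimal sketch of the retry loop this patch hardens,
assuming go-dockerclient's StartContainer signature. Only the top of
the loop is visible in the hunk above, so the 500-detection, the
5-attempt limit, and the backoff below are illustrative assumptions,
not Nomad's actual code:

    package docker

    import (
        "strings"
        "time"

        docker "github.com/fsouza/go-dockerclient"
    )

    // startWithRetry retries transient daemon failures. A start attempt
    // can succeed on the daemon even though the client sees a 500, so a
    // retry may find the container already running; treating that error
    // as success is what makes the retry idempotent.
    func startWithRetry(client *docker.Client, c *docker.Container) error {
        attempted := 0
    START:
        startErr := client.StartContainer(c.ID, c.HostConfig)
        if startErr == nil || strings.Contains(startErr.Error(), "Container already running") {
            return nil
        }

        // Assumed policy: HTTP 500s from the daemon are often transient,
        // so retry a few times with a small backoff before giving up.
        if strings.Contains(startErr.Error(), "API error (500)") && attempted < 5 {
            attempted++
            time.Sleep(time.Duration(attempted*250) * time.Millisecond)
            goto START
        }

        return startErr
    }

Matching on the daemon's error string rather than a typed error is
fragile across Docker versions, but it keeps the fix local to the
retry loop rather than threading new error types through the driver.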