tests: try to deflake TestDockerDriver_OOMKilled

I noticed the Docker daemon failing to handle the OOM test case in build
https://travis-ci.org/hashicorp/nomad/jobs/468027848 . I suspect the failure
is related to the process dying so quickly, and potentially to the way we
start the task, so I added a startup delay and made the test more consistent
with other tests that don't seem as flaky.

The following log lines show Docker returning a 500 error. While we could probably handle this error gracefully without retrying, the retry is very cheap in this case, so skipping it is more of an optimization that we can handle in a follow-up PR.

```
    testlog.go:32: 2018-12-14T14:57:52.626Z [DEBUG] docker/driver.go:852: docker: setting container startup command: task_name=nc-demo command="/bin/nc -l 127.0.0.1 -p 0"
    testlog.go:32: 2018-12-14T14:57:52.626Z [DEBUG] docker/driver.go:866: docker: setting container name: task_name=nc-demo container_name=724a3e77-8b15-e657-f6aa-84c2d3243b18
    testlog.go:32: 2018-12-14T14:57:52.694Z [INFO ] docker/driver.go:196: docker: created container: container_id=362b6ea183f3c4ce472d7d7571ca47023cea1df0f5eb920827921716f17718be
    testlog.go:32: 2018-12-14T14:57:53.523Z [DEBUG] docker/driver.go:416: docker: failed to start container: container_id=362b6ea183f3c4ce472d7d7571ca47023cea1df0f5eb920827921716f17718be attempt=1 error="API error (500): {"message":"cannot start a stopped process: unknown"}
        "
    testlog.go:32: 2018-12-14T14:57:55.394Z [DEBUG] docker/driver.go:416: docker: failed to start container: container_id=362b6ea183f3c4ce472d7d7571ca47023cea1df0f5eb920827921716f17718be attempt=2 error="API error (500): {"message":"cannot start a stopped process: unknown"}
        "
    testlog.go:32: 2018-12-14T14:57:57.243Z [DEBUG] docker/driver.go:416: docker: failed to start container: container_id=362b6ea183f3c4ce472d7d7571ca47023cea1df0f5eb920827921716f17718be attempt=3 error="API error (500): {"message":"cannot start a stopped process: unknown"}
        "
```
This commit is contained in:
Mahmood Ali
2018-12-14 11:06:14 -05:00
committed by Mahmood Ali
parent e3cee53230
commit 29fc3f77c8

View File

@@ -39,12 +39,12 @@ import (
var (
basicResources = &drivers.Resources{
NomadResources: &structs.Resources{
CPU: 250,
MemoryMB: 256,
CPU: 512,
DiskMB: 20,
},
LinuxResources: &drivers.LinuxResources{
CPUShares: 250,
CPUShares: 512,
MemoryLimitBytes: 256 * 1024 * 1024,
},
}
@@ -2247,24 +2247,33 @@ func TestDockerDriver_OOMKilled(t *testing.T) {
t.Skip("Docker not connected")
}
cfg := &TaskConfig{
taskCfg := TaskConfig{
Image: busyboxImageID,
LoadImage: "busybox.tar",
Command: "sh",
Args: []string{"-c", "x=a; while true; do eval x='$x$x'; done"},
Command: "/bin/sh",
Args: []string{"-c", `/bin/sleep 2 && x=a && while true; do x="$x$x"; done`},
}
task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "oom-killed",
Resources: basicResources,
}
task.Resources.LinuxResources.MemoryLimitBytes = 4 * 1024 * 1024
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
task.Resources.LinuxResources.MemoryLimitBytes = 10 * 1024 * 1024
task.Resources.NomadResources.MemoryMB = 10
_, driver, _, cleanup := dockerSetup(t, task)
require.NoError(t, task.EncodeConcreteDriverConfig(&taskCfg))
d := dockerDriverHarness(t, nil)
cleanup := d.MkAllocDir(task, true)
defer cleanup()
copyImage(t, task.TaskDir(), "busybox.tar")
waitCh, err := driver.WaitTask(context.Background(), task.ID)
_, _, err := d.StartTask(task)
require.NoError(t, err)
defer d.DestroyTask(task.ID, true)
waitCh, err := d.WaitTask(context.Background(), task.ID)
require.NoError(t, err)
select {
case res := <-waitCh: