Fix a bunch of tests

Up timeouts

Use the Travis trusty beta dist

Increase timeouts
Alex Dadgar
2016-01-20 12:00:20 -08:00
parent 00aa52d60c
commit e91abac0f6
19 changed files with 140 additions and 47 deletions

View File

@@ -1,4 +1,5 @@
 sudo: required
+dist: trusty
 services:
   - docker
@@ -9,6 +10,9 @@ go:
   - 1.5.3
   - tip
+
+env:
+  - TRAVIS_RUN=true
 matrix:
   allow_failures:
     - go: tip
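
The TRAVIS_RUN variable exported above is the hook the test helpers use to detect CI and stretch their timeouts (see the TestMultiplier helper added to testutil at the bottom of this commit). A minimal sketch of the pattern, assuming only that variable:

package main

import (
    "fmt"
    "os"
    "time"
)

func main() {
    // Scale a base timeout when running under Travis, whose machines are
    // slower and more contended than a developer workstation.
    timeout := 2 * time.Second
    if _, ok := os.LookupEnv("TRAVIS_RUN"); ok {
        timeout *= 3 // same factor TestMultiplier returns below
    }
    fmt.Println("using timeout:", timeout)
}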

View File

@@ -33,8 +33,7 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
 	alloc := mock.Alloc()
 	consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
 	if !restarts {
 		alloc.Job.Type = structs.JobTypeBatch
-		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
+		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0, RestartOnSuccess: false}
 	}
 	ar := NewAllocRunner(logger, conf, upd.Update, alloc, consulClient)
@@ -85,7 +84,7 @@ func TestAllocRunner_Destroy(t *testing.T) {
 		t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
 	})
-	if time.Since(start) > 8*time.Second {
+	if time.Since(start) > 15*time.Second {
 		t.Fatalf("took too long to terminate")
 	}
 }
@@ -118,7 +117,7 @@ func TestAllocRunner_Update(t *testing.T) {
 		t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
 	})
-	if time.Since(start) > 8*time.Second {
+	if time.Since(start) > 15*time.Second {
 		t.Fatalf("took too long to terminate")
 	}
 }

View File

@@ -194,7 +194,10 @@ func (d *AllocDir) Embed(task string, entries map[string]string) error {
 		}
 		if err := os.Symlink(link, taskEntry); err != nil {
-			return fmt.Errorf("Couldn't create symlink: %v", err)
+			// Symlinking twice
+			if err.(*os.LinkError).Err.Error() != "file exists" {
+				return fmt.Errorf("Couldn't create symlink: %v", err)
+			}
 		}
 		continue
 	}
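
The hunk tolerates a pre-existing link by string-matching the wrapped error. A standalone sketch of the same idempotent-symlink pattern; os.IsExist is a string-free alternative that unwraps the *os.LinkError itself:

package main

import (
    "fmt"
    "os"
)

// linkIdempotent mirrors the "symlinking twice" case handled above: creating
// a symlink that already exists is treated as success rather than an error.
func linkIdempotent(target, name string) error {
    if err := os.Symlink(target, name); err != nil {
        // os.IsExist inspects the error wrapped by *os.LinkError, avoiding
        // the string compare on "file exists" used in the hunk.
        if os.IsExist(err) {
            return nil
        }
        return fmt.Errorf("Couldn't create symlink: %v", err)
    }
    return nil
}

func main() {
    for i := 0; i < 2; i++ { // the second call hits the already-exists path
        if err := linkIdempotent("/tmp/target", "/tmp/link"); err != nil {
            fmt.Println("err:", err)
            return
        }
    }
    fmt.Println("symlink created once; second call was a no-op")
}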

View File

@@ -47,6 +47,7 @@ func TestAllocDir_BuildAlloc(t *testing.T) {
 	defer os.RemoveAll(tmp)
 	d := NewAllocDir(tmp)
+	defer d.Destroy()
 	tasks := []*structs.Task{t1, t2}
 	if err := d.Build(tasks); err != nil {
 		t.Fatalf("Build(%v) failed: %v", tasks, err)
@@ -77,6 +78,7 @@ func TestAllocDir_EmbedNonExistent(t *testing.T) {
 	defer os.RemoveAll(tmp)
 	d := NewAllocDir(tmp)
+	defer d.Destroy()
 	tasks := []*structs.Task{t1, t2}
 	if err := d.Build(tasks); err != nil {
 		t.Fatalf("Build(%v) failed: %v", tasks, err)
@@ -98,6 +100,7 @@ func TestAllocDir_EmbedDirs(t *testing.T) {
 	defer os.RemoveAll(tmp)
 	d := NewAllocDir(tmp)
+	defer d.Destroy()
 	tasks := []*structs.Task{t1, t2}
 	if err := d.Build(tasks); err != nil {
 		t.Fatalf("Build(%v) failed: %v", tasks, err)
@@ -158,6 +161,7 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) {
 	defer os.RemoveAll(tmp)
 	d := NewAllocDir(tmp)
+	defer d.Destroy()
 	tasks := []*structs.Task{t1, t2}
 	if err := d.Build(tasks); err != nil {
 		t.Fatalf("Build(%v) failed: %v", tasks, err)

View File

@@ -141,10 +141,13 @@ func TestClient_Fingerprint(t *testing.T) {
 }
 func TestClient_Fingerprint_InWhitelist(t *testing.T) {
 	ctestutil.ExecCompatible(t)
 	c := testClient(t, func(c *config.Config) {
+		if c.Options == nil {
+			c.Options = make(map[string]string)
+		}
+
 		// Weird spacing to test trimming. Whitelist all modules; expect cpu.
-		c.Options["fingerprint.whitelist"] = " arch, consul,env_aws,env_gce,host,memory,network,storage,foo,bar "
+		c.Options["fingerprint.whitelist"] = " arch, consul,cpu,env_aws,env_gce,host,memory,network,storage,foo,bar "
 	})
 	defer c.Shutdown()
@@ -155,9 +158,12 @@ func TestClient_Fingerprint_InWhitelist(t *testing.T) {
 }
 func TestClient_Fingerprint_OutOfWhitelist(t *testing.T) {
 	ctestutil.ExecCompatible(t)
 	c := testClient(t, func(c *config.Config) {
-		c.Options["fingerprint.whitelist"] = "arch,consul,cpu,env_aws,env_gce,host,memory,network,storage,foo,bar"
+		if c.Options == nil {
+			c.Options = make(map[string]string)
+		}
+		c.Options["fingerprint.whitelist"] = "arch,consul,env_aws,env_gce,host,memory,network,storage,foo,bar"
 	})
 	defer c.Shutdown()
@@ -168,7 +174,6 @@ func TestClient_Fingerprint_OutOfWhitelist(t *testing.T) {
 }
 func TestClient_Drivers(t *testing.T) {
 	ctestutil.ExecCompatible(t)
 	c := testClient(t, nil)
 	defer c.Shutdown()
@@ -179,8 +184,11 @@ func TestClient_Drivers(t *testing.T) {
 }
 func TestClient_Drivers_InWhitelist(t *testing.T) {
 	ctestutil.ExecCompatible(t)
 	c := testClient(t, func(c *config.Config) {
+		if c.Options == nil {
+			c.Options = make(map[string]string)
+		}
+
 		// Weird spacing to test trimming
 		c.Options["driver.whitelist"] = " exec , foo "
 	})
@@ -193,8 +201,11 @@ func TestClient_Drivers_InWhitelist(t *testing.T) {
 }
 func TestClient_Drivers_OutOfWhitelist(t *testing.T) {
 	ctestutil.ExecCompatible(t)
 	c := testClient(t, func(c *config.Config) {
+		if c.Options == nil {
+			c.Options = make(map[string]string)
+		}
+
 		c.Options["driver.whitelist"] = "foo,bar,baz"
 	})
 	defer c.Shutdown()
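
The `c.Options == nil` guards added above matter because a fresh config may carry a nil map, and assigning into a nil map panics at runtime (reads are safe). A small demonstration:

package main

import "fmt"

func main() {
    var opts map[string]string // nil map: reads are safe, writes panic

    if v, ok := opts["driver.whitelist"]; !ok {
        fmt.Println("read from nil map is fine:", v, ok)
    }

    // opts["driver.whitelist"] = "exec" // would panic: assignment to entry in nil map

    if opts == nil {
        opts = make(map[string]string) // the guard added throughout client_test.go
    }
    opts["driver.whitelist"] = "exec"
    fmt.Println(opts)
}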

View File

@@ -492,15 +492,25 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
 		return nil, fmt.Errorf("Failed to query list of containers: %s", err)
 	}
-	d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name)
-	err = client.RemoveContainer(docker.RemoveContainerOptions{
-		ID: containers[0].ID,
-	})
-	if err != nil {
-		d.logger.Printf("[ERR] driver.docker: failed to purge container %s", config.Name)
-		return nil, fmt.Errorf("Failed to purge container %s: %s", config.Name, err)
-	}
-	d.logger.Printf("[INFO] driver.docker: purged container %s", config.Name)
+	// Couldn't find any matching containers
+	if len(containers) == 0 {
+		d.logger.Printf("[ERR] driver.docker: failed to get id for container %s: %#v", config.Name, containers)
+		return nil, fmt.Errorf("Failed to get id for container %s", config.Name)
+	}
+
+	// Delete matching containers
+	d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name)
+	for _, container := range containers {
+		err = client.RemoveContainer(docker.RemoveContainerOptions{
+			ID: container.ID,
+		})
+		if err != nil {
+			d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID)
+			return nil, fmt.Errorf("Failed to purge container %s: %s", container.ID, err)
+		}
+		d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID)
+	}
 	container, err = client.CreateContainer(config)
 	if err != nil {
 		d.logger.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name)
@@ -624,7 +634,7 @@ func (h *DockerHandle) Kill() error {
 	// Stop the container
 	err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds()))
 	if err != nil {
-		h.logger.Printf("[ERR] driver.docker: failed to stop container %s", h.containerID)
+		h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
 		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
 	}
 	h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)

View File

@@ -50,11 +50,17 @@ func dockerIsRemote(t *testing.T) bool {
 	return false
 }
+// Ports used by tests
+var (
+	docker_reserved = 32768 + int(rand.Int31n(25000))
+	docker_dynamic  = 32768 + int(rand.Int31n(25000))
+)
+
 // Returns a task with a reserved and dynamic port. The ports are returned
 // respectively.
 func dockerTask() (*structs.Task, int, int) {
-	reserved := 32768 + int(rand.Int31n(25000))
-	dynamic := 32768 + int(rand.Int31n(25000))
+	docker_reserved += 1
+	docker_dynamic += 1
 	return &structs.Task{
 		Name: "redis-demo",
 		Config: map[string]interface{}{
@@ -66,12 +72,12 @@ func dockerTask() (*structs.Task, int, int) {
 		Networks: []*structs.NetworkResource{
 			&structs.NetworkResource{
 				IP:            "127.0.0.1",
-				ReservedPorts: []structs.Port{{"main", reserved}},
-				DynamicPorts:  []structs.Port{{"REDIS", dynamic}},
+				ReservedPorts: []structs.Port{{"main", docker_reserved}},
+				DynamicPorts:  []structs.Port{{"REDIS", docker_dynamic}},
 			},
 		},
 	},
-	}, reserved, dynamic
+	}, docker_reserved, docker_dynamic
 }
 // dockerSetup does all of the basic setup you need to get a running docker
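
Replacing per-call random draws with seeded-once, incremented globals means each dockerTask call gets fresh ports that cannot collide with an earlier call in the same run, whereas independent random draws occasionally repeat. The allocator pattern in isolation (no locking, on the assumption that tests within a package run sequentially):

package main

import (
    "fmt"
    "math/rand"
)

// Seed a random base once per process, then hand out consecutive ports so
// repeated calls never collide in-process.
var nextPort = 32768 + int(rand.Int31n(25000))

func reservePort() int {
    nextPort++
    return nextPort
}

func main() {
    fmt.Println(reservePort(), reservePort(), reservePort()) // three distinct ports
}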

View File

@@ -259,6 +259,18 @@ func (t *TaskEnvironment) SetEnvvars(m map[string]string) *TaskEnvironment {
 	return t
 }
+// Appends the given environment variables.
+func (t *TaskEnvironment) AppendEnvvars(m map[string]string) *TaskEnvironment {
+	if t.env == nil {
+		t.env = make(map[string]string, len(m))
+	}
+
+	for k, v := range m {
+		t.env[k] = v
+	}
+	return t
+}
+
 func (t *TaskEnvironment) ClearEnvvars() *TaskEnvironment {
 	t.env = nil
 	return t
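
AppendEnvvars merges into the existing set rather than replacing it, so it can be chained safely after other builder calls. A usage sketch; env.NewTaskEnvironment as the constructor is an assumption here, the rest follows the methods shown in this commit:

package main

import (
    "fmt"

    "github.com/hashicorp/nomad/client/driver/env"
    "github.com/hashicorp/nomad/nomad/structs"
)

func main() {
    // TEST_TASK survives the earlier SetEnvvars call because AppendEnvvars
    // merges instead of overwriting the map.
    te := env.NewTaskEnvironment(&structs.Node{}). // assumed constructor
        SetEnvvars(map[string]string{"REDIS_ADDR": "127.0.0.1:6379"}).
        AppendEnvvars(map[string]string{"TEST_TASK": "execute"})
    fmt.Println(te.Build().EnvList())
}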

View File

@@ -11,6 +11,7 @@ import (
 	"github.com/hashicorp/nomad/client/config"
 	"github.com/hashicorp/nomad/client/driver/env"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/hashicorp/nomad/testutil"
 	ctestutils "github.com/hashicorp/nomad/client/testutil"
 )
@@ -196,7 +197,7 @@ func TestExecDriver_Start_Artifact_expanded(t *testing.T) {
 		if !res.Successful() {
 			t.Fatalf("err: %v", res)
 		}
-	case <-time.After(5 * time.Second):
+	case <-time.After(15 * time.Second):
 		t.Fatalf("timeout")
 	}
 }
@@ -236,7 +237,7 @@ func TestExecDriver_Start_Wait_AllocDir(t *testing.T) {
 		if !res.Successful() {
 			t.Fatalf("err: %v", res)
 		}
-	case <-time.After(2 * time.Second):
+	case <-time.After(time.Duration(testutil.TestMultiplier()) * 2 * time.Second):
 		t.Fatalf("timeout")
 	}
View File

@@ -57,6 +57,7 @@ func (e *BasicExecutor) Start() error {
 	// variables.
 	e.cmd.Path = e.taskEnv.ReplaceEnv(e.cmd.Path)
 	e.cmd.Args = e.taskEnv.ParseAndReplace(e.cmd.Args)
+	e.cmd.Env = e.taskEnv.Build().EnvList()
 	spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status"))
 	e.spawn = spawn.NewSpawner(spawnState)
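
The added line relies on exec.Cmd's environment semantics: a nil Env inherits the parent's environment, while a non-nil slice replaces it entirely, which is why the executor must build the complete list from the task environment. A quick demonstration:

package main

import (
    "fmt"
    "os/exec"
)

func main() {
    cmd := exec.Command("env")
    cmd.Env = []string{"TEST_TASK=execute"} // child now sees only this variable
    out, err := cmd.Output()
    if err != nil {
        fmt.Println("err:", err)
        return
    }
    fmt.Print(string(out))
}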

View File

@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
 	"syscall"
 	"github.com/hashicorp/go-multierror"
@@ -53,6 +54,7 @@ type LinuxExecutor struct {
 	*ExecutorContext
 	cmd  exec.Cmd
 	user *user.User
+	l    sync.Mutex
 	// Isolation configurations.
 	groups *cgroupConfig.Cgroup
@@ -265,7 +267,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
 			return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
 		}
-		if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
+		if err := syscall.Mount("none", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
 			return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
 		}
 	}
@@ -277,7 +279,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
 			return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
 		}
-		if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
+		if err := syscall.Mount("none", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
 			return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
 		}
 	}
@@ -300,6 +302,10 @@ func (e *LinuxExecutor) pathExists(path string) bool {
 // cleanTaskDir is an idempotent operation to clean the task directory and
 // should be called when tearing down the task.
 func (e *LinuxExecutor) cleanTaskDir() error {
+	// Prevent a race between Wait/ForceStop
+	e.l.Lock()
+	defer e.l.Unlock()
+
 	// Unmount dev.
 	errs := new(multierror.Error)
 	dev := filepath.Join(e.taskDir, "dev")
@@ -373,6 +379,10 @@ func (e *LinuxExecutor) destroyCgroup() error {
 		return errors.New("Can't destroy: cgroup configuration empty")
 	}
+	// Prevent a race between Wait/ForceStop
+	e.l.Lock()
+	defer e.l.Unlock()
+
 	manager := e.getCgroupManager(e.groups)
 	pids, err := manager.GetPids()
 	if err != nil {
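
The Wait and ForceStop paths can both reach cleanTaskDir and destroyCgroup, so the new mutex serializes teardown. A toy version of the pattern; the done flag is an illustrative addition (the real code relies on the unmount and removal operations themselves being idempotent):

package main

import (
    "fmt"
    "sync"
)

type executor struct {
    l    sync.Mutex
    done bool
}

// cleanTaskDir runs at most once even when two goroutines race into it.
func (e *executor) cleanTaskDir() {
    e.l.Lock()
    defer e.l.Unlock()
    if e.done {
        return
    }
    e.done = true
    fmt.Println("unmounting dev and proc, removing task dir")
}

func main() {
    e := &executor{}
    var wg sync.WaitGroup
    for i := 0; i < 2; i++ { // the Wait and ForceStop paths racing
        wg.Add(1)
        go func() { defer wg.Done(); e.cleanTaskDir() }()
    }
    wg.Wait()
}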

View File

@@ -57,9 +57,10 @@ func testExecutor(t *testing.T, buildExecutor func(*ExecutorContext) Executor, c
 	}
 	command := func(name string, args ...string) Executor {
-		e := buildExecutor(testExecutorContext())
+		ctx := testExecutorContext()
+		e := buildExecutor(ctx)
 		SetCommand(e, name, args)
 		testtask.SetCmdEnv(e.Command())
+		testtask.SetEnv(ctx.taskEnv)
 		return e
 	}

View File

@@ -82,11 +82,10 @@ func TestJavaDriver_StartOpen_Wait(t *testing.T) {
 	}
 	time.Sleep(2 * time.Second)
 	// need to kill long lived process
-	err = handle.Kill()
-	if err != nil {
-		t.Fatalf("Error: %s", err)
-	}
+	// There is a race condition between the handle waiting and killing. One
+	// will return an error.
+	handle.Kill()
 }
 func TestJavaDriver_Start_Wait(t *testing.T) {
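
The ignored error is deliberate: the goroutine waiting on the handle and the Kill call race, and whichever loses observes an already-finished process. The same race, reproduced with plain os/exec:

package main

import (
    "fmt"
    "os/exec"
    "time"
)

func main() {
    cmd := exec.Command("sleep", "10")
    if err := cmd.Start(); err != nil {
        fmt.Println("start:", err)
        return
    }
    done := make(chan error, 1)
    go func() { done <- cmd.Wait() }() // the waiter

    time.Sleep(100 * time.Millisecond)
    // The killer races the waiter: exactly one side sees a "real" result;
    // the other reports an error such as "os: process already finished" or
    // "signal: killed".
    if err := cmd.Process.Kill(); err != nil {
        fmt.Println("kill lost the race:", err)
    }
    fmt.Println("wait:", <-done)
}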

View File

@@ -14,6 +14,7 @@ import (
 	"github.com/hashicorp/nomad/client/driver/env"
 	"github.com/hashicorp/nomad/helper/testtask"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/hashicorp/nomad/testutil"
 )
 func TestRawExecDriver_Fingerprint(t *testing.T) {
@@ -284,7 +285,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) {
 	Name: "sleep",
 	Config: map[string]interface{}{
 		"command": testtask.Path(),
-		"args":    []string{"sleep", "1s"},
+		"args":    []string{"sleep", "5s"},
 	},
 	Resources: basicResources,
 }
@@ -303,7 +304,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) {
 	}
 	go func() {
-		time.Sleep(100 * time.Millisecond)
+		time.Sleep(time.Duration(testutil.TestMultiplier()) * 1 * time.Second)
 		err := handle.Kill()
 		if err != nil {
 			t.Fatalf("err: %v", err)
@@ -316,7 +317,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) {
 		if res.Successful() {
 			t.Fatal("should err")
 		}
-	case <-time.After(2 * time.Second):
+	case <-time.After(3 * time.Second):
 		t.Fatalf("timeout")
 	}
 }

View File

@@ -32,7 +32,7 @@ func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
 	if now.After(end) {
 		r.count = 0
 		r.startTime = now
-		return true, r.jitter()
+		return r.shouldRestart(exitCode), r.jitter()
 	}
 	r.count++
@@ -59,7 +59,12 @@ func (r *RestartTracker) shouldRestart(exitCode int) bool {
 // jitter returns the delay time plus a jitter.
 func (r *RestartTracker) jitter() time.Duration {
+	// Get the delay and ensure it is valid.
 	d := r.policy.Delay.Nanoseconds()
+	if d == 0 {
+		d = 1
+	}
+
 	j := float64(r.rand.Int63n(d)) * jitter
 	return time.Duration(d + int64(j))
 }
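
The d == 0 guard is load-bearing: rand.Int63n panics when its argument is not positive, so a restart policy with an unset Delay would crash the jitter calculation. The same arithmetic in isolation (the 0.25 jitter factor is assumed to match the package constant):

package main

import (
    "fmt"
    "math/rand"
    "time"
)

func main() {
    var delay time.Duration // zero value, as in a policy with no Delay set

    d := delay.Nanoseconds()
    if d == 0 {
        d = 1 // the guard added in jitter(); rand.Int63n(0) would panic
    }
    j := float64(rand.Int63n(d)) * 0.25 // assumed jitter factor
    fmt.Println(time.Duration(d + int64(j)))
}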

View File

@@ -61,7 +61,7 @@ func TestTaskRunner_SimpleRun(t *testing.T) {
 	select {
 	case <-tr.WaitCh():
-	case <-time.After(2 * time.Second):
+	case <-time.After(15 * time.Second):
 		t.Fatalf("timeout")
 	}
@@ -100,7 +100,7 @@ func TestTaskRunner_Destroy(t *testing.T) {
 	select {
 	case <-tr.WaitCh():
-	case <-time.After(8 * time.Second):
+	case <-time.After(15 * time.Second):
 		t.Fatalf("timeout")
 	}
@@ -158,7 +158,7 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) {
 	defer tr.Destroy()
 	// Snapshot state
-	time.Sleep(1 * time.Second)
+	time.Sleep(2 * time.Second)
 	if err := tr.SaveState(); err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -175,7 +175,7 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) {
 	defer tr2.Destroy()
 	// Destroy and wait
-	time.Sleep(1 * time.Second)
+	time.Sleep(time.Duration(testutil.TestMultiplier()) * 2 * time.Second)
 	if tr2.handle == nil {
 		t.Fatalf("RestoreState() didn't open handle")
 	}

View File

@@ -9,6 +9,7 @@ import (
 	"os/exec"
 	"time"
+	"github.com/hashicorp/nomad/client/driver/env"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/kardianos/osext"
 )
@@ -22,6 +23,12 @@ func Path() string {
 	return path
 }
+// SetEnv configures the environment of the task so that Run executes a testtask
+// script when called from within the task.
+func SetEnv(env *env.TaskEnvironment) {
+	env.AppendEnvvars(map[string]string{"TEST_TASK": "execute"})
+}
+
 // SetCmdEnv configures the environment of cmd so that Run executes a testtask
 // script when called from within cmd.
 func SetCmdEnv(cmd *exec.Cmd) {

View File

@@ -10,4 +10,4 @@ go build -o $TEMPDIR/nomad || exit 1
 # Run the tests
 echo "--> Running tests"
-go list ./... | sudo -E PATH=$TEMPDIR:$PATH xargs -n1 go test -cover -timeout=80s
+go list ./... | sudo -E PATH=$TEMPDIR:$PATH xargs -n1 go test -cover -timeout=180s

View File

@@ -1,18 +1,27 @@
 package testutil
 import (
+	"os"
 	"testing"
 	"time"
 	"github.com/hashicorp/nomad/nomad/structs"
 )
+const (
+	// TravisRunEnv is an environment variable that is set if being run by
+	// Travis.
+	TravisRunEnv = "TRAVIS_RUN"
+)
+
 type testFn func() (bool, error)
 type errorFn func(error)
 func WaitForResult(test testFn, error errorFn) {
-	retries := 1000
+	WaitForResultRetries(1000*TestMultiplier(), test, error)
+}
+
+func WaitForResultRetries(retries int, test testFn, error errorFn) {
 	for retries > 0 {
 		time.Sleep(10 * time.Millisecond)
 		retries--
@@ -28,6 +37,16 @@ func WaitForResult(test testFn, error errorFn) {
 	}
 }
+// TestMultiplier returns a multiplier for retries and waits given the
+// environment the tests are being run under.
+func TestMultiplier() int {
+	if _, ok := os.LookupEnv(TravisRunEnv); ok {
+		return 3
+	}
+	return 1
+}
+
 type rpcFn func(string, interface{}, interface{}) error
 func WaitForLeader(t *testing.T, rpc rpcFn) {
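
Putting the pieces together, a sketch of how a test consumes these helpers: the condition is polled every 10ms, and the retry budget is tripled under Travis via the TRAVIS_RUN variable set in .travis.yml at the top of this commit:

package main

import (
    "fmt"

    "github.com/hashicorp/nomad/testutil"
)

func main() {
    ready := make(chan struct{})
    close(ready) // pretend the condition is already satisfied

    testutil.WaitForResult(func() (bool, error) {
        select {
        case <-ready:
            return true, nil
        default:
            return false, fmt.Errorf("not ready yet")
        }
    }, func(err error) {
        panic(err) // real tests call t.Fatalf here instead
    })
    fmt.Println("condition met")
}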