diff --git a/.travis.yml b/.travis.yml index e6b328d74..c9fe2a85c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ sudo: required +dist: trusty services: - docker @@ -9,6 +10,9 @@ go: - 1.5.3 - tip +env: + - TRAVIS_RUN=true + matrix: allow_failures: - go: tip diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index c0bb369a1..0e4f2c3be 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -33,8 +33,7 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) { alloc := mock.Alloc() consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}}) if !restarts { - alloc.Job.Type = structs.JobTypeBatch - *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0} + *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0, RestartOnSuccess: false} } ar := NewAllocRunner(logger, conf, upd.Update, alloc, consulClient) @@ -85,7 +84,7 @@ func TestAllocRunner_Destroy(t *testing.T) { t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates) }) - if time.Since(start) > 8*time.Second { + if time.Since(start) > 15*time.Second { t.Fatalf("took too long to terminate") } } @@ -118,7 +117,7 @@ func TestAllocRunner_Update(t *testing.T) { t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates) }) - if time.Since(start) > 8*time.Second { + if time.Since(start) > 15*time.Second { t.Fatalf("took too long to terminate") } } diff --git a/client/allocdir/alloc_dir.go b/client/allocdir/alloc_dir.go index 290ccf3ae..c95b34165 100644 --- a/client/allocdir/alloc_dir.go +++ b/client/allocdir/alloc_dir.go @@ -194,7 +194,10 @@ func (d *AllocDir) Embed(task string, entries map[string]string) error { } if err := os.Symlink(link, taskEntry); err != nil { - return fmt.Errorf("Couldn't create symlink: %v", err) + // Symlinking twice + if err.(*os.LinkError).Err.Error() != "file exists" { + return fmt.Errorf("Couldn't create symlink: %v", err) + } } continue } diff --git a/client/allocdir/alloc_dir_test.go b/client/allocdir/alloc_dir_test.go index 326e772b9..c054aa97c 100644 --- a/client/allocdir/alloc_dir_test.go +++ b/client/allocdir/alloc_dir_test.go @@ -47,6 +47,7 @@ func TestAllocDir_BuildAlloc(t *testing.T) { defer os.RemoveAll(tmp) d := NewAllocDir(tmp) + defer d.Destroy() tasks := []*structs.Task{t1, t2} if err := d.Build(tasks); err != nil { t.Fatalf("Build(%v) failed: %v", tasks, err) @@ -77,6 +78,7 @@ func TestAllocDir_EmbedNonExistent(t *testing.T) { defer os.RemoveAll(tmp) d := NewAllocDir(tmp) + defer d.Destroy() tasks := []*structs.Task{t1, t2} if err := d.Build(tasks); err != nil { t.Fatalf("Build(%v) failed: %v", tasks, err) @@ -98,6 +100,7 @@ func TestAllocDir_EmbedDirs(t *testing.T) { defer os.RemoveAll(tmp) d := NewAllocDir(tmp) + defer d.Destroy() tasks := []*structs.Task{t1, t2} if err := d.Build(tasks); err != nil { t.Fatalf("Build(%v) failed: %v", tasks, err) @@ -158,6 +161,7 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) { defer os.RemoveAll(tmp) d := NewAllocDir(tmp) + defer d.Destroy() tasks := []*structs.Task{t1, t2} if err := d.Build(tasks); err != nil { t.Fatalf("Build(%v) failed: %v", tasks, err) diff --git a/client/client_test.go b/client/client_test.go index 82307f85b..3e8d68cb9 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -141,10 +141,13 @@ func TestClient_Fingerprint(t *testing.T) { } func TestClient_Fingerprint_InWhitelist(t *testing.T) { - ctestutil.ExecCompatible(t) c := testClient(t, func(c *config.Config) { + if c.Options == nil { + c.Options = make(map[string]string) + } + // Weird spacing to test trimming. Whitelist all modules expect cpu. - c.Options["fingerprint.whitelist"] = " arch, consul,env_aws,env_gce,host,memory,network,storage,foo,bar " + c.Options["fingerprint.whitelist"] = " arch, consul,cpu,env_aws,env_gce,host,memory,network,storage,foo,bar " }) defer c.Shutdown() @@ -155,9 +158,12 @@ func TestClient_Fingerprint_InWhitelist(t *testing.T) { } func TestClient_Fingerprint_OutOfWhitelist(t *testing.T) { - ctestutil.ExecCompatible(t) c := testClient(t, func(c *config.Config) { - c.Options["fingerprint.whitelist"] = "arch,consul,cpu,env_aws,env_gce,host,memory,network,storage,foo,bar" + if c.Options == nil { + c.Options = make(map[string]string) + } + + c.Options["fingerprint.whitelist"] = "arch,consul,env_aws,env_gce,host,memory,network,storage,foo,bar" }) defer c.Shutdown() @@ -168,7 +174,6 @@ func TestClient_Fingerprint_OutOfWhitelist(t *testing.T) { } func TestClient_Drivers(t *testing.T) { - ctestutil.ExecCompatible(t) c := testClient(t, nil) defer c.Shutdown() @@ -179,8 +184,11 @@ func TestClient_Drivers(t *testing.T) { } func TestClient_Drivers_InWhitelist(t *testing.T) { - ctestutil.ExecCompatible(t) c := testClient(t, func(c *config.Config) { + if c.Options == nil { + c.Options = make(map[string]string) + } + // Weird spacing to test trimming c.Options["driver.whitelist"] = " exec , foo " }) @@ -193,8 +201,11 @@ func TestClient_Drivers_InWhitelist(t *testing.T) { } func TestClient_Drivers_OutOfWhitelist(t *testing.T) { - ctestutil.ExecCompatible(t) c := testClient(t, func(c *config.Config) { + if c.Options == nil { + c.Options = make(map[string]string) + } + c.Options["driver.whitelist"] = "foo,bar,baz" }) defer c.Shutdown() diff --git a/client/driver/docker.go b/client/driver/docker.go index 9e129981e..6ae591b29 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -492,15 +492,25 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle return nil, fmt.Errorf("Failed to query list of containers: %s", err) } - d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name) - err = client.RemoveContainer(docker.RemoveContainerOptions{ - ID: containers[0].ID, - }) - if err != nil { - d.logger.Printf("[ERR] driver.docker: failed to purge container %s", config.Name) - return nil, fmt.Errorf("Failed to purge container %s: %s", config.Name, err) + // Couldn't find any matching containers + if len(containers) == 0 { + d.logger.Printf("[ERR] driver.docker: failed to get id for container %s: %#v", config.Name, containers) + return nil, fmt.Errorf("Failed to get id for container %s", config.Name) } - d.logger.Printf("[INFO] driver.docker: purged container %s", config.Name) + + // Delete matching containers + d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name) + for _, container := range containers { + err = client.RemoveContainer(docker.RemoveContainerOptions{ + ID: container.ID, + }) + if err != nil { + d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID) + return nil, fmt.Errorf("Failed to purge container %s: %s", container.ID, err) + } + d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID) + } + container, err = client.CreateContainer(config) if err != nil { d.logger.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name) @@ -624,7 +634,7 @@ func (h *DockerHandle) Kill() error { // Stop the container err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds())) if err != nil { - h.logger.Printf("[ERR] driver.docker: failed to stop container %s", h.containerID) + h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err) return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) } h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID) diff --git a/client/driver/docker_test.go b/client/driver/docker_test.go index 5685163f4..432a2b10f 100644 --- a/client/driver/docker_test.go +++ b/client/driver/docker_test.go @@ -50,11 +50,17 @@ func dockerIsRemote(t *testing.T) bool { return false } +// Ports used by tests +var ( + docker_reserved = 32768 + int(rand.Int31n(25000)) + docker_dynamic = 32768 + int(rand.Int31n(25000)) +) + // Returns a task with a reserved and dynamic port. The ports are returned // respectively. func dockerTask() (*structs.Task, int, int) { - reserved := 32768 + int(rand.Int31n(25000)) - dynamic := 32768 + int(rand.Int31n(25000)) + docker_reserved += 1 + docker_dynamic += 1 return &structs.Task{ Name: "redis-demo", Config: map[string]interface{}{ @@ -66,12 +72,12 @@ func dockerTask() (*structs.Task, int, int) { Networks: []*structs.NetworkResource{ &structs.NetworkResource{ IP: "127.0.0.1", - ReservedPorts: []structs.Port{{"main", reserved}}, - DynamicPorts: []structs.Port{{"REDIS", dynamic}}, + ReservedPorts: []structs.Port{{"main", docker_reserved}}, + DynamicPorts: []structs.Port{{"REDIS", docker_dynamic}}, }, }, }, - }, reserved, dynamic + }, docker_reserved, docker_dynamic } // dockerSetup does all of the basic setup you need to get a running docker diff --git a/client/driver/env/env.go b/client/driver/env/env.go index d5ab3e9fc..5ed202355 100644 --- a/client/driver/env/env.go +++ b/client/driver/env/env.go @@ -259,6 +259,18 @@ func (t *TaskEnvironment) SetEnvvars(m map[string]string) *TaskEnvironment { return t } +// Appends the given environment variables. +func (t *TaskEnvironment) AppendEnvvars(m map[string]string) *TaskEnvironment { + if t.env == nil { + t.env = make(map[string]string, len(m)) + } + + for k, v := range m { + t.env[k] = v + } + return t +} + func (t *TaskEnvironment) ClearEnvvars() *TaskEnvironment { t.env = nil return t diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index 5b5c1de2d..b1f01b3c3 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -11,6 +11,7 @@ import ( "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/env" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" ctestutils "github.com/hashicorp/nomad/client/testutil" ) @@ -196,7 +197,7 @@ func TestExecDriver_Start_Artifact_expanded(t *testing.T) { if !res.Successful() { t.Fatalf("err: %v", res) } - case <-time.After(5 * time.Second): + case <-time.After(15 * time.Second): t.Fatalf("timeout") } } @@ -236,7 +237,7 @@ func TestExecDriver_Start_Wait_AllocDir(t *testing.T) { if !res.Successful() { t.Fatalf("err: %v", res) } - case <-time.After(2 * time.Second): + case <-time.After(testutil.TestMultiplier() * 2 * time.Second): t.Fatalf("timeout") } diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go index 16745614b..51099e254 100644 --- a/client/driver/executor/exec_basic.go +++ b/client/driver/executor/exec_basic.go @@ -57,6 +57,7 @@ func (e *BasicExecutor) Start() error { // variables. e.cmd.Path = e.taskEnv.ReplaceEnv(e.cmd.Path) e.cmd.Args = e.taskEnv.ParseAndReplace(e.cmd.Args) + e.cmd.Env = e.taskEnv.Build().EnvList() spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) e.spawn = spawn.NewSpawner(spawnState) diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go index 43a7730a4..47e037cef 100644 --- a/client/driver/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -11,6 +11,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" "syscall" "github.com/hashicorp/go-multierror" @@ -53,6 +54,7 @@ type LinuxExecutor struct { *ExecutorContext cmd exec.Cmd user *user.User + l sync.Mutex // Isolation configurations. groups *cgroupConfig.Cgroup @@ -265,7 +267,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) } - if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { + if err := syscall.Mount("none", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) } } @@ -277,7 +279,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) } - if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { + if err := syscall.Mount("none", proc, "proc", syscall.MS_RDONLY, ""); err != nil { return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) } } @@ -300,6 +302,10 @@ func (e *LinuxExecutor) pathExists(path string) bool { // cleanTaskDir is an idempotent operation to clean the task directory and // should be called when tearing down the task. func (e *LinuxExecutor) cleanTaskDir() error { + // Prevent a race between Wait/ForceStop + e.l.Lock() + defer e.l.Unlock() + // Unmount dev. errs := new(multierror.Error) dev := filepath.Join(e.taskDir, "dev") @@ -373,6 +379,10 @@ func (e *LinuxExecutor) destroyCgroup() error { return errors.New("Can't destroy: cgroup configuration empty") } + // Prevent a race between Wait/ForceStop + e.l.Lock() + defer e.l.Unlock() + manager := e.getCgroupManager(e.groups) pids, err := manager.GetPids() if err != nil { diff --git a/client/driver/executor/test_harness_test.go b/client/driver/executor/test_harness_test.go index e31cc422c..7f78574a8 100644 --- a/client/driver/executor/test_harness_test.go +++ b/client/driver/executor/test_harness_test.go @@ -57,9 +57,10 @@ func testExecutor(t *testing.T, buildExecutor func(*ExecutorContext) Executor, c } command := func(name string, args ...string) Executor { - e := buildExecutor(testExecutorContext()) + ctx := testExecutorContext() + e := buildExecutor(ctx) SetCommand(e, name, args) - testtask.SetCmdEnv(e.Command()) + testtask.SetEnv(ctx.taskEnv) return e } diff --git a/client/driver/java_test.go b/client/driver/java_test.go index b06aaa561..e95307412 100644 --- a/client/driver/java_test.go +++ b/client/driver/java_test.go @@ -82,11 +82,10 @@ func TestJavaDriver_StartOpen_Wait(t *testing.T) { } time.Sleep(2 * time.Second) - // need to kill long lived process - err = handle.Kill() - if err != nil { - t.Fatalf("Error: %s", err) - } + + // There is a race condition between the handle waiting and killing. One + // will return an error. + handle.Kill() } func TestJavaDriver_Start_Wait(t *testing.T) { diff --git a/client/driver/raw_exec_test.go b/client/driver/raw_exec_test.go index a2d3a8634..5238da2b2 100644 --- a/client/driver/raw_exec_test.go +++ b/client/driver/raw_exec_test.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/nomad/client/driver/env" "github.com/hashicorp/nomad/helper/testtask" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" ) func TestRawExecDriver_Fingerprint(t *testing.T) { @@ -284,7 +285,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) { Name: "sleep", Config: map[string]interface{}{ "command": testtask.Path(), - "args": []string{"sleep", "1s"}, + "args": []string{"sleep", "5s"}, }, Resources: basicResources, } @@ -303,7 +304,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) { } go func() { - time.Sleep(100 * time.Millisecond) + time.Sleep(testutil.TestMultiplier() * 1 * time.Second) err := handle.Kill() if err != nil { t.Fatalf("err: %v", err) @@ -316,7 +317,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) { if res.Successful() { t.Fatal("should err") } - case <-time.After(2 * time.Second): + case <-time.After(3 * time.Second): t.Fatalf("timeout") } } diff --git a/client/restarts.go b/client/restarts.go index 6da456e83..3f1b41acd 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -32,7 +32,7 @@ func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) { if now.After(end) { r.count = 0 r.startTime = now - return true, r.jitter() + return r.shouldRestart(exitCode), r.jitter() } r.count++ @@ -59,7 +59,12 @@ func (r *RestartTracker) shouldRestart(exitCode int) bool { // jitter returns the delay time plus a jitter. func (r *RestartTracker) jitter() time.Duration { + // Get the delay and ensure it is valid. d := r.policy.Delay.Nanoseconds() + if d == 0 { + d = 1 + } + j := float64(r.rand.Int63n(d)) * jitter return time.Duration(d + int64(j)) } diff --git a/client/task_runner_test.go b/client/task_runner_test.go index eae7eb8e6..3c9db8ac6 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -61,7 +61,7 @@ func TestTaskRunner_SimpleRun(t *testing.T) { select { case <-tr.WaitCh(): - case <-time.After(2 * time.Second): + case <-time.After(15 * time.Second): t.Fatalf("timeout") } @@ -100,7 +100,7 @@ func TestTaskRunner_Destroy(t *testing.T) { select { case <-tr.WaitCh(): - case <-time.After(8 * time.Second): + case <-time.After(15 * time.Second): t.Fatalf("timeout") } @@ -158,7 +158,7 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) { defer tr.Destroy() // Snapshot state - time.Sleep(1 * time.Second) + time.Sleep(2 * time.Second) if err := tr.SaveState(); err != nil { t.Fatalf("err: %v", err) } @@ -175,7 +175,7 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) { defer tr2.Destroy() // Destroy and wait - time.Sleep(1 * time.Second) + time.Sleep(testutil.TestMultiplier() * 2 * time.Second) if tr2.handle == nil { t.Fatalf("RestoreState() didn't open handle") } diff --git a/helper/testtask/testtask.go b/helper/testtask/testtask.go index 9aea5bb7a..cfcf205f5 100644 --- a/helper/testtask/testtask.go +++ b/helper/testtask/testtask.go @@ -9,6 +9,7 @@ import ( "os/exec" "time" + "github.com/hashicorp/nomad/client/driver/env" "github.com/hashicorp/nomad/nomad/structs" "github.com/kardianos/osext" ) @@ -22,6 +23,12 @@ func Path() string { return path } +// SetEnv configures the environment of the task so that Run executes a testtask +// script when called from within cmd. +func SetEnv(env *env.TaskEnvironment) { + env.AppendEnvvars(map[string]string{"TEST_TASK": "execute"}) +} + // SetCmdEnv configures the environment of cmd so that Run executes a testtask // script when called from within cmd. func SetCmdEnv(cmd *exec.Cmd) { diff --git a/scripts/test.sh b/scripts/test.sh index 6324447cd..4b80af79f 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -10,4 +10,4 @@ go build -o $TEMPDIR/nomad || exit 1 # Run the tests echo "--> Running tests" -go list ./... | sudo -E PATH=$TEMPDIR:$PATH xargs -n1 go test -cover -timeout=80s +go list ./... | sudo -E PATH=$TEMPDIR:$PATH xargs -n1 go test -cover -timeout=180s diff --git a/testutil/wait.go b/testutil/wait.go index a9ec1e702..05406c9c4 100644 --- a/testutil/wait.go +++ b/testutil/wait.go @@ -1,18 +1,27 @@ package testutil import ( + "os" "testing" "time" "github.com/hashicorp/nomad/nomad/structs" ) +const ( + // TravisRunEnv is an environment variable that is set if being run by + // Travis. + TravisRunEnv = "TRAVIS_RUN" +) + type testFn func() (bool, error) type errorFn func(error) func WaitForResult(test testFn, error errorFn) { - retries := 1000 + WaitForResultRetries(1000*TestMultiplier(), test, error) +} +func WaitForResultRetries(retries int, test testFn, error errorFn) { for retries > 0 { time.Sleep(10 * time.Millisecond) retries-- @@ -28,6 +37,16 @@ func WaitForResult(test testFn, error errorFn) { } } +// TestMultiplier returns a multiplier for retries and waits given environment +// the tests are being run under. +func TestMultiplier() int { + if _, ok := os.LookupEnv(TravisRunEnv); ok { + return 3 + } + + return 1 +} + type rpcFn func(string, interface{}, interface{}) error func WaitForLeader(t *testing.T, rpc rpcFn) {