From 3974dfa98c02b5f670628de970ac51156cc08b5b Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 21 Jul 2017 12:24:40 -0700 Subject: [PATCH] Fix TestAllocRunner_TaskLeader_StopTG Also make alloc runner tests less racy. Basically every alloc runner test used to have races with `upd.{Count,Allocs}` --- client/alloc_runner_test.go | 193 +++++++++++++++++++++--------------- 1 file changed, 115 insertions(+), 78 deletions(-) diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index cfdbd1c8f..3065cb950 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "strings" + "sync" "testing" "text/template" "time" @@ -16,6 +17,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/kr/pretty" "github.com/hashicorp/nomad/client/config" ctestutil "github.com/hashicorp/nomad/client/testutil" @@ -23,13 +25,26 @@ import ( ) type MockAllocStateUpdater struct { - Count int Allocs []*structs.Allocation + mu sync.Mutex } +// Update fulfills the TaskStateUpdater interface func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) { - m.Count += 1 + m.mu.Lock() m.Allocs = append(m.Allocs, alloc) + m.mu.Unlock() +} + +// Last returns the total number of updates and the last alloc (or nil) +func (m *MockAllocStateUpdater) Last() (int, *structs.Allocation) { + m.mu.Lock() + defer m.mu.Unlock() + n := len(m.Allocs) + if n == 0 { + return 0, nil + } + return n, m.Allocs[n-1].Copy() } func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) { @@ -61,10 +76,10 @@ func TestAllocRunner_SimpleRun(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -96,10 +111,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil { return false, fmt.Errorf("want deployment status unhealthy; got unset") } else if *last.DeploymentStatus.Healthy { @@ -136,10 +151,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil { return false, fmt.Errorf("want deployment status unhealthy; got unset") } else if *last.DeploymentStatus.Healthy { @@ -181,10 +196,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil { return false, fmt.Errorf("want deployment status unhealthy; got unset") } else if !*last.DeploymentStatus.Healthy { @@ -249,10 +264,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil { return false, fmt.Errorf("want deployment status unhealthy; got unset") } else if !*last.DeploymentStatus.Healthy { @@ -291,10 +306,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil { return false, fmt.Errorf("want deployment status unhealthy; got unset") } else if !*last.DeploymentStatus.Healthy { @@ -306,17 +321,16 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) { }) // Mimick an update to a new deployment id - oldCount := upd.Count - last := upd.Allocs[oldCount-1].Copy() + oldCount, last := upd.Last() last.DeploymentStatus = nil last.DeploymentID = structs.GenerateUUID() ar.Update(last) testutil.WaitForResult(func() (bool, error) { - if upd.Count <= oldCount { + newCount, last := upd.Last() + if newCount <= oldCount { return false, fmt.Errorf("No new updates") } - last := upd.Allocs[upd.Count-1] if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil { return false, fmt.Errorf("want deployment status unhealthy; got unset") } else if !*last.DeploymentStatus.Healthy { @@ -360,10 +374,10 @@ func TestAllocRunner_RetryArtifact(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count < 6 { - return false, fmt.Errorf("Not enough updates") + count, last := upd.Last() + if min := 6; count < min { + return false, fmt.Errorf("Not enough updates (%d < %d)", count, min) } - last := upd.Allocs[upd.Count-1] // web task should have completed successfully while bad task // retries artififact fetching @@ -399,10 +413,10 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { go ar.Run() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusRunning { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) } @@ -418,12 +432,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { ar.Update(update) testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { - return false, nil + _, last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") } // Check the status has changed. - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -453,12 +467,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { - return false, nil + _, last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") } // Check the status has changed. - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -504,12 +518,12 @@ func TestAllocRunner_Destroy(t *testing.T) { }() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { - return false, nil + _, last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") } // Check the status has changed. - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -608,11 +622,11 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) { return false, fmt.Errorf("Incorrect number of tasks") } - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, nil } - last := upd.Allocs[upd.Count-1] return last.ClientStatus == structs.AllocClientStatusRunning, nil }, func(err error) { t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar2.alloc.TaskStates["web"]) @@ -649,10 +663,11 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] + if last.ClientStatus != structs.AllocClientStatusRunning { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) } @@ -721,12 +736,12 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) { ar2.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { - return false, nil + _, last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") } // Check the status has changed. - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -775,10 +790,11 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) { // Snapshot state testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] + if last.ClientStatus != structs.AllocClientStatusRunning { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) } @@ -803,22 +819,19 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) { defer ar2.Destroy() // Just-in-case of failure before Destroy below testutil.WaitForResult(func() (bool, error) { - if len(ar2.tasks) != 1 { - return false, fmt.Errorf("Incorrect number of tasks") + count, last := upd.Last() + if min := 3; count < min { + return false, fmt.Errorf("expected at least %d updates but found %d", min, count) } - - if upd.Count < 3 { - return false, nil - } - - for _, ev := range ar2.taskStates["web"].Events { + for _, ev := range last.TaskStates["web"].Events { if strings.HasSuffix(ev.RestartReason, pre06ScriptCheckReason) { return true, nil } } return false, fmt.Errorf("no restart with proper reason found") }, func(err error) { - t.Fatalf("err: %v\nAllocs: %#v\nWeb State: %#v", err, upd.Allocs, ar2.taskStates["web"]) + count, last := upd.Last() + t.Fatalf("err: %v\nAllocs: %d\nweb state: % #v", err, count, pretty.Formatter(last.TaskStates["web"])) }) // Destroy and wait @@ -996,10 +1009,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusFailed { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed) } @@ -1061,12 +1074,13 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) { ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) ar.alloc.TaskResources[task2.Name] = task2.Resources go ar.Run() + defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -1142,30 +1156,53 @@ func TestAllocRunner_TaskLeader_StopTG(t *testing.T) { } ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3) ar.alloc.TaskResources[task2.Name] = task2.Resources + defer ar.Destroy() - // Destroy before running so it shuts down the alloc runner right after - // starting all tasks - ar.Destroy() go ar.Run() - select { - case <-ar.WaitCh(): - case <-time.After(8 * time.Second): - t.Fatalf("timed out waiting for alloc runner to exit") - } - if len(upd.Allocs) != 1 { - t.Fatalf("expected 1 alloc update but found %d", len(upd.Allocs)) - } + // Wait for tasks to start + oldCount, last := upd.Last() + testutil.WaitForResult(func() (bool, error) { + oldCount, last = upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + if n := len(last.TaskStates); n != 3 { + return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n) + } + for name, state := range last.TaskStates { + if state.State != structs.TaskStateRunning { + return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State) + } + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) - a := upd.Allocs[0] - if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower1"].FinishedAt.UnixNano() { - t.Fatalf("expected leader to finish before follower1: %s >= %s", - a.TaskStates["leader"].FinishedAt, a.TaskStates["follower1"].FinishedAt) - } - if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower2"].FinishedAt.UnixNano() { - t.Fatalf("expected leader to finish before follower2: %s >= %s", - a.TaskStates["leader"].FinishedAt, a.TaskStates["follower2"].FinishedAt) - } + // Stop alloc + update := ar.Alloc() + update.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(update) + + // Wait for tasks to stop + testutil.WaitForResult(func() (bool, error) { + newCount, last := upd.Last() + if newCount == oldCount { + return false, fmt.Errorf("no new updates (count: %d)", newCount) + } + if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() { + t.Fatalf("expected leader to finish before follower1: %s >= %s", + last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt) + } + if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() { + t.Fatalf("expected leader to finish before follower2: %s >= %s", + last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } func TestAllocRunner_MoveAllocDir(t *testing.T) { @@ -1181,10 +1218,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) { defer ar.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd.Count == 0 { + _, last := upd.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd.Allocs[upd.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) } @@ -1213,10 +1250,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) { defer ar1.Destroy() testutil.WaitForResult(func() (bool, error) { - if upd1.Count == 0 { + _, last := upd1.Last() + if last == nil { return false, fmt.Errorf("No updates") } - last := upd1.Allocs[upd1.Count-1] if last.ClientStatus != structs.AllocClientStatusComplete { return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) }