From aab61497e20cd98651cb894ebc176ecb1749d7cf Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 19 Apr 2018 08:12:18 -0500 Subject: [PATCH] Fix deadlock in deployment watcher when deployment starts with no allocations and eventually has failed allocations --- nomad/deploymentwatcher/deployment_watcher.go | 3 +- .../deployments_watcher_test.go | 62 +++++++++++++++++++ nomad/deploymentwatcher/testutil_test.go | 21 +++++++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/nomad/deploymentwatcher/deployment_watcher.go b/nomad/deploymentwatcher/deployment_watcher.go index 31b353b6a..22b8a4b5e 100644 --- a/nomad/deploymentwatcher/deployment_watcher.go +++ b/nomad/deploymentwatcher/deployment_watcher.go @@ -404,8 +404,9 @@ FAIL: // deadline timer next := getDeploymentProgressCutoff(w.getDeployment()) if !next.Equal(currentDeadline) { + prevDeadlineZero := currentDeadline.IsZero() currentDeadline = next - if !deadlineTimer.Stop() { + if !prevDeadlineZero && !deadlineTimer.Stop() { <-deadlineTimer.C } deadlineTimer.Reset(next.Sub(time.Now())) diff --git a/nomad/deploymentwatcher/deployments_watcher_test.go b/nomad/deploymentwatcher/deployments_watcher_test.go index 2e84cc7a6..168f04612 100644 --- a/nomad/deploymentwatcher/deployments_watcher_test.go +++ b/nomad/deploymentwatcher/deployments_watcher_test.go @@ -877,6 +877,68 @@ func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) { }) } +// Test scenario where deployment initially has no progress deadline +// After the deployment is updated, a failed alloc's DesiredTransition should be set +func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) { + t.Parallel() + assert := assert.New(t) + w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) + + // Create a job, and a deployment + j := mock.Job() + j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() + j.TaskGroups[0].Update.MaxParallel = 2 + j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond + j.Stable = true + d := mock.Deployment() + d.JobID = j.ID + + assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") + assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") + + a := mock.Alloc() + a.CreateTime = time.Now().UnixNano() + a.DeploymentID = d.ID + /*a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + Timestamp: time.Now(), + }*/ + assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") + + d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond + // Update the deployment with a progress deadline + assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") + + // Match on DesiredTransition set to Reschedule for the failed alloc + m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID}) + m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() + + w.SetEnabled(true, m.state) + testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, + func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") }) + + // Update the alloc to be unhealthy + a2 := a.Copy() + a2.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + Timestamp: time.Now(), + } + assert.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2})) + + // Wait for the alloc's DesiredState to set reschedule + testutil.WaitForResult(func() (bool, error) { + a, err := m.state.AllocByID(nil, a.ID) + if err != nil { + return false, err + } + dt := a.DesiredTransition + shouldReschedule := dt.Reschedule != nil && *dt.Reschedule + return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule) + }, func(err error) { + t.Fatal(err) + }) +} + // Tests that the watcher fails rollback when the spec hasn't changed func TestDeploymentWatcher_RollbackFailed(t *testing.T) { t.Parallel() diff --git a/nomad/deploymentwatcher/testutil_test.go b/nomad/deploymentwatcher/testutil_test.go index 78b0f82e8..96753a758 100644 --- a/nomad/deploymentwatcher/testutil_test.go +++ b/nomad/deploymentwatcher/testutil_test.go @@ -69,6 +69,27 @@ func matchUpdateAllocDesiredTransitions(deploymentIDs []string) func(update *str } } +// matchUpdateAllocDesiredTransitionReschedule is used to match allocs that have their DesiredTransition set to Reschedule +func matchUpdateAllocDesiredTransitionReschedule(allocIDs []string) func(update *structs.AllocUpdateDesiredTransitionRequest) bool { + return func(update *structs.AllocUpdateDesiredTransitionRequest) bool { + amap := make(map[string]struct{}, len(allocIDs)) + for _, d := range allocIDs { + amap[d] = struct{}{} + } + + for allocID, dt := range update.Allocs { + if _, ok := amap[allocID]; !ok { + return false + } + if !*dt.Reschedule { + return false + } + } + + return true + } +} + func (m *mockBackend) UpsertJob(job *structs.Job) (uint64, error) { m.Called(job) i := m.nextIndex()