Fix deadlock in deployment watcher when deployment starts with no allocations and eventually has failed allocations

This commit is contained in:
Preetha Appan
2018-04-19 08:12:18 -05:00
parent 32557a1a99
commit aab61497e2
3 changed files with 85 additions and 1 deletions

View File

@@ -404,8 +404,9 @@ FAIL:
// deadline timer
next := getDeploymentProgressCutoff(w.getDeployment())
if !next.Equal(currentDeadline) {
prevDeadlineZero := currentDeadline.IsZero()
currentDeadline = next
if !deadlineTimer.Stop() {
if !prevDeadlineZero && !deadlineTimer.Stop() {
<-deadlineTimer.C
}
deadlineTimer.Reset(next.Sub(time.Now()))

View File

@@ -877,6 +877,68 @@ func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) {
})
}
// Test scenario where deployment initially has no progress deadline
// After the deployment is updated, a failed alloc's DesiredTransition should be set
func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) {
t.Parallel()
assert := assert.New(t)
w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
// Create a job, and a deployment
j := mock.Job()
j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
j.TaskGroups[0].Update.MaxParallel = 2
j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
j.Stable = true
d := mock.Deployment()
d.JobID = j.ID
assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
a := mock.Alloc()
a.CreateTime = time.Now().UnixNano()
a.DeploymentID = d.ID
/*a.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(false),
Timestamp: time.Now(),
}*/
assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
// Update the deployment with a progress deadline
assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
// Match on DesiredTransition set to Reschedule for the failed alloc
m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID})
m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
w.SetEnabled(true, m.state)
testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
// Update the alloc to be unhealthy
a2 := a.Copy()
a2.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(false),
Timestamp: time.Now(),
}
assert.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
// Wait for the alloc's DesiredState to set reschedule
testutil.WaitForResult(func() (bool, error) {
a, err := m.state.AllocByID(nil, a.ID)
if err != nil {
return false, err
}
dt := a.DesiredTransition
shouldReschedule := dt.Reschedule != nil && *dt.Reschedule
return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule)
}, func(err error) {
t.Fatal(err)
})
}
// Tests that the watcher fails rollback when the spec hasn't changed
func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
t.Parallel()

View File

@@ -69,6 +69,27 @@ func matchUpdateAllocDesiredTransitions(deploymentIDs []string) func(update *str
}
}
// matchUpdateAllocDesiredTransitionReschedule is used to match allocs that have their DesiredTransition set to Reschedule
func matchUpdateAllocDesiredTransitionReschedule(allocIDs []string) func(update *structs.AllocUpdateDesiredTransitionRequest) bool {
return func(update *structs.AllocUpdateDesiredTransitionRequest) bool {
amap := make(map[string]struct{}, len(allocIDs))
for _, d := range allocIDs {
amap[d] = struct{}{}
}
for allocID, dt := range update.Allocs {
if _, ok := amap[allocID]; !ok {
return false
}
if !*dt.Reschedule {
return false
}
}
return true
}
}
func (m *mockBackend) UpsertJob(job *structs.Job) (uint64, error) {
m.Called(job)
i := m.nextIndex()