Fix two issues during client restore state

This PR fixes two issues:

1) A close of a nil stopCollection channel when restoring and prestart
fails. The failure will cause the killCh to be triggered which will
close collection before it has been initialized.

2) Fixes a deadlock in which the handleWaitCh is never triggered since
it is not initialized when there is an error in prestart and the killCh
is triggered.

Both fixes are by maintaining the loop invariant that the two channels
are valid after there is a handle.
This commit is contained in:
Alex Dadgar
2017-02-28 10:29:12 -08:00
parent 90cd50b597
commit 4826d8464e

View File

@@ -874,6 +874,18 @@ func (r *TaskRunner) run() {
var stopCollection chan struct{}
var handleWaitCh chan *dstructs.WaitResult
// If we already have a handle, populate the stopCollection and handleWaitCh
// to fix the invariant that it exists.
r.handleLock.Lock()
handleEmpty := r.handle == nil
r.handleLock.Unlock()
if !handleEmpty {
stopCollection = make(chan struct{})
go r.collectResourceUsageStats(stopCollection)
handleWaitCh = r.handle.WaitCh()
}
for {
// Do the prestart activities
prestartResultCh := make(chan bool, 1)
@@ -895,7 +907,6 @@ func (r *TaskRunner) run() {
r.handleLock.Lock()
handleEmpty := r.handle == nil
r.handleLock.Unlock()
if handleEmpty {
startErr := r.startTask()
r.restartTracker.SetStartError(startErr)
@@ -909,14 +920,14 @@ func (r *TaskRunner) run() {
r.runningLock.Lock()
r.running = true
r.runningLock.Unlock()
}
if stopCollection == nil {
stopCollection = make(chan struct{})
go r.collectResourceUsageStats(stopCollection)
}
if stopCollection == nil {
stopCollection = make(chan struct{})
go r.collectResourceUsageStats(stopCollection)
}
handleWaitCh = r.handle.WaitCh()
handleWaitCh = r.handle.WaitCh()
}
case waitRes := <-handleWaitCh:
if waitRes == nil {