From ff3dedd5346fa5b7737a33a19d838d8e928430cf Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Mon, 26 Aug 2019 13:45:58 -0400 Subject: [PATCH] Write to client store while holding lock Protect against a race where destroying and persist state goroutines race. The downside is that the database io operation will run while holding the lock and may run indefinitely. The risk of lock being long held is slow destruction, but slow io has bigger problems. --- client/allocrunner/alloc_runner.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 10bf72906..f628ef501 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -765,14 +765,15 @@ func (ar *allocRunner) destroyImpl() { // state if Run() ran at all. <-ar.taskStateUpdateHandlerCh - // Cleanup state db + // Mark alloc as destroyed + ar.destroyedLock.Lock() + + // Cleanup state db; while holding the lock to avoid + // a race periodic PersistState that may resurrect the alloc if err := ar.stateDB.DeleteAllocationBucket(ar.id); err != nil { ar.logger.Warn("failed to delete allocation state", "error", err) } - // Mark alloc as destroyed - ar.destroyedLock.Lock() - if !ar.shutdown { ar.shutdown = true close(ar.shutdownCh) @@ -785,10 +786,10 @@ func (ar *allocRunner) destroyImpl() { } func (ar *allocRunner) PersistState() error { - // note that a race exists where a goroutine attempts to persist state - // while another kicks off destruction process. - // Here, we attempt to reconcile by always deleting alloc bucket after alloc destruction - if ar.IsDestroyed() { + ar.destroyedLock.Lock() + defer ar.destroyedLock.Unlock() + + if ar.destroyed { err := ar.stateDB.DeleteAllocationBucket(ar.id) if err != nil { ar.logger.Warn("failed to delete allocation bucket", "error", err)