From ad8b5bc1418aa389d26a83f606fff160a1846bba Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 4 Nov 2016 16:57:24 -0700 Subject: [PATCH 1/3] Task state "dead" is terminal --- client/alloc_runner.go | 17 ++++++++++++----- client/task_runner.go | 15 +++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 47cc7eeeb..a6dc9de56 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -330,7 +330,8 @@ func (r *AllocRunner) setStatus(status, desc string) { } } -// setTaskState is used to set the status of a task +// setTaskState is used to set the status of a task. If state is empty then the +// event is appended but not synced with the server. The event may be omitted func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent) { r.taskStatusLock.Lock() defer r.taskStatusLock.Unlock() @@ -341,12 +342,18 @@ func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEv } // Set the tasks state. - taskState.State = state - if event.FailsTask { - taskState.Failed = true + if event != nil { + if event.FailsTask { + taskState.Failed = true + } + r.appendTaskEvent(taskState, event) } - r.appendTaskEvent(taskState, event) + if state == "" { + return + } + + taskState.State = state if state == structs.TaskStateDead { // If the task failed, we should kill all the other tasks in the task group. if taskState.Failed { diff --git a/client/task_runner.go b/client/task_runner.go index c7146018c..c47a1003a 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -674,6 +674,7 @@ func (r *TaskRunner) updatedTokenHandler() { err := fmt.Errorf("failed to build task's template manager: %v", err) r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask()) r.logger.Printf("[ERR] client: alloc %q, task %q %v", r.alloc.ID, r.task.Name, err) + r.Kill("vault", err.Error(), true) return } } @@ -708,11 +709,11 @@ func (r *TaskRunner) prestart(resultCh chan bool) { if !r.artifactsDownloaded && len(r.task.Artifacts) > 0 { r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts)) for _, artifact := range r.task.Artifacts { - // TODO wrap if err := getter.GetArtifact(r.getTaskEnv(), artifact, r.taskDir); err != nil { + wrapped := fmt.Errorf("failed to download artifact %q: %v", artifact.GetterSource, err) r.setState(structs.TaskStatePending, - structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err)) - r.restartTracker.SetStartError(structs.NewRecoverableError(err, true)) + structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(wrapped)) + r.restartTracker.SetStartError(structs.NewRecoverableError(wrapped, true)) goto RESTART } } @@ -785,6 +786,8 @@ func (r *TaskRunner) postrun() { // run is the main run loop that handles starting the application, destroying // it, restarts and signals. func (r *TaskRunner) run() { + defer r.setState(structs.TaskStateDead, nil) + // Predeclare things so we can jump to the RESTART var stopCollection chan struct{} var handleWaitCh chan *dstructs.WaitResult @@ -813,7 +816,7 @@ func (r *TaskRunner) run() { startErr := r.startTask() r.restartTracker.SetStartError(startErr) if startErr != nil { - r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr)) + r.setState("", structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr)) goto RESTART } @@ -845,7 +848,7 @@ func (r *TaskRunner) run() { // Log whether the task was successful or not. r.restartTracker.SetWaitResult(waitRes) - r.setState(structs.TaskStateDead, r.waitErrorToEvent(waitRes)) + r.setState("", r.waitErrorToEvent(waitRes)) if !waitRes.Successful() { r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes) } else { @@ -1003,7 +1006,7 @@ func (r *TaskRunner) killTask(killingEvent *structs.TaskEvent) { r.runningLock.Unlock() // Store that the task has been destroyed and any associated error. - r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err)) + r.setState("", structs.NewTaskEvent(structs.TaskKilled).SetKillError(err)) } // startTask creates the driver and starts the task. From 392e7b609c3653bbf94326c88ac7c15881ac940a Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 4 Nov 2016 17:11:07 -0700 Subject: [PATCH 2/3] More precise marking of dead --- client/task_runner.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/client/task_runner.go b/client/task_runner.go index c47a1003a..979f04b49 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -786,8 +786,6 @@ func (r *TaskRunner) postrun() { // run is the main run loop that handles starting the application, destroying // it, restarts and signals. func (r *TaskRunner) run() { - defer r.setState(structs.TaskStateDead, nil) - // Predeclare things so we can jump to the RESTART var stopCollection chan struct{} var handleWaitCh chan *dstructs.WaitResult @@ -802,6 +800,7 @@ func (r *TaskRunner) run() { select { case success := <-prestartResultCh: if !success { + r.setState(structs.TaskStateDead, nil) return } case <-r.startCh: @@ -903,6 +902,7 @@ func (r *TaskRunner) run() { r.killTask(killEvent) close(stopCollection) + r.setState(structs.TaskStateDead, nil) return } } @@ -910,6 +910,7 @@ func (r *TaskRunner) run() { RESTART: restart := r.shouldRestart() if !restart { + r.setState(structs.TaskStateDead, nil) return } From c5b7a08807dd187536512d12410e5cabda422929 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 4 Nov 2016 17:15:58 -0700 Subject: [PATCH 3/3] Test fix --- client/task_runner_test.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/client/task_runner_test.go b/client/task_runner_test.go index adc1ca4fd..2ff601a93 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -38,11 +38,15 @@ type MockTaskStateUpdater struct { } func (m *MockTaskStateUpdater) Update(name, state string, event *structs.TaskEvent) { - m.state = state - if event.FailsTask { - m.failed = true + if state != "" { + m.state = state + } + if event != nil { + if event.FailsTask { + m.failed = true + } + m.events = append(m.events, event) } - m.events = append(m.events, event) } func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) {