From 1ae8261139f2f1127108c0806b8aea3dbfad07cc Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 4 Jan 2019 15:19:57 -0800 Subject: [PATCH] client: emit Killing/Killed task events We were just emitting Killed/Terminated events before. In v0.8 we emitted Killing/Killed, but lacked Terminated when explicitly stopping a task. This change makes it so Terminated is always included, whether explicitly stopping a task or it exiting on its own. New output: 2019-01-04T14:58:51-08:00 Killed Task successfully killed 2019-01-04T14:58:51-08:00 Terminated Exit Code: 130, Signal: 2 2019-01-04T14:58:51-08:00 Killing Sent interrupt 2019-01-04T14:58:51-08:00 Leader Task Dead Leader Task in Group dead 2019-01-04T14:58:49-08:00 Started Task started by client 2019-01-04T14:58:49-08:00 Task Setup Building Task Directory 2019-01-04T14:58:49-08:00 Received Task received by client Old (v0.8.6) output: 2019-01-04T22:14:54Z Killed Task successfully killed 2019-01-04T22:14:54Z Killing Sent interrupt. Waiting 5s before force killing 2019-01-04T22:14:54Z Leader Task Dead Leader Task in Group dead 2019-01-04T22:14:53Z Started Task started by client 2019-01-04T22:14:53Z Task Setup Building Task Directory 2019-01-04T22:14:53Z Received Task received by client --- client/allocrunner/alloc_runner.go | 20 ++++++++++++++++++-- client/allocrunner/taskrunner/task_runner.go | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 73ae31f9c..a599d9218 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -458,7 +458,23 @@ func (ar *allocRunner) handleTaskStateUpdates() { ar.logger.Debug("task failure, destroying all tasks", "failed_task", killTask) } + // Emit kill event for live runners + for _, tr := range liveRunners { + tr.EmitEvent(killEvent) + } + + // Kill 'em all states = ar.killTasks() + + // Wait for TaskRunners to exit before continuing to + // prevent looping before TaskRunners have transitioned + // to Dead. + for _, tr := range liveRunners { + select { + case <-tr.WaitCh(): + case <-ar.waitCh: + } + } } // Get the client allocation @@ -485,7 +501,7 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { continue } - err := tr.Kill(context.TODO(), structs.NewTaskEvent(structs.TaskKilled)) + err := tr.Kill(context.TODO(), structs.NewTaskEvent(structs.TaskKilling)) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping leader task", "error", err, "task_name", name) } @@ -505,7 +521,7 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { wg.Add(1) go func(name string, tr *taskrunner.TaskRunner) { defer wg.Done() - err := tr.Kill(context.TODO(), structs.NewTaskEvent(structs.TaskKilled)) + err := tr.Kill(context.TODO(), structs.NewTaskEvent(structs.TaskKilling)) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping task", "error", err, "task_name", name) } diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index e120f0ca5..364594eef 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -546,6 +546,7 @@ func (tr *TaskRunner) shouldRestart() (bool, time.Duration) { case structs.TaskKilled: // Never restart an explicitly killed task. Kill method handles // updating the server. + tr.EmitEvent(structs.NewTaskEvent(state)) return false, 0 case structs.TaskNotRestarting, structs.TaskTerminated: tr.logger.Info("not restarting task", "reason", reason)