From cf66e25e57e41260305ab9876fd17df56117759f Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 13 Feb 2019 15:34:17 -0800 Subject: [PATCH] client: restart on recoverable StartTask errors Fixes restarting on recoverable errors from StartTask. Ports TestTaskRunner_Run_RecoverableStartError from 0.8 which discovered the bug. --- client/allocrunner/taskrunner/task_runner.go | 4 +- .../taskrunner/task_runner_test.go | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 38fb488db..10a7fac5e 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -656,7 +656,9 @@ func (tr *TaskRunner) runDriver() error { return fmt.Errorf("failed to start task after driver exited unexpectedly: %v", err) } } else { - return fmt.Errorf("driver start failed: %v", err) + // Do *NOT* wrap the error here without maintaining + // whether or not it is Recoverable. + return err } } diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index 3fe650070..2dedc011f 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -1197,6 +1197,47 @@ func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) { require.Equal(t, structs.TaskTerminated, state.Events[3].Type) } +// TestTaskRunner_Run_RecoverableStartError asserts tasks are restarted if they +// return a recoverable error from StartTask. 
+func TestTaskRunner_Run_RecoverableStartError(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Config = map[string]interface{}{ + "start_error": "driver failure", + "start_error_recoverable": true, + } + + // Make the restart policy retry once + alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{ + Attempts: 1, + Interval: 10 * time.Minute, + Delay: 0, + Mode: structs.RestartPolicyModeFail, + } + + tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name) + defer cleanup() + + select { + case <-tr.WaitCh(): + case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): + require.Fail(t, "timed out waiting for task to exit") + } + + state := tr.TaskState() + require.Equal(t, structs.TaskStateDead, state.State) + require.True(t, state.Failed) + require.Len(t, state.Events, 6, pretty.Sprint(state.Events)) + require.Equal(t, structs.TaskReceived, state.Events[0].Type) + require.Equal(t, structs.TaskSetup, state.Events[1].Type) + require.Equal(t, structs.TaskDriverFailure, state.Events[2].Type) + require.Equal(t, structs.TaskRestarting, state.Events[3].Type) + require.Equal(t, structs.TaskDriverFailure, state.Events[4].Type) + require.Equal(t, structs.TaskNotRestarting, state.Events[5].Type) +} + // testWaitForTaskToStart waits for the task to be running or fails the test func testWaitForTaskToStart(t *testing.T, tr *TaskRunner) { testutil.WaitForResult(func() (bool, error) {