diff --git a/client/allocrunner/taskrunner/logmon_hook_unix_test.go b/client/allocrunner/taskrunner/logmon_hook_unix_test.go
index 582692a27..1d9e66c16 100644
--- a/client/allocrunner/taskrunner/logmon_hook_unix_test.go
+++ b/client/allocrunner/taskrunner/logmon_hook_unix_test.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"syscall"
 	"testing"
+	"time"
 
 	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
 	"github.com/hashicorp/nomad/helper/testlog"
@@ -86,3 +87,89 @@ func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) {
 	// Running stop should shutdown logmon
 	require.NoError(t, hook.Stop(context.Background(), nil, nil))
 }
+
+// TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the
+// Nomad client is calling Start() and asserts that we recover and spawn a new logmon.
+func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) {
+	t.Parallel()
+
+	alloc := mock.BatchAlloc()
+	task := alloc.Job.TaskGroups[0].Tasks[0]
+
+	dir, err := ioutil.TempDir("", "nomadtest")
+	require.NoError(t, err)
+	defer func() {
+		require.NoError(t, os.RemoveAll(dir))
+	}()
+
+	hookConf := newLogMonHookConfig(task.Name, dir)
+	hook := newLogMonHook(hookConf, testlog.HCLogger(t))
+
+	req := interfaces.TaskPrestartRequest{
+		Task: task,
+	}
+	resp := interfaces.TaskPrestartResponse{}
+
+	// First start
+	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
+	// Best-effort cleanup; the error from Stop is intentionally not checked.
+	defer hook.Stop(context.Background(), nil, nil)
+
+	origState := resp.State
+	origHookData := resp.State[logmonReattachKey]
+	require.NotEmpty(t, origHookData)
+
+	// Pluck the PID out of the reattach data so we can synthesize a crash
+	reattach := struct {
+		Pid int
+	}{}
+	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
+	pid := reattach.Pid
+	require.NotZero(t, pid)
+
+	proc, err := os.FindProcess(pid)
+	require.NoError(t, err)
+
+	// Assert logmon is running
+	require.NoError(t, proc.Signal(syscall.Signal(0)))
+
+	// SIGSTOP freezes the process without it being considered exited, so
+	// the process is non-exited at the beginning of the second Prestart
+	// call; we then kill the process while that call is running.
+	require.NoError(t, proc.Signal(syscall.SIGSTOP))
+
+	// Buffered so the goroutine can always deliver its result and exit,
+	// even if the test aborts before reading from the channel.
+	crashErrCh := make(chan error, 1)
+	go func() {
+		time.Sleep(2 * time.Second)
+
+		// Always attempt both signals; report the first failure.
+		contErr := proc.Signal(syscall.SIGCONT)
+		killErr := proc.Signal(os.Kill)
+		if contErr != nil {
+			crashErrCh <- contErr
+			return
+		}
+		crashErrCh <- killErr
+	}()
+
+	req.PreviousState = map[string]string{
+		logmonReattachKey: origHookData,
+	}
+
+	initLogmon, initClient := hook.logmon, hook.logmonPluginClient
+
+	resp = interfaces.TaskPrestartResponse{}
+	err = hook.Prestart(context.Background(), &req, &resp)
+
+	// Join the goroutine so it cannot outlive the test, and make sure the
+	// crash was actually injected before judging the Prestart result.
+	require.NoError(t, <-crashErrCh, "failed to resume and kill logmon")
+	require.NoError(t, err)
+	require.NotEqual(t, origState, resp.State)
+
+	// assert that we got a new client and logmon
+	require.True(t, initLogmon != hook.logmon)
+	require.True(t, initClient != hook.logmonPluginClient)
+}