From 4958be618c269ec649289740cfb72233d78567bc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 14:50:44 -0800 Subject: [PATCH] Check if the PID is alive instead of heartbeating through modify time --- client/spawn/spawn.go | 15 ++----------- client/spawn/spawn_posix.go | 14 ++++++++++++ client/spawn/spawn_test.go | 41 ++++++++++++++++++++++++++++++++++- client/spawn/spawn_windows.go | 21 ++++++++++++++++++ command/spawn_daemon.go | 12 ---------- 5 files changed, 77 insertions(+), 26 deletions(-) create mode 100644 client/spawn/spawn_posix.go create mode 100644 client/spawn/spawn_windows.go diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index ac0d8c444..5338b8777 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -256,10 +256,6 @@ func (s *Spawner) waitOnStatusFile() (int, error) { return s.readExitCode() } - // Store the mod time as a way to heartbeat. If the file doesn't get touched - // then we know the spawner has died. This avoids an infinite loop. - prevModTime := stat.ModTime() - // Wait on watcher. for { select { @@ -277,17 +273,10 @@ func (s *Spawner) waitOnStatusFile() (int, error) { case err := <-watcher.Errors: return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) case <-time.After(5 * time.Second): - stat, err := os.Stat(s.StateFile) - if err != nil { - return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) - } - - modTime := stat.ModTime() - if modTime.Equal(prevModTime) { + // Check if the process is still alive. + if !s.Alive() { return -1, fmt.Errorf("Task is dead and exit code unreadable") } - - prevModTime = modTime } } } diff --git a/client/spawn/spawn_posix.go b/client/spawn/spawn_posix.go new file mode 100644 index 000000000..7df381064 --- /dev/null +++ b/client/spawn/spawn_posix.go @@ -0,0 +1,14 @@ +// +build !windows + +package spawn + +import "syscall" + +func (s *Spawner) Alive() bool { + if s.spawn == nil { + return false + } + + err := s.spawn.Signal(syscall.Signal(0)) + return err == nil +} diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index e8ddfbaf5..9553470a0 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -214,7 +214,7 @@ func TestSpawn_NonParentWait(t *testing.T) { } } -func TestSpawn_DeadSpawnDaemon(t *testing.T) { +func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) { f, err := ioutil.TempFile("", "") if err != nil { t.Fatalf("TempFile() failed") @@ -250,3 +250,42 @@ func TestSpawn_DeadSpawnDaemon(t *testing.T) { t.Fatalf("Wait() should have failed: %v", err) } } + +func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "5")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} diff --git a/client/spawn/spawn_windows.go b/client/spawn/spawn_windows.go new file mode 100644 index 000000000..9683dce97 --- /dev/null +++ b/client/spawn/spawn_windows.go @@ -0,0 +1,21 @@ +package spawn + +import "syscall" + +const STILL_ACTIVE = 259 + +func (s *Spawner) Alive() bool { + const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE + h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid)) + if e != nil { + return false + } + + var ec uint32 + e = syscall.GetExitCodeProcess(h, &ec) + if e != nil { + return false + } + + return ec == STILL_ACTIVE +} diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 81f5ca2ca..52ffd8e6c 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -9,7 +9,6 @@ import ( "strconv" "strings" "syscall" - "time" ) type SpawnDaemonCommand struct { @@ -185,17 +184,6 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Indicate that the command was started successfully. c.outputStartStatus(nil, 0) - // Start a go routine that touches the exit file periodically. - go func() { - for { - select { - case <-time.After(2 * time.Second): - now := time.Now() - os.Chtimes(c.config.ExitStatusFile, now, now) - } - } - }() - // Wait and then output the exit status. return c.writeExitStatus(c.config.Cmd.Wait()) }