mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 10:25:42 +03:00
Check if the PID is alive instead of heartbeating through modify time
This commit is contained in:
@@ -256,10 +256,6 @@ func (s *Spawner) waitOnStatusFile() (int, error) {
|
||||
return s.readExitCode()
|
||||
}
|
||||
|
||||
// Store the mod time as a way to heartbeat. If the file doesn't get touched
|
||||
// then we know the spawner has died. This avoids an infinite loop.
|
||||
prevModTime := stat.ModTime()
|
||||
|
||||
// Wait on watcher.
|
||||
for {
|
||||
select {
|
||||
@@ -277,17 +273,10 @@ func (s *Spawner) waitOnStatusFile() (int, error) {
|
||||
case err := <-watcher.Errors:
|
||||
return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err)
|
||||
case <-time.After(5 * time.Second):
|
||||
stat, err := os.Stat(s.StateFile)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err)
|
||||
}
|
||||
|
||||
modTime := stat.ModTime()
|
||||
if modTime.Equal(prevModTime) {
|
||||
// Check if the process is still alive.
|
||||
if !s.Alive() {
|
||||
return -1, fmt.Errorf("Task is dead and exit code unreadable")
|
||||
}
|
||||
|
||||
prevModTime = modTime
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
14
client/spawn/spawn_posix.go
Normal file
14
client/spawn/spawn_posix.go
Normal file
@@ -0,0 +1,14 @@
|
||||
// +build !windows
|
||||
|
||||
package spawn
|
||||
|
||||
import "syscall"
|
||||
|
||||
func (s *Spawner) Alive() bool {
|
||||
if s.spawn == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
err := s.spawn.Signal(syscall.Signal(0))
|
||||
return err == nil
|
||||
}
|
||||
@@ -214,7 +214,7 @@ func TestSpawn_NonParentWait(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpawn_DeadSpawnDaemon(t *testing.T) {
|
||||
func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) {
|
||||
f, err := ioutil.TempFile("", "")
|
||||
if err != nil {
|
||||
t.Fatalf("TempFile() failed")
|
||||
@@ -250,3 +250,42 @@ func TestSpawn_DeadSpawnDaemon(t *testing.T) {
|
||||
t.Fatalf("Wait() should have failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) {
|
||||
f, err := ioutil.TempFile("", "")
|
||||
if err != nil {
|
||||
t.Fatalf("TempFile() failed")
|
||||
}
|
||||
defer os.Remove(f.Name())
|
||||
|
||||
var spawnPid int
|
||||
cb := func(pid int) error {
|
||||
spawnPid = pid
|
||||
return nil
|
||||
}
|
||||
|
||||
spawn := NewSpawner(f.Name())
|
||||
spawn.SetCommand(exec.Command("sleep", "5"))
|
||||
if err := spawn.Spawn(cb); err != nil {
|
||||
t.Fatalf("Spawn() errored: %v", err)
|
||||
}
|
||||
|
||||
proc, err := os.FindProcess(spawnPid)
|
||||
if err != nil {
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
if err := proc.Kill(); err != nil {
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
if _, err := proc.Wait(); err != nil {
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
// Force the wait to assume non-parent.
|
||||
spawn.SpawnPpid = 0
|
||||
if _, err := spawn.Wait(); err == nil {
|
||||
t.Fatalf("Wait() should have failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
21
client/spawn/spawn_windows.go
Normal file
21
client/spawn/spawn_windows.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package spawn
|
||||
|
||||
import "syscall"
|
||||
|
||||
// STILL_ACTIVE is the process exit code value that Alive treats as "the
// process has not exited yet" when comparing the result of
// GetExitCodeProcess.
// NOTE(review): the name appears to mirror the Windows API constant, hence
// the non-Go spelling — confirm before renaming.
const STILL_ACTIVE = 259
|
||||
|
||||
func (s *Spawner) Alive() bool {
|
||||
const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE
|
||||
h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid))
|
||||
if e != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var ec uint32
|
||||
e = syscall.GetExitCodeProcess(h, &ec)
|
||||
if e != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return ec == STILL_ACTIVE
|
||||
}
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
type SpawnDaemonCommand struct {
|
||||
@@ -185,17 +184,6 @@ func (c *SpawnDaemonCommand) Run(args []string) int {
|
||||
// Indicate that the command was started successfully.
|
||||
c.outputStartStatus(nil, 0)
|
||||
|
||||
// Start a go routine that touches the exit file periodically.
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-time.After(2 * time.Second):
|
||||
now := time.Now()
|
||||
os.Chtimes(c.config.ExitStatusFile, now, now)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait and then output the exit status.
|
||||
return c.writeExitStatus(c.config.Cmd.Wait())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user