Merge branch 'master' of https://github.com/hashicorp/nomad into f-docker-driver-options

* 'master' of https://github.com/hashicorp/nomad: (59 commits)
  Move the executor and spawn package into driver
  Remove file watching
  Check if the PID is alive instead of heartbeating through modify time
  Update CHANGELOG.md
  nomad/watch: add a note about the Item struct
  go fmt this file
  Vet errors
  Search path
  Update website
  Make a basic executor that can be shared and fix some fingerprinting/tests
  Small improvements
  Use const value for AWS metadata URL
  Create Spawn pkg that handles IPC with the spawn-daemon and update exec_linux to use that
  Fixed the restart policy syntax
  Introducing vars to create default batch and service restart policies
  Fixed the tests
  Declaring Batch and Service default restart policies
  Fixing tests to not create a TG without restart policies
  This option only work -> This option only works
  leave -> leaving
  ...
Shiem Edelbrock committed 2015-11-05 10:48:09 -08:00
62 changed files with 3447 additions and 1416 deletions

View File

@@ -1,3 +1,9 @@
## 0.2.0 (Unreleased)
FEATURES:
* Blocking queries supported in API [GH-366]
## 0.1.2 (October 6, 2015)
IMPROVEMENTS:

View File

@@ -69,6 +69,7 @@ func TestCompose(t *testing.T) {
Operand: "=",
},
},
RestartPolicy: NewRestartPolicy(),
Tasks: []*Task{
&Task{
Name: "task1",

View File

@@ -1,19 +1,42 @@
package api
import (
"time"
)
// RestartPolicy defines how the Nomad client restarts
// tasks in a taskgroup when they fail
type RestartPolicy struct {
Interval time.Duration
Attempts int
Delay time.Duration
}
func NewRestartPolicy() *RestartPolicy {
return &RestartPolicy{
Attempts: 10,
Interval: 3 * time.Minute,
Delay: 5 * time.Second,
}
}
// TaskGroup is the unit of scheduling.
type TaskGroup struct {
Name string
Count int
Constraints []*Constraint
Tasks []*Task
Meta map[string]string
Name string
Count int
Constraints []*Constraint
Tasks []*Task
RestartPolicy *RestartPolicy
Meta map[string]string
}
// NewTaskGroup creates a new TaskGroup.
func NewTaskGroup(name string, count int) *TaskGroup {
restartPolicy := NewRestartPolicy()
return &TaskGroup{
Name: name,
Count: count,
Name: name,
Count: count,
RestartPolicy: restartPolicy,
}
}
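
As a quick, hypothetical illustration of the new field (not part of this change), a caller of the api package could swap the defaults returned by NewRestartPolicy for a tighter policy; the group name and values below are examples only:

    grp := NewTaskGroup("cache", 3)
    // Replace the default 10 attempts / 3m interval / 5s delay for this group.
    grp.RestartPolicy = &RestartPolicy{
        Attempts: 3,
        Interval: 5 * time.Minute,
        Delay:    25 * time.Second,
    }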

View File

@@ -8,8 +8,9 @@ import (
func TestTaskGroup_NewTaskGroup(t *testing.T) {
grp := NewTaskGroup("grp1", 2)
expect := &TaskGroup{
Name: "grp1",
Count: 2,
Name: "grp1",
Count: 2,
RestartPolicy: NewRestartPolicy(),
}
if !reflect.DeepEqual(grp, expect) {
t.Fatalf("expect: %#v, got: %#v", expect, grp)

View File

@@ -12,7 +12,7 @@ import (
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/executor"
"github.com/hashicorp/nomad/client/driver/executor"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -35,8 +35,11 @@ func NewExecDriver(ctx *DriverContext) Driver {
}
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
// Only enable if we are root on linux.
if runtime.GOOS != "linux" {
d.logger.Printf("[DEBUG] driver.exec: only available on linux, disabling")
return false, nil
} else if syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling")
return false, nil
}
@@ -73,10 +76,8 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
}
// Add execution permissions to the newly downloaded artifact
if runtime.GOOS != "windows" {
if err := syscall.Chmod(artifactFile, 0755); err != nil {
log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err)
}
if err := syscall.Chmod(artifactFile, 0755); err != nil {
log.Printf("[ERR] driver.exec: Error making artifact executable: %s", err)
}
}

View File

@@ -5,7 +5,6 @@ import (
"io/ioutil"
"path/filepath"
"reflect"
"runtime"
"testing"
"time"
@@ -123,13 +122,7 @@ func TestExecDriver_Start_Wait(t *testing.T) {
func TestExecDriver_Start_Artifact_basic(t *testing.T) {
ctestutils.ExecCompatible(t)
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
file := "hi_linux_amd64"
task := &structs.Task{
Name: "sleep",
@@ -172,13 +165,7 @@ func TestExecDriver_Start_Artifact_basic(t *testing.T) {
func TestExecDriver_Start_Artifact_expanded(t *testing.T) {
ctestutils.ExecCompatible(t)
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
file := "hi_linux_amd64"
task := &structs.Task{
Name: "sleep",
@@ -306,7 +293,7 @@ func TestExecDriver_Start_Kill_Wait(t *testing.T) {
if err == nil {
t.Fatal("should err")
}
case <-time.After(2 * time.Second):
case <-time.After(8 * time.Second):
t.Fatalf("timeout")
}
}

View File

@@ -1,5 +1,3 @@
// +build !linux
package executor
import (
@@ -14,24 +12,26 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
func NewExecutor() Executor {
return &UniversalExecutor{}
}
// UniversalExecutor should work everywhere, and as a result does not include
// BasicExecutor should work everywhere, and as a result does not include
// any resource restrictions or runas capabilities.
type UniversalExecutor struct {
type BasicExecutor struct {
cmd
}
func (e *UniversalExecutor) Limit(resources *structs.Resources) error {
// TODO: Update to use the Spawner.
// TODO: Have raw_exec use this as well.
func NewBasicExecutor() Executor {
return &BasicExecutor{}
}
func (e *BasicExecutor) Limit(resources *structs.Resources) error {
if resources == nil {
return errNoResources
}
return nil
}
func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
return fmt.Errorf("Error finding task dir for (%s)", taskName)
@@ -40,7 +40,7 @@ func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.Al
return nil
}
func (e *UniversalExecutor) Start() error {
func (e *BasicExecutor) Start() error {
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.cmd.Env)
@@ -67,7 +67,7 @@ func (e *UniversalExecutor) Start() error {
return e.cmd.Start()
}
func (e *UniversalExecutor) Open(pid string) error {
func (e *BasicExecutor) Open(pid string) error {
pidNum, err := strconv.Atoi(pid)
if err != nil {
return fmt.Errorf("Failed to parse pid %v: %v", pid, err)
@@ -81,12 +81,12 @@ func (e *UniversalExecutor) Open(pid string) error {
return nil
}
func (e *UniversalExecutor) Wait() error {
func (e *BasicExecutor) Wait() error {
// We don't want to call ourselves. We want to call Wait on our embedded Cmd
return e.cmd.Wait()
}
func (e *UniversalExecutor) ID() (string, error) {
func (e *BasicExecutor) ID() (string, error) {
if e.cmd.Process != nil {
return strconv.Itoa(e.cmd.Process.Pid), nil
} else {
@@ -94,14 +94,14 @@ func (e *UniversalExecutor) ID() (string, error) {
}
}
func (e *UniversalExecutor) Shutdown() error {
func (e *BasicExecutor) Shutdown() error {
return e.ForceStop()
}
func (e *UniversalExecutor) ForceStop() error {
func (e *BasicExecutor) ForceStop() error {
return e.Process.Kill()
}
func (e *UniversalExecutor) Command() *cmd {
func (e *BasicExecutor) Command() *cmd {
return &e.cmd
}

View File

@@ -0,0 +1,422 @@
package executor
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"os/user"
"path/filepath"
"strings"
"syscall"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/client/driver/spawn"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
var (
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
chrootEnv = map[string]string{
"/bin": "/bin",
"/etc": "/etc",
"/lib": "/lib",
"/lib32": "/lib32",
"/lib64": "/lib64",
"/usr/bin": "/usr/bin",
"/usr/lib": "/usr/lib",
}
)
func NewExecutor() Executor {
return &LinuxExecutor{}
}
// Linux executor is designed to run on linux kernel 2.8+.
type LinuxExecutor struct {
cmd
user *user.User
// Isolation configurations.
groups *cgroupConfig.Cgroup
taskName string
taskDir string
allocDir string
// Spawn process.
spawn *spawn.Spawner
}
func (e *LinuxExecutor) Command() *cmd {
return &e.cmd
}
func (e *LinuxExecutor) Limit(resources *structs.Resources) error {
if resources == nil {
return errNoResources
}
return e.configureCgroups(resources)
}
// ExecLinuxID contains the necessary information to reattach to an executed
// process and clean up the created cgroups.
type ExecLinuxID struct {
Groups *cgroupConfig.Cgroup
Spawn *spawn.Spawner
TaskDir string
}
func (e *LinuxExecutor) Open(id string) error {
// De-serialize the ID.
dec := json.NewDecoder(strings.NewReader(id))
var execID ExecLinuxID
if err := dec.Decode(&execID); err != nil {
return fmt.Errorf("Failed to parse id: %v", err)
}
// Setup the executor.
e.groups = execID.Groups
e.spawn = execID.Spawn
e.taskDir = execID.TaskDir
return nil
}
func (e *LinuxExecutor) ID() (string, error) {
if e.groups == nil || e.spawn == nil || e.taskDir == "" {
return "", fmt.Errorf("LinuxExecutor not properly initialized.")
}
// Build the ID.
id := ExecLinuxID{
Groups: e.groups,
Spawn: e.spawn,
TaskDir: e.taskDir,
}
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
if err := enc.Encode(id); err != nil {
return "", fmt.Errorf("Failed to serialize id: %v", err)
}
return buffer.String(), nil
}
// runAs takes a user id as a string and looks up the user. It stores the
// results in the executor and returns an error if the user could not be found.
func (e *LinuxExecutor) runAs(userid string) error {
errs := new(multierror.Error)
// First, try to lookup the user by uid
u, err := user.LookupId(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// Lookup failed, so try by username instead
u, err = user.Lookup(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// If we got here we failed to lookup based on id and username, so we'll
// return those errors.
return fmt.Errorf("Failed to identify user to run as: %s", errs)
}
func (e *LinuxExecutor) Start() error {
// Run as "nobody" user so we don't leak root privilege to the spawned
// process.
if err := e.runAs("nobody"); err == nil && e.user != nil {
e.cmd.SetUID(e.user.Uid)
e.cmd.SetGID(e.user.Gid)
}
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
if err != nil {
return err
} else if len(parsedPath) != 1 {
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
}
e.cmd.Path = parsedPath[0]
combined := strings.Join(e.Cmd.Args, " ")
parsed, err := args.ParseAndReplace(combined, envVars.Map())
if err != nil {
return err
}
e.Cmd.Args = parsed
spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status"))
e.spawn = spawn.NewSpawner(spawnState)
e.spawn.SetCommand(&e.cmd.Cmd)
e.spawn.SetChroot(e.taskDir)
e.spawn.SetLogs(&spawn.Logs{
Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)),
Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)),
Stdin: os.DevNull,
})
enterCgroup := func(pid int) error {
// Join the spawn-daemon to the cgroup.
manager := e.getCgroupManager(e.groups)
// Apply will place the spawn daemon into the created cgroups.
if err := manager.Apply(pid); err != nil {
return fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err)
}
return nil
}
return e.spawn.Spawn(enterCgroup)
}
// Wait waits until the user process exits and returns an error on non-zero exit
// codes. Wait also cleans up the task directory and created cgroups.
func (e *LinuxExecutor) Wait() error {
errs := new(multierror.Error)
code, err := e.spawn.Wait()
if err != nil {
errs = multierror.Append(errs, err)
}
if code != 0 {
errs = multierror.Append(errs, fmt.Errorf("Task exited with code: %d", code))
}
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
func (e *LinuxExecutor) Shutdown() error {
return e.ForceStop()
}
// ForceStop immediately exits the user process and cleans up both the task
// directory and the cgroups.
func (e *LinuxExecutor) ForceStop() error {
errs := new(multierror.Error)
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
// Task Directory related functions.
// ConfigureTaskDir creates the necessary directory structure for a proper
// chroot. cleanTaskDir should be called after.
func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
e.taskName = taskName
e.allocDir = alloc.AllocDir
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
fmt.Errorf("Couldn't find task directory for task %v", taskName)
}
e.taskDir = taskDir
if err := alloc.MountSharedDir(taskName); err != nil {
return err
}
if err := alloc.Embed(taskName, chrootEnv); err != nil {
return err
}
// Mount dev
dev := filepath.Join(taskDir, "dev")
if err := os.Mkdir(dev, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
}
if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
}
// Mount proc
proc := filepath.Join(taskDir, "proc")
if err := os.Mkdir(proc, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
}
if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
}
// Set the tasks AllocDir environment variable.
env, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
e.Cmd.Env = env.List()
return nil
}
// pathExists is a helper function to check if the path exists.
func (e *LinuxExecutor) pathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return false
}
}
return true
}
// cleanTaskDir is an idempotent operation to clean the task directory and
// should be called when tearing down the task.
func (e *LinuxExecutor) cleanTaskDir() error {
// Unmount dev.
errs := new(multierror.Error)
dev := filepath.Join(e.taskDir, "dev")
if e.pathExists(dev) {
if err := syscall.Unmount(dev, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
}
if err := os.RemoveAll(dev); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err))
}
}
// Unmount proc.
proc := filepath.Join(e.taskDir, "proc")
if e.pathExists(proc) {
if err := syscall.Unmount(proc, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
}
if err := os.RemoveAll(proc); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", dev, err))
}
}
return errs.ErrorOrNil()
}
// Cgroup related functions.
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error {
e.groups = &cgroupConfig.Cgroup{}
e.groups.Name = structs.GenerateUUID()
// TODO: verify this is needed for things like network access
e.groups.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024)
// Disable swap to avoid issues on the machine
e.groups.MemorySwap = int64(-1)
}
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
e.groups.CpuShares = int64(resources.CPU)
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
e.groups.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
// destroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host.
func (e *LinuxExecutor) destroyCgroup() error {
if e.groups == nil {
return errors.New("Can't destroy: cgroup configuration empty")
}
manager := e.getCgroupManager(e.groups)
pids, err := manager.GetPids()
if err != nil {
return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err)
}
errs := new(multierror.Error)
for _, pid := range pids {
process, err := os.FindProcess(pid)
if err != nil {
multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err))
continue
}
if err := process.Kill(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err))
continue
}
}
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err))
}
if len(errs.Errors) != 0 {
return fmt.Errorf("Failed to destroy cgroup: %v", errs)
}
return nil
}
// getCgroupManager returns the correct libcontainer cgroup manager.
func (e *LinuxExecutor) getCgroupManager(groups *cgroupConfig.Cgroup) cgroups.Manager {
var manager cgroups.Manager
manager = &cgroupFs.Manager{Cgroups: groups}
if systemd.UseSystemd() {
manager = &systemd.Manager{Cgroups: groups}
}
return manager
}
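
To make the Resources-to-cgroup mapping above concrete, here is a hedged, package-internal sketch (it reads the unexported groups field, so it would have to live in package executor); the resource values are illustrative, and the 1-CPU-share-per-MHz scale is taken from the comments in the executor this file replaces:

    func ExampleLinuxExecutor_Limit() {
        res := &structs.Resources{CPU: 500, MemoryMB: 256, IOPS: 100}
        e := NewExecutor().(*LinuxExecutor)
        if err := e.Limit(res); err != nil {
            // CPU < 2 or IOPS outside [10, 1000] would surface here.
            fmt.Println("Limit failed:", err)
            return
        }
        // configureCgroups has populated:
        //   e.groups.CpuShares   == 500               (1 share per MHz)
        //   e.groups.Memory      == 256 * 1024 * 1024 (bytes)
        //   e.groups.MemorySwap  == -1                (swap disabled)
        //   e.groups.BlkioWeight == 100
        fmt.Println(e.groups.CpuShares, e.groups.Memory, e.groups.BlkioWeight)
    }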

View File

@@ -139,11 +139,6 @@ func TestExecutorLinux_Start_Kill(t *testing.T) {
filePath := filepath.Join(taskDir, "output")
e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath)
// This test can only be run if cgroups are enabled.
if !e.(*LinuxExecutor).cgroupEnabled {
t.SkipNow()
}
if err := e.Limit(constraint); err != nil {
t.Fatalf("Limit() failed: %v", err)
}
@@ -178,13 +173,11 @@ func TestExecutorLinux_Open(t *testing.T) {
t.Fatalf("No task directory found for task %v", task)
}
filePath := filepath.Join(taskDir, "output")
e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath)
// This test can only be run if cgroups are enabled.
if !e.(*LinuxExecutor).cgroupEnabled {
t.SkipNow()
}
expected := "hello world"
file := filepath.Join(allocdir.TaskLocal, "output.txt")
absFilePath := filepath.Join(taskDir, file)
cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file)
e := Command("/bin/bash", "-c", cmd)
if err := e.Limit(constraint); err != nil {
t.Fatalf("Limit() failed: %v", err)
@@ -203,14 +196,22 @@ func TestExecutorLinux_Open(t *testing.T) {
t.Fatalf("ID() failed: %v", err)
}
if _, err := OpenId(id); err == nil {
t.Fatalf("Open(%v) should have failed", id)
e2 := NewExecutor()
if err := e2.Open(id); err != nil {
t.Fatalf("Open(%v) failed: %v", id, err)
}
time.Sleep(1500 * time.Millisecond)
if err := e2.Wait(); err != nil {
t.Fatalf("Wait() failed: %v", err)
}
// Check that the file doesn't exist, open should have killed the process.
if _, err := os.Stat(filePath); err == nil {
t.Fatalf("Stat(%v) should have failed: task not killed", filePath)
output, err := ioutil.ReadFile(absFilePath)
if err != nil {
t.Fatalf("Couldn't read file %v", absFilePath)
}
act := string(output)
if act != expected {
t.Fatalf("Command output incorrectly: want %v; got %v", expected, act)
}
}

View File

@@ -0,0 +1,12 @@
// +build !linux
package executor
func NewExecutor() Executor {
return &UniversalExecutor{BasicExecutor{}}
}
// UniversalExecutor wraps the BasicExecutor
type UniversalExecutor struct {
BasicExecutor
}

View File

@@ -14,7 +14,7 @@ import (
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/executor"
"github.com/hashicorp/nomad/client/driver/executor"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -38,8 +38,8 @@ func NewJavaDriver(ctx *DriverContext) Driver {
func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling")
if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.java: must run as root user on linux, disabling")
return false, nil
}

View File

@@ -19,7 +19,7 @@ func javaLocated() bool {
// The fingerprinter test should always pass, even if Java is not installed.
func TestJavaDriver_Fingerprint(t *testing.T) {
ctestutils.ExecCompatible(t)
ctestutils.JavaCompatible(t)
d := NewJavaDriver(testDriverContext(""))
node := &structs.Node{
Attributes: make(map[string]string),
@@ -93,7 +93,7 @@ func TestJavaDriver_Start_Wait(t *testing.T) {
t.Skip("Java not found; skipping")
}
ctestutils.ExecCompatible(t)
ctestutils.JavaCompatible(t)
task := &structs.Task{
Name: "demo-app",
Config: map[string]string{
@@ -141,7 +141,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) {
t.Skip("Java not found; skipping")
}
ctestutils.ExecCompatible(t)
ctestutils.JavaCompatible(t)
task := &structs.Task{
Name: "demo-app",
Config: map[string]string{
@@ -179,7 +179,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) {
if err == nil {
t.Fatal("should err")
}
case <-time.After(2 * time.Second):
case <-time.After(8 * time.Second):
t.Fatalf("timeout")
}

View File

@@ -0,0 +1,285 @@
package spawn
import (
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/command"
"github.com/hashicorp/nomad/helper/discover"
)
// Spawner is used to start a user command in an isolated fashion that is
// resistant to Nomad agent failure.
type Spawner struct {
spawn *os.Process
SpawnPid int
SpawnPpid int
StateFile string
// User configuration
UserCmd *exec.Cmd
Logs *Logs
Chroot string
}
// Logs is used to define the filepaths the user command's logs should be
// redirected to. The files do not need to exist.
type Logs struct {
Stdin, Stdout, Stderr string
}
// NewSpawner takes a path to a state file. This state file can be used to
// create a new Spawner that can be used to wait on the exit status of a
// process even through Nomad restarts.
func NewSpawner(stateFile string) *Spawner {
return &Spawner{StateFile: stateFile}
}
// SetCommand sets the user command to spawn.
func (s *Spawner) SetCommand(cmd *exec.Cmd) {
s.UserCmd = cmd
}
// SetLogs sets the redirection of user command log files.
func (s *Spawner) SetLogs(l *Logs) {
s.Logs = l
}
// SetChroot puts the user command into a chroot.
func (s *Spawner) SetChroot(root string) {
s.Chroot = root
}
// Spawn does a double-fork to start and isolate the user command. It takes a
// call-back that is invoked with the pid of the intermediary process. If the
// call back returns an error, the user command is not started and the spawn is
// cancelled. This can be used to put the process into a cgroup or jail and
// cancel starting the user process if that was not successful. An error is
// returned if the call-back returns an error or the user-command couldn't be
// started.
func (s *Spawner) Spawn(cb func(pid int) error) error {
bin, err := discover.NomadExecutable()
if err != nil {
return fmt.Errorf("Failed to determine the nomad executable: %v", err)
}
exitFile, err := os.OpenFile(s.StateFile, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("Error opening file to store exit status: %v", err)
}
defer exitFile.Close()
config, err := s.spawnConfig()
if err != nil {
return err
}
spawn := exec.Command(bin, "spawn-daemon", config)
// Capture stdout
spawnStdout, err := spawn.StdoutPipe()
if err != nil {
return fmt.Errorf("Failed to capture spawn-daemon stdout: %v", err)
}
defer spawnStdout.Close()
// Capture stdin.
spawnStdin, err := spawn.StdinPipe()
if err != nil {
return fmt.Errorf("Failed to capture spawn-daemon stdin: %v", err)
}
defer spawnStdin.Close()
if err := spawn.Start(); err != nil {
return fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err)
}
if cb != nil {
cbErr := cb(spawn.Process.Pid)
if cbErr != nil {
errs := new(multierror.Error)
errs = multierror.Append(errs, cbErr)
if err := s.sendAbortCommand(spawnStdin); err != nil {
errs = multierror.Append(errs, err)
}
return errs
}
}
if err := s.sendStartCommand(spawnStdin); err != nil {
return err
}
respCh := make(chan command.SpawnStartStatus, 1)
errCh := make(chan error, 1)
go func() {
var resp command.SpawnStartStatus
dec := json.NewDecoder(spawnStdout)
if err := dec.Decode(&resp); err != nil {
errCh <- fmt.Errorf("Failed to parse spawn-daemon start response: %v", err)
}
respCh <- resp
}()
select {
case err := <-errCh:
return err
case resp := <-respCh:
if resp.ErrorMsg != "" {
return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg)
}
case <-time.After(5 * time.Second):
return fmt.Errorf("timed out waiting for response")
}
// Store the spawn process.
s.spawn = spawn.Process
s.SpawnPid = s.spawn.Pid
s.SpawnPpid = os.Getpid()
return nil
}
// spawnConfig returns a serialized config to pass to the Nomad spawn-daemon
// command.
func (s *Spawner) spawnConfig() (string, error) {
if s.UserCmd == nil {
return "", fmt.Errorf("Must specify user command")
}
config := command.DaemonConfig{
Cmd: *s.UserCmd,
Chroot: s.Chroot,
ExitStatusFile: s.StateFile,
}
if s.Logs != nil {
config.StdoutFile = s.Logs.Stdout
config.StdinFile = s.Logs.Stdin
config.StderrFile = s.Logs.Stderr
}
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
if err := enc.Encode(config); err != nil {
return "", fmt.Errorf("Failed to serialize configuration: %v", err)
}
return strconv.Quote(buffer.String()), nil
}
// sendStartCommand sends the necessary command to the spawn-daemon to have it
// start the user process.
func (s *Spawner) sendStartCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(true); err != nil {
return fmt.Errorf("Failed to serialize start command: %v", err)
}
return nil
}
// sendAbortCommand sends the necessary command to the spawn-daemon to have it
// abort starting the user process. This should be invoked if the spawn-daemon
// could not be isolated into a cgroup.
func (s *Spawner) sendAbortCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(false); err != nil {
return fmt.Errorf("Failed to serialize abort command: %v", err)
}
return nil
}
// Wait returns the exit code of the user process or an error if the wait
// failed.
func (s *Spawner) Wait() (int, error) {
if os.Getpid() == s.SpawnPpid {
return s.waitAsParent()
}
return s.pollWait()
}
// waitAsParent waits on the process if the current process was the spawner.
func (s *Spawner) waitAsParent() (int, error) {
if s.SpawnPpid != os.Getpid() {
return -1, fmt.Errorf("not the parent. Spawner parent is %v; current pid is %v", s.SpawnPpid, os.Getpid())
}
// Try to reattach to the spawn.
if s.spawn == nil {
// If it can't be reattached, it means the spawn process has exited so
// we should just read its exit file.
var err error
if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil {
return s.pollWait()
}
}
if _, err := s.spawn.Wait(); err != nil {
return -1, err
}
return s.pollWait()
}
// pollWait polls on the spawn daemon to determine when it exits. After it
// exits, it reads the state file and returns the exit code and possibly an
// error.
func (s *Spawner) pollWait() (int, error) {
// Stat to check if it is there to avoid a race condition.
stat, err := os.Stat(s.StateFile)
if err != nil {
return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err)
}
// If there is data it means that the file has already been written.
if stat.Size() > 0 {
return s.readExitCode()
}
// Read after the process exits.
for _ = range time.Tick(5 * time.Second) {
if !s.Alive() {
break
}
}
return s.readExitCode()
}
// readExitCode parses the state file and returns the exit code of the task. It
// returns an error if the file can't be read.
func (s *Spawner) readExitCode() (int, error) {
f, err := os.Open(s.StateFile)
if err != nil {
return -1, fmt.Errorf("Failed to open %v to read exit code: %v", s.StateFile, err)
}
defer f.Close()
stat, err := f.Stat()
if err != nil {
return -1, fmt.Errorf("Failed to stat file %v: %v", s.StateFile, err)
}
if stat.Size() == 0 {
return -1, fmt.Errorf("Empty state file: %v", s.StateFile)
}
var exitStatus command.SpawnExitStatus
dec := json.NewDecoder(f)
if err := dec.Decode(&exitStatus); err != nil {
return -1, fmt.Errorf("Failed to parse exit status from %v: %v", s.StateFile, err)
}
return exitStatus.ExitCode, nil
}
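
Putting this file together, a hypothetical caller of the Spawner might look like the following sketch. The paths and command are invented for illustration, and it assumes a nomad binary is discoverable on the host, since Spawn shells out to `nomad spawn-daemon`:

    package main

    import (
        "log"
        "os"
        "os/exec"

        "github.com/hashicorp/nomad/client/driver/spawn"
    )

    func main() {
        s := spawn.NewSpawner("/tmp/example_exit_status")
        s.SetCommand(exec.Command("/bin/echo", "hello"))
        s.SetLogs(&spawn.Logs{
            Stdout: "/tmp/example.stdout",
            Stderr: "/tmp/example.stderr",
            Stdin:  os.DevNull,
        })

        // The callback receives the spawn-daemon's pid before the user command
        // starts; returning an error aborts the spawn (the Linux executor uses
        // this hook to join the daemon to its cgroup).
        if err := s.Spawn(func(pid int) error {
            log.Printf("spawn-daemon running as pid %d", pid)
            return nil
        }); err != nil {
            log.Fatalf("Spawn() failed: %v", err)
        }

        // Wait blocks until the user command exits; if the agent restarts, a
        // re-created Spawner falls back to polling the exit-status file.
        code, err := s.Wait()
        log.Printf("user command exited with code %d (err: %v)", code, err)
    }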

View File

@@ -0,0 +1,14 @@
// +build !windows
package spawn
import "syscall"
func (s *Spawner) Alive() bool {
if s.spawn == nil {
return false
}
err := s.spawn.Signal(syscall.Signal(0))
return err == nil
}

View File

@@ -0,0 +1,300 @@
package spawn
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"runtime"
"strings"
"testing"
"time"
)
func TestSpawn_NoCmd(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
if err := spawn.Spawn(nil); err == nil {
t.Fatalf("Spawn() with no user command should fail")
}
}
func TestSpawn_InvalidCmd(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("foo"))
if err := spawn.Spawn(nil); err == nil {
t.Fatalf("Spawn() with no invalid command should fail")
}
}
func TestSpawn_SetsLogs(t *testing.T) {
// TODO: Figure out why this test fails. If the spawn-daemon directly writes
// to the opened stdout file it works but not the user command. Maybe a
// flush issue?
if runtime.GOOS == "windows" {
t.Skip("Test fails on windows; unknown reason. Skipping")
}
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
exp := "foo"
spawn.SetCommand(exec.Command("echo", exp))
// Create file for stdout.
stdout, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(stdout.Name())
spawn.SetLogs(&Logs{Stdout: stdout.Name()})
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed: %v", err)
}
if code, err := spawn.Wait(); code != 0 && err != nil {
t.Fatalf("Wait() returned %v, %v; want 0, nil", code, err)
}
stdout2, err := os.Open(stdout.Name())
if err != nil {
t.Fatalf("Open() failed: %v", err)
}
data, err := ioutil.ReadAll(stdout2)
if err != nil {
t.Fatalf("ReadAll() failed: %v", err)
}
act := strings.TrimSpace(string(data))
if act != exp {
t.Fatalf("Unexpected data written to stdout; got %v; want %v", act, exp)
}
}
func TestSpawn_Callback(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "1"))
called := false
cbErr := fmt.Errorf("ERROR CB")
cb := func(_ int) error {
called = true
return cbErr
}
if err := spawn.Spawn(cb); err == nil {
t.Fatalf("Spawn(%#v) should have errored; want %v", cb, cbErr)
}
if !called {
t.Fatalf("Spawn(%#v) didn't call callback", cb)
}
}
func TestSpawn_ParentWaitExited(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("echo", "foo"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
time.Sleep(1 * time.Second)
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_ParentWait(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "2"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_NonParentWaitExited(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("echo", "foo"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
time.Sleep(1 * time.Second)
// Force the wait to assume non-parent.
spawn.SpawnPpid = 0
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_NonParentWait(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "2"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
// Need to wait on the spawner, otherwise it becomes a zombie and the test
// only finishes after the init process cleans it. This speeds that up.
go func() {
time.Sleep(3 * time.Second)
if _, err := spawn.spawn.Wait(); err != nil {
t.FailNow()
}
}()
// Force the wait to assume non-parent.
spawn.SpawnPpid = 0
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
var spawnPid int
cb := func(pid int) error {
spawnPid = pid
return nil
}
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "5"))
if err := spawn.Spawn(cb); err != nil {
t.Fatalf("Spawn() errored: %v", err)
}
proc, err := os.FindProcess(spawnPid)
if err != nil {
t.FailNow()
}
if err := proc.Kill(); err != nil {
t.FailNow()
}
if _, err := proc.Wait(); err != nil {
t.FailNow()
}
if _, err := spawn.Wait(); err == nil {
t.Fatalf("Wait() should have failed: %v", err)
}
}
func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
var spawnPid int
cb := func(pid int) error {
spawnPid = pid
return nil
}
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "2"))
if err := spawn.Spawn(cb); err != nil {
t.Fatalf("Spawn() errored: %v", err)
}
proc, err := os.FindProcess(spawnPid)
if err != nil {
t.FailNow()
}
if err := proc.Kill(); err != nil {
t.FailNow()
}
if _, err := proc.Wait(); err != nil {
t.FailNow()
}
// Force the wait to assume non-parent.
spawn.SpawnPpid = 0
if _, err := spawn.Wait(); err == nil {
t.Fatalf("Wait() should have failed: %v", err)
}
}

View File

@@ -0,0 +1,21 @@
package spawn
import "syscall"
const STILL_ACTIVE = 259
func (s *Spawner) Alive() bool {
const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE
h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid))
if e != nil {
return false
}
var ec uint32
e = syscall.GetExitCodeProcess(h, &ec)
if e != nil {
return false
}
return ec == STILL_ACTIVE
}

View File

@@ -1,579 +0,0 @@
package executor
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/command"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/nomad/structs"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
const (
cgroupMount = "/sys/fs/cgroup"
)
var (
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
chrootEnv = map[string]string{
"/bin": "/bin",
"/etc": "/etc",
"/lib": "/lib",
"/lib32": "/lib32",
"/lib64": "/lib64",
"/usr/bin": "/usr/bin",
"/usr/lib": "/usr/lib",
}
)
func NewExecutor() Executor {
e := LinuxExecutor{}
// TODO: In a follow-up PR make it so this only happens once per client.
// Fingerprinting shouldn't happen per task.
// Check that cgroups are available.
if _, err := os.Stat(cgroupMount); err == nil {
e.cgroupEnabled = true
}
return &e
}
// Linux executor is designed to run on linux kernel 2.8+.
type LinuxExecutor struct {
cmd
user *user.User
// Finger print capabilities.
cgroupEnabled bool
// Isolation configurations.
groups *cgroupConfig.Cgroup
alloc *allocdir.AllocDir
taskName string
taskDir string
// Tracking of child process.
spawnChild exec.Cmd
spawnOutputWriter *os.File
spawnOutputReader *os.File
// Track whether there are filesystems mounted in the task dir.
mounts bool
}
func (e *LinuxExecutor) Limit(resources *structs.Resources) error {
if resources == nil {
return errNoResources
}
if e.cgroupEnabled {
return e.configureCgroups(resources)
}
return nil
}
func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
e.taskName = taskName
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
fmt.Errorf("Couldn't find task directory for task %v", taskName)
}
e.taskDir = taskDir
if err := alloc.MountSharedDir(taskName); err != nil {
return err
}
if err := alloc.Embed(taskName, chrootEnv); err != nil {
return err
}
// Mount dev
dev := filepath.Join(taskDir, "dev")
if err := os.Mkdir(dev, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
}
if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
}
// Mount proc
proc := filepath.Join(taskDir, "proc")
if err := os.Mkdir(proc, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
}
if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
}
// Set the tasks AllocDir environment variable.
env, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
e.Cmd.Env = env.List()
e.alloc = alloc
e.mounts = true
return nil
}
func (e *LinuxExecutor) cleanTaskDir() error {
if e.alloc == nil {
return errors.New("ConfigureTaskDir() must be called before Start()")
}
if !e.mounts {
return nil
}
// Unmount dev.
errs := new(multierror.Error)
dev := filepath.Join(e.taskDir, "dev")
if err := syscall.Unmount(dev, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
}
// Unmount proc.
proc := filepath.Join(e.taskDir, "proc")
if err := syscall.Unmount(proc, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
}
e.mounts = false
return errs.ErrorOrNil()
}
func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error {
if !e.cgroupEnabled {
return nil
}
e.groups = &cgroupConfig.Cgroup{}
// Groups will be created in a hierarchy according to the resource being
// constrained, current session, and then this unique name. Restraints are
// then placed in the corresponding files.
// Ex: restricting a process to 2048Mhz CPU and 2MB of memory:
// $ cat /sys/fs/cgroup/cpu/user/1000.user/4.session/<uuid>/cpu.shares
// 2048
// $ cat /sys/fs/cgroup/memory/user/1000.user/4.session/<uuid>/memory.limit_in_bytes
// 2097152
e.groups.Name = structs.GenerateUUID()
// TODO: verify this is needed for things like network access
e.groups.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024)
// Disable swap to avoid issues on the machine
e.groups.MemorySwap = int64(-1)
}
if resources.CPU != 0 {
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
// given process will have at least that amount of resources, but likely
// more since it is (probably) rare that the machine will run at 100%
// CPU. This scale will cease to work if a node is overprovisioned.
e.groups.CpuShares = int64(resources.CPU)
}
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
e.groups.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
func (e *LinuxExecutor) runAs(userid string) error {
errs := new(multierror.Error)
// First, try to lookup the user by uid
u, err := user.LookupId(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// Lookup failed, so try by username instead
u, err = user.Lookup(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// If we got here we failed to lookup based on id and username, so we'll
// return those errors.
return fmt.Errorf("Failed to identify user to run as: %s", errs)
}
func (e *LinuxExecutor) Start() error {
// Run as "nobody" user so we don't leak root privilege to the
// spawned process.
if err := e.runAs("nobody"); err == nil && e.user != nil {
e.cmd.SetUID(e.user.Uid)
e.cmd.SetGID(e.user.Gid)
}
if e.alloc == nil {
return errors.New("ConfigureTaskDir() must be called before Start()")
}
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
if err != nil {
return err
} else if len(parsedPath) != 1 {
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
}
e.cmd.Path = parsedPath[0]
combined := strings.Join(e.Cmd.Args, " ")
parsed, err := args.ParseAndReplace(combined, envVars.Map())
if err != nil {
return err
}
e.Cmd.Args = parsed
return e.spawnDaemon()
}
// spawnDaemon executes a double fork to start the user command with proper
// isolation. Stores the child process for use in Wait.
func (e *LinuxExecutor) spawnDaemon() error {
bin, err := discover.NomadExecutable()
if err != nil {
return fmt.Errorf("Failed to determine the nomad executable: %v", err)
}
// Serialize the cmd and the cgroup configuration so it can be passed to the
// sub-process.
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
c := command.DaemonConfig{
Cmd: e.cmd.Cmd,
Chroot: e.taskDir,
StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)),
StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)),
StdinFile: "/dev/null",
}
if err := enc.Encode(c); err != nil {
return fmt.Errorf("Failed to serialize daemon configuration: %v", err)
}
// Create a pipe to capture Stdout.
pr, pw, err := os.Pipe()
if err != nil {
return err
}
e.spawnOutputWriter = pw
e.spawnOutputReader = pr
// Call ourselves using a hidden flag. The new instance of nomad will join
// the passed cgroup, forkExec the cmd, and output status codes through
// Stdout.
escaped := strconv.Quote(buffer.String())
spawn := exec.Command(bin, "spawn-daemon", escaped)
spawn.Stdout = e.spawnOutputWriter
// Capture its Stdin.
spawnStdIn, err := spawn.StdinPipe()
if err != nil {
return err
}
if err := spawn.Start(); err != nil {
fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err)
}
// Join the spawn-daemon to the cgroup.
if e.groups != nil {
manager := cgroupFs.Manager{}
manager.Cgroups = e.groups
// Apply will place the current pid into the tasks file for each of the
// created cgroups:
// /sys/fs/cgroup/memory/user/1000.user/4.session/<uuid>/tasks
//
// Apply requires superuser permissions, and may fail if Nomad is not run with
// the required permissions
if err := manager.Apply(spawn.Process.Pid); err != nil {
errs := new(multierror.Error)
errs = multierror.Append(errs, fmt.Errorf("Failed to join spawn-daemon to the cgroup (config => %+v): %v", manager.Cgroups, err))
if err := sendAbortCommand(spawnStdIn); err != nil {
errs = multierror.Append(errs, err)
}
return errs
}
}
// Tell it to start.
if err := sendStartCommand(spawnStdIn); err != nil {
return err
}
// Parse the response.
dec := json.NewDecoder(e.spawnOutputReader)
var resp command.SpawnStartStatus
if err := dec.Decode(&resp); err != nil {
return fmt.Errorf("Failed to parse spawn-daemon start response: %v", err)
}
if resp.ErrorMsg != "" {
return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg)
}
e.spawnChild = *spawn
return nil
}
func sendStartCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(true); err != nil {
return fmt.Errorf("Failed to serialize start command: %v", err)
}
return nil
}
func sendAbortCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(false); err != nil {
return fmt.Errorf("Failed to serialize abort command: %v", err)
}
return nil
}
// Open's behavior is to kill all processes associated with the id and return an
// error. This is done because it is not possible to re-attach to the
// spawn-daemon's stdout to retrieve status messages.
func (e *LinuxExecutor) Open(id string) error {
parts := strings.SplitN(id, ":", 2)
if len(parts) != 2 {
return fmt.Errorf("Invalid id: %v", id)
}
switch parts[0] {
case "PID":
pid, err := strconv.Atoi(parts[1])
if err != nil {
return fmt.Errorf("Invalid id: failed to parse pid %v", parts[1])
}
process, err := os.FindProcess(pid)
if err != nil {
return fmt.Errorf("Failed to find Pid %v: %v", pid, err)
}
if err := process.Kill(); err != nil {
return fmt.Errorf("Failed to kill Pid %v: %v", pid, err)
}
case "CGROUP":
if !e.cgroupEnabled {
return errors.New("Passed a a cgroup identifier, but cgroups are disabled")
}
// De-serialize the cgroup configuration.
dec := json.NewDecoder(strings.NewReader(parts[1]))
var groups cgroupConfig.Cgroup
if err := dec.Decode(&groups); err != nil {
return fmt.Errorf("Failed to parse cgroup configuration: %v", err)
}
e.groups = &groups
if err := e.destroyCgroup(); err != nil {
return err
}
// TODO: cleanTaskDir is a little more complicated here because the OS
// may have already unmounted in the case of a restart. Need to scan.
default:
return fmt.Errorf("Invalid id type: %v", parts[0])
}
return errors.New("Could not re-open to id (intended).")
}
func (e *LinuxExecutor) Wait() error {
if e.spawnChild.Process == nil {
return errors.New("Can not find child to wait on")
}
defer e.spawnOutputWriter.Close()
defer e.spawnOutputReader.Close()
errs := new(multierror.Error)
if err := e.spawnChild.Wait(); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Process.Pid, err))
}
// If they fork/exec and then exit, wait will return but they will be still
// running processes so we need to kill the full cgroup.
if e.groups != nil {
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
// If cgroups are used, the ID is the cgroup structure. Otherwise, it is the
// PID of the spawn-daemon process. An error is returned if the process was
// never started.
func (e *LinuxExecutor) ID() (string, error) {
if e.spawnChild.Process != nil {
if e.cgroupEnabled && e.groups != nil {
// Serialize the cgroup structure so it can be undone on subsequent
// opens.
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
if err := enc.Encode(e.groups); err != nil {
return "", fmt.Errorf("Failed to serialize daemon configuration: %v", err)
}
return fmt.Sprintf("CGROUP:%v", buffer.String()), nil
}
return fmt.Sprintf("PID:%d", e.spawnChild.Process.Pid), nil
}
return "", fmt.Errorf("Process has finished or was never started")
}
func (e *LinuxExecutor) Shutdown() error {
return e.ForceStop()
}
func (e *LinuxExecutor) ForceStop() error {
if e.spawnOutputReader != nil {
e.spawnOutputReader.Close()
}
if e.spawnOutputWriter != nil {
e.spawnOutputWriter.Close()
}
// If the task is not running inside a cgroup then just the spawn-daemon child is killed.
// TODO: Find a good way to kill the children of the spawn-daemon.
if e.groups == nil {
if err := e.spawnChild.Process.Kill(); err != nil {
return fmt.Errorf("Failed to kill child (%v): %v", e.spawnChild.Process.Pid, err)
}
return nil
}
errs := new(multierror.Error)
if e.groups != nil {
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
func (e *LinuxExecutor) destroyCgroup() error {
if e.groups == nil {
return errors.New("Can't destroy: cgroup configuration empty")
}
manager := cgroupFs.Manager{}
manager.Cgroups = e.groups
pids, err := manager.GetPids()
if err != nil {
return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err)
}
errs := new(multierror.Error)
for _, pid := range pids {
process, err := os.FindProcess(pid)
if err != nil {
multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err))
continue
}
if err := process.Kill(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err))
continue
}
if _, err := process.Wait(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err))
continue
}
}
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err))
}
if len(errs.Errors) != 0 {
return fmt.Errorf("Failed to destroy cgroup: %v", errs)
}
return nil
}
func (e *LinuxExecutor) Command() *cmd {
return &e.cmd
}

View File

@@ -15,6 +15,10 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// This is where the AWS metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_AWS_URL = "http://169.254.169.254/latest/meta-data/"
// map of instance type to approximate speed, in Mbits/s
// http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797
// which itself cites these sources:
@@ -89,7 +93,7 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node)
}
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = "http://169.254.169.254/latest/meta-data/"
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
@@ -161,7 +165,7 @@ func isAWS() bool {
// provide their own
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = "http://169.254.169.254/latest/meta-data/"
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
@@ -205,7 +209,7 @@ func (f *EnvAWSFingerprint) linkSpeed() int {
// the network speed
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = "http://169.254.169.254/latest/meta-data/"
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
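
Because every call site checks AWS_ENV_URL before falling back to DEFAULT_AWS_URL, a test can point the fingerprinter at a local stub server. A hedged sketch, assuming the usual net/http, net/http/httptest, os and testing imports; the test name and served value are invented for illustration:

    func TestEnvAWSFingerprint_stubMetadata(t *testing.T) {
        ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            // Whatever metadata key is requested, answer with a canned value.
            fmt.Fprint(w, "m3.large")
        }))
        defer ts.Close()

        os.Setenv("AWS_ENV_URL", ts.URL+"/latest/meta-data/")
        defer os.Setenv("AWS_ENV_URL", "")

        // ... construct an EnvAWSFingerprint and call Fingerprint as usual ...
    }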

View File

@@ -8,8 +8,14 @@ import (
)
func ExecCompatible(t *testing.T) {
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
t.Skip("Must be root on non-windows environments to run test")
if runtime.GOOS != "linux" || syscall.Geteuid() != 0 {
t.Skip("Test only available running as root on linux")
}
}
func JavaCompatible(t *testing.T) {
if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
t.Skip("Test only available when running as root on linux")
}
}

View File

@@ -104,6 +104,17 @@ job "example" {
# Defaults to 1
# count = 1
# Restart Policy - This block defines the restart policy for TaskGroups.
# The attempts value defines the number of restarts Nomad will perform for
# Tasks in this TaskGroup within a rolling window of the interval duration.
# The delay value makes Nomad wait for that duration before restarting a Task
# after it fails or crashes.
restart {
interval = "5m"
attempts = 10
delay = "25s"
}
# Define a task to run
task "redis" {
# Use Docker to run the task.

View File

@@ -2,19 +2,19 @@ package command
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
)
type SpawnDaemonCommand struct {
Meta
}
// Status of executing the user's command.
type SpawnStartStatus struct {
// ErrorMsg will be empty if the user command was started successfully.
// Otherwise it will have an error message.
ErrorMsg string
config *DaemonConfig
exitFile io.WriteCloser
}
func (c *SpawnDaemonCommand) Help() string {
@@ -23,15 +23,15 @@ Usage: nomad spawn-daemon [options] <daemon_config>
INTERNAL ONLY
Spawns a daemon process optionally inside a cgroup. The required daemon_config is a json
encoding of the DaemonConfig struct containing the isolation configuration and command to run.
SpawnStartStatus is json serialized to Stdout upon running the user command or if any error
prevents its execution. If there is no error, the process waits on the users
command and then json serializes SpawnExitStatus to Stdout after its termination.
General Options:
` + generalOptionsUsage()
Spawns a daemon process by double forking. The required daemon_config is a
json encoding of the DaemonConfig struct containing the isolation
configuration and command to run. SpawnStartStatus is json serialized to
stdout upon running the user command or if any error prevents its execution.
If there is no error, the process waits on the user's command. Once the user
command exits, the exit code is written to a file specified in the
daemon_config and this process exits with the same exit status as the user
command.
`
return strings.TrimSpace(helpText)
}
@@ -40,6 +40,154 @@ func (c *SpawnDaemonCommand) Synopsis() string {
return "Spawn a daemon command with configurable isolation."
}
// Status of executing the user's command.
type SpawnStartStatus struct {
// The PID of the user's command.
UserPID int
// ErrorMsg will be empty if the user command was started successfully.
// Otherwise it will have an error message.
ErrorMsg string
}
// Exit status of the user's command.
type SpawnExitStatus struct {
// The exit code of the user's command.
ExitCode int
}
// Configuration for the command to start as a daemon.
type DaemonConfig struct {
exec.Cmd
// The filepath to write the exit status to.
ExitStatusFile string
// The paths, if not /dev/null, must be either in the tasks root directory
// or in the shared alloc directory.
StdoutFile string
StdinFile string
StderrFile string
// An optional path specifying the directory to chroot the process in.
Chroot string
}
// Whether to start the user command or abort.
type TaskStart bool
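
The help text and the types above describe a small JSON protocol between the Spawner and `nomad spawn-daemon`. The following self-contained sketch shows the shape of each message; the pid and values are invented for illustration:

    package main

    import (
        "encoding/json"
        "fmt"

        "github.com/hashicorp/nomad/command"
    )

    func main() {
        // 1. The Spawner passes a strconv.Quote'd JSON DaemonConfig as the single
        //    CLI argument (see Spawner.spawnConfig), then writes a bare JSON bool
        //    (TaskStart) to the daemon's stdin: true to start, false to abort.

        // 2. The daemon answers on stdout with a SpawnStartStatus:
        start, _ := json.Marshal(command.SpawnStartStatus{UserPID: 1234})
        fmt.Println(string(start)) // {"UserPID":1234,"ErrorMsg":""}

        // 3. After the user command exits, it writes a SpawnExitStatus to the
        //    configured ExitStatusFile and exits with the same code:
        exit, _ := json.Marshal(command.SpawnExitStatus{ExitCode: 0})
        fmt.Println(string(exit)) // {"ExitCode":0}
    }
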
// parseConfig reads the DaemonConfig from the passed arguments. If not
// successful, an error is returned.
func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) {
flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
if err := flags.Parse(args); err != nil {
return nil, fmt.Errorf("failed to parse args: %v", err)
}
// Check that we got json input.
args = flags.Args()
if len(args) != 1 {
return nil, fmt.Errorf("incorrect number of args; got %v; want 1", len(args))
}
jsonInput, err := strconv.Unquote(args[0])
if err != nil {
return nil, fmt.Errorf("Failed to unquote json input: %v", err)
}
// De-serialize the passed command.
var config DaemonConfig
dec := json.NewDecoder(strings.NewReader(jsonInput))
if err := dec.Decode(&config); err != nil {
return nil, err
}
return &config, nil
}
// configureLogs creates the log files and redirects the process
// stdin/stderr/stdout to them. If unsuccessful, an error is returned.
func (c *SpawnDaemonCommand) configureLogs() error {
if len(c.config.StdoutFile) != 0 {
stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return fmt.Errorf("Error opening file to redirect stdout: %v", err)
}
c.config.Cmd.Stdout = stdo
}
if len(c.config.StderrFile) != 0 {
stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return fmt.Errorf("Error opening file to redirect stderr: %v", err)
}
c.config.Cmd.Stderr = stde
}
if len(c.config.StdinFile) != 0 {
stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666)
if err != nil {
return fmt.Errorf("Error opening file to redirect stdin: %v", err)
}
c.config.Cmd.Stdin = stdi
}
return nil
}
func (c *SpawnDaemonCommand) Run(args []string) int {
var err error
c.config, err = c.parseConfig(args)
if err != nil {
return c.outputStartStatus(err, 1)
}
// Open the file we will be using to write exit codes to. We do this early
// to ensure that we don't start the user process when we can't capture its
// exit status.
c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_WRONLY, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1)
}
// Isolate the user process.
if err := c.isolateCmd(); err != nil {
return c.outputStartStatus(err, 1)
}
// Redirect logs.
if err := c.configureLogs(); err != nil {
return c.outputStartStatus(err, 1)
}
// Chroot jail the process and set its working directory.
c.configureChroot()
// Wait to get the start command.
var start TaskStart
dec := json.NewDecoder(os.Stdin)
if err := dec.Decode(&start); err != nil {
return c.outputStartStatus(err, 1)
}
// Aborted by Nomad process.
if !start {
return 0
}
// Spawn the user process.
if err := c.config.Cmd.Start(); err != nil {
return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1)
}
// Indicate that the command was started successfully.
c.outputStartStatus(nil, 0)
// Wait and then output the exit status.
return c.writeExitStatus(c.config.Cmd.Wait())
}
// outputStartStatus is a helper function that outputs a SpawnStartStatus to
// Stdout with the passed error, which may be nil to indicate no error. It
// returns the passed status.
@@ -51,6 +199,36 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int {
startStatus.ErrorMsg = err.Error()
}
if c.config != nil && c.config.Cmd.Process != nil {
startStatus.UserPID = c.config.Process.Pid
}
enc.Encode(startStatus)
return status
}
// writeExitStatus takes in the error result from calling wait and writes out
// the exit status to a file. It returns the same exit status as the user
// command.
func (c *SpawnDaemonCommand) writeExitStatus(exit error) int {
// Parse the exit code.
exitStatus := &SpawnExitStatus{}
if exit != nil {
// Default to exit code 1 if we cannot get the actual exit code.
exitStatus.ExitCode = 1
if exiterr, ok := exit.(*exec.ExitError); ok {
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
exitStatus.ExitCode = status.ExitStatus()
}
}
}
if c.exitFile != nil {
enc := json.NewEncoder(c.exitFile)
enc.Encode(exitStatus)
c.exitFile.Close()
}
return exitStatus.ExitCode
}
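For orientation, here is a hedged sketch of the parent side of this handshake: create the exit-status file, invoke spawn-daemon with a Go-quoted JSON DaemonConfig, send the TaskStart message on stdin, and read the SpawnStartStatus back from stdout. How the exec driver actually builds the config is not part of this diff, so the "nomad" binary path, the config JSON, and the helper names below are illustrative only.

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"strconv"
)

// startStatus mirrors SpawnStartStatus; only the fields read here.
type startStatus struct {
	UserPID  int
	ErrorMsg string
}

// spawnDaemon is a sketch, not driver code: it launches `nomad spawn-daemon`
// and performs the stdin/stdout handshake described above.
func spawnDaemon(configJSON, exitStatusFile string) (int, error) {
	// spawn-daemon opens the exit-status file O_WRONLY without O_CREATE,
	// so the parent must create it beforehand.
	f, err := os.Create(exitStatusFile)
	if err != nil {
		return 0, err
	}
	f.Close()

	// parseConfig calls strconv.Unquote on the argument, so the JSON must be
	// passed as a single Go-quoted string.
	cmd := exec.Command("nomad", "spawn-daemon", strconv.Quote(configJSON))
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return 0, err
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return 0, err
	}
	if err := cmd.Start(); err != nil {
		return 0, err
	}

	// TaskStart: true starts the user command, false aborts it.
	if err := json.NewEncoder(stdin).Encode(true); err != nil {
		return 0, err
	}

	// SpawnStartStatus is written to stdout once the user command is running
	// (or as soon as an error prevents it).
	var status startStatus
	if err := json.NewDecoder(stdout).Decode(&status); err != nil {
		return 0, err
	}
	if status.ErrorMsg != "" {
		return 0, fmt.Errorf("spawn-daemon failed: %s", status.ErrorMsg)
	}
	// The user command's exit code can later be read from exitStatusFile,
	// which receives a JSON SpawnExitStatus when the command terminates.
	return status.UserPID, nil
}

func main() {
	// A real config also carries the command to run (the embedded exec.Cmd
	// fields); this minimal config exists only to exercise the handshake.
	pid, err := spawnDaemon(`{"ExitStatusFile":"/tmp/exit-status"}`, "/tmp/exit-status")
	fmt.Println(pid, err)
}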


@@ -0,0 +1,4 @@
package command
// No chroot on darwin.
func (c *SpawnDaemonCommand) configureChroot() {}


@@ -1,115 +1,16 @@
package command
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
)
import "syscall"
// Configuration for the command to start as a daemon.
type DaemonConfig struct {
exec.Cmd
// configureChroot enters the user command into a chroot if specified in the
// config and on an OS that supports Chroots.
func (c *SpawnDaemonCommand) configureChroot() {
if len(c.config.Chroot) != 0 {
if c.config.Cmd.SysProcAttr == nil {
c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{}
}
// The paths, if not /dev/null, must be either in the task's root directory
// or in the shared alloc directory.
StdoutFile string
StdinFile string
StderrFile string
Chroot string
}
// Whether to start the user command or abort.
type TaskStart bool
func (c *SpawnDaemonCommand) Run(args []string) int {
flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
if err := flags.Parse(args); err != nil {
return 1
}
// Check that we got json input.
args = flags.Args()
if len(args) != 1 {
c.Ui.Error(c.Help())
return 1
}
jsonInput, err := strconv.Unquote(args[0])
if err != nil {
return c.outputStartStatus(fmt.Errorf("Failed to unquote json input: %v", err), 1)
}
// De-serialize the passed command.
var cmd DaemonConfig
dec := json.NewDecoder(strings.NewReader(jsonInput))
if err := dec.Decode(&cmd); err != nil {
return c.outputStartStatus(err, 1)
}
// Isolate the user process.
if _, err := syscall.Setsid(); err != nil {
return c.outputStartStatus(fmt.Errorf("Failed setting sid: %v", err), 1)
}
syscall.Umask(0)
// Redirect logs.
stdo, err := os.OpenFile(cmd.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdout: %v", err), 1)
}
stde, err := os.OpenFile(cmd.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stderr: %v", err), 1)
}
stdi, err := os.OpenFile(cmd.StdinFile, os.O_CREATE|os.O_RDONLY, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdin: %v", err), 1)
}
cmd.Cmd.Stdout = stdo
cmd.Cmd.Stderr = stde
cmd.Cmd.Stdin = stdi
// Chroot jail the process and set its working directory.
if cmd.Cmd.SysProcAttr == nil {
cmd.Cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.Cmd.SysProcAttr.Chroot = cmd.Chroot
cmd.Cmd.Dir = "/"
// Wait to get the start command.
var start TaskStart
dec = json.NewDecoder(os.Stdin)
if err := dec.Decode(&start); err != nil {
return c.outputStartStatus(err, 1)
}
if !start {
return 0
}
// Spawn the user process.
if err := cmd.Cmd.Start(); err != nil {
return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1)
}
// Indicate that the command was started successfully.
c.outputStartStatus(nil, 0)
// Wait and then output the exit status.
if err := cmd.Wait(); err != nil {
return 1
}
return 0
c.config.Cmd.SysProcAttr.Chroot = c.config.Chroot
c.config.Cmd.Dir = "/"
}
}


@@ -0,0 +1,48 @@
package command
import (
"bytes"
"encoding/json"
"fmt"
"io"
"os/exec"
"testing"
)
type nopCloser struct {
io.ReadWriter
}
func (n *nopCloser) Close() error {
return nil
}
func TestSpawnDaemon_WriteExitStatus(t *testing.T) {
// Check if there is python.
path, err := exec.LookPath("python")
if err != nil {
t.Skip("python not detected")
}
var b bytes.Buffer
daemon := &SpawnDaemonCommand{exitFile: &nopCloser{&b}}
code := 3
cmd := exec.Command(path, "./test-resources/exiter.py", fmt.Sprintf("%d", code))
err = cmd.Run()
actual := daemon.writeExitStatus(err)
if actual != code {
t.Fatalf("writeExitStatus(%v) returned %v; want %v", err, actual, code)
}
// Decode the exit status that was written to the buffer.
var exitStatus SpawnExitStatus
dec := json.NewDecoder(&b)
if err := dec.Decode(&exitStatus); err != nil {
t.Fatalf("failed to decode exit status: %v", err)
}
if exitStatus.ExitCode != code {
t.Fatalf("writeExitStatus(%v) wrote exit status %v; want %v", err, exitStatus.ExitCode, code)
}
}


@@ -1,9 +0,0 @@
// +build !linux
package command
import "errors"
func (c *SpawnDaemonCommand) Run(args []string) int {
return c.outputStartStatus(errors.New("spawn-daemon not supported"), 1)
}


@@ -0,0 +1,16 @@
// +build !windows
package command
import "syscall"
// isolateCmd sets the session id for the process and the umask.
func (c *SpawnDaemonCommand) isolateCmd() error {
if c.config.Cmd.SysProcAttr == nil {
c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{}
}
c.config.Cmd.SysProcAttr.Setsid = true
syscall.Umask(0)
return nil
}


@@ -0,0 +1,7 @@
// +build windows
package command
// No isolation on Windows.
func (c *SpawnDaemonCommand) isolateCmd() error { return nil }
func (c *SpawnDaemonCommand) configureChroot() {}


@@ -0,0 +1,3 @@
import sys
sys.exit(int(sys.argv[1]))


@@ -3,18 +3,21 @@ package discover
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"github.com/kardianos/osext"
)
const (
nomadExe = "nomad"
)
// Checks the current executable, then $PATH, then $GOPATH/bin, and finally
// the CWD, in that order. If it can't be found, an error is returned.
func NomadExecutable() (string, error) {
nomadExe := "nomad"
if runtime.GOOS == "windows" {
nomadExe = "nomad.exe"
}
// Check the current executable.
bin, err := osext.Executable()
if err != nil {
@@ -25,6 +28,11 @@ func NomadExecutable() (string, error) {
return bin, nil
}
// Check the $PATH
if bin, err := exec.LookPath(nomadExe); err == nil {
return bin, nil
}
// Check the $GOPATH.
bin = filepath.Join(os.Getenv("GOPATH"), "bin", nomadExe)
if _, err := os.Stat(bin); err == nil {


@@ -124,7 +124,7 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error {
}
}
// If we have tasks outside, do those
// If we have tasks outside, create TaskGroups for them
if o := obj.Get("task", false); o != nil {
var tasks []*structs.Task
if err := parseTasks(&tasks, o); err != nil {
@@ -134,9 +134,10 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error {
result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2)
for i, t := range tasks {
result.TaskGroups[i] = &structs.TaskGroup{
Name: t.Name,
Count: 1,
Tasks: []*structs.Task{t},
Name: t.Name,
Count: 1,
Tasks: []*structs.Task{t},
RestartPolicy: structs.NewRestartPolicy(result.Type),
}
}
}
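To make that mapping concrete, a standalone task is wrapped exactly as if it had been declared in its own single-count group. Below is a small sketch of the equivalent helper; the function name and the `jobspec` package name are assumptions for illustration, the real code is the loop above.

package jobspec // assumed package name; it is not visible in this diff

import "github.com/hashicorp/nomad/nomad/structs"

// wrapStandaloneTask mirrors what parseJob does for a task declared outside
// any group: a synthetic group named after the task, with count 1 and the
// job-type default restart policy.
func wrapStandaloneTask(jobType string, t *structs.Task) *structs.TaskGroup {
	return &structs.TaskGroup{
		Name:          t.Name,
		Count:         1,
		Tasks:         []*structs.Task{t},
		RestartPolicy: structs.NewRestartPolicy(jobType),
	}
}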
@@ -180,6 +181,7 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error {
delete(m, "constraint")
delete(m, "meta")
delete(m, "task")
delete(m, "restart")
// Default count to 1 if not specified
if _, ok := m["count"]; !ok {
@@ -199,6 +201,11 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error {
return err
}
}
g.RestartPolicy = structs.NewRestartPolicy(result.Type)
if err := parseRestartPolicy(g.RestartPolicy, o); err != nil {
return err
}
// Parse out meta fields. These are in HCL as a list so we need
// to iterate over them and merge them.
@@ -228,6 +235,42 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error {
return nil
}
func parseRestartPolicy(result *structs.RestartPolicy, obj *hclobj.Object) error {
var restartHclObj *hclobj.Object
var m map[string]interface{}
if restartHclObj = obj.Get("restart", false); restartHclObj == nil {
return nil
}
if err := hcl.DecodeObject(&m, restartHclObj); err != nil {
return err
}
if delay, ok := m["delay"]; ok {
d, err := toDuration(delay)
if err != nil {
return fmt.Errorf("Invalid Delay time in restart policy: %v", err)
}
result.Delay = d
}
if interval, ok := m["interval"]; ok {
i, err := toDuration(interval)
if err != nil {
return fmt.Errorf("Invalid Interval time in restart policy: %v", err)
}
result.Interval = i
}
if attempts, ok := m["attempts"]; ok {
a, err := toInteger(attempts)
if err != nil {
return fmt.Errorf("Invalid value in attempts: %v", err)
}
result.Attempts = a
}
return nil
}
func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error {
for _, o := range obj.Elem(false) {
var m map[string]interface{}
@@ -455,19 +498,11 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error {
}
for _, key := range []string{"stagger", "Stagger"} {
if raw, ok := m[key]; ok {
switch v := raw.(type) {
case string:
dur, err := time.ParseDuration(v)
if err != nil {
return fmt.Errorf("invalid stagger time '%s'", raw)
}
m[key] = dur
case int:
m[key] = time.Duration(v) * time.Second
default:
return fmt.Errorf("invalid type for stagger time '%s'",
raw)
staggerTime, err := toDuration(raw)
if err != nil {
return fmt.Errorf("Invalid stagger time: %v", err)
}
m[key] = staggerTime
}
}
@@ -477,3 +512,35 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error {
}
return nil
}
func toDuration(value interface{}) (time.Duration, error) {
var dur time.Duration
var err error
switch v := value.(type) {
case string:
dur, err = time.ParseDuration(v)
case int:
dur = time.Duration(v) * time.Second
default:
err = fmt.Errorf("Invalid time %v", value)
}
return dur, err
}
func toInteger(value interface{}) (int, error) {
var integer int
var err error
switch v := value.(type) {
case string:
var i int64
i, err = strconv.ParseInt(v, 10, 32)
integer = int(i)
case int:
integer = v
default:
err = fmt.Errorf("Value: %v can't be parsed as an int", value)
}
return integer, err
}
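Since these helpers back both the restart block and the update stagger, here is a quick sketch of the coercions they are meant to perform. It assumes it sits in the same package as the parser (the package name is not visible in this diff; `jobspec` is assumed).

package jobspec

import (
	"testing"
	"time"
)

// TestCoercionHelpers_Sketch documents the intended behaviour: strings go
// through time.ParseDuration / strconv.ParseInt, bare integers are read as
// seconds for durations and passed through for integers, anything else errors.
func TestCoercionHelpers_Sketch(t *testing.T) {
	if d, err := toDuration("10m"); err != nil || d != 10*time.Minute {
		t.Fatalf("toDuration(%q) = %v, %v", "10m", d, err)
	}
	if d, err := toDuration(30); err != nil || d != 30*time.Second {
		t.Fatalf("toDuration(30) = %v, %v", d, err)
	}
	if n, err := toInteger("5"); err != nil || n != 5 {
		t.Fatalf("toInteger(%q) = %v, %v", "5", n, err)
	}
	if _, err := toDuration(1.5); err == nil {
		t.Fatalf("expected an error for unsupported types")
	}
}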


@@ -48,6 +48,11 @@ func TestParse(t *testing.T) {
&structs.TaskGroup{
Name: "outside",
Count: 1,
RestartPolicy: &structs.RestartPolicy{
Attempts: 2,
Interval: 1 * time.Minute,
Delay: 15 * time.Second,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "outside",
@@ -77,6 +82,11 @@ func TestParse(t *testing.T) {
"elb_interval": "10",
"elb_checks": "3",
},
RestartPolicy: &structs.RestartPolicy{
Interval: 10 * time.Minute,
Attempts: 5,
Delay: 15 * time.Second,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "binstore",


@@ -31,6 +31,11 @@ job "binstore-storagelocker" {
group "binsl" {
count = 5
restart {
attempts = 5
interval = "10m"
delay = "15s"
}
task "binstore" {
driver = "docker"
config {


@@ -5,6 +5,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// Alloc endpoint is used for manipulating allocations
@@ -19,35 +20,45 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "list"}, time.Now())
// Capture all the allocations
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Allocs()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "allocs"}),
run: func() error {
// Capture all the allocations
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Allocs()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
alloc := raw.(*structs.Allocation)
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
var allocs []*structs.AllocListStub
for {
raw := iter.Next()
if raw == nil {
break
}
alloc := raw.(*structs.Allocation)
allocs = append(allocs, alloc.Stub())
}
reply.Allocations = allocs
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return a.srv.blockingRPC(&opts)
}
// GetAlloc is used to lookup a particular allocation
@@ -58,30 +69,38 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_alloc"}, time.Now())
// Lookup the allocation
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.AllocByID(args.AllocID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Alloc: args.AllocID}),
run: func() error {
// Lookup the allocation
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.AllocByID(args.AllocID)
if err != nil {
return err
}
// Setup the output
if out != nil {
reply.Alloc = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
}
// Setup the output
reply.Alloc = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return a.srv.blockingRPC(&opts)
}


@@ -3,6 +3,7 @@ package nomad
import (
"reflect"
"testing"
"time"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/mock"
@@ -44,6 +45,74 @@ func TestAllocEndpoint_List(t *testing.T) {
}
}
func TestAllocEndpoint_List_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the alloc
alloc := mock.Alloc()
// Upsert alloc triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertAllocs(2, []*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.AllocListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 1,
},
}
start := time.Now()
var resp structs.AllocListResponse
if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 2 {
t.Fatalf("Bad index: %d %d", resp.Index, 2)
}
if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID {
t.Fatalf("bad: %#v", resp.Allocations)
}
// Client updates trigger watches
alloc2 := mock.Alloc()
alloc2.ID = alloc.ID
alloc2.ClientStatus = structs.AllocClientStatusRunning
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpdateAllocFromClient(3, alloc2); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 2
start = time.Now()
var resp2 structs.AllocListResponse
if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 3 {
t.Fatalf("Bad index: %d %d", resp2.Index, 3)
}
if len(resp2.Allocations) != 1 || resp2.Allocations[0].ID != alloc.ID ||
resp2.Allocations[0].ClientStatus != structs.AllocClientStatusRunning {
t.Fatalf("bad: %#v", resp2.Allocations)
}
}
func TestAllocEndpoint_GetAlloc(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -75,3 +144,55 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) {
t.Fatalf("bad: %#v", resp.Alloc)
}
}
func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the allocs
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
// First create an unrelated alloc
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Create the alloc we are watching later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertAllocs(200, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the alloc
get := &structs.AllocSpecificRequest{
AllocID: alloc2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.SingleAllocResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Alloc.GetAlloc", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID {
t.Fatalf("bad: %#v", resp.Alloc)
}
}


@@ -6,6 +6,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
const (
@@ -26,32 +27,40 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now())
// Look for the job
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.EvalByID(args.EvalID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Eval: args.EvalID}),
run: func() error {
// Look for the job
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.EvalByID(args.EvalID)
if err != nil {
return err
}
// Setup the output
if out != nil {
reply.Eval = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
}
// Setup the output
reply.Eval = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return e.srv.blockingRPC(&opts)
}
// Dequeue is used to dequeue a pending evaluation
@@ -219,35 +228,45 @@ func (e *Eval) List(args *structs.EvalListRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now())
// Scan all the evaluations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Evals()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "evals"}),
run: func() error {
// Scan all the evaluations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Evals()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
eval := raw.(*structs.Evaluation)
reply.Evaluations = append(reply.Evaluations, eval)
}
var evals []*structs.Evaluation
for {
raw := iter.Next()
if raw == nil {
break
}
eval := raw.(*structs.Evaluation)
evals = append(evals, eval)
}
reply.Evaluations = evals
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return e.srv.blockingRPC(&opts)
}
// Allocations is used to list the allocations for an evaluation
@@ -258,32 +277,40 @@ func (e *Eval) Allocations(args *structs.EvalSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now())
// Capture the allocations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByEval(args.EvalID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{AllocEval: args.EvalID}),
run: func() error {
// Capture the allocations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByEval(args.EvalID)
if err != nil {
return err
}
// Convert to a stub
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Convert to a stub
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return e.srv.blockingRPC(&opts)
}


@@ -51,6 +51,83 @@ func TestEvalEndpoint_GetEval(t *testing.T) {
}
}
func TestEvalEndpoint_GetEval_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the evals
eval1 := mock.Eval()
eval2 := mock.Eval()
// First create an unrelated eval
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertEvals(100, []*structs.Evaluation{eval1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert the eval we are watching later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertEvals(200, []*structs.Evaluation{eval2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the eval
req := &structs.EvalSpecificRequest{
EvalID: eval2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.SingleEvalResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Eval == nil || resp.Eval.ID != eval2.ID {
t.Fatalf("bad: %#v", resp.Eval)
}
// Eval delete triggers watches
time.AfterFunc(100*time.Millisecond, func() {
err := state.DeleteEval(300, []string{eval2.ID}, []string{})
if err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 250
var resp2 structs.SingleEvalResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 300 {
t.Fatalf("Bad index: %d %d", resp2.Index, 300)
}
if resp2.Eval != nil {
t.Fatalf("bad: %#v", resp2.Eval)
}
}
func TestEvalEndpoint_Dequeue(t *testing.T) {
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
@@ -334,6 +411,70 @@ func TestEvalEndpoint_List(t *testing.T) {
}
}
func TestEvalEndpoint_List_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the eval
eval := mock.Eval()
// Upsert eval triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertEvals(2, []*structs.Evaluation{eval}); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.EvalListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 1,
},
}
start := time.Now()
var resp structs.EvalListResponse
if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 2 {
t.Fatalf("Bad index: %d %d", resp.Index, 2)
}
if len(resp.Evaluations) != 1 || resp.Evaluations[0].ID != eval.ID {
t.Fatalf("bad: %#v", resp.Evaluations)
}
// Eval deletion triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteEval(3, []string{eval.ID}, nil); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 2
start = time.Now()
var resp2 structs.EvalListResponse
if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 3 {
t.Fatalf("Bad index: %d %d", resp2.Index, 3)
}
if len(resp2.Evaluations) != 0 {
t.Fatalf("bad: %#v", resp2.Evaluations)
}
}
func TestEvalEndpoint_Allocations(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -368,3 +509,55 @@ func TestEvalEndpoint_Allocations(t *testing.T) {
t.Fatalf("bad: %#v", resp.Allocations)
}
}
func TestEvalEndpoint_Allocations_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the allocs
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
// Upsert an unrelated alloc first
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert an alloc which will trigger the watch later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertAllocs(200, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the eval
get := &structs.EvalSpecificRequest{
EvalID: alloc2.EvalID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.EvalAllocationsResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Eval.Allocations", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID {
t.Fatalf("bad: %#v", resp.Allocations)
}
}


@@ -6,6 +6,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// Job endpoint is used for job interactions
@@ -180,32 +181,41 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now())
// Look for the job
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.JobByID(args.JobID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Job: args.JobID}),
run: func() error {
// Setup the output
if out != nil {
reply.Job = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
}
// Look for the job
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.JobByID(args.JobID)
if err != nil {
return err
}
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Setup the output
reply.Job = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return j.srv.blockingRPC(&opts)
}
// List is used to list the jobs registered in the system
@@ -216,35 +226,45 @@ func (j *Job) List(args *structs.JobListRequest,
}
defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now())
// Capture all the jobs
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Jobs()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "jobs"}),
run: func() error {
// Capture all the jobs
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Jobs()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
job := raw.(*structs.Job)
reply.Jobs = append(reply.Jobs, job.Stub())
}
var jobs []*structs.JobListStub
for {
raw := iter.Next()
if raw == nil {
break
}
job := raw.(*structs.Job)
jobs = append(jobs, job.Stub())
}
reply.Jobs = jobs
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return j.srv.blockingRPC(&opts)
}
// Allocations is used to list the allocations for a job
@@ -255,34 +275,43 @@ func (j *Job) Allocations(args *structs.JobSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now())
// Capture the allocations
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByJob(args.JobID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{AllocJob: args.JobID}),
run: func() error {
// Capture the allocations
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByJob(args.JobID)
if err != nil {
return err
}
// Convert to stubs
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Convert to stubs
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return j.srv.blockingRPC(&opts)
}
// Evaluations is used to list the evaluations for a job


@@ -3,6 +3,7 @@ package nomad
import (
"reflect"
"testing"
"time"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/mock"
@@ -363,6 +364,80 @@ func TestJobEndpoint_GetJob(t *testing.T) {
}
}
func TestJobEndpoint_GetJob_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the jobs
job1 := mock.Job()
job2 := mock.Job()
// Upsert a job we are not interested in first.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertJob(100, job1); err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert another job later which should trigger the watch.
time.AfterFunc(200*time.Millisecond, func() {
if err := state.UpsertJob(200, job2); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.JobSpecificRequest{
JobID: job2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
start := time.Now()
var resp structs.SingleJobResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Job == nil || resp.Job.ID != job2.ID {
t.Fatalf("bad: %#v", resp.Job)
}
// Job delete fires watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteJob(300, job2.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 250
start = time.Now()
var resp2 structs.SingleJobResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 300 {
t.Fatalf("Bad index: %d %d", resp2.Index, 300)
}
if resp2.Job != nil {
t.Fatalf("bad: %#v", resp2.Job)
}
}
func TestJobEndpoint_ListJobs(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -397,6 +472,70 @@ func TestJobEndpoint_ListJobs(t *testing.T) {
}
}
func TestJobEndpoint_ListJobs_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the job
job := mock.Job()
// Upsert job triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertJob(100, job); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.JobListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
start := time.Now()
var resp structs.JobListResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 100 {
t.Fatalf("Bad index: %d %d", resp.Index, 100)
}
if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID {
t.Fatalf("bad: %#v", resp.Jobs)
}
// Job deletion triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteJob(200, job.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 150
start = time.Now()
var resp2 structs.JobListResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 200 {
t.Fatalf("Bad index: %d %d", resp2.Index, 200)
}
if len(resp2.Jobs) != 0 {
t.Fatalf("bad: %#v", resp2.Jobs)
}
}
func TestJobEndpoint_Allocations(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -432,6 +571,59 @@ func TestJobEndpoint_Allocations(t *testing.T) {
}
}
func TestJobEndpoint_Allocations_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the register request
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
alloc2.JobID = "job1"
state := s1.fsm.State()
// First upsert an unrelated alloc
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert an alloc for the job we are interested in later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertAllocs(200, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the job's allocations
get := &structs.JobSpecificRequest{
JobID: "job1",
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.JobAllocationsResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Job.Allocations", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" {
t.Fatalf("bad: %#v", resp.Allocations)
}
}
func TestJobEndpoint_Evaluations(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()


@@ -1,6 +1,9 @@
package mock
import "github.com/hashicorp/nomad/nomad/structs"
import (
"github.com/hashicorp/nomad/nomad/structs"
"time"
)
func Node() *structs.Node {
node := &structs.Node{
@@ -71,6 +74,11 @@ func Job() *structs.Job {
&structs.TaskGroup{
Name: "web",
Count: 10,
RestartPolicy: &structs.RestartPolicy{
Attempts: 3,
Interval: 10 * time.Minute,
Delay: 1 * time.Minute,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "web",
@@ -131,6 +139,11 @@ func SystemJob() *structs.Job {
&structs.TaskGroup{
Name: "web",
Count: 1,
RestartPolicy: &structs.RestartPolicy{
Attempts: 3,
Interval: 10 * time.Minute,
Delay: 1 * time.Minute,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "web",


@@ -6,6 +6,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// Node endpoint is used for client interactions
@@ -282,37 +283,45 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())
// Verify the arguments
if args.NodeID == "" {
return fmt.Errorf("missing node ID")
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Node: args.NodeID}),
run: func() error {
// Verify the arguments
if args.NodeID == "" {
return fmt.Errorf("missing node ID")
}
// Look for the node
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.NodeByID(args.NodeID)
if err != nil {
return err
}
// Look for the node
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.NodeByID(args.NodeID)
if err != nil {
return err
}
// Setup the output
if out != nil {
reply.Node = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
}
// Setup the output
reply.Node = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return n.srv.blockingRPC(&opts)
}
// GetAllocs is used to request allocations for a specific node
@@ -330,9 +339,9 @@ func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
allocWatch: args.NodeID,
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}),
run: func() error {
// Look for the node
snap, err := n.srv.fsm.State().Snapshot()
@@ -404,35 +413,45 @@ func (n *Node) List(args *structs.NodeListRequest,
}
defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())
// Capture all the nodes
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Nodes()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "nodes"}),
run: func() error {
// Capture all the nodes
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Nodes()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
node := raw.(*structs.Node)
reply.Nodes = append(reply.Nodes, node.Stub())
}
var nodes []*structs.NodeListStub
for {
raw := iter.Next()
if raw == nil {
break
}
node := raw.(*structs.Node)
nodes = append(nodes, node.Stub())
}
reply.Nodes = nodes
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return n.srv.blockingRPC(&opts)
}
// createNodeEvals is used to create evaluations for each alloc on a node.


@@ -371,6 +371,107 @@ func TestClientEndpoint_GetNode(t *testing.T) {
}
}
func TestClientEndpoint_GetNode_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the node
node1 := mock.Node()
node2 := mock.Node()
// First create an unrelated node.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertNode(100, node1); err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert the node we are watching later
time.AfterFunc(200*time.Millisecond, func() {
if err := state.UpsertNode(200, node2); err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the node
req := &structs.NodeSpecificRequest{
NodeID: node2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.SingleNodeResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Node == nil || resp.Node.ID != node2.ID {
t.Fatalf("bad: %#v", resp.Node)
}
// Node update triggers watches
time.AfterFunc(100*time.Millisecond, func() {
nodeUpdate := mock.Node()
nodeUpdate.ID = node2.ID
nodeUpdate.Status = structs.NodeStatusDown
if err := state.UpsertNode(300, nodeUpdate); err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 250
var resp2 structs.SingleNodeResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 300 {
t.Fatalf("Bad index: %d %d", resp2.Index, 300)
}
if resp2.Node == nil || resp2.Node.Status != structs.NodeStatusDown {
t.Fatalf("bad: %#v", resp2.Node)
}
// Node delete triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteNode(400, node2.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 350
var resp3 structs.SingleNodeResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp3); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp3)
}
if resp3.Index != 400 {
t.Fatalf("Bad index: %d %d", resp3.Index, 400)
}
if resp3.Node != nil {
t.Fatalf("bad: %#v", resp3.Node)
}
}
func TestClientEndpoint_GetAllocs(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -457,16 +558,15 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) {
alloc.NodeID = node.ID
state := s1.fsm.State()
start := time.Now()
go func() {
time.Sleep(100 * time.Millisecond)
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
}
}()
})
// Lookup the allocs in a blocking query
get := &structs.NodeSpecificRequest{
req := &structs.NodeSpecificRequest{
NodeID: node.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
@@ -475,7 +575,7 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) {
},
}
var resp2 structs.NodeAllocsResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", get, &resp2); err != nil {
if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
@@ -491,6 +591,34 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) {
if len(resp2.Allocs) != 1 || resp2.Allocs[0].ID != alloc.ID {
t.Fatalf("bad: %#v", resp2.Allocs)
}
// Alloc updates fire watches
time.AfterFunc(100*time.Millisecond, func() {
allocUpdate := mock.Alloc()
allocUpdate.NodeID = alloc.NodeID
allocUpdate.ID = alloc.ID
allocUpdate.ClientStatus = structs.AllocClientStatusRunning
err := state.UpdateAllocFromClient(200, allocUpdate)
if err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 150
var resp3 structs.NodeAllocsResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp3); err != nil {
t.Fatalf("err: %v", err)
}
if time.Since(start) < 100*time.Millisecond {
t.Fatalf("too fast")
}
if resp3.Index != 200 {
t.Fatalf("Bad index: %d %d", resp3.Index, 200)
}
if len(resp3.Allocs) != 1 || resp3.Allocs[0].ClientStatus != structs.AllocClientStatusRunning {
t.Fatalf("bad: %#v", resp3.Allocs[0])
}
}
func TestClientEndpoint_UpdateAlloc(t *testing.T) {
@@ -752,3 +880,115 @@ func TestClientEndpoint_ListNodes(t *testing.T) {
t.Fatalf("bad: %#v", resp2.Nodes[0])
}
}
func TestClientEndpoint_ListNodes_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the node
node := mock.Node()
// Node upsert triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertNode(2, node); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.NodeListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 1,
},
}
start := time.Now()
var resp structs.NodeListResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 2 {
t.Fatalf("Bad index: %d %d", resp.Index, 2)
}
if len(resp.Nodes) != 1 || resp.Nodes[0].ID != node.ID {
t.Fatalf("bad: %#v", resp.Nodes)
}
// Node drain updates trigger watches.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpdateNodeDrain(3, node.ID, true); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 2
var resp2 structs.NodeListResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 3 {
t.Fatalf("Bad index: %d %d", resp2.Index, 3)
}
if len(resp2.Nodes) != 1 || !resp2.Nodes[0].Drain {
t.Fatalf("bad: %#v", resp2.Nodes)
}
// Node status update triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpdateNodeStatus(4, node.ID, structs.NodeStatusDown); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 3
var resp3 structs.NodeListResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp3); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp3)
}
if resp3.Index != 4 {
t.Fatalf("Bad index: %d %d", resp3.Index, 4)
}
if len(resp3.Nodes) != 1 || resp3.Nodes[0].Status != structs.NodeStatusDown {
t.Fatalf("bad: %#v", resp3.Nodes)
}
// Node delete triggers watches.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteNode(5, node.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 4
var resp4 structs.NodeListResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp4); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp4)
}
if resp4.Index != 5 {
t.Fatalf("Bad index: %d %d", resp4.Index, 5)
}
if len(resp4.Nodes) != 0 {
t.Fatalf("bad: %#v", resp4.Nodes)
}
}


@@ -13,6 +13,7 @@ import (
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
"github.com/hashicorp/raft"
"github.com/hashicorp/yamux"
)
@@ -268,10 +269,10 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) {
// blockingOptions is used to parameterize blockingRPC
type blockingOptions struct {
queryOpts *structs.QueryOptions
queryMeta *structs.QueryMeta
allocWatch string
run func() error
queryOpts *structs.QueryOptions
queryMeta *structs.QueryMeta
watch watch.Items
run func() error
}
// blockingRPC is used for queries that need to wait for a
@@ -306,17 +307,13 @@ func (s *Server) blockingRPC(opts *blockingOptions) error {
state = s.fsm.State()
defer func() {
timeout.Stop()
if opts.allocWatch != "" {
state.StopWatchAllocs(opts.allocWatch, notifyCh)
}
state.StopWatch(opts.watch, notifyCh)
}()
REGISTER_NOTIFY:
// Register the notification channel. This may be done
// multiple times if we have not reached the target wait index.
if opts.allocWatch != "" {
state.WatchAllocs(opts.allocWatch, notifyCh)
}
state.Watch(opts.watch, notifyCh)
RUN_QUERY:
// Update the query meta data
@@ -327,7 +324,7 @@ RUN_QUERY:
err := opts.run()
// Check for minimum query time
if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
select {
case <-notifyCh:
goto REGISTER_NOTIFY
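The condition change above (blocking whenever the caller sets MinQueryIndex, rather than only when the reply index is non-zero) is easier to see in isolation. Below is a self-contained toy, not Nomad code, that mirrors the control flow of blockingRPC: register a watcher, run the query, and if the result has not passed MinQueryIndex, park on the notification channel and re-run when woken or when the timeout fires. All names here are illustrative.

package main

import (
	"fmt"
	"sync"
	"time"
)

// toyState stands in for the state store: a single index plus one-shot
// watchers that are notified (and dropped) whenever the index changes.
type toyState struct {
	mu       sync.Mutex
	index    uint64
	watchers []chan struct{}
}

func (s *toyState) watch(ch chan struct{}) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.watchers = append(s.watchers, ch)
}

func (s *toyState) bump() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.index++
	for _, ch := range s.watchers {
		select {
		case ch <- struct{}{}:
		default:
		}
	}
	s.watchers = nil
}

// blockingQuery mirrors blockingRPC's loop: if MinQueryIndex is unset or the
// data has moved past it, return immediately; otherwise wait for a
// notification (re-registering each time) or the timeout.
func blockingQuery(s *toyState, minIndex uint64, maxWait time.Duration) uint64 {
	timeout := time.After(maxWait)
	notify := make(chan struct{}, 1)
	for {
		s.watch(notify) // REGISTER_NOTIFY

		s.mu.Lock() // RUN_QUERY
		index := s.index
		s.mu.Unlock()

		if minIndex == 0 || index > minIndex {
			return index
		}
		select {
		case <-notify:
			// Something changed; run the query again.
		case <-timeout:
			return index
		}
	}
}

func main() {
	s := &toyState{index: 5}
	go func() {
		time.Sleep(100 * time.Millisecond)
		s.bump() // moves the index to 6 and wakes the blocked query
	}()
	fmt.Println(blockingQuery(s, 5, time.Second)) // prints 6
}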


@@ -8,8 +8,16 @@ import (
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
Key string
Value uint64
}
// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
@@ -23,45 +31,6 @@ type StateStore struct {
watch *stateWatch
}
// StateSnapshot is used to provide a point-in-time snapshot
type StateSnapshot struct {
StateStore
}
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions
type StateRestore struct {
txn *memdb.Txn
watch *stateWatch
allocNodes map[string]struct{}
}
// Abort is used to abort the restore operation
func (s *StateRestore) Abort() {
s.txn.Abort()
}
// Commit is used to commit the restore operation
func (s *StateRestore) Commit() {
s.txn.Defer(func() { s.watch.notifyAllocs(s.allocNodes) })
s.txn.Commit()
}
// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
Key string
Value uint64
}
// stateWatch holds shared state for watching updates. This is
// outside of StateStore so it can be shared with snapshots.
type stateWatch struct {
allocs map[string]*NotifyGroup
allocLock sync.Mutex
}
// NewStateStore is used to create a new state store
func NewStateStore(logOutput io.Writer) (*StateStore, error) {
// Create the MemDB
@@ -70,16 +39,11 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) {
return nil, fmt.Errorf("state store setup failed: %v", err)
}
// Create the watch entry
watch := &stateWatch{
allocs: make(map[string]*NotifyGroup),
}
// Create the state store
s := &StateStore{
logger: log.New(logOutput, "", log.LstdFlags),
db: db,
watch: watch,
watch: newStateWatch(),
}
return s, nil
}
@@ -104,55 +68,21 @@ func (s *StateStore) Snapshot() (*StateSnapshot, error) {
func (s *StateStore) Restore() (*StateRestore, error) {
txn := s.db.Txn(true)
r := &StateRestore{
txn: txn,
watch: s.watch,
allocNodes: make(map[string]struct{}),
txn: txn,
watch: s.watch,
items: watch.NewItems(),
}
return r, nil
}
// WatchAllocs is used to subscribe a channel to changes in allocations for a node
func (s *StateStore) WatchAllocs(node string, notify chan struct{}) {
s.watch.allocLock.Lock()
defer s.watch.allocLock.Unlock()
// Check for an existing notify group
if grp, ok := s.watch.allocs[node]; ok {
grp.Wait(notify)
return
}
// Create new notify group
grp := &NotifyGroup{}
grp.Wait(notify)
s.watch.allocs[node] = grp
// Watch subscribes a channel to a set of watch items.
func (s *StateStore) Watch(items watch.Items, notify chan struct{}) {
s.watch.watch(items, notify)
}
// StopWatchAllocs is used to unsubscribe a channel from changes in allocations
func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) {
s.watch.allocLock.Lock()
defer s.watch.allocLock.Unlock()
// Check for an existing notify group
if grp, ok := s.watch.allocs[node]; ok {
grp.Clear(notify)
if grp.Empty() {
delete(s.watch.allocs, node)
}
}
}
// notifyAllocs is used to notify any node alloc listeners of a change
func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) {
w.allocLock.Lock()
defer w.allocLock.Unlock()
for node := range nodes {
if grp, ok := w.allocs[node]; ok {
grp.Notify()
delete(w.allocs, node)
}
}
// StopWatch unsubscribes a channel from a set of watch items.
func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) {
s.watch.stopWatch(items, notify)
}
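A hedged sketch of how a consumer of the new subscription API is expected to use it, following the same pattern blockingRPC does. The package name and the helper function are assumptions; the NewItems/Add/Watch/StopWatch calls are the ones introduced above.

package state // assumed package name; not visible in this diff

import "github.com/hashicorp/nomad/nomad/watch"

// waitForNode blocks until a write touches the nodes table or the given node
// (for example UpsertNode or DeleteNode above), or until stop is closed.
func waitForNode(s *StateStore, nodeID string, stop <-chan struct{}) {
	items := watch.NewItems()
	items.Add(watch.Item{Table: "nodes"})
	items.Add(watch.Item{Node: nodeID})

	notify := make(chan struct{}, 1)
	s.Watch(items, notify)
	defer s.StopWatch(items, notify)

	select {
	case <-notify:
		// Fired via txn.Defer(func() { s.watch.notify(watcher) }) on commit.
	case <-stop:
	}
}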
// UpsertNode is used to register a node or update a node definition
@@ -162,6 +92,10 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: node.ID})
// Check if the node already exists
existing, err := txn.First("nodes", "id", node.ID)
if err != nil {
@@ -187,6 +121,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -196,6 +131,10 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: nodeID})
// Lookup the node
existing, err := txn.First("nodes", "id", nodeID)
if err != nil {
@@ -213,6 +152,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -222,6 +162,10 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: nodeID})
// Lookup the node
existing, err := txn.First("nodes", "id", nodeID)
if err != nil {
@@ -248,6 +192,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -257,6 +202,10 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: nodeID})
// Lookup the node
existing, err := txn.First("nodes", "id", nodeID)
if err != nil {
@@ -283,6 +232,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -319,6 +269,10 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "jobs"})
watcher.Add(watch.Item{Job: job.ID})
// Check if the job already exists
existing, err := txn.First("jobs", "id", job.ID)
if err != nil {
@@ -342,6 +296,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -351,6 +306,10 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "jobs"})
watcher.Add(watch.Item{Job: jobID})
// Lookup the node
existing, err := txn.First("jobs", "id", jobID)
if err != nil {
@@ -368,6 +327,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -417,13 +377,18 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "evals"})
// Do a nested upsert
for _, eval := range evals {
watcher.Add(watch.Item{Eval: eval.ID})
if err := s.nestedUpsertEval(txn, index, eval); err != nil {
return err
}
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -459,7 +424,9 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct
func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
txn := s.db.Txn(true)
defer txn.Abort()
nodes := make(map[string]struct{})
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "evals"})
watcher.Add(watch.Item{Table: "allocs"})
for _, eval := range evals {
existing, err := txn.First("evals", "id", eval)
@@ -472,6 +439,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e
if err := txn.Delete("evals", existing); err != nil {
return fmt.Errorf("eval delete failed: %v", err)
}
watcher.Add(watch.Item{Eval: eval})
}
for _, alloc := range allocs {
@@ -482,10 +450,14 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e
if existing == nil {
continue
}
nodes[existing.(*structs.Allocation).NodeID] = struct{}{}
if err := txn.Delete("allocs", existing); err != nil {
return fmt.Errorf("alloc delete failed: %v", err)
}
realAlloc := existing.(*structs.Allocation)
watcher.Add(watch.Item{Alloc: realAlloc.ID})
watcher.Add(watch.Item{AllocEval: realAlloc.EvalID})
watcher.Add(watch.Item{AllocJob: realAlloc.JobID})
watcher.Add(watch.Item{AllocNode: realAlloc.NodeID})
}
// Update the indexes
@@ -495,7 +467,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e
if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notifyAllocs(nodes) })
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -557,6 +530,13 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "allocs"})
watcher.Add(watch.Item{Alloc: alloc.ID})
watcher.Add(watch.Item{AllocEval: alloc.EvalID})
watcher.Add(watch.Item{AllocJob: alloc.JobID})
watcher.Add(watch.Item{AllocNode: alloc.NodeID})
// Look for existing alloc
existing, err := txn.First("allocs", "id", alloc.ID)
if err != nil {
@@ -590,8 +570,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati
return fmt.Errorf("index update failed: %v", err)
}
nodes := map[string]struct{}{alloc.NodeID: struct{}{}}
txn.Defer(func() { s.watch.notifyAllocs(nodes) })
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -601,7 +580,9 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati
func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
txn := s.db.Txn(true)
defer txn.Abort()
nodes := make(map[string]struct{})
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "allocs"})
// Handle the allocations
for _, alloc := range allocs {
@@ -620,10 +601,14 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er
alloc.ClientStatus = exist.ClientStatus
alloc.ClientDescription = exist.ClientDescription
}
nodes[alloc.NodeID] = struct{}{}
if err := txn.Insert("allocs", alloc); err != nil {
return fmt.Errorf("alloc insert failed: %v", err)
}
watcher.Add(watch.Item{Alloc: alloc.ID})
watcher.Add(watch.Item{AllocEval: alloc.EvalID})
watcher.Add(watch.Item{AllocJob: alloc.JobID})
watcher.Add(watch.Item{AllocNode: alloc.NodeID})
}
// Update the indexes
@@ -631,7 +616,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notifyAllocs(nodes) })
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -753,8 +738,35 @@ func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
return iter, nil
}
// StateSnapshot is used to provide a point-in-time snapshot
type StateSnapshot struct {
StateStore
}
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions
type StateRestore struct {
txn *memdb.Txn
watch *stateWatch
items watch.Items
}
// Abort is used to abort the restore operation
func (s *StateRestore) Abort() {
s.txn.Abort()
}
// Commit is used to commit the restore operation
func (s *StateRestore) Commit() {
s.txn.Defer(func() { s.watch.notify(s.items) })
s.txn.Commit()
}
// NodeRestore is used to restore a node
func (r *StateRestore) NodeRestore(node *structs.Node) error {
r.items.Add(watch.Item{Table: "nodes"})
r.items.Add(watch.Item{Node: node.ID})
if err := r.txn.Insert("nodes", node); err != nil {
return fmt.Errorf("node insert failed: %v", err)
}
@@ -763,6 +775,8 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error {
// JobRestore is used to restore a job
func (r *StateRestore) JobRestore(job *structs.Job) error {
r.items.Add(watch.Item{Table: "jobs"})
r.items.Add(watch.Item{Job: job.ID})
if err := r.txn.Insert("jobs", job); err != nil {
return fmt.Errorf("job insert failed: %v", err)
}
@@ -771,6 +785,8 @@ func (r *StateRestore) JobRestore(job *structs.Job) error {
// EvalRestore is used to restore an evaluation
func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
r.items.Add(watch.Item{Table: "evals"})
r.items.Add(watch.Item{Eval: eval.ID})
if err := r.txn.Insert("evals", eval); err != nil {
return fmt.Errorf("eval insert failed: %v", err)
}
@@ -779,7 +795,11 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
// AllocRestore is used to restore an allocation
func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
r.allocNodes[alloc.NodeID] = struct{}{}
r.items.Add(watch.Item{Table: "allocs"})
r.items.Add(watch.Item{Alloc: alloc.ID})
r.items.Add(watch.Item{AllocEval: alloc.EvalID})
r.items.Add(watch.Item{AllocJob: alloc.JobID})
r.items.Add(watch.Item{AllocNode: alloc.NodeID})
if err := r.txn.Insert("allocs", alloc); err != nil {
return fmt.Errorf("alloc insert failed: %v", err)
}
@@ -793,3 +813,59 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
}
return nil
}
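
Restore, the per-object restore helpers, and Commit above form one large memdb transaction whose accumulated watch items are notified exactly once at commit time. A minimal sketch of that flow, assuming it also lives in the state package; restoreSketch and its inputs are illustrative only:

```go
package state

import "github.com/hashicorp/nomad/nomad/structs"

// restoreSketch replays nodes and jobs through a single restore
// transaction; the accumulated watch items fire once on Commit.
func restoreSketch(state *StateStore, nodes []*structs.Node, jobs []*structs.Job) error {
	restore, err := state.Restore()
	if err != nil {
		return err
	}
	for _, node := range nodes {
		if err := restore.NodeRestore(node); err != nil {
			restore.Abort() // discard the whole transaction, no items fire
			return err
		}
	}
	for _, job := range jobs {
		if err := restore.JobRestore(job); err != nil {
			restore.Abort()
			return err
		}
	}
	restore.Commit() // defers a single notify over every item added above
	return nil
}
```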
// stateWatch holds shared state for watching updates. This is
// outside of StateStore so it can be shared with snapshots.
type stateWatch struct {
items map[watch.Item]*NotifyGroup
l sync.Mutex
}
// newStateWatch creates a new stateWatch for change notification.
func newStateWatch() *stateWatch {
return &stateWatch{
items: make(map[watch.Item]*NotifyGroup),
}
}
// watch subscribes a channel to the given watch items.
func (w *stateWatch) watch(items watch.Items, ch chan struct{}) {
w.l.Lock()
defer w.l.Unlock()
for item, _ := range items {
grp, ok := w.items[item]
if !ok {
grp = new(NotifyGroup)
w.items[item] = grp
}
grp.Wait(ch)
}
}
// stopWatch unsubscribes a channel from the given watch items.
func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) {
w.l.Lock()
defer w.l.Unlock()
for item, _ := range items {
if grp, ok := w.items[item]; ok {
grp.Clear(ch)
if grp.Empty() {
delete(w.items, item)
}
}
}
}
// notify is used to fire notifications on the given watch items.
func (w *stateWatch) notify(items watch.Items) {
w.l.Lock()
defer w.l.Unlock()
for wi, _ := range items {
if grp, ok := w.items[wi]; ok {
grp.Notify()
}
}
}
View File
@@ -8,6 +8,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
func testStateStore(t *testing.T) *StateStore {
@@ -25,6 +26,11 @@ func TestStateStore_UpsertNode_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -46,12 +52,19 @@ func TestStateStore_UpsertNode_Node(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_DeleteNode_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -78,12 +91,19 @@ func TestStateStore_DeleteNode_Node(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpdateNodeStatus_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -113,12 +133,19 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpdateNodeDrain_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -148,6 +175,8 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_Nodes(t *testing.T) {
@@ -188,18 +217,22 @@ func TestStateStore_Nodes(t *testing.T) {
func TestStateStore_RestoreNode(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
node := mock.Node()
err = restore.NodeRestore(node)
if err != nil {
t.Fatalf("err: %v", err)
}
restore.Commit()
out, err := state.NodeByID(node.ID)
@@ -210,12 +243,19 @@ func TestStateStore_RestoreNode(t *testing.T) {
if !reflect.DeepEqual(out, node) {
t.Fatalf("Bad: %#v %#v", out, node)
}
notify.verify(t)
}
func TestStateStore_UpsertJob_Job(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -237,12 +277,19 @@ func TestStateStore_UpsertJob_Job(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpdateUpsertJob_Job(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -278,12 +325,19 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_DeleteJob_Job(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -310,6 +364,8 @@ func TestStateStore_DeleteJob_Job(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_Jobs(t *testing.T) {
@@ -417,18 +473,22 @@ func TestStateStore_JobsByScheduler(t *testing.T) {
func TestStateStore_RestoreJob(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
job := mock.Job()
err = restore.JobRestore(job)
if err != nil {
t.Fatalf("err: %v", err)
}
restore.Commit()
out, err := state.JobByID(job.ID)
@@ -439,6 +499,8 @@ func TestStateStore_RestoreJob(t *testing.T) {
if !reflect.DeepEqual(out, job) {
t.Fatalf("Bad: %#v %#v", out, job)
}
notify.verify(t)
}
func TestStateStore_Indexes(t *testing.T) {
@@ -503,6 +565,11 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) {
state := testStateStore(t)
eval := mock.Eval()
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Eval: eval.ID})
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
if err != nil {
t.Fatalf("err: %v", err)
@@ -524,6 +591,8 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) {
@@ -535,6 +604,11 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) {
t.Fatalf("err: %v", err)
}
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Eval: eval.ID})
eval2 := mock.Eval()
eval2.ID = eval.ID
err = state.UpsertEvals(1001, []*structs.Evaluation{eval2})
@@ -565,40 +639,54 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_DeleteEval_Eval(t *testing.T) {
state := testStateStore(t)
eval := mock.Eval()
eval1 := mock.Eval()
eval2 := mock.Eval()
alloc := mock.Alloc()
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
err := state.UpsertEvals(1000, []*structs.Evaluation{eval, eval2})
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Table: "allocs"},
watch.Item{Eval: eval1.ID},
watch.Item{Eval: eval2.ID},
watch.Item{Alloc: alloc1.ID},
watch.Item{Alloc: alloc2.ID},
watch.Item{AllocEval: alloc1.EvalID},
watch.Item{AllocEval: alloc2.EvalID},
watch.Item{AllocJob: alloc1.JobID},
watch.Item{AllocJob: alloc2.JobID},
watch.Item{AllocNode: alloc1.NodeID},
watch.Item{AllocNode: alloc2.NodeID})
err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2})
if err != nil {
t.Fatalf("err: %v", err)
}
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc1, alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
notify1 := make(chan struct{}, 1)
state.WatchAllocs(alloc.NodeID, notify1)
err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID})
err = state.DeleteEval(1002, []string{eval1.ID, eval2.ID}, []string{alloc1.ID, alloc2.ID})
if err != nil {
t.Fatalf("err: %v", err)
}
out, err := state.EvalByID(eval.ID)
out, err := state.EvalByID(eval1.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if out != nil {
t.Fatalf("bad: %#v %#v", eval, out)
t.Fatalf("bad: %#v %#v", eval1, out)
}
out, err = state.EvalByID(eval2.ID)
@@ -607,16 +695,16 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) {
}
if out != nil {
t.Fatalf("bad: %#v %#v", eval, out)
t.Fatalf("bad: %#v %#v", eval1, out)
}
outA, err := state.AllocByID(alloc.ID)
outA, err := state.AllocByID(alloc1.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if out != nil {
t.Fatalf("bad: %#v %#v", alloc, outA)
t.Fatalf("bad: %#v %#v", alloc1, outA)
}
outA, err = state.AllocByID(alloc2.ID)
@@ -625,7 +713,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) {
}
if out != nil {
t.Fatalf("bad: %#v %#v", alloc, outA)
t.Fatalf("bad: %#v %#v", alloc1, outA)
}
index, err := state.Index("evals")
@@ -644,11 +732,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) {
t.Fatalf("bad: %d", index)
}
select {
case <-notify1:
default:
t.Fatalf("should be notified")
}
notify.verify(t)
}
func TestStateStore_EvalsByJob(t *testing.T) {
@@ -720,34 +804,48 @@ func TestStateStore_Evals(t *testing.T) {
func TestStateStore_RestoreEval(t *testing.T) {
state := testStateStore(t)
eval := mock.Eval()
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Eval: eval.ID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
job := mock.Eval()
err = restore.EvalRestore(job)
err = restore.EvalRestore(eval)
if err != nil {
t.Fatalf("err: %v", err)
}
restore.Commit()
out, err := state.EvalByID(job.ID)
out, err := state.EvalByID(eval.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if !reflect.DeepEqual(out, job) {
t.Fatalf("Bad: %#v %#v", out, job)
if !reflect.DeepEqual(out, eval) {
t.Fatalf("Bad: %#v %#v", out, eval)
}
notify.verify(t)
}
func TestStateStore_UpdateAllocFromClient(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc.ID},
watch.Item{AllocEval: alloc.EvalID},
watch.Item{AllocJob: alloc.JobID},
watch.Item{AllocNode: alloc.NodeID})
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
@@ -779,12 +877,22 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpsertAlloc_Alloc(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc.ID},
watch.Item{AllocEval: alloc.EvalID},
watch.Item{AllocJob: alloc.JobID},
watch.Item{AllocNode: alloc.NodeID})
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
@@ -806,35 +914,8 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
}
func TestStateStore_WatchAllocs(t *testing.T) {
state := testStateStore(t)
notify1 := make(chan struct{}, 1)
notify2 := make(chan struct{}, 1)
state.WatchAllocs("foo", notify1)
state.WatchAllocs("foo", notify2)
state.StopWatchAllocs("foo", notify2)
alloc := mock.Alloc()
alloc.NodeID = "foo"
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
}
select {
case <-notify1:
default:
t.Fatalf("should be notified")
}
select {
case <-notify2:
t.Fatalf("should not be notified")
default:
}
notify.verify(t)
}
func TestStateStore_UpdateAlloc_Alloc(t *testing.T) {
@@ -849,6 +930,15 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) {
alloc2 := mock.Alloc()
alloc2.ID = alloc.ID
alloc2.NodeID = alloc.NodeID + ".new"
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc2.ID},
watch.Item{AllocEval: alloc2.EvalID},
watch.Item{AllocJob: alloc2.JobID},
watch.Item{AllocNode: alloc2.NodeID})
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
@@ -877,6 +967,8 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_EvictAlloc_Alloc(t *testing.T) {
@@ -1008,13 +1100,21 @@ func TestStateStore_Allocs(t *testing.T) {
func TestStateStore_RestoreAlloc(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc.ID},
watch.Item{AllocEval: alloc.EvalID},
watch.Item{AllocJob: alloc.JobID},
watch.Item{AllocNode: alloc.NodeID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
alloc := mock.Alloc()
err = restore.AllocRestore(alloc)
if err != nil {
t.Fatalf("err: %v", err)
@@ -1030,6 +1130,87 @@ func TestStateStore_RestoreAlloc(t *testing.T) {
if !reflect.DeepEqual(out, alloc) {
t.Fatalf("Bad: %#v %#v", out, alloc)
}
notify.verify(t)
}
func TestStateWatch_watch(t *testing.T) {
sw := newStateWatch()
notify1 := make(chan struct{}, 1)
notify2 := make(chan struct{}, 1)
notify3 := make(chan struct{}, 1)
// Notifications trigger subscribed channels
sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify1)
sw.watch(watch.NewItems(watch.Item{Table: "bar"}), notify2)
sw.watch(watch.NewItems(watch.Item{Table: "baz"}), notify3)
items := watch.NewItems()
items.Add(watch.Item{Table: "foo"})
items.Add(watch.Item{Table: "bar"})
sw.notify(items)
if len(notify1) != 1 {
t.Fatalf("should notify")
}
if len(notify2) != 1 {
t.Fatalf("should notify")
}
if len(notify3) != 0 {
t.Fatalf("should not notify")
}
}
func TestStateWatch_stopWatch(t *testing.T) {
sw := newStateWatch()
notify := make(chan struct{})
// First subscribe
sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify)
// Unsubscribe stop notifications
sw.stopWatch(watch.NewItems(watch.Item{Table: "foo"}), notify)
// Check that the group was removed
if _, ok := sw.items[watch.Item{Table: "foo"}]; ok {
t.Fatalf("should remove group")
}
// Check that we are not notified
sw.notify(watch.NewItems(watch.Item{Table: "foo"}))
if len(notify) != 0 {
t.Fatalf("should not notify")
}
}
// setupNotifyTest takes a state store and a set of watch items, then creates
// and subscribes a notification channel for each item.
func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest {
var n notifyTest
for _, item := range items {
ch := make(chan struct{}, 1)
state.Watch(watch.NewItems(item), ch)
n = append(n, &notifyTestCase{item, ch})
}
return n
}
// notifyTestCase is used to set up and verify watch triggers.
type notifyTestCase struct {
item watch.Item
ch chan struct{}
}
// notifyTest is a suite of notifyTestCases.
type notifyTest []*notifyTestCase
// verify ensures that each channel received a notification.
func (n notifyTest) verify(t *testing.T) {
for _, tcase := range n {
if len(tcase.ch) != 1 {
t.Fatalf("should notify %#v", tcase.item)
}
}
}
// NodeIDSort is used to sort nodes by ID
View File
@@ -14,8 +14,17 @@ import (
)
var (
ErrNoLeader = fmt.Errorf("No cluster leader")
ErrNoRegionPath = fmt.Errorf("No path to region")
ErrNoLeader = fmt.Errorf("No cluster leader")
ErrNoRegionPath = fmt.Errorf("No path to region")
defaultServiceJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second,
Attempts: 2,
Interval: 1 * time.Minute,
}
defaultBatchJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second,
Attempts: 15,
}
)
type MessageType uint8
@@ -898,6 +907,33 @@ func (u *UpdateStrategy) Rolling() bool {
return u.Stagger > 0 && u.MaxParallel > 0
}
// RestartPolicy influences how Nomad restarts Tasks when they
// crash or fail.
type RestartPolicy struct {
Attempts int
Interval time.Duration
Delay time.Duration
}
func (r *RestartPolicy) Validate() error {
if time.Duration(r.Attempts)*r.Delay > r.Interval {
return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)
}
return nil
}
func NewRestartPolicy(jobType string) *RestartPolicy {
switch jobType {
case JobTypeService:
rp := defaultServiceJobRestartPolicy
return &rp
case JobTypeBatch:
rp := defaultBatchJobRestartPolicy
return &rp
}
return nil
}
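
Validate above rejects a policy whose restarts cannot fit in its window: Attempts * Delay must not exceed Interval. A minimal sketch of that arithmetic, assuming it sits in the structs package; the function name and the literal durations are illustrative only:

```go
package structs

import (
	"fmt"
	"time"
)

// restartPolicySketch walks the Validate arithmetic: a policy is rejected
// when Attempts * Delay exceeds Interval, because that many restarts could
// never fit inside the window.
func restartPolicySketch() error {
	ok := &RestartPolicy{
		Attempts: 2,
		Delay:    15 * time.Second,
		Interval: 1 * time.Minute, // 2 * 15s = 30s <= 1m, accepted
	}
	if err := ok.Validate(); err != nil {
		return err
	}

	bad := &RestartPolicy{
		Attempts: 10,
		Delay:    10 * time.Second,
		Interval: 30 * time.Second, // 10 * 10s = 100s > 30s, rejected
	}
	if err := bad.Validate(); err == nil {
		return fmt.Errorf("expected the policy to fail validation")
	}
	return nil
}
```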
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
@@ -913,6 +949,9 @@ type TaskGroup struct {
// all the tasks contained.
Constraints []*Constraint
//RestartPolicy of a TaskGroup
RestartPolicy *RestartPolicy
// Tasks are the collection of tasks that this task group needs to run
Tasks []*Task
@@ -940,6 +979,10 @@ func (tg *TaskGroup) Validate() error {
}
}
if err := tg.RestartPolicy.Validate(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
// Check for duplicate tasks
tasks := make(map[string]int)
for idx, task := range tg.Tasks {
View File
@@ -1,11 +1,11 @@
package structs
import (
"github.com/hashicorp/go-multierror"
"reflect"
"strings"
"testing"
"github.com/hashicorp/go-multierror"
"time"
)
func TestJob_Validate(t *testing.T) {
@@ -44,11 +44,27 @@ func TestJob_Validate(t *testing.T) {
TaskGroups: []*TaskGroup{
&TaskGroup{
Name: "web",
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
},
&TaskGroup{
Name: "web",
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
},
&TaskGroup{
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
},
&TaskGroup{},
},
}
err = j.Validate()
@@ -65,7 +81,13 @@ func TestJob_Validate(t *testing.T) {
}
func TestTaskGroup_Validate(t *testing.T) {
tg := &TaskGroup{}
tg := &TaskGroup{
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
}
err := tg.Validate()
mErr := err.(*multierror.Error)
if !strings.Contains(mErr.Errors[0].Error(), "group name") {
@@ -86,6 +108,11 @@ func TestTaskGroup_Validate(t *testing.T) {
&Task{Name: "web"},
&Task{},
},
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
}
err = tg.Validate()
mErr = err.(*multierror.Error)
nomad/watch/watch.go Normal file
View File
@@ -0,0 +1,38 @@
package watch
// The watch package provides a means of describing a watch for a blocking
// query. It is exported so it may be shared between Nomad's RPC layer and
// the underlying state store.
// Item describes the scope of a watch. It is used to provide a uniform
// input for subscribe/unsubscribe and notification firing. Specifying
// multiple fields does not place a watch on multiple items. Each Item
// describes exactly one scoped watch.
type Item struct {
Alloc string
AllocEval string
AllocJob string
AllocNode string
Eval string
Job string
Node string
Table string
}
// Items is a helper used to construct a set of watch items. It deduplicates
// the items as they are added using map keys.
type Items map[Item]struct{}
// NewItems creates a new Items set and adds the given items.
func NewItems(items ...Item) Items {
wi := make(Items)
for _, item := range items {
wi.Add(item)
}
return wi
}
// Add adds an item to the watch set.
func (wi Items) Add(i Item) {
wi[i] = struct{}{}
}
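
As the comments above note, an Item with several fields set is a single composite key rather than several separate watches, and Items dedupes on that exact key. A small sketch, assuming it sits in this watch package; the function name and the "node-1" value are illustrative only:

```go
package watch

import "fmt"

// itemsSketch shows that an Item with several fields set is one composite
// key (not several watches), and that Items dedupes on that exact key.
func itemsSketch() {
	items := NewItems(
		Item{Table: "allocs"},
		Item{Table: "allocs", Node: "node-1"}, // a different key than above
	)
	items.Add(Item{Table: "allocs"}) // exact duplicate, deduped by the map

	fmt.Println(len(items)) // prints 2
}
```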
nomad/watch/watch_test.go Normal file
View File
@@ -0,0 +1,31 @@
package watch
import (
"testing"
)
func TestWatchItems(t *testing.T) {
// Creates an empty set of items
wi := NewItems()
if len(wi) != 0 {
t.Fatalf("expect 0 items, got: %#v", wi)
}
// Creates a new set of supplied items
wi = NewItems(Item{Table: "foo"})
if len(wi) != 1 {
t.Fatalf("expected 1 item, got: %#v", wi)
}
// Adding items works
wi.Add(Item{Node: "bar"})
if len(wi) != 2 {
t.Fatalf("expected 2 items, got: %#v", wi)
}
// Adding duplicates auto-dedupes
wi.Add(Item{Table: "foo"})
if len(wi) != 2 {
t.Fatalf("expected 2 items, got: %#v", wi)
}
}
View File
@@ -42,7 +42,7 @@ nodes, unless otherwise specified:
as `us-west` and `us-east`. Defaults to `global`.
* `datacenter`: Datacenter of the local agent. All members of a datacenter
should all share a local LAN connection. Defaults to `dc1`.
should share a local LAN connection. Defaults to `dc1`.
* <a id="name">`name`</a>: The name of the local node. This value is used to
identify individual nodes in a given datacenter and must be unique
@@ -103,7 +103,7 @@ nodes, unless otherwise specified:
This can be used to advertise a different address to the peers of a server
node to support more complex network configurations such as NAT. This
configuration is optional, and defaults to the bind address of the specific
network service if it is not provided. This configuration is only appicable
network service if it is not provided. This configuration is only applicable
on server nodes. The value is a map of IP addresses and supports the
following keys:
<br>
@@ -125,13 +125,13 @@ nodes, unless otherwise specified:
* `disable_hostname`: A boolean indicating if gauge values should not be
prefixed with the local hostname.
* `leave_on_interrupt`: Enables gracefully leave when receiving the
* `leave_on_interrupt`: Enables gracefully leaving when receiving the
interrupt signal. By default, the agent will exit forcefully on any signal.
* `leave_on_terminate`: Enables gracefully leave when receiving the
* `leave_on_terminate`: Enables gracefully leaving when receiving the
terminate signal. By default, the agent will exit forcefully on any signal.
* `enable_syslog`: Enables logging to syslog. This option only work on
* `enable_syslog`: Enables logging to syslog. This option only works on
Unix based systems.
* `syslog_facility`: Controls the syslog facility that is used. By default,
View File
@@ -11,7 +11,7 @@ description: |-
Name: `exec`
The `exec` driver is used to simply execute a particular command for a task.
However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation
However, unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation
primitives of the operating system to limit the task's access to resources. While
simple, since the `exec` driver can invoke any command, it can be used to call
scripts or other wrappers which provide higher level features.
@@ -28,9 +28,10 @@ must reference it in the `command` as show in the examples below
## Client Requirements
The `exec` driver can run on all supported operating systems but to provide
proper isolation the client must be run as root on non-Windows operating systems.
Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
The `exec` driver can only be run on Linux, with Nomad running as root.
`exec` is limited to this configuration because resource isolation is
currently only guaranteed on Linux. Further, the host must have cgroups mounted
properly in order for the driver to work.
You must specify a `command` to be executed. Optionally you can specify an
`artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
@@ -68,8 +69,5 @@ The `exec` driver will set the following client attributes:
The resource isolation provided varies by the operating system of
the client and the configuration.
On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the
On Linux, Nomad will use cgroups, and a chroot to isolate the
resources of a process and as such the Nomad agent must be run as root.
On Windows, the task driver will just execute the command with no additional
resource isolation.
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -179,4 +184,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -56,4 +61,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -3,7 +3,7 @@ layout: "http"
page_title: "HTTP API: /v1/evaluation"
sidebar_current: "docs-http-eval-"
description: |-
The '/1/evaluation' endpoint is used to query a specific evaluation.
The '/v1/evaluation' endpoint is used to query a specific evaluation.
---
# /v1/evaluation
@@ -17,7 +17,7 @@ be specified using the `?region=` query parameter.
<dl>
<dt>Description</dt>
<dd>
Lists all the evaluations.
Query a specific evaluation.
</dd>
<dt>Method</dt>
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -57,9 +62,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/evaluation/\<ID\>/allocations
## GET
<dl>
<dt>Description</dt>
<dd>
@@ -77,6 +79,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -102,4 +109,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -59,4 +64,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,7 @@ The API is modeled closely on the underlying data model. Use the links to the le
documentation about specific endpoints. There are also "Agent" APIs which interact with
a specific agent and not the broader cluster used for administration.
<a name="blocking-queries"></a>
## Blocking Queries
Certain endpoints support a feature called a "blocking query." A blocking query
View File
@@ -6,7 +6,7 @@ description: |-
The '/v1/job' endpoint is used for CRUD on a single job.
---
# /v1/job/\<ID\>
# /v1/job
The `job` endpoint is used for CRUD on a single job. By default, the agent's local
region is used; another region can be specified using the `?region=` query parameter.
@@ -30,6 +30,11 @@ region is used; another region can be specified using the `?region=` query param
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -136,6 +141,105 @@ region is used; another region can be specified using the `?region=` query param
</dd>
</dl>
<dl>
<dt>Description</dt>
<dd>
Query the allocations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/allocations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "3575ba9d-7a12-0c96-7b28-add168c67984",
"EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Name": "binstore-storagelocker.binsl[0]",
"NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b",
"JobID": "binstore-storagelocker",
"TaskGroup": "binsl",
"DesiredStatus": "run",
"DesiredDescription": "",
"ClientStatus": "running",
"ClientDescription": "",
"CreateIndex": 16,
"ModifyIndex": 16
},
...
]
```
</dd>
</dl>
<dl>
<dt>Description</dt>
<dd>
Query the evaluations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/evaluations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Priority": 50,
"Type": "service",
"TriggeredBy": "job-register",
"JobID": "binstore-storagelocker",
"JobModifyIndex": 14,
"NodeID": "",
"NodeModifyIndex": 0,
"Status": "complete",
"StatusDescription": "",
"Wait": 0,
"NextEval": "",
"PreviousEval": "",
"CreateIndex": 15,
"ModifyIndex": 17
},
...
]
```
</dd>
</dl>
## PUT / POST
<dl>
@@ -177,6 +281,38 @@ region is used; another region can be specified using the `?region=` query param
</dd>
</dl>
<dl>
<dt>Description</dt>
<dd>
Creates a new evaluation for the given job. This can be used to force
run the scheduling logic if necessary.
</dd>
<dt>Method</dt>
<dd>PUT or POST</dd>
<dt>URL</dt>
<dd>`/v1/job/<ID>/evaluate`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
{
"EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac",
"EvalCreateIndex": 35,
"JobModifyIndex": 34,
}
```
</dd>
</dl>
## DELETE
<dl>
@@ -209,134 +345,3 @@ region is used; another region can be specified using the `?region=` query param
</dd>
</dl>
# /v1/job/\<ID\>/allocations
## GET
<dl>
<dt>Description</dt>
<dd>
Query the allocations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/allocations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "3575ba9d-7a12-0c96-7b28-add168c67984",
"EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Name": "binstore-storagelocker.binsl[0]",
"NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b",
"JobID": "binstore-storagelocker",
"TaskGroup": "binsl",
"DesiredStatus": "run",
"DesiredDescription": "",
"ClientStatus": "running",
"ClientDescription": "",
"CreateIndex": 16,
"ModifyIndex": 16
},
...
]
```
</dd>
</dl>
# /v1/job/\<ID\>/evaluate
## PUT / POST
<dl>
<dt>Description</dt>
<dd>
Creates a new evaluation for the given job. This can be used to force
run the scheduling logic if necessary.
</dd>
<dt>Method</dt>
<dd>PUT or POST</dd>
<dt>URL</dt>
<dd>`/v1/job/<ID>/evaluate`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
{
"EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac",
"EvalCreateIndex": 35,
"JobModifyIndex": 34,
}
```
</dd>
</dl>
# /v1/job/\<ID\>/evaluations
## GET
<dl>
<dt>Description</dt>
<dd>
Query the evaluations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/evaluations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Priority": 50,
"Type": "service",
"TriggeredBy": "job-register",
"JobID": "binstore-storagelocker",
"JobModifyIndex": 14,
"NodeID": "",
"NodeModifyIndex": 0,
"Status": "complete",
"StatusDescription": "",
"Wait": 0,
"NextEval": "",
"PreviousEval": "",
"CreateIndex": 15,
"ModifyIndex": 17
},
...
]
```
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ another region can be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -93,4 +98,3 @@ another region can be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -82,9 +87,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/node/\<ID\>/allocations
## GET
<dl>
<dt>Description</dt>
<dd>
@@ -102,6 +104,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -128,7 +135,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/node/\<ID\>/evaluate
## PUT / POST
<dl>
@@ -163,9 +169,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/node/\<ID\>/drain
## PUT / POST
<dl>
<dt>Description</dt>
<dd>
@@ -175,7 +178,7 @@ be specified using the `?region=` query parameter.
</dd>
<dt>Method</dt>
<dd>PUT or POSt</dd>
<dd>PUT or POST</dd>
<dt>URL</dt>
<dd>`/v1/node/<ID>/drain`</dd>
@@ -205,4 +208,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -53,5 +58,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>