From ef99e3d16eeb08e8be2b552084137be28031a385 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 27 Oct 2015 14:36:32 -0700 Subject: [PATCH 01/59] nomad: initial pass at blocking queries for jobs --- nomad/job_endpoint.go | 60 +++++++++++++++++++++----------------- nomad/job_endpoint_test.go | 51 ++++++++++++++++++++++++++++++++ nomad/rpc.go | 7 +++++ nomad/state/state_store.go | 17 +++++++++++ 4 files changed, 109 insertions(+), 26 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 63d31eb3c..cfb92bc24 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -216,35 +216,43 @@ func (j *Job) List(args *structs.JobListRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now()) - // Capture all the jobs - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Jobs() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + jobsWatch: true, + run: func() error { + // Capture all the jobs + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Jobs() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - job := raw.(*structs.Job) - reply.Jobs = append(reply.Jobs, job.Stub()) - } + for { + raw := iter.Next() + if raw == nil { + break + } + job := raw.(*structs.Job) + reply.Jobs = append(reply.Jobs, job.Stub()) + } - // Use the last index that affected the jobs table - index, err := snap.Index("jobs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("jobs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return j.srv.blockingRPC(&opts) } // Allocations is used to list the allocations for a job diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index e43ed3ba2..e922f31c3 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -3,6 +3,7 @@ package nomad import ( "reflect" "testing" + "time" "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/mock" @@ -397,6 +398,56 @@ func TestJobEndpoint_ListJobs(t *testing.T) { } } +func TestJobEndpoint_ListJobs_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the job + job := mock.Job() + + go func() { + // Wait a bit + time.Sleep(100 * time.Millisecond) + + // Send the register request + state := s1.fsm.State() + err := state.UpsertJob(2, job) + if err != nil { + t.Fatalf("err: %v", err) + } + }() + + // Lookup the jobs. Should block until the index is reached. 
+ get := &structs.JobListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.JobListResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.List", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + // Check that we blocked + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Jobs) != 1 { + t.Fatalf("bad: %#v", resp.Jobs) + } + if resp.Jobs[0].ID != job.ID { + t.Fatalf("bad: %#v", resp.Jobs[0]) + } +} + func TestJobEndpoint_Allocations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/rpc.go b/nomad/rpc.go index 074dec0d6..dff77eafa 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -271,6 +271,7 @@ type blockingOptions struct { queryOpts *structs.QueryOptions queryMeta *structs.QueryMeta allocWatch string + jobsWatch bool run func() error } @@ -309,6 +310,9 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { if opts.allocWatch != "" { state.StopWatchAllocs(opts.allocWatch, notifyCh) } + if opts.jobsWatch { + state.StopWatchJobs(notifyCh) + } }() REGISTER_NOTIFY: @@ -317,6 +321,9 @@ REGISTER_NOTIFY: if opts.allocWatch != "" { state.WatchAllocs(opts.allocWatch, notifyCh) } + if opts.jobsWatch { + state.WatchJobs(notifyCh) + } RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 22487234b..a24fe9195 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -58,8 +58,12 @@ type IndexEntry struct { // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. type stateWatch struct { + // Allocation watches by node allocs map[string]*NotifyGroup allocLock sync.Mutex + + // Full table job watches + jobs *NotifyGroup } // NewStateStore is used to create a new state store @@ -73,6 +77,7 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the watch entry watch := &stateWatch{ allocs: make(map[string]*NotifyGroup), + jobs: &NotifyGroup{}, } // Create the state store @@ -155,6 +160,16 @@ func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { } } +// WatchJobs is used to start watching the jobs view for changes. +func (s *StateStore) WatchJobs(notify chan struct{}) { + s.watch.jobs.Wait(notify) +} + +// StopWatchJobs is used to cancel notification on the given channel. +func (s *StateStore) StopWatchJobs(notify chan struct{}) { + s.watch.jobs.Clear(notify) +} + // UpsertNode is used to register a node or update a node definition // This is assumed to be triggered by the client, so we retain the value // of drain which is set by the scheduler. 
@@ -342,6 +357,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.jobs.Notify() }) txn.Commit() return nil } @@ -368,6 +384,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.jobs.Notify() }) txn.Commit() return nil } From 750be3892c66c84d00539f585729220cd53a09b0 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 27 Oct 2015 15:52:40 -0700 Subject: [PATCH 02/59] nomad: allow blocking on empty data views --- nomad/rpc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/rpc.go b/nomad/rpc.go index dff77eafa..a6b6595f3 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -334,7 +334,7 @@ RUN_QUERY: err := opts.run() // Check for minimum query time - if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { + if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { select { case <-notifyCh: goto REGISTER_NOTIFY From 1012a3e5ac9468b51cb5efe0c136dcf572b99572 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 11:13:30 -0700 Subject: [PATCH 03/59] nomad: use a generic full-table watcher --- nomad/job_endpoint.go | 6 ++-- nomad/rpc.go | 18 ++++------- nomad/state/state_store.go | 65 +++++++++++++++++++++++++++++++------- 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index cfb92bc24..8960a2e9d 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -218,9 +218,9 @@ func (j *Job) List(args *structs.JobListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - jobsWatch: true, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"jobs"}, run: func() error { // Capture all the jobs snap, err := j.srv.fsm.State().Snapshot() diff --git a/nomad/rpc.go b/nomad/rpc.go index a6b6595f3..dcb120cc8 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -268,11 +268,11 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - allocWatch string - jobsWatch bool - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + allocWatch string + watchTables []string + run func() error } // blockingRPC is used for queries that need to wait for a @@ -310,9 +310,7 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { if opts.allocWatch != "" { state.StopWatchAllocs(opts.allocWatch, notifyCh) } - if opts.jobsWatch { - state.StopWatchJobs(notifyCh) - } + state.StopWatchTables(notifyCh, opts.watchTables...) }() REGISTER_NOTIFY: @@ -321,9 +319,7 @@ REGISTER_NOTIFY: if opts.allocWatch != "" { state.WatchAllocs(opts.allocWatch, notifyCh) } - if opts.jobsWatch { - state.WatchJobs(notifyCh) - } + state.WatchTables(notifyCh, opts.watchTables...) 
RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index a24fe9195..ac16b2ead 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -62,8 +62,47 @@ type stateWatch struct { allocs map[string]*NotifyGroup allocLock sync.Mutex - // Full table job watches - jobs *NotifyGroup + // Full table watches + tables map[string]*NotifyGroup + tableLock sync.Mutex +} + +// watchTable is used to subscribe a channel to a full table watch. +func (w *stateWatch) watchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + tw, ok := w.tables[table] + if !ok { + tw = new(NotifyGroup) + w.tables[table] = tw + } + tw.Wait(ch) +} + +// stopWatchTable is used to unsubscribe a channel from a table watch. +func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + if tw, ok := w.tables[table]; ok { + tw.Clear(ch) + if tw.Empty() { + delete(w.tables, table) + } + } +} + +// notifyTables is used to notify watchers of the given tables. +func (w *stateWatch) notifyTables(tables ...string) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + for _, table := range tables { + if tw, ok := w.tables[table]; ok { + tw.Notify() + } + } } // NewStateStore is used to create a new state store @@ -77,7 +116,7 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the watch entry watch := &stateWatch{ allocs: make(map[string]*NotifyGroup), - jobs: &NotifyGroup{}, + tables: make(map[string]*NotifyGroup), } // Create the state store @@ -160,14 +199,18 @@ func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { } } -// WatchJobs is used to start watching the jobs view for changes. -func (s *StateStore) WatchJobs(notify chan struct{}) { - s.watch.jobs.Wait(notify) +// WatchTables is used to subscribe a channel to a set of tables. +func (s *StateStore) WatchTables(notify chan struct{}, tables ...string) { + for _, table := range tables { + s.watch.watchTable(table, notify) + } } -// StopWatchJobs is used to cancel notification on the given channel. -func (s *StateStore) StopWatchJobs(notify chan struct{}) { - s.watch.jobs.Clear(notify) +// StopWatchTables is used to unsubscribe a channel from table watches. 
+func (s *StateStore) StopWatchTables(notify chan struct{}, tables ...string) { + for _, table := range tables { + s.watch.stopWatchTable(table, notify) + } } // UpsertNode is used to register a node or update a node definition @@ -357,7 +400,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.jobs.Notify() }) + txn.Defer(func() { s.watch.notifyTables("jobs") }) txn.Commit() return nil } @@ -384,7 +427,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.jobs.Notify() }) + txn.Defer(func() { s.watch.notifyTables("jobs") }) txn.Commit() return nil } From 75af87c2d4129425963a9963ae20a88bd0ce35d8 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 11:21:39 -0700 Subject: [PATCH 04/59] nomad: support blocking queries on nodes --- nomad/node_endpoint.go | 61 +++++++++++++++++++++---------------- nomad/node_endpoint_test.go | 50 ++++++++++++++++++++++++++++++ nomad/state/state_store.go | 4 +++ 3 files changed, 89 insertions(+), 26 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 9ce14aadd..715b6a58d 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -404,35 +404,44 @@ func (n *Node) List(args *structs.NodeListRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) - // Capture all the nodes - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Nodes() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"nodes"}, + run: func() error { - for { - raw := iter.Next() - if raw == nil { - break - } - node := raw.(*structs.Node) - reply.Nodes = append(reply.Nodes, node.Stub()) - } + // Capture all the nodes + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Nodes() + if err != nil { + return err + } - // Use the last index that affected the jobs table - index, err := snap.Index("nodes") - if err != nil { - return err - } - reply.Index = index + for { + raw := iter.Next() + if raw == nil { + break + } + node := raw.(*structs.Node) + reply.Nodes = append(reply.Nodes, node.Stub()) + } - // Set the query response - n.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Use the last index that affected the jobs table + index, err := snap.Index("nodes") + if err != nil { + return err + } + reply.Index = index + + // Set the query response + n.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return n.srv.blockingRPC(&opts) } // createNodeEvals is used to create evaluations for each alloc on a node. 
diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 62f4a4959..c1a312d48 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -752,3 +752,53 @@ func TestClientEndpoint_ListNodes(t *testing.T) { t.Fatalf("bad: %#v", resp2.Nodes[0]) } } + +func TestClientEndpoint_ListNodes_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the node + node := mock.Node() + + go func() { + // Wait a bit + time.Sleep(100 * time.Millisecond) + + // Send the register request + state := s1.fsm.State() + err := state.UpsertNode(2, node) + if err != nil { + t.Fatalf("err: %v", err) + } + }() + + // List the nodes. Should block until the index is reached. + get := &structs.NodeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.NodeListResponse + if err := msgpackrpc.CallWithCodec(codec, "Node.List", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + // Check that we blocked + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Nodes) != 1 { + t.Fatalf("bad: %#v", resp.Nodes) + } + if resp.Nodes[0].ID != node.ID { + t.Fatalf("bad: %#v", resp.Nodes[0]) + } +} diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index ac16b2ead..a244bb71a 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -245,6 +245,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } @@ -271,6 +272,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } @@ -306,6 +308,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } @@ -341,6 +344,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } From 417b76a1ac5eba32fa21b4731f49a780c155fc66 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 12:29:06 -0700 Subject: [PATCH 05/59] nomad: test all node watch triggers --- nomad/node_endpoint.go | 5 +- nomad/node_endpoint_test.go | 96 ++++++++++++++++++++++++++++++------- 2 files changed, 82 insertions(+), 19 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 715b6a58d..23c50de57 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -410,7 +410,6 @@ func (n *Node) List(args *structs.NodeListRequest, queryMeta: &reply.QueryMeta, watchTables: []string{"nodes"}, run: func() error { - // Capture all the nodes snap, err := n.srv.fsm.State().Snapshot() if err != nil { @@ -421,14 +420,16 @@ func (n *Node) List(args *structs.NodeListRequest, return err } + var nodes []*structs.NodeListStub for { raw := iter.Next() if raw == nil { break } node := raw.(*structs.Node) - reply.Nodes = append(reply.Nodes, node.Stub()) + nodes = append(nodes, 
node.Stub()) } + reply.Nodes = nodes // Use the last index that affected the jobs table index, err := snap.Index("nodes") diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index c1a312d48..91ae5d4fc 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -756,26 +756,21 @@ func TestClientEndpoint_ListNodes(t *testing.T) { func TestClientEndpoint_ListNodes_blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() + state := s1.fsm.State() codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) // Create the node node := mock.Node() - go func() { - // Wait a bit - time.Sleep(100 * time.Millisecond) - - // Send the register request - state := s1.fsm.State() - err := state.UpsertNode(2, node) - if err != nil { + // Node upsert triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertNode(2, node); err != nil { t.Fatalf("err: %v", err) } - }() + }) - // List the nodes. Should block until the index is reached. - get := &structs.NodeListRequest{ + req := &structs.NodeListRequest{ QueryOptions: structs.QueryOptions{ Region: "global", MinQueryIndex: 1, @@ -783,22 +778,89 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } start := time.Now() var resp structs.NodeListResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.List", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp); err != nil { t.Fatalf("err: %v", err) } - // Check that we blocked if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { t.Fatalf("Bad index: %d %d", resp.Index, 2) } - if len(resp.Nodes) != 1 { + if len(resp.Nodes) != 1 || resp.Nodes[0].ID != node.ID { t.Fatalf("bad: %#v", resp.Nodes) } - if resp.Nodes[0].ID != node.ID { - t.Fatalf("bad: %#v", resp.Nodes[0]) + + // Node drain updates trigger watches. + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateNodeDrain(3, node.ID, true); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + var resp2 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Nodes) != 1 || !resp2.Nodes[0].Drain { + t.Fatalf("bad: %#v", resp2.Nodes) + } + + // Node status update triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateNodeStatus(4, node.ID, structs.NodeStatusDown); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 3 + var resp3 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp3.Index != 4 { + t.Fatalf("Bad index: %d %d", resp3.Index, 4) + } + if len(resp3.Nodes) != 1 || resp3.Nodes[0].Status != structs.NodeStatusDown { + t.Fatalf("bad: %#v", resp3.Nodes) + } + + // Node delete triggers watches. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteNode(5, node.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 4 + var resp4 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp4); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp4.Index != 5 { + t.Fatalf("Bad index: %d %d", resp4.Index, 5) + } + if len(resp4.Nodes) != 0 { + t.Fatalf("bad: %#v", resp4.Nodes) } } From 49a2bef922c4c6883aacd2acb6c76225bf9a8256 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 12:43:00 -0700 Subject: [PATCH 06/59] nomad: job watches return correct response, add tests --- nomad/job_endpoint.go | 4 +++- nomad/job_endpoint_test.go | 48 ++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 8960a2e9d..ca6d59e1a 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -232,14 +232,16 @@ func (j *Job) List(args *structs.JobListRequest, return err } + var jobs []*structs.JobListStub for { raw := iter.Next() if raw == nil { break } job := raw.(*structs.Job) - reply.Jobs = append(reply.Jobs, job.Stub()) + jobs = append(jobs, job.Stub()) } + reply.Jobs = jobs // Use the last index that affected the jobs table index, err := snap.Index("jobs") diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index e922f31c3..8a9e5a1ee 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -401,26 +401,21 @@ func TestJobEndpoint_ListJobs(t *testing.T) { func TestJobEndpoint_ListJobs_blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() + state := s1.fsm.State() codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) // Create the job job := mock.Job() - go func() { - // Wait a bit - time.Sleep(100 * time.Millisecond) - - // Send the register request - state := s1.fsm.State() - err := state.UpsertJob(2, job) - if err != nil { + // Upsert job triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertJob(2, job); err != nil { t.Fatalf("err: %v", err) } - }() + }) - // Lookup the jobs. Should block until the index is reached. 
- get := &structs.JobListRequest{ + req := &structs.JobListRequest{ QueryOptions: structs.QueryOptions{ Region: "global", MinQueryIndex: 1, @@ -428,23 +423,42 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { } start := time.Now() var resp structs.JobListResponse - if err := msgpackrpc.CallWithCodec(codec, "Job.List", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp); err != nil { t.Fatalf("err: %v", err) } - // Check that we blocked if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { t.Fatalf("Bad index: %d %d", resp.Index, 2) } - if len(resp.Jobs) != 1 { + if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID { t.Fatalf("bad: %#v", resp.Jobs) } - if resp.Jobs[0].ID != job.ID { - t.Fatalf("bad: %#v", resp.Jobs[0]) + + // Job deletion triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteJob(3, job.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.JobListResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Jobs) != 0 { + t.Fatalf("bad: %#v", resp2.Jobs) } } From dc7cbcc3f0fe8556c021f48cf36db540812b13b4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 28 Oct 2015 16:23:33 -0700 Subject: [PATCH 07/59] Refactor spawn-daemon so it can be used by all OSes and make it write exit code to a file --- command/spawn_daemon.go | 203 +++++++++++++++++++++++++++--- command/spawn_daemon_darwin.go | 4 + command/spawn_daemon_linux.go | 121 ++---------------- command/spawn_daemon_test.go | 48 +++++++ command/spawn_daemon_universal.go | 9 -- command/spawn_daemon_unix.go | 16 +++ command/spawn_daemon_windows.go | 7 ++ command/test-resources/exiter.py | 3 + 8 files changed, 276 insertions(+), 135 deletions(-) create mode 100644 command/spawn_daemon_darwin.go create mode 100644 command/spawn_daemon_test.go delete mode 100644 command/spawn_daemon_universal.go create mode 100644 command/spawn_daemon_unix.go create mode 100644 command/spawn_daemon_windows.go create mode 100644 command/test-resources/exiter.py diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 3ca825d41..ea7868be4 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -2,19 +2,19 @@ package command import ( "encoding/json" + "fmt" + "io" "os" + "os/exec" + "strconv" "strings" + "syscall" ) type SpawnDaemonCommand struct { Meta -} - -// Status of executing the user's command. -type SpawnStartStatus struct { - // ErrorMsg will be empty if the user command was started successfully. - // Otherwise it will have an error message. - ErrorMsg string + config *DaemonConfig + exitFile io.WriteCloser } func (c *SpawnDaemonCommand) Help() string { @@ -23,15 +23,15 @@ Usage: nomad spawn-daemon [options] INTERNAL ONLY - Spawns a daemon process optionally inside a cgroup. The required daemon_config is a json - encoding of the DaemonConfig struct containing the isolation configuration and command to run. - SpawnStartStatus is json serialized to Stdout upon running the user command or if any error - prevents its execution. 
If there is no error, the process waits on the users - command and then json serializes SpawnExitStatus to Stdout after its termination. - -General Options: - - ` + generalOptionsUsage() + Spawns a daemon process by double forking. The required daemon_config is a + json encoding of the DaemonConfig struct containing the isolation + configuration and command to run. SpawnStartStatus is json serialized to + stdout upon running the user command or if any error prevents its execution. + If there is no error, the process waits on the users command. Once the user + command exits, the exit code is written to a file specified in the + daemon_config and this process exits with the same exit status as the user + command. + ` return strings.TrimSpace(helpText) } @@ -40,6 +40,147 @@ func (c *SpawnDaemonCommand) Synopsis() string { return "Spawn a daemon command with configurable isolation." } +// Status of executing the user's command. +type SpawnStartStatus struct { + // The PID of the user's command. + UserPID int + + // ErrorMsg will be empty if the user command was started successfully. + // Otherwise it will have an error message. + ErrorMsg string +} + +// Exit status of the user's command. +type SpawnExitStatus struct { + // The exit code of the user's command. + ExitCode int +} + +// Configuration for the command to start as a daemon. +type DaemonConfig struct { + exec.Cmd + + // The filepath to write the exit status to. + ExitStatusFile string + + // The paths, if not /dev/null, must be either in the tasks root directory + // or in the shared alloc directory. + StdoutFile string + StdinFile string + StderrFile string + + // An optional path specifying the directory to chroot the process in. + Chroot string +} + +// Whether to start the user command or abort. +type TaskStart bool + +// parseConfig reads the DaemonConfig from the passed arguments. If not +// successful, an error is returned. +func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) { + flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + if err := flags.Parse(args); err != nil { + return nil, fmt.Errorf("failed to parse args: %v", err) + } + + // Check that we got json input. + args = flags.Args() + if len(args) != 1 { + return nil, fmt.Errorf("incorrect number of args; got %v; want 1", len(args)) + } + jsonInput, err := strconv.Unquote(args[0]) + if err != nil { + return nil, fmt.Errorf("Failed to unquote json input: %v", err) + } + + // De-serialize the passed command. + var config DaemonConfig + dec := json.NewDecoder(strings.NewReader(jsonInput)) + if err := dec.Decode(&config); err != nil { + return nil, err + } + + return &config, nil +} + +// configureLogs creates the log files and redirects the process +// stdin/stderr/stdout to them. If unsuccessful, an error is returned. 
+func (c *SpawnDaemonCommand) configureLogs() error { + stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdout: %v", err) + } + + stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stderr: %v", err) + } + + stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdin: %v", err) + } + + c.config.Cmd.Stdout = stdo + c.config.Cmd.Stderr = stde + c.config.Cmd.Stdin = stdi + return nil +} + +func (c *SpawnDaemonCommand) Run(args []string) int { + var err error + c.config, err = c.parseConfig(args) + if err != nil { + return c.outputStartStatus(err, 1) + } + + // Open the file we will be using to write exit codes to. We do this early + // to ensure that we don't start the user process when we can't capture its + // exit status. + c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1) + } + + // Isolate the user process. + if err := c.isolateCmd(); err != nil { + return c.outputStartStatus(err, 1) + } + + // Redirect logs. + if err := c.configureLogs(); err != nil { + return c.outputStartStatus(err, 1) + } + + // Chroot jail the process and set its working directory. + c.configureChroot() + + // Wait to get the start command. + var start TaskStart + dec := json.NewDecoder(os.Stdin) + if err := dec.Decode(&start); err != nil { + return c.outputStartStatus(err, 1) + } + + // Aborted by Nomad process. + if !start { + return 0 + } + + // Spawn the user process. + if err := c.config.Cmd.Start(); err != nil { + return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1) + } + + // Indicate that the command was started successfully. + c.outputStartStatus(nil, 0) + + // Wait and then output the exit status. + return c.writeExitStatus(c.config.Cmd.Wait()) +} + // outputStartStatus is a helper function that outputs a SpawnStartStatus to // Stdout with the passed error, which may be nil to indicate no error. It // returns the passed status. @@ -51,6 +192,36 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } + if c.config != nil && c.config.Process == nil { + startStatus.UserPID = c.config.Process.Pid + } + enc.Encode(startStatus) return status } + +// writeExitStatus takes in the error result from calling wait and writes out +// the exit status to a file. It returns the same exit status as the user +// command. +func (c *SpawnDaemonCommand) writeExitStatus(exit error) int { + // Parse the exit code. + exitStatus := &SpawnExitStatus{} + if exit != nil { + // Default to exit code 1 if we can not get the actual exit code. 
+ exitStatus.ExitCode = 1 + + if exiterr, ok := exit.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + exitStatus.ExitCode = status.ExitStatus() + } + } + } + + if c.exitFile != nil { + enc := json.NewEncoder(c.exitFile) + enc.Encode(exitStatus) + c.exitFile.Close() + } + + return exitStatus.ExitCode +} diff --git a/command/spawn_daemon_darwin.go b/command/spawn_daemon_darwin.go new file mode 100644 index 000000000..f3fe8484a --- /dev/null +++ b/command/spawn_daemon_darwin.go @@ -0,0 +1,4 @@ +package command + +// No chroot on darwin. +func (c *SpawnDaemonCommand) configureChroot() {} diff --git a/command/spawn_daemon_linux.go b/command/spawn_daemon_linux.go index 3e9ceaa3e..512ec645f 100644 --- a/command/spawn_daemon_linux.go +++ b/command/spawn_daemon_linux.go @@ -1,115 +1,16 @@ package command -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "strconv" - "strings" - "syscall" -) +import "syscall" -// Configuration for the command to start as a daemon. -type DaemonConfig struct { - exec.Cmd +// configureChroot enters the user command into a chroot if specified in the +// config and on an OS that supports Chroots. +func (c *SpawnDaemonCommand) configureChroot() { + if len(c.config.Chroot) != 0 { + if c.config.Cmd.SysProcAttr == nil { + c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } - // The paths, if not /dev/null, must be either in the tasks root directory - // or in the shared alloc directory. - StdoutFile string - StdinFile string - StderrFile string - - Chroot string -} - -// Whether to start the user command or abort. -type TaskStart bool - -func (c *SpawnDaemonCommand) Run(args []string) int { - flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient) - flags.Usage = func() { c.Ui.Output(c.Help()) } - - if err := flags.Parse(args); err != nil { - return 1 - } - - // Check that we got json input. - args = flags.Args() - if len(args) != 1 { - c.Ui.Error(c.Help()) - return 1 - } - jsonInput, err := strconv.Unquote(args[0]) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Failed to unquote json input: %v", err), 1) - } - - // De-serialize the passed command. - var cmd DaemonConfig - dec := json.NewDecoder(strings.NewReader(jsonInput)) - if err := dec.Decode(&cmd); err != nil { - return c.outputStartStatus(err, 1) - } - - // Isolate the user process. - if _, err := syscall.Setsid(); err != nil { - return c.outputStartStatus(fmt.Errorf("Failed setting sid: %v", err), 1) - } - - syscall.Umask(0) - - // Redirect logs. - stdo, err := os.OpenFile(cmd.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdout: %v", err), 1) - } - - stde, err := os.OpenFile(cmd.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stderr: %v", err), 1) - } - - stdi, err := os.OpenFile(cmd.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdin: %v", err), 1) - } - - cmd.Cmd.Stdout = stdo - cmd.Cmd.Stderr = stde - cmd.Cmd.Stdin = stdi - - // Chroot jail the process and set its working directory. - if cmd.Cmd.SysProcAttr == nil { - cmd.Cmd.SysProcAttr = &syscall.SysProcAttr{} - } - - cmd.Cmd.SysProcAttr.Chroot = cmd.Chroot - cmd.Cmd.Dir = "/" - - // Wait to get the start command. 
- var start TaskStart - dec = json.NewDecoder(os.Stdin) - if err := dec.Decode(&start); err != nil { - return c.outputStartStatus(err, 1) - } - - if !start { - return 0 - } - - // Spawn the user process. - if err := cmd.Cmd.Start(); err != nil { - return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1) - } - - // Indicate that the command was started successfully. - c.outputStartStatus(nil, 0) - - // Wait and then output the exit status. - if err := cmd.Wait(); err != nil { - return 1 - } - - return 0 + c.config.Cmd.SysProcAttr.Chroot = c.config.Chroot + c.config.Cmd.Dir = "/" + } } diff --git a/command/spawn_daemon_test.go b/command/spawn_daemon_test.go new file mode 100644 index 000000000..5bfd6ad5a --- /dev/null +++ b/command/spawn_daemon_test.go @@ -0,0 +1,48 @@ +package command + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os/exec" + "testing" +) + +type nopCloser struct { + io.ReadWriter +} + +func (n *nopCloser) Close() error { + return nil +} + +func TestSpawnDaemon_WriteExitStatus(t *testing.T) { + // Check if there is python. + path, err := exec.LookPath("python") + if err != nil { + t.Skip("python not detected") + } + + var b bytes.Buffer + daemon := &SpawnDaemonCommand{exitFile: &nopCloser{&b}} + + code := 3 + cmd := exec.Command(path, "./test-resources/exiter.py", fmt.Sprintf("%d", code)) + err = cmd.Run() + actual := daemon.writeExitStatus(err) + if actual != code { + t.Fatalf("writeExitStatus(%v) returned %v; want %v", err, actual, code) + } + + // De-serialize the passed command. + var exitStatus SpawnExitStatus + dec := json.NewDecoder(&b) + if err := dec.Decode(&exitStatus); err != nil { + t.Fatalf("failed to decode exit status: %v", err) + } + + if exitStatus.ExitCode != code { + t.Fatalf("writeExitStatus(%v) wrote exit status %v; want %v", err, exitStatus.ExitCode, code) + } +} diff --git a/command/spawn_daemon_universal.go b/command/spawn_daemon_universal.go deleted file mode 100644 index 5083af5f3..000000000 --- a/command/spawn_daemon_universal.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !linux - -package command - -import "errors" - -func (c *SpawnDaemonCommand) Run(args []string) int { - return c.outputStartStatus(errors.New("spawn-daemon not supported"), 1) -} diff --git a/command/spawn_daemon_unix.go b/command/spawn_daemon_unix.go new file mode 100644 index 000000000..981e52596 --- /dev/null +++ b/command/spawn_daemon_unix.go @@ -0,0 +1,16 @@ +// +build !windows + +package command + +import "syscall" + +// isolateCmd sets the session id for the process and the umask. +func (c *SpawnDaemonCommand) isolateCmd() error { + if c.config.Cmd.SysProcAttr == nil { + c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } + + c.config.Cmd.SysProcAttr.Setsid = true + syscall.Umask(0) + return nil +} diff --git a/command/spawn_daemon_windows.go b/command/spawn_daemon_windows.go new file mode 100644 index 000000000..bb2d63ed8 --- /dev/null +++ b/command/spawn_daemon_windows.go @@ -0,0 +1,7 @@ +// build !linux !darwin + +package command + +// No isolation on Windows. 
+func (c *SpawnDaemonCommand) isolateCmd() error { return nil } +func (c *SpawnDaemonCommand) configureChroot() {} diff --git a/command/test-resources/exiter.py b/command/test-resources/exiter.py new file mode 100644 index 000000000..90e66b98c --- /dev/null +++ b/command/test-resources/exiter.py @@ -0,0 +1,3 @@ +import sys + +sys.exit(int(sys.argv[1])) From ecdc1c92b6bc96ac02be423a11e3da1776c720d3 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 28 Oct 2015 17:22:04 -0700 Subject: [PATCH 08/59] Exec driver only applies on linux as root --- client/driver/exec.go | 13 ++-- client/driver/exec_test.go | 17 +---- client/executor/exec_universal.go | 109 +++------------------------ client/testutil/driver_compatible.go | 4 +- 4 files changed, 23 insertions(+), 120 deletions(-) diff --git a/client/driver/exec.go b/client/driver/exec.go index 0324cad68..cbcb85a0a 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -35,8 +35,11 @@ func NewExecDriver(ctx *DriverContext) Driver { } func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { - // Only enable if we are root when running on non-windows systems. - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + // Only enable if we are root on linux. + if runtime.GOOS != "linux" { + d.logger.Printf("[DEBUG] driver.exec: only available on linux, disabling") + return false, nil + } else if syscall.Geteuid() != 0 { d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling") return false, nil } @@ -73,10 +76,8 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, } // Add execution permissions to the newly downloaded artifact - if runtime.GOOS != "windows" { - if err := syscall.Chmod(artifactFile, 0755); err != nil { - log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err) - } + if err := syscall.Chmod(artifactFile, 0755); err != nil { + log.Printf("[ERR] driver.exec: Error making artifact executable: %s", err) } } diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index ba8745176..488847c5c 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "path/filepath" "reflect" - "runtime" "testing" "time" @@ -123,13 +122,7 @@ func TestExecDriver_Start_Wait(t *testing.T) { func TestExecDriver_Start_Artifact_basic(t *testing.T) { ctestutils.ExecCompatible(t) - var file string - switch runtime.GOOS { - case "darwin": - file = "hi_darwin_amd64" - default: - file = "hi_linux_amd64" - } + file := "hi_linux_amd64" task := &structs.Task{ Name: "sleep", @@ -172,13 +165,7 @@ func TestExecDriver_Start_Artifact_basic(t *testing.T) { func TestExecDriver_Start_Artifact_expanded(t *testing.T) { ctestutils.ExecCompatible(t) - var file string - switch runtime.GOOS { - case "darwin": - file = "hi_darwin_amd64" - default: - file = "hi_linux_amd64" - } + file := "hi_linux_amd64" task := &structs.Task{ Name: "sleep", diff --git a/client/executor/exec_universal.go b/client/executor/exec_universal.go index 6b1977d10..4979ae3b7 100644 --- a/client/executor/exec_universal.go +++ b/client/executor/exec_universal.go @@ -3,105 +3,20 @@ package executor import ( - "fmt" - "os" - "strconv" - "strings" - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/driver/args" - "github.com/hashicorp/nomad/client/driver/environment" "github.com/hashicorp/nomad/nomad/structs" ) -func NewExecutor() Executor { - return &UniversalExecutor{} -} +// UniversalExecutor exists to make the exec 
driver compile on all operating systems. +type UniversalExecutor struct{} -// UniversalExecutor should work everywhere, and as a result does not include -// any resource restrictions or runas capabilities. -type UniversalExecutor struct { - cmd -} - -func (e *UniversalExecutor) Limit(resources *structs.Resources) error { - if resources == nil { - return errNoResources - } - return nil -} - -func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { - taskDir, ok := alloc.TaskDirs[taskName] - if !ok { - return fmt.Errorf("Error finding task dir for (%s)", taskName) - } - e.Dir = taskDir - return nil -} - -func (e *UniversalExecutor) Start() error { - // Parse the commands arguments and replace instances of Nomad environment - // variables. - envVars, err := environment.ParseFromList(e.cmd.Env) - if err != nil { - return err - } - - parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) - if err != nil { - return err - } else if len(parsedPath) != 1 { - return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) - } - - e.cmd.Path = parsedPath[0] - combined := strings.Join(e.cmd.Args, " ") - parsed, err := args.ParseAndReplace(combined, envVars.Map()) - if err != nil { - return err - } - e.Cmd.Args = parsed - - // We don't want to call ourself. We want to call Start on our embedded Cmd - return e.cmd.Start() -} - -func (e *UniversalExecutor) Open(pid string) error { - pidNum, err := strconv.Atoi(pid) - if err != nil { - return fmt.Errorf("Failed to parse pid %v: %v", pid, err) - } - - process, err := os.FindProcess(pidNum) - if err != nil { - return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) - } - e.Process = process - return nil -} - -func (e *UniversalExecutor) Wait() error { - // We don't want to call ourself. 
We want to call Start on our embedded Cmd - return e.cmd.Wait() -} - -func (e *UniversalExecutor) ID() (string, error) { - if e.cmd.Process != nil { - return strconv.Itoa(e.cmd.Process.Pid), nil - } else { - return "", fmt.Errorf("Process has finished or was never started") - } -} - -func (e *UniversalExecutor) Shutdown() error { - return e.ForceStop() -} - -func (e *UniversalExecutor) ForceStop() error { - return e.Process.Kill() -} - -func (e *UniversalExecutor) Command() *cmd { - return &e.cmd -} +func NewExecutor() Executor { return &UniversalExecutor{} } +func (e *UniversalExecutor) Limit(resources *structs.Resources) error { return nil } +func (e *UniversalExecutor) ConfigureTaskDir(string, *allocdir.AllocDir) error { return nil } +func (e *UniversalExecutor) Start() error { return nil } +func (e *UniversalExecutor) Open(pid string) error { return nil } +func (e *UniversalExecutor) Wait() error { return nil } +func (e *UniversalExecutor) ID() (string, error) { return "", nil } +func (e *UniversalExecutor) Shutdown() error { return nil } +func (e *UniversalExecutor) ForceStop() error { return nil } +func (e *UniversalExecutor) Command() *cmd { return nil } diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index df1d27d11..94ae6225c 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -8,8 +8,8 @@ import ( ) func ExecCompatible(t *testing.T) { - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { - t.Skip("Must be root on non-windows environments to run test") + if runtime.GOOS != "linux" || syscall.Geteuid() != 0 { + t.Skip("Test only available running as root on linux") } } From c74a5b8c0a5f276f067c81c0dc540b08d251e264 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 18:11:55 -0700 Subject: [PATCH 09/59] nomad/state: move methods so we can sanely find them --- nomad/state/state_store.go | 217 ++++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 101 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index a244bb71a..31dbd7bac 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -10,6 +10,13 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +// IndexEntry is used with the "index" table +// for managing the latest Raft index affecting a table. +type IndexEntry struct { + Key string + Value uint64 +} + // The StateStore is responsible for maintaining all the Nomad // state. It is manipulated by the FSM which maintains consistency // through the use of Raft. The goals of the StateStore are to provide @@ -23,88 +30,6 @@ type StateStore struct { watch *stateWatch } -// StateSnapshot is used to provide a point-in-time snapshot -type StateSnapshot struct { - StateStore -} - -// StateRestore is used to optimize the performance when -// restoring state by only using a single large transaction -// instead of thousands of sub transactions -type StateRestore struct { - txn *memdb.Txn - watch *stateWatch - allocNodes map[string]struct{} -} - -// Abort is used to abort the restore operation -func (s *StateRestore) Abort() { - s.txn.Abort() -} - -// Commit is used to commit the restore operation -func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notifyAllocs(s.allocNodes) }) - s.txn.Commit() -} - -// IndexEntry is used with the "index" table -// for managing the latest Raft index affecting a table. 
-type IndexEntry struct { - Key string - Value uint64 -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. -type stateWatch struct { - // Allocation watches by node - allocs map[string]*NotifyGroup - allocLock sync.Mutex - - // Full table watches - tables map[string]*NotifyGroup - tableLock sync.Mutex -} - -// watchTable is used to subscribe a channel to a full table watch. -func (w *stateWatch) watchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() - - tw, ok := w.tables[table] - if !ok { - tw = new(NotifyGroup) - w.tables[table] = tw - } - tw.Wait(ch) -} - -// stopWatchTable is used to unsubscribe a channel from a table watch. -func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() - - if tw, ok := w.tables[table]; ok { - tw.Clear(ch) - if tw.Empty() { - delete(w.tables, table) - } - } -} - -// notifyTables is used to notify watchers of the given tables. -func (w *stateWatch) notifyTables(tables ...string) { - w.tableLock.Lock() - defer w.tableLock.Unlock() - - for _, table := range tables { - if tw, ok := w.tables[table]; ok { - tw.Notify() - } - } -} - // NewStateStore is used to create a new state store func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the MemDB @@ -151,6 +76,7 @@ func (s *StateStore) Restore() (*StateRestore, error) { txn: txn, watch: s.watch, allocNodes: make(map[string]struct{}), + tables: make(map[string]struct{}), } return r, nil } @@ -186,19 +112,6 @@ func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) { } } -// notifyAllocs is used to notify any node alloc listeners of a change -func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { - w.allocLock.Lock() - defer w.allocLock.Unlock() - - for node := range nodes { - if grp, ok := w.allocs[node]; ok { - grp.Notify() - delete(w.allocs, node) - } - } -} - // WatchTables is used to subscribe a channel to a set of tables. 
func (s *StateStore) WatchTables(notify chan struct{}, tables ...string) { for _, table := range tables { @@ -245,7 +158,8 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -272,7 +186,8 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -308,7 +223,8 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -344,7 +260,8 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -404,7 +321,8 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("jobs") }) + tables := map[string]struct{}{"jobs": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -431,7 +349,8 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("jobs") }) + tables := map[string]struct{}{"jobs": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -817,8 +736,38 @@ func (s *StateStore) Indexes() (memdb.ResultIterator, error) { return iter, nil } +// StateSnapshot is used to provide a point-in-time snapshot +type StateSnapshot struct { + StateStore +} + +// StateRestore is used to optimize the performance when +// restoring state by only using a single large transaction +// instead of thousands of sub transactions +type StateRestore struct { + txn *memdb.Txn + watch *stateWatch + allocNodes map[string]struct{} + tables map[string]struct{} +} + +// Abort is used to abort the restore operation +func (s *StateRestore) Abort() { + s.txn.Abort() +} + +// Commit is used to commit the restore operation +func (s *StateRestore) Commit() { + s.txn.Defer(func() { + s.watch.notifyAllocs(s.allocNodes) + s.watch.notifyTables(s.tables) + }) + s.txn.Commit() +} + // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { + r.tables["nodes"] = struct{}{} if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -827,6 +776,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { + r.tables["jobs"] = struct{}{} if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -835,6 +785,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // 
EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { + r.tables["evals"] = struct{}{} if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -843,6 +794,7 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { + r.tables["allocs"] = struct{}{} r.allocNodes[alloc.NodeID] = struct{}{} if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) @@ -857,3 +809,66 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } + +// stateWatch holds shared state for watching updates. This is +// outside of StateStore so it can be shared with snapshots. +type stateWatch struct { + // Allocation watches by node + allocs map[string]*NotifyGroup + allocLock sync.Mutex + + // Full table watches + tables map[string]*NotifyGroup + tableLock sync.Mutex +} + +// watchTable is used to subscribe a channel to a full table watch. +func (w *stateWatch) watchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + tw, ok := w.tables[table] + if !ok { + tw = new(NotifyGroup) + w.tables[table] = tw + } + tw.Wait(ch) +} + +// stopWatchTable is used to unsubscribe a channel from a table watch. +func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + if tw, ok := w.tables[table]; ok { + tw.Clear(ch) + if tw.Empty() { + delete(w.tables, table) + } + } +} + +// notifyTables is used to notify watchers of the given tables. +func (w *stateWatch) notifyTables(tables map[string]struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + for table, _ := range tables { + if tw, ok := w.tables[table]; ok { + tw.Notify() + } + } +} + +// notifyAllocs is used to notify any node alloc listeners of a change +func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { + w.allocLock.Lock() + defer w.allocLock.Unlock() + + for node := range nodes { + if grp, ok := w.allocs[node]; ok { + grp.Notify() + delete(w.allocs, node) + } + } +} From e23f547f2383c110498488450afa6009776cc051 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 18:34:56 -0700 Subject: [PATCH 10/59] nomad: support blocking queries on eval list --- nomad/eval_endpoint.go | 62 ++++++++++++++++++++--------------- nomad/eval_endpoint_test.go | 64 +++++++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 9 +++++- 3 files changed, 108 insertions(+), 27 deletions(-) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 0dce98a52..6e8e65054 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -219,35 +219,45 @@ func (e *Eval) List(args *structs.EvalListRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now()) - // Scan all the evaluations - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Evals() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"evals"}, + run: func() error { + // Scan all the evaluations + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Evals() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - 
eval := raw.(*structs.Evaluation) - reply.Evaluations = append(reply.Evaluations, eval) - } + var evals []*structs.Evaluation + for { + raw := iter.Next() + if raw == nil { + break + } + eval := raw.(*structs.Evaluation) + evals = append(evals, eval) + } + reply.Evaluations = evals - // Use the last index that affected the jobs table - index, err := snap.Index("evals") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("evals") + if err != nil { + return err + } + reply.Index = index - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } // Allocations is used to list the allocations for an evaluation diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index eb61ea3d0..3b9a62a8e 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -334,6 +334,70 @@ func TestEvalEndpoint_List(t *testing.T) { } } +func TestEvalEndpoint_List_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the ieval + eval := mock.Eval() + + // Upsert eval triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertEvals(2, []*structs.Evaluation{eval}); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.EvalListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Evaluations) != 1 || resp.Evaluations[0].ID != eval.ID { + t.Fatalf("bad: %#v", resp.Evaluations) + } + + // Eval deletion triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteEval(3, []string{eval.ID}, nil); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Evaluations) != 0 { + t.Fatalf("bad: %#v", resp2.Evaluations) + } +} + func TestEvalEndpoint_Allocations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 31dbd7bac..9a7a33273 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -407,6 +407,8 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro } } + tables := map[string]struct{}{"evals": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -478,7 +480,12 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { return fmt.Errorf("index update failed: 
%v", err) } - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + + tables := map[string]struct{}{"evals": struct{}{}} + txn.Defer(func() { + s.watch.notifyAllocs(nodes) + s.watch.notifyTables(tables) + }) txn.Commit() return nil } From b9fb0252007ebf669b46fb67170b9cbd0d239060 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 18:35:48 -0700 Subject: [PATCH 11/59] nomad: fix node test output --- nomad/job_endpoint_test.go | 2 +- nomad/node_endpoint_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 8a9e5a1ee..0591e73bf 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -452,7 +452,7 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { t.Fatalf("Bad index: %d %d", resp2.Index, 3) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 91ae5d4fc..d06e6ea0f 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -807,7 +807,7 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { t.Fatalf("Bad index: %d %d", resp2.Index, 3) @@ -831,7 +831,7 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) } if resp3.Index != 4 { t.Fatalf("Bad index: %d %d", resp3.Index, 4) @@ -855,7 +855,7 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) } if resp4.Index != 5 { t.Fatalf("Bad index: %d %d", resp4.Index, 5) From b162c259d24ce6336c49477649829aef7b793dc1 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 19:25:39 -0700 Subject: [PATCH 12/59] nomad: support full table watches for allocations --- nomad/alloc_endpoint.go | 62 ++++++++++++++++++-------------- nomad/alloc_endpoint_test.go | 69 ++++++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 12 +++++-- 3 files changed, 115 insertions(+), 28 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 53b630480..09bd28727 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -19,35 +19,45 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes } defer metrics.MeasureSince([]string{"nomad", "alloc", "list"}, time.Now()) - // Capture all the allocations - snap, err := a.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Allocs() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"allocs"}, + run: func() error { + // Capture all the allocations + snap, err := a.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Allocs() + if err != nil { + return err + } - 
for { - raw := iter.Next() - if raw == nil { - break - } - alloc := raw.(*structs.Allocation) - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } + var allocs []*structs.AllocListStub + for { + raw := iter.Next() + if raw == nil { + break + } + alloc := raw.(*structs.Allocation) + allocs = append(allocs, alloc.Stub()) + } + reply.Allocations = allocs - // Use the last index that affected the jobs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - a.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + a.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return a.srv.blockingRPC(&opts) } // GetAlloc is used to lookup a particular allocation diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 8076b64d6..01688da96 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -3,6 +3,7 @@ package nomad import ( "reflect" "testing" + "time" "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/mock" @@ -44,6 +45,74 @@ func TestAllocEndpoint_List(t *testing.T) { } } +func TestAllocEndpoint_List_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the alloc + alloc := mock.Alloc() + + // Upsert alloc triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertAllocs(2, []*structs.Allocation{alloc}); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.AllocListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID { + t.Fatalf("bad: %#v", resp.Allocations) + } + + // Client updates trigger watches + alloc2 := mock.Alloc() + alloc2.ID = alloc.ID + alloc2.ClientStatus = structs.AllocClientStatusRunning + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateAllocFromClient(3, alloc2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID || + resp2.Allocations[0].ClientStatus != structs.AllocClientStatusRunning { + t.Fatalf("bad: %#v", resp2.Allocations) + } +} + func TestAllocEndpoint_GetAlloc(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 9a7a33273..389991de9 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ 
-580,8 +580,12 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } + tables := map[string]struct{}{"allocs": struct{}{}} nodes := map[string]struct{}{alloc.NodeID: struct{}{}} - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + txn.Defer(func() { + s.watch.notifyAllocs(nodes) + s.watch.notifyTables(tables) + }) txn.Commit() return nil } @@ -621,7 +625,11 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + tables := map[string]struct{}{"allocs": struct{}{}} + txn.Defer(func() { + s.watch.notifyAllocs(nodes) + s.watch.notifyTables(tables) + }) txn.Commit() return nil } From 2558ab3f31e989a49fe125f098cfb02423134f5b Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 11:57:41 -0700 Subject: [PATCH 13/59] nomad: unify watcher inputs for reusability --- nomad/alloc_endpoint.go | 6 +- nomad/eval_endpoint.go | 6 +- nomad/job_endpoint.go | 6 +- nomad/node_endpoint.go | 12 +-- nomad/rpc.go | 22 ++-- nomad/state/state_store.go | 213 +++++++++++++------------------------ 6 files changed, 100 insertions(+), 165 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 09bd28727..a2ce6a09a 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -21,9 +21,9 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"allocs"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "allocs", run: func() error { // Capture all the allocations snap, err := a.srv.fsm.State().Snapshot() diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 6e8e65054..5d87948aa 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -221,9 +221,9 @@ func (e *Eval) List(args *structs.EvalListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"evals"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "evals", run: func() error { // Scan all the evaluations snap, err := e.srv.fsm.State().Snapshot() diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index ca6d59e1a..30bc35563 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -218,9 +218,9 @@ func (j *Job) List(args *structs.JobListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"jobs"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "jobs", run: func() error { // Capture all the jobs snap, err := j.srv.fsm.State().Snapshot() diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 23c50de57..7e7332974 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -330,9 +330,9 @@ func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - allocWatch: args.NodeID, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchAllocNode: args.NodeID, run: func() error { // Look for the node snap, err := n.srv.fsm.State().Snapshot() @@ -406,9 +406,9 @@ func (n *Node) List(args 
*structs.NodeListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"nodes"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "nodes", run: func() error { // Capture all the nodes snap, err := n.srv.fsm.State().Snapshot() diff --git a/nomad/rpc.go b/nomad/rpc.go index dcb120cc8..f1977dbc7 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -268,11 +268,11 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - allocWatch string - watchTables []string - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + watchAllocNode string + watchTable string + run func() error } // blockingRPC is used for queries that need to wait for a @@ -307,19 +307,15 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { state = s.fsm.State() defer func() { timeout.Stop() - if opts.allocWatch != "" { - state.StopWatchAllocs(opts.allocWatch, notifyCh) - } - state.StopWatchTables(notifyCh, opts.watchTables...) + state.StopWatchAllocNode(opts.watchAllocNode, notifyCh) + state.StopWatchTable(opts.watchTable, notifyCh) }() REGISTER_NOTIFY: // Register the notification channel. This may be done // multiple times if we have not reached the target wait index. - if opts.allocWatch != "" { - state.WatchAllocs(opts.allocWatch, notifyCh) - } - state.WatchTables(notifyCh, opts.watchTables...) + state.WatchAllocNode(opts.watchAllocNode, notifyCh) + state.WatchTable(opts.watchTable, notifyCh) RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 389991de9..0a8adc0d0 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -38,17 +38,11 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { return nil, fmt.Errorf("state store setup failed: %v", err) } - // Create the watch entry - watch := &stateWatch{ - allocs: make(map[string]*NotifyGroup), - tables: make(map[string]*NotifyGroup), - } - // Create the state store s := &StateStore{ logger: log.New(logOutput, "", log.LstdFlags), db: db, - watch: watch, + watch: newStateWatch(), } return s, nil } @@ -73,57 +67,30 @@ func (s *StateStore) Snapshot() (*StateSnapshot, error) { func (s *StateStore) Restore() (*StateRestore, error) { txn := s.db.Txn(true) r := &StateRestore{ - txn: txn, - watch: s.watch, - allocNodes: make(map[string]struct{}), - tables: make(map[string]struct{}), + txn: txn, + watch: s.watch, } return r, nil } -// WatchAllocs is used to subscribe a channel to changes in allocations for a node -func (s *StateStore) WatchAllocs(node string, notify chan struct{}) { - s.watch.allocLock.Lock() - defer s.watch.allocLock.Unlock() - - // Check for an existing notify group - if grp, ok := s.watch.allocs[node]; ok { - grp.Wait(notify) - return - } - - // Create new notify group - grp := &NotifyGroup{} - grp.Wait(notify) - s.watch.allocs[node] = grp +// WatchTable is used to subscribe a channel to a full table watch. 
+func (s *StateStore) WatchTable(table string, notify chan struct{}) { + s.watch.watch(watchItem{table: table}, notify) } -// StopWatchAllocs is used to unsubscribe a channel from changes in allocations -func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) { - s.watch.allocLock.Lock() - defer s.watch.allocLock.Unlock() - - // Check for an existing notify group - if grp, ok := s.watch.allocs[node]; ok { - grp.Clear(notify) - if grp.Empty() { - delete(s.watch.allocs, node) - } - } +// StopWatchTable unsubscribes a channel from a full table watch. +func (s *StateStore) StopWatchTable(table string, notify chan struct{}) { + s.watch.stopWatch(watchItem{table: table}, notify) } -// WatchTables is used to subscribe a channel to a set of tables. -func (s *StateStore) WatchTables(notify chan struct{}, tables ...string) { - for _, table := range tables { - s.watch.watchTable(table, notify) - } +// WatchAllocNode is used to subscribe a channel to a node allocation watch. +func (s *StateStore) WatchAllocNode(nodeID string, notify chan struct{}) { + s.watch.watch(watchItem{allocNode: nodeID}, notify) } -// StopWatchTables is used to unsubscribe a channel from table watches. -func (s *StateStore) StopWatchTables(notify chan struct{}, tables ...string) { - for _, table := range tables { - s.watch.stopWatchTable(table, notify) - } +// StopWatchAllocNode unsubscribes a channel from a node allocation watch. +func (s *StateStore) StopWatchAllocNode(nodeID string, notify chan struct{}) { + s.watch.stopWatch(watchItem{allocNode: nodeID}, notify) } // UpsertNode is used to register a node or update a node definition @@ -158,8 +125,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -186,8 +152,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -223,8 +188,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -260,8 +224,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -321,8 +284,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"jobs": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) txn.Commit() return nil } @@ -349,8 +311,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"jobs": 
struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) txn.Commit() return nil } @@ -407,8 +368,7 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro } } - tables := map[string]struct{}{"evals": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) txn.Commit() return nil } @@ -444,7 +404,6 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() - nodes := make(map[string]struct{}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -467,7 +426,6 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } - nodes[existing.(*structs.Allocation).NodeID] = struct{}{} if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } @@ -481,11 +439,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"evals": struct{}{}} - txn.Defer(func() { - s.watch.notifyAllocs(nodes) - s.watch.notifyTables(tables) - }) + txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) txn.Commit() return nil } @@ -580,12 +534,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"allocs": struct{}{}} - nodes := map[string]struct{}{alloc.NodeID: struct{}{}} - txn.Defer(func() { - s.watch.notifyAllocs(nodes) - s.watch.notifyTables(tables) - }) + txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) txn.Commit() return nil } @@ -595,7 +544,6 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() - nodes := make(map[string]struct{}) // Handle the allocations for _, alloc := range allocs { @@ -614,7 +562,6 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er alloc.ClientStatus = exist.ClientStatus alloc.ClientDescription = exist.ClientDescription } - nodes[alloc.NodeID] = struct{}{} if err := txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -625,11 +572,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"allocs": struct{}{}} - txn.Defer(func() { - s.watch.notifyAllocs(nodes) - s.watch.notifyTables(tables) - }) + txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) txn.Commit() return nil } @@ -760,10 +703,9 @@ type StateSnapshot struct { // restoring state by only using a single large transaction // instead of thousands of sub transactions type StateRestore struct { - txn *memdb.Txn - watch *stateWatch - allocNodes map[string]struct{} - tables map[string]struct{} + txn *memdb.Txn + watch *stateWatch + items []watchItem } // Abort is used to abort the restore operation @@ -773,16 +715,13 @@ func (s *StateRestore) Abort() { // Commit is used to commit the restore operation func (s *StateRestore) Commit() { - s.txn.Defer(func() { - 
s.watch.notifyAllocs(s.allocNodes) - s.watch.notifyTables(s.tables) - }) + s.txn.Defer(func() { s.watch.notify(s.items...) }) s.txn.Commit() } // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { - r.tables["nodes"] = struct{}{} + r.items = append(r.items, watchItem{table: "nodes"}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -791,7 +730,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { - r.tables["jobs"] = struct{}{} + r.items = append(r.items, watchItem{table: "jobs"}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -800,7 +739,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { - r.tables["evals"] = struct{}{} + r.items = append(r.items, watchItem{table: "evals"}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -809,8 +748,8 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.tables["allocs"] = struct{}{} - r.allocNodes[alloc.NodeID] = struct{}{} + r.items = append(r.items, watchItem{table: "allocs"}) + r.items = append(r.items, watchItem{allocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -825,65 +764,65 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { return nil } +// watchItem describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. +type watchItem struct { + allocID string + allocNode string + evalID string + jobID string + nodeID string + table string +} + // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. type stateWatch struct { - // Allocation watches by node - allocs map[string]*NotifyGroup - allocLock sync.Mutex - - // Full table watches - tables map[string]*NotifyGroup - tableLock sync.Mutex + items map[watchItem]*NotifyGroup + l sync.Mutex } -// watchTable is used to subscribe a channel to a full table watch. -func (w *stateWatch) watchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watchItem]*NotifyGroup), + } +} - tw, ok := w.tables[table] +// watch subscribes a channel to the given watch item. +func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + grp, ok := w.items[wi] if !ok { - tw = new(NotifyGroup) - w.tables[table] = tw + grp = new(NotifyGroup) + w.items[wi] = grp } - tw.Wait(ch) + grp.Wait(ch) } -// stopWatchTable is used to unsubscribe a channel from a table watch. -func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() +// stopWatch unsubscribes a channel from the given watch item. 
+func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() - if tw, ok := w.tables[table]; ok { - tw.Clear(ch) - if tw.Empty() { - delete(w.tables, table) + if grp, ok := w.items[wi]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, wi) } } } -// notifyTables is used to notify watchers of the given tables. -func (w *stateWatch) notifyTables(tables map[string]struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items ...watchItem) { + w.l.Lock() + defer w.l.Unlock() - for table, _ := range tables { - if tw, ok := w.tables[table]; ok { - tw.Notify() - } - } -} - -// notifyAllocs is used to notify any node alloc listeners of a change -func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { - w.allocLock.Lock() - defer w.allocLock.Unlock() - - for node := range nodes { - if grp, ok := w.allocs[node]; ok { + for _, wi := range items { + if grp, ok := w.items[wi]; ok { grp.Notify() - delete(w.allocs, node) } } } From d9e593a0ae06d726566db9ce4e63a72bd15e0229 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 12:09:09 -0700 Subject: [PATCH 14/59] nomad: deduplicate watch items with a helper --- nomad/state/state_store.go | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 0a8adc0d0..60af402d3 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -69,6 +69,7 @@ func (s *StateStore) Restore() (*StateRestore, error) { r := &StateRestore{ txn: txn, watch: s.watch, + items: make(watchItems), } return r, nil } @@ -705,7 +706,7 @@ type StateSnapshot struct { type StateRestore struct { txn *memdb.Txn watch *stateWatch - items []watchItem + items watchItems } // Abort is used to abort the restore operation @@ -715,13 +716,13 @@ func (s *StateRestore) Abort() { // Commit is used to commit the restore operation func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notify(s.items...) }) + s.txn.Defer(func() { s.watch.notify(s.items.items()...) 
}) s.txn.Commit() } // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { - r.items = append(r.items, watchItem{table: "nodes"}) + r.items.add(watchItem{table: "nodes"}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -730,7 +731,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { - r.items = append(r.items, watchItem{table: "jobs"}) + r.items.add(watchItem{table: "jobs"}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -739,7 +740,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { - r.items = append(r.items, watchItem{table: "evals"}) + r.items.add(watchItem{table: "evals"}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -748,8 +749,8 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.items = append(r.items, watchItem{table: "allocs"}) - r.items = append(r.items, watchItem{allocNode: alloc.NodeID}) + r.items.add(watchItem{table: "allocs"}) + r.items.add(watchItem{allocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -775,6 +776,24 @@ type watchItem struct { table string } +// watchItems is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type watchItems map[watchItem]struct{} + +// add adds an item to the watch set. +func (w watchItems) add(wi watchItem) { + w[wi] = struct{}{} +} + +// items returns the items as a slice. +func (w watchItems) items() []watchItem { + items := make([]watchItem, 0, len(w)) + for wi, _ := range w { + items = append(items, wi) + } + return items +} + // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. 
type stateWatch struct { From 31abf97e06ae51edcdc7b09aa8cee8dadf087ea2 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 13:21:25 -0700 Subject: [PATCH 15/59] nomad: move state watcher into its own file, add tests --- nomad/state/state_store.go | 92 +++------------------------------ nomad/state/state_store_test.go | 10 ++-- nomad/state/watch.go | 86 ++++++++++++++++++++++++++++++ nomad/state/watch_test.go | 64 +++++++++++++++++++++++ 4 files changed, 163 insertions(+), 89 deletions(-) create mode 100644 nomad/state/watch.go create mode 100644 nomad/state/watch_test.go diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 60af402d3..685233447 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4,7 +4,6 @@ import ( "fmt" "io" "log" - "sync" "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" @@ -405,6 +404,8 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "evals"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -427,6 +428,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } + watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } @@ -440,7 +442,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) + txn.Defer(func() { s.watch.notify(watch.items()...) }) txn.Commit() return nil } @@ -545,9 +547,12 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "allocs"}) // Handle the allocations for _, alloc := range allocs { + watch.add(watchItem{allocNode: alloc.NodeID}) existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { return fmt.Errorf("alloc lookup failed: %v", err) @@ -573,7 +578,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) + txn.Defer(func() { s.watch.notify(watch.items()...) }) txn.Commit() return nil } @@ -764,84 +769,3 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } - -// watchItem describes the scope of a watch. It is used to provide a uniform -// input for subscribe/unsubscribe and notification firing. -type watchItem struct { - allocID string - allocNode string - evalID string - jobID string - nodeID string - table string -} - -// watchItems is a helper used to construct a set of watchItems. It deduplicates -// the items as they are added using map keys. -type watchItems map[watchItem]struct{} - -// add adds an item to the watch set. -func (w watchItems) add(wi watchItem) { - w[wi] = struct{}{} -} - -// items returns the items as a slice. 
-func (w watchItems) items() []watchItem { - items := make([]watchItem, 0, len(w)) - for wi, _ := range w { - items = append(items, wi) - } - return items -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. -type stateWatch struct { - items map[watchItem]*NotifyGroup - l sync.Mutex -} - -// newStateWatch creates a new stateWatch for change notification. -func newStateWatch() *stateWatch { - return &stateWatch{ - items: make(map[watchItem]*NotifyGroup), - } -} - -// watch subscribes a channel to the given watch item. -func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - grp, ok := w.items[wi] - if !ok { - grp = new(NotifyGroup) - w.items[wi] = grp - } - grp.Wait(ch) -} - -// stopWatch unsubscribes a channel from the given watch item. -func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - if grp, ok := w.items[wi]; ok { - grp.Clear(ch) - if grp.Empty() { - delete(w.items, wi) - } - } -} - -// notify is used to fire notifications on the given watch items. -func (w *stateWatch) notify(items ...watchItem) { - w.l.Lock() - defer w.l.Unlock() - - for _, wi := range items { - if grp, ok := w.items[wi]; ok { - grp.Notify() - } - } -} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 1c4b60238..58f8093bf 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -585,7 +585,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } notify1 := make(chan struct{}, 1) - state.WatchAllocs(alloc.NodeID, notify1) + state.WatchAllocNode(alloc.NodeID, notify1) err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) if err != nil { @@ -808,14 +808,14 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { } } -func TestStateStore_WatchAllocs(t *testing.T) { +func TestStateStore_WatchAllocNode(t *testing.T) { state := testStateStore(t) notify1 := make(chan struct{}, 1) notify2 := make(chan struct{}, 1) - state.WatchAllocs("foo", notify1) - state.WatchAllocs("foo", notify2) - state.StopWatchAllocs("foo", notify2) + state.WatchAllocNode("foo", notify1) + state.WatchAllocNode("foo", notify2) + state.StopWatchAllocNode("foo", notify2) alloc := mock.Alloc() alloc.NodeID = "foo" diff --git a/nomad/state/watch.go b/nomad/state/watch.go new file mode 100644 index 000000000..c0e529b28 --- /dev/null +++ b/nomad/state/watch.go @@ -0,0 +1,86 @@ +package state + +import ( + "sync" +) + +// watchItem describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. +type watchItem struct { + allocID string + allocNode string + evalID string + jobID string + nodeID string + table string +} + +// watchItems is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type watchItems map[watchItem]struct{} + +// add adds an item to the watch set. +func (w watchItems) add(wi watchItem) { + w[wi] = struct{}{} +} + +// items returns the items as a slice. +func (w watchItems) items() []watchItem { + items := make([]watchItem, 0, len(w)) + for wi, _ := range w { + items = append(items, wi) + } + return items +} + +// stateWatch holds shared state for watching updates. This is +// outside of StateStore so it can be shared with snapshots. 
+type stateWatch struct { + items map[watchItem]*NotifyGroup + l sync.Mutex +} + +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watchItem]*NotifyGroup), + } +} + +// watch subscribes a channel to the given watch item. +func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + grp, ok := w.items[wi] + if !ok { + grp = new(NotifyGroup) + w.items[wi] = grp + } + grp.Wait(ch) +} + +// stopWatch unsubscribes a channel from the given watch item. +func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + if grp, ok := w.items[wi]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, wi) + } + } +} + +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items ...watchItem) { + w.l.Lock() + defer w.l.Unlock() + + for _, wi := range items { + if grp, ok := w.items[wi]; ok { + grp.Notify() + } + } +} diff --git a/nomad/state/watch_test.go b/nomad/state/watch_test.go new file mode 100644 index 000000000..5992b65ee --- /dev/null +++ b/nomad/state/watch_test.go @@ -0,0 +1,64 @@ +package state + +import ( + "testing" +) + +func TestWatchItems(t *testing.T) { + // No items returns empty slice + wi := make(watchItems) + if items := wi.items(); len(items) != 0 { + t.Fatalf("expected empty, got: %#v", items) + } + + // Adding items works + wi.add(watchItem{table: "foo"}) + wi.add(watchItem{nodeID: "bar"}) + if items := wi.items(); len(items) != 2 { + t.Fatalf("expected 2 items, got: %#v", items) + } + + // Adding duplicates auto-dedupes + wi.add(watchItem{table: "foo"}) + if items := wi.items(); len(items) != 2 { + t.Fatalf("expected 2 items, got: %#v", items) + } +} + +func TestStateWatch_watch(t *testing.T) { + watch := newStateWatch() + notify1 := make(chan struct{}, 1) + notify2 := make(chan struct{}, 1) + notify3 := make(chan struct{}, 1) + + // Notifications trigger subscribed channels + watch.watch(watchItem{table: "foo"}, notify1) + watch.watch(watchItem{table: "bar"}, notify2) + watch.watch(watchItem{table: "baz"}, notify3) + + watch.notify(watchItem{table: "foo"}, watchItem{table: "bar"}) + if len(notify1) != 1 { + t.Fatalf("should notify") + } + if len(notify2) != 1 { + t.Fatalf("should notify") + } + if len(notify3) != 0 { + t.Fatalf("should not notify") + } +} + +func TestStateWatch_stopWatch(t *testing.T) { + watch := newStateWatch() + notify := make(chan struct{}) + + // First subscribe + watch.watch(watchItem{table: "foo"}, notify) + + // Unsubscribe stop notifications + watch.stopWatch(watchItem{table: "foo"}, notify) + watch.notify(watchItem{table: "foo"}) + if len(notify) != 0 { + t.Fatalf("should not notify") + } +} From d9a77e0257fbac9decd70c4d4bd3fbb1652d6b3f Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 13:52:15 -0700 Subject: [PATCH 16/59] nomad: add triggering for more types of events --- nomad/state/state_store.go | 67 ++++++++++++++++++++++++++++++-------- nomad/state/watch.go | 29 ++++++++--------- nomad/state/watch_test.go | 25 +++++++------- 3 files changed, 81 insertions(+), 40 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 685233447..0895b77e8 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -100,6 +100,10 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + 
watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: node.ID}) + // Check if the node already exists existing, err := txn.First("nodes", "id", node.ID) if err != nil { @@ -125,7 +129,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -135,6 +139,10 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -152,7 +160,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -162,6 +170,10 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -188,7 +200,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -198,6 +210,10 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -224,7 +240,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -261,6 +277,10 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "jobs"}) + watch.add(watchItem{job: job.ID}) + // Check if the job already exists existing, err := txn.First("jobs", "id", job.ID) if err != nil { @@ -284,7 +304,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -294,6 +314,10 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "jobs"}) + watch.add(watchItem{job: jobID}) + // Lookup the node existing, err := txn.First("jobs", "id", jobID) if err != nil { @@ -311,7 +335,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -361,14 +385,18 @@ func (s 
*StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "evals"}) + // Do a nested upsert for _, eval := range evals { + watch.add(watchItem{eval: eval.ID}) if err := s.nestedUpsertEval(txn, index, eval); err != nil { return err } } - txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -405,7 +433,6 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e txn := s.db.Txn(true) defer txn.Abort() watch := make(watchItems) - watch.add(watchItem{table: "evals"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -418,6 +445,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("evals", existing); err != nil { return fmt.Errorf("eval delete failed: %v", err) } + watch.add(watchItem{table: "evals"}) + watch.add(watchItem{eval: eval}) } for _, alloc := range allocs { @@ -428,10 +457,12 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } - watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } + watch.add(watchItem{table: "allocs"}) + watch.add(watchItem{alloc: alloc}) + watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) } // Update the indexes @@ -442,7 +473,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch.items()...) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -504,6 +535,11 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "allocs"}) + watch.add(watchItem{alloc: alloc.ID}) + watch.add(watchItem{allocNode: alloc.NodeID}) + // Look for existing alloc existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { @@ -537,7 +573,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -547,12 +583,12 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) watch.add(watchItem{table: "allocs"}) // Handle the allocations for _, alloc := range allocs { - watch.add(watchItem{allocNode: alloc.NodeID}) existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { return fmt.Errorf("alloc lookup failed: %v", err) @@ -571,6 +607,9 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er if err := txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } + + watch.add(watchItem{alloc: alloc.ID}) + watch.add(watchItem{allocNode: alloc.NodeID}) } // Update the indexes @@ -578,7 +617,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { 
s.watch.notify(watch.items()...) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -721,7 +760,7 @@ func (s *StateRestore) Abort() { // Commit is used to commit the restore operation func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notify(s.items.items()...) }) + s.txn.Defer(func() { s.watch.notify(s.items) }) s.txn.Commit() } diff --git a/nomad/state/watch.go b/nomad/state/watch.go index c0e529b28..124eb0612 100644 --- a/nomad/state/watch.go +++ b/nomad/state/watch.go @@ -7,11 +7,11 @@ import ( // watchItem describes the scope of a watch. It is used to provide a uniform // input for subscribe/unsubscribe and notification firing. type watchItem struct { - allocID string + alloc string allocNode string - evalID string - jobID string - nodeID string + eval string + job string + node string table string } @@ -19,20 +19,19 @@ type watchItem struct { // the items as they are added using map keys. type watchItems map[watchItem]struct{} +func newWatchItems(items ...watchItem) watchItems { + wi := make(watchItems) + for _, item := range items { + wi.add(item) + } + return wi +} + // add adds an item to the watch set. func (w watchItems) add(wi watchItem) { w[wi] = struct{}{} } -// items returns the items as a slice. -func (w watchItems) items() []watchItem { - items := make([]watchItem, 0, len(w)) - for wi, _ := range w { - items = append(items, wi) - } - return items -} - // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. type stateWatch struct { @@ -74,11 +73,11 @@ func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { } // notify is used to fire notifications on the given watch items. -func (w *stateWatch) notify(items ...watchItem) { +func (w *stateWatch) notify(items watchItems) { w.l.Lock() defer w.l.Unlock() - for _, wi := range items { + for wi, _ := range items { if grp, ok := w.items[wi]; ok { grp.Notify() } diff --git a/nomad/state/watch_test.go b/nomad/state/watch_test.go index 5992b65ee..aad53d798 100644 --- a/nomad/state/watch_test.go +++ b/nomad/state/watch_test.go @@ -5,23 +5,19 @@ import ( ) func TestWatchItems(t *testing.T) { - // No items returns empty slice wi := make(watchItems) - if items := wi.items(); len(items) != 0 { - t.Fatalf("expected empty, got: %#v", items) - } // Adding items works wi.add(watchItem{table: "foo"}) - wi.add(watchItem{nodeID: "bar"}) - if items := wi.items(); len(items) != 2 { - t.Fatalf("expected 2 items, got: %#v", items) + wi.add(watchItem{node: "bar"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) } // Adding duplicates auto-dedupes wi.add(watchItem{table: "foo"}) - if items := wi.items(); len(items) != 2 { - t.Fatalf("expected 2 items, got: %#v", items) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) } } @@ -36,7 +32,11 @@ func TestStateWatch_watch(t *testing.T) { watch.watch(watchItem{table: "bar"}, notify2) watch.watch(watchItem{table: "baz"}, notify3) - watch.notify(watchItem{table: "foo"}, watchItem{table: "bar"}) + items := make(watchItems) + items.add(watchItem{table: "foo"}) + items.add(watchItem{table: "bar"}) + + watch.notify(items) if len(notify1) != 1 { t.Fatalf("should notify") } @@ -57,7 +57,10 @@ func TestStateWatch_stopWatch(t *testing.T) { // Unsubscribe stop notifications watch.stopWatch(watchItem{table: "foo"}, notify) - watch.notify(watchItem{table: "foo"}) + + items := make(watchItems) + items.add(watchItem{table: "foo"}) + watch.notify(items) if len(notify) != 0 { 
t.Fatalf("should not notify") } From 573e9dfb9abfb15c51602797a0c0ab05fca4ddf2 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 14:47:39 -0700 Subject: [PATCH 17/59] nomad: export watcher to share between rpc and state store --- nomad/alloc_endpoint.go | 7 +- nomad/eval_endpoint.go | 7 +- nomad/job_endpoint.go | 7 +- nomad/node_endpoint.go | 13 +-- nomad/rpc.go | 16 ++- nomad/state/state_store.go | 182 ++++++++++++++++++++------------ nomad/state/state_store_test.go | 56 +++++++++- nomad/state/watch.go | 85 --------------- nomad/state/watch_test.go | 67 ------------ nomad/watch/watch.go | 33 ++++++ nomad/watch/watch_test.go | 31 ++++++ 11 files changed, 255 insertions(+), 249 deletions(-) delete mode 100644 nomad/state/watch.go delete mode 100644 nomad/state/watch_test.go create mode 100644 nomad/watch/watch.go create mode 100644 nomad/watch/watch_test.go diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index a2ce6a09a..f3b9dbdc4 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -5,6 +5,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Alloc endpoint is used for manipulating allocations @@ -21,9 +22,9 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "allocs", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "allocs"}), run: func() error { // Capture all the allocations snap, err := a.srv.fsm.State().Snapshot() diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 5d87948aa..07dfc18fe 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) const ( @@ -221,9 +222,9 @@ func (e *Eval) List(args *structs.EvalListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "evals", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "evals"}), run: func() error { // Scan all the evaluations snap, err := e.srv.fsm.State().Snapshot() diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 30bc35563..ff296a986 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Job endpoint is used for job interactions @@ -218,9 +219,9 @@ func (j *Job) List(args *structs.JobListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "jobs", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "jobs"}), run: func() error { // Capture all the jobs snap, err := j.srv.fsm.State().Snapshot() diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 7e7332974..65a83e1b1 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Node endpoint is used for client interactions @@ -330,9 +331,9 @@ func (n *Node) GetAllocs(args 
*structs.NodeSpecificRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchAllocNode: args.NodeID, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), run: func() error { // Look for the node snap, err := n.srv.fsm.State().Snapshot() @@ -406,9 +407,9 @@ func (n *Node) List(args *structs.NodeListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "nodes", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "nodes"}), run: func() error { // Capture all the nodes snap, err := n.srv.fsm.State().Snapshot() diff --git a/nomad/rpc.go b/nomad/rpc.go index f1977dbc7..21f9c9dc6 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" "github.com/hashicorp/raft" "github.com/hashicorp/yamux" ) @@ -268,11 +269,10 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - watchAllocNode string - watchTable string - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + watch watch.Items + run func() error } // blockingRPC is used for queries that need to wait for a @@ -307,15 +307,13 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { state = s.fsm.State() defer func() { timeout.Stop() - state.StopWatchAllocNode(opts.watchAllocNode, notifyCh) - state.StopWatchTable(opts.watchTable, notifyCh) + state.StopWatch(opts.watch, notifyCh) }() REGISTER_NOTIFY: // Register the notification channel. This may be done // multiple times if we have not reached the target wait index. - state.WatchAllocNode(opts.watchAllocNode, notifyCh) - state.WatchTable(opts.watchTable, notifyCh) + state.Watch(opts.watch, notifyCh) RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 0895b77e8..17484f985 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4,9 +4,11 @@ import ( "fmt" "io" "log" + "sync" "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // IndexEntry is used with the "index" table @@ -68,29 +70,21 @@ func (s *StateStore) Restore() (*StateRestore, error) { r := &StateRestore{ txn: txn, watch: s.watch, - items: make(watchItems), + items: watch.NewItems(), } return r, nil } -// WatchTable is used to subscribe a channel to a full table watch. -func (s *StateStore) WatchTable(table string, notify chan struct{}) { - s.watch.watch(watchItem{table: table}, notify) +func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { + for wi, _ := range items { + s.watch.watch(wi, notify) + } } -// StopWatchTable unsubscribes a channel from a full table watch. -func (s *StateStore) StopWatchTable(table string, notify chan struct{}) { - s.watch.stopWatch(watchItem{table: table}, notify) -} - -// WatchAllocNode is used to subscribe a channel to a node allocation watch. 
-func (s *StateStore) WatchAllocNode(nodeID string, notify chan struct{}) { - s.watch.watch(watchItem{allocNode: nodeID}, notify) -} - -// StopWatchAllocNode unsubscribes a channel from a node allocation watch. -func (s *StateStore) StopWatchAllocNode(nodeID string, notify chan struct{}) { - s.watch.stopWatch(watchItem{allocNode: nodeID}, notify) +func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { + for wi, _ := range items { + s.watch.stopWatch(wi, notify) + } } // UpsertNode is used to register a node or update a node definition @@ -100,9 +94,9 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: node.ID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: node.ID}) // Check if the node already exists existing, err := txn.First("nodes", "id", node.ID) @@ -129,7 +123,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -139,9 +133,9 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: nodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -160,7 +154,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -170,9 +164,9 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: nodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -200,7 +194,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -210,9 +204,9 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: nodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -240,7 +234,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -277,9 +271,9 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "jobs"}) - 
watch.add(watchItem{job: job.ID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: job.ID}) // Check if the job already exists existing, err := txn.First("jobs", "id", job.ID) @@ -304,7 +298,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -314,9 +308,9 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "jobs"}) - watch.add(watchItem{job: jobID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: jobID}) // Lookup the node existing, err := txn.First("jobs", "id", jobID) @@ -335,7 +329,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -385,18 +379,18 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "evals"}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "evals"}) // Do a nested upsert for _, eval := range evals { - watch.add(watchItem{eval: eval.ID}) + watcher.Add(watch.Item{Eval: eval.ID}) if err := s.nestedUpsertEval(txn, index, eval); err != nil { return err } } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -432,7 +426,9 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "evals"}) + watcher.Add(watch.Item{Table: "allocs"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -445,8 +441,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("evals", existing); err != nil { return fmt.Errorf("eval delete failed: %v", err) } - watch.add(watchItem{table: "evals"}) - watch.add(watchItem{eval: eval}) + watcher.Add(watch.Item{Eval: eval}) } for _, alloc := range allocs { @@ -460,9 +455,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } - watch.add(watchItem{table: "allocs"}) - watch.add(watchItem{alloc: alloc}) - watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) + watcher.Add(watch.Item{Alloc: alloc}) + watcher.Add(watch.Item{AllocNode: existing.(*structs.Allocation).NodeID}) } // Update the indexes @@ -473,7 +467,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -535,10 +529,10 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "allocs"}) - 
watch.add(watchItem{alloc: alloc.ID}) - watch.add(watchItem{allocNode: alloc.NodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "allocs"}) + watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocNode: alloc.NodeID}) // Look for existing alloc existing, err := txn.First("allocs", "id", alloc.ID) @@ -573,7 +567,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -584,8 +578,8 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "allocs"}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "allocs"}) // Handle the allocations for _, alloc := range allocs { @@ -608,8 +602,8 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("alloc insert failed: %v", err) } - watch.add(watchItem{alloc: alloc.ID}) - watch.add(watchItem{allocNode: alloc.NodeID}) + watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } // Update the indexes @@ -617,7 +611,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -750,7 +744,7 @@ type StateSnapshot struct { type StateRestore struct { txn *memdb.Txn watch *stateWatch - items watchItems + items watch.Items } // Abort is used to abort the restore operation @@ -766,7 +760,7 @@ func (s *StateRestore) Commit() { // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { - r.items.add(watchItem{table: "nodes"}) + r.items.Add(watch.Item{Table: "nodes"}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -775,7 +769,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { - r.items.add(watchItem{table: "jobs"}) + r.items.Add(watch.Item{Table: "jobs"}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -784,7 +778,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { - r.items.add(watchItem{table: "evals"}) + r.items.Add(watch.Item{Table: "evals"}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -793,8 +787,8 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.items.add(watchItem{table: "allocs"}) - r.items.add(watchItem{allocNode: alloc.NodeID}) + r.items.Add(watch.Item{Table: "allocs"}) + r.items.Add(watch.Item{AllocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -808,3 +802,55 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } + +// stateWatch holds shared state for watching updates. 
This is +// outside of StateStore so it can be shared with snapshots. +type stateWatch struct { + items map[watch.Item]*NotifyGroup + l sync.Mutex +} + +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watch.Item]*NotifyGroup), + } +} + +// watch subscribes a channel to the given watch item. +func (w *stateWatch) watch(wi watch.Item, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + grp, ok := w.items[wi] + if !ok { + grp = new(NotifyGroup) + w.items[wi] = grp + } + grp.Wait(ch) +} + +// stopWatch unsubscribes a channel from the given watch item. +func (w *stateWatch) stopWatch(wi watch.Item, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + if grp, ok := w.items[wi]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, wi) + } + } +} + +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items watch.Items) { + w.l.Lock() + defer w.l.Unlock() + + for wi, _ := range items { + if grp, ok := w.items[wi]; ok { + grp.Notify() + } + } +} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 58f8093bf..2d1134d9c 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) func testStateStore(t *testing.T) *StateStore { @@ -585,7 +586,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } notify1 := make(chan struct{}, 1) - state.WatchAllocNode(alloc.NodeID, notify1) + state.Watch(watch.NewItems(watch.Item{AllocNode: alloc.NodeID}), notify1) err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) if err != nil { @@ -808,14 +809,16 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { } } -func TestStateStore_WatchAllocNode(t *testing.T) { +func TestStateStore_Watch(t *testing.T) { state := testStateStore(t) notify1 := make(chan struct{}, 1) notify2 := make(chan struct{}, 1) - state.WatchAllocNode("foo", notify1) - state.WatchAllocNode("foo", notify2) - state.StopWatchAllocNode("foo", notify2) + + items := watch.NewItems(watch.Item{AllocNode: "foo"}) + state.Watch(items, notify1) + state.Watch(items, notify2) + state.StopWatch(items, notify2) alloc := mock.Alloc() alloc.NodeID = "foo" @@ -1032,6 +1035,49 @@ func TestStateStore_RestoreAlloc(t *testing.T) { } } +func TestStateWatch_watch(t *testing.T) { + sw := newStateWatch() + notify1 := make(chan struct{}, 1) + notify2 := make(chan struct{}, 1) + notify3 := make(chan struct{}, 1) + + // Notifications trigger subscribed channels + sw.watch(watch.Item{Table: "foo"}, notify1) + sw.watch(watch.Item{Table: "bar"}, notify2) + sw.watch(watch.Item{Table: "baz"}, notify3) + + items := watch.NewItems() + items.Add(watch.Item{Table: "foo"}) + items.Add(watch.Item{Table: "bar"}) + + sw.notify(items) + if len(notify1) != 1 { + t.Fatalf("should notify") + } + if len(notify2) != 1 { + t.Fatalf("should notify") + } + if len(notify3) != 0 { + t.Fatalf("should not notify") + } +} + +func TestStateWatch_stopWatch(t *testing.T) { + sw := newStateWatch() + notify := make(chan struct{}) + + // First subscribe + sw.watch(watch.Item{Table: "foo"}, notify) + + // Unsubscribe stop notifications + sw.stopWatch(watch.Item{Table: "foo"}, notify) + + sw.notify(watch.NewItems(watch.Item{Table: "foo"})) + if len(notify) != 0 { + t.Fatalf("should not notify") + } +} + // NodeIDSort 
is used to sort nodes by ID type NodeIDSort []*structs.Node diff --git a/nomad/state/watch.go b/nomad/state/watch.go deleted file mode 100644 index 124eb0612..000000000 --- a/nomad/state/watch.go +++ /dev/null @@ -1,85 +0,0 @@ -package state - -import ( - "sync" -) - -// watchItem describes the scope of a watch. It is used to provide a uniform -// input for subscribe/unsubscribe and notification firing. -type watchItem struct { - alloc string - allocNode string - eval string - job string - node string - table string -} - -// watchItems is a helper used to construct a set of watchItems. It deduplicates -// the items as they are added using map keys. -type watchItems map[watchItem]struct{} - -func newWatchItems(items ...watchItem) watchItems { - wi := make(watchItems) - for _, item := range items { - wi.add(item) - } - return wi -} - -// add adds an item to the watch set. -func (w watchItems) add(wi watchItem) { - w[wi] = struct{}{} -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. -type stateWatch struct { - items map[watchItem]*NotifyGroup - l sync.Mutex -} - -// newStateWatch creates a new stateWatch for change notification. -func newStateWatch() *stateWatch { - return &stateWatch{ - items: make(map[watchItem]*NotifyGroup), - } -} - -// watch subscribes a channel to the given watch item. -func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - grp, ok := w.items[wi] - if !ok { - grp = new(NotifyGroup) - w.items[wi] = grp - } - grp.Wait(ch) -} - -// stopWatch unsubscribes a channel from the given watch item. -func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - if grp, ok := w.items[wi]; ok { - grp.Clear(ch) - if grp.Empty() { - delete(w.items, wi) - } - } -} - -// notify is used to fire notifications on the given watch items. 
-func (w *stateWatch) notify(items watchItems) { - w.l.Lock() - defer w.l.Unlock() - - for wi, _ := range items { - if grp, ok := w.items[wi]; ok { - grp.Notify() - } - } -} diff --git a/nomad/state/watch_test.go b/nomad/state/watch_test.go deleted file mode 100644 index aad53d798..000000000 --- a/nomad/state/watch_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package state - -import ( - "testing" -) - -func TestWatchItems(t *testing.T) { - wi := make(watchItems) - - // Adding items works - wi.add(watchItem{table: "foo"}) - wi.add(watchItem{node: "bar"}) - if len(wi) != 2 { - t.Fatalf("expected 2 items, got: %#v", wi) - } - - // Adding duplicates auto-dedupes - wi.add(watchItem{table: "foo"}) - if len(wi) != 2 { - t.Fatalf("expected 2 items, got: %#v", wi) - } -} - -func TestStateWatch_watch(t *testing.T) { - watch := newStateWatch() - notify1 := make(chan struct{}, 1) - notify2 := make(chan struct{}, 1) - notify3 := make(chan struct{}, 1) - - // Notifications trigger subscribed channels - watch.watch(watchItem{table: "foo"}, notify1) - watch.watch(watchItem{table: "bar"}, notify2) - watch.watch(watchItem{table: "baz"}, notify3) - - items := make(watchItems) - items.add(watchItem{table: "foo"}) - items.add(watchItem{table: "bar"}) - - watch.notify(items) - if len(notify1) != 1 { - t.Fatalf("should notify") - } - if len(notify2) != 1 { - t.Fatalf("should notify") - } - if len(notify3) != 0 { - t.Fatalf("should not notify") - } -} - -func TestStateWatch_stopWatch(t *testing.T) { - watch := newStateWatch() - notify := make(chan struct{}) - - // First subscribe - watch.watch(watchItem{table: "foo"}, notify) - - // Unsubscribe stop notifications - watch.stopWatch(watchItem{table: "foo"}, notify) - - items := make(watchItems) - items.add(watchItem{table: "foo"}) - watch.notify(items) - if len(notify) != 0 { - t.Fatalf("should not notify") - } -} diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go new file mode 100644 index 000000000..e5cdce16f --- /dev/null +++ b/nomad/watch/watch.go @@ -0,0 +1,33 @@ +package watch + +// The watch package provides a means of describing a watch for a blocking +// query. It is exported so it may be shared between Nomad's RPC layer and +// the underlying state store. + +// Item describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. +type Item struct { + Alloc string + AllocNode string + Eval string + Job string + Node string + Table string +} + +// Items is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type Items map[Item]struct{} + +func NewItems(items ...Item) Items { + wi := make(Items) + for _, item := range items { + wi.Add(item) + } + return wi +} + +// Add adds an item to the watch set. 
+func (wi Items) Add(i Item) { + wi[i] = struct{}{} +} diff --git a/nomad/watch/watch_test.go b/nomad/watch/watch_test.go new file mode 100644 index 000000000..9a8901aa8 --- /dev/null +++ b/nomad/watch/watch_test.go @@ -0,0 +1,31 @@ +package watch + +import ( + "testing" +) + +func TestWatchItems(t *testing.T) { + // Creates an empty set of items + wi := NewItems() + if len(wi) != 0 { + t.Fatalf("expect 0 items, got: %#v", wi) + } + + // Creates a new set of supplied items + wi = NewItems(Item{Table: "foo"}) + if len(wi) != 1 { + t.Fatalf("expected 1 item, got: %#v", wi) + } + + // Adding items works + wi.Add(Item{Node: "bar"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) + } + + // Adding duplicates auto-dedupes + wi.Add(Item{Table: "foo"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) + } +} From cd5bdd7c08d86ac39b880dc3bbffc298f29b7552 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 15:01:29 -0700 Subject: [PATCH 18/59] nomad: support blocking queries on single jobs --- nomad/job_endpoint.go | 57 ++++++++++++++++++++++---------------- nomad/job_endpoint_test.go | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 24 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index ff296a986..ad15d626d 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -181,32 +181,41 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now()) - // Look for the job - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.JobByID(args.JobID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Job: args.JobID}), + run: func() error { - // Setup the output - if out != nil { - reply.Job = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("jobs") - if err != nil { - return err - } - reply.Index = index - } + // Look for the job + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.JobByID(args.JobID) + if err != nil { + return err + } - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Setup the output + if out != nil { + reply.Job = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("jobs") + if err != nil { + return err + } + reply.Index = index + } + + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return j.srv.blockingRPC(&opts) } // List is used to list the jobs registered in the system diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 0591e73bf..5b4ba079b 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -364,6 +364,55 @@ func TestJobEndpoint_GetJob(t *testing.T) { } } +func TestJobEndpoint_GetJob_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the jobs + job1 := mock.Job() + job2 := mock.Job() + + // Upsert a job we are not interested in first. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertJob(2, job1); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert another job later which should trigger the watch. + time.AfterFunc(200*time.Millisecond, func() { + if err := state.UpsertJob(2, job2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.JobSpecificRequest{ + JobID: job2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.SingleJobResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if resp.Job == nil || resp.Job.ID != job2.ID { + t.Fatalf("bad: %#v", resp.Job) + } +} + func TestJobEndpoint_ListJobs(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() From 82529305f3023790465e107b1c630bff2c886529 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 29 Oct 2015 15:26:35 -0700 Subject: [PATCH 19/59] Cleanup exec_linux, force cgroups, check for systemd and update the Open() api --- client/executor/exec_linux.go | 504 +++++++++++++---------------- client/executor/exec_linux_test.go | 37 +-- command/spawn_daemon.go | 2 +- 3 files changed, 248 insertions(+), 295 deletions(-) diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index ceb178063..1a52265b1 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -22,14 +22,12 @@ import ( "github.com/hashicorp/nomad/helper/discover" "github.com/hashicorp/nomad/nomad/structs" + "github.com/opencontainers/runc/libcontainer/cgroups" cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" ) -const ( - cgroupMount = "/sys/fs/cgroup" -) - var ( // A mapping of directories on the host OS to attempt to embed inside each // task's chroot. @@ -45,17 +43,7 @@ var ( ) func NewExecutor() Executor { - e := LinuxExecutor{} - - // TODO: In a follow-up PR make it so this only happens once per client. - // Fingerprinting shouldn't happen per task. - - // Check that cgroups are available. - if _, err := os.Stat(cgroupMount); err == nil { - e.cgroupEnabled = true - } - - return &e + return &LinuxExecutor{} } // Linux executor is designed to run on linux kernel 2.8+. @@ -63,22 +51,24 @@ type LinuxExecutor struct { cmd user *user.User - // Finger print capabilities. - cgroupEnabled bool - // Isolation configurations. groups *cgroupConfig.Cgroup alloc *allocdir.AllocDir taskName string taskDir string - // Tracking of child process. - spawnChild exec.Cmd + // Tracking of spawn process. + spawnChild *os.Process spawnOutputWriter *os.File spawnOutputReader *os.File - // Track whether there are filesystems mounted in the task dir. - mounts bool + // Tracking of user process. 
+ exitStatusFile string + userPid int +} + +func (e *LinuxExecutor) Command() *cmd { + return &e.cmd } func (e *LinuxExecutor) Limit(resources *structs.Resources) error { @@ -86,139 +76,62 @@ func (e *LinuxExecutor) Limit(resources *structs.Resources) error { return errNoResources } - if e.cgroupEnabled { - return e.configureCgroups(resources) + return e.configureCgroups(resources) +} + +// execLinuxID contains the necessary information to reattach to an executed +// process and cleanup the created cgroups. +type ExecLinuxID struct { + Groups *cgroupConfig.Cgroup + SpawnPid int + UserPid int + ExitStatusFile string + TaskDir string +} + +func (e *LinuxExecutor) Open(id string) error { + // De-serialize the ID. + dec := json.NewDecoder(strings.NewReader(id)) + var execID ExecLinuxID + if err := dec.Decode(&execID); err != nil { + return fmt.Errorf("Failed to parse id: %v", err) + } + + // Setup the executor. + e.groups = execID.Groups + e.exitStatusFile = execID.ExitStatusFile + e.userPid = execID.UserPid + e.taskDir = execID.TaskDir + + proc, err := os.FindProcess(execID.SpawnPid) + if proc != nil && err == nil { + e.spawnChild = proc } return nil } -func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { - e.taskName = taskName - taskDir, ok := alloc.TaskDirs[taskName] - if !ok { - fmt.Errorf("Couldn't find task directory for task %v", taskName) - } - e.taskDir = taskDir - - if err := alloc.MountSharedDir(taskName); err != nil { - return err +func (e *LinuxExecutor) ID() (string, error) { + if e.spawnChild == nil { + return "", fmt.Errorf("Process has finished or was never started") } - if err := alloc.Embed(taskName, chrootEnv); err != nil { - return err + // Build the ID. + id := ExecLinuxID{ + Groups: e.groups, + SpawnPid: e.spawnChild.Pid, + UserPid: e.userPid, + ExitStatusFile: e.exitStatusFile, + TaskDir: e.taskDir, } - // Mount dev - dev := filepath.Join(taskDir, "dev") - if err := os.Mkdir(dev, 0777); err != nil { - return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(id); err != nil { + return "", fmt.Errorf("Failed to serialize id: %v", err) } - if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) - } - - // Mount proc - proc := filepath.Join(taskDir, "proc") - if err := os.Mkdir(proc, 0777); err != nil { - return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) - } - - if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) - } - - // Set the tasks AllocDir environment variable. - env, err := environment.ParseFromList(e.Cmd.Env) - if err != nil { - return err - } - env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) - env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) - e.Cmd.Env = env.List() - - e.alloc = alloc - e.mounts = true - return nil -} - -func (e *LinuxExecutor) cleanTaskDir() error { - if e.alloc == nil { - return errors.New("ConfigureTaskDir() must be called before Start()") - } - - if !e.mounts { - return nil - } - - // Unmount dev. - errs := new(multierror.Error) - dev := filepath.Join(e.taskDir, "dev") - if err := syscall.Unmount(dev, 0); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) - } - - // Unmount proc. 
- proc := filepath.Join(e.taskDir, "proc") - if err := syscall.Unmount(proc, 0); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) - } - - e.mounts = false - return errs.ErrorOrNil() -} - -func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { - if !e.cgroupEnabled { - return nil - } - - e.groups = &cgroupConfig.Cgroup{} - - // Groups will be created in a heiarchy according to the resource being - // constrained, current session, and then this unique name. Restraints are - // then placed in the corresponding files. - // Ex: restricting a process to 2048Mhz CPU and 2MB of memory: - // $ cat /sys/fs/cgroup/cpu/user/1000.user/4.session//cpu.shares - // 2028 - // $ cat /sys/fs/cgroup/memory/user/1000.user/4.session//memory.limit_in_bytes - // 2097152 - e.groups.Name = structs.GenerateUUID() - - // TODO: verify this is needed for things like network access - e.groups.AllowAllDevices = true - - if resources.MemoryMB > 0 { - // Total amount of memory allowed to consume - e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) - // Disable swap to avoid issues on the machine - e.groups.MemorySwap = int64(-1) - } - - if resources.CPU != 0 { - if resources.CPU < 2 { - return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) - } - - // Set the relative CPU shares for this cgroup. - // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any - // given process will have at least that amount of resources, but likely - // more since it is (probably) rare that the machine will run at 100% - // CPU. This scale will cease to work if a node is overprovisioned. - e.groups.CpuShares = int64(resources.CPU) - } - - if resources.IOPS != 0 { - // Validate it is in an acceptable range. - if resources.IOPS < 10 || resources.IOPS > 1000 { - return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) - } - - e.groups.BlkioWeight = uint16(resources.IOPS) - } - - return nil + return buffer.String(), nil } func (e *LinuxExecutor) runAs(userid string) error { @@ -292,33 +205,30 @@ func (e *LinuxExecutor) spawnDaemon() error { return fmt.Errorf("Failed to determine the nomad executable: %v", err) } + c := command.DaemonConfig{ + Cmd: e.cmd.Cmd, + Chroot: e.taskDir, + StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + StdinFile: "/dev/null", + ExitStatusFile: e.exitStatusFile, + } + // Serialize the cmd and the cgroup configuration so it can be passed to the // sub-process. var buffer bytes.Buffer enc := json.NewEncoder(&buffer) - - c := command.DaemonConfig{ - Cmd: e.cmd.Cmd, - Chroot: e.taskDir, - StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), - StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), - StdinFile: "/dev/null", - } if err := enc.Encode(c); err != nil { return fmt.Errorf("Failed to serialize daemon configuration: %v", err) } - // Create a pipe to capture Stdout. - pr, pw, err := os.Pipe() - if err != nil { + // Create a pipe to capture stdout. + if e.spawnOutputReader, e.spawnOutputWriter, err = os.Pipe(); err != nil { return err } - e.spawnOutputWriter = pw - e.spawnOutputReader = pr // Call ourselves using a hidden flag. 
The new instance of nomad will join - // the passed cgroup, forkExec the cmd, and output status codes through - // Stdout. + // the passed cgroup, forkExec the cmd, and return statuses through stdout. escaped := strconv.Quote(buffer.String()) spawn := exec.Command(bin, "spawn-daemon", escaped) spawn.Stdout = e.spawnOutputWriter @@ -334,26 +244,19 @@ func (e *LinuxExecutor) spawnDaemon() error { } // Join the spawn-daemon to the cgroup. - if e.groups != nil { - manager := cgroupFs.Manager{} - manager.Cgroups = e.groups + manager := e.getCgroupManager(e.groups) - // Apply will place the current pid into the tasks file for each of the - // created cgroups: - // /sys/fs/cgroup/memory/user/1000.user/4.session//tasks - // - // Apply requires superuser permissions, and may fail if Nomad is not run with - // the required permissions - if err := manager.Apply(spawn.Process.Pid); err != nil { - errs := new(multierror.Error) - errs = multierror.Append(errs, fmt.Errorf("Failed to join spawn-daemon to the cgroup (config => %+v): %v", manager.Cgroups, err)) + // Apply will place the spawn dameon into the created cgroups. + if err := manager.Apply(spawn.Process.Pid); err != nil { + errs := new(multierror.Error) + errs = multierror.Append(errs, + fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err)) - if err := sendAbortCommand(spawnStdIn); err != nil { - errs = multierror.Append(errs, err) - } - - return errs + if err := sendAbortCommand(spawnStdIn); err != nil { + errs = multierror.Append(errs, err) } + + return errs } // Tell it to start. @@ -372,7 +275,8 @@ func (e *LinuxExecutor) spawnDaemon() error { return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) } - e.spawnChild = *spawn + e.userPid = resp.UserPID + e.spawnChild = spawn.Process return nil } @@ -394,74 +298,22 @@ func sendAbortCommand(w io.Writer) error { return nil } -// Open's behavior is to kill all processes associated with the id and return an -// error. This is done because it is not possible to re-attach to the -// spawn-daemon's stdout to retrieve status messages. -func (e *LinuxExecutor) Open(id string) error { - parts := strings.SplitN(id, ":", 2) - if len(parts) != 2 { - return fmt.Errorf("Invalid id: %v", id) - } - - switch parts[0] { - case "PID": - pid, err := strconv.Atoi(parts[1]) - if err != nil { - return fmt.Errorf("Invalid id: failed to parse pid %v", parts[1]) - } - - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("Failed to find Pid %v: %v", pid, err) - } - - if err := process.Kill(); err != nil { - return fmt.Errorf("Failed to kill Pid %v: %v", pid, err) - } - case "CGROUP": - if !e.cgroupEnabled { - return errors.New("Passed a a cgroup identifier, but cgroups are disabled") - } - - // De-serialize the cgroup configuration. - dec := json.NewDecoder(strings.NewReader(parts[1])) - var groups cgroupConfig.Cgroup - if err := dec.Decode(&groups); err != nil { - return fmt.Errorf("Failed to parse cgroup configuration: %v", err) - } - - e.groups = &groups - if err := e.destroyCgroup(); err != nil { - return err - } - // TODO: cleanTaskDir is a little more complicated here because the OS - // may have already unmounted in the case of a restart. Need to scan. 
- default: - return fmt.Errorf("Invalid id type: %v", parts[0]) - } - - return errors.New("Could not re-open to id (intended).") -} - func (e *LinuxExecutor) Wait() error { - if e.spawnChild.Process == nil { - return errors.New("Can not find child to wait on") + if e.spawnOutputReader != nil { + e.spawnOutputReader.Close() } - defer e.spawnOutputWriter.Close() - defer e.spawnOutputReader.Close() + if e.spawnOutputWriter != nil { + e.spawnOutputWriter.Close() + } errs := new(multierror.Error) - if err := e.spawnChild.Wait(); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Process.Pid, err)) + if err := e.spawnWait(); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Pid, err)) } - // If they fork/exec and then exit, wait will return but they will be still - // running processes so we need to kill the full cgroup. - if e.groups != nil { - if err := e.destroyCgroup(); err != nil { - errs = multierror.Append(errs, err) - } + if err := e.destroyCgroup(); err != nil { + errs = multierror.Append(errs, err) } if err := e.cleanTaskDir(); err != nil { @@ -471,27 +323,18 @@ func (e *LinuxExecutor) Wait() error { return errs.ErrorOrNil() } -// If cgroups are used, the ID is the cgroup structurue. Otherwise, it is the -// PID of the spawn-daemon process. An error is returned if the process was -// never started. -func (e *LinuxExecutor) ID() (string, error) { - if e.spawnChild.Process != nil { - if e.cgroupEnabled && e.groups != nil { - // Serialize the cgroup structure so it can be undone on suabsequent - // opens. - var buffer bytes.Buffer - enc := json.NewEncoder(&buffer) - if err := enc.Encode(e.groups); err != nil { - return "", fmt.Errorf("Failed to serialize daemon configuration: %v", err) - } - - return fmt.Sprintf("CGROUP:%v", buffer.String()), nil - } - - return fmt.Sprintf("PID:%d", e.spawnChild.Process.Pid), nil +// spawnWait waits on the spawn-daemon and can handle the spawn-daemon not being +// a child of this process. +func (e *LinuxExecutor) spawnWait() error { + // TODO: This needs to be able to wait on non-child processes. + state, err := e.spawnChild.Wait() + if err != nil { + return err + } else if !state.Success() { + return fmt.Errorf("exited with non-zero code") } - return "", fmt.Errorf("Process has finished or was never started") + return nil } func (e *LinuxExecutor) Shutdown() error { @@ -507,16 +350,6 @@ func (e *LinuxExecutor) ForceStop() error { e.spawnOutputWriter.Close() } - // If the task is not running inside a cgroup then just the spawn-daemon child is killed. - // TODO: Find a good way to kill the children of the spawn-daemon. - if e.groups == nil { - if err := e.spawnChild.Process.Kill(); err != nil { - return fmt.Errorf("Failed to kill child (%v): %v", e.spawnChild.Process.Pid, err) - } - - return nil - } - errs := new(multierror.Error) if e.groups != nil { if err := e.destroyCgroup(); err != nil { @@ -531,13 +364,131 @@ func (e *LinuxExecutor) ForceStop() error { return errs.ErrorOrNil() } +// Task Directory related functions. 
+ +func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { + e.taskName = taskName + taskDir, ok := alloc.TaskDirs[taskName] + if !ok { + fmt.Errorf("Couldn't find task directory for task %v", taskName) + } + e.taskDir = taskDir + + if err := alloc.MountSharedDir(taskName); err != nil { + return err + } + + if err := alloc.Embed(taskName, chrootEnv); err != nil { + return err + } + + // Mount dev + dev := filepath.Join(taskDir, "dev") + if err := os.Mkdir(dev, 0777); err != nil { + return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) + } + + if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) + } + + // Mount proc + proc := filepath.Join(taskDir, "proc") + if err := os.Mkdir(proc, 0777); err != nil { + return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) + } + + if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) + } + + // Set the tasks AllocDir environment variable. + env, err := environment.ParseFromList(e.Cmd.Env) + if err != nil { + return err + } + env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) + env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) + e.Cmd.Env = env.List() + + // Store the file path to save the exit status to. + e.exitStatusFile = filepath.Join(alloc.AllocDir, fmt.Sprintf("%s_%s", taskName, "exit_status")) + + e.alloc = alloc + return nil +} + +func (e *LinuxExecutor) pathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + return false + } + } + return true +} + +func (e *LinuxExecutor) cleanTaskDir() error { + // Unmount dev. + errs := new(multierror.Error) + dev := filepath.Join(e.taskDir, "dev") + if e.pathExists(dev) { + if err := syscall.Unmount(dev, 0); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) + } + } + + // Unmount proc. + proc := filepath.Join(e.taskDir, "proc") + if e.pathExists(proc) { + if err := syscall.Unmount(proc, 0); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) + } + } + + return errs.ErrorOrNil() +} + +// Cgroup related functions. + +func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { + e.groups = &cgroupConfig.Cgroup{} + e.groups.Name = structs.GenerateUUID() + + // TODO: verify this is needed for things like network access + e.groups.AllowAllDevices = true + + if resources.MemoryMB > 0 { + // Total amount of memory allowed to consume + e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) + // Disable swap to avoid issues on the machine + e.groups.MemorySwap = int64(-1) + } + + if resources.CPU < 2 { + return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) + } + + // Set the relative CPU shares for this cgroup. + e.groups.CpuShares = int64(resources.CPU) + + if resources.IOPS != 0 { + // Validate it is in an acceptable range. 
+ if resources.IOPS < 10 || resources.IOPS > 1000 { + return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) + } + + e.groups.BlkioWeight = uint16(resources.IOPS) + } + + return nil +} + func (e *LinuxExecutor) destroyCgroup() error { if e.groups == nil { return errors.New("Can't destroy: cgroup configuration empty") } - manager := cgroupFs.Manager{} - manager.Cgroups = e.groups + manager := e.getCgroupManager(e.groups) pids, err := manager.GetPids() if err != nil { return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err) @@ -555,11 +506,6 @@ func (e *LinuxExecutor) destroyCgroup() error { multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err)) continue } - - if _, err := process.Wait(); err != nil { - multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err)) - continue - } } // Remove the cgroup. @@ -574,6 +520,12 @@ func (e *LinuxExecutor) destroyCgroup() error { return nil } -func (e *LinuxExecutor) Command() *cmd { - return &e.cmd +// getCgroupManager returns the correct libcontainer cgroup manager. +func (e *LinuxExecutor) getCgroupManager(groups *cgroupConfig.Cgroup) cgroups.Manager { + var manager cgroups.Manager + manager = &cgroupFs.Manager{Cgroups: groups} + if systemd.UseSystemd() { + manager = &systemd.Manager{Cgroups: groups} + } + return manager } diff --git a/client/executor/exec_linux_test.go b/client/executor/exec_linux_test.go index 8f33b0da4..1b8307b02 100644 --- a/client/executor/exec_linux_test.go +++ b/client/executor/exec_linux_test.go @@ -139,11 +139,6 @@ func TestExecutorLinux_Start_Kill(t *testing.T) { filePath := filepath.Join(taskDir, "output") e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - // This test can only be run if cgroups are enabled. - if !e.(*LinuxExecutor).cgroupEnabled { - t.SkipNow() - } - if err := e.Limit(constraint); err != nil { t.Fatalf("Limit() failed: %v", err) } @@ -178,13 +173,11 @@ func TestExecutorLinux_Open(t *testing.T) { t.Fatalf("No task directory found for task %v", task) } - filePath := filepath.Join(taskDir, "output") - e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - - // This test can only be run if cgroups are enabled. - if !e.(*LinuxExecutor).cgroupEnabled { - t.SkipNow() - } + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "output.txt") + absFilePath := filepath.Join(taskDir, file) + cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) + e := Command("/bin/bash", "-c", cmd) if err := e.Limit(constraint); err != nil { t.Fatalf("Limit() failed: %v", err) @@ -203,14 +196,22 @@ func TestExecutorLinux_Open(t *testing.T) { t.Fatalf("ID() failed: %v", err) } - if _, err := OpenId(id); err == nil { - t.Fatalf("Open(%v) should have failed", id) + e2 := NewExecutor() + if err := e2.Open(id); err != nil { + t.Fatalf("Open(%v) failed: %v", id, err) } - time.Sleep(1500 * time.Millisecond) + if err := e2.Wait(); err != nil { + t.Fatalf("Wait() failed: %v", err) + } - // Check that the file doesn't exist, open should have killed the process. 
- if _, err := os.Stat(filePath); err == nil { - t.Fatalf("Stat(%v) should have failed: task not killed", filePath) + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + t.Fatalf("Couldn't read file %v", absFilePath) + } + + act := string(output) + if act != expected { + t.Fatalf("Command output incorrectly: want %v; got %v", expected, act) } } diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index ea7868be4..81117ce2e 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -192,7 +192,7 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } - if c.config != nil && c.config.Process == nil { + if c.config != nil && c.config.Process != nil { startStatus.UserPID = c.config.Process.Pid } From de495bfc3ccd5b234340396a9c39633412f6cd01 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 15:26:14 -0700 Subject: [PATCH 20/59] nomad: support blocking queries on job allocations --- nomad/job_endpoint.go | 59 ++++++++++++++++++++---------------- nomad/job_endpoint_test.go | 61 +++++++++++++++++++++++++++++++++++--- nomad/state/state_store.go | 2 ++ nomad/watch/watch.go | 1 + 4 files changed, 94 insertions(+), 29 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index ad15d626d..6d43e1c89 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -275,34 +275,43 @@ func (j *Job) Allocations(args *structs.JobSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now()) - // Capture the allocations - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - allocs, err := snap.AllocsByJob(args.JobID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocJob: args.JobID}), + run: func() error { + // Capture the allocations + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + allocs, err := snap.AllocsByJob(args.JobID) + if err != nil { + return err + } - // Convert to stubs - if len(allocs) > 0 { - reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) - for _, alloc := range allocs { - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } - } + // Convert to stubs + if len(allocs) > 0 { + reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) + for _, alloc := range allocs { + reply.Allocations = append(reply.Allocations, alloc.Stub()) + } + } - // Use the last index that affected the allocs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the allocs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + + }} + return j.srv.blockingRPC(&opts) } // Evaluations is used to list the evaluations for a job diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 5b4ba079b..f42031a7a 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -377,14 +377,14 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { // Upsert a job we are not interested in first. 
time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertJob(2, job1); err != nil { + if err := state.UpsertJob(1000, job1); err != nil { t.Fatalf("err: %v", err) } }) // Upsert another job later which should trigger the watch. time.AfterFunc(200*time.Millisecond, func() { - if err := state.UpsertJob(2, job2); err != nil { + if err := state.UpsertJob(2000, job2); err != nil { t.Fatalf("err: %v", err) } }) @@ -405,8 +405,8 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { - t.Fatalf("Bad index: %d %d", resp.Index, 2) + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) } if resp.Job == nil || resp.Job.ID != job2.ID { t.Fatalf("bad: %#v", resp.Job) @@ -546,6 +546,59 @@ func TestJobEndpoint_Allocations(t *testing.T) { } } +func TestJobEndpoint_Allocations_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + alloc2.JobID = "job1" + state := s1.fsm.State() + + // First upsert an unrelated alloc + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert an alloc for the job we are interested in later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the jobs + get := &structs.JobSpecificRequest{ + JobID: "job1", + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.JobAllocationsResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Job.Allocations", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" { + t.Fatalf("bad: %#v", resp.Allocations) + } +} + func TestJobEndpoint_Evaluations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 17484f985..2a31555f2 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -532,6 +532,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati watcher := watch.NewItems() watcher.Add(watch.Item{Table: "allocs"}) watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) // Look for existing alloc @@ -603,6 +604,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er } watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index e5cdce16f..3973e562b 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -8,6 +8,7 @@ package watch // input for subscribe/unsubscribe and notification firing. 
type Item struct { Alloc string + AllocJob string AllocNode string Eval string Job string From 035e5ba80fa4e3bda7d9e37d57ca9acd54dfe873 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 29 Oct 2015 15:39:26 -0700 Subject: [PATCH 21/59] Comments --- client/executor/exec_linux.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index 1a52265b1..9c4bcd9a4 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -134,6 +134,8 @@ func (e *LinuxExecutor) ID() (string, error) { return buffer.String(), nil } +// runAs takes a user id as a string and looks up the user. It stores the +// results in the executor and returns an error if the user could not be found. func (e *LinuxExecutor) runAs(userid string) error { errs := new(multierror.Error) @@ -161,8 +163,8 @@ func (e *LinuxExecutor) runAs(userid string) error { } func (e *LinuxExecutor) Start() error { - // Run as "nobody" user so we don't leak root privilege to the - // spawned process. + // Run as "nobody" user so we don't leak root privilege to the spawned + // process. if err := e.runAs("nobody"); err == nil && e.user != nil { e.cmd.SetUID(e.user.Uid) e.cmd.SetGID(e.user.Gid) @@ -280,6 +282,8 @@ func (e *LinuxExecutor) spawnDaemon() error { return nil } +// sendStartCommand sends the necessary command to the spawn-daemon to have it +// start the user process. func sendStartCommand(w io.Writer) error { enc := json.NewEncoder(w) if err := enc.Encode(true); err != nil { @@ -289,6 +293,9 @@ func sendStartCommand(w io.Writer) error { return nil } +// sendAbortCommand sends the necessary command to the spawn-daemon to have it +// abort starting the user process. This should be invoked if the spawn-daemon +// could not be isolated into a cgroup. func sendAbortCommand(w io.Writer) error { enc := json.NewEncoder(w) if err := enc.Encode(false); err != nil { @@ -298,6 +305,8 @@ func sendAbortCommand(w io.Writer) error { return nil } +// Wait waits til the user process exits and returns an error on non-zero exit +// codes. Wait also cleans up the task directory and created cgroups. func (e *LinuxExecutor) Wait() error { if e.spawnOutputReader != nil { e.spawnOutputReader.Close() @@ -341,6 +350,8 @@ func (e *LinuxExecutor) Shutdown() error { return e.ForceStop() } +// ForceStop immediately exits the user process and cleans up both the task +// directory and the cgroups. func (e *LinuxExecutor) ForceStop() error { if e.spawnOutputReader != nil { e.spawnOutputReader.Close() @@ -366,6 +377,8 @@ func (e *LinuxExecutor) ForceStop() error { // Task Directory related functions. +// ConfigureTaskDir creates the necessary directory structure for a proper +// chroot. cleanTaskDir should be called after. func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { e.taskName = taskName taskDir, ok := alloc.TaskDirs[taskName] @@ -418,6 +431,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD return nil } +// pathExists is a helper function to check if the path exists. func (e *LinuxExecutor) pathExists(path string) bool { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { @@ -427,6 +441,8 @@ func (e *LinuxExecutor) pathExists(path string) bool { return true } +// cleanTaskDir is an idempotent operation to clean the task directory and +// should be called when tearing down the task. func (e *LinuxExecutor) cleanTaskDir() error { // Unmount dev. 
errs := new(multierror.Error) @@ -450,6 +466,8 @@ func (e *LinuxExecutor) cleanTaskDir() error { // Cgroup related functions. +// configureCgroups converts a Nomad Resources specification into the equivalent +// cgroup configuration. It returns an error if the resources are invalid. func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { e.groups = &cgroupConfig.Cgroup{} e.groups.Name = structs.GenerateUUID() @@ -483,6 +501,8 @@ func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { return nil } +// destroyCgroup kills all processes in the cgroup and removes the cgroup +// configuration from the host. func (e *LinuxExecutor) destroyCgroup() error { if e.groups == nil { return errors.New("Can't destroy: cgroup configuration empty") From 89a0af6306d95cf92d33cafeb903ea3bdf698327 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 15:48:44 -0700 Subject: [PATCH 22/59] nomad: support blocking queries on single nodes --- nomad/node_endpoint.go | 64 +++++++++++++++++++++---------------- nomad/node_endpoint_test.go | 50 +++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 28 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 65a83e1b1..f28cc3930 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -283,37 +283,45 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) - // Verify the arguments - if args.NodeID == "" { - return fmt.Errorf("missing node ID") - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Node: args.NodeID}), + run: func() error { + // Verify the arguments + if args.NodeID == "" { + return fmt.Errorf("missing node ID") + } - // Look for the node - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.NodeByID(args.NodeID) - if err != nil { - return err - } + // Look for the node + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.NodeByID(args.NodeID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Node = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("nodes") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + if out != nil { + reply.Node = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("nodes") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - n.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + n.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return n.srv.blockingRPC(&opts) } // GetAllocs is used to request allocations for a specific node diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index d06e6ea0f..8d53c0a66 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -371,6 +371,56 @@ func TestClientEndpoint_GetNode(t *testing.T) { } } +func TestClientEndpoint_GetNode_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the node + node1 := mock.Node() + node2 := mock.Node() + + // First create an unrelated node. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertNode(1000, node1); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert the node we are watching later + time.AfterFunc(200*time.Millisecond, func() { + if err := state.UpsertNode(2000, node2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the node + get := &structs.NodeSpecificRequest{ + NodeID: node2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.SingleNodeResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if resp.Node == nil || resp.Node.ID != node2.ID { + t.Fatalf("bad: %#v", resp.Node) + } +} + func TestClientEndpoint_GetAllocs(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() From ff6e1fea49db9282042289d15745704de084e35b Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 16:04:53 -0700 Subject: [PATCH 23/59] nomad: support blocking queries on single allocations --- nomad/alloc_endpoint.go | 56 ++++++++++++++++++++---------------- nomad/alloc_endpoint_test.go | 52 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 24 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index f3b9dbdc4..e8b6af63c 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -69,30 +69,38 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "alloc", "get_alloc"}, time.Now()) - // Lookup the allocation - snap, err := a.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.AllocByID(args.AllocID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Alloc: args.AllocID}), + run: func() error { + // Lookup the allocation + snap, err := a.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.AllocByID(args.AllocID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Alloc = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + if out != nil { + reply.Alloc = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - a.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + a.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return a.srv.blockingRPC(&opts) } diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 01688da96..0f7e4c0c4 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -144,3 +144,55 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) { t.Fatalf("bad: %#v", resp.Alloc) } } + +func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) 
+ + // Create the allocs + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + + // First create an unrelated alloc + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Create the alloc we are watching later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the jobs + get := &structs.AllocSpecificRequest{ + AllocID: alloc2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.SingleAllocResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Alloc.GetAlloc", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID { + t.Fatalf("bad: %#v", resp.Alloc) + } +} From 1e4320e0331ef97436711b0802276d9687a7b9d0 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 16:12:25 -0700 Subject: [PATCH 24/59] nomad: support blocking queries for single evals --- nomad/eval_endpoint.go | 56 +++++++++++++++++++++---------------- nomad/eval_endpoint_test.go | 52 ++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 24 deletions(-) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 07dfc18fe..e473b5b10 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -27,32 +27,40 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now()) - // Look for the job - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.EvalByID(args.EvalID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Eval: args.EvalID}), + run: func() error { + // Look for the job + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.EvalByID(args.EvalID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Eval = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("evals") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + if out != nil { + reply.Eval = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("evals") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } // Dequeue is used to dequeue a pending evaluation diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 3b9a62a8e..01ec27f46 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -51,6 +51,58 @@ func TestEvalEndpoint_GetEval(t *testing.T) { } } +func TestEvalEndpoint_GetEval_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := 
rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the evals + eval1 := mock.Eval() + eval2 := mock.Eval() + + // First create an unrelated eval + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertEvals(1000, []*structs.Evaluation{eval1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert the eval we are watching later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertEvals(2000, []*structs.Evaluation{eval2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the eval + get := &structs.EvalSpecificRequest{ + EvalID: eval2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.SingleEvalResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if resp.Eval == nil || resp.Eval.ID != eval2.ID { + t.Fatalf("bad: %#v", resp.Eval) + } +} + func TestEvalEndpoint_Dequeue(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue From a27e8bbe51496278074bcd26cdd5a27b40f3c10a Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 16:20:57 -0700 Subject: [PATCH 25/59] nomad: support blocking queries on eval-specific allocations --- nomad/eval_endpoint.go | 58 +++++++++++++++++++++---------------- nomad/eval_endpoint_test.go | 52 +++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 2 ++ nomad/watch/watch.go | 1 + 4 files changed, 88 insertions(+), 25 deletions(-) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index e473b5b10..5afa31dfc 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -277,32 +277,40 @@ func (e *Eval) Allocations(args *structs.EvalSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now()) - // Capture the allocations - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - allocs, err := snap.AllocsByEval(args.EvalID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocEval: args.EvalID}), + run: func() error { + // Capture the allocations + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + allocs, err := snap.AllocsByEval(args.EvalID) + if err != nil { + return err + } - // Convert to a stub - if len(allocs) > 0 { - reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) - for _, alloc := range allocs { - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } - } + // Convert to a stub + if len(allocs) > 0 { + reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) + for _, alloc := range allocs { + reply.Allocations = append(reply.Allocations, alloc.Stub()) + } + } - // Use the last index that affected the allocs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the allocs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + 
e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 01ec27f46..442a2c1aa 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -484,3 +484,55 @@ func TestEvalEndpoint_Allocations(t *testing.T) { t.Fatalf("bad: %#v", resp.Allocations) } } + +func TestEvalEndpoint_Allocations_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the allocs + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + + // Upsert an unrelated alloc first + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert an alloc which will trigger the watch later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the eval + get := &structs.EvalSpecificRequest{ + EvalID: alloc2.EvalID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.EvalAllocationsResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.Allocations", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID { + t.Fatalf("bad: %#v", resp.Allocations) + } +} diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 2a31555f2..f4f97489a 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -532,6 +532,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati watcher := watch.NewItems() watcher.Add(watch.Item{Table: "allocs"}) watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocEval: alloc.EvalID}) watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) @@ -604,6 +605,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er } watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocEval: alloc.EvalID}) watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index 3973e562b..c71fe5087 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -8,6 +8,7 @@ package watch // input for subscribe/unsubscribe and notification firing. 
type Item struct { Alloc string + AllocEval string AllocJob string AllocNode string Eval string From 6fb8a2d3e2481b829e4ba3a4ec87b90fac52b6a7 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 19:00:02 -0700 Subject: [PATCH 26/59] nomad: more tests --- nomad/alloc_endpoint_test.go | 14 ++--- nomad/eval_endpoint.go | 2 +- nomad/eval_endpoint_test.go | 55 +++++++++++++----- nomad/job_endpoint.go | 2 +- nomad/job_endpoint_test.go | 67 +++++++++++++++------- nomad/node_endpoint.go | 2 +- nomad/node_endpoint_test.go | 106 ++++++++++++++++++++++++++++++----- 7 files changed, 188 insertions(+), 60 deletions(-) diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 0f7e4c0c4..4147011ac 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -45,7 +45,7 @@ func TestAllocEndpoint_List(t *testing.T) { } } -func TestAllocEndpoint_List_blocking(t *testing.T) { +func TestAllocEndpoint_List_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -145,7 +145,7 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) { } } -func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { +func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -158,7 +158,7 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { // First create an unrelated alloc time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) if err != nil { t.Fatalf("err: %v", err) } @@ -166,7 +166,7 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { // Create the alloc we are watching later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) } @@ -177,7 +177,7 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { AllocID: alloc2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.SingleAllocResponse @@ -189,8 +189,8 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID { t.Fatalf("bad: %#v", resp.Alloc) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 5afa31dfc..bc74e85f3 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -44,8 +44,8 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, } // Setup the output + reply.Eval = out if out != nil { - reply.Eval = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 442a2c1aa..6f3d154e5 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -51,7 +51,7 @@ func TestEvalEndpoint_GetEval(t *testing.T) { } } -func TestEvalEndpoint_GetEval_blocking(t *testing.T) { +func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -64,7 +64,7 @@ func TestEvalEndpoint_GetEval_blocking(t *testing.T) { // First create 
an unrelated eval time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertEvals(1000, []*structs.Evaluation{eval1}) + err := state.UpsertEvals(100, []*structs.Evaluation{eval1}) if err != nil { t.Fatalf("err: %v", err) } @@ -72,35 +72,60 @@ func TestEvalEndpoint_GetEval_blocking(t *testing.T) { // Upsert the eval we are watching later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertEvals(2000, []*structs.Evaluation{eval2}) + err := state.UpsertEvals(200, []*structs.Evaluation{eval2}) if err != nil { t.Fatalf("err: %v", err) } }) // Lookup the eval - get := &structs.EvalSpecificRequest{ + req := &structs.EvalSpecificRequest{ EvalID: eval2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.SingleEvalResponse start := time.Now() - if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp); err != nil { t.Fatalf("err: %v", err) } if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Eval == nil || resp.Eval.ID != eval2.ID { t.Fatalf("bad: %#v", resp.Eval) } + + // Eval delete triggers watches + time.AfterFunc(100*time.Millisecond, func() { + err := state.DeleteEval(300, []string{eval2.ID}, []string{}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + var resp2 structs.SingleEvalResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Eval != nil { + t.Fatalf("bad: %#v", resp2.Eval) + } } func TestEvalEndpoint_Dequeue(t *testing.T) { @@ -386,7 +411,7 @@ func TestEvalEndpoint_List(t *testing.T) { } } -func TestEvalEndpoint_List_blocking(t *testing.T) { +func TestEvalEndpoint_List_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -485,7 +510,7 @@ func TestEvalEndpoint_Allocations(t *testing.T) { } } -func TestEvalEndpoint_Allocations_blocking(t *testing.T) { +func TestEvalEndpoint_Allocations_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -498,7 +523,7 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { // Upsert an unrelated alloc first time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) if err != nil { t.Fatalf("err: %v", err) } @@ -506,7 +531,7 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { // Upsert an alloc which will trigger the watch later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) } @@ -517,7 +542,7 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { EvalID: alloc2.EvalID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp 
structs.EvalAllocationsResponse @@ -529,8 +554,8 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID { t.Fatalf("bad: %#v", resp.Allocations) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 6d43e1c89..e961428e4 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -199,8 +199,8 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest, } // Setup the output + reply.Job = out if out != nil { - reply.Job = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index f42031a7a..9e09de538 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -364,7 +364,7 @@ func TestJobEndpoint_GetJob(t *testing.T) { } } -func TestJobEndpoint_GetJob_blocking(t *testing.T) { +func TestJobEndpoint_GetJob_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -377,14 +377,14 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { // Upsert a job we are not interested in first. time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertJob(1000, job1); err != nil { + if err := state.UpsertJob(100, job1); err != nil { t.Fatalf("err: %v", err) } }) // Upsert another job later which should trigger the watch. time.AfterFunc(200*time.Millisecond, func() { - if err := state.UpsertJob(2000, job2); err != nil { + if err := state.UpsertJob(200, job2); err != nil { t.Fatalf("err: %v", err) } }) @@ -393,7 +393,7 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { JobID: job2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } start := time.Now() @@ -405,12 +405,37 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Job == nil || resp.Job.ID != job2.ID { t.Fatalf("bad: %#v", resp.Job) } + + // Job delete fires watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteJob(300, job2.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + start = time.Now() + + var resp2 structs.SingleJobResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Job != nil { + t.Fatalf("bad: %#v", resp2.Job) + } } func TestJobEndpoint_ListJobs(t *testing.T) { @@ -447,7 +472,7 @@ func TestJobEndpoint_ListJobs(t *testing.T) { } } -func TestJobEndpoint_ListJobs_blocking(t *testing.T) { +func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -459,7 +484,7 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { // 
Upsert job triggers watches time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertJob(2, job); err != nil { + if err := state.UpsertJob(100, job); err != nil { t.Fatalf("err: %v", err) } }) @@ -467,7 +492,7 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { req := &structs.JobListRequest{ QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } start := time.Now() @@ -479,8 +504,8 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { - t.Fatalf("Bad index: %d %d", resp.Index, 2) + if resp.Index != 100 { + t.Fatalf("Bad index: %d %d", resp.Index, 100) } if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID { t.Fatalf("bad: %#v", resp.Jobs) @@ -488,12 +513,12 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { // Job deletion triggers watches time.AfterFunc(100*time.Millisecond, func() { - if err := state.DeleteJob(3, job.ID); err != nil { + if err := state.DeleteJob(200, job.ID); err != nil { t.Fatalf("err: %v", err) } }) - req.MinQueryIndex = 2 + req.MinQueryIndex = 150 start = time.Now() var resp2 structs.JobListResponse if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil { @@ -503,8 +528,8 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } - if resp2.Index != 3 { - t.Fatalf("Bad index: %d %d", resp2.Index, 3) + if resp2.Index != 200 { + t.Fatalf("Bad index: %d %d", resp2.Index, 200) } if len(resp2.Jobs) != 0 { t.Fatalf("bad: %#v", resp2.Jobs) @@ -546,7 +571,7 @@ func TestJobEndpoint_Allocations(t *testing.T) { } } -func TestJobEndpoint_Allocations_blocking(t *testing.T) { +func TestJobEndpoint_Allocations_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() codec := rpcClient(t, s1) @@ -560,7 +585,7 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { // First upsert an unrelated alloc time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) if err != nil { t.Fatalf("err: %v", err) } @@ -568,7 +593,7 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { // Upsert an alloc for the job we are interested in later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) } @@ -579,7 +604,7 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { JobID: "job1", QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.JobAllocationsResponse @@ -591,8 +616,8 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" { t.Fatalf("bad: %#v", resp.Allocations) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index f28cc3930..5bd600380 100644 --- a/nomad/node_endpoint.go +++ 
b/nomad/node_endpoint.go @@ -305,8 +305,8 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, } // Setup the output + reply.Node = out if out != nil { - reply.Node = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 8d53c0a66..9a74316c7 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -371,7 +371,7 @@ func TestClientEndpoint_GetNode(t *testing.T) { } } -func TestClientEndpoint_GetNode_blocking(t *testing.T) { +func TestClientEndpoint_GetNode_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -384,41 +384,92 @@ func TestClientEndpoint_GetNode_blocking(t *testing.T) { // First create an unrelated node. time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertNode(1000, node1); err != nil { + if err := state.UpsertNode(100, node1); err != nil { t.Fatalf("err: %v", err) } }) // Upsert the node we are watching later time.AfterFunc(200*time.Millisecond, func() { - if err := state.UpsertNode(2000, node2); err != nil { + if err := state.UpsertNode(200, node2); err != nil { t.Fatalf("err: %v", err) } }) // Lookup the node - get := &structs.NodeSpecificRequest{ + req := &structs.NodeSpecificRequest{ NodeID: node2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.SingleNodeResponse start := time.Now() - if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil { t.Fatalf("err: %v", err) } if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Node == nil || resp.Node.ID != node2.ID { t.Fatalf("bad: %#v", resp.Node) } + + // Node update triggers watches + time.AfterFunc(100*time.Millisecond, func() { + nodeUpdate := mock.Node() + nodeUpdate.ID = node2.ID + nodeUpdate.Status = structs.NodeStatusDown + if err := state.UpsertNode(300, nodeUpdate); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + var resp2 structs.SingleNodeResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Node == nil || resp2.Node.Status != structs.NodeStatusDown { + t.Fatalf("bad: %#v", resp2.Node) + } + + // Node delete triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteNode(400, node2.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 350 + var resp3 structs.SingleNodeResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp3.Index != 400 { + t.Fatalf("Bad index: %d %d", resp2.Index, 400) + } + if resp3.Node != nil { + t.Fatalf("bad: 
%#v", resp3.Node) + } } func TestClientEndpoint_GetAllocs(t *testing.T) { @@ -507,16 +558,15 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { alloc.NodeID = node.ID state := s1.fsm.State() start := time.Now() - go func() { - time.Sleep(100 * time.Millisecond) + time.AfterFunc(100*time.Millisecond, func() { err := state.UpsertAllocs(100, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) } - }() + }) // Lookup the allocs in a blocking query - get := &structs.NodeSpecificRequest{ + req := &structs.NodeSpecificRequest{ NodeID: node.ID, QueryOptions: structs.QueryOptions{ Region: "global", @@ -525,7 +575,7 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { }, } var resp2 structs.NodeAllocsResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", get, &resp2); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp2); err != nil { t.Fatalf("err: %v", err) } @@ -541,6 +591,34 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { if len(resp2.Allocs) != 1 || resp2.Allocs[0].ID != alloc.ID { t.Fatalf("bad: %#v", resp2.Allocs) } + + // Alloc updates fire watches + time.AfterFunc(100*time.Millisecond, func() { + allocUpdate := mock.Alloc() + allocUpdate.NodeID = alloc.NodeID + allocUpdate.ID = alloc.ID + allocUpdate.ClientStatus = structs.AllocClientStatusRunning + err := state.UpdateAllocFromClient(200, allocUpdate) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 150 + var resp3 structs.NodeAllocsResponse + if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if time.Since(start) < 100*time.Millisecond { + t.Fatalf("too fast") + } + if resp3.Index != 200 { + t.Fatalf("Bad index: %d %d", resp3.Index, 200) + } + if len(resp3.Allocs) != 1 || resp3.Allocs[0].ClientStatus != structs.AllocClientStatusRunning { + t.Fatalf("bad: %#v", resp3.Allocs[0]) + } } func TestClientEndpoint_UpdateAlloc(t *testing.T) { @@ -803,7 +881,7 @@ func TestClientEndpoint_ListNodes(t *testing.T) { } } -func TestClientEndpoint_ListNodes_blocking(t *testing.T) { +func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() From ae4156d2b3f3d1c70c0db006c2cc8653f126a68e Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 21:42:41 -0700 Subject: [PATCH 27/59] nomad/state: add watch trigger tests --- nomad/state/state_store.go | 3 + nomad/state/state_store_test.go | 249 ++++++++++++++++++++++++-------- 2 files changed, 191 insertions(+), 61 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index f4f97489a..47ead285e 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -765,6 +765,7 @@ func (s *StateRestore) Commit() { // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { r.items.Add(watch.Item{Table: "nodes"}) + r.items.Add(watch.Item{Node: node.ID}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -774,6 +775,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { r.items.Add(watch.Item{Table: "jobs"}) + r.items.Add(watch.Item{Job: job.ID}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -783,6 +785,7 @@ func (r *StateRestore) 
JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { r.items.Add(watch.Item{Table: "evals"}) + r.items.Add(watch.Item{Eval: eval.ID}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 2d1134d9c..788b9f26a 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -26,6 +26,12 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -47,12 +53,20 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -79,12 +93,20 @@ func TestStateStore_DeleteNode_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -114,12 +136,20 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -149,6 +179,8 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Nodes(t *testing.T) { @@ -189,18 +221,23 @@ func TestStateStore_Nodes(t *testing.T) { func TestStateStore_RestoreNode(t *testing.T) { state := testStateStore(t) + node := mock.Node() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - node := mock.Node() err = restore.NodeRestore(node) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() out, err := state.NodeByID(node.ID) @@ -211,12 +248,20 @@ func TestStateStore_RestoreNode(t *testing.T) { if !reflect.DeepEqual(out, node) { t.Fatalf("Bad: %#v %#v", out, node) } + + notify.verify(t) } func TestStateStore_UpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -238,12 
+283,20 @@ func TestStateStore_UpsertJob_Job(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -279,12 +332,20 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -311,6 +372,8 @@ func TestStateStore_DeleteJob_Job(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Jobs(t *testing.T) { @@ -418,18 +481,23 @@ func TestStateStore_JobsByScheduler(t *testing.T) { func TestStateStore_RestoreJob(t *testing.T) { state := testStateStore(t) + job := mock.Job() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - job := mock.Job() err = restore.JobRestore(job) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() out, err := state.JobByID(job.ID) @@ -440,6 +508,8 @@ func TestStateStore_RestoreJob(t *testing.T) { if !reflect.DeepEqual(out, job) { t.Fatalf("Bad: %#v %#v", out, job) } + + notify.verify(t) } func TestStateStore_Indexes(t *testing.T) { @@ -504,6 +574,12 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval", item: watch.Item{Eval: eval.ID}}, + } + notify.start(state) + err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) if err != nil { t.Fatalf("err: %v", err) @@ -525,6 +601,8 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { @@ -536,6 +614,12 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { t.Fatalf("err: %v", err) } + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval", item: watch.Item{Eval: eval.ID}}, + } + notify.start(state) + eval2 := mock.Eval() eval2.ID = eval.ID err = state.UpsertEvals(1001, []*structs.Evaluation{eval2}) @@ -566,40 +650,50 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteEval_Eval(t *testing.T) { state := testStateStore(t) - eval := mock.Eval() + eval1 := mock.Eval() eval2 := mock.Eval() - alloc := mock.Alloc() + alloc1 := mock.Alloc() alloc2 := mock.Alloc() - err := state.UpsertEvals(1000, []*structs.Evaluation{eval, eval2}) + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval1", item: watch.Item{Eval: eval1.ID}}, + {desc: "eval2", item: watch.Item{Eval: eval2.ID}}, + {desc: "alloc1", item: watch.Item{Alloc: alloc1.ID}}, + {desc: "alloc2", item: watch.Item{Alloc: alloc2.ID}}, + {desc: 
"allocnode1", item: watch.Item{AllocNode: alloc1.NodeID}}, + {desc: "allocnode2", item: watch.Item{AllocNode: alloc2.NodeID}}, + } + notify.start(state) + + err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2}) if err != nil { t.Fatalf("err: %v", err) } - err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2}) + err = state.UpsertAllocs(1001, []*structs.Allocation{alloc1, alloc2}) if err != nil { t.Fatalf("err: %v", err) } - notify1 := make(chan struct{}, 1) - state.Watch(watch.NewItems(watch.Item{AllocNode: alloc.NodeID}), notify1) - - err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) + err = state.DeleteEval(1002, []string{eval1.ID, eval2.ID}, []string{alloc1.ID, alloc2.ID}) if err != nil { t.Fatalf("err: %v", err) } - out, err := state.EvalByID(eval.ID) + out, err := state.EvalByID(eval1.ID) if err != nil { t.Fatalf("err: %v", err) } if out != nil { - t.Fatalf("bad: %#v %#v", eval, out) + t.Fatalf("bad: %#v %#v", eval1, out) } out, err = state.EvalByID(eval2.ID) @@ -608,16 +702,16 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } if out != nil { - t.Fatalf("bad: %#v %#v", eval, out) + t.Fatalf("bad: %#v %#v", eval1, out) } - outA, err := state.AllocByID(alloc.ID) + outA, err := state.AllocByID(alloc1.ID) if err != nil { t.Fatalf("err: %v", err) } if out != nil { - t.Fatalf("bad: %#v %#v", alloc, outA) + t.Fatalf("bad: %#v %#v", alloc1, outA) } outA, err = state.AllocByID(alloc2.ID) @@ -626,7 +720,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } if out != nil { - t.Fatalf("bad: %#v %#v", alloc, outA) + t.Fatalf("bad: %#v %#v", alloc1, outA) } index, err := state.Index("evals") @@ -645,11 +739,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { t.Fatalf("bad: %d", index) } - select { - case <-notify1: - default: - t.Fatalf("should be notified") - } + notify.verify(t) } func TestStateStore_EvalsByJob(t *testing.T) { @@ -721,34 +811,50 @@ func TestStateStore_Evals(t *testing.T) { func TestStateStore_RestoreEval(t *testing.T) { state := testStateStore(t) + eval := mock.Eval() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval", item: watch.Item{Eval: eval.ID}}, + } + notify.start(state) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - job := mock.Eval() - err = restore.EvalRestore(job) + err = restore.EvalRestore(eval) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() - out, err := state.EvalByID(job.ID) + out, err := state.EvalByID(eval.ID) if err != nil { t.Fatalf("err: %v", err) } - if !reflect.DeepEqual(out, job) { - t.Fatalf("Bad: %#v %#v", out, job) + if !reflect.DeepEqual(out, eval) { + t.Fatalf("Bad: %#v %#v", out, eval) } + + notify.verify(t) } func TestStateStore_UpdateAllocFromClient(t *testing.T) { state := testStateStore(t) - alloc := mock.Alloc() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "allocs"}}, + {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, + {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, + {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, + {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, + } + notify.start(state) + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -780,12 +886,23 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { state := 
testStateStore(t) - alloc := mock.Alloc() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "allocs"}}, + {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, + {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, + {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, + {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, + } + notify.start(state) + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -807,37 +924,8 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } -} -func TestStateStore_Watch(t *testing.T) { - state := testStateStore(t) - - notify1 := make(chan struct{}, 1) - notify2 := make(chan struct{}, 1) - - items := watch.NewItems(watch.Item{AllocNode: "foo"}) - state.Watch(items, notify1) - state.Watch(items, notify2) - state.StopWatch(items, notify2) - - alloc := mock.Alloc() - alloc.NodeID = "foo" - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) - if err != nil { - t.Fatalf("err: %v", err) - } - - select { - case <-notify1: - default: - t.Fatalf("should be notified") - } - - select { - case <-notify2: - t.Fatalf("should not be notified") - default: - } + notify.verify(t) } func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { @@ -852,6 +940,16 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { alloc2 := mock.Alloc() alloc2.ID = alloc.ID alloc2.NodeID = alloc.NodeID + ".new" + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "allocs"}}, + {desc: "alloc", item: watch.Item{Alloc: alloc2.ID}}, + {desc: "alloceval", item: watch.Item{AllocEval: alloc2.EvalID}}, + {desc: "allocjob", item: watch.Item{AllocJob: alloc2.JobID}}, + {desc: "allocnode", item: watch.Item{AllocNode: alloc2.NodeID}}, + } + notify.start(state) + err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) @@ -880,6 +978,8 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_EvictAlloc_Alloc(t *testing.T) { @@ -1078,6 +1178,33 @@ func TestStateWatch_stopWatch(t *testing.T) { } } +// notifyTestCase is used to set up and verify watch triggers. +type notifyTestCase struct { + desc string + item watch.Item + ch chan struct{} +} + +// notifyTest is a suite of notifyTestCases. +type notifyTest []*notifyTestCase + +// start creates the notify channels and subscribes them. +func (n notifyTest) start(state *StateStore) { + for _, tcase := range n { + tcase.ch = make(chan struct{}, 1) + state.Watch(watch.NewItems(tcase.item), tcase.ch) + } +} + +// verify ensures that each channel received a notification. 
+func (n notifyTest) verify(t *testing.T) { + for _, tcase := range n { + if len(tcase.ch) != 1 { + t.Fatalf("should notify %s", tcase.desc) + } + } +} + // NodeIDSort is used to sort nodes by ID type NodeIDSort []*structs.Node From 284c2e2f2b07ed976422d877f821e77e887147fd Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Fri, 30 Oct 2015 08:27:47 -0700 Subject: [PATCH 28/59] nomad: cleanup and more tests --- nomad/alloc_endpoint.go | 2 +- nomad/alloc_endpoint_test.go | 6 +- nomad/eval_endpoint_test.go | 10 +- nomad/job_endpoint_test.go | 10 +- nomad/node_endpoint_test.go | 14 +-- nomad/state/state_store.go | 10 +- nomad/state/state_store_test.go | 215 ++++++++++++++++---------------- 7 files changed, 139 insertions(+), 128 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index e8b6af63c..c07d5549d 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -86,8 +86,8 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, } // Setup the output + reply.Alloc = out if out != nil { - reply.Alloc = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 4147011ac..bcab0a387 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -74,7 +74,7 @@ func TestAllocEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 2 { @@ -101,7 +101,7 @@ func TestAllocEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { @@ -186,7 +186,7 @@ func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 6f3d154e5..55782a031 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -92,7 +92,7 @@ func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { @@ -117,7 +117,7 @@ func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 300 { @@ -440,7 +440,7 @@ func TestEvalEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 2 { @@ -464,7 +464,7 @@ func TestEvalEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } 
- if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { @@ -551,7 +551,7 @@ func TestEvalEndpoint_Allocations_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 9e09de538..c12e5b463 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -402,7 +402,7 @@ func TestJobEndpoint_GetJob_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { @@ -427,7 +427,7 @@ func TestJobEndpoint_GetJob_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 300 { @@ -501,7 +501,7 @@ func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 100 { @@ -525,7 +525,7 @@ func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 200 { @@ -613,7 +613,7 @@ func TestJobEndpoint_Allocations_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 9a74316c7..74b154655 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -410,7 +410,7 @@ func TestClientEndpoint_GetNode_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { @@ -437,7 +437,7 @@ func TestClientEndpoint_GetNode_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp2.Index != 300 { @@ -461,7 +461,7 @@ func TestClientEndpoint_GetNode_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp3.Index != 400 { @@ -910,7 +910,7 @@ 
func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 2 { @@ -934,7 +934,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { @@ -958,7 +958,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) } if resp3.Index != 4 { @@ -982,7 +982,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) } if resp4.Index != 5 { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 47ead285e..ec5aef29f 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -455,8 +455,11 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } - watcher.Add(watch.Item{Alloc: alloc}) - watcher.Add(watch.Item{AllocNode: existing.(*structs.Allocation).NodeID}) + realAlloc := existing.(*structs.Allocation) + watcher.Add(watch.Item{Alloc: realAlloc.ID}) + watcher.Add(watch.Item{AllocEval: realAlloc.EvalID}) + watcher.Add(watch.Item{AllocJob: realAlloc.JobID}) + watcher.Add(watch.Item{AllocNode: realAlloc.NodeID}) } // Update the indexes @@ -795,6 +798,9 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { r.items.Add(watch.Item{Table: "allocs"}) + r.items.Add(watch.Item{Alloc: alloc.ID}) + r.items.Add(watch.Item{AllocEval: alloc.EvalID}) + r.items.Add(watch.Item{AllocJob: alloc.JobID}) r.items.Add(watch.Item{AllocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 788b9f26a..2a5967450 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -26,11 +26,10 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -61,11 +60,10 @@ func TestStateStore_DeleteNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + 
watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -101,11 +99,10 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -144,11 +141,10 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -223,11 +219,10 @@ func TestStateStore_RestoreNode(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) restore, err := state.Restore() if err != nil { @@ -256,11 +251,10 @@ func TestStateStore_UpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) err := state.UpsertJob(1000, job) if err != nil { @@ -291,11 +285,10 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) err := state.UpsertJob(1000, job) if err != nil { @@ -340,11 +333,10 @@ func TestStateStore_DeleteJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) err := state.UpsertJob(1000, job) if err != nil { @@ -483,11 +475,10 @@ func TestStateStore_RestoreJob(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) restore, err := state.Restore() if err != nil { @@ -574,11 +565,10 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval", item: watch.Item{Eval: eval.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) if err != nil { @@ -614,11 +604,10 @@ func TestStateStore_Update_UpsertEvals_Eval(t 
*testing.T) { t.Fatalf("err: %v", err) } - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval", item: watch.Item{Eval: eval.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) eval2 := mock.Eval() eval2.ID = eval.ID @@ -661,16 +650,19 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { alloc1 := mock.Alloc() alloc2 := mock.Alloc() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval1", item: watch.Item{Eval: eval1.ID}}, - {desc: "eval2", item: watch.Item{Eval: eval2.ID}}, - {desc: "alloc1", item: watch.Item{Alloc: alloc1.ID}}, - {desc: "alloc2", item: watch.Item{Alloc: alloc2.ID}}, - {desc: "allocnode1", item: watch.Item{AllocNode: alloc1.NodeID}}, - {desc: "allocnode2", item: watch.Item{AllocNode: alloc2.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval1.ID}, + watch.Item{Eval: eval2.ID}, + watch.Item{Alloc: alloc1.ID}, + watch.Item{Alloc: alloc2.ID}, + watch.Item{AllocEval: alloc1.EvalID}, + watch.Item{AllocEval: alloc2.EvalID}, + watch.Item{AllocJob: alloc1.JobID}, + watch.Item{AllocJob: alloc2.JobID}, + watch.Item{AllocNode: alloc1.NodeID}, + watch.Item{AllocNode: alloc2.NodeID}) err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2}) if err != nil { @@ -813,11 +805,10 @@ func TestStateStore_RestoreEval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval", item: watch.Item{Eval: eval.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) restore, err := state.Restore() if err != nil { @@ -846,14 +837,13 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) { state := testStateStore(t) alloc := mock.Alloc() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "allocs"}}, - {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, - {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, - {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, - {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { @@ -894,14 +884,13 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { state := testStateStore(t) alloc := mock.Alloc() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "allocs"}}, - {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, - {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, - {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, - {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { @@ -941,14 +930,13 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { alloc2.ID = alloc.ID alloc2.NodeID = alloc.NodeID + ".new" - notify := notifyTest{ - {desc: 
"table", item: watch.Item{Table: "allocs"}}, - {desc: "alloc", item: watch.Item{Alloc: alloc2.ID}}, - {desc: "alloceval", item: watch.Item{AllocEval: alloc2.EvalID}}, - {desc: "allocjob", item: watch.Item{AllocJob: alloc2.JobID}}, - {desc: "allocnode", item: watch.Item{AllocNode: alloc2.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc2.ID}, + watch.Item{AllocEval: alloc2.EvalID}, + watch.Item{AllocJob: alloc2.JobID}, + watch.Item{AllocNode: alloc2.NodeID}) err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2}) if err != nil { @@ -1111,13 +1099,21 @@ func TestStateStore_Allocs(t *testing.T) { func TestStateStore_RestoreAlloc(t *testing.T) { state := testStateStore(t) + alloc := mock.Alloc() + + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - alloc := mock.Alloc() err = restore.AllocRestore(alloc) if err != nil { t.Fatalf("err: %v", err) @@ -1133,6 +1129,8 @@ func TestStateStore_RestoreAlloc(t *testing.T) { if !reflect.DeepEqual(out, alloc) { t.Fatalf("Bad: %#v %#v", out, alloc) } + + notify.verify(t) } func TestStateWatch_watch(t *testing.T) { @@ -1172,15 +1170,30 @@ func TestStateWatch_stopWatch(t *testing.T) { // Unsubscribe stop notifications sw.stopWatch(watch.Item{Table: "foo"}, notify) + // Check that the group was removed + if _, ok := sw.items[watch.Item{Table: "foo"}]; ok { + t.Fatalf("should remove group") + } + + // Check that we are not notified sw.notify(watch.NewItems(watch.Item{Table: "foo"})) if len(notify) != 0 { t.Fatalf("should not notify") } } +func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest { + var n notifyTest + for _, item := range items { + ch := make(chan struct{}, 1) + state.Watch(watch.NewItems(item), ch) + n = append(n, ¬ifyTestCase{item, ch}) + } + return n +} + // notifyTestCase is used to set up and verify watch triggers. type notifyTestCase struct { - desc string item watch.Item ch chan struct{} } @@ -1188,19 +1201,11 @@ type notifyTestCase struct { // notifyTest is a suite of notifyTestCases. type notifyTest []*notifyTestCase -// start creates the notify channels and subscribes them. -func (n notifyTest) start(state *StateStore) { - for _, tcase := range n { - tcase.ch = make(chan struct{}, 1) - state.Watch(watch.NewItems(tcase.item), tcase.ch) - } -} - // verify ensures that each channel received a notification. 
func (n notifyTest) verify(t *testing.T) { for _, tcase := range n { if len(tcase.ch) != 1 { - t.Fatalf("should notify %s", tcase.desc) + t.Fatalf("should notify %#v", tcase.item) } } } From 5f53478137ec01b71cda970f3cba1f1e4d5d2944 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Fri, 30 Oct 2015 08:42:23 -0700 Subject: [PATCH 29/59] nomad/state: subscribe/unsubscribe all watch items while holding the lock --- nomad/state/state_store.go | 40 +++++++++++++++++---------------- nomad/state/state_store_test.go | 13 ++++++----- nomad/watch/watch.go | 1 + 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index ec5aef29f..30ee87259 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -75,16 +75,14 @@ func (s *StateStore) Restore() (*StateRestore, error) { return r, nil } +// Watch subscribes a channel to a set of watch items. func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { - for wi, _ := range items { - s.watch.watch(wi, notify) - } + s.watch.watch(items, notify) } +// StopWatch unsubscribes a channel from a set of watch items. func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { - for wi, _ := range items { - s.watch.stopWatch(wi, notify) - } + s.watch.stopWatch(items, notify) } // UpsertNode is used to register a node or update a node definition @@ -830,28 +828,32 @@ func newStateWatch() *stateWatch { } } -// watch subscribes a channel to the given watch item. -func (w *stateWatch) watch(wi watch.Item, ch chan struct{}) { +// watch subscribes a channel to the given watch items. +func (w *stateWatch) watch(items watch.Items, ch chan struct{}) { w.l.Lock() defer w.l.Unlock() - grp, ok := w.items[wi] - if !ok { - grp = new(NotifyGroup) - w.items[wi] = grp + for item, _ := range items { + grp, ok := w.items[item] + if !ok { + grp = new(NotifyGroup) + w.items[item] = grp + } + grp.Wait(ch) } - grp.Wait(ch) } -// stopWatch unsubscribes a channel from the given watch item. -func (w *stateWatch) stopWatch(wi watch.Item, ch chan struct{}) { +// stopWatch unsubscribes a channel from the given watch items. 
+func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) { w.l.Lock() defer w.l.Unlock() - if grp, ok := w.items[wi]; ok { - grp.Clear(ch) - if grp.Empty() { - delete(w.items, wi) + for item, _ := range items { + if grp, ok := w.items[item]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, item) + } } } } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 2a5967450..5e1021e55 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -653,6 +653,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { notify := setupNotifyTest( state, watch.Item{Table: "evals"}, + watch.Item{Table: "allocs"}, watch.Item{Eval: eval1.ID}, watch.Item{Eval: eval2.ID}, watch.Item{Alloc: alloc1.ID}, @@ -1140,9 +1141,9 @@ func TestStateWatch_watch(t *testing.T) { notify3 := make(chan struct{}, 1) // Notifications trigger subscribed channels - sw.watch(watch.Item{Table: "foo"}, notify1) - sw.watch(watch.Item{Table: "bar"}, notify2) - sw.watch(watch.Item{Table: "baz"}, notify3) + sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify1) + sw.watch(watch.NewItems(watch.Item{Table: "bar"}), notify2) + sw.watch(watch.NewItems(watch.Item{Table: "baz"}), notify3) items := watch.NewItems() items.Add(watch.Item{Table: "foo"}) @@ -1165,10 +1166,10 @@ func TestStateWatch_stopWatch(t *testing.T) { notify := make(chan struct{}) // First subscribe - sw.watch(watch.Item{Table: "foo"}, notify) + sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify) // Unsubscribe stop notifications - sw.stopWatch(watch.Item{Table: "foo"}, notify) + sw.stopWatch(watch.NewItems(watch.Item{Table: "foo"}), notify) // Check that the group was removed if _, ok := sw.items[watch.Item{Table: "foo"}]; ok { @@ -1182,6 +1183,8 @@ func TestStateWatch_stopWatch(t *testing.T) { } } +// setupNotifyTest takes a state store and a set of watch items, then creates +// and subscribes a notification channel for each item. func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest { var n notifyTest for _, item := range items { diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index c71fe5087..102e535b2 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -21,6 +21,7 @@ type Item struct { // the items as they are added using map keys. type Items map[Item]struct{} +// NewItems creates a new Items set and adds the given items. func NewItems(items ...Item) Items { wi := make(Items) for _, item := range items { From 8f2bb251578a6419265dd6f3ff03d8fe3336be2a Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Fri, 30 Oct 2015 14:38:51 -0700 Subject: [PATCH 30/59] website: clean up HTTP docs, add blocking queries --- website/source/docs/http/alloc.html.md | 6 +- website/source/docs/http/allocs.html.md | 6 +- website/source/docs/http/eval.html.md | 18 +- website/source/docs/http/evals.html.md | 6 +- website/source/docs/http/index.html.md | 1 + website/source/docs/http/job.html.md | 269 ++++++++++++------------ website/source/docs/http/jobs.html.md | 6 +- website/source/docs/http/node.html.md | 20 +- website/source/docs/http/nodes.html.md | 7 +- 9 files changed, 186 insertions(+), 153 deletions(-) diff --git a/website/source/docs/http/alloc.html.md b/website/source/docs/http/alloc.html.md index 3c224fd54..822858a8c 100644 --- a/website/source/docs/http/alloc.html.md +++ b/website/source/docs/http/alloc.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -179,4 +184,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/allocs.html.md b/website/source/docs/http/allocs.html.md index 44ad8aa7e..b59a4f204 100644 --- a/website/source/docs/http/allocs.html.md +++ b/website/source/docs/http/allocs.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -56,4 +61,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/eval.html.md b/website/source/docs/http/eval.html.md index cba43900c..87e048209 100644 --- a/website/source/docs/http/eval.html.md +++ b/website/source/docs/http/eval.html.md @@ -3,7 +3,7 @@ layout: "http" page_title: "HTTP API: /v1/evaluation" sidebar_current: "docs-http-eval-" description: |- - The '/1/evaluation' endpoint is used to query a specific evaluation. + The '/v1/evaluation' endpoint is used to query a specific evaluation. --- # /v1/evaluation @@ -17,7 +17,7 @@ be specified using the `?region=` query parameter.
Description
- Lists all the evaluations. + Query a specific evaluation.
Method
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -57,9 +62,6 @@ be specified using the `?region=` query parameter.
-# /v1/evaluation/\<ID\>/allocations -## GET -
Description
@@ -77,6 +79,11 @@ be specified using the `?region=` query parameter. None
+
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -102,4 +109,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/evals.html.md b/website/source/docs/http/evals.html.md index 3bc22da8f..23d98cc95 100644 --- a/website/source/docs/http/evals.html.md +++ b/website/source/docs/http/evals.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -59,4 +64,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/index.html.md b/website/source/docs/http/index.html.md index 671d19fa5..7ed3f0dbd 100644 --- a/website/source/docs/http/index.html.md +++ b/website/source/docs/http/index.html.md @@ -31,6 +31,7 @@ The API is modeled closely on the underlying data model. Use the links to the le documentation about specific endpoints. There are also "Agent" APIs which interact with a specific agent and not the broader cluster used for administration. + ## Blocking Queries Certain endpoints support a feature called a "blocking query." A blocking query diff --git a/website/source/docs/http/job.html.md b/website/source/docs/http/job.html.md index 211963e6a..cbf0f5097 100644 --- a/website/source/docs/http/job.html.md +++ b/website/source/docs/http/job.html.md @@ -6,7 +6,7 @@ description: |- The '/1/job' endpoint is used for CRUD on a single job. --- -# /v1/job/\ +# /v1/job The `job` endpoint is used for CRUD on a single job. By default, the agent's local region is used; another region can be specified using the `?region=` query parameter. @@ -30,6 +30,11 @@ region is used; another region can be specified using the `?region=` query param None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -136,6 +141,105 @@ region is used; another region can be specified using the `?region=` query param
+
+
Description
+
+ Query the allocations belonging to a single job. +
+ +
Method
+
GET
+ +
URL
+
`/v1/job/<ID>/allocations`
+ +
Parameters
+
+ None +
+ +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+ +
Returns
+
+ + ```javascript + [ + { + "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", + "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", + "Name": "binstore-storagelocker.binsl[0]", + "NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b", + "JobID": "binstore-storagelocker", + "TaskGroup": "binsl", + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "running", + "ClientDescription": "", + "CreateIndex": 16, + "ModifyIndex": 16 + }, + ... + ] + ``` + +
+
+ +
+
Description
+
+ Query the evaluations belonging to a single job. +
+ +
Method
+
GET
+ +
URL
+
`/v1/job/<ID>/evaluations`
+ +
Parameters
+
+ None +
+ +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+ +
Returns
+
+ + ```javascript + [ + { + "ID": "151accaa-1ac6-90fe-d427-313e70ccbb88", + "Priority": 50, + "Type": "service", + "TriggeredBy": "job-register", + "JobID": "binstore-storagelocker", + "JobModifyIndex": 14, + "NodeID": "", + "NodeModifyIndex": 0, + "Status": "complete", + "StatusDescription": "", + "Wait": 0, + "NextEval": "", + "PreviousEval": "", + "CreateIndex": 15, + "ModifyIndex": 17 + }, + ... + ] + ``` + +
+
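+
+Both GET endpoints above support blocking queries. The following is a rough,
+hypothetical sketch of a long-poll loop (the agent address and the job ID
+"example" are placeholders; only the `index`/`wait` query parameters and the
+`X-Nomad-Index` response header come from the
+[blocking queries documentation](/docs/http/index.html#blocking-queries)):
+
+```go
+package main
+
+import (
+	"fmt"
+	"net/http"
+)
+
+func main() {
+	index := "0"
+	for {
+		// Pass back the last index we saw; the request blocks (up to the
+		// wait limit) until the job's allocations change past that index.
+		url := fmt.Sprintf(
+			"http://127.0.0.1:4646/v1/job/example/allocations?index=%s&wait=60s",
+			index)
+		resp, err := http.Get(url)
+		if err != nil {
+			panic(err)
+		}
+		resp.Body.Close()
+		index = resp.Header.Get("X-Nomad-Index")
+		fmt.Println("allocations changed; new index:", index)
+	}
+}
+```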
+ ## PUT / POST
@@ -177,6 +281,38 @@ region is used; another region can be specified using the `?region=` query param
+
+
Description
+
+ Creates a new evaluation for the given job. This can be used to force + run the scheduling logic if necessary. +
+ +
Method
+
PUT or POST
+ +
URL
+
`/v1/job/<ID>/evaluate`
+ +
Parameters
+
+ None +
+ +
Returns
+
+ + ```javascript + { + "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", + "EvalCreateIndex": 35, + "JobModifyIndex": 34, + } + ``` + +
+
+ ## DELETE
@@ -209,134 +345,3 @@ region is used; another region can be specified using the `?region=` query param
- -# /v1/job/\<ID\>/allocations -## GET - -
-
Description
-
- Query the allocations belonging to a single job. -
- -
Method
-
GET
- -
URL
-
`/v1/job/<ID>/allocations`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - [ - { - "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", - "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Name": "binstore-storagelocker.binsl[0]", - "NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b", - "JobID": "binstore-storagelocker", - "TaskGroup": "binsl", - "DesiredStatus": "run", - "DesiredDescription": "", - "ClientStatus": "running", - "ClientDescription": "", - "CreateIndex": 16, - "ModifyIndex": 16 - }, - ... - ] - ``` - -
-
- -# /v1/job/\<ID\>/evaluate -## PUT / POST - -
-
Description
-
- Creates a new evaluation for the given job. This can be used to force - run the scheduling logic if necessary. -
- -
Method
-
PUT or POST
- -
URL
-
`/v1/job/<ID>/evaluate`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - { - "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", - "EvalCreateIndex": 35, - "JobModifyIndex": 34, - } - ``` - -
-
- -# /v1/job/\<ID\>/evaluations -## GET - -
-
Description
-
- Query the evaluations belonging to a single job. -
- -
Method
-
GET
- -
URL
-
`/v1/job/<ID>/evaluations`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - [ - { - "ID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Priority": 50, - "Type": "service", - "TriggeredBy": "job-register", - "JobID": "binstore-storagelocker", - "JobModifyIndex": 14, - "NodeID": "", - "NodeModifyIndex": 0, - "Status": "complete", - "StatusDescription": "", - "Wait": 0, - "NextEval": "", - "PreviousEval": "", - "CreateIndex": 15, - "ModifyIndex": 17 - }, - ... - ] - ``` - -
-
- diff --git a/website/source/docs/http/jobs.html.md b/website/source/docs/http/jobs.html.md index f724ce0ac..8f098b1ca 100644 --- a/website/source/docs/http/jobs.html.md +++ b/website/source/docs/http/jobs.html.md @@ -31,6 +31,11 @@ another region can be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -93,4 +98,3 @@ another region can be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/node.html.md b/website/source/docs/http/node.html.md index f16131f97..df09426d6 100644 --- a/website/source/docs/http/node.html.md +++ b/website/source/docs/http/node.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -82,9 +87,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/allocations -## GET -
Description
@@ -102,6 +104,11 @@ be specified using the `?region=` query parameter. None
+
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -128,7 +135,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\<ID\>/evaluate ## PUT / POST
@@ -163,9 +169,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\<ID\>/drain -## PUT / POST -
Description
@@ -175,7 +178,7 @@ be specified using the `?region=` query parameter.
Method
-
PUT or POSt
+
PUT or POST
URL
`/v1/node/<ID>/drain`
@@ -205,4 +208,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/nodes.html.md b/website/source/docs/http/nodes.html.md index 36fa96fcd..b8e2b91a9 100644 --- a/website/source/docs/http/nodes.html.md +++ b/website/source/docs/http/nodes.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -53,5 +58,3 @@ be specified using the `?region=` query parameter.
- - From 83695cb5d10c09acc1e335081815b79d86e59e4f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 15:51:39 -0700 Subject: [PATCH 31/59] Added support for parsing restart blocks --- command/init.go | 11 +++++ jobspec/parse.go | 83 +++++++++++++++++++++++++++++++-- jobspec/parse_test.go | 5 ++ jobspec/test-fixtures/basic.hcl | 5 ++ nomad/structs/structs.go | 39 ++++++++++++++++ 5 files changed, 139 insertions(+), 4 deletions(-) diff --git a/command/init.go b/command/init.go index 0b9be934b..8827f5e9d 100644 --- a/command/init.go +++ b/command/init.go @@ -104,6 +104,17 @@ job "example" { # Defaults to 1 # count = 1 + # Restart Policy - This block defines the restart policy for TaskGroups + # attempts defines the number of restarts Nomad will do if Tasks + # in this TaskGroup fails in a rolling window of interval duration + # The delay value makes Nomad wait for that duration to restart after a Task + # fails or crashes. + restart { + interval = 5m + attempts = 10 + delay = 25s + } + # Define a task to run task "redis" { # Use Docker to run the task. diff --git a/jobspec/parse.go b/jobspec/parse.go index f63ac5294..c3c71ac9c 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -30,6 +30,7 @@ func Parse(r io.Reader) (*structs.Job, error) { // Parse the buffer obj, err := hcl.Parse(buf.String()) + if err != nil { return nil, fmt.Errorf("error parsing: %s", err) } @@ -124,7 +125,7 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { } } - // If we have tasks outside, do those + // If we have tasks outside, create TaskGroups for them if o := obj.Get("task", false); o != nil { var tasks []*structs.Task if err := parseTasks(&tasks, o); err != nil { @@ -134,9 +135,10 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2) for i, t := range tasks { result.TaskGroups[i] = &structs.TaskGroup{ - Name: t.Name, - Count: 1, - Tasks: []*structs.Task{t}, + Name: t.Name, + Count: 1, + Tasks: []*structs.Task{t}, + RestartPolicy: structs.NewRestartPolicy(result.Type), } } } @@ -180,6 +182,7 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { delete(m, "constraint") delete(m, "meta") delete(m, "task") + delete(m, "restart") // Default count to 1 if not specified if _, ok := m["count"]; !ok { @@ -200,6 +203,10 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { } } + if err := parseRestartPolicy(structs.NewRestartPolicy(result.Type), o); err != nil { + return err + } + // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. 
if metaO := o.Get("meta", false); metaO != nil { @@ -228,6 +235,42 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return nil } +func parseRestartPolicy(result *structs.RestartPolicy, obj *hclobj.Object) error { + var restartHclObj *hclobj.Object + var m map[string]interface{} + if restartHclObj = obj.Get("restart", false); restartHclObj == nil { + return nil + } + if err := hcl.DecodeObject(&m, restartHclObj); err != nil { + return err + } + + if delay, ok := m["delay"]; ok { + d, err := toDuration(delay) + if err != nil { + return fmt.Errorf("Invalid Delay time in restart policy: %v", err) + } + result.Delay = d + } + + if interval, ok := m["interval"]; ok { + i, err := toDuration(interval) + if err != nil { + return fmt.Errorf("Invalid Interval time in restart policy: %v", err) + } + result.Interval = i + } + + if attempts, ok := m["attempts"]; ok { + a, err := toInteger(attempts) + if err != nil { + return fmt.Errorf("Invalid value in attempts: %v", err) + } + result.Attempts = a + } + return nil +} + func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error { for _, o := range obj.Elem(false) { var m map[string]interface{} @@ -477,3 +520,35 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } return nil } + +func toDuration(value interface{}) (time.Duration, error) { + var dur time.Duration + var err error + switch v := value.(type) { + case string: + dur, err = time.ParseDuration(v) + case int: + dur = time.Duration(v) * time.Second + default: + err = fmt.Errorf("Invalid time %s", value) + } + + return dur, err +} + +func toInteger(value interface{}) (int, error) { + var integer int + var err error + switch v := value.(type) { + case string: + var i int64 + i, err = strconv.ParseInt(v, 10, 32) + integer = int(i) + case int: + integer = v + default: + err = fmt.Errorf("Value: %v can't be parsed into int", value) + } + + return integer, err +} diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index f91789ddb..c3b91e785 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -48,6 +48,11 @@ func TestParse(t *testing.T) { &structs.TaskGroup{ Name: "outside", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 2, + Interval: 1 * time.Minute, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "outside", diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 941272b2d..bf81a6ae7 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -31,6 +31,11 @@ job "binstore-storagelocker" { group "binsl" { count = 5 + restart { + attempts = 5 + interval = "10m" + delay = "15s" + } task "binstore" { driver = "docker" config { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f5d20552a..8afe1c452 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -898,6 +898,37 @@ func (u *UpdateStrategy) Rolling() bool { return u.Stagger > 0 && u.MaxParallel > 0 } +// RestartPolicy influences how Nomad restarts Tasks when they +// crash or fail. 
+type RestartPolicy struct { + Attempts int + Interval time.Duration + Delay time.Duration +} + +func (r *RestartPolicy) Validate() error { + if time.Duration(r.Attempts)*r.Delay > r.Interval { + return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) + } + return nil +} + +func NewRestartPolicy(jobType string) *RestartPolicy { + defaultDelayBetweenRestarts := 15 * time.Second + defaultAttempts := 15 + var defaultRestartInterval time.Duration + + if jobType == "service" { + defaultRestartInterval = 1 * time.Minute + defaultAttempts = 2 + } + return &RestartPolicy{ + Attempts: defaultAttempts, + Interval: defaultRestartInterval, + Delay: defaultDelayBetweenRestarts, + } +} + // TaskGroup is an atomic unit of placement. Each task group belongs to // a job and may contain any number of tasks. A task group support running // in many replicas using the same configuration.. @@ -913,6 +944,9 @@ type TaskGroup struct { // all the tasks contained. Constraints []*Constraint + //RestartPolicy of a TaskGroup + RestartPolicy *RestartPolicy + // Tasks are the collection of tasks that this task group needs to run Tasks []*Task @@ -940,6 +974,10 @@ func (tg *TaskGroup) Validate() error { } } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + // Check for duplicate tasks tasks := make(map[string]int) for idx, task := range tg.Tasks { @@ -954,6 +992,7 @@ func (tg *TaskGroup) Validate() error { // Validate the tasks for idx, task := range tg.Tasks { + if err := task.Validate(); err != nil { outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) mErr.Errors = append(mErr.Errors, outer) From e2f61e25e9529f439d3a9b7fc836004816d30f6a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 16:32:05 -0700 Subject: [PATCH 32/59] Sending restart policies to the Nomad API --- api/tasks.go | 23 ++++++++++++++++++----- jobspec/parse.go | 3 ++- jobspec/parse_test.go | 5 +++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index c1d5bf2ff..b2516e706 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -1,12 +1,25 @@ package api +import ( + "time" +) + +//RestartPolicy defines how the Nomad client restarts +//tasks in a taskgroup when they fail +type RestartPolicy struct { + Interval time.Duration + Attempts int + Delay time.Duration +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { - Name string - Count int - Constraints []*Constraint - Tasks []*Task - Meta map[string]string + Name string + Count int + Constraints []*Constraint + Tasks []*Task + RestartPolicy *RestartPolicy + Meta map[string]string } // NewTaskGroup creates a new TaskGroup. 
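For illustration, a minimal hypothetical sketch of how a client of the `api`
package could set the new field on a task group; `NewTaskGroup` and the
`RestartPolicy` fields come from the diff above, while the group name, count,
and durations are arbitrary example values:

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Build a task group and attach an explicit restart policy to it.
	grp := api.NewTaskGroup("cache", 3)
	grp.RestartPolicy = &api.RestartPolicy{
		Attempts: 5,
		Interval: 10 * time.Minute,
		Delay:    15 * time.Second,
	}
	fmt.Printf("restart policy: %+v\n", grp.RestartPolicy)
}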
diff --git a/jobspec/parse.go b/jobspec/parse.go index c3c71ac9c..548632239 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -202,8 +202,9 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return err } } + g.RestartPolicy = structs.NewRestartPolicy(result.Type) - if err := parseRestartPolicy(structs.NewRestartPolicy(result.Type), o); err != nil { + if err := parseRestartPolicy(g.RestartPolicy, o); err != nil { return err } diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index c3b91e785..e785443b7 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -82,6 +82,11 @@ func TestParse(t *testing.T) { "elb_interval": "10", "elb_checks": "3", }, + RestartPolicy: &structs.RestartPolicy{ + Interval: 10 * time.Minute, + Attempts: 5, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "binstore", From a035dcf2c0d96439bc3fe9bafd26f7039bae0381 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 16:49:08 -0700 Subject: [PATCH 33/59] Re-using toDuration while figuring out staggertime --- jobspec/parse.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/jobspec/parse.go b/jobspec/parse.go index 548632239..1c28d59ee 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -499,19 +499,11 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } for _, key := range []string{"stagger", "Stagger"} { if raw, ok := m[key]; ok { - switch v := raw.(type) { - case string: - dur, err := time.ParseDuration(v) - if err != nil { - return fmt.Errorf("invalid stagger time '%s'", raw) - } - m[key] = dur - case int: - m[key] = time.Duration(v) * time.Second - default: - return fmt.Errorf("invalid type for stagger time '%s'", - raw) + staggerTime, err := toDuration(raw) + if err != nil { + return fmt.Errorf("Invalid stagger time: %v", err) } + m[key] = staggerTime } } From 93cdcb5ac24d8430bd0fc83ad97c0616b087c65f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 18:34:23 -0700 Subject: [PATCH 34/59] Added the restart policies to mocks --- nomad/mock/mock.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 87c426dce..329ecd872 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -1,6 +1,9 @@ package mock -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "github.com/hashicorp/nomad/nomad/structs" + "time" +) func Node() *structs.Node { node := &structs.Node{ @@ -71,6 +74,11 @@ func Job() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 10, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", @@ -131,6 +139,11 @@ func SystemJob() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", From 0d17430306ecf6a535f5786c681e87aa7688ef44 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:06:56 -0700 Subject: [PATCH 35/59] Fixed grammer of comment --- command/init.go | 4 ++-- jobspec/parse.go | 1 - nomad/structs/structs.go | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/command/init.go b/command/init.go index 8827f5e9d..851f366be 100644 --- a/command/init.go +++ b/command/init.go @@ -104,8 +104,8 @@ job "example" { # Defaults to 1 # 
count = 1 - # Restart Policy - This block defines the restart policy for TaskGroups - # attempts defines the number of restarts Nomad will do if Tasks + # Restart Policy - This block defines the restart policy for TaskGroups, + # the attempts value defines the number of restarts Nomad will do if Tasks # in this TaskGroup fails in a rolling window of interval duration # The delay value makes Nomad wait for that duration to restart after a Task # fails or crashes. diff --git a/jobspec/parse.go b/jobspec/parse.go index 1c28d59ee..77f9b819f 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -30,7 +30,6 @@ func Parse(r io.Reader) (*structs.Job, error) { // Parse the buffer obj, err := hcl.Parse(buf.String()) - if err != nil { return nil, fmt.Errorf("error parsing: %s", err) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8afe1c452..a42a8f822 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -992,7 +992,6 @@ func (tg *TaskGroup) Validate() error { // Validate the tasks for idx, task := range tg.Tasks { - if err := task.Validate(); err != nil { outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) mErr.Errors = append(mErr.Errors, outer) From 67c21e4b31b1d4ab6e4bef7f2bbad572cf4cbeef Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:28:56 -0700 Subject: [PATCH 36/59] Added a RestartPolicy to some mocks --- api/compose_test.go | 1 + api/tasks.go | 14 ++++++++++++-- api/tasks_test.go | 5 +++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/api/compose_test.go b/api/compose_test.go index 68801519f..2a509bc55 100644 --- a/api/compose_test.go +++ b/api/compose_test.go @@ -69,6 +69,7 @@ func TestCompose(t *testing.T) { Operand: "=", }, }, + RestartPolicy: NewRestartPolicy(), Tasks: []*Task{ &Task{ Name: "task1", diff --git a/api/tasks.go b/api/tasks.go index b2516e706..3ef918850 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -12,6 +12,14 @@ type RestartPolicy struct { Delay time.Duration } +func NewRestartPolicy() *RestartPolicy { + return &RestartPolicy{ + Attempts: 10, + Interval: 3 * time.Minute, + Delay: 5 * time.Second, + } +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { Name string @@ -24,9 +32,11 @@ type TaskGroup struct { // NewTaskGroup creates a new TaskGroup. 
func NewTaskGroup(name string, count int) *TaskGroup { + restartPolicy := NewRestartPolicy() return &TaskGroup{ - Name: name, - Count: count, + Name: name, + Count: count, + RestartPolicy: restartPolicy, } } diff --git a/api/tasks_test.go b/api/tasks_test.go index 877f84d5c..945fdf9bf 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -8,8 +8,9 @@ import ( func TestTaskGroup_NewTaskGroup(t *testing.T) { grp := NewTaskGroup("grp1", 2) expect := &TaskGroup{ - Name: "grp1", - Count: 2, + Name: "grp1", + Count: 2, + RestartPolicy: NewRestartPolicy(), } if !reflect.DeepEqual(grp, expect) { t.Fatalf("expect: %#v, got: %#v", expect, grp) From 96f946b88e901641ad15ee1d93c6a1c36a6e4f83 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:43:00 -0700 Subject: [PATCH 37/59] Not validating task groups if it's nil in a job --- nomad/structs/structs.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a42a8f822..cf81c6afb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -974,8 +974,10 @@ func (tg *TaskGroup) Validate() error { } } - if err := tg.RestartPolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) + if tg.RestartPolicy != nil { + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } } // Check for duplicate tasks From 6fa5b45c3e3c3bfdcbe81f2ed6554073e2df672a Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 10:15:26 -0700 Subject: [PATCH 38/59] Remove redundant 'all' --- website/source/docs/agent/config.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 6f0b9b0dc..a8c6412a3 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -42,7 +42,7 @@ nodes, unless otherwise specified: as `us-west` and `us-east`. Defaults to `global`. * `datacenter`: Datacenter of the local agent. All members of a datacenter - should all share a local LAN connection. Defaults to `dc1`. + should share a local LAN connection. Defaults to `dc1`. * `name`: The name of the local node. This value is used to identify individual nodes in a given datacenter and must be unique From 4bdaa1bbc0057703ff667d5970e8efae7e490859 Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 11:10:12 -0700 Subject: [PATCH 39/59] appicable -> applicable --- website/source/docs/agent/config.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 6f0b9b0dc..7199e8160 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -103,7 +103,7 @@ nodes, unless otherwise specified: This can be used to advertise a different address to the peers of a server node to support more complex network configurations such as NAT. This configuration is optional, and defaults to the bind address of the specific - network service if it is not provided. This configuration is only appicable + network service if it is not provided. This configuration is only applicable on server nodes. The value is a map of IP addresses and supports the following keys:
From b770d1a7098be7536db7780a98678b8554c4406b Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 11:18:42 -0700 Subject: [PATCH 40/59] leave -> leaving --- website/source/docs/agent/config.html.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 7199e8160..5f186fdac 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -125,10 +125,10 @@ nodes, unless otherwise specified: * `disable_hostname`: A boolean indicating if gauge values should not be prefixed with the local hostname. -* `leave_on_interrupt`: Enables gracefully leave when receiving the +* `leave_on_interrupt`: Enables gracefully leaving when receiving the interrupt signal. By default, the agent will exit forcefully on any signal. -* `leave_on_terminate`: Enables gracefully leave when receiving the +* `leave_on_terminate`: Enables gracefully leaving when receiving the terminate signal. By default, the agent will exit forcefully on any signal. * `enable_syslog`: Enables logging to syslog. This option only work on From 614a01fb4bf8cd8644219ae84b4f23bc4404a46b Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 11:19:38 -0700 Subject: [PATCH 41/59] This option only work -> This option only works --- website/source/docs/agent/config.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 7199e8160..f2631ef45 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -131,7 +131,7 @@ nodes, unless otherwise specified: * `leave_on_terminate`: Enables gracefully leave when receiving the terminate signal. By default, the agent will exit forcefully on any signal. -* `enable_syslog`: Enables logging to syslog. This option only work on +* `enable_syslog`: Enables logging to syslog. This option only works on Unix based systems. * `syslog_facility`: Controls the syslog facility that is used. 
By default, From ec819f9761acc65dad3f1df9153e6c5cdbcc7d7a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 13:24:59 -0800 Subject: [PATCH 42/59] Fixing tests to not create a TG without restart policies --- api/tasks.go | 4 ++-- nomad/structs/structs.go | 6 ++---- nomad/structs/structs_test.go | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 3ef918850..2535d5ec5 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -4,8 +4,8 @@ import ( "time" ) -//RestartPolicy defines how the Nomad client restarts -//tasks in a taskgroup when they fail +// RestartPolicy defines how the Nomad client restarts +// tasks in a taskgroup when they fail type RestartPolicy struct { Interval time.Duration Attempts int diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index cf81c6afb..a42a8f822 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -974,10 +974,8 @@ func (tg *TaskGroup) Validate() error { } } - if tg.RestartPolicy != nil { - if err := tg.RestartPolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) - } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) } // Check for duplicate tasks diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index cabf83dfa..1f107b095 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1,11 +1,11 @@ package structs import ( + "github.com/hashicorp/go-multierror" "reflect" "strings" "testing" - - "github.com/hashicorp/go-multierror" + "time" ) func TestJob_Validate(t *testing.T) { @@ -44,11 +44,27 @@ func TestJob_Validate(t *testing.T) { TaskGroups: []*TaskGroup{ &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + }, + &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, - &TaskGroup{}, }, } err = j.Validate() @@ -65,7 +81,13 @@ func TestJob_Validate(t *testing.T) { } func TestTaskGroup_Validate(t *testing.T) { - tg := &TaskGroup{} + tg := &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + } err := tg.Validate() mErr := err.(*multierror.Error) if !strings.Contains(mErr.Errors[0].Error(), "group name") { @@ -86,6 +108,11 @@ func TestTaskGroup_Validate(t *testing.T) { &Task{Name: "web"}, &Task{}, }, + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, } err = tg.Validate() mErr = err.(*multierror.Error) From c7d31e56839f30c95756052029a6e3925cec1d56 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 13:35:51 -0800 Subject: [PATCH 43/59] Declaring Batch and Service default restart policies --- nomad/structs/structs.go | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a42a8f822..f6feaa3de 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,8 +14,17 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to 
region") + BatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } + ServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } ) type MessageType uint8 @@ -914,18 +923,13 @@ func (r *RestartPolicy) Validate() error { } func NewRestartPolicy(jobType string) *RestartPolicy { - defaultDelayBetweenRestarts := 15 * time.Second - defaultAttempts := 15 - var defaultRestartInterval time.Duration - - if jobType == "service" { - defaultRestartInterval = 1 * time.Minute - defaultAttempts = 2 - } - return &RestartPolicy{ - Attempts: defaultAttempts, - Interval: defaultRestartInterval, - Delay: defaultDelayBetweenRestarts, + switch jobType { + case JobTypeService: + return &ServiceJobRestartPolicy + case JobTypeBatch: + return &BatchJobRestartPolicy + default: + return nil } } From 795c786ca51bd6b79e6ea967ba269ebfc865b95e Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 15:04:04 -0800 Subject: [PATCH 44/59] Fixed the tests --- nomad/structs/structs.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f6feaa3de..589781580 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,17 +14,8 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") - BatchJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } - ServiceJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") ) type MessageType uint8 @@ -925,9 +916,16 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService: - return &ServiceJobRestartPolicy + return &RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } case JobTypeBatch: - return &BatchJobRestartPolicy + return &RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } default: return nil } From ef841d5e89f638a7de184643ef5c05e956473f92 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 17:00:17 -0800 Subject: [PATCH 45/59] Introducing vars to create default batch and service restart policies --- nomad/structs/structs.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 589781580..15e58d333 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -16,6 +16,15 @@ import ( var ( ErrNoLeader = fmt.Errorf("No cluster leader") ErrNoRegionPath = fmt.Errorf("No path to region") + defaultServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } + defaultBatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } ) type MessageType uint8 @@ -916,19 +925,13 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService: - return &RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + rp := defaultServiceJobRestartPolicy + return &rp case JobTypeBatch: - return &RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } - default: - return nil + rp := defaultBatchJobRestartPolicy + return &rp } 
+ return nil } // TaskGroup is an atomic unit of placement. Each task group belongs to From 6a56218fb79e7be603d81d27ba43936cb3a81bec Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 17:30:41 -0800 Subject: [PATCH 46/59] Fixed the restart policy syntax --- command/init.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/init.go b/command/init.go index 851f366be..356337ae8 100644 --- a/command/init.go +++ b/command/init.go @@ -110,9 +110,9 @@ job "example" { # The delay value makes Nomad wait for that duration to restart after a Task # fails or crashes. restart { - interval = 5m + interval = "5m" attempts = 10 - delay = 25s + delay = "25s" } # Define a task to run From 3576f489932ede9b12640d5bbfbf44324f0cc527 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 2 Nov 2015 20:28:37 -0800 Subject: [PATCH 47/59] Create Spawn pkg that handles IPC with the spawn-daemon and update exec_linux to use that --- client/driver/exec_test.go | 2 +- client/executor/exec_linux.go | 221 +++++------------------ client/spawn/spawn.go | 322 ++++++++++++++++++++++++++++++++++ client/spawn/spawn_test.go | 252 ++++++++++++++++++++++++++ command/spawn_daemon.go | 47 +++-- helper/discover/discover.go | 10 +- 6 files changed, 657 insertions(+), 197 deletions(-) create mode 100644 client/spawn/spawn.go create mode 100644 client/spawn/spawn_test.go diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index 488847c5c..1bb4adf36 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -293,7 +293,7 @@ func TestExecDriver_Start_Kill_Wait(t *testing.T) { if err == nil { t.Fatal("should err") } - case <-time.After(2 * time.Second): + case <-time.After(8 * time.Second): t.Fatalf("timeout") } } diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index 9c4bcd9a4..be70379d2 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -5,12 +5,9 @@ import ( "encoding/json" "errors" "fmt" - "io" "os" - "os/exec" "os/user" "path/filepath" - "strconv" "strings" "syscall" @@ -18,8 +15,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/driver/args" "github.com/hashicorp/nomad/client/driver/environment" - "github.com/hashicorp/nomad/command" - "github.com/hashicorp/nomad/helper/discover" + "github.com/hashicorp/nomad/client/spawn" "github.com/hashicorp/nomad/nomad/structs" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -53,18 +49,13 @@ type LinuxExecutor struct { // Isolation configurations. groups *cgroupConfig.Cgroup - alloc *allocdir.AllocDir taskName string taskDir string + allocDir string - // Tracking of spawn process. - spawnChild *os.Process - spawnOutputWriter *os.File - spawnOutputReader *os.File - - // Tracking of user process. - exitStatusFile string - userPid int + // Spawn process. + spawn *spawn.Spawner + spawnState string } func (e *LinuxExecutor) Command() *cmd { @@ -82,11 +73,9 @@ func (e *LinuxExecutor) Limit(resources *structs.Resources) error { // execLinuxID contains the necessary information to reattach to an executed // process and cleanup the created cgroups. type ExecLinuxID struct { - Groups *cgroupConfig.Cgroup - SpawnPid int - UserPid int - ExitStatusFile string - TaskDir string + Groups *cgroupConfig.Cgroup + Spawn *spawn.Spawner + TaskDir string } func (e *LinuxExecutor) Open(id string) error { @@ -99,30 +88,22 @@ func (e *LinuxExecutor) Open(id string) error { // Setup the executor. 
e.groups = execID.Groups - e.exitStatusFile = execID.ExitStatusFile - e.userPid = execID.UserPid + e.spawn = execID.Spawn e.taskDir = execID.TaskDir - proc, err := os.FindProcess(execID.SpawnPid) - if proc != nil && err == nil { - e.spawnChild = proc - } - return nil } func (e *LinuxExecutor) ID() (string, error) { - if e.spawnChild == nil { - return "", fmt.Errorf("Process has finished or was never started") + if e.groups == nil || e.spawn == nil || e.taskDir == "" { + return "", fmt.Errorf("LinuxExecutor not properly initialized.") } // Build the ID. id := ExecLinuxID{ - Groups: e.groups, - SpawnPid: e.spawnChild.Pid, - UserPid: e.userPid, - ExitStatusFile: e.exitStatusFile, - TaskDir: e.taskDir, + Groups: e.groups, + Spawn: e.spawn, + TaskDir: e.taskDir, } var buffer bytes.Buffer @@ -170,10 +151,6 @@ func (e *LinuxExecutor) Start() error { e.cmd.SetGID(e.user.Gid) } - if e.alloc == nil { - return errors.New("ConfigureTaskDir() must be called before Start()") - } - // Parse the commands arguments and replace instances of Nomad environment // variables. envVars, err := environment.ParseFromList(e.Cmd.Env) @@ -196,129 +173,42 @@ func (e *LinuxExecutor) Start() error { } e.Cmd.Args = parsed - return e.spawnDaemon() -} + spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) + e.spawn = spawn.NewSpawner(spawnState) + e.spawn.SetCommand(&e.cmd.Cmd) + e.spawn.SetChroot(e.taskDir) + e.spawn.SetLogs(&spawn.Logs{ + Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + Stdin: "/dev/null", + }) -// spawnDaemon executes a double fork to start the user command with proper -// isolation. Stores the child process for use in Wait. -func (e *LinuxExecutor) spawnDaemon() error { - bin, err := discover.NomadExecutable() - if err != nil { - return fmt.Errorf("Failed to determine the nomad executable: %v", err) - } + enterCgroup := func(pid int) error { + // Join the spawn-daemon to the cgroup. + manager := e.getCgroupManager(e.groups) - c := command.DaemonConfig{ - Cmd: e.cmd.Cmd, - Chroot: e.taskDir, - StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), - StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), - StdinFile: "/dev/null", - ExitStatusFile: e.exitStatusFile, - } - - // Serialize the cmd and the cgroup configuration so it can be passed to the - // sub-process. - var buffer bytes.Buffer - enc := json.NewEncoder(&buffer) - if err := enc.Encode(c); err != nil { - return fmt.Errorf("Failed to serialize daemon configuration: %v", err) - } - - // Create a pipe to capture stdout. - if e.spawnOutputReader, e.spawnOutputWriter, err = os.Pipe(); err != nil { - return err - } - - // Call ourselves using a hidden flag. The new instance of nomad will join - // the passed cgroup, forkExec the cmd, and return statuses through stdout. - escaped := strconv.Quote(buffer.String()) - spawn := exec.Command(bin, "spawn-daemon", escaped) - spawn.Stdout = e.spawnOutputWriter - - // Capture its Stdin. - spawnStdIn, err := spawn.StdinPipe() - if err != nil { - return err - } - - if err := spawn.Start(); err != nil { - fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) - } - - // Join the spawn-daemon to the cgroup. - manager := e.getCgroupManager(e.groups) - - // Apply will place the spawn dameon into the created cgroups. 
- if err := manager.Apply(spawn.Process.Pid); err != nil { - errs := new(multierror.Error) - errs = multierror.Append(errs, - fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err)) - - if err := sendAbortCommand(spawnStdIn); err != nil { - errs = multierror.Append(errs, err) + // Apply will place the spawn dameon into the created cgroups. + if err := manager.Apply(pid); err != nil { + return fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err) } - return errs + return nil } - // Tell it to start. - if err := sendStartCommand(spawnStdIn); err != nil { - return err - } - - // Parse the response. - dec := json.NewDecoder(e.spawnOutputReader) - var resp command.SpawnStartStatus - if err := dec.Decode(&resp); err != nil { - return fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) - } - - if resp.ErrorMsg != "" { - return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) - } - - e.userPid = resp.UserPID - e.spawnChild = spawn.Process - return nil -} - -// sendStartCommand sends the necessary command to the spawn-daemon to have it -// start the user process. -func sendStartCommand(w io.Writer) error { - enc := json.NewEncoder(w) - if err := enc.Encode(true); err != nil { - return fmt.Errorf("Failed to serialize start command: %v", err) - } - - return nil -} - -// sendAbortCommand sends the necessary command to the spawn-daemon to have it -// abort starting the user process. This should be invoked if the spawn-daemon -// could not be isolated into a cgroup. -func sendAbortCommand(w io.Writer) error { - enc := json.NewEncoder(w) - if err := enc.Encode(false); err != nil { - return fmt.Errorf("Failed to serialize abort command: %v", err) - } - - return nil + return e.spawn.Spawn(enterCgroup) } // Wait waits til the user process exits and returns an error on non-zero exit // codes. Wait also cleans up the task directory and created cgroups. func (e *LinuxExecutor) Wait() error { - if e.spawnOutputReader != nil { - e.spawnOutputReader.Close() - } - - if e.spawnOutputWriter != nil { - e.spawnOutputWriter.Close() - } - errs := new(multierror.Error) - if err := e.spawnWait(); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Pid, err)) + code, err := e.spawn.Wait() + if err != nil { + errs = multierror.Append(errs, err) + } + + if code != 0 { + errs = multierror.Append(errs, fmt.Errorf("Task exited with code: %d", code)) } if err := e.destroyCgroup(); err != nil { @@ -332,20 +222,6 @@ func (e *LinuxExecutor) Wait() error { return errs.ErrorOrNil() } -// spawnWait waits on the spawn-daemon and can handle the spawn-daemon not being -// a child of this process. -func (e *LinuxExecutor) spawnWait() error { - // TODO: This needs to be able to wait on non-child processes. - state, err := e.spawnChild.Wait() - if err != nil { - return err - } else if !state.Success() { - return fmt.Errorf("exited with non-zero code") - } - - return nil -} - func (e *LinuxExecutor) Shutdown() error { return e.ForceStop() } @@ -353,19 +229,9 @@ func (e *LinuxExecutor) Shutdown() error { // ForceStop immediately exits the user process and cleans up both the task // directory and the cgroups. 
func (e *LinuxExecutor) ForceStop() error { - if e.spawnOutputReader != nil { - e.spawnOutputReader.Close() - } - - if e.spawnOutputWriter != nil { - e.spawnOutputWriter.Close() - } - errs := new(multierror.Error) - if e.groups != nil { - if err := e.destroyCgroup(); err != nil { - errs = multierror.Append(errs, err) - } + if err := e.destroyCgroup(); err != nil { + errs = multierror.Append(errs, err) } if err := e.cleanTaskDir(); err != nil { @@ -381,6 +247,8 @@ func (e *LinuxExecutor) ForceStop() error { // chroot. cleanTaskDir should be called after. func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { e.taskName = taskName + e.allocDir = alloc.AllocDir + taskDir, ok := alloc.TaskDirs[taskName] if !ok { fmt.Errorf("Couldn't find task directory for task %v", taskName) @@ -424,10 +292,6 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) e.Cmd.Env = env.List() - // Store the file path to save the exit status to. - e.exitStatusFile = filepath.Join(alloc.AllocDir, fmt.Sprintf("%s_%s", taskName, "exit_status")) - - e.alloc = alloc return nil } @@ -445,6 +309,7 @@ func (e *LinuxExecutor) pathExists(path string) bool { // should be called when tearing down the task. func (e *LinuxExecutor) cleanTaskDir() error { // Unmount dev. + // TODO: This should check if it is a mount. errs := new(multierror.Error) dev := filepath.Join(e.taskDir, "dev") if e.pathExists(dev) { diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go new file mode 100644 index 000000000..fa75b3940 --- /dev/null +++ b/client/spawn/spawn.go @@ -0,0 +1,322 @@ +package spawn + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "time" + + "github.com/docker/docker/vendor/src/gopkg.in/fsnotify.v1" + "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/command" + "github.com/hashicorp/nomad/helper/discover" +) + +// Spawner is used to start a user command in an isolated fashion that is +// resistent to Nomad agent failure. +type Spawner struct { + spawn *os.Process + SpawnPid int + SpawnPpid int + StateFile string + + // User configuration + UserCmd *exec.Cmd + Logs *Logs + Chroot string +} + +// Logs is used to define the filepaths the user command's logs should be +// redirected to. The files do not need to exist. +type Logs struct { + Stdin, Stdout, Stderr string +} + +// NewSpawner takes a path to a state file. This state file can be used to +// create a new Spawner that can be used to wait on the exit status of a +// process even through Nomad restarts. +func NewSpawner(stateFile string) *Spawner { + return &Spawner{StateFile: stateFile} +} + +// SetCommand sets the user command to spawn. +func (s *Spawner) SetCommand(cmd *exec.Cmd) { + s.UserCmd = cmd +} + +// SetLogs sets the redirection of user command log files. +func (s *Spawner) SetLogs(l *Logs) { + s.Logs = l +} + +// SetChroot puts the user command into a chroot. +func (s *Spawner) SetChroot(root string) { + s.Chroot = root +} + +// Spawn does a double-fork to start and isolate the user command. It takes a +// call-back that is invoked with the pid of the intermediary process. If the +// call back returns an error, the user command is not started and the spawn is +// cancelled. This can be used to put the process into a cgroup or jail and +// cancel starting the user process if that was not successful. 
An error is +// returned if the call-back returns an error or the user-command couldn't be +// started. +func (s *Spawner) Spawn(cb func(pid int) error) error { + bin, err := discover.NomadExecutable() + if err != nil { + return fmt.Errorf("Failed to determine the nomad executable: %v", err) + } + + exitFile, err := os.OpenFile(s.StateFile, os.O_CREATE|os.O_WRONLY, 0666) + defer exitFile.Close() + if err != nil { + return fmt.Errorf("Error opening file to store exit status: %v", err) + } + + config, err := s.spawnConfig() + if err != nil { + return err + } + + spawn := exec.Command(bin, "spawn-daemon", config) + + // Capture stdout + spawnStdout, err := spawn.StdoutPipe() + defer spawnStdout.Close() + if err != nil { + return fmt.Errorf("Failed to capture spawn-daemon stdout: %v", err) + } + + // Capture stdin. + spawnStdin, err := spawn.StdinPipe() + defer spawnStdin.Close() + if err != nil { + return fmt.Errorf("Failed to capture spawn-daemon stdin: %v", err) + } + + if err := spawn.Start(); err != nil { + return fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) + } + + if cb != nil { + cbErr := cb(spawn.Process.Pid) + if cbErr != nil { + errs := new(multierror.Error) + errs = multierror.Append(errs, cbErr) + if err := s.sendAbortCommand(spawnStdin); err != nil { + errs = multierror.Append(errs, err) + } + + return errs + } + } + + if err := s.sendStartCommand(spawnStdin); err != nil { + return err + } + + respCh := make(chan command.SpawnStartStatus, 1) + errCh := make(chan error, 1) + + go func() { + var resp command.SpawnStartStatus + dec := json.NewDecoder(spawnStdout) + if err := dec.Decode(&resp); err != nil { + errCh <- fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) + } + respCh <- resp + }() + + select { + case err := <-errCh: + return err + case resp := <-respCh: + if resp.ErrorMsg != "" { + return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) + } + case <-time.After(5 * time.Second): + return fmt.Errorf("timed out waiting for response") + } + + // Store the spawn process. + s.spawn = spawn.Process + s.SpawnPpid = os.Getpid() + return nil +} + +// spawnConfig returns a serialized config to pass to the Nomad spawn-daemon +// command. +func (s *Spawner) spawnConfig() (string, error) { + if s.UserCmd == nil { + return "", fmt.Errorf("Must specify user command") + } + + config := command.DaemonConfig{ + Cmd: *s.UserCmd, + Chroot: s.Chroot, + ExitStatusFile: s.StateFile, + } + + if s.Logs != nil { + config.StdoutFile = s.Logs.Stdout + config.StdinFile = s.Logs.Stdin + config.StderrFile = s.Logs.Stderr + } + + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(config); err != nil { + return "", fmt.Errorf("Failed to serialize configuration: %v", err) + } + + return strconv.Quote(buffer.String()), nil +} + +// sendStartCommand sends the necessary command to the spawn-daemon to have it +// start the user process. +func (s *Spawner) sendStartCommand(w io.Writer) error { + enc := json.NewEncoder(w) + if err := enc.Encode(true); err != nil { + return fmt.Errorf("Failed to serialize start command: %v", err) + } + + return nil +} + +// sendAbortCommand sends the necessary command to the spawn-daemon to have it +// abort starting the user process. This should be invoked if the spawn-daemon +// could not be isolated into a cgroup. 
+func (s *Spawner) sendAbortCommand(w io.Writer) error { + enc := json.NewEncoder(w) + if err := enc.Encode(false); err != nil { + return fmt.Errorf("Failed to serialize abort command: %v", err) + } + + return nil +} + +// Wait returns the exit code of the user process or an error if the wait +// failed. +func (s *Spawner) Wait() (int, error) { + if os.Getpid() == s.SpawnPpid { + return s.waitAsParent() + } + + return s.waitOnStatusFile() +} + +// waitAsParent waits on the process if the current process was the spawner. +func (s *Spawner) waitAsParent() (int, error) { + if s.SpawnPpid != os.Getpid() { + return -1, fmt.Errorf("not the parent. Spawner parent is %v; current pid is %v", s.SpawnPpid, os.Getpid()) + } + + // Try to reattach to the spawn. + if s.spawn == nil { + // If it can't be reattached, it means the spawn process has exited so + // we should just read its exit file. + var err error + if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil { + return s.waitOnStatusFile() + } + } + + if state, err := s.spawn.Wait(); err != nil { + return -1, err + } else if !state.Exited() { + return -1, fmt.Errorf("Task was killed or crashed") + } + + return s.waitOnStatusFile() +} + +// waitOnStatusFile uses OS level file watching APIs to wait on the status file +// and returns the exit code and possibly an error. +func (s *Spawner) waitOnStatusFile() (int, error) { + // Set up a watcher for the exit status file. + watcher, err := fsnotify.NewWatcher() + if err != nil { + return -1, fmt.Errorf("Failed to create file watcher to read exit code: %v", err) + } + + if err := watcher.Add(s.StateFile); err != nil { + return -1, fmt.Errorf("Failed to watch %v to read exit code: %v", s.StateFile, err) + } + + // Stat to check if it is there to avoid a race condition. + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + // If there is data it means that the file has already been written. + if stat.Size() > 0 { + return s.readExitCode() + } + + // Store the mod time as a way to heartbeat. If the file doesn't get touched + // then we know the spawner has died. This avoids an infinite loop. + prevModTime := stat.ModTime() + + // Wait on watcher. + for { + select { + case event := <-watcher.Events: + if event.Op&fsnotify.Write == fsnotify.Write { + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + if stat.Size() > 0 { + return s.readExitCode() + } + } + case err := <-watcher.Errors: + return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) + case <-time.After(5 * time.Second): + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + modTime := stat.ModTime() + if modTime.Equal(prevModTime) { + return -1, fmt.Errorf("Task is dead and exit code unreadable") + } + + prevModTime = modTime + } + } +} + +// readExitCode parses the state file and returns the exit code of the task. It +// returns an error if the file can't be read. 
+func (s *Spawner) readExitCode() (int, error) { + f, err := os.Open(s.StateFile) + defer f.Close() + if err != nil { + return -1, fmt.Errorf("Failed to open %v to read exit code: %v", s.StateFile, err) + } + + stat, err := f.Stat() + if err != nil { + return -1, fmt.Errorf("Failed to stat file %v: %v", s.StateFile, err) + } + + if stat.Size() == 0 { + return -1, fmt.Errorf("Empty state file: %v", s.StateFile) + } + + var exitStatus command.SpawnExitStatus + dec := json.NewDecoder(f) + if err := dec.Decode(&exitStatus); err != nil { + return -1, fmt.Errorf("Failed to parse exit status from %v: %v", s.StateFile, err) + } + + return exitStatus.ExitCode, nil +} diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go new file mode 100644 index 000000000..d624f9d9f --- /dev/null +++ b/client/spawn/spawn_test.go @@ -0,0 +1,252 @@ +package spawn + +import ( + "fmt" + "io/ioutil" + "os" + "os/exec" + "runtime" + "strings" + "testing" + "time" +) + +func TestSpawn_NoCmd(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + if err := spawn.Spawn(nil); err == nil { + t.Fatalf("Spawn() with no user command should fail") + } +} + +func TestSpawn_InvalidCmd(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("foo")) + if err := spawn.Spawn(nil); err == nil { + t.Fatalf("Spawn() with no invalid command should fail") + } +} + +func TestSpawn_SetsLogs(t *testing.T) { + // TODO: Figure out why this test fails. If the spawn-daemon directly writes + // to the opened stdout file it works but not the user command. Maybe a + // flush issue? + if runtime.GOOS == "windows" { + t.Skip("Test fails on windows; unknown reason. Skipping") + } + + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + exp := "foo" + spawn.SetCommand(exec.Command("echo", exp)) + + // Create file for stdout. 
+ stdout, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(stdout.Name()) + spawn.SetLogs(&Logs{Stdout: stdout.Name()}) + + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed: %v", err) + } + + if code, err := spawn.Wait(); code != 0 && err != nil { + t.Fatalf("Wait() returned %v, %v; want 0, nil", code, err) + } + + stdout2, err := os.Open(stdout.Name()) + if err != nil { + t.Fatalf("Open() failed: %v", err) + } + + data, err := ioutil.ReadAll(stdout2) + if err != nil { + t.Fatalf("ReadAll() failed: %v", err) + } + + act := strings.TrimSpace(string(data)) + if act != exp { + t.Fatalf("Unexpected data written to stdout; got %v; want %v", act, exp) + } +} + +func TestSpawn_Callback(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "1")) + + called := false + cbErr := fmt.Errorf("ERROR CB") + cb := func(_ int) error { + called = true + return cbErr + } + + if err := spawn.Spawn(cb); err == nil { + t.Fatalf("Spawn(%#v) should have errored; want %v", cb, err, cbErr) + } + + if !called { + t.Fatalf("Spawn(%#v) didn't call callback", cb) + } +} + +func TestSpawn_ParentWaitExited(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("echo", "foo")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + time.Sleep(1 * time.Second) + + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_ParentWait(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_NonParentWaitExited(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("echo", "foo")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + time.Sleep(1 * time.Second) + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_NonParentWait(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + // Force the wait to assume non-parent. 
+ spawn.SpawnPpid = 0 + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_DeadSpawnDaemon(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "5")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 81117ce2e..81f5ca2ca 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "syscall" + "time" ) type SpawnDaemonCommand struct { @@ -108,24 +109,31 @@ func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) { // configureLogs creates the log files and redirects the process // stdin/stderr/stdout to them. If unsuccessful, an error is returned. func (c *SpawnDaemonCommand) configureLogs() error { - stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return fmt.Errorf("Error opening file to redirect stdout: %v", err) + if len(c.config.StdoutFile) != 0 { + stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdout: %v", err) + } + + c.config.Cmd.Stdout = stdo } - stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return fmt.Errorf("Error opening file to redirect stderr: %v", err) + if len(c.config.StderrFile) != 0 { + stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stderr: %v", err) + } + c.config.Cmd.Stderr = stde } - stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return fmt.Errorf("Error opening file to redirect stdin: %v", err) + if len(c.config.StdinFile) != 0 { + stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdin: %v", err) + } + c.config.Cmd.Stdin = stdi } - c.config.Cmd.Stdout = stdo - c.config.Cmd.Stderr = stde - c.config.Cmd.Stdin = stdi return nil } @@ -139,7 +147,7 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Open the file we will be using to write exit codes to. We do this early // to ensure that we don't start the user process when we can't capture its // exit status. - c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_CREATE|os.O_RDWR, 0666) + c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_WRONLY, 0666) if err != nil { return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1) } @@ -177,6 +185,17 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Indicate that the command was started successfully. c.outputStartStatus(nil, 0) + // Start a go routine that touches the exit file periodically. 
+ go func() { + for { + select { + case <-time.After(2 * time.Second): + now := time.Now() + os.Chtimes(c.config.ExitStatusFile, now, now) + } + } + }() + // Wait and then output the exit status. return c.writeExitStatus(c.config.Cmd.Wait()) } @@ -192,7 +211,7 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } - if c.config != nil && c.config.Process != nil { + if c.config != nil && c.config.Cmd.Process != nil { startStatus.UserPID = c.config.Process.Pid } diff --git a/helper/discover/discover.go b/helper/discover/discover.go index d90ddb4cc..d172970f7 100644 --- a/helper/discover/discover.go +++ b/helper/discover/discover.go @@ -4,17 +4,19 @@ import ( "fmt" "os" "path/filepath" + "runtime" "github.com/kardianos/osext" ) -const ( - nomadExe = "nomad" -) - // Checks the current executable, then $GOPATH/bin, and finally the CWD, in that // order. If it can't be found, an error is returned. func NomadExecutable() (string, error) { + nomadExe := "nomad" + if runtime.GOOS == "windows" { + nomadExe = "nomad.exe" + } + // Check the current executable. bin, err := osext.Executable() if err != nil { From 84dc194d8a586e177300eb0a33f7aefb922225dd Mon Sep 17 00:00:00 2001 From: Kenjiro Nakayama Date: Wed, 4 Nov 2015 00:06:14 +0900 Subject: [PATCH 48/59] Use const value for AWS metadata URL --- client/fingerprint/env_aws.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 839285a1d..575409bf8 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -15,6 +15,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +// This is where the AWS metadata server normally resides. We hardcode the +// "instance" path as well since it's the only one we access here. 
+const DEFAULT_AWS_URL = "http://169.254.169.254/latest/meta-data/"
+
 // map of instance type to approximate speed, in Mbits/s
 // http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797
 // which itself cites these sources:
@@ -89,7 +93,7 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node)
 	}
 	metadataURL := os.Getenv("AWS_ENV_URL")
 	if metadataURL == "" {
-		metadataURL = "http://169.254.169.254/latest/meta-data/"
+		metadataURL = DEFAULT_AWS_URL
 	}
 
 	// assume 2 seconds is enough time for inside AWS network
@@ -161,7 +165,7 @@ func isAWS() bool {
 	// provide their own
 	metadataURL := os.Getenv("AWS_ENV_URL")
 	if metadataURL == "" {
-		metadataURL = "http://169.254.169.254/latest/meta-data/"
+		metadataURL = DEFAULT_AWS_URL
 	}
 
 	// assume 2 seconds is enough time for inside AWS network
@@ -205,7 +209,7 @@ func (f *EnvAWSFingerprint) linkSpeed() int {
 	// the network speed
 	metadataURL := os.Getenv("AWS_ENV_URL")
 	if metadataURL == "" {
-		metadataURL = "http://169.254.169.254/latest/meta-data/"
+		metadataURL = DEFAULT_AWS_URL
 	}
 
 	// assume 2 seconds is enough time for inside AWS network

From 6cf8eeb21618125358a0507a80e5da63ba22981e Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Tue, 3 Nov 2015 10:50:30 -0800
Subject: [PATCH 49/59] Small improvements

---
 client/driver/java_test.go    |  2 +-
 client/executor/exec_linux.go | 14 ++++++++++----
 client/spawn/spawn.go         |  4 +---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/client/driver/java_test.go b/client/driver/java_test.go
index ad8f5e578..eecfc0faf 100644
--- a/client/driver/java_test.go
+++ b/client/driver/java_test.go
@@ -179,7 +179,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) {
 		if err == nil {
 			t.Fatal("should err")
 		}
-	case <-time.After(2 * time.Second):
+	case <-time.After(8 * time.Second):
 		t.Fatalf("timeout")
 	}
 
diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go
index be70379d2..35090be78 100644
--- a/client/executor/exec_linux.go
+++ b/client/executor/exec_linux.go
@@ -54,8 +54,7 @@ type LinuxExecutor struct {
 	allocDir string
 
 	// Spawn process.
-	spawn      *spawn.Spawner
-	spawnState string
+	spawn *spawn.Spawner
 }
 
 func (e *LinuxExecutor) Command() *cmd {
@@ -180,7 +179,7 @@ func (e *LinuxExecutor) Start() error {
 	e.spawn.SetLogs(&spawn.Logs{
 		Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)),
 		Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)),
-		Stdin:  "/dev/null",
+		Stdin:  os.DevNull,
 	})
 
 	enterCgroup := func(pid int) error {
@@ -309,13 +308,16 @@ func (e *LinuxExecutor) pathExists(path string) bool {
 // should be called when tearing down the task.
 func (e *LinuxExecutor) cleanTaskDir() error {
 	// Unmount dev.
-	// TODO: This should check if it is a mount.
 	errs := new(multierror.Error)
 	dev := filepath.Join(e.taskDir, "dev")
 	if e.pathExists(dev) {
 		if err := syscall.Unmount(dev, 0); err != nil {
 			errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
 		}
+
+		if err := os.RemoveAll(dev); err != nil {
+			errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err))
+		}
 	}
 
 	// Unmount proc.
@@ -324,6 +326,10 @@ func (e *LinuxExecutor) cleanTaskDir() error {
 		if err := syscall.Unmount(proc, 0); err != nil {
 			errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
 		}
+
+		if err := os.RemoveAll(proc); err != nil {
+			errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", proc, err))
+		}
 	}
 
 	return errs.ErrorOrNil()
diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go
index fa75b3940..4b9bb5ddc 100644
--- a/client/spawn/spawn.go
+++ b/client/spawn/spawn.go
@@ -225,10 +225,8 @@ func (s *Spawner) waitAsParent() (int, error) {
 		}
 	}
 
-	if state, err := s.spawn.Wait(); err != nil {
+	if _, err := s.spawn.Wait(); err != nil {
 		return -1, err
-	} else if !state.Exited() {
-		return -1, fmt.Errorf("Task was killed or crashed")
 	}
 
 	return s.waitOnStatusFile()

From 13ea9bc9fff4da16852838da31aec30a635a8339 Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Tue, 3 Nov 2015 12:47:48 -0800
Subject: [PATCH 50/59] Make a basic executor that can be shared and fix some fingerprinting/tests

---
 client/driver/java.go                |   4 +-
 client/driver/java_test.go           |   6 +-
 client/executor/exec_basic.go        | 107 +++++++++++++++++++++++++++
 client/executor/exec_universal.go    |  24 ++----
 client/testutil/driver_compatible.go |   6 ++
 5 files changed, 125 insertions(+), 22 deletions(-)
 create mode 100644 client/executor/exec_basic.go

diff --git a/client/driver/java.go b/client/driver/java.go
index ac2c3c6f3..8aad1dd65 100644
--- a/client/driver/java.go
+++ b/client/driver/java.go
@@ -38,8 +38,8 @@ func NewJavaDriver(ctx *DriverContext) Driver {
 
 func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
 	// Only enable if we are root when running on non-windows systems.
-	if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
-		d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling")
+	if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
+		d.logger.Printf("[DEBUG] driver.java: must run as root user on linux, disabling")
 		return false, nil
 	}
 
diff --git a/client/driver/java_test.go b/client/driver/java_test.go
index eecfc0faf..b4f2f2e15 100644
--- a/client/driver/java_test.go
+++ b/client/driver/java_test.go
@@ -19,7 +19,7 @@ func javaLocated() bool {
 
 // The fingerprinter test should always pass, even if Java is not installed.
func TestJavaDriver_Fingerprint(t *testing.T) { - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) d := NewJavaDriver(testDriverContext("")) node := &structs.Node{ Attributes: make(map[string]string), @@ -93,7 +93,7 @@ func TestJavaDriver_Start_Wait(t *testing.T) { t.Skip("Java not found; skipping") } - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) task := &structs.Task{ Name: "demo-app", Config: map[string]string{ @@ -141,7 +141,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { t.Skip("Java not found; skipping") } - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) task := &structs.Task{ Name: "demo-app", Config: map[string]string{ diff --git a/client/executor/exec_basic.go b/client/executor/exec_basic.go new file mode 100644 index 000000000..81f17d414 --- /dev/null +++ b/client/executor/exec_basic.go @@ -0,0 +1,107 @@ +package executor + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/driver/environment" + "github.com/hashicorp/nomad/nomad/structs" +) + +// BasicExecutor should work everywhere, and as a result does not include +// any resource restrictions or runas capabilities. +type BasicExecutor struct { + cmd +} + +// TODO: Update to use the Spawner. +// TODO: Have raw_exec use this as well. +func NewBasicExecutor() Executor { + return &BasicExecutor{} +} + +func (e *BasicExecutor) Limit(resources *structs.Resources) error { + if resources == nil { + return errNoResources + } + return nil +} + +func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { + taskDir, ok := alloc.TaskDirs[taskName] + if !ok { + return fmt.Errorf("Error finding task dir for (%s)", taskName) + } + e.Dir = taskDir + return nil +} + +func (e *BasicExecutor) Start() error { + // Parse the commands arguments and replace instances of Nomad environment + // variables. + envVars, err := environment.ParseFromList(e.cmd.Env) + if err != nil { + return err + } + + parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) + if err != nil { + return err + } else if len(parsedPath) != 1 { + return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) + } + + e.cmd.Path = parsedPath[0] + combined := strings.Join(e.cmd.Args, " ") + parsed, err := args.ParseAndReplace(combined, envVars.Map()) + if err != nil { + return err + } + e.Cmd.Args = parsed + + // We don't want to call ourself. We want to call Start on our embedded Cmd + return e.cmd.Start() +} + +func (e *BasicExecutor) Open(pid string) error { + pidNum, err := strconv.Atoi(pid) + if err != nil { + return fmt.Errorf("Failed to parse pid %v: %v", pid, err) + } + + process, err := os.FindProcess(pidNum) + if err != nil { + return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) + } + e.Process = process + return nil +} + +func (e *BasicExecutor) Wait() error { + // We don't want to call ourself. 
We want to call Start on our embedded Cmd + return e.cmd.Wait() +} + +func (e *BasicExecutor) ID() (string, error) { + if e.cmd.Process != nil { + return strconv.Itoa(e.cmd.Process.Pid), nil + } else { + return "", fmt.Errorf("Process has finished or was never started") + } +} + +func (e *BasicExecutor) Shutdown() error { + return e.ForceStop() +} + +func (e *BasicExecutor) ForceStop() error { + return e.Process.Kill() +} + +func (e *BasicExecutor) Command() *cmd { + return &e.cmd +} diff --git a/client/executor/exec_universal.go b/client/executor/exec_universal.go index 4979ae3b7..318faea4b 100644 --- a/client/executor/exec_universal.go +++ b/client/executor/exec_universal.go @@ -2,21 +2,11 @@ package executor -import ( - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/nomad/structs" -) +func NewExecutor() Executor { + return &UniversalExecutor{BasicExecutor{}} +} -// UniversalExecutor exists to make the exec driver compile on all operating systems. -type UniversalExecutor struct{} - -func NewExecutor() Executor { return &UniversalExecutor{} } -func (e *UniversalExecutor) Limit(resources *structs.Resources) error { return nil } -func (e *UniversalExecutor) ConfigureTaskDir(string, *allocdir.AllocDir) error { return nil } -func (e *UniversalExecutor) Start() error { return nil } -func (e *UniversalExecutor) Open(pid string) error { return nil } -func (e *UniversalExecutor) Wait() error { return nil } -func (e *UniversalExecutor) ID() (string, error) { return "", nil } -func (e *UniversalExecutor) Shutdown() error { return nil } -func (e *UniversalExecutor) ForceStop() error { return nil } -func (e *UniversalExecutor) Command() *cmd { return nil } +// UniversalExecutor wraps the BasicExecutor +type UniversalExecutor struct { + BasicExecutor +} diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index 94ae6225c..768051e63 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -13,6 +13,12 @@ func ExecCompatible(t *testing.T) { } } +func JavaCompatible(t *testing.T) { + if runtime.GOOS == "linux" && syscall.Geteuid() != 0 { + t.Skip("Test only available when running as root on linux") + } +} + func QemuCompatible(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("Must be on non-windows environments to run test") From fc5b418e7e2377afdeea33b416106e991984c79f Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 12:57:39 -0800 Subject: [PATCH 51/59] Update website --- website/source/docs/drivers/exec.html.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/website/source/docs/drivers/exec.html.md b/website/source/docs/drivers/exec.html.md index dadf28549..e82aa1505 100644 --- a/website/source/docs/drivers/exec.html.md +++ b/website/source/docs/drivers/exec.html.md @@ -11,7 +11,7 @@ description: |- Name: `exec` The `exec` driver is used to simply execute a particular command for a task. -However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation +However, unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation primitives of the operating system to limit the tasks access to resources. While simple, since the `exec` driver can invoke any command, it can be used to call scripts or other wrappers which provide higher level features. 
@@ -28,9 +28,10 @@ must reference it in the `command` as show in the examples below
 
 ## Client Requirements
 
-The `exec` driver can run on all supported operating systems but to provide
-proper isolation the client must be run as root on non-Windows operating systems.
-Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
+The `exec` driver can only be used on Linux, and only when the Nomad client is
+running as root. `exec` is limited to this configuration because resource
+isolation is currently only guaranteed on Linux. Further, the host must have
+cgroups mounted properly in order for the driver to work.
 
 You must specify a `command` to be executed. Optionally you can specify an
 `artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
@@ -68,8 +69,5 @@ The `exec` driver will set the following client attributes:
 The resource isolation provided varies by the operating system of the client
 and the configuration.
 
-On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the
+On Linux, Nomad will use cgroups and a chroot to isolate the
 resources of a process and as such the Nomad agent must be run as root.
-
-On Windows, the task driver will just execute the command with no additional
-resource isolation.

From 2291ea9060b756677733c1585775ea5452419aa4 Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Tue, 3 Nov 2015 13:26:09 -0800
Subject: [PATCH 52/59] Search path

---
 helper/discover/discover.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/helper/discover/discover.go b/helper/discover/discover.go
index d172970f7..8582a0133 100644
--- a/helper/discover/discover.go
+++ b/helper/discover/discover.go
@@ -3,6 +3,7 @@ package discover
 import (
 	"fmt"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"runtime"
 
 	"github.com/kardianos/osext"
 )
@@ -27,6 +28,11 @@ func NomadExecutable() (string, error) {
 		return bin, nil
 	}
 
+	// Check the $PATH
+	if bin, err := exec.LookPath(nomadExe); err == nil {
+		return bin, nil
+	}
+
 	// Check the $GOPATH.
bin = filepath.Join(os.Getenv("GOPATH"), "bin", nomadExe) if _, err := os.Stat(bin); err == nil { From a6f9aeb1b1823d9342dba703cbe49de14dcfe2fa Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 13:37:45 -0800 Subject: [PATCH 53/59] Vet errors --- client/spawn/spawn.go | 2 +- client/spawn/spawn_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index 4b9bb5ddc..ac0d8c444 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -10,7 +10,7 @@ import ( "strconv" "time" - "github.com/docker/docker/vendor/src/gopkg.in/fsnotify.v1" + "github.com/go-fsnotify/fsnotify" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/command" "github.com/hashicorp/nomad/helper/discover" diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index d624f9d9f..e8ddfbaf5 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -106,7 +106,7 @@ func TestSpawn_Callback(t *testing.T) { } if err := spawn.Spawn(cb); err == nil { - t.Fatalf("Spawn(%#v) should have errored; want %v", cb, err, cbErr) + t.Fatalf("Spawn(%#v) should have errored; want %v", cb, cbErr) } if !called { From 1870f9b79994213d3c96596ebe0c92d4d70981b3 Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Tue, 3 Nov 2015 15:54:29 -0600 Subject: [PATCH 54/59] go fmt this file --- nomad/structs/structs.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 15e58d333..bfec26fce 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,17 +14,17 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") - defaultServiceJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") + defaultServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } defaultBatchJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } + Delay: 15 * time.Second, + Attempts: 15, + } ) type MessageType uint8 @@ -928,7 +928,7 @@ func NewRestartPolicy(jobType string) *RestartPolicy { rp := defaultServiceJobRestartPolicy return &rp case JobTypeBatch: - rp := defaultBatchJobRestartPolicy + rp := defaultBatchJobRestartPolicy return &rp } return nil From 2a1577ec8823500d82cd9cdf9fbb8c6acb9d3e93 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 4 Nov 2015 11:18:17 -0800 Subject: [PATCH 55/59] nomad/watch: add a note about the Item struct --- nomad/watch/watch.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index 102e535b2..4e9bafbc9 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -5,7 +5,9 @@ package watch // the underlying state store. // Item describes the scope of a watch. It is used to provide a uniform -// input for subscribe/unsubscribe and notification firing. +// input for subscribe/unsubscribe and notification firing. Specifying +// multiple fields does not place a watch on multiple items. Each Item +// describes exactly one scoped watch. 
type Item struct { Alloc string AllocEval string From 4e88552044d4cc6bb0722febab3f76e0e5f7ed1f Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 4 Nov 2015 11:22:20 -0800 Subject: [PATCH 56/59] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 786380eef..118e0cfc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.2.0 (Unreleased) + +FEATURES: + + * Blocking queries supported in API [GH-366] + ## 0.1.2 (October 6, 2015) IMPROVEMENTS: From 4958be618c269ec649289740cfb72233d78567bc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 14:50:44 -0800 Subject: [PATCH 57/59] Check if the PID is alive instead of heartbeating through modify time --- client/spawn/spawn.go | 15 ++----------- client/spawn/spawn_posix.go | 14 ++++++++++++ client/spawn/spawn_test.go | 41 ++++++++++++++++++++++++++++++++++- client/spawn/spawn_windows.go | 21 ++++++++++++++++++ command/spawn_daemon.go | 12 ---------- 5 files changed, 77 insertions(+), 26 deletions(-) create mode 100644 client/spawn/spawn_posix.go create mode 100644 client/spawn/spawn_windows.go diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index ac0d8c444..5338b8777 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -256,10 +256,6 @@ func (s *Spawner) waitOnStatusFile() (int, error) { return s.readExitCode() } - // Store the mod time as a way to heartbeat. If the file doesn't get touched - // then we know the spawner has died. This avoids an infinite loop. - prevModTime := stat.ModTime() - // Wait on watcher. for { select { @@ -277,17 +273,10 @@ func (s *Spawner) waitOnStatusFile() (int, error) { case err := <-watcher.Errors: return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) case <-time.After(5 * time.Second): - stat, err := os.Stat(s.StateFile) - if err != nil { - return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) - } - - modTime := stat.ModTime() - if modTime.Equal(prevModTime) { + // Check if the process is still alive. 
+ if !s.Alive() { return -1, fmt.Errorf("Task is dead and exit code unreadable") } - - prevModTime = modTime } } } diff --git a/client/spawn/spawn_posix.go b/client/spawn/spawn_posix.go new file mode 100644 index 000000000..7df381064 --- /dev/null +++ b/client/spawn/spawn_posix.go @@ -0,0 +1,14 @@ +// +build !windows + +package spawn + +import "syscall" + +func (s *Spawner) Alive() bool { + if s.spawn == nil { + return false + } + + err := s.spawn.Signal(syscall.Signal(0)) + return err == nil +} diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index e8ddfbaf5..9553470a0 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -214,7 +214,7 @@ func TestSpawn_NonParentWait(t *testing.T) { } } -func TestSpawn_DeadSpawnDaemon(t *testing.T) { +func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) { f, err := ioutil.TempFile("", "") if err != nil { t.Fatalf("TempFile() failed") @@ -250,3 +250,42 @@ func TestSpawn_DeadSpawnDaemon(t *testing.T) { t.Fatalf("Wait() should have failed: %v", err) } } + +func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "5")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} diff --git a/client/spawn/spawn_windows.go b/client/spawn/spawn_windows.go new file mode 100644 index 000000000..9683dce97 --- /dev/null +++ b/client/spawn/spawn_windows.go @@ -0,0 +1,21 @@ +package spawn + +import "syscall" + +const STILL_ACTIVE = 259 + +func (s *Spawner) Alive() bool { + const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE + h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid)) + if e != nil { + return false + } + + var ec uint32 + e = syscall.GetExitCodeProcess(h, &ec) + if e != nil { + return false + } + + return ec == STILL_ACTIVE +} diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 81f5ca2ca..52ffd8e6c 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -9,7 +9,6 @@ import ( "strconv" "strings" "syscall" - "time" ) type SpawnDaemonCommand struct { @@ -185,17 +184,6 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Indicate that the command was started successfully. c.outputStartStatus(nil, 0) - // Start a go routine that touches the exit file periodically. - go func() { - for { - select { - case <-time.After(2 * time.Second): - now := time.Now() - os.Chtimes(c.config.ExitStatusFile, now, now) - } - } - }() - // Wait and then output the exit status. 
return c.writeExitStatus(c.config.Cmd.Wait()) } From 29d72b7477cf2a21732c0783bafa17a318c5f121 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 16:38:28 -0800 Subject: [PATCH 58/59] Remove file watching --- client/spawn/spawn.go | 52 ++++++++++---------------------------- client/spawn/spawn_test.go | 11 +++++++- 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index 5338b8777..ef160611e 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -10,7 +10,6 @@ import ( "strconv" "time" - "github.com/go-fsnotify/fsnotify" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/command" "github.com/hashicorp/nomad/helper/discover" @@ -144,6 +143,7 @@ func (s *Spawner) Spawn(cb func(pid int) error) error { // Store the spawn process. s.spawn = spawn.Process + s.SpawnPid = s.spawn.Pid s.SpawnPpid = os.Getpid() return nil } @@ -206,7 +206,7 @@ func (s *Spawner) Wait() (int, error) { return s.waitAsParent() } - return s.waitOnStatusFile() + return s.pollWait() } // waitAsParent waits on the process if the current process was the spawner. @@ -221,7 +221,7 @@ func (s *Spawner) waitAsParent() (int, error) { // we should just read its exit file. var err error if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil { - return s.waitOnStatusFile() + return s.pollWait() } } @@ -229,22 +229,13 @@ func (s *Spawner) waitAsParent() (int, error) { return -1, err } - return s.waitOnStatusFile() + return s.pollWait() } -// waitOnStatusFile uses OS level file watching APIs to wait on the status file -// and returns the exit code and possibly an error. -func (s *Spawner) waitOnStatusFile() (int, error) { - // Set up a watcher for the exit status file. - watcher, err := fsnotify.NewWatcher() - if err != nil { - return -1, fmt.Errorf("Failed to create file watcher to read exit code: %v", err) - } - - if err := watcher.Add(s.StateFile); err != nil { - return -1, fmt.Errorf("Failed to watch %v to read exit code: %v", s.StateFile, err) - } - +// pollWait polls on the spawn daemon to determine when it exits. After it +// exits, it reads the state file and returns the exit code and possibly an +// error. +func (s *Spawner) pollWait() (int, error) { // Stat to check if it is there to avoid a race condition. stat, err := os.Stat(s.StateFile) if err != nil { @@ -256,29 +247,14 @@ func (s *Spawner) waitOnStatusFile() (int, error) { return s.readExitCode() } - // Wait on watcher. - for { - select { - case event := <-watcher.Events: - if event.Op&fsnotify.Write == fsnotify.Write { - stat, err := os.Stat(s.StateFile) - if err != nil { - return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) - } - - if stat.Size() > 0 { - return s.readExitCode() - } - } - case err := <-watcher.Errors: - return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) - case <-time.After(5 * time.Second): - // Check if the process is still alive. - if !s.Alive() { - return -1, fmt.Errorf("Task is dead and exit code unreadable") - } + // Read after the process exits. + for _ = range time.Tick(5 * time.Second) { + if !s.Alive() { + break } } + + return s.readExitCode() } // readExitCode parses the state file and returns the exit code of the task. 
It diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index 9553470a0..bbb8c8dca 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -202,6 +202,15 @@ func TestSpawn_NonParentWait(t *testing.T) { t.Fatalf("Spawn() failed %v", err) } + // Need to wait on the spawner, otherwise it becomes a zombie and the test + // only finishes after the init process cleans it. This speeds that up. + go func() { + time.Sleep(3 * time.Second) + if _, err := spawn.spawn.Wait(); err != nil { + t.FailNow() + } + }() + // Force the wait to assume non-parent. spawn.SpawnPpid = 0 code, err := spawn.Wait() @@ -265,7 +274,7 @@ func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) { } spawn := NewSpawner(f.Name()) - spawn.SetCommand(exec.Command("sleep", "5")) + spawn.SetCommand(exec.Command("sleep", "2")) if err := spawn.Spawn(cb); err != nil { t.Fatalf("Spawn() errored: %v", err) } From 0f1050b1bf471da9fedaf2ddd8550618d7701acc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 16:53:27 -0800 Subject: [PATCH 59/59] Move the executor and spawn package into driver --- client/driver/exec.go | 2 +- client/{ => driver}/executor/exec.go | 0 client/{ => driver}/executor/exec_basic.go | 0 client/{ => driver}/executor/exec_linux.go | 2 +- client/{ => driver}/executor/exec_linux_test.go | 0 client/{ => driver}/executor/exec_universal.go | 0 client/{ => driver}/executor/setuid.go | 0 client/{ => driver}/executor/setuid_windows.go | 0 client/driver/java.go | 2 +- client/{ => driver}/spawn/spawn.go | 0 client/{ => driver}/spawn/spawn_posix.go | 0 client/{ => driver}/spawn/spawn_test.go | 0 client/{ => driver}/spawn/spawn_windows.go | 0 13 files changed, 3 insertions(+), 3 deletions(-) rename client/{ => driver}/executor/exec.go (100%) rename client/{ => driver}/executor/exec_basic.go (100%) rename client/{ => driver}/executor/exec_linux.go (99%) rename client/{ => driver}/executor/exec_linux_test.go (100%) rename client/{ => driver}/executor/exec_universal.go (100%) rename client/{ => driver}/executor/setuid.go (100%) rename client/{ => driver}/executor/setuid_windows.go (100%) rename client/{ => driver}/spawn/spawn.go (100%) rename client/{ => driver}/spawn/spawn_posix.go (100%) rename client/{ => driver}/spawn/spawn_test.go (100%) rename client/{ => driver}/spawn/spawn_windows.go (100%) diff --git a/client/driver/exec.go b/client/driver/exec.go index cbcb85a0a..e48604894 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -12,7 +12,7 @@ import ( "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/executor" + "github.com/hashicorp/nomad/client/driver/executor" "github.com/hashicorp/nomad/nomad/structs" ) diff --git a/client/executor/exec.go b/client/driver/executor/exec.go similarity index 100% rename from client/executor/exec.go rename to client/driver/executor/exec.go diff --git a/client/executor/exec_basic.go b/client/driver/executor/exec_basic.go similarity index 100% rename from client/executor/exec_basic.go rename to client/driver/executor/exec_basic.go diff --git a/client/executor/exec_linux.go b/client/driver/executor/exec_linux.go similarity index 99% rename from client/executor/exec_linux.go rename to client/driver/executor/exec_linux.go index 35090be78..1b4b312bf 100644 --- a/client/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -15,7 +15,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" 
"github.com/hashicorp/nomad/client/driver/args" "github.com/hashicorp/nomad/client/driver/environment" - "github.com/hashicorp/nomad/client/spawn" + "github.com/hashicorp/nomad/client/driver/spawn" "github.com/hashicorp/nomad/nomad/structs" "github.com/opencontainers/runc/libcontainer/cgroups" diff --git a/client/executor/exec_linux_test.go b/client/driver/executor/exec_linux_test.go similarity index 100% rename from client/executor/exec_linux_test.go rename to client/driver/executor/exec_linux_test.go diff --git a/client/executor/exec_universal.go b/client/driver/executor/exec_universal.go similarity index 100% rename from client/executor/exec_universal.go rename to client/driver/executor/exec_universal.go diff --git a/client/executor/setuid.go b/client/driver/executor/setuid.go similarity index 100% rename from client/executor/setuid.go rename to client/driver/executor/setuid.go diff --git a/client/executor/setuid_windows.go b/client/driver/executor/setuid_windows.go similarity index 100% rename from client/executor/setuid_windows.go rename to client/driver/executor/setuid_windows.go diff --git a/client/driver/java.go b/client/driver/java.go index 8aad1dd65..e7563f6e2 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -14,7 +14,7 @@ import ( "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/executor" + "github.com/hashicorp/nomad/client/driver/executor" "github.com/hashicorp/nomad/nomad/structs" ) diff --git a/client/spawn/spawn.go b/client/driver/spawn/spawn.go similarity index 100% rename from client/spawn/spawn.go rename to client/driver/spawn/spawn.go diff --git a/client/spawn/spawn_posix.go b/client/driver/spawn/spawn_posix.go similarity index 100% rename from client/spawn/spawn_posix.go rename to client/driver/spawn/spawn_posix.go diff --git a/client/spawn/spawn_test.go b/client/driver/spawn/spawn_test.go similarity index 100% rename from client/spawn/spawn_test.go rename to client/driver/spawn/spawn_test.go diff --git a/client/spawn/spawn_windows.go b/client/driver/spawn/spawn_windows.go similarity index 100% rename from client/spawn/spawn_windows.go rename to client/driver/spawn/spawn_windows.go