From ef99e3d16eeb08e8be2b552084137be28031a385 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 27 Oct 2015 14:36:32 -0700 Subject: [PATCH 01/92] nomad: initial pass at blocking queries for jobs --- nomad/job_endpoint.go | 60 +++++++++++++++++++++----------------- nomad/job_endpoint_test.go | 51 ++++++++++++++++++++++++++++++++ nomad/rpc.go | 7 +++++ nomad/state/state_store.go | 17 +++++++++++ 4 files changed, 109 insertions(+), 26 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 63d31eb3c..cfb92bc24 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -216,35 +216,43 @@ func (j *Job) List(args *structs.JobListRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now()) - // Capture all the jobs - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Jobs() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + jobsWatch: true, + run: func() error { + // Capture all the jobs + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Jobs() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - job := raw.(*structs.Job) - reply.Jobs = append(reply.Jobs, job.Stub()) - } + for { + raw := iter.Next() + if raw == nil { + break + } + job := raw.(*structs.Job) + reply.Jobs = append(reply.Jobs, job.Stub()) + } - // Use the last index that affected the jobs table - index, err := snap.Index("jobs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("jobs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return j.srv.blockingRPC(&opts) } // Allocations is used to list the allocations for a job diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index e43ed3ba2..e922f31c3 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -3,6 +3,7 @@ package nomad import ( "reflect" "testing" + "time" "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/mock" @@ -397,6 +398,56 @@ func TestJobEndpoint_ListJobs(t *testing.T) { } } +func TestJobEndpoint_ListJobs_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the job + job := mock.Job() + + go func() { + // Wait a bit + time.Sleep(100 * time.Millisecond) + + // Send the register request + state := s1.fsm.State() + err := state.UpsertJob(2, job) + if err != nil { + t.Fatalf("err: %v", err) + } + }() + + // Lookup the jobs. Should block until the index is reached. 
+ get := &structs.JobListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.JobListResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.List", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + // Check that we blocked + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Jobs) != 1 { + t.Fatalf("bad: %#v", resp.Jobs) + } + if resp.Jobs[0].ID != job.ID { + t.Fatalf("bad: %#v", resp.Jobs[0]) + } +} + func TestJobEndpoint_Allocations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/rpc.go b/nomad/rpc.go index 074dec0d6..dff77eafa 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -271,6 +271,7 @@ type blockingOptions struct { queryOpts *structs.QueryOptions queryMeta *structs.QueryMeta allocWatch string + jobsWatch bool run func() error } @@ -309,6 +310,9 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { if opts.allocWatch != "" { state.StopWatchAllocs(opts.allocWatch, notifyCh) } + if opts.jobsWatch { + state.StopWatchJobs(notifyCh) + } }() REGISTER_NOTIFY: @@ -317,6 +321,9 @@ REGISTER_NOTIFY: if opts.allocWatch != "" { state.WatchAllocs(opts.allocWatch, notifyCh) } + if opts.jobsWatch { + state.WatchJobs(notifyCh) + } RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 22487234b..a24fe9195 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -58,8 +58,12 @@ type IndexEntry struct { // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. type stateWatch struct { + // Allocation watches by node allocs map[string]*NotifyGroup allocLock sync.Mutex + + // Full table job watches + jobs *NotifyGroup } // NewStateStore is used to create a new state store @@ -73,6 +77,7 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the watch entry watch := &stateWatch{ allocs: make(map[string]*NotifyGroup), + jobs: &NotifyGroup{}, } // Create the state store @@ -155,6 +160,16 @@ func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { } } +// WatchJobs is used to start watching the jobs view for changes. +func (s *StateStore) WatchJobs(notify chan struct{}) { + s.watch.jobs.Wait(notify) +} + +// StopWatchJobs is used to cancel notification on the given channel. +func (s *StateStore) StopWatchJobs(notify chan struct{}) { + s.watch.jobs.Clear(notify) +} + // UpsertNode is used to register a node or update a node definition // This is assumed to be triggered by the client, so we retain the value // of drain which is set by the scheduler. 
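The WatchJobs/StopWatchJobs pair added above is consumed by the blockingRPC helper in rpc.go later in this change: the server registers a notification channel before running the query and blocks on it when the result is not newer than the client's MinQueryIndex. A minimal sketch of that pattern, assuming the NotifyGroup semantics used here (size-one buffered channel, non-blocking notify); the waitForJobs helper is illustrative only and not part of this patch:

    // Illustrative only: a simplified version of the blockingRPC loop built
    // directly on the jobs watch; waitForJobs is not part of Nomad.
    func waitForJobs(s *state.StateStore, minIndex uint64, run func() (uint64, error)) error {
        notifyCh := make(chan struct{}, 1)
        defer s.StopWatchJobs(notifyCh)

        for {
            // Register before querying so an update that lands in between
            // is buffered on the channel rather than missed.
            s.WatchJobs(notifyCh)

            index, err := run()
            if err != nil || index > minIndex {
                return err
            }

            // Nothing new for this client yet; block until UpsertJob or
            // DeleteJob notifies the jobs watch. The real blockingRPC also
            // bounds this wait by the query's maximum wait time.
            <-notifyCh
        }
    }

Registering before the query runs is what closes the race: an update that arrives between registration and the query is buffered on the channel, so the wait returns immediately and the query re-runs.
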
@@ -342,6 +357,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.jobs.Notify() }) txn.Commit() return nil } @@ -368,6 +384,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.jobs.Notify() }) txn.Commit() return nil } From 750be3892c66c84d00539f585729220cd53a09b0 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 27 Oct 2015 15:52:40 -0700 Subject: [PATCH 02/92] nomad: allow blocking on empty data views --- nomad/rpc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/rpc.go b/nomad/rpc.go index dff77eafa..a6b6595f3 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -334,7 +334,7 @@ RUN_QUERY: err := opts.run() // Check for minimum query time - if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { + if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { select { case <-notifyCh: goto REGISTER_NOTIFY From 1012a3e5ac9468b51cb5efe0c136dcf572b99572 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 11:13:30 -0700 Subject: [PATCH 03/92] nomad: use a generic full-table watcher --- nomad/job_endpoint.go | 6 ++-- nomad/rpc.go | 18 ++++------- nomad/state/state_store.go | 65 +++++++++++++++++++++++++++++++------- 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index cfb92bc24..8960a2e9d 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -218,9 +218,9 @@ func (j *Job) List(args *structs.JobListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - jobsWatch: true, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"jobs"}, run: func() error { // Capture all the jobs snap, err := j.srv.fsm.State().Snapshot() diff --git a/nomad/rpc.go b/nomad/rpc.go index a6b6595f3..dcb120cc8 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -268,11 +268,11 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - allocWatch string - jobsWatch bool - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + allocWatch string + watchTables []string + run func() error } // blockingRPC is used for queries that need to wait for a @@ -310,9 +310,7 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { if opts.allocWatch != "" { state.StopWatchAllocs(opts.allocWatch, notifyCh) } - if opts.jobsWatch { - state.StopWatchJobs(notifyCh) - } + state.StopWatchTables(notifyCh, opts.watchTables...) }() REGISTER_NOTIFY: @@ -321,9 +319,7 @@ REGISTER_NOTIFY: if opts.allocWatch != "" { state.WatchAllocs(opts.allocWatch, notifyCh) } - if opts.jobsWatch { - state.WatchJobs(notifyCh) - } + state.WatchTables(notifyCh, opts.watchTables...) 
RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index a24fe9195..ac16b2ead 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -62,8 +62,47 @@ type stateWatch struct { allocs map[string]*NotifyGroup allocLock sync.Mutex - // Full table job watches - jobs *NotifyGroup + // Full table watches + tables map[string]*NotifyGroup + tableLock sync.Mutex +} + +// watchTable is used to subscribe a channel to a full table watch. +func (w *stateWatch) watchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + tw, ok := w.tables[table] + if !ok { + tw = new(NotifyGroup) + w.tables[table] = tw + } + tw.Wait(ch) +} + +// stopWatchTable is used to unsubscribe a channel from a table watch. +func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + if tw, ok := w.tables[table]; ok { + tw.Clear(ch) + if tw.Empty() { + delete(w.tables, table) + } + } +} + +// notifyTables is used to notify watchers of the given tables. +func (w *stateWatch) notifyTables(tables ...string) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + for _, table := range tables { + if tw, ok := w.tables[table]; ok { + tw.Notify() + } + } } // NewStateStore is used to create a new state store @@ -77,7 +116,7 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the watch entry watch := &stateWatch{ allocs: make(map[string]*NotifyGroup), - jobs: &NotifyGroup{}, + tables: make(map[string]*NotifyGroup), } // Create the state store @@ -160,14 +199,18 @@ func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { } } -// WatchJobs is used to start watching the jobs view for changes. -func (s *StateStore) WatchJobs(notify chan struct{}) { - s.watch.jobs.Wait(notify) +// WatchTables is used to subscribe a channel to a set of tables. +func (s *StateStore) WatchTables(notify chan struct{}, tables ...string) { + for _, table := range tables { + s.watch.watchTable(table, notify) + } } -// StopWatchJobs is used to cancel notification on the given channel. -func (s *StateStore) StopWatchJobs(notify chan struct{}) { - s.watch.jobs.Clear(notify) +// StopWatchTables is used to unsubscribe a channel from table watches. 
+func (s *StateStore) StopWatchTables(notify chan struct{}, tables ...string) { + for _, table := range tables { + s.watch.stopWatchTable(table, notify) + } } // UpsertNode is used to register a node or update a node definition @@ -357,7 +400,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.jobs.Notify() }) + txn.Defer(func() { s.watch.notifyTables("jobs") }) txn.Commit() return nil } @@ -384,7 +427,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.jobs.Notify() }) + txn.Defer(func() { s.watch.notifyTables("jobs") }) txn.Commit() return nil } From 75af87c2d4129425963a9963ae20a88bd0ce35d8 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 11:21:39 -0700 Subject: [PATCH 04/92] nomad: support blocking queries on nodes --- nomad/node_endpoint.go | 61 +++++++++++++++++++++---------------- nomad/node_endpoint_test.go | 50 ++++++++++++++++++++++++++++++ nomad/state/state_store.go | 4 +++ 3 files changed, 89 insertions(+), 26 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 9ce14aadd..715b6a58d 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -404,35 +404,44 @@ func (n *Node) List(args *structs.NodeListRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) - // Capture all the nodes - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Nodes() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"nodes"}, + run: func() error { - for { - raw := iter.Next() - if raw == nil { - break - } - node := raw.(*structs.Node) - reply.Nodes = append(reply.Nodes, node.Stub()) - } + // Capture all the nodes + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Nodes() + if err != nil { + return err + } - // Use the last index that affected the jobs table - index, err := snap.Index("nodes") - if err != nil { - return err - } - reply.Index = index + for { + raw := iter.Next() + if raw == nil { + break + } + node := raw.(*structs.Node) + reply.Nodes = append(reply.Nodes, node.Stub()) + } - // Set the query response - n.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Use the last index that affected the jobs table + index, err := snap.Index("nodes") + if err != nil { + return err + } + reply.Index = index + + // Set the query response + n.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return n.srv.blockingRPC(&opts) } // createNodeEvals is used to create evaluations for each alloc on a node. 
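From the caller's side, these blocking List endpoints are consumed as a long poll: each response carries the index of the nodes table, and feeding it back as MinQueryIndex makes the next call block until that table changes again. A rough sketch of such a loop, using the same msgpack RPC helper the tests use; pollNodes and its handle callback are illustrative only:

    // Illustrative only: long-polling Node.List by feeding the returned
    // index back in as MinQueryIndex on each iteration.
    func pollNodes(codec rpc.ClientCodec, handle func([]*structs.NodeListStub)) error {
        req := &structs.NodeListRequest{
            QueryOptions: structs.QueryOptions{Region: "global"},
        }
        for {
            var resp structs.NodeListResponse
            if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp); err != nil {
                return err
            }
            handle(resp.Nodes)

            // Block on the next call until the nodes table moves past the
            // index we just observed.
            req.MinQueryIndex = resp.Index
        }
    }
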
diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 62f4a4959..c1a312d48 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -752,3 +752,53 @@ func TestClientEndpoint_ListNodes(t *testing.T) { t.Fatalf("bad: %#v", resp2.Nodes[0]) } } + +func TestClientEndpoint_ListNodes_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the node + node := mock.Node() + + go func() { + // Wait a bit + time.Sleep(100 * time.Millisecond) + + // Send the register request + state := s1.fsm.State() + err := state.UpsertNode(2, node) + if err != nil { + t.Fatalf("err: %v", err) + } + }() + + // List the nodes. Should block until the index is reached. + get := &structs.NodeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.NodeListResponse + if err := msgpackrpc.CallWithCodec(codec, "Node.List", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + // Check that we blocked + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Nodes) != 1 { + t.Fatalf("bad: %#v", resp.Nodes) + } + if resp.Nodes[0].ID != node.ID { + t.Fatalf("bad: %#v", resp.Nodes[0]) + } +} diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index ac16b2ead..a244bb71a 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -245,6 +245,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } @@ -271,6 +272,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } @@ -306,6 +308,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } @@ -341,6 +344,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notifyTables("nodes") }) txn.Commit() return nil } From 417b76a1ac5eba32fa21b4731f49a780c155fc66 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 12:29:06 -0700 Subject: [PATCH 05/92] nomad: test all node watch triggers --- nomad/node_endpoint.go | 5 +- nomad/node_endpoint_test.go | 96 ++++++++++++++++++++++++++++++------- 2 files changed, 82 insertions(+), 19 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 715b6a58d..23c50de57 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -410,7 +410,6 @@ func (n *Node) List(args *structs.NodeListRequest, queryMeta: &reply.QueryMeta, watchTables: []string{"nodes"}, run: func() error { - // Capture all the nodes snap, err := n.srv.fsm.State().Snapshot() if err != nil { @@ -421,14 +420,16 @@ func (n *Node) List(args *structs.NodeListRequest, return err } + var nodes []*structs.NodeListStub for { raw := iter.Next() if raw == nil { break } node := raw.(*structs.Node) - reply.Nodes = append(reply.Nodes, node.Stub()) + nodes = append(nodes, 
node.Stub()) } + reply.Nodes = nodes // Use the last index that affected the jobs table index, err := snap.Index("nodes") diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index c1a312d48..91ae5d4fc 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -756,26 +756,21 @@ func TestClientEndpoint_ListNodes(t *testing.T) { func TestClientEndpoint_ListNodes_blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() + state := s1.fsm.State() codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) // Create the node node := mock.Node() - go func() { - // Wait a bit - time.Sleep(100 * time.Millisecond) - - // Send the register request - state := s1.fsm.State() - err := state.UpsertNode(2, node) - if err != nil { + // Node upsert triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertNode(2, node); err != nil { t.Fatalf("err: %v", err) } - }() + }) - // List the nodes. Should block until the index is reached. - get := &structs.NodeListRequest{ + req := &structs.NodeListRequest{ QueryOptions: structs.QueryOptions{ Region: "global", MinQueryIndex: 1, @@ -783,22 +778,89 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } start := time.Now() var resp structs.NodeListResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.List", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp); err != nil { t.Fatalf("err: %v", err) } - // Check that we blocked if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { t.Fatalf("Bad index: %d %d", resp.Index, 2) } - if len(resp.Nodes) != 1 { + if len(resp.Nodes) != 1 || resp.Nodes[0].ID != node.ID { t.Fatalf("bad: %#v", resp.Nodes) } - if resp.Nodes[0].ID != node.ID { - t.Fatalf("bad: %#v", resp.Nodes[0]) + + // Node drain updates trigger watches. + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateNodeDrain(3, node.ID, true); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + var resp2 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Nodes) != 1 || !resp2.Nodes[0].Drain { + t.Fatalf("bad: %#v", resp2.Nodes) + } + + // Node status update triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateNodeStatus(4, node.ID, structs.NodeStatusDown); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 3 + var resp3 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp3.Index != 4 { + t.Fatalf("Bad index: %d %d", resp3.Index, 4) + } + if len(resp3.Nodes) != 1 || resp3.Nodes[0].Status != structs.NodeStatusDown { + t.Fatalf("bad: %#v", resp3.Nodes) + } + + // Node delete triggers watches. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteNode(5, node.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 4 + var resp4 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp4); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp4.Index != 5 { + t.Fatalf("Bad index: %d %d", resp4.Index, 5) + } + if len(resp4.Nodes) != 0 { + t.Fatalf("bad: %#v", resp4.Nodes) } } From 49a2bef922c4c6883aacd2acb6c76225bf9a8256 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 12:43:00 -0700 Subject: [PATCH 06/92] nomad: job watches return correct response, add tests --- nomad/job_endpoint.go | 4 +++- nomad/job_endpoint_test.go | 48 ++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 8960a2e9d..ca6d59e1a 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -232,14 +232,16 @@ func (j *Job) List(args *structs.JobListRequest, return err } + var jobs []*structs.JobListStub for { raw := iter.Next() if raw == nil { break } job := raw.(*structs.Job) - reply.Jobs = append(reply.Jobs, job.Stub()) + jobs = append(jobs, job.Stub()) } + reply.Jobs = jobs // Use the last index that affected the jobs table index, err := snap.Index("jobs") diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index e922f31c3..8a9e5a1ee 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -401,26 +401,21 @@ func TestJobEndpoint_ListJobs(t *testing.T) { func TestJobEndpoint_ListJobs_blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() + state := s1.fsm.State() codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) // Create the job job := mock.Job() - go func() { - // Wait a bit - time.Sleep(100 * time.Millisecond) - - // Send the register request - state := s1.fsm.State() - err := state.UpsertJob(2, job) - if err != nil { + // Upsert job triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertJob(2, job); err != nil { t.Fatalf("err: %v", err) } - }() + }) - // Lookup the jobs. Should block until the index is reached. 
- get := &structs.JobListRequest{ + req := &structs.JobListRequest{ QueryOptions: structs.QueryOptions{ Region: "global", MinQueryIndex: 1, @@ -428,23 +423,42 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { } start := time.Now() var resp structs.JobListResponse - if err := msgpackrpc.CallWithCodec(codec, "Job.List", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp); err != nil { t.Fatalf("err: %v", err) } - // Check that we blocked if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { t.Fatalf("Bad index: %d %d", resp.Index, 2) } - if len(resp.Jobs) != 1 { + if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID { t.Fatalf("bad: %#v", resp.Jobs) } - if resp.Jobs[0].ID != job.ID { - t.Fatalf("bad: %#v", resp.Jobs[0]) + + // Job deletion triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteJob(3, job.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.JobListResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Jobs) != 0 { + t.Fatalf("bad: %#v", resp2.Jobs) } } From dc7cbcc3f0fe8556c021f48cf36db540812b13b4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 28 Oct 2015 16:23:33 -0700 Subject: [PATCH 07/92] Refactor spawn-daemon so it can be used by all OSes and make it write exit code to a file --- command/spawn_daemon.go | 203 +++++++++++++++++++++++++++--- command/spawn_daemon_darwin.go | 4 + command/spawn_daemon_linux.go | 121 ++---------------- command/spawn_daemon_test.go | 48 +++++++ command/spawn_daemon_universal.go | 9 -- command/spawn_daemon_unix.go | 16 +++ command/spawn_daemon_windows.go | 7 ++ command/test-resources/exiter.py | 3 + 8 files changed, 276 insertions(+), 135 deletions(-) create mode 100644 command/spawn_daemon_darwin.go create mode 100644 command/spawn_daemon_test.go delete mode 100644 command/spawn_daemon_universal.go create mode 100644 command/spawn_daemon_unix.go create mode 100644 command/spawn_daemon_windows.go create mode 100644 command/test-resources/exiter.py diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 3ca825d41..ea7868be4 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -2,19 +2,19 @@ package command import ( "encoding/json" + "fmt" + "io" "os" + "os/exec" + "strconv" "strings" + "syscall" ) type SpawnDaemonCommand struct { Meta -} - -// Status of executing the user's command. -type SpawnStartStatus struct { - // ErrorMsg will be empty if the user command was started successfully. - // Otherwise it will have an error message. - ErrorMsg string + config *DaemonConfig + exitFile io.WriteCloser } func (c *SpawnDaemonCommand) Help() string { @@ -23,15 +23,15 @@ Usage: nomad spawn-daemon [options] INTERNAL ONLY - Spawns a daemon process optionally inside a cgroup. The required daemon_config is a json - encoding of the DaemonConfig struct containing the isolation configuration and command to run. - SpawnStartStatus is json serialized to Stdout upon running the user command or if any error - prevents its execution. 
If there is no error, the process waits on the users - command and then json serializes SpawnExitStatus to Stdout after its termination. - -General Options: - - ` + generalOptionsUsage() + Spawns a daemon process by double forking. The required daemon_config is a + json encoding of the DaemonConfig struct containing the isolation + configuration and command to run. SpawnStartStatus is json serialized to + stdout upon running the user command or if any error prevents its execution. + If there is no error, the process waits on the users command. Once the user + command exits, the exit code is written to a file specified in the + daemon_config and this process exits with the same exit status as the user + command. + ` return strings.TrimSpace(helpText) } @@ -40,6 +40,147 @@ func (c *SpawnDaemonCommand) Synopsis() string { return "Spawn a daemon command with configurable isolation." } +// Status of executing the user's command. +type SpawnStartStatus struct { + // The PID of the user's command. + UserPID int + + // ErrorMsg will be empty if the user command was started successfully. + // Otherwise it will have an error message. + ErrorMsg string +} + +// Exit status of the user's command. +type SpawnExitStatus struct { + // The exit code of the user's command. + ExitCode int +} + +// Configuration for the command to start as a daemon. +type DaemonConfig struct { + exec.Cmd + + // The filepath to write the exit status to. + ExitStatusFile string + + // The paths, if not /dev/null, must be either in the tasks root directory + // or in the shared alloc directory. + StdoutFile string + StdinFile string + StderrFile string + + // An optional path specifying the directory to chroot the process in. + Chroot string +} + +// Whether to start the user command or abort. +type TaskStart bool + +// parseConfig reads the DaemonConfig from the passed arguments. If not +// successful, an error is returned. +func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) { + flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + if err := flags.Parse(args); err != nil { + return nil, fmt.Errorf("failed to parse args: %v", err) + } + + // Check that we got json input. + args = flags.Args() + if len(args) != 1 { + return nil, fmt.Errorf("incorrect number of args; got %v; want 1", len(args)) + } + jsonInput, err := strconv.Unquote(args[0]) + if err != nil { + return nil, fmt.Errorf("Failed to unquote json input: %v", err) + } + + // De-serialize the passed command. + var config DaemonConfig + dec := json.NewDecoder(strings.NewReader(jsonInput)) + if err := dec.Decode(&config); err != nil { + return nil, err + } + + return &config, nil +} + +// configureLogs creates the log files and redirects the process +// stdin/stderr/stdout to them. If unsuccessful, an error is returned. 
+func (c *SpawnDaemonCommand) configureLogs() error { + stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdout: %v", err) + } + + stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stderr: %v", err) + } + + stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdin: %v", err) + } + + c.config.Cmd.Stdout = stdo + c.config.Cmd.Stderr = stde + c.config.Cmd.Stdin = stdi + return nil +} + +func (c *SpawnDaemonCommand) Run(args []string) int { + var err error + c.config, err = c.parseConfig(args) + if err != nil { + return c.outputStartStatus(err, 1) + } + + // Open the file we will be using to write exit codes to. We do this early + // to ensure that we don't start the user process when we can't capture its + // exit status. + c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1) + } + + // Isolate the user process. + if err := c.isolateCmd(); err != nil { + return c.outputStartStatus(err, 1) + } + + // Redirect logs. + if err := c.configureLogs(); err != nil { + return c.outputStartStatus(err, 1) + } + + // Chroot jail the process and set its working directory. + c.configureChroot() + + // Wait to get the start command. + var start TaskStart + dec := json.NewDecoder(os.Stdin) + if err := dec.Decode(&start); err != nil { + return c.outputStartStatus(err, 1) + } + + // Aborted by Nomad process. + if !start { + return 0 + } + + // Spawn the user process. + if err := c.config.Cmd.Start(); err != nil { + return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1) + } + + // Indicate that the command was started successfully. + c.outputStartStatus(nil, 0) + + // Wait and then output the exit status. + return c.writeExitStatus(c.config.Cmd.Wait()) +} + // outputStartStatus is a helper function that outputs a SpawnStartStatus to // Stdout with the passed error, which may be nil to indicate no error. It // returns the passed status. @@ -51,6 +192,36 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } + if c.config != nil && c.config.Process == nil { + startStatus.UserPID = c.config.Process.Pid + } + enc.Encode(startStatus) return status } + +// writeExitStatus takes in the error result from calling wait and writes out +// the exit status to a file. It returns the same exit status as the user +// command. +func (c *SpawnDaemonCommand) writeExitStatus(exit error) int { + // Parse the exit code. + exitStatus := &SpawnExitStatus{} + if exit != nil { + // Default to exit code 1 if we can not get the actual exit code. 
+ exitStatus.ExitCode = 1 + + if exiterr, ok := exit.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + exitStatus.ExitCode = status.ExitStatus() + } + } + } + + if c.exitFile != nil { + enc := json.NewEncoder(c.exitFile) + enc.Encode(exitStatus) + c.exitFile.Close() + } + + return exitStatus.ExitCode +} diff --git a/command/spawn_daemon_darwin.go b/command/spawn_daemon_darwin.go new file mode 100644 index 000000000..f3fe8484a --- /dev/null +++ b/command/spawn_daemon_darwin.go @@ -0,0 +1,4 @@ +package command + +// No chroot on darwin. +func (c *SpawnDaemonCommand) configureChroot() {} diff --git a/command/spawn_daemon_linux.go b/command/spawn_daemon_linux.go index 3e9ceaa3e..512ec645f 100644 --- a/command/spawn_daemon_linux.go +++ b/command/spawn_daemon_linux.go @@ -1,115 +1,16 @@ package command -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "strconv" - "strings" - "syscall" -) +import "syscall" -// Configuration for the command to start as a daemon. -type DaemonConfig struct { - exec.Cmd +// configureChroot enters the user command into a chroot if specified in the +// config and on an OS that supports Chroots. +func (c *SpawnDaemonCommand) configureChroot() { + if len(c.config.Chroot) != 0 { + if c.config.Cmd.SysProcAttr == nil { + c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } - // The paths, if not /dev/null, must be either in the tasks root directory - // or in the shared alloc directory. - StdoutFile string - StdinFile string - StderrFile string - - Chroot string -} - -// Whether to start the user command or abort. -type TaskStart bool - -func (c *SpawnDaemonCommand) Run(args []string) int { - flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient) - flags.Usage = func() { c.Ui.Output(c.Help()) } - - if err := flags.Parse(args); err != nil { - return 1 - } - - // Check that we got json input. - args = flags.Args() - if len(args) != 1 { - c.Ui.Error(c.Help()) - return 1 - } - jsonInput, err := strconv.Unquote(args[0]) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Failed to unquote json input: %v", err), 1) - } - - // De-serialize the passed command. - var cmd DaemonConfig - dec := json.NewDecoder(strings.NewReader(jsonInput)) - if err := dec.Decode(&cmd); err != nil { - return c.outputStartStatus(err, 1) - } - - // Isolate the user process. - if _, err := syscall.Setsid(); err != nil { - return c.outputStartStatus(fmt.Errorf("Failed setting sid: %v", err), 1) - } - - syscall.Umask(0) - - // Redirect logs. - stdo, err := os.OpenFile(cmd.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdout: %v", err), 1) - } - - stde, err := os.OpenFile(cmd.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stderr: %v", err), 1) - } - - stdi, err := os.OpenFile(cmd.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdin: %v", err), 1) - } - - cmd.Cmd.Stdout = stdo - cmd.Cmd.Stderr = stde - cmd.Cmd.Stdin = stdi - - // Chroot jail the process and set its working directory. - if cmd.Cmd.SysProcAttr == nil { - cmd.Cmd.SysProcAttr = &syscall.SysProcAttr{} - } - - cmd.Cmd.SysProcAttr.Chroot = cmd.Chroot - cmd.Cmd.Dir = "/" - - // Wait to get the start command. 
- var start TaskStart - dec = json.NewDecoder(os.Stdin) - if err := dec.Decode(&start); err != nil { - return c.outputStartStatus(err, 1) - } - - if !start { - return 0 - } - - // Spawn the user process. - if err := cmd.Cmd.Start(); err != nil { - return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1) - } - - // Indicate that the command was started successfully. - c.outputStartStatus(nil, 0) - - // Wait and then output the exit status. - if err := cmd.Wait(); err != nil { - return 1 - } - - return 0 + c.config.Cmd.SysProcAttr.Chroot = c.config.Chroot + c.config.Cmd.Dir = "/" + } } diff --git a/command/spawn_daemon_test.go b/command/spawn_daemon_test.go new file mode 100644 index 000000000..5bfd6ad5a --- /dev/null +++ b/command/spawn_daemon_test.go @@ -0,0 +1,48 @@ +package command + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os/exec" + "testing" +) + +type nopCloser struct { + io.ReadWriter +} + +func (n *nopCloser) Close() error { + return nil +} + +func TestSpawnDaemon_WriteExitStatus(t *testing.T) { + // Check if there is python. + path, err := exec.LookPath("python") + if err != nil { + t.Skip("python not detected") + } + + var b bytes.Buffer + daemon := &SpawnDaemonCommand{exitFile: &nopCloser{&b}} + + code := 3 + cmd := exec.Command(path, "./test-resources/exiter.py", fmt.Sprintf("%d", code)) + err = cmd.Run() + actual := daemon.writeExitStatus(err) + if actual != code { + t.Fatalf("writeExitStatus(%v) returned %v; want %v", err, actual, code) + } + + // De-serialize the passed command. + var exitStatus SpawnExitStatus + dec := json.NewDecoder(&b) + if err := dec.Decode(&exitStatus); err != nil { + t.Fatalf("failed to decode exit status: %v", err) + } + + if exitStatus.ExitCode != code { + t.Fatalf("writeExitStatus(%v) wrote exit status %v; want %v", err, exitStatus.ExitCode, code) + } +} diff --git a/command/spawn_daemon_universal.go b/command/spawn_daemon_universal.go deleted file mode 100644 index 5083af5f3..000000000 --- a/command/spawn_daemon_universal.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !linux - -package command - -import "errors" - -func (c *SpawnDaemonCommand) Run(args []string) int { - return c.outputStartStatus(errors.New("spawn-daemon not supported"), 1) -} diff --git a/command/spawn_daemon_unix.go b/command/spawn_daemon_unix.go new file mode 100644 index 000000000..981e52596 --- /dev/null +++ b/command/spawn_daemon_unix.go @@ -0,0 +1,16 @@ +// +build !windows + +package command + +import "syscall" + +// isolateCmd sets the session id for the process and the umask. +func (c *SpawnDaemonCommand) isolateCmd() error { + if c.config.Cmd.SysProcAttr == nil { + c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } + + c.config.Cmd.SysProcAttr.Setsid = true + syscall.Umask(0) + return nil +} diff --git a/command/spawn_daemon_windows.go b/command/spawn_daemon_windows.go new file mode 100644 index 000000000..bb2d63ed8 --- /dev/null +++ b/command/spawn_daemon_windows.go @@ -0,0 +1,7 @@ +// build !linux !darwin + +package command + +// No isolation on Windows. 
+func (c *SpawnDaemonCommand) isolateCmd() error { return nil } +func (c *SpawnDaemonCommand) configureChroot() {} diff --git a/command/test-resources/exiter.py b/command/test-resources/exiter.py new file mode 100644 index 000000000..90e66b98c --- /dev/null +++ b/command/test-resources/exiter.py @@ -0,0 +1,3 @@ +import sys + +sys.exit(int(sys.argv[1])) From ecdc1c92b6bc96ac02be423a11e3da1776c720d3 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 28 Oct 2015 17:22:04 -0700 Subject: [PATCH 08/92] Exec driver only applies on linux as root --- client/driver/exec.go | 13 ++-- client/driver/exec_test.go | 17 +---- client/executor/exec_universal.go | 109 +++------------------------ client/testutil/driver_compatible.go | 4 +- 4 files changed, 23 insertions(+), 120 deletions(-) diff --git a/client/driver/exec.go b/client/driver/exec.go index 0324cad68..cbcb85a0a 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -35,8 +35,11 @@ func NewExecDriver(ctx *DriverContext) Driver { } func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { - // Only enable if we are root when running on non-windows systems. - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + // Only enable if we are root on linux. + if runtime.GOOS != "linux" { + d.logger.Printf("[DEBUG] driver.exec: only available on linux, disabling") + return false, nil + } else if syscall.Geteuid() != 0 { d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling") return false, nil } @@ -73,10 +76,8 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, } // Add execution permissions to the newly downloaded artifact - if runtime.GOOS != "windows" { - if err := syscall.Chmod(artifactFile, 0755); err != nil { - log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err) - } + if err := syscall.Chmod(artifactFile, 0755); err != nil { + log.Printf("[ERR] driver.exec: Error making artifact executable: %s", err) } } diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index ba8745176..488847c5c 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "path/filepath" "reflect" - "runtime" "testing" "time" @@ -123,13 +122,7 @@ func TestExecDriver_Start_Wait(t *testing.T) { func TestExecDriver_Start_Artifact_basic(t *testing.T) { ctestutils.ExecCompatible(t) - var file string - switch runtime.GOOS { - case "darwin": - file = "hi_darwin_amd64" - default: - file = "hi_linux_amd64" - } + file := "hi_linux_amd64" task := &structs.Task{ Name: "sleep", @@ -172,13 +165,7 @@ func TestExecDriver_Start_Artifact_basic(t *testing.T) { func TestExecDriver_Start_Artifact_expanded(t *testing.T) { ctestutils.ExecCompatible(t) - var file string - switch runtime.GOOS { - case "darwin": - file = "hi_darwin_amd64" - default: - file = "hi_linux_amd64" - } + file := "hi_linux_amd64" task := &structs.Task{ Name: "sleep", diff --git a/client/executor/exec_universal.go b/client/executor/exec_universal.go index 6b1977d10..4979ae3b7 100644 --- a/client/executor/exec_universal.go +++ b/client/executor/exec_universal.go @@ -3,105 +3,20 @@ package executor import ( - "fmt" - "os" - "strconv" - "strings" - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/driver/args" - "github.com/hashicorp/nomad/client/driver/environment" "github.com/hashicorp/nomad/nomad/structs" ) -func NewExecutor() Executor { - return &UniversalExecutor{} -} +// UniversalExecutor exists to make the exec 
driver compile on all operating systems. +type UniversalExecutor struct{} -// UniversalExecutor should work everywhere, and as a result does not include -// any resource restrictions or runas capabilities. -type UniversalExecutor struct { - cmd -} - -func (e *UniversalExecutor) Limit(resources *structs.Resources) error { - if resources == nil { - return errNoResources - } - return nil -} - -func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { - taskDir, ok := alloc.TaskDirs[taskName] - if !ok { - return fmt.Errorf("Error finding task dir for (%s)", taskName) - } - e.Dir = taskDir - return nil -} - -func (e *UniversalExecutor) Start() error { - // Parse the commands arguments and replace instances of Nomad environment - // variables. - envVars, err := environment.ParseFromList(e.cmd.Env) - if err != nil { - return err - } - - parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) - if err != nil { - return err - } else if len(parsedPath) != 1 { - return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) - } - - e.cmd.Path = parsedPath[0] - combined := strings.Join(e.cmd.Args, " ") - parsed, err := args.ParseAndReplace(combined, envVars.Map()) - if err != nil { - return err - } - e.Cmd.Args = parsed - - // We don't want to call ourself. We want to call Start on our embedded Cmd - return e.cmd.Start() -} - -func (e *UniversalExecutor) Open(pid string) error { - pidNum, err := strconv.Atoi(pid) - if err != nil { - return fmt.Errorf("Failed to parse pid %v: %v", pid, err) - } - - process, err := os.FindProcess(pidNum) - if err != nil { - return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) - } - e.Process = process - return nil -} - -func (e *UniversalExecutor) Wait() error { - // We don't want to call ourself. 
We want to call Start on our embedded Cmd - return e.cmd.Wait() -} - -func (e *UniversalExecutor) ID() (string, error) { - if e.cmd.Process != nil { - return strconv.Itoa(e.cmd.Process.Pid), nil - } else { - return "", fmt.Errorf("Process has finished or was never started") - } -} - -func (e *UniversalExecutor) Shutdown() error { - return e.ForceStop() -} - -func (e *UniversalExecutor) ForceStop() error { - return e.Process.Kill() -} - -func (e *UniversalExecutor) Command() *cmd { - return &e.cmd -} +func NewExecutor() Executor { return &UniversalExecutor{} } +func (e *UniversalExecutor) Limit(resources *structs.Resources) error { return nil } +func (e *UniversalExecutor) ConfigureTaskDir(string, *allocdir.AllocDir) error { return nil } +func (e *UniversalExecutor) Start() error { return nil } +func (e *UniversalExecutor) Open(pid string) error { return nil } +func (e *UniversalExecutor) Wait() error { return nil } +func (e *UniversalExecutor) ID() (string, error) { return "", nil } +func (e *UniversalExecutor) Shutdown() error { return nil } +func (e *UniversalExecutor) ForceStop() error { return nil } +func (e *UniversalExecutor) Command() *cmd { return nil } diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index df1d27d11..94ae6225c 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -8,8 +8,8 @@ import ( ) func ExecCompatible(t *testing.T) { - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { - t.Skip("Must be root on non-windows environments to run test") + if runtime.GOOS != "linux" || syscall.Geteuid() != 0 { + t.Skip("Test only available running as root on linux") } } From c74a5b8c0a5f276f067c81c0dc540b08d251e264 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 18:11:55 -0700 Subject: [PATCH 09/92] nomad/state: move methods so we can sanely find them --- nomad/state/state_store.go | 217 ++++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 101 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index a244bb71a..31dbd7bac 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -10,6 +10,13 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +// IndexEntry is used with the "index" table +// for managing the latest Raft index affecting a table. +type IndexEntry struct { + Key string + Value uint64 +} + // The StateStore is responsible for maintaining all the Nomad // state. It is manipulated by the FSM which maintains consistency // through the use of Raft. The goals of the StateStore are to provide @@ -23,88 +30,6 @@ type StateStore struct { watch *stateWatch } -// StateSnapshot is used to provide a point-in-time snapshot -type StateSnapshot struct { - StateStore -} - -// StateRestore is used to optimize the performance when -// restoring state by only using a single large transaction -// instead of thousands of sub transactions -type StateRestore struct { - txn *memdb.Txn - watch *stateWatch - allocNodes map[string]struct{} -} - -// Abort is used to abort the restore operation -func (s *StateRestore) Abort() { - s.txn.Abort() -} - -// Commit is used to commit the restore operation -func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notifyAllocs(s.allocNodes) }) - s.txn.Commit() -} - -// IndexEntry is used with the "index" table -// for managing the latest Raft index affecting a table. 
-type IndexEntry struct { - Key string - Value uint64 -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. -type stateWatch struct { - // Allocation watches by node - allocs map[string]*NotifyGroup - allocLock sync.Mutex - - // Full table watches - tables map[string]*NotifyGroup - tableLock sync.Mutex -} - -// watchTable is used to subscribe a channel to a full table watch. -func (w *stateWatch) watchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() - - tw, ok := w.tables[table] - if !ok { - tw = new(NotifyGroup) - w.tables[table] = tw - } - tw.Wait(ch) -} - -// stopWatchTable is used to unsubscribe a channel from a table watch. -func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() - - if tw, ok := w.tables[table]; ok { - tw.Clear(ch) - if tw.Empty() { - delete(w.tables, table) - } - } -} - -// notifyTables is used to notify watchers of the given tables. -func (w *stateWatch) notifyTables(tables ...string) { - w.tableLock.Lock() - defer w.tableLock.Unlock() - - for _, table := range tables { - if tw, ok := w.tables[table]; ok { - tw.Notify() - } - } -} - // NewStateStore is used to create a new state store func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the MemDB @@ -151,6 +76,7 @@ func (s *StateStore) Restore() (*StateRestore, error) { txn: txn, watch: s.watch, allocNodes: make(map[string]struct{}), + tables: make(map[string]struct{}), } return r, nil } @@ -186,19 +112,6 @@ func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) { } } -// notifyAllocs is used to notify any node alloc listeners of a change -func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { - w.allocLock.Lock() - defer w.allocLock.Unlock() - - for node := range nodes { - if grp, ok := w.allocs[node]; ok { - grp.Notify() - delete(w.allocs, node) - } - } -} - // WatchTables is used to subscribe a channel to a set of tables. 
func (s *StateStore) WatchTables(notify chan struct{}, tables ...string) { for _, table := range tables { @@ -245,7 +158,8 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -272,7 +186,8 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -308,7 +223,8 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -344,7 +260,8 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("nodes") }) + tables := map[string]struct{}{"nodes": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -404,7 +321,8 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("jobs") }) + tables := map[string]struct{}{"jobs": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -431,7 +349,8 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyTables("jobs") }) + tables := map[string]struct{}{"jobs": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -817,8 +736,38 @@ func (s *StateStore) Indexes() (memdb.ResultIterator, error) { return iter, nil } +// StateSnapshot is used to provide a point-in-time snapshot +type StateSnapshot struct { + StateStore +} + +// StateRestore is used to optimize the performance when +// restoring state by only using a single large transaction +// instead of thousands of sub transactions +type StateRestore struct { + txn *memdb.Txn + watch *stateWatch + allocNodes map[string]struct{} + tables map[string]struct{} +} + +// Abort is used to abort the restore operation +func (s *StateRestore) Abort() { + s.txn.Abort() +} + +// Commit is used to commit the restore operation +func (s *StateRestore) Commit() { + s.txn.Defer(func() { + s.watch.notifyAllocs(s.allocNodes) + s.watch.notifyTables(s.tables) + }) + s.txn.Commit() +} + // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { + r.tables["nodes"] = struct{}{} if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -827,6 +776,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { + r.tables["jobs"] = struct{}{} if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -835,6 +785,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // 
EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { + r.tables["evals"] = struct{}{} if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -843,6 +794,7 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { + r.tables["allocs"] = struct{}{} r.allocNodes[alloc.NodeID] = struct{}{} if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) @@ -857,3 +809,66 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } + +// stateWatch holds shared state for watching updates. This is +// outside of StateStore so it can be shared with snapshots. +type stateWatch struct { + // Allocation watches by node + allocs map[string]*NotifyGroup + allocLock sync.Mutex + + // Full table watches + tables map[string]*NotifyGroup + tableLock sync.Mutex +} + +// watchTable is used to subscribe a channel to a full table watch. +func (w *stateWatch) watchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + tw, ok := w.tables[table] + if !ok { + tw = new(NotifyGroup) + w.tables[table] = tw + } + tw.Wait(ch) +} + +// stopWatchTable is used to unsubscribe a channel from a table watch. +func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + if tw, ok := w.tables[table]; ok { + tw.Clear(ch) + if tw.Empty() { + delete(w.tables, table) + } + } +} + +// notifyTables is used to notify watchers of the given tables. +func (w *stateWatch) notifyTables(tables map[string]struct{}) { + w.tableLock.Lock() + defer w.tableLock.Unlock() + + for table, _ := range tables { + if tw, ok := w.tables[table]; ok { + tw.Notify() + } + } +} + +// notifyAllocs is used to notify any node alloc listeners of a change +func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { + w.allocLock.Lock() + defer w.allocLock.Unlock() + + for node := range nodes { + if grp, ok := w.allocs[node]; ok { + grp.Notify() + delete(w.allocs, node) + } + } +} From e23f547f2383c110498488450afa6009776cc051 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 18:34:56 -0700 Subject: [PATCH 10/92] nomad: support blocking queries on eval list --- nomad/eval_endpoint.go | 62 ++++++++++++++++++++--------------- nomad/eval_endpoint_test.go | 64 +++++++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 9 +++++- 3 files changed, 108 insertions(+), 27 deletions(-) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 0dce98a52..6e8e65054 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -219,35 +219,45 @@ func (e *Eval) List(args *structs.EvalListRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now()) - // Scan all the evaluations - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Evals() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"evals"}, + run: func() error { + // Scan all the evaluations + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Evals() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - 
eval := raw.(*structs.Evaluation) - reply.Evaluations = append(reply.Evaluations, eval) - } + var evals []*structs.Evaluation + for { + raw := iter.Next() + if raw == nil { + break + } + eval := raw.(*structs.Evaluation) + evals = append(evals, eval) + } + reply.Evaluations = evals - // Use the last index that affected the jobs table - index, err := snap.Index("evals") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("evals") + if err != nil { + return err + } + reply.Index = index - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } // Allocations is used to list the allocations for an evaluation diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index eb61ea3d0..3b9a62a8e 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -334,6 +334,70 @@ func TestEvalEndpoint_List(t *testing.T) { } } +func TestEvalEndpoint_List_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the ieval + eval := mock.Eval() + + // Upsert eval triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertEvals(2, []*structs.Evaluation{eval}); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.EvalListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Evaluations) != 1 || resp.Evaluations[0].ID != eval.ID { + t.Fatalf("bad: %#v", resp.Evaluations) + } + + // Eval deletion triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteEval(3, []string{eval.ID}, nil); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Evaluations) != 0 { + t.Fatalf("bad: %#v", resp2.Evaluations) + } +} + func TestEvalEndpoint_Allocations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 31dbd7bac..9a7a33273 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -407,6 +407,8 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro } } + tables := map[string]struct{}{"evals": struct{}{}} + txn.Defer(func() { s.watch.notifyTables(tables) }) txn.Commit() return nil } @@ -478,7 +480,12 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { return fmt.Errorf("index update failed: 
%v", err) } - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + + tables := map[string]struct{}{"evals": struct{}{}} + txn.Defer(func() { + s.watch.notifyAllocs(nodes) + s.watch.notifyTables(tables) + }) txn.Commit() return nil } From b9fb0252007ebf669b46fb67170b9cbd0d239060 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 18:35:48 -0700 Subject: [PATCH 11/92] nomad: fix node test output --- nomad/job_endpoint_test.go | 2 +- nomad/node_endpoint_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 8a9e5a1ee..0591e73bf 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -452,7 +452,7 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { t.Fatalf("Bad index: %d %d", resp2.Index, 3) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 91ae5d4fc..d06e6ea0f 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -807,7 +807,7 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { t.Fatalf("Bad index: %d %d", resp2.Index, 3) @@ -831,7 +831,7 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) } if resp3.Index != 4 { t.Fatalf("Bad index: %d %d", resp3.Index, 4) @@ -855,7 +855,7 @@ func TestClientEndpoint_ListNodes_blocking(t *testing.T) { } if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { - t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) } if resp4.Index != 5 { t.Fatalf("Bad index: %d %d", resp4.Index, 5) From b162c259d24ce6336c49477649829aef7b793dc1 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 28 Oct 2015 19:25:39 -0700 Subject: [PATCH 12/92] nomad: support full table watches for allocations --- nomad/alloc_endpoint.go | 62 ++++++++++++++++++-------------- nomad/alloc_endpoint_test.go | 69 ++++++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 12 +++++-- 3 files changed, 115 insertions(+), 28 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 53b630480..09bd28727 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -19,35 +19,45 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes } defer metrics.MeasureSince([]string{"nomad", "alloc", "list"}, time.Now()) - // Capture all the allocations - snap, err := a.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Allocs() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTables: []string{"allocs"}, + run: func() error { + // Capture all the allocations + snap, err := a.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Allocs() + if err != nil { + return err + } - 
for { - raw := iter.Next() - if raw == nil { - break - } - alloc := raw.(*structs.Allocation) - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } + var allocs []*structs.AllocListStub + for { + raw := iter.Next() + if raw == nil { + break + } + alloc := raw.(*structs.Allocation) + allocs = append(allocs, alloc.Stub()) + } + reply.Allocations = allocs - // Use the last index that affected the jobs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - a.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + a.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return a.srv.blockingRPC(&opts) } // GetAlloc is used to lookup a particular allocation diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 8076b64d6..01688da96 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -3,6 +3,7 @@ package nomad import ( "reflect" "testing" + "time" "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/mock" @@ -44,6 +45,74 @@ func TestAllocEndpoint_List(t *testing.T) { } } +func TestAllocEndpoint_List_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the alloc + alloc := mock.Alloc() + + // Upsert alloc triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertAllocs(2, []*structs.Allocation{alloc}); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.AllocListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID { + t.Fatalf("bad: %#v", resp.Allocations) + } + + // Client updates trigger watches + alloc2 := mock.Alloc() + alloc2.ID = alloc.ID + alloc2.ClientStatus = structs.AllocClientStatusRunning + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateAllocFromClient(3, alloc2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID || + resp2.Allocations[0].ClientStatus != structs.AllocClientStatusRunning { + t.Fatalf("bad: %#v", resp2.Allocations) + } +} + func TestAllocEndpoint_GetAlloc(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 9a7a33273..389991de9 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ 
-580,8 +580,12 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } + tables := map[string]struct{}{"allocs": struct{}{}} nodes := map[string]struct{}{alloc.NodeID: struct{}{}} - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + txn.Defer(func() { + s.watch.notifyAllocs(nodes) + s.watch.notifyTables(tables) + }) txn.Commit() return nil } @@ -621,7 +625,11 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + tables := map[string]struct{}{"allocs": struct{}{}} + txn.Defer(func() { + s.watch.notifyAllocs(nodes) + s.watch.notifyTables(tables) + }) txn.Commit() return nil } From 2558ab3f31e989a49fe125f098cfb02423134f5b Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 11:57:41 -0700 Subject: [PATCH 13/92] nomad: unify watcher inputs for reusability --- nomad/alloc_endpoint.go | 6 +- nomad/eval_endpoint.go | 6 +- nomad/job_endpoint.go | 6 +- nomad/node_endpoint.go | 12 +-- nomad/rpc.go | 22 ++-- nomad/state/state_store.go | 213 +++++++++++++------------------------ 6 files changed, 100 insertions(+), 165 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 09bd28727..a2ce6a09a 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -21,9 +21,9 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"allocs"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "allocs", run: func() error { // Capture all the allocations snap, err := a.srv.fsm.State().Snapshot() diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 6e8e65054..5d87948aa 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -221,9 +221,9 @@ func (e *Eval) List(args *structs.EvalListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"evals"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "evals", run: func() error { // Scan all the evaluations snap, err := e.srv.fsm.State().Snapshot() diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index ca6d59e1a..30bc35563 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -218,9 +218,9 @@ func (j *Job) List(args *structs.JobListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"jobs"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "jobs", run: func() error { // Capture all the jobs snap, err := j.srv.fsm.State().Snapshot() diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 23c50de57..7e7332974 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -330,9 +330,9 @@ func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - allocWatch: args.NodeID, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchAllocNode: args.NodeID, run: func() error { // Look for the node snap, err := n.srv.fsm.State().Snapshot() @@ -406,9 +406,9 @@ func (n *Node) List(args 
*structs.NodeListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTables: []string{"nodes"}, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watchTable: "nodes", run: func() error { // Capture all the nodes snap, err := n.srv.fsm.State().Snapshot() diff --git a/nomad/rpc.go b/nomad/rpc.go index dcb120cc8..f1977dbc7 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -268,11 +268,11 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - allocWatch string - watchTables []string - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + watchAllocNode string + watchTable string + run func() error } // blockingRPC is used for queries that need to wait for a @@ -307,19 +307,15 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { state = s.fsm.State() defer func() { timeout.Stop() - if opts.allocWatch != "" { - state.StopWatchAllocs(opts.allocWatch, notifyCh) - } - state.StopWatchTables(notifyCh, opts.watchTables...) + state.StopWatchAllocNode(opts.watchAllocNode, notifyCh) + state.StopWatchTable(opts.watchTable, notifyCh) }() REGISTER_NOTIFY: // Register the notification channel. This may be done // multiple times if we have not reached the target wait index. - if opts.allocWatch != "" { - state.WatchAllocs(opts.allocWatch, notifyCh) - } - state.WatchTables(notifyCh, opts.watchTables...) + state.WatchAllocNode(opts.watchAllocNode, notifyCh) + state.WatchTable(opts.watchTable, notifyCh) RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 389991de9..0a8adc0d0 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -38,17 +38,11 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { return nil, fmt.Errorf("state store setup failed: %v", err) } - // Create the watch entry - watch := &stateWatch{ - allocs: make(map[string]*NotifyGroup), - tables: make(map[string]*NotifyGroup), - } - // Create the state store s := &StateStore{ logger: log.New(logOutput, "", log.LstdFlags), db: db, - watch: watch, + watch: newStateWatch(), } return s, nil } @@ -73,57 +67,30 @@ func (s *StateStore) Snapshot() (*StateSnapshot, error) { func (s *StateStore) Restore() (*StateRestore, error) { txn := s.db.Txn(true) r := &StateRestore{ - txn: txn, - watch: s.watch, - allocNodes: make(map[string]struct{}), - tables: make(map[string]struct{}), + txn: txn, + watch: s.watch, } return r, nil } -// WatchAllocs is used to subscribe a channel to changes in allocations for a node -func (s *StateStore) WatchAllocs(node string, notify chan struct{}) { - s.watch.allocLock.Lock() - defer s.watch.allocLock.Unlock() - - // Check for an existing notify group - if grp, ok := s.watch.allocs[node]; ok { - grp.Wait(notify) - return - } - - // Create new notify group - grp := &NotifyGroup{} - grp.Wait(notify) - s.watch.allocs[node] = grp +// WatchTable is used to subscribe a channel to a full table watch. 
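For context, a minimal sketch of how a caller consumes these table watches (the channel, the "jobs" table name, and the timeout are illustrative, not part of the patch):

notifyCh := make(chan struct{}, 1)
state.WatchTable("jobs", notifyCh)
defer state.StopWatchTable("jobs", notifyCh)

select {
case <-notifyCh:
	// a write touched the jobs table; re-run the query
case <-time.After(5 * time.Second):
	// no change arrived before the blocking-query timeout
}
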
+func (s *StateStore) WatchTable(table string, notify chan struct{}) { + s.watch.watch(watchItem{table: table}, notify) } -// StopWatchAllocs is used to unsubscribe a channel from changes in allocations -func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) { - s.watch.allocLock.Lock() - defer s.watch.allocLock.Unlock() - - // Check for an existing notify group - if grp, ok := s.watch.allocs[node]; ok { - grp.Clear(notify) - if grp.Empty() { - delete(s.watch.allocs, node) - } - } +// StopWatchTable unsubscribes a channel from a full table watch. +func (s *StateStore) StopWatchTable(table string, notify chan struct{}) { + s.watch.stopWatch(watchItem{table: table}, notify) } -// WatchTables is used to subscribe a channel to a set of tables. -func (s *StateStore) WatchTables(notify chan struct{}, tables ...string) { - for _, table := range tables { - s.watch.watchTable(table, notify) - } +// WatchAllocNode is used to subscribe a channel to a node allocation watch. +func (s *StateStore) WatchAllocNode(nodeID string, notify chan struct{}) { + s.watch.watch(watchItem{allocNode: nodeID}, notify) } -// StopWatchTables is used to unsubscribe a channel from table watches. -func (s *StateStore) StopWatchTables(notify chan struct{}, tables ...string) { - for _, table := range tables { - s.watch.stopWatchTable(table, notify) - } +// StopWatchAllocNode unsubscribes a channel from a node allocation watch. +func (s *StateStore) StopWatchAllocNode(nodeID string, notify chan struct{}) { + s.watch.stopWatch(watchItem{allocNode: nodeID}, notify) } // UpsertNode is used to register a node or update a node definition @@ -158,8 +125,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -186,8 +152,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -223,8 +188,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -260,8 +224,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"nodes": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) txn.Commit() return nil } @@ -321,8 +284,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"jobs": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) txn.Commit() return nil } @@ -349,8 +311,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"jobs": 
struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) txn.Commit() return nil } @@ -407,8 +368,7 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro } } - tables := map[string]struct{}{"evals": struct{}{}} - txn.Defer(func() { s.watch.notifyTables(tables) }) + txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) txn.Commit() return nil } @@ -444,7 +404,6 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() - nodes := make(map[string]struct{}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -467,7 +426,6 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } - nodes[existing.(*structs.Allocation).NodeID] = struct{}{} if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } @@ -481,11 +439,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"evals": struct{}{}} - txn.Defer(func() { - s.watch.notifyAllocs(nodes) - s.watch.notifyTables(tables) - }) + txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) txn.Commit() return nil } @@ -580,12 +534,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"allocs": struct{}{}} - nodes := map[string]struct{}{alloc.NodeID: struct{}{}} - txn.Defer(func() { - s.watch.notifyAllocs(nodes) - s.watch.notifyTables(tables) - }) + txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) txn.Commit() return nil } @@ -595,7 +544,6 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() - nodes := make(map[string]struct{}) // Handle the allocations for _, alloc := range allocs { @@ -614,7 +562,6 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er alloc.ClientStatus = exist.ClientStatus alloc.ClientDescription = exist.ClientDescription } - nodes[alloc.NodeID] = struct{}{} if err := txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -625,11 +572,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - tables := map[string]struct{}{"allocs": struct{}{}} - txn.Defer(func() { - s.watch.notifyAllocs(nodes) - s.watch.notifyTables(tables) - }) + txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) txn.Commit() return nil } @@ -760,10 +703,9 @@ type StateSnapshot struct { // restoring state by only using a single large transaction // instead of thousands of sub transactions type StateRestore struct { - txn *memdb.Txn - watch *stateWatch - allocNodes map[string]struct{} - tables map[string]struct{} + txn *memdb.Txn + watch *stateWatch + items []watchItem } // Abort is used to abort the restore operation @@ -773,16 +715,13 @@ func (s *StateRestore) Abort() { // Commit is used to commit the restore operation func (s *StateRestore) Commit() { - s.txn.Defer(func() { - 
s.watch.notifyAllocs(s.allocNodes) - s.watch.notifyTables(s.tables) - }) + s.txn.Defer(func() { s.watch.notify(s.items...) }) s.txn.Commit() } // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { - r.tables["nodes"] = struct{}{} + r.items = append(r.items, watchItem{table: "nodes"}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -791,7 +730,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { - r.tables["jobs"] = struct{}{} + r.items = append(r.items, watchItem{table: "jobs"}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -800,7 +739,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { - r.tables["evals"] = struct{}{} + r.items = append(r.items, watchItem{table: "evals"}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -809,8 +748,8 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.tables["allocs"] = struct{}{} - r.allocNodes[alloc.NodeID] = struct{}{} + r.items = append(r.items, watchItem{table: "allocs"}) + r.items = append(r.items, watchItem{allocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -825,65 +764,65 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { return nil } +// watchItem describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. +type watchItem struct { + allocID string + allocNode string + evalID string + jobID string + nodeID string + table string +} + // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. type stateWatch struct { - // Allocation watches by node - allocs map[string]*NotifyGroup - allocLock sync.Mutex - - // Full table watches - tables map[string]*NotifyGroup - tableLock sync.Mutex + items map[watchItem]*NotifyGroup + l sync.Mutex } -// watchTable is used to subscribe a channel to a full table watch. -func (w *stateWatch) watchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watchItem]*NotifyGroup), + } +} - tw, ok := w.tables[table] +// watch subscribes a channel to the given watch item. +func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + grp, ok := w.items[wi] if !ok { - tw = new(NotifyGroup) - w.tables[table] = tw + grp = new(NotifyGroup) + w.items[wi] = grp } - tw.Wait(ch) + grp.Wait(ch) } -// stopWatchTable is used to unsubscribe a channel from a table watch. -func (w *stateWatch) stopWatchTable(table string, ch chan struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() +// stopWatch unsubscribes a channel from the given watch item. 
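As a rough sketch, the subscribe/notify cycle this watcher provides inside the state package looks like the following (the table name and channel are illustrative only):

w := newStateWatch()
ch := make(chan struct{}, 1)

// Subscribe the channel to changes scoped to the nodes table.
w.watch(watchItem{table: "nodes"}, ch)

// Any write that touches the nodes table fires the notification.
w.notify(watchItem{table: "nodes"})
<-ch

// Unsubscribe once the caller is done waiting.
w.stopWatch(watchItem{table: "nodes"}, ch)
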
+func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() - if tw, ok := w.tables[table]; ok { - tw.Clear(ch) - if tw.Empty() { - delete(w.tables, table) + if grp, ok := w.items[wi]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, wi) } } } -// notifyTables is used to notify watchers of the given tables. -func (w *stateWatch) notifyTables(tables map[string]struct{}) { - w.tableLock.Lock() - defer w.tableLock.Unlock() +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items ...watchItem) { + w.l.Lock() + defer w.l.Unlock() - for table, _ := range tables { - if tw, ok := w.tables[table]; ok { - tw.Notify() - } - } -} - -// notifyAllocs is used to notify any node alloc listeners of a change -func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { - w.allocLock.Lock() - defer w.allocLock.Unlock() - - for node := range nodes { - if grp, ok := w.allocs[node]; ok { + for _, wi := range items { + if grp, ok := w.items[wi]; ok { grp.Notify() - delete(w.allocs, node) } } } From d9e593a0ae06d726566db9ce4e63a72bd15e0229 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 12:09:09 -0700 Subject: [PATCH 14/92] nomad: deduplicate watch items with a helper --- nomad/state/state_store.go | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 0a8adc0d0..60af402d3 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -69,6 +69,7 @@ func (s *StateStore) Restore() (*StateRestore, error) { r := &StateRestore{ txn: txn, watch: s.watch, + items: make(watchItems), } return r, nil } @@ -705,7 +706,7 @@ type StateSnapshot struct { type StateRestore struct { txn *memdb.Txn watch *stateWatch - items []watchItem + items watchItems } // Abort is used to abort the restore operation @@ -715,13 +716,13 @@ func (s *StateRestore) Abort() { // Commit is used to commit the restore operation func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notify(s.items...) }) + s.txn.Defer(func() { s.watch.notify(s.items.items()...) 
}) s.txn.Commit() } // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { - r.items = append(r.items, watchItem{table: "nodes"}) + r.items.add(watchItem{table: "nodes"}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -730,7 +731,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { - r.items = append(r.items, watchItem{table: "jobs"}) + r.items.add(watchItem{table: "jobs"}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -739,7 +740,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { - r.items = append(r.items, watchItem{table: "evals"}) + r.items.add(watchItem{table: "evals"}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -748,8 +749,8 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.items = append(r.items, watchItem{table: "allocs"}) - r.items = append(r.items, watchItem{allocNode: alloc.NodeID}) + r.items.add(watchItem{table: "allocs"}) + r.items.add(watchItem{allocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -775,6 +776,24 @@ type watchItem struct { table string } +// watchItems is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type watchItems map[watchItem]struct{} + +// add adds an item to the watch set. +func (w watchItems) add(wi watchItem) { + w[wi] = struct{}{} +} + +// items returns the items as a slice. +func (w watchItems) items() []watchItem { + items := make([]watchItem, 0, len(w)) + for wi, _ := range w { + items = append(items, wi) + } + return items +} + // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. 
type stateWatch struct { From 31abf97e06ae51edcdc7b09aa8cee8dadf087ea2 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 13:21:25 -0700 Subject: [PATCH 15/92] nomad: move state watcher into its own file, add tests --- nomad/state/state_store.go | 92 +++------------------------------ nomad/state/state_store_test.go | 10 ++-- nomad/state/watch.go | 86 ++++++++++++++++++++++++++++++ nomad/state/watch_test.go | 64 +++++++++++++++++++++++ 4 files changed, 163 insertions(+), 89 deletions(-) create mode 100644 nomad/state/watch.go create mode 100644 nomad/state/watch_test.go diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 60af402d3..685233447 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4,7 +4,6 @@ import ( "fmt" "io" "log" - "sync" "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" @@ -405,6 +404,8 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "evals"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -427,6 +428,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } + watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } @@ -440,7 +442,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) + txn.Defer(func() { s.watch.notify(watch.items()...) }) txn.Commit() return nil } @@ -545,9 +547,12 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "allocs"}) // Handle the allocations for _, alloc := range allocs { + watch.add(watchItem{allocNode: alloc.NodeID}) existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { return fmt.Errorf("alloc lookup failed: %v", err) @@ -573,7 +578,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) + txn.Defer(func() { s.watch.notify(watch.items()...) }) txn.Commit() return nil } @@ -764,84 +769,3 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } - -// watchItem describes the scope of a watch. It is used to provide a uniform -// input for subscribe/unsubscribe and notification firing. -type watchItem struct { - allocID string - allocNode string - evalID string - jobID string - nodeID string - table string -} - -// watchItems is a helper used to construct a set of watchItems. It deduplicates -// the items as they are added using map keys. -type watchItems map[watchItem]struct{} - -// add adds an item to the watch set. -func (w watchItems) add(wi watchItem) { - w[wi] = struct{}{} -} - -// items returns the items as a slice. 
-func (w watchItems) items() []watchItem { - items := make([]watchItem, 0, len(w)) - for wi, _ := range w { - items = append(items, wi) - } - return items -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. -type stateWatch struct { - items map[watchItem]*NotifyGroup - l sync.Mutex -} - -// newStateWatch creates a new stateWatch for change notification. -func newStateWatch() *stateWatch { - return &stateWatch{ - items: make(map[watchItem]*NotifyGroup), - } -} - -// watch subscribes a channel to the given watch item. -func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - grp, ok := w.items[wi] - if !ok { - grp = new(NotifyGroup) - w.items[wi] = grp - } - grp.Wait(ch) -} - -// stopWatch unsubscribes a channel from the given watch item. -func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - if grp, ok := w.items[wi]; ok { - grp.Clear(ch) - if grp.Empty() { - delete(w.items, wi) - } - } -} - -// notify is used to fire notifications on the given watch items. -func (w *stateWatch) notify(items ...watchItem) { - w.l.Lock() - defer w.l.Unlock() - - for _, wi := range items { - if grp, ok := w.items[wi]; ok { - grp.Notify() - } - } -} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 1c4b60238..58f8093bf 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -585,7 +585,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } notify1 := make(chan struct{}, 1) - state.WatchAllocs(alloc.NodeID, notify1) + state.WatchAllocNode(alloc.NodeID, notify1) err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) if err != nil { @@ -808,14 +808,14 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { } } -func TestStateStore_WatchAllocs(t *testing.T) { +func TestStateStore_WatchAllocNode(t *testing.T) { state := testStateStore(t) notify1 := make(chan struct{}, 1) notify2 := make(chan struct{}, 1) - state.WatchAllocs("foo", notify1) - state.WatchAllocs("foo", notify2) - state.StopWatchAllocs("foo", notify2) + state.WatchAllocNode("foo", notify1) + state.WatchAllocNode("foo", notify2) + state.StopWatchAllocNode("foo", notify2) alloc := mock.Alloc() alloc.NodeID = "foo" diff --git a/nomad/state/watch.go b/nomad/state/watch.go new file mode 100644 index 000000000..c0e529b28 --- /dev/null +++ b/nomad/state/watch.go @@ -0,0 +1,86 @@ +package state + +import ( + "sync" +) + +// watchItem describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. +type watchItem struct { + allocID string + allocNode string + evalID string + jobID string + nodeID string + table string +} + +// watchItems is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type watchItems map[watchItem]struct{} + +// add adds an item to the watch set. +func (w watchItems) add(wi watchItem) { + w[wi] = struct{}{} +} + +// items returns the items as a slice. +func (w watchItems) items() []watchItem { + items := make([]watchItem, 0, len(w)) + for wi, _ := range w { + items = append(items, wi) + } + return items +} + +// stateWatch holds shared state for watching updates. This is +// outside of StateStore so it can be shared with snapshots. 
+type stateWatch struct { + items map[watchItem]*NotifyGroup + l sync.Mutex +} + +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watchItem]*NotifyGroup), + } +} + +// watch subscribes a channel to the given watch item. +func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + grp, ok := w.items[wi] + if !ok { + grp = new(NotifyGroup) + w.items[wi] = grp + } + grp.Wait(ch) +} + +// stopWatch unsubscribes a channel from the given watch item. +func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + if grp, ok := w.items[wi]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, wi) + } + } +} + +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items ...watchItem) { + w.l.Lock() + defer w.l.Unlock() + + for _, wi := range items { + if grp, ok := w.items[wi]; ok { + grp.Notify() + } + } +} diff --git a/nomad/state/watch_test.go b/nomad/state/watch_test.go new file mode 100644 index 000000000..5992b65ee --- /dev/null +++ b/nomad/state/watch_test.go @@ -0,0 +1,64 @@ +package state + +import ( + "testing" +) + +func TestWatchItems(t *testing.T) { + // No items returns empty slice + wi := make(watchItems) + if items := wi.items(); len(items) != 0 { + t.Fatalf("expected empty, got: %#v", items) + } + + // Adding items works + wi.add(watchItem{table: "foo"}) + wi.add(watchItem{nodeID: "bar"}) + if items := wi.items(); len(items) != 2 { + t.Fatalf("expected 2 items, got: %#v", items) + } + + // Adding duplicates auto-dedupes + wi.add(watchItem{table: "foo"}) + if items := wi.items(); len(items) != 2 { + t.Fatalf("expected 2 items, got: %#v", items) + } +} + +func TestStateWatch_watch(t *testing.T) { + watch := newStateWatch() + notify1 := make(chan struct{}, 1) + notify2 := make(chan struct{}, 1) + notify3 := make(chan struct{}, 1) + + // Notifications trigger subscribed channels + watch.watch(watchItem{table: "foo"}, notify1) + watch.watch(watchItem{table: "bar"}, notify2) + watch.watch(watchItem{table: "baz"}, notify3) + + watch.notify(watchItem{table: "foo"}, watchItem{table: "bar"}) + if len(notify1) != 1 { + t.Fatalf("should notify") + } + if len(notify2) != 1 { + t.Fatalf("should notify") + } + if len(notify3) != 0 { + t.Fatalf("should not notify") + } +} + +func TestStateWatch_stopWatch(t *testing.T) { + watch := newStateWatch() + notify := make(chan struct{}) + + // First subscribe + watch.watch(watchItem{table: "foo"}, notify) + + // Unsubscribe stop notifications + watch.stopWatch(watchItem{table: "foo"}, notify) + watch.notify(watchItem{table: "foo"}) + if len(notify) != 0 { + t.Fatalf("should not notify") + } +} From d9a77e0257fbac9decd70c4d4bd3fbb1652d6b3f Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 13:52:15 -0700 Subject: [PATCH 16/92] nomad: add triggering for more types of events --- nomad/state/state_store.go | 67 ++++++++++++++++++++++++++++++-------- nomad/state/watch.go | 29 ++++++++--------- nomad/state/watch_test.go | 25 +++++++------- 3 files changed, 81 insertions(+), 40 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 685233447..0895b77e8 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -100,6 +100,10 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + 
watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: node.ID}) + // Check if the node already exists existing, err := txn.First("nodes", "id", node.ID) if err != nil { @@ -125,7 +129,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -135,6 +139,10 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -152,7 +160,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -162,6 +170,10 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -188,7 +200,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -198,6 +210,10 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "nodes"}) + watch.add(watchItem{node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -224,7 +240,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "nodes"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -261,6 +277,10 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "jobs"}) + watch.add(watchItem{job: job.ID}) + // Check if the job already exists existing, err := txn.First("jobs", "id", job.ID) if err != nil { @@ -284,7 +304,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -294,6 +314,10 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "jobs"}) + watch.add(watchItem{job: jobID}) + // Lookup the node existing, err := txn.First("jobs", "id", jobID) if err != nil { @@ -311,7 +335,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "jobs"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -361,14 +385,18 @@ func (s 
*StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "evals"}) + // Do a nested upsert for _, eval := range evals { + watch.add(watchItem{eval: eval.ID}) if err := s.nestedUpsertEval(txn, index, eval); err != nil { return err } } - txn.Defer(func() { s.watch.notify(watchItem{table: "evals"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -405,7 +433,6 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e txn := s.db.Txn(true) defer txn.Abort() watch := make(watchItems) - watch.add(watchItem{table: "evals"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -418,6 +445,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("evals", existing); err != nil { return fmt.Errorf("eval delete failed: %v", err) } + watch.add(watchItem{table: "evals"}) + watch.add(watchItem{eval: eval}) } for _, alloc := range allocs { @@ -428,10 +457,12 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } - watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } + watch.add(watchItem{table: "allocs"}) + watch.add(watchItem{alloc: alloc}) + watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) } // Update the indexes @@ -442,7 +473,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch.items()...) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -504,6 +535,11 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) + watch.add(watchItem{table: "allocs"}) + watch.add(watchItem{alloc: alloc.ID}) + watch.add(watchItem{allocNode: alloc.NodeID}) + // Look for existing alloc existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { @@ -537,7 +573,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watchItem{table: "allocs"}) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -547,12 +583,12 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() + watch := make(watchItems) watch.add(watchItem{table: "allocs"}) // Handle the allocations for _, alloc := range allocs { - watch.add(watchItem{allocNode: alloc.NodeID}) existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { return fmt.Errorf("alloc lookup failed: %v", err) @@ -571,6 +607,9 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er if err := txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } + + watch.add(watchItem{alloc: alloc.ID}) + watch.add(watchItem{allocNode: alloc.NodeID}) } // Update the indexes @@ -578,7 +617,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { 
s.watch.notify(watch.items()...) }) + txn.Defer(func() { s.watch.notify(watch) }) txn.Commit() return nil } @@ -721,7 +760,7 @@ func (s *StateRestore) Abort() { // Commit is used to commit the restore operation func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notify(s.items.items()...) }) + s.txn.Defer(func() { s.watch.notify(s.items) }) s.txn.Commit() } diff --git a/nomad/state/watch.go b/nomad/state/watch.go index c0e529b28..124eb0612 100644 --- a/nomad/state/watch.go +++ b/nomad/state/watch.go @@ -7,11 +7,11 @@ import ( // watchItem describes the scope of a watch. It is used to provide a uniform // input for subscribe/unsubscribe and notification firing. type watchItem struct { - allocID string + alloc string allocNode string - evalID string - jobID string - nodeID string + eval string + job string + node string table string } @@ -19,20 +19,19 @@ type watchItem struct { // the items as they are added using map keys. type watchItems map[watchItem]struct{} +func newWatchItems(items ...watchItem) watchItems { + wi := make(watchItems) + for _, item := range items { + wi.add(item) + } + return wi +} + // add adds an item to the watch set. func (w watchItems) add(wi watchItem) { w[wi] = struct{}{} } -// items returns the items as a slice. -func (w watchItems) items() []watchItem { - items := make([]watchItem, 0, len(w)) - for wi, _ := range w { - items = append(items, wi) - } - return items -} - // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. type stateWatch struct { @@ -74,11 +73,11 @@ func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { } // notify is used to fire notifications on the given watch items. -func (w *stateWatch) notify(items ...watchItem) { +func (w *stateWatch) notify(items watchItems) { w.l.Lock() defer w.l.Unlock() - for _, wi := range items { + for wi, _ := range items { if grp, ok := w.items[wi]; ok { grp.Notify() } diff --git a/nomad/state/watch_test.go b/nomad/state/watch_test.go index 5992b65ee..aad53d798 100644 --- a/nomad/state/watch_test.go +++ b/nomad/state/watch_test.go @@ -5,23 +5,19 @@ import ( ) func TestWatchItems(t *testing.T) { - // No items returns empty slice wi := make(watchItems) - if items := wi.items(); len(items) != 0 { - t.Fatalf("expected empty, got: %#v", items) - } // Adding items works wi.add(watchItem{table: "foo"}) - wi.add(watchItem{nodeID: "bar"}) - if items := wi.items(); len(items) != 2 { - t.Fatalf("expected 2 items, got: %#v", items) + wi.add(watchItem{node: "bar"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) } // Adding duplicates auto-dedupes wi.add(watchItem{table: "foo"}) - if items := wi.items(); len(items) != 2 { - t.Fatalf("expected 2 items, got: %#v", items) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) } } @@ -36,7 +32,11 @@ func TestStateWatch_watch(t *testing.T) { watch.watch(watchItem{table: "bar"}, notify2) watch.watch(watchItem{table: "baz"}, notify3) - watch.notify(watchItem{table: "foo"}, watchItem{table: "bar"}) + items := make(watchItems) + items.add(watchItem{table: "foo"}) + items.add(watchItem{table: "bar"}) + + watch.notify(items) if len(notify1) != 1 { t.Fatalf("should notify") } @@ -57,7 +57,10 @@ func TestStateWatch_stopWatch(t *testing.T) { // Unsubscribe stop notifications watch.stopWatch(watchItem{table: "foo"}, notify) - watch.notify(watchItem{table: "foo"}) + + items := make(watchItems) + items.add(watchItem{table: "foo"}) + watch.notify(items) if len(notify) != 0 { 
t.Fatalf("should not notify") } From 573e9dfb9abfb15c51602797a0c0ab05fca4ddf2 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 14:47:39 -0700 Subject: [PATCH 17/92] nomad: export watcher to share between rpc and state store --- nomad/alloc_endpoint.go | 7 +- nomad/eval_endpoint.go | 7 +- nomad/job_endpoint.go | 7 +- nomad/node_endpoint.go | 13 +-- nomad/rpc.go | 16 ++- nomad/state/state_store.go | 182 ++++++++++++++++++++------------ nomad/state/state_store_test.go | 56 +++++++++- nomad/state/watch.go | 85 --------------- nomad/state/watch_test.go | 67 ------------ nomad/watch/watch.go | 33 ++++++ nomad/watch/watch_test.go | 31 ++++++ 11 files changed, 255 insertions(+), 249 deletions(-) delete mode 100644 nomad/state/watch.go delete mode 100644 nomad/state/watch_test.go create mode 100644 nomad/watch/watch.go create mode 100644 nomad/watch/watch_test.go diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index a2ce6a09a..f3b9dbdc4 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -5,6 +5,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Alloc endpoint is used for manipulating allocations @@ -21,9 +22,9 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "allocs", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "allocs"}), run: func() error { // Capture all the allocations snap, err := a.srv.fsm.State().Snapshot() diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 5d87948aa..07dfc18fe 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) const ( @@ -221,9 +222,9 @@ func (e *Eval) List(args *structs.EvalListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "evals", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "evals"}), run: func() error { // Scan all the evaluations snap, err := e.srv.fsm.State().Snapshot() diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 30bc35563..ff296a986 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Job endpoint is used for job interactions @@ -218,9 +219,9 @@ func (j *Job) List(args *structs.JobListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "jobs", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "jobs"}), run: func() error { // Capture all the jobs snap, err := j.srv.fsm.State().Snapshot() diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 7e7332974..65a83e1b1 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Node endpoint is used for client interactions @@ -330,9 +331,9 @@ func (n *Node) GetAllocs(args 
*structs.NodeSpecificRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchAllocNode: args.NodeID, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), run: func() error { // Look for the node snap, err := n.srv.fsm.State().Snapshot() @@ -406,9 +407,9 @@ func (n *Node) List(args *structs.NodeListRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - watchTable: "nodes", + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "nodes"}), run: func() error { // Capture all the nodes snap, err := n.srv.fsm.State().Snapshot() diff --git a/nomad/rpc.go b/nomad/rpc.go index f1977dbc7..21f9c9dc6 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" "github.com/hashicorp/raft" "github.com/hashicorp/yamux" ) @@ -268,11 +269,10 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - watchAllocNode string - watchTable string - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + watch watch.Items + run func() error } // blockingRPC is used for queries that need to wait for a @@ -307,15 +307,13 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { state = s.fsm.State() defer func() { timeout.Stop() - state.StopWatchAllocNode(opts.watchAllocNode, notifyCh) - state.StopWatchTable(opts.watchTable, notifyCh) + state.StopWatch(opts.watch, notifyCh) }() REGISTER_NOTIFY: // Register the notification channel. This may be done // multiple times if we have not reached the target wait index. - state.WatchAllocNode(opts.watchAllocNode, notifyCh) - state.WatchTable(opts.watchTable, notifyCh) + state.Watch(opts.watch, notifyCh) RUN_QUERY: // Update the query meta data diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 0895b77e8..17484f985 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4,9 +4,11 @@ import ( "fmt" "io" "log" + "sync" "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // IndexEntry is used with the "index" table @@ -68,29 +70,21 @@ func (s *StateStore) Restore() (*StateRestore, error) { r := &StateRestore{ txn: txn, watch: s.watch, - items: make(watchItems), + items: watch.NewItems(), } return r, nil } -// WatchTable is used to subscribe a channel to a full table watch. -func (s *StateStore) WatchTable(table string, notify chan struct{}) { - s.watch.watch(watchItem{table: table}, notify) +func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { + for wi, _ := range items { + s.watch.watch(wi, notify) + } } -// StopWatchTable unsubscribes a channel from a full table watch. -func (s *StateStore) StopWatchTable(table string, notify chan struct{}) { - s.watch.stopWatch(watchItem{table: table}, notify) -} - -// WatchAllocNode is used to subscribe a channel to a node allocation watch. 
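A short sketch of the exported form of the same subscription, mirroring what blockingRPC does with opts.watch (nodeID and the channel are placeholders for this example):

items := watch.NewItems(watch.Item{Table: "allocs"})
items.Add(watch.Item{AllocNode: nodeID})

notifyCh := make(chan struct{}, 1)
state.Watch(items, notifyCh)
defer state.StopWatch(items, notifyCh)

<-notifyCh // fires once a matching write is committed
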
-func (s *StateStore) WatchAllocNode(nodeID string, notify chan struct{}) { - s.watch.watch(watchItem{allocNode: nodeID}, notify) -} - -// StopWatchAllocNode unsubscribes a channel from a node allocation watch. -func (s *StateStore) StopWatchAllocNode(nodeID string, notify chan struct{}) { - s.watch.stopWatch(watchItem{allocNode: nodeID}, notify) +func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { + for wi, _ := range items { + s.watch.stopWatch(wi, notify) + } } // UpsertNode is used to register a node or update a node definition @@ -100,9 +94,9 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: node.ID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: node.ID}) // Check if the node already exists existing, err := txn.First("nodes", "id", node.ID) @@ -129,7 +123,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -139,9 +133,9 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: nodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -160,7 +154,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -170,9 +164,9 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: nodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -200,7 +194,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -210,9 +204,9 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "nodes"}) - watch.add(watchItem{node: nodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -240,7 +234,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -277,9 +271,9 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "jobs"}) - 
watch.add(watchItem{job: job.ID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: job.ID}) // Check if the job already exists existing, err := txn.First("jobs", "id", job.ID) @@ -304,7 +298,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -314,9 +308,9 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "jobs"}) - watch.add(watchItem{job: jobID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: jobID}) // Lookup the node existing, err := txn.First("jobs", "id", jobID) @@ -335,7 +329,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -385,18 +379,18 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "evals"}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "evals"}) // Do a nested upsert for _, eval := range evals { - watch.add(watchItem{eval: eval.ID}) + watcher.Add(watch.Item{Eval: eval.ID}) if err := s.nestedUpsertEval(txn, index, eval); err != nil { return err } } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -432,7 +426,9 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "evals"}) + watcher.Add(watch.Item{Table: "allocs"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -445,8 +441,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("evals", existing); err != nil { return fmt.Errorf("eval delete failed: %v", err) } - watch.add(watchItem{table: "evals"}) - watch.add(watchItem{eval: eval}) + watcher.Add(watch.Item{Eval: eval}) } for _, alloc := range allocs { @@ -460,9 +455,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } - watch.add(watchItem{table: "allocs"}) - watch.add(watchItem{alloc: alloc}) - watch.add(watchItem{allocNode: existing.(*structs.Allocation).NodeID}) + watcher.Add(watch.Item{Alloc: alloc}) + watcher.Add(watch.Item{AllocNode: existing.(*structs.Allocation).NodeID}) } // Update the indexes @@ -473,7 +467,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -535,10 +529,10 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "allocs"}) - 
watch.add(watchItem{alloc: alloc.ID}) - watch.add(watchItem{allocNode: alloc.NodeID}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "allocs"}) + watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocNode: alloc.NodeID}) // Look for existing alloc existing, err := txn.First("allocs", "id", alloc.ID) @@ -573,7 +567,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -584,8 +578,8 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er txn := s.db.Txn(true) defer txn.Abort() - watch := make(watchItems) - watch.add(watchItem{table: "allocs"}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "allocs"}) // Handle the allocations for _, alloc := range allocs { @@ -608,8 +602,8 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("alloc insert failed: %v", err) } - watch.add(watchItem{alloc: alloc.ID}) - watch.add(watchItem{allocNode: alloc.NodeID}) + watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } // Update the indexes @@ -617,7 +611,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notify(watch) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -750,7 +744,7 @@ type StateSnapshot struct { type StateRestore struct { txn *memdb.Txn watch *stateWatch - items watchItems + items watch.Items } // Abort is used to abort the restore operation @@ -766,7 +760,7 @@ func (s *StateRestore) Commit() { // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { - r.items.add(watchItem{table: "nodes"}) + r.items.Add(watch.Item{Table: "nodes"}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -775,7 +769,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { - r.items.add(watchItem{table: "jobs"}) + r.items.Add(watch.Item{Table: "jobs"}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -784,7 +778,7 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { - r.items.add(watchItem{table: "evals"}) + r.items.Add(watch.Item{Table: "evals"}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -793,8 +787,8 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.items.add(watchItem{table: "allocs"}) - r.items.add(watchItem{allocNode: alloc.NodeID}) + r.items.Add(watch.Item{Table: "allocs"}) + r.items.Add(watch.Item{AllocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -808,3 +802,55 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } + +// stateWatch holds shared state for watching updates. 
This is +// outside of StateStore so it can be shared with snapshots. +type stateWatch struct { + items map[watch.Item]*NotifyGroup + l sync.Mutex +} + +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watch.Item]*NotifyGroup), + } +} + +// watch subscribes a channel to the given watch item. +func (w *stateWatch) watch(wi watch.Item, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + grp, ok := w.items[wi] + if !ok { + grp = new(NotifyGroup) + w.items[wi] = grp + } + grp.Wait(ch) +} + +// stopWatch unsubscribes a channel from the given watch item. +func (w *stateWatch) stopWatch(wi watch.Item, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + if grp, ok := w.items[wi]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, wi) + } + } +} + +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items watch.Items) { + w.l.Lock() + defer w.l.Unlock() + + for wi, _ := range items { + if grp, ok := w.items[wi]; ok { + grp.Notify() + } + } +} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 58f8093bf..2d1134d9c 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) func testStateStore(t *testing.T) *StateStore { @@ -585,7 +586,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } notify1 := make(chan struct{}, 1) - state.WatchAllocNode(alloc.NodeID, notify1) + state.Watch(watch.NewItems(watch.Item{AllocNode: alloc.NodeID}), notify1) err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) if err != nil { @@ -808,14 +809,16 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { } } -func TestStateStore_WatchAllocNode(t *testing.T) { +func TestStateStore_Watch(t *testing.T) { state := testStateStore(t) notify1 := make(chan struct{}, 1) notify2 := make(chan struct{}, 1) - state.WatchAllocNode("foo", notify1) - state.WatchAllocNode("foo", notify2) - state.StopWatchAllocNode("foo", notify2) + + items := watch.NewItems(watch.Item{AllocNode: "foo"}) + state.Watch(items, notify1) + state.Watch(items, notify2) + state.StopWatch(items, notify2) alloc := mock.Alloc() alloc.NodeID = "foo" @@ -1032,6 +1035,49 @@ func TestStateStore_RestoreAlloc(t *testing.T) { } } +func TestStateWatch_watch(t *testing.T) { + sw := newStateWatch() + notify1 := make(chan struct{}, 1) + notify2 := make(chan struct{}, 1) + notify3 := make(chan struct{}, 1) + + // Notifications trigger subscribed channels + sw.watch(watch.Item{Table: "foo"}, notify1) + sw.watch(watch.Item{Table: "bar"}, notify2) + sw.watch(watch.Item{Table: "baz"}, notify3) + + items := watch.NewItems() + items.Add(watch.Item{Table: "foo"}) + items.Add(watch.Item{Table: "bar"}) + + sw.notify(items) + if len(notify1) != 1 { + t.Fatalf("should notify") + } + if len(notify2) != 1 { + t.Fatalf("should notify") + } + if len(notify3) != 0 { + t.Fatalf("should not notify") + } +} + +func TestStateWatch_stopWatch(t *testing.T) { + sw := newStateWatch() + notify := make(chan struct{}) + + // First subscribe + sw.watch(watch.Item{Table: "foo"}, notify) + + // Unsubscribe stop notifications + sw.stopWatch(watch.Item{Table: "foo"}, notify) + + sw.notify(watch.NewItems(watch.Item{Table: "foo"})) + if len(notify) != 0 { + t.Fatalf("should not notify") + } +} + // NodeIDSort 
is used to sort nodes by ID type NodeIDSort []*structs.Node diff --git a/nomad/state/watch.go b/nomad/state/watch.go deleted file mode 100644 index 124eb0612..000000000 --- a/nomad/state/watch.go +++ /dev/null @@ -1,85 +0,0 @@ -package state - -import ( - "sync" -) - -// watchItem describes the scope of a watch. It is used to provide a uniform -// input for subscribe/unsubscribe and notification firing. -type watchItem struct { - alloc string - allocNode string - eval string - job string - node string - table string -} - -// watchItems is a helper used to construct a set of watchItems. It deduplicates -// the items as they are added using map keys. -type watchItems map[watchItem]struct{} - -func newWatchItems(items ...watchItem) watchItems { - wi := make(watchItems) - for _, item := range items { - wi.add(item) - } - return wi -} - -// add adds an item to the watch set. -func (w watchItems) add(wi watchItem) { - w[wi] = struct{}{} -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. -type stateWatch struct { - items map[watchItem]*NotifyGroup - l sync.Mutex -} - -// newStateWatch creates a new stateWatch for change notification. -func newStateWatch() *stateWatch { - return &stateWatch{ - items: make(map[watchItem]*NotifyGroup), - } -} - -// watch subscribes a channel to the given watch item. -func (w *stateWatch) watch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - grp, ok := w.items[wi] - if !ok { - grp = new(NotifyGroup) - w.items[wi] = grp - } - grp.Wait(ch) -} - -// stopWatch unsubscribes a channel from the given watch item. -func (w *stateWatch) stopWatch(wi watchItem, ch chan struct{}) { - w.l.Lock() - defer w.l.Unlock() - - if grp, ok := w.items[wi]; ok { - grp.Clear(ch) - if grp.Empty() { - delete(w.items, wi) - } - } -} - -// notify is used to fire notifications on the given watch items. 
-func (w *stateWatch) notify(items watchItems) { - w.l.Lock() - defer w.l.Unlock() - - for wi, _ := range items { - if grp, ok := w.items[wi]; ok { - grp.Notify() - } - } -} diff --git a/nomad/state/watch_test.go b/nomad/state/watch_test.go deleted file mode 100644 index aad53d798..000000000 --- a/nomad/state/watch_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package state - -import ( - "testing" -) - -func TestWatchItems(t *testing.T) { - wi := make(watchItems) - - // Adding items works - wi.add(watchItem{table: "foo"}) - wi.add(watchItem{node: "bar"}) - if len(wi) != 2 { - t.Fatalf("expected 2 items, got: %#v", wi) - } - - // Adding duplicates auto-dedupes - wi.add(watchItem{table: "foo"}) - if len(wi) != 2 { - t.Fatalf("expected 2 items, got: %#v", wi) - } -} - -func TestStateWatch_watch(t *testing.T) { - watch := newStateWatch() - notify1 := make(chan struct{}, 1) - notify2 := make(chan struct{}, 1) - notify3 := make(chan struct{}, 1) - - // Notifications trigger subscribed channels - watch.watch(watchItem{table: "foo"}, notify1) - watch.watch(watchItem{table: "bar"}, notify2) - watch.watch(watchItem{table: "baz"}, notify3) - - items := make(watchItems) - items.add(watchItem{table: "foo"}) - items.add(watchItem{table: "bar"}) - - watch.notify(items) - if len(notify1) != 1 { - t.Fatalf("should notify") - } - if len(notify2) != 1 { - t.Fatalf("should notify") - } - if len(notify3) != 0 { - t.Fatalf("should not notify") - } -} - -func TestStateWatch_stopWatch(t *testing.T) { - watch := newStateWatch() - notify := make(chan struct{}) - - // First subscribe - watch.watch(watchItem{table: "foo"}, notify) - - // Unsubscribe stop notifications - watch.stopWatch(watchItem{table: "foo"}, notify) - - items := make(watchItems) - items.add(watchItem{table: "foo"}) - watch.notify(items) - if len(notify) != 0 { - t.Fatalf("should not notify") - } -} diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go new file mode 100644 index 000000000..e5cdce16f --- /dev/null +++ b/nomad/watch/watch.go @@ -0,0 +1,33 @@ +package watch + +// The watch package provides a means of describing a watch for a blocking +// query. It is exported so it may be shared between Nomad's RPC layer and +// the underlying state store. + +// Item describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. +type Item struct { + Alloc string + AllocNode string + Eval string + Job string + Node string + Table string +} + +// Items is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type Items map[Item]struct{} + +func NewItems(items ...Item) Items { + wi := make(Items) + for _, item := range items { + wi.Add(item) + } + return wi +} + +// Add adds an item to the watch set. 
+func (wi Items) Add(i Item) { + wi[i] = struct{}{} +} diff --git a/nomad/watch/watch_test.go b/nomad/watch/watch_test.go new file mode 100644 index 000000000..9a8901aa8 --- /dev/null +++ b/nomad/watch/watch_test.go @@ -0,0 +1,31 @@ +package watch + +import ( + "testing" +) + +func TestWatchItems(t *testing.T) { + // Creates an empty set of items + wi := NewItems() + if len(wi) != 0 { + t.Fatalf("expect 0 items, got: %#v", wi) + } + + // Creates a new set of supplied items + wi = NewItems(Item{Table: "foo"}) + if len(wi) != 1 { + t.Fatalf("expected 1 item, got: %#v", wi) + } + + // Adding items works + wi.Add(Item{Node: "bar"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) + } + + // Adding duplicates auto-dedupes + wi.Add(Item{Table: "foo"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) + } +} From cd5bdd7c08d86ac39b880dc3bbffc298f29b7552 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 15:01:29 -0700 Subject: [PATCH 18/92] nomad: support blocking queries on single jobs --- nomad/job_endpoint.go | 57 ++++++++++++++++++++++---------------- nomad/job_endpoint_test.go | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 24 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index ff296a986..ad15d626d 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -181,32 +181,41 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now()) - // Look for the job - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.JobByID(args.JobID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Job: args.JobID}), + run: func() error { - // Setup the output - if out != nil { - reply.Job = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("jobs") - if err != nil { - return err - } - reply.Index = index - } + // Look for the job + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.JobByID(args.JobID) + if err != nil { + return err + } - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Setup the output + if out != nil { + reply.Job = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("jobs") + if err != nil { + return err + } + reply.Index = index + } + + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return j.srv.blockingRPC(&opts) } // List is used to list the jobs registered in the system diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 0591e73bf..5b4ba079b 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -364,6 +364,55 @@ func TestJobEndpoint_GetJob(t *testing.T) { } } +func TestJobEndpoint_GetJob_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the jobs + job1 := mock.Job() + job2 := mock.Job() + + // Upsert a job we are not interested in first. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertJob(2, job1); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert another job later which should trigger the watch. + time.AfterFunc(200*time.Millisecond, func() { + if err := state.UpsertJob(2, job2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.JobSpecificRequest{ + JobID: job2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.SingleJobResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if resp.Job == nil || resp.Job.ID != job2.ID { + t.Fatalf("bad: %#v", resp.Job) + } +} + func TestJobEndpoint_ListJobs(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() From 82529305f3023790465e107b1c630bff2c886529 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 29 Oct 2015 15:26:35 -0700 Subject: [PATCH 19/92] Cleanup exec_linux, force cgroups, check for systemd and update the Open() api --- client/executor/exec_linux.go | 504 +++++++++++++---------------- client/executor/exec_linux_test.go | 37 +-- command/spawn_daemon.go | 2 +- 3 files changed, 248 insertions(+), 295 deletions(-) diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index ceb178063..1a52265b1 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -22,14 +22,12 @@ import ( "github.com/hashicorp/nomad/helper/discover" "github.com/hashicorp/nomad/nomad/structs" + "github.com/opencontainers/runc/libcontainer/cgroups" cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" ) -const ( - cgroupMount = "/sys/fs/cgroup" -) - var ( // A mapping of directories on the host OS to attempt to embed inside each // task's chroot. @@ -45,17 +43,7 @@ var ( ) func NewExecutor() Executor { - e := LinuxExecutor{} - - // TODO: In a follow-up PR make it so this only happens once per client. - // Fingerprinting shouldn't happen per task. - - // Check that cgroups are available. - if _, err := os.Stat(cgroupMount); err == nil { - e.cgroupEnabled = true - } - - return &e + return &LinuxExecutor{} } // Linux executor is designed to run on linux kernel 2.8+. @@ -63,22 +51,24 @@ type LinuxExecutor struct { cmd user *user.User - // Finger print capabilities. - cgroupEnabled bool - // Isolation configurations. groups *cgroupConfig.Cgroup alloc *allocdir.AllocDir taskName string taskDir string - // Tracking of child process. - spawnChild exec.Cmd + // Tracking of spawn process. + spawnChild *os.Process spawnOutputWriter *os.File spawnOutputReader *os.File - // Track whether there are filesystems mounted in the task dir. - mounts bool + // Tracking of user process. 
+ exitStatusFile string + userPid int +} + +func (e *LinuxExecutor) Command() *cmd { + return &e.cmd } func (e *LinuxExecutor) Limit(resources *structs.Resources) error { @@ -86,139 +76,62 @@ func (e *LinuxExecutor) Limit(resources *structs.Resources) error { return errNoResources } - if e.cgroupEnabled { - return e.configureCgroups(resources) + return e.configureCgroups(resources) +} + +// execLinuxID contains the necessary information to reattach to an executed +// process and cleanup the created cgroups. +type ExecLinuxID struct { + Groups *cgroupConfig.Cgroup + SpawnPid int + UserPid int + ExitStatusFile string + TaskDir string +} + +func (e *LinuxExecutor) Open(id string) error { + // De-serialize the ID. + dec := json.NewDecoder(strings.NewReader(id)) + var execID ExecLinuxID + if err := dec.Decode(&execID); err != nil { + return fmt.Errorf("Failed to parse id: %v", err) + } + + // Setup the executor. + e.groups = execID.Groups + e.exitStatusFile = execID.ExitStatusFile + e.userPid = execID.UserPid + e.taskDir = execID.TaskDir + + proc, err := os.FindProcess(execID.SpawnPid) + if proc != nil && err == nil { + e.spawnChild = proc } return nil } -func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { - e.taskName = taskName - taskDir, ok := alloc.TaskDirs[taskName] - if !ok { - fmt.Errorf("Couldn't find task directory for task %v", taskName) - } - e.taskDir = taskDir - - if err := alloc.MountSharedDir(taskName); err != nil { - return err +func (e *LinuxExecutor) ID() (string, error) { + if e.spawnChild == nil { + return "", fmt.Errorf("Process has finished or was never started") } - if err := alloc.Embed(taskName, chrootEnv); err != nil { - return err + // Build the ID. + id := ExecLinuxID{ + Groups: e.groups, + SpawnPid: e.spawnChild.Pid, + UserPid: e.userPid, + ExitStatusFile: e.exitStatusFile, + TaskDir: e.taskDir, } - // Mount dev - dev := filepath.Join(taskDir, "dev") - if err := os.Mkdir(dev, 0777); err != nil { - return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(id); err != nil { + return "", fmt.Errorf("Failed to serialize id: %v", err) } - if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) - } - - // Mount proc - proc := filepath.Join(taskDir, "proc") - if err := os.Mkdir(proc, 0777); err != nil { - return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) - } - - if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) - } - - // Set the tasks AllocDir environment variable. - env, err := environment.ParseFromList(e.Cmd.Env) - if err != nil { - return err - } - env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) - env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) - e.Cmd.Env = env.List() - - e.alloc = alloc - e.mounts = true - return nil -} - -func (e *LinuxExecutor) cleanTaskDir() error { - if e.alloc == nil { - return errors.New("ConfigureTaskDir() must be called before Start()") - } - - if !e.mounts { - return nil - } - - // Unmount dev. - errs := new(multierror.Error) - dev := filepath.Join(e.taskDir, "dev") - if err := syscall.Unmount(dev, 0); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) - } - - // Unmount proc. 
- proc := filepath.Join(e.taskDir, "proc") - if err := syscall.Unmount(proc, 0); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) - } - - e.mounts = false - return errs.ErrorOrNil() -} - -func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { - if !e.cgroupEnabled { - return nil - } - - e.groups = &cgroupConfig.Cgroup{} - - // Groups will be created in a heiarchy according to the resource being - // constrained, current session, and then this unique name. Restraints are - // then placed in the corresponding files. - // Ex: restricting a process to 2048Mhz CPU and 2MB of memory: - // $ cat /sys/fs/cgroup/cpu/user/1000.user/4.session//cpu.shares - // 2028 - // $ cat /sys/fs/cgroup/memory/user/1000.user/4.session//memory.limit_in_bytes - // 2097152 - e.groups.Name = structs.GenerateUUID() - - // TODO: verify this is needed for things like network access - e.groups.AllowAllDevices = true - - if resources.MemoryMB > 0 { - // Total amount of memory allowed to consume - e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) - // Disable swap to avoid issues on the machine - e.groups.MemorySwap = int64(-1) - } - - if resources.CPU != 0 { - if resources.CPU < 2 { - return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) - } - - // Set the relative CPU shares for this cgroup. - // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any - // given process will have at least that amount of resources, but likely - // more since it is (probably) rare that the machine will run at 100% - // CPU. This scale will cease to work if a node is overprovisioned. - e.groups.CpuShares = int64(resources.CPU) - } - - if resources.IOPS != 0 { - // Validate it is in an acceptable range. - if resources.IOPS < 10 || resources.IOPS > 1000 { - return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) - } - - e.groups.BlkioWeight = uint16(resources.IOPS) - } - - return nil + return buffer.String(), nil } func (e *LinuxExecutor) runAs(userid string) error { @@ -292,33 +205,30 @@ func (e *LinuxExecutor) spawnDaemon() error { return fmt.Errorf("Failed to determine the nomad executable: %v", err) } + c := command.DaemonConfig{ + Cmd: e.cmd.Cmd, + Chroot: e.taskDir, + StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + StdinFile: "/dev/null", + ExitStatusFile: e.exitStatusFile, + } + // Serialize the cmd and the cgroup configuration so it can be passed to the // sub-process. var buffer bytes.Buffer enc := json.NewEncoder(&buffer) - - c := command.DaemonConfig{ - Cmd: e.cmd.Cmd, - Chroot: e.taskDir, - StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), - StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), - StdinFile: "/dev/null", - } if err := enc.Encode(c); err != nil { return fmt.Errorf("Failed to serialize daemon configuration: %v", err) } - // Create a pipe to capture Stdout. - pr, pw, err := os.Pipe() - if err != nil { + // Create a pipe to capture stdout. + if e.spawnOutputReader, e.spawnOutputWriter, err = os.Pipe(); err != nil { return err } - e.spawnOutputWriter = pw - e.spawnOutputReader = pr // Call ourselves using a hidden flag. 
The new instance of nomad will join - // the passed cgroup, forkExec the cmd, and output status codes through - // Stdout. + // the passed cgroup, forkExec the cmd, and return statuses through stdout. escaped := strconv.Quote(buffer.String()) spawn := exec.Command(bin, "spawn-daemon", escaped) spawn.Stdout = e.spawnOutputWriter @@ -334,26 +244,19 @@ func (e *LinuxExecutor) spawnDaemon() error { } // Join the spawn-daemon to the cgroup. - if e.groups != nil { - manager := cgroupFs.Manager{} - manager.Cgroups = e.groups + manager := e.getCgroupManager(e.groups) - // Apply will place the current pid into the tasks file for each of the - // created cgroups: - // /sys/fs/cgroup/memory/user/1000.user/4.session//tasks - // - // Apply requires superuser permissions, and may fail if Nomad is not run with - // the required permissions - if err := manager.Apply(spawn.Process.Pid); err != nil { - errs := new(multierror.Error) - errs = multierror.Append(errs, fmt.Errorf("Failed to join spawn-daemon to the cgroup (config => %+v): %v", manager.Cgroups, err)) + // Apply will place the spawn dameon into the created cgroups. + if err := manager.Apply(spawn.Process.Pid); err != nil { + errs := new(multierror.Error) + errs = multierror.Append(errs, + fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err)) - if err := sendAbortCommand(spawnStdIn); err != nil { - errs = multierror.Append(errs, err) - } - - return errs + if err := sendAbortCommand(spawnStdIn); err != nil { + errs = multierror.Append(errs, err) } + + return errs } // Tell it to start. @@ -372,7 +275,8 @@ func (e *LinuxExecutor) spawnDaemon() error { return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) } - e.spawnChild = *spawn + e.userPid = resp.UserPID + e.spawnChild = spawn.Process return nil } @@ -394,74 +298,22 @@ func sendAbortCommand(w io.Writer) error { return nil } -// Open's behavior is to kill all processes associated with the id and return an -// error. This is done because it is not possible to re-attach to the -// spawn-daemon's stdout to retrieve status messages. -func (e *LinuxExecutor) Open(id string) error { - parts := strings.SplitN(id, ":", 2) - if len(parts) != 2 { - return fmt.Errorf("Invalid id: %v", id) - } - - switch parts[0] { - case "PID": - pid, err := strconv.Atoi(parts[1]) - if err != nil { - return fmt.Errorf("Invalid id: failed to parse pid %v", parts[1]) - } - - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("Failed to find Pid %v: %v", pid, err) - } - - if err := process.Kill(); err != nil { - return fmt.Errorf("Failed to kill Pid %v: %v", pid, err) - } - case "CGROUP": - if !e.cgroupEnabled { - return errors.New("Passed a a cgroup identifier, but cgroups are disabled") - } - - // De-serialize the cgroup configuration. - dec := json.NewDecoder(strings.NewReader(parts[1])) - var groups cgroupConfig.Cgroup - if err := dec.Decode(&groups); err != nil { - return fmt.Errorf("Failed to parse cgroup configuration: %v", err) - } - - e.groups = &groups - if err := e.destroyCgroup(); err != nil { - return err - } - // TODO: cleanTaskDir is a little more complicated here because the OS - // may have already unmounted in the case of a restart. Need to scan. 
- default: - return fmt.Errorf("Invalid id type: %v", parts[0]) - } - - return errors.New("Could not re-open to id (intended).") -} - func (e *LinuxExecutor) Wait() error { - if e.spawnChild.Process == nil { - return errors.New("Can not find child to wait on") + if e.spawnOutputReader != nil { + e.spawnOutputReader.Close() } - defer e.spawnOutputWriter.Close() - defer e.spawnOutputReader.Close() + if e.spawnOutputWriter != nil { + e.spawnOutputWriter.Close() + } errs := new(multierror.Error) - if err := e.spawnChild.Wait(); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Process.Pid, err)) + if err := e.spawnWait(); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Pid, err)) } - // If they fork/exec and then exit, wait will return but they will be still - // running processes so we need to kill the full cgroup. - if e.groups != nil { - if err := e.destroyCgroup(); err != nil { - errs = multierror.Append(errs, err) - } + if err := e.destroyCgroup(); err != nil { + errs = multierror.Append(errs, err) } if err := e.cleanTaskDir(); err != nil { @@ -471,27 +323,18 @@ func (e *LinuxExecutor) Wait() error { return errs.ErrorOrNil() } -// If cgroups are used, the ID is the cgroup structurue. Otherwise, it is the -// PID of the spawn-daemon process. An error is returned if the process was -// never started. -func (e *LinuxExecutor) ID() (string, error) { - if e.spawnChild.Process != nil { - if e.cgroupEnabled && e.groups != nil { - // Serialize the cgroup structure so it can be undone on suabsequent - // opens. - var buffer bytes.Buffer - enc := json.NewEncoder(&buffer) - if err := enc.Encode(e.groups); err != nil { - return "", fmt.Errorf("Failed to serialize daemon configuration: %v", err) - } - - return fmt.Sprintf("CGROUP:%v", buffer.String()), nil - } - - return fmt.Sprintf("PID:%d", e.spawnChild.Process.Pid), nil +// spawnWait waits on the spawn-daemon and can handle the spawn-daemon not being +// a child of this process. +func (e *LinuxExecutor) spawnWait() error { + // TODO: This needs to be able to wait on non-child processes. + state, err := e.spawnChild.Wait() + if err != nil { + return err + } else if !state.Success() { + return fmt.Errorf("exited with non-zero code") } - return "", fmt.Errorf("Process has finished or was never started") + return nil } func (e *LinuxExecutor) Shutdown() error { @@ -507,16 +350,6 @@ func (e *LinuxExecutor) ForceStop() error { e.spawnOutputWriter.Close() } - // If the task is not running inside a cgroup then just the spawn-daemon child is killed. - // TODO: Find a good way to kill the children of the spawn-daemon. - if e.groups == nil { - if err := e.spawnChild.Process.Kill(); err != nil { - return fmt.Errorf("Failed to kill child (%v): %v", e.spawnChild.Process.Pid, err) - } - - return nil - } - errs := new(multierror.Error) if e.groups != nil { if err := e.destroyCgroup(); err != nil { @@ -531,13 +364,131 @@ func (e *LinuxExecutor) ForceStop() error { return errs.ErrorOrNil() } +// Task Directory related functions. 
+ +func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { + e.taskName = taskName + taskDir, ok := alloc.TaskDirs[taskName] + if !ok { + fmt.Errorf("Couldn't find task directory for task %v", taskName) + } + e.taskDir = taskDir + + if err := alloc.MountSharedDir(taskName); err != nil { + return err + } + + if err := alloc.Embed(taskName, chrootEnv); err != nil { + return err + } + + // Mount dev + dev := filepath.Join(taskDir, "dev") + if err := os.Mkdir(dev, 0777); err != nil { + return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) + } + + if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) + } + + // Mount proc + proc := filepath.Join(taskDir, "proc") + if err := os.Mkdir(proc, 0777); err != nil { + return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) + } + + if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) + } + + // Set the tasks AllocDir environment variable. + env, err := environment.ParseFromList(e.Cmd.Env) + if err != nil { + return err + } + env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) + env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) + e.Cmd.Env = env.List() + + // Store the file path to save the exit status to. + e.exitStatusFile = filepath.Join(alloc.AllocDir, fmt.Sprintf("%s_%s", taskName, "exit_status")) + + e.alloc = alloc + return nil +} + +func (e *LinuxExecutor) pathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + return false + } + } + return true +} + +func (e *LinuxExecutor) cleanTaskDir() error { + // Unmount dev. + errs := new(multierror.Error) + dev := filepath.Join(e.taskDir, "dev") + if e.pathExists(dev) { + if err := syscall.Unmount(dev, 0); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) + } + } + + // Unmount proc. + proc := filepath.Join(e.taskDir, "proc") + if e.pathExists(proc) { + if err := syscall.Unmount(proc, 0); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) + } + } + + return errs.ErrorOrNil() +} + +// Cgroup related functions. + +func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { + e.groups = &cgroupConfig.Cgroup{} + e.groups.Name = structs.GenerateUUID() + + // TODO: verify this is needed for things like network access + e.groups.AllowAllDevices = true + + if resources.MemoryMB > 0 { + // Total amount of memory allowed to consume + e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) + // Disable swap to avoid issues on the machine + e.groups.MemorySwap = int64(-1) + } + + if resources.CPU < 2 { + return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) + } + + // Set the relative CPU shares for this cgroup. + e.groups.CpuShares = int64(resources.CPU) + + if resources.IOPS != 0 { + // Validate it is in an acceptable range. 
+ if resources.IOPS < 10 || resources.IOPS > 1000 { + return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) + } + + e.groups.BlkioWeight = uint16(resources.IOPS) + } + + return nil +} + func (e *LinuxExecutor) destroyCgroup() error { if e.groups == nil { return errors.New("Can't destroy: cgroup configuration empty") } - manager := cgroupFs.Manager{} - manager.Cgroups = e.groups + manager := e.getCgroupManager(e.groups) pids, err := manager.GetPids() if err != nil { return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err) @@ -555,11 +506,6 @@ func (e *LinuxExecutor) destroyCgroup() error { multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err)) continue } - - if _, err := process.Wait(); err != nil { - multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err)) - continue - } } // Remove the cgroup. @@ -574,6 +520,12 @@ func (e *LinuxExecutor) destroyCgroup() error { return nil } -func (e *LinuxExecutor) Command() *cmd { - return &e.cmd +// getCgroupManager returns the correct libcontainer cgroup manager. +func (e *LinuxExecutor) getCgroupManager(groups *cgroupConfig.Cgroup) cgroups.Manager { + var manager cgroups.Manager + manager = &cgroupFs.Manager{Cgroups: groups} + if systemd.UseSystemd() { + manager = &systemd.Manager{Cgroups: groups} + } + return manager } diff --git a/client/executor/exec_linux_test.go b/client/executor/exec_linux_test.go index 8f33b0da4..1b8307b02 100644 --- a/client/executor/exec_linux_test.go +++ b/client/executor/exec_linux_test.go @@ -139,11 +139,6 @@ func TestExecutorLinux_Start_Kill(t *testing.T) { filePath := filepath.Join(taskDir, "output") e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - // This test can only be run if cgroups are enabled. - if !e.(*LinuxExecutor).cgroupEnabled { - t.SkipNow() - } - if err := e.Limit(constraint); err != nil { t.Fatalf("Limit() failed: %v", err) } @@ -178,13 +173,11 @@ func TestExecutorLinux_Open(t *testing.T) { t.Fatalf("No task directory found for task %v", task) } - filePath := filepath.Join(taskDir, "output") - e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - - // This test can only be run if cgroups are enabled. - if !e.(*LinuxExecutor).cgroupEnabled { - t.SkipNow() - } + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "output.txt") + absFilePath := filepath.Join(taskDir, file) + cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) + e := Command("/bin/bash", "-c", cmd) if err := e.Limit(constraint); err != nil { t.Fatalf("Limit() failed: %v", err) @@ -203,14 +196,22 @@ func TestExecutorLinux_Open(t *testing.T) { t.Fatalf("ID() failed: %v", err) } - if _, err := OpenId(id); err == nil { - t.Fatalf("Open(%v) should have failed", id) + e2 := NewExecutor() + if err := e2.Open(id); err != nil { + t.Fatalf("Open(%v) failed: %v", id, err) } - time.Sleep(1500 * time.Millisecond) + if err := e2.Wait(); err != nil { + t.Fatalf("Wait() failed: %v", err) + } - // Check that the file doesn't exist, open should have killed the process. 
- if _, err := os.Stat(filePath); err == nil { - t.Fatalf("Stat(%v) should have failed: task not killed", filePath) + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + t.Fatalf("Couldn't read file %v", absFilePath) + } + + act := string(output) + if act != expected { + t.Fatalf("Command output incorrectly: want %v; got %v", expected, act) } } diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index ea7868be4..81117ce2e 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -192,7 +192,7 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } - if c.config != nil && c.config.Process == nil { + if c.config != nil && c.config.Process != nil { startStatus.UserPID = c.config.Process.Pid } From de495bfc3ccd5b234340396a9c39633412f6cd01 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 15:26:14 -0700 Subject: [PATCH 20/92] nomad: support blocking queries on job allocations --- nomad/job_endpoint.go | 59 ++++++++++++++++++++---------------- nomad/job_endpoint_test.go | 61 +++++++++++++++++++++++++++++++++++--- nomad/state/state_store.go | 2 ++ nomad/watch/watch.go | 1 + 4 files changed, 94 insertions(+), 29 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index ad15d626d..6d43e1c89 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -275,34 +275,43 @@ func (j *Job) Allocations(args *structs.JobSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now()) - // Capture the allocations - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - allocs, err := snap.AllocsByJob(args.JobID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocJob: args.JobID}), + run: func() error { + // Capture the allocations + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + allocs, err := snap.AllocsByJob(args.JobID) + if err != nil { + return err + } - // Convert to stubs - if len(allocs) > 0 { - reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) - for _, alloc := range allocs { - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } - } + // Convert to stubs + if len(allocs) > 0 { + reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) + for _, alloc := range allocs { + reply.Allocations = append(reply.Allocations, alloc.Stub()) + } + } - // Use the last index that affected the allocs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the allocs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + + }} + return j.srv.blockingRPC(&opts) } // Evaluations is used to list the evaluations for a job diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 5b4ba079b..f42031a7a 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -377,14 +377,14 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { // Upsert a job we are not interested in first. 
time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertJob(2, job1); err != nil { + if err := state.UpsertJob(1000, job1); err != nil { t.Fatalf("err: %v", err) } }) // Upsert another job later which should trigger the watch. time.AfterFunc(200*time.Millisecond, func() { - if err := state.UpsertJob(2, job2); err != nil { + if err := state.UpsertJob(2000, job2); err != nil { t.Fatalf("err: %v", err) } }) @@ -405,8 +405,8 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { - t.Fatalf("Bad index: %d %d", resp.Index, 2) + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) } if resp.Job == nil || resp.Job.ID != job2.ID { t.Fatalf("bad: %#v", resp.Job) @@ -546,6 +546,59 @@ func TestJobEndpoint_Allocations(t *testing.T) { } } +func TestJobEndpoint_Allocations_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + alloc2.JobID = "job1" + state := s1.fsm.State() + + // First upsert an unrelated alloc + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert an alloc for the job we are interested in later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the jobs + get := &structs.JobSpecificRequest{ + JobID: "job1", + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.JobAllocationsResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Job.Allocations", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" { + t.Fatalf("bad: %#v", resp.Allocations) + } +} + func TestJobEndpoint_Evaluations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 17484f985..2a31555f2 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -532,6 +532,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati watcher := watch.NewItems() watcher.Add(watch.Item{Table: "allocs"}) watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) // Look for existing alloc @@ -603,6 +604,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er } watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index e5cdce16f..3973e562b 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -8,6 +8,7 @@ package watch // input for subscribe/unsubscribe and notification firing. 
type Item struct { Alloc string + AllocJob string AllocNode string Eval string Job string From 035e5ba80fa4e3bda7d9e37d57ca9acd54dfe873 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 29 Oct 2015 15:39:26 -0700 Subject: [PATCH 21/92] Comments --- client/executor/exec_linux.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index 1a52265b1..9c4bcd9a4 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -134,6 +134,8 @@ func (e *LinuxExecutor) ID() (string, error) { return buffer.String(), nil } +// runAs takes a user id as a string and looks up the user. It stores the +// results in the executor and returns an error if the user could not be found. func (e *LinuxExecutor) runAs(userid string) error { errs := new(multierror.Error) @@ -161,8 +163,8 @@ func (e *LinuxExecutor) runAs(userid string) error { } func (e *LinuxExecutor) Start() error { - // Run as "nobody" user so we don't leak root privilege to the - // spawned process. + // Run as "nobody" user so we don't leak root privilege to the spawned + // process. if err := e.runAs("nobody"); err == nil && e.user != nil { e.cmd.SetUID(e.user.Uid) e.cmd.SetGID(e.user.Gid) @@ -280,6 +282,8 @@ func (e *LinuxExecutor) spawnDaemon() error { return nil } +// sendStartCommand sends the necessary command to the spawn-daemon to have it +// start the user process. func sendStartCommand(w io.Writer) error { enc := json.NewEncoder(w) if err := enc.Encode(true); err != nil { @@ -289,6 +293,9 @@ func sendStartCommand(w io.Writer) error { return nil } +// sendAbortCommand sends the necessary command to the spawn-daemon to have it +// abort starting the user process. This should be invoked if the spawn-daemon +// could not be isolated into a cgroup. func sendAbortCommand(w io.Writer) error { enc := json.NewEncoder(w) if err := enc.Encode(false); err != nil { @@ -298,6 +305,8 @@ func sendAbortCommand(w io.Writer) error { return nil } +// Wait waits til the user process exits and returns an error on non-zero exit +// codes. Wait also cleans up the task directory and created cgroups. func (e *LinuxExecutor) Wait() error { if e.spawnOutputReader != nil { e.spawnOutputReader.Close() @@ -341,6 +350,8 @@ func (e *LinuxExecutor) Shutdown() error { return e.ForceStop() } +// ForceStop immediately exits the user process and cleans up both the task +// directory and the cgroups. func (e *LinuxExecutor) ForceStop() error { if e.spawnOutputReader != nil { e.spawnOutputReader.Close() @@ -366,6 +377,8 @@ func (e *LinuxExecutor) ForceStop() error { // Task Directory related functions. +// ConfigureTaskDir creates the necessary directory structure for a proper +// chroot. cleanTaskDir should be called after. func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { e.taskName = taskName taskDir, ok := alloc.TaskDirs[taskName] @@ -418,6 +431,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD return nil } +// pathExists is a helper function to check if the path exists. func (e *LinuxExecutor) pathExists(path string) bool { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { @@ -427,6 +441,8 @@ func (e *LinuxExecutor) pathExists(path string) bool { return true } +// cleanTaskDir is an idempotent operation to clean the task directory and +// should be called when tearing down the task. func (e *LinuxExecutor) cleanTaskDir() error { // Unmount dev. 
errs := new(multierror.Error) @@ -450,6 +466,8 @@ func (e *LinuxExecutor) cleanTaskDir() error { // Cgroup related functions. +// configureCgroups converts a Nomad Resources specification into the equivalent +// cgroup configuration. It returns an error if the resources are invalid. func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { e.groups = &cgroupConfig.Cgroup{} e.groups.Name = structs.GenerateUUID() @@ -483,6 +501,8 @@ func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { return nil } +// destroyCgroup kills all processes in the cgroup and removes the cgroup +// configuration from the host. func (e *LinuxExecutor) destroyCgroup() error { if e.groups == nil { return errors.New("Can't destroy: cgroup configuration empty") From 89a0af6306d95cf92d33cafeb903ea3bdf698327 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 15:48:44 -0700 Subject: [PATCH 22/92] nomad: support blocking queries on single nodes --- nomad/node_endpoint.go | 64 +++++++++++++++++++++---------------- nomad/node_endpoint_test.go | 50 +++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 28 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 65a83e1b1..f28cc3930 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -283,37 +283,45 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) - // Verify the arguments - if args.NodeID == "" { - return fmt.Errorf("missing node ID") - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Node: args.NodeID}), + run: func() error { + // Verify the arguments + if args.NodeID == "" { + return fmt.Errorf("missing node ID") + } - // Look for the node - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.NodeByID(args.NodeID) - if err != nil { - return err - } + // Look for the node + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.NodeByID(args.NodeID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Node = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("nodes") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + if out != nil { + reply.Node = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("nodes") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - n.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + n.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return n.srv.blockingRPC(&opts) } // GetAllocs is used to request allocations for a specific node diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index d06e6ea0f..8d53c0a66 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -371,6 +371,56 @@ func TestClientEndpoint_GetNode(t *testing.T) { } } +func TestClientEndpoint_GetNode_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the node + node1 := mock.Node() + node2 := mock.Node() + + // First create an unrelated node. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertNode(1000, node1); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert the node we are watching later + time.AfterFunc(200*time.Millisecond, func() { + if err := state.UpsertNode(2000, node2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the node + get := &structs.NodeSpecificRequest{ + NodeID: node2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.SingleNodeResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if resp.Node == nil || resp.Node.ID != node2.ID { + t.Fatalf("bad: %#v", resp.Node) + } +} + func TestClientEndpoint_GetAllocs(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() From ff6e1fea49db9282042289d15745704de084e35b Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 16:04:53 -0700 Subject: [PATCH 23/92] nomad: support blocking queries on single allocations --- nomad/alloc_endpoint.go | 56 ++++++++++++++++++++---------------- nomad/alloc_endpoint_test.go | 52 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 24 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index f3b9dbdc4..e8b6af63c 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -69,30 +69,38 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "alloc", "get_alloc"}, time.Now()) - // Lookup the allocation - snap, err := a.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.AllocByID(args.AllocID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Alloc: args.AllocID}), + run: func() error { + // Lookup the allocation + snap, err := a.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.AllocByID(args.AllocID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Alloc = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + if out != nil { + reply.Alloc = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - a.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + a.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return a.srv.blockingRPC(&opts) } diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 01688da96..0f7e4c0c4 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -144,3 +144,55 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) { t.Fatalf("bad: %#v", resp.Alloc) } } + +func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) 
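+
+	// The GetAlloc query below watches a single alloc ID, so the first,
+	// unrelated write should not unblock it; only the second write should.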
+ + // Create the allocs + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + + // First create an unrelated alloc + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Create the alloc we are watching later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the jobs + get := &structs.AllocSpecificRequest{ + AllocID: alloc2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.SingleAllocResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Alloc.GetAlloc", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID { + t.Fatalf("bad: %#v", resp.Alloc) + } +} From 1e4320e0331ef97436711b0802276d9687a7b9d0 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 16:12:25 -0700 Subject: [PATCH 24/92] nomad: support blocking queries for single evals --- nomad/eval_endpoint.go | 56 +++++++++++++++++++++---------------- nomad/eval_endpoint_test.go | 52 ++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 24 deletions(-) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 07dfc18fe..e473b5b10 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -27,32 +27,40 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now()) - // Look for the job - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.EvalByID(args.EvalID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Eval: args.EvalID}), + run: func() error { + // Look for the job + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.EvalByID(args.EvalID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Eval = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("evals") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + if out != nil { + reply.Eval = out + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("evals") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } // Dequeue is used to dequeue a pending evaluation diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 3b9a62a8e..01ec27f46 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -51,6 +51,58 @@ func TestEvalEndpoint_GetEval(t *testing.T) { } } +func TestEvalEndpoint_GetEval_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := 
rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the evals + eval1 := mock.Eval() + eval2 := mock.Eval() + + // First create an unrelated eval + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertEvals(1000, []*structs.Evaluation{eval1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert the eval we are watching later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertEvals(2000, []*structs.Evaluation{eval2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the eval + get := &structs.EvalSpecificRequest{ + EvalID: eval2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.SingleEvalResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if resp.Eval == nil || resp.Eval.ID != eval2.ID { + t.Fatalf("bad: %#v", resp.Eval) + } +} + func TestEvalEndpoint_Dequeue(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue From a27e8bbe51496278074bcd26cdd5a27b40f3c10a Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 16:20:57 -0700 Subject: [PATCH 25/92] nomad: support blocking queries on eval-specific allocations --- nomad/eval_endpoint.go | 58 +++++++++++++++++++++---------------- nomad/eval_endpoint_test.go | 52 +++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 2 ++ nomad/watch/watch.go | 1 + 4 files changed, 88 insertions(+), 25 deletions(-) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index e473b5b10..5afa31dfc 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -277,32 +277,40 @@ func (e *Eval) Allocations(args *structs.EvalSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now()) - // Capture the allocations - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - allocs, err := snap.AllocsByEval(args.EvalID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocEval: args.EvalID}), + run: func() error { + // Capture the allocations + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + allocs, err := snap.AllocsByEval(args.EvalID) + if err != nil { + return err + } - // Convert to a stub - if len(allocs) > 0 { - reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) - for _, alloc := range allocs { - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } - } + // Convert to a stub + if len(allocs) > 0 { + reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) + for _, alloc := range allocs { + reply.Allocations = append(reply.Allocations, alloc.Stub()) + } + } - // Use the last index that affected the allocs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the allocs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + 
e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 01ec27f46..442a2c1aa 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -484,3 +484,55 @@ func TestEvalEndpoint_Allocations(t *testing.T) { t.Fatalf("bad: %#v", resp.Allocations) } } + +func TestEvalEndpoint_Allocations_blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the allocs + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + + // Upsert an unrelated alloc first + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert an alloc which will trigger the watch later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the eval + get := &structs.EvalSpecificRequest{ + EvalID: alloc2.EvalID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + var resp structs.EvalAllocationsResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.Allocations", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2000 { + t.Fatalf("Bad index: %d %d", resp.Index, 2000) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID { + t.Fatalf("bad: %#v", resp.Allocations) + } +} diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 2a31555f2..f4f97489a 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -532,6 +532,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati watcher := watch.NewItems() watcher.Add(watch.Item{Table: "allocs"}) watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocEval: alloc.EvalID}) watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) @@ -604,6 +605,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er } watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocEval: alloc.EvalID}) watcher.Add(watch.Item{AllocJob: alloc.JobID}) watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index 3973e562b..c71fe5087 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -8,6 +8,7 @@ package watch // input for subscribe/unsubscribe and notification firing. 
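// Each populated field identifies one notification group in the state
// store: writes notify the matching groups, and blocking RPC queries
// register on them to be woken when the watched object changes.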
type Item struct { Alloc string + AllocEval string AllocJob string AllocNode string Eval string From 6fb8a2d3e2481b829e4ba3a4ec87b90fac52b6a7 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 19:00:02 -0700 Subject: [PATCH 26/92] nomad: more tests --- nomad/alloc_endpoint_test.go | 14 ++--- nomad/eval_endpoint.go | 2 +- nomad/eval_endpoint_test.go | 55 +++++++++++++----- nomad/job_endpoint.go | 2 +- nomad/job_endpoint_test.go | 67 +++++++++++++++------- nomad/node_endpoint.go | 2 +- nomad/node_endpoint_test.go | 106 ++++++++++++++++++++++++++++++----- 7 files changed, 188 insertions(+), 60 deletions(-) diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 0f7e4c0c4..4147011ac 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -45,7 +45,7 @@ func TestAllocEndpoint_List(t *testing.T) { } } -func TestAllocEndpoint_List_blocking(t *testing.T) { +func TestAllocEndpoint_List_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -145,7 +145,7 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) { } } -func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { +func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -158,7 +158,7 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { // First create an unrelated alloc time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) if err != nil { t.Fatalf("err: %v", err) } @@ -166,7 +166,7 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { // Create the alloc we are watching later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) } @@ -177,7 +177,7 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { AllocID: alloc2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.SingleAllocResponse @@ -189,8 +189,8 @@ func TestAllocEndpoint_GetAlloc_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID { t.Fatalf("bad: %#v", resp.Alloc) diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 5afa31dfc..bc74e85f3 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -44,8 +44,8 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, } // Setup the output + reply.Eval = out if out != nil { - reply.Eval = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 442a2c1aa..6f3d154e5 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -51,7 +51,7 @@ func TestEvalEndpoint_GetEval(t *testing.T) { } } -func TestEvalEndpoint_GetEval_blocking(t *testing.T) { +func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -64,7 +64,7 @@ func TestEvalEndpoint_GetEval_blocking(t *testing.T) { // First create 
an unrelated eval time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertEvals(1000, []*structs.Evaluation{eval1}) + err := state.UpsertEvals(100, []*structs.Evaluation{eval1}) if err != nil { t.Fatalf("err: %v", err) } @@ -72,35 +72,60 @@ func TestEvalEndpoint_GetEval_blocking(t *testing.T) { // Upsert the eval we are watching later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertEvals(2000, []*structs.Evaluation{eval2}) + err := state.UpsertEvals(200, []*structs.Evaluation{eval2}) if err != nil { t.Fatalf("err: %v", err) } }) // Lookup the eval - get := &structs.EvalSpecificRequest{ + req := &structs.EvalSpecificRequest{ EvalID: eval2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.SingleEvalResponse start := time.Now() - if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp); err != nil { t.Fatalf("err: %v", err) } if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Eval == nil || resp.Eval.ID != eval2.ID { t.Fatalf("bad: %#v", resp.Eval) } + + // Eval delete triggers watches + time.AfterFunc(100*time.Millisecond, func() { + err := state.DeleteEval(300, []string{eval2.ID}, []string{}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + var resp2 structs.SingleEvalResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Eval != nil { + t.Fatalf("bad: %#v", resp2.Eval) + } } func TestEvalEndpoint_Dequeue(t *testing.T) { @@ -386,7 +411,7 @@ func TestEvalEndpoint_List(t *testing.T) { } } -func TestEvalEndpoint_List_blocking(t *testing.T) { +func TestEvalEndpoint_List_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -485,7 +510,7 @@ func TestEvalEndpoint_Allocations(t *testing.T) { } } -func TestEvalEndpoint_Allocations_blocking(t *testing.T) { +func TestEvalEndpoint_Allocations_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -498,7 +523,7 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { // Upsert an unrelated alloc first time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) if err != nil { t.Fatalf("err: %v", err) } @@ -506,7 +531,7 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { // Upsert an alloc which will trigger the watch later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) } @@ -517,7 +542,7 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { EvalID: alloc2.EvalID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp 
structs.EvalAllocationsResponse @@ -529,8 +554,8 @@ func TestEvalEndpoint_Allocations_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID { t.Fatalf("bad: %#v", resp.Allocations) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 6d43e1c89..e961428e4 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -199,8 +199,8 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest, } // Setup the output + reply.Job = out if out != nil { - reply.Job = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index f42031a7a..9e09de538 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -364,7 +364,7 @@ func TestJobEndpoint_GetJob(t *testing.T) { } } -func TestJobEndpoint_GetJob_blocking(t *testing.T) { +func TestJobEndpoint_GetJob_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -377,14 +377,14 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { // Upsert a job we are not interested in first. time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertJob(1000, job1); err != nil { + if err := state.UpsertJob(100, job1); err != nil { t.Fatalf("err: %v", err) } }) // Upsert another job later which should trigger the watch. time.AfterFunc(200*time.Millisecond, func() { - if err := state.UpsertJob(2000, job2); err != nil { + if err := state.UpsertJob(200, job2); err != nil { t.Fatalf("err: %v", err) } }) @@ -393,7 +393,7 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { JobID: job2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } start := time.Now() @@ -405,12 +405,37 @@ func TestJobEndpoint_GetJob_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Job == nil || resp.Job.ID != job2.ID { t.Fatalf("bad: %#v", resp.Job) } + + // Job delete fires watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteJob(300, job2.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + start = time.Now() + + var resp2 structs.SingleJobResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Job != nil { + t.Fatalf("bad: %#v", resp2.Job) + } } func TestJobEndpoint_ListJobs(t *testing.T) { @@ -447,7 +472,7 @@ func TestJobEndpoint_ListJobs(t *testing.T) { } } -func TestJobEndpoint_ListJobs_blocking(t *testing.T) { +func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -459,7 +484,7 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { // 
Upsert job triggers watches time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertJob(2, job); err != nil { + if err := state.UpsertJob(100, job); err != nil { t.Fatalf("err: %v", err) } }) @@ -467,7 +492,7 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { req := &structs.JobListRequest{ QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } start := time.Now() @@ -479,8 +504,8 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2 { - t.Fatalf("Bad index: %d %d", resp.Index, 2) + if resp.Index != 100 { + t.Fatalf("Bad index: %d %d", resp.Index, 100) } if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID { t.Fatalf("bad: %#v", resp.Jobs) @@ -488,12 +513,12 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { // Job deletion triggers watches time.AfterFunc(100*time.Millisecond, func() { - if err := state.DeleteJob(3, job.ID); err != nil { + if err := state.DeleteJob(200, job.ID); err != nil { t.Fatalf("err: %v", err) } }) - req.MinQueryIndex = 2 + req.MinQueryIndex = 150 start = time.Now() var resp2 structs.JobListResponse if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil { @@ -503,8 +528,8 @@ func TestJobEndpoint_ListJobs_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } - if resp2.Index != 3 { - t.Fatalf("Bad index: %d %d", resp2.Index, 3) + if resp2.Index != 200 { + t.Fatalf("Bad index: %d %d", resp2.Index, 200) } if len(resp2.Jobs) != 0 { t.Fatalf("bad: %#v", resp2.Jobs) @@ -546,7 +571,7 @@ func TestJobEndpoint_Allocations(t *testing.T) { } } -func TestJobEndpoint_Allocations_blocking(t *testing.T) { +func TestJobEndpoint_Allocations_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() codec := rpcClient(t, s1) @@ -560,7 +585,7 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { // First upsert an unrelated alloc time.AfterFunc(100*time.Millisecond, func() { - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) if err != nil { t.Fatalf("err: %v", err) } @@ -568,7 +593,7 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { // Upsert an alloc for the job we are interested in later time.AfterFunc(200*time.Millisecond, func() { - err := state.UpsertAllocs(2000, []*structs.Allocation{alloc2}) + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) } @@ -579,7 +604,7 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { JobID: "job1", QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.JobAllocationsResponse @@ -591,8 +616,8 @@ func TestJobEndpoint_Allocations_blocking(t *testing.T) { if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" { t.Fatalf("bad: %#v", resp.Allocations) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index f28cc3930..5bd600380 100644 --- a/nomad/node_endpoint.go +++ 
b/nomad/node_endpoint.go @@ -305,8 +305,8 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, } // Setup the output + reply.Node = out if out != nil { - reply.Node = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 8d53c0a66..9a74316c7 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -371,7 +371,7 @@ func TestClientEndpoint_GetNode(t *testing.T) { } } -func TestClientEndpoint_GetNode_blocking(t *testing.T) { +func TestClientEndpoint_GetNode_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() @@ -384,41 +384,92 @@ func TestClientEndpoint_GetNode_blocking(t *testing.T) { // First create an unrelated node. time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertNode(1000, node1); err != nil { + if err := state.UpsertNode(100, node1); err != nil { t.Fatalf("err: %v", err) } }) // Upsert the node we are watching later time.AfterFunc(200*time.Millisecond, func() { - if err := state.UpsertNode(2000, node2); err != nil { + if err := state.UpsertNode(200, node2); err != nil { t.Fatalf("err: %v", err) } }) // Lookup the node - get := &structs.NodeSpecificRequest{ + req := &structs.NodeSpecificRequest{ NodeID: node2.ID, QueryOptions: structs.QueryOptions{ Region: "global", - MinQueryIndex: 1, + MinQueryIndex: 50, }, } var resp structs.SingleNodeResponse start := time.Now() - if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", get, &resp); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil { t.Fatalf("err: %v", err) } if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } - if resp.Index != 2000 { - t.Fatalf("Bad index: %d %d", resp.Index, 2000) + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) } if resp.Node == nil || resp.Node.ID != node2.ID { t.Fatalf("bad: %#v", resp.Node) } + + // Node update triggers watches + time.AfterFunc(100*time.Millisecond, func() { + nodeUpdate := mock.Node() + nodeUpdate.ID = node2.ID + nodeUpdate.Status = structs.NodeStatusDown + if err := state.UpsertNode(300, nodeUpdate); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + var resp2 structs.SingleNodeResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Node == nil || resp2.Node.Status != structs.NodeStatusDown { + t.Fatalf("bad: %#v", resp2.Node) + } + + // Node delete triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteNode(400, node2.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 350 + var resp3 structs.SingleNodeResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp3.Index != 400 { + t.Fatalf("Bad index: %d %d", resp2.Index, 400) + } + if resp3.Node != nil { + t.Fatalf("bad: 
%#v", resp3.Node) + } } func TestClientEndpoint_GetAllocs(t *testing.T) { @@ -507,16 +558,15 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { alloc.NodeID = node.ID state := s1.fsm.State() start := time.Now() - go func() { - time.Sleep(100 * time.Millisecond) + time.AfterFunc(100*time.Millisecond, func() { err := state.UpsertAllocs(100, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) } - }() + }) // Lookup the allocs in a blocking query - get := &structs.NodeSpecificRequest{ + req := &structs.NodeSpecificRequest{ NodeID: node.ID, QueryOptions: structs.QueryOptions{ Region: "global", @@ -525,7 +575,7 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { }, } var resp2 structs.NodeAllocsResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", get, &resp2); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp2); err != nil { t.Fatalf("err: %v", err) } @@ -541,6 +591,34 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { if len(resp2.Allocs) != 1 || resp2.Allocs[0].ID != alloc.ID { t.Fatalf("bad: %#v", resp2.Allocs) } + + // Alloc updates fire watches + time.AfterFunc(100*time.Millisecond, func() { + allocUpdate := mock.Alloc() + allocUpdate.NodeID = alloc.NodeID + allocUpdate.ID = alloc.ID + allocUpdate.ClientStatus = structs.AllocClientStatusRunning + err := state.UpdateAllocFromClient(200, allocUpdate) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 150 + var resp3 structs.NodeAllocsResponse + if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if time.Since(start) < 100*time.Millisecond { + t.Fatalf("too fast") + } + if resp3.Index != 200 { + t.Fatalf("Bad index: %d %d", resp3.Index, 200) + } + if len(resp3.Allocs) != 1 || resp3.Allocs[0].ClientStatus != structs.AllocClientStatusRunning { + t.Fatalf("bad: %#v", resp3.Allocs[0]) + } } func TestClientEndpoint_UpdateAlloc(t *testing.T) { @@ -803,7 +881,7 @@ func TestClientEndpoint_ListNodes(t *testing.T) { } } -func TestClientEndpoint_ListNodes_blocking(t *testing.T) { +func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() state := s1.fsm.State() From ae4156d2b3f3d1c70c0db006c2cc8653f126a68e Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Thu, 29 Oct 2015 21:42:41 -0700 Subject: [PATCH 27/92] nomad/state: add watch trigger tests --- nomad/state/state_store.go | 3 + nomad/state/state_store_test.go | 249 ++++++++++++++++++++++++-------- 2 files changed, 191 insertions(+), 61 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index f4f97489a..47ead285e 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -765,6 +765,7 @@ func (s *StateRestore) Commit() { // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { r.items.Add(watch.Item{Table: "nodes"}) + r.items.Add(watch.Item{Node: node.ID}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -774,6 +775,7 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { r.items.Add(watch.Item{Table: "jobs"}) + r.items.Add(watch.Item{Job: job.ID}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -783,6 +785,7 @@ func (r *StateRestore) 
JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { r.items.Add(watch.Item{Table: "evals"}) + r.items.Add(watch.Item{Eval: eval.ID}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 2d1134d9c..788b9f26a 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -26,6 +26,12 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -47,12 +53,20 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -79,12 +93,20 @@ func TestStateStore_DeleteNode_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -114,12 +136,20 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -149,6 +179,8 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Nodes(t *testing.T) { @@ -189,18 +221,23 @@ func TestStateStore_Nodes(t *testing.T) { func TestStateStore_RestoreNode(t *testing.T) { state := testStateStore(t) + node := mock.Node() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "nodes"}}, + {desc: "node", item: watch.Item{Node: node.ID}}, + } + notify.start(state) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - node := mock.Node() err = restore.NodeRestore(node) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() out, err := state.NodeByID(node.ID) @@ -211,12 +248,20 @@ func TestStateStore_RestoreNode(t *testing.T) { if !reflect.DeepEqual(out, node) { t.Fatalf("Bad: %#v %#v", out, node) } + + notify.verify(t) } func TestStateStore_UpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -238,12 
+283,20 @@ func TestStateStore_UpsertJob_Job(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -279,12 +332,20 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -311,6 +372,8 @@ func TestStateStore_DeleteJob_Job(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Jobs(t *testing.T) { @@ -418,18 +481,23 @@ func TestStateStore_JobsByScheduler(t *testing.T) { func TestStateStore_RestoreJob(t *testing.T) { state := testStateStore(t) + job := mock.Job() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "jobs"}}, + {desc: "job", item: watch.Item{Job: job.ID}}, + } + notify.start(state) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - job := mock.Job() err = restore.JobRestore(job) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() out, err := state.JobByID(job.ID) @@ -440,6 +508,8 @@ func TestStateStore_RestoreJob(t *testing.T) { if !reflect.DeepEqual(out, job) { t.Fatalf("Bad: %#v %#v", out, job) } + + notify.verify(t) } func TestStateStore_Indexes(t *testing.T) { @@ -504,6 +574,12 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval", item: watch.Item{Eval: eval.ID}}, + } + notify.start(state) + err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) if err != nil { t.Fatalf("err: %v", err) @@ -525,6 +601,8 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { @@ -536,6 +614,12 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { t.Fatalf("err: %v", err) } + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval", item: watch.Item{Eval: eval.ID}}, + } + notify.start(state) + eval2 := mock.Eval() eval2.ID = eval.ID err = state.UpsertEvals(1001, []*structs.Evaluation{eval2}) @@ -566,40 +650,50 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteEval_Eval(t *testing.T) { state := testStateStore(t) - eval := mock.Eval() + eval1 := mock.Eval() eval2 := mock.Eval() - alloc := mock.Alloc() + alloc1 := mock.Alloc() alloc2 := mock.Alloc() - err := state.UpsertEvals(1000, []*structs.Evaluation{eval, eval2}) + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval1", item: watch.Item{Eval: eval1.ID}}, + {desc: "eval2", item: watch.Item{Eval: eval2.ID}}, + {desc: "alloc1", item: watch.Item{Alloc: alloc1.ID}}, + {desc: "alloc2", item: watch.Item{Alloc: alloc2.ID}}, + {desc: 
"allocnode1", item: watch.Item{AllocNode: alloc1.NodeID}}, + {desc: "allocnode2", item: watch.Item{AllocNode: alloc2.NodeID}}, + } + notify.start(state) + + err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2}) if err != nil { t.Fatalf("err: %v", err) } - err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2}) + err = state.UpsertAllocs(1001, []*structs.Allocation{alloc1, alloc2}) if err != nil { t.Fatalf("err: %v", err) } - notify1 := make(chan struct{}, 1) - state.Watch(watch.NewItems(watch.Item{AllocNode: alloc.NodeID}), notify1) - - err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) + err = state.DeleteEval(1002, []string{eval1.ID, eval2.ID}, []string{alloc1.ID, alloc2.ID}) if err != nil { t.Fatalf("err: %v", err) } - out, err := state.EvalByID(eval.ID) + out, err := state.EvalByID(eval1.ID) if err != nil { t.Fatalf("err: %v", err) } if out != nil { - t.Fatalf("bad: %#v %#v", eval, out) + t.Fatalf("bad: %#v %#v", eval1, out) } out, err = state.EvalByID(eval2.ID) @@ -608,16 +702,16 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } if out != nil { - t.Fatalf("bad: %#v %#v", eval, out) + t.Fatalf("bad: %#v %#v", eval1, out) } - outA, err := state.AllocByID(alloc.ID) + outA, err := state.AllocByID(alloc1.ID) if err != nil { t.Fatalf("err: %v", err) } if out != nil { - t.Fatalf("bad: %#v %#v", alloc, outA) + t.Fatalf("bad: %#v %#v", alloc1, outA) } outA, err = state.AllocByID(alloc2.ID) @@ -626,7 +720,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } if out != nil { - t.Fatalf("bad: %#v %#v", alloc, outA) + t.Fatalf("bad: %#v %#v", alloc1, outA) } index, err := state.Index("evals") @@ -645,11 +739,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { t.Fatalf("bad: %d", index) } - select { - case <-notify1: - default: - t.Fatalf("should be notified") - } + notify.verify(t) } func TestStateStore_EvalsByJob(t *testing.T) { @@ -721,34 +811,50 @@ func TestStateStore_Evals(t *testing.T) { func TestStateStore_RestoreEval(t *testing.T) { state := testStateStore(t) + eval := mock.Eval() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "evals"}}, + {desc: "eval", item: watch.Item{Eval: eval.ID}}, + } + notify.start(state) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - job := mock.Eval() - err = restore.EvalRestore(job) + err = restore.EvalRestore(eval) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() - out, err := state.EvalByID(job.ID) + out, err := state.EvalByID(eval.ID) if err != nil { t.Fatalf("err: %v", err) } - if !reflect.DeepEqual(out, job) { - t.Fatalf("Bad: %#v %#v", out, job) + if !reflect.DeepEqual(out, eval) { + t.Fatalf("Bad: %#v %#v", out, eval) } + + notify.verify(t) } func TestStateStore_UpdateAllocFromClient(t *testing.T) { state := testStateStore(t) - alloc := mock.Alloc() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "allocs"}}, + {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, + {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, + {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, + {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, + } + notify.start(state) + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -780,12 +886,23 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { state := 
testStateStore(t) - alloc := mock.Alloc() + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "allocs"}}, + {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, + {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, + {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, + {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, + } + notify.start(state) + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -807,37 +924,8 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } -} -func TestStateStore_Watch(t *testing.T) { - state := testStateStore(t) - - notify1 := make(chan struct{}, 1) - notify2 := make(chan struct{}, 1) - - items := watch.NewItems(watch.Item{AllocNode: "foo"}) - state.Watch(items, notify1) - state.Watch(items, notify2) - state.StopWatch(items, notify2) - - alloc := mock.Alloc() - alloc.NodeID = "foo" - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) - if err != nil { - t.Fatalf("err: %v", err) - } - - select { - case <-notify1: - default: - t.Fatalf("should be notified") - } - - select { - case <-notify2: - t.Fatalf("should not be notified") - default: - } + notify.verify(t) } func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { @@ -852,6 +940,16 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { alloc2 := mock.Alloc() alloc2.ID = alloc.ID alloc2.NodeID = alloc.NodeID + ".new" + + notify := notifyTest{ + {desc: "table", item: watch.Item{Table: "allocs"}}, + {desc: "alloc", item: watch.Item{Alloc: alloc2.ID}}, + {desc: "alloceval", item: watch.Item{AllocEval: alloc2.EvalID}}, + {desc: "allocjob", item: watch.Item{AllocJob: alloc2.JobID}}, + {desc: "allocnode", item: watch.Item{AllocNode: alloc2.NodeID}}, + } + notify.start(state) + err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) @@ -880,6 +978,8 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_EvictAlloc_Alloc(t *testing.T) { @@ -1078,6 +1178,33 @@ func TestStateWatch_stopWatch(t *testing.T) { } } +// notifyTestCase is used to set up and verify watch triggers. +type notifyTestCase struct { + desc string + item watch.Item + ch chan struct{} +} + +// notifyTest is a suite of notifyTestCases. +type notifyTest []*notifyTestCase + +// start creates the notify channels and subscribes them. +func (n notifyTest) start(state *StateStore) { + for _, tcase := range n { + tcase.ch = make(chan struct{}, 1) + state.Watch(watch.NewItems(tcase.item), tcase.ch) + } +} + +// verify ensures that each channel received a notification. 
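+// The channels are buffered with capacity one, so a fired watch leaves
+// exactly one pending struct{} and the length check is non-blocking.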
+func (n notifyTest) verify(t *testing.T) { + for _, tcase := range n { + if len(tcase.ch) != 1 { + t.Fatalf("should notify %s", tcase.desc) + } + } +} + // NodeIDSort is used to sort nodes by ID type NodeIDSort []*structs.Node From 284c2e2f2b07ed976422d877f821e77e887147fd Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Fri, 30 Oct 2015 08:27:47 -0700 Subject: [PATCH 28/92] nomad: cleanup and more tests --- nomad/alloc_endpoint.go | 2 +- nomad/alloc_endpoint_test.go | 6 +- nomad/eval_endpoint_test.go | 10 +- nomad/job_endpoint_test.go | 10 +- nomad/node_endpoint_test.go | 14 +-- nomad/state/state_store.go | 10 +- nomad/state/state_store_test.go | 215 ++++++++++++++++---------------- 7 files changed, 139 insertions(+), 128 deletions(-) diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index e8b6af63c..c07d5549d 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -86,8 +86,8 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, } // Setup the output + reply.Alloc = out if out != nil { - reply.Alloc = out reply.Index = out.ModifyIndex } else { // Use the last index that affected the nodes table diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 4147011ac..bcab0a387 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -74,7 +74,7 @@ func TestAllocEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 2 { @@ -101,7 +101,7 @@ func TestAllocEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { @@ -186,7 +186,7 @@ func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 6f3d154e5..55782a031 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -92,7 +92,7 @@ func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { @@ -117,7 +117,7 @@ func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 300 { @@ -440,7 +440,7 @@ func TestEvalEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 2 { @@ -464,7 +464,7 @@ func TestEvalEndpoint_List_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } 
- if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { @@ -551,7 +551,7 @@ func TestEvalEndpoint_Allocations_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 9e09de538..c12e5b463 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -402,7 +402,7 @@ func TestJobEndpoint_GetJob_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { @@ -427,7 +427,7 @@ func TestJobEndpoint_GetJob_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 300 { @@ -501,7 +501,7 @@ func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 100 { @@ -525,7 +525,7 @@ func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 200 { @@ -613,7 +613,7 @@ func TestJobEndpoint_Allocations_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 9a74316c7..74b154655 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -410,7 +410,7 @@ func TestClientEndpoint_GetNode_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 200*time.Millisecond { + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 200 { @@ -437,7 +437,7 @@ func TestClientEndpoint_GetNode_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp2.Index != 300 { @@ -461,7 +461,7 @@ func TestClientEndpoint_GetNode_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp3.Index != 400 { @@ -910,7 +910,7 @@ 
func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } if resp.Index != 2 { @@ -934,7 +934,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } if resp2.Index != 3 { @@ -958,7 +958,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) } if resp3.Index != 4 { @@ -982,7 +982,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } - if elapsed := time.Now().Sub(start); elapsed < 100*time.Millisecond { + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) } if resp4.Index != 5 { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 47ead285e..ec5aef29f 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -455,8 +455,11 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } - watcher.Add(watch.Item{Alloc: alloc}) - watcher.Add(watch.Item{AllocNode: existing.(*structs.Allocation).NodeID}) + realAlloc := existing.(*structs.Allocation) + watcher.Add(watch.Item{Alloc: realAlloc.ID}) + watcher.Add(watch.Item{AllocEval: realAlloc.EvalID}) + watcher.Add(watch.Item{AllocJob: realAlloc.JobID}) + watcher.Add(watch.Item{AllocNode: realAlloc.NodeID}) } // Update the indexes @@ -795,6 +798,9 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { r.items.Add(watch.Item{Table: "allocs"}) + r.items.Add(watch.Item{Alloc: alloc.ID}) + r.items.Add(watch.Item{AllocEval: alloc.EvalID}) + r.items.Add(watch.Item{AllocJob: alloc.JobID}) r.items.Add(watch.Item{AllocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 788b9f26a..2a5967450 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -26,11 +26,10 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -61,11 +60,10 @@ func TestStateStore_DeleteNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + 
watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -101,11 +99,10 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -144,11 +141,10 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) err := state.UpsertNode(1000, node) if err != nil { @@ -223,11 +219,10 @@ func TestStateStore_RestoreNode(t *testing.T) { state := testStateStore(t) node := mock.Node() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "nodes"}}, - {desc: "node", item: watch.Item{Node: node.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) restore, err := state.Restore() if err != nil { @@ -256,11 +251,10 @@ func TestStateStore_UpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) err := state.UpsertJob(1000, job) if err != nil { @@ -291,11 +285,10 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) err := state.UpsertJob(1000, job) if err != nil { @@ -340,11 +333,10 @@ func TestStateStore_DeleteJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) err := state.UpsertJob(1000, job) if err != nil { @@ -483,11 +475,10 @@ func TestStateStore_RestoreJob(t *testing.T) { state := testStateStore(t) job := mock.Job() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "jobs"}}, - {desc: "job", item: watch.Item{Job: job.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) restore, err := state.Restore() if err != nil { @@ -574,11 +565,10 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval", item: watch.Item{Eval: eval.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) if err != nil { @@ -614,11 +604,10 @@ func TestStateStore_Update_UpsertEvals_Eval(t 
*testing.T) { t.Fatalf("err: %v", err) } - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval", item: watch.Item{Eval: eval.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) eval2 := mock.Eval() eval2.ID = eval.ID @@ -661,16 +650,19 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { alloc1 := mock.Alloc() alloc2 := mock.Alloc() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval1", item: watch.Item{Eval: eval1.ID}}, - {desc: "eval2", item: watch.Item{Eval: eval2.ID}}, - {desc: "alloc1", item: watch.Item{Alloc: alloc1.ID}}, - {desc: "alloc2", item: watch.Item{Alloc: alloc2.ID}}, - {desc: "allocnode1", item: watch.Item{AllocNode: alloc1.NodeID}}, - {desc: "allocnode2", item: watch.Item{AllocNode: alloc2.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval1.ID}, + watch.Item{Eval: eval2.ID}, + watch.Item{Alloc: alloc1.ID}, + watch.Item{Alloc: alloc2.ID}, + watch.Item{AllocEval: alloc1.EvalID}, + watch.Item{AllocEval: alloc2.EvalID}, + watch.Item{AllocJob: alloc1.JobID}, + watch.Item{AllocJob: alloc2.JobID}, + watch.Item{AllocNode: alloc1.NodeID}, + watch.Item{AllocNode: alloc2.NodeID}) err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2}) if err != nil { @@ -813,11 +805,10 @@ func TestStateStore_RestoreEval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "evals"}}, - {desc: "eval", item: watch.Item{Eval: eval.ID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) restore, err := state.Restore() if err != nil { @@ -846,14 +837,13 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) { state := testStateStore(t) alloc := mock.Alloc() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "allocs"}}, - {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, - {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, - {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, - {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { @@ -894,14 +884,13 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { state := testStateStore(t) alloc := mock.Alloc() - notify := notifyTest{ - {desc: "table", item: watch.Item{Table: "allocs"}}, - {desc: "alloc", item: watch.Item{Alloc: alloc.ID}}, - {desc: "alloceval", item: watch.Item{AllocEval: alloc.EvalID}}, - {desc: "allocjob", item: watch.Item{AllocJob: alloc.JobID}}, - {desc: "allocnode", item: watch.Item{AllocNode: alloc.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { @@ -941,14 +930,13 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { alloc2.ID = alloc.ID alloc2.NodeID = alloc.NodeID + ".new" - notify := notifyTest{ - {desc: 
"table", item: watch.Item{Table: "allocs"}}, - {desc: "alloc", item: watch.Item{Alloc: alloc2.ID}}, - {desc: "alloceval", item: watch.Item{AllocEval: alloc2.EvalID}}, - {desc: "allocjob", item: watch.Item{AllocJob: alloc2.JobID}}, - {desc: "allocnode", item: watch.Item{AllocNode: alloc2.NodeID}}, - } - notify.start(state) + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc2.ID}, + watch.Item{AllocEval: alloc2.EvalID}, + watch.Item{AllocJob: alloc2.JobID}, + watch.Item{AllocNode: alloc2.NodeID}) err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2}) if err != nil { @@ -1111,13 +1099,21 @@ func TestStateStore_Allocs(t *testing.T) { func TestStateStore_RestoreAlloc(t *testing.T) { state := testStateStore(t) + alloc := mock.Alloc() + + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - alloc := mock.Alloc() err = restore.AllocRestore(alloc) if err != nil { t.Fatalf("err: %v", err) @@ -1133,6 +1129,8 @@ func TestStateStore_RestoreAlloc(t *testing.T) { if !reflect.DeepEqual(out, alloc) { t.Fatalf("Bad: %#v %#v", out, alloc) } + + notify.verify(t) } func TestStateWatch_watch(t *testing.T) { @@ -1172,15 +1170,30 @@ func TestStateWatch_stopWatch(t *testing.T) { // Unsubscribe stop notifications sw.stopWatch(watch.Item{Table: "foo"}, notify) + // Check that the group was removed + if _, ok := sw.items[watch.Item{Table: "foo"}]; ok { + t.Fatalf("should remove group") + } + + // Check that we are not notified sw.notify(watch.NewItems(watch.Item{Table: "foo"})) if len(notify) != 0 { t.Fatalf("should not notify") } } +func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest { + var n notifyTest + for _, item := range items { + ch := make(chan struct{}, 1) + state.Watch(watch.NewItems(item), ch) + n = append(n, ¬ifyTestCase{item, ch}) + } + return n +} + // notifyTestCase is used to set up and verify watch triggers. type notifyTestCase struct { - desc string item watch.Item ch chan struct{} } @@ -1188,19 +1201,11 @@ type notifyTestCase struct { // notifyTest is a suite of notifyTestCases. type notifyTest []*notifyTestCase -// start creates the notify channels and subscribes them. -func (n notifyTest) start(state *StateStore) { - for _, tcase := range n { - tcase.ch = make(chan struct{}, 1) - state.Watch(watch.NewItems(tcase.item), tcase.ch) - } -} - // verify ensures that each channel received a notification. 
func (n notifyTest) verify(t *testing.T) { for _, tcase := range n { if len(tcase.ch) != 1 { - t.Fatalf("should notify %s", tcase.desc) + t.Fatalf("should notify %#v", tcase.item) } } } From 5f53478137ec01b71cda970f3cba1f1e4d5d2944 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Fri, 30 Oct 2015 08:42:23 -0700 Subject: [PATCH 29/92] nomad/state: subscribe/unsubscribe all watch items while holding the lock --- nomad/state/state_store.go | 40 +++++++++++++++++---------------- nomad/state/state_store_test.go | 13 ++++++----- nomad/watch/watch.go | 1 + 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index ec5aef29f..30ee87259 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -75,16 +75,14 @@ func (s *StateStore) Restore() (*StateRestore, error) { return r, nil } +// Watch subscribes a channel to a set of watch items. func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { - for wi, _ := range items { - s.watch.watch(wi, notify) - } + s.watch.watch(items, notify) } +// StopWatch unsubscribes a channel from a set of watch items. func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { - for wi, _ := range items { - s.watch.stopWatch(wi, notify) - } + s.watch.stopWatch(items, notify) } // UpsertNode is used to register a node or update a node definition @@ -830,28 +828,32 @@ func newStateWatch() *stateWatch { } } -// watch subscribes a channel to the given watch item. -func (w *stateWatch) watch(wi watch.Item, ch chan struct{}) { +// watch subscribes a channel to the given watch items. +func (w *stateWatch) watch(items watch.Items, ch chan struct{}) { w.l.Lock() defer w.l.Unlock() - grp, ok := w.items[wi] - if !ok { - grp = new(NotifyGroup) - w.items[wi] = grp + for item, _ := range items { + grp, ok := w.items[item] + if !ok { + grp = new(NotifyGroup) + w.items[item] = grp + } + grp.Wait(ch) } - grp.Wait(ch) } -// stopWatch unsubscribes a channel from the given watch item. -func (w *stateWatch) stopWatch(wi watch.Item, ch chan struct{}) { +// stopWatch unsubscribes a channel from the given watch items. 
+func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) { w.l.Lock() defer w.l.Unlock() - if grp, ok := w.items[wi]; ok { - grp.Clear(ch) - if grp.Empty() { - delete(w.items, wi) + for item, _ := range items { + if grp, ok := w.items[item]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, item) + } } } } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 2a5967450..5e1021e55 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -653,6 +653,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { notify := setupNotifyTest( state, watch.Item{Table: "evals"}, + watch.Item{Table: "allocs"}, watch.Item{Eval: eval1.ID}, watch.Item{Eval: eval2.ID}, watch.Item{Alloc: alloc1.ID}, @@ -1140,9 +1141,9 @@ func TestStateWatch_watch(t *testing.T) { notify3 := make(chan struct{}, 1) // Notifications trigger subscribed channels - sw.watch(watch.Item{Table: "foo"}, notify1) - sw.watch(watch.Item{Table: "bar"}, notify2) - sw.watch(watch.Item{Table: "baz"}, notify3) + sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify1) + sw.watch(watch.NewItems(watch.Item{Table: "bar"}), notify2) + sw.watch(watch.NewItems(watch.Item{Table: "baz"}), notify3) items := watch.NewItems() items.Add(watch.Item{Table: "foo"}) @@ -1165,10 +1166,10 @@ func TestStateWatch_stopWatch(t *testing.T) { notify := make(chan struct{}) // First subscribe - sw.watch(watch.Item{Table: "foo"}, notify) + sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify) // Unsubscribe stop notifications - sw.stopWatch(watch.Item{Table: "foo"}, notify) + sw.stopWatch(watch.NewItems(watch.Item{Table: "foo"}), notify) // Check that the group was removed if _, ok := sw.items[watch.Item{Table: "foo"}]; ok { @@ -1182,6 +1183,8 @@ func TestStateWatch_stopWatch(t *testing.T) { } } +// setupNotifyTest takes a state store and a set of watch items, then creates +// and subscribes a notification channel for each item. func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest { var n notifyTest for _, item := range items { diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index c71fe5087..102e535b2 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -21,6 +21,7 @@ type Item struct { // the items as they are added using map keys. type Items map[Item]struct{} +// NewItems creates a new Items set and adds the given items. func NewItems(items ...Item) Items { wi := make(Items) for _, item := range items { From 8f2bb251578a6419265dd6f3ff03d8fe3336be2a Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Fri, 30 Oct 2015 14:38:51 -0700 Subject: [PATCH 30/92] website: clean up HTTP docs, add blocking queries --- website/source/docs/http/alloc.html.md | 6 +- website/source/docs/http/allocs.html.md | 6 +- website/source/docs/http/eval.html.md | 18 +- website/source/docs/http/evals.html.md | 6 +- website/source/docs/http/index.html.md | 1 + website/source/docs/http/job.html.md | 269 ++++++++++++------------ website/source/docs/http/jobs.html.md | 6 +- website/source/docs/http/node.html.md | 20 +- website/source/docs/http/nodes.html.md | 7 +- 9 files changed, 186 insertions(+), 153 deletions(-) diff --git a/website/source/docs/http/alloc.html.md b/website/source/docs/http/alloc.html.md index 3c224fd54..822858a8c 100644 --- a/website/source/docs/http/alloc.html.md +++ b/website/source/docs/http/alloc.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -179,4 +184,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/allocs.html.md b/website/source/docs/http/allocs.html.md index 44ad8aa7e..b59a4f204 100644 --- a/website/source/docs/http/allocs.html.md +++ b/website/source/docs/http/allocs.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -56,4 +61,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/eval.html.md b/website/source/docs/http/eval.html.md index cba43900c..87e048209 100644 --- a/website/source/docs/http/eval.html.md +++ b/website/source/docs/http/eval.html.md @@ -3,7 +3,7 @@ layout: "http" page_title: "HTTP API: /v1/evaluation" sidebar_current: "docs-http-eval-" description: |- - The '/1/evaluation' endpoint is used to query a specific evaluation. + The '/v1/evaluation' endpoint is used to query a specific evaluation. --- # /v1/evaluation @@ -17,7 +17,7 @@ be specified using the `?region=` query parameter.
Description
- Lists all the evaluations. + Query a specific evaluation.
Method
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -57,9 +62,6 @@ be specified using the `?region=` query parameter.
-# /v1/evaluation/\/allocations -## GET -
Description
@@ -77,6 +79,11 @@ be specified using the `?region=` query parameter. None
+
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -102,4 +109,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/evals.html.md b/website/source/docs/http/evals.html.md index 3bc22da8f..23d98cc95 100644 --- a/website/source/docs/http/evals.html.md +++ b/website/source/docs/http/evals.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -59,4 +64,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/index.html.md b/website/source/docs/http/index.html.md index 671d19fa5..7ed3f0dbd 100644 --- a/website/source/docs/http/index.html.md +++ b/website/source/docs/http/index.html.md @@ -31,6 +31,7 @@ The API is modeled closely on the underlying data model. Use the links to the le documentation about specific endpoints. There are also "Agent" APIs which interact with a specific agent and not the broader cluster used for administration. + ## Blocking Queries Certain endpoints support a feature called a "blocking query." A blocking query diff --git a/website/source/docs/http/job.html.md b/website/source/docs/http/job.html.md index 211963e6a..cbf0f5097 100644 --- a/website/source/docs/http/job.html.md +++ b/website/source/docs/http/job.html.md @@ -6,7 +6,7 @@ description: |- The '/1/job' endpoint is used for CRUD on a single job. --- -# /v1/job/\ +# /v1/job The `job` endpoint is used for CRUD on a single job. By default, the agent's local region is used; another region can be specified using the `?region=` query parameter. @@ -30,6 +30,11 @@ region is used; another region can be specified using the `?region=` query param None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -136,6 +141,105 @@ region is used; another region can be specified using the `?region=` query param
+
+
Description
+
+ Query the allocations belonging to a single job. +
+ +
Method
+
GET
+ +
URL
+
`/v1/job//allocations`
+ +
Parameters
+
+ None +
+ +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+ +
Returns
+
+ + ```javascript + [ + { + "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", + "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", + "Name": "binstore-storagelocker.binsl[0]", + "NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b", + "JobID": "binstore-storagelocker", + "TaskGroup": "binsl", + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "running", + "ClientDescription": "", + "CreateIndex": 16, + "ModifyIndex": 16 + }, + ... + ] + ``` + +
+
+ +
+
Description
+
+ Query the evaluations belonging to a single job. +
+ +
Method
+
GET
+ +
URL
+
`/v1/job//evaluations`
+ +
Parameters
+
+ None +
+ +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+ +
Returns
+
+ + ```javascript + [ + { + "ID": "151accaa-1ac6-90fe-d427-313e70ccbb88", + "Priority": 50, + "Type": "service", + "TriggeredBy": "job-register", + "JobID": "binstore-storagelocker", + "JobModifyIndex": 14, + "NodeID": "", + "NodeModifyIndex": 0, + "Status": "complete", + "StatusDescription": "", + "Wait": 0, + "NextEval": "", + "PreviousEval": "", + "CreateIndex": 15, + "ModifyIndex": 17 + }, + ... + ] + ``` + +
+
+ ## PUT / POST
@@ -177,6 +281,38 @@ region is used; another region can be specified using the `?region=` query param
+
+
Description
+
+ Creates a new evaluation for the given job. This can be used to force + run the scheduling logic if necessary. +
+ +
Method
+
PUT or POST
+ +
URL
+
`/v1/job//evaluate`
+ +
Parameters
+
+ None +
+ +
Returns
+
+ + ```javascript + { + "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", + "EvalCreateIndex": 35, + "JobModifyIndex": 34, + } + ``` + +
+
+ ## DELETE
@@ -209,134 +345,3 @@ region is used; another region can be specified using the `?region=` query param
- -# /v1/job/\/allocations -## GET - -
-
Description
-
- Query the allocations belonging to a single job. -
- -
Method
-
GET
- -
URL
-
`/v1/job//allocations`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - [ - { - "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", - "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Name": "binstore-storagelocker.binsl[0]", - "NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b", - "JobID": "binstore-storagelocker", - "TaskGroup": "binsl", - "DesiredStatus": "run", - "DesiredDescription": "", - "ClientStatus": "running", - "ClientDescription": "", - "CreateIndex": 16, - "ModifyIndex": 16 - }, - ... - ] - ``` - -
-
- -# /v1/job/\/evaluate -## PUT / POST - -
-
Description
-
- Creates a new evaluation for the given job. This can be used to force - run the scheduling logic if necessary. -
- -
Method
-
PUT or POST
- -
URL
-
`/v1/job//evaluate`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - { - "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", - "EvalCreateIndex": 35, - "JobModifyIndex": 34, - } - ``` - -
-
- -# /v1/job/\/evaluations -## GET - -
-
Description
-
- Query the evaluations belonging to a single job. -
- -
Method
-
GET
- -
URL
-
`/v1/job//evaluations`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - [ - { - "ID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Priority": 50, - "Type": "service", - "TriggeredBy": "job-register", - "JobID": "binstore-storagelocker", - "JobModifyIndex": 14, - "NodeID": "", - "NodeModifyIndex": 0, - "Status": "complete", - "StatusDescription": "", - "Wait": 0, - "NextEval": "", - "PreviousEval": "", - "CreateIndex": 15, - "ModifyIndex": 17 - }, - ... - ] - ``` - -
-
- diff --git a/website/source/docs/http/jobs.html.md b/website/source/docs/http/jobs.html.md index f724ce0ac..8f098b1ca 100644 --- a/website/source/docs/http/jobs.html.md +++ b/website/source/docs/http/jobs.html.md @@ -31,6 +31,11 @@ another region can be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -93,4 +98,3 @@ another region can be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/node.html.md b/website/source/docs/http/node.html.md index f16131f97..df09426d6 100644 --- a/website/source/docs/http/node.html.md +++ b/website/source/docs/http/node.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -82,9 +87,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/allocations -## GET -
Description
@@ -102,6 +104,11 @@ be specified using the `?region=` query parameter. None
+
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -128,7 +135,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/evaluate ## PUT / POST
@@ -163,9 +169,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/drain -## PUT / POST -
Description
@@ -175,7 +178,7 @@ be specified using the `?region=` query parameter.
Method
-
PUT or POSt
+
PUT or POST
URL
`/v1/node//drain`
@@ -205,4 +208,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/nodes.html.md b/website/source/docs/http/nodes.html.md index 36fa96fcd..b8e2b91a9 100644 --- a/website/source/docs/http/nodes.html.md +++ b/website/source/docs/http/nodes.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -53,5 +58,3 @@ be specified using the `?region=` query parameter.
- - From 83695cb5d10c09acc1e335081815b79d86e59e4f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 15:51:39 -0700 Subject: [PATCH 31/92] Added support for parsing restart blocks --- command/init.go | 11 +++++ jobspec/parse.go | 83 +++++++++++++++++++++++++++++++-- jobspec/parse_test.go | 5 ++ jobspec/test-fixtures/basic.hcl | 5 ++ nomad/structs/structs.go | 39 ++++++++++++++++ 5 files changed, 139 insertions(+), 4 deletions(-) diff --git a/command/init.go b/command/init.go index 0b9be934b..8827f5e9d 100644 --- a/command/init.go +++ b/command/init.go @@ -104,6 +104,17 @@ job "example" { # Defaults to 1 # count = 1 + # Restart Policy - This block defines the restart policy for TaskGroups + # attempts defines the number of restarts Nomad will do if Tasks + # in this TaskGroup fails in a rolling window of interval duration + # The delay value makes Nomad wait for that duration to restart after a Task + # fails or crashes. + restart { + interval = 5m + attempts = 10 + delay = 25s + } + # Define a task to run task "redis" { # Use Docker to run the task. diff --git a/jobspec/parse.go b/jobspec/parse.go index f63ac5294..c3c71ac9c 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -30,6 +30,7 @@ func Parse(r io.Reader) (*structs.Job, error) { // Parse the buffer obj, err := hcl.Parse(buf.String()) + if err != nil { return nil, fmt.Errorf("error parsing: %s", err) } @@ -124,7 +125,7 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { } } - // If we have tasks outside, do those + // If we have tasks outside, create TaskGroups for them if o := obj.Get("task", false); o != nil { var tasks []*structs.Task if err := parseTasks(&tasks, o); err != nil { @@ -134,9 +135,10 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2) for i, t := range tasks { result.TaskGroups[i] = &structs.TaskGroup{ - Name: t.Name, - Count: 1, - Tasks: []*structs.Task{t}, + Name: t.Name, + Count: 1, + Tasks: []*structs.Task{t}, + RestartPolicy: structs.NewRestartPolicy(result.Type), } } } @@ -180,6 +182,7 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { delete(m, "constraint") delete(m, "meta") delete(m, "task") + delete(m, "restart") // Default count to 1 if not specified if _, ok := m["count"]; !ok { @@ -200,6 +203,10 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { } } + if err := parseRestartPolicy(structs.NewRestartPolicy(result.Type), o); err != nil { + return err + } + // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. 
if metaO := o.Get("meta", false); metaO != nil { @@ -228,6 +235,42 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return nil } +func parseRestartPolicy(result *structs.RestartPolicy, obj *hclobj.Object) error { + var restartHclObj *hclobj.Object + var m map[string]interface{} + if restartHclObj = obj.Get("restart", false); restartHclObj == nil { + return nil + } + if err := hcl.DecodeObject(&m, restartHclObj); err != nil { + return err + } + + if delay, ok := m["delay"]; ok { + d, err := toDuration(delay) + if err != nil { + return fmt.Errorf("Invalid Delay time in restart policy: %v", err) + } + result.Delay = d + } + + if interval, ok := m["interval"]; ok { + i, err := toDuration(interval) + if err != nil { + return fmt.Errorf("Invalid Interval time in restart policy: %v", err) + } + result.Interval = i + } + + if attempts, ok := m["attempts"]; ok { + a, err := toInteger(attempts) + if err != nil { + return fmt.Errorf("Invalid value in attempts: %v", err) + } + result.Attempts = a + } + return nil +} + func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error { for _, o := range obj.Elem(false) { var m map[string]interface{} @@ -477,3 +520,35 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } return nil } + +func toDuration(value interface{}) (time.Duration, error) { + var dur time.Duration + var err error + switch v := value.(type) { + case string: + dur, err = time.ParseDuration(v) + case int: + dur = time.Duration(v) * time.Second + default: + err = fmt.Errorf("Invalid time %s", value) + } + + return dur, err +} + +func toInteger(value interface{}) (int, error) { + var integer int + var err error + switch v := value.(type) { + case string: + var i int64 + i, err = strconv.ParseInt(v, 10, 32) + integer = int(i) + case int: + integer = v + default: + err = fmt.Errorf("Value: %v can't be parsed into int", value) + } + + return integer, err +} diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index f91789ddb..c3b91e785 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -48,6 +48,11 @@ func TestParse(t *testing.T) { &structs.TaskGroup{ Name: "outside", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 2, + Interval: 1 * time.Minute, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "outside", diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 941272b2d..bf81a6ae7 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -31,6 +31,11 @@ job "binstore-storagelocker" { group "binsl" { count = 5 + restart { + attempts = 5 + interval = "10m" + delay = "15s" + } task "binstore" { driver = "docker" config { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f5d20552a..8afe1c452 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -898,6 +898,37 @@ func (u *UpdateStrategy) Rolling() bool { return u.Stagger > 0 && u.MaxParallel > 0 } +// RestartPolicy influences how Nomad restarts Tasks when they +// crash or fail. 
+type RestartPolicy struct { + Attempts int + Interval time.Duration + Delay time.Duration +} + +func (r *RestartPolicy) Validate() error { + if time.Duration(r.Attempts)*r.Delay > r.Interval { + return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) + } + return nil +} + +func NewRestartPolicy(jobType string) *RestartPolicy { + defaultDelayBetweenRestarts := 15 * time.Second + defaultAttempts := 15 + var defaultRestartInterval time.Duration + + if jobType == "service" { + defaultRestartInterval = 1 * time.Minute + defaultAttempts = 2 + } + return &RestartPolicy{ + Attempts: defaultAttempts, + Interval: defaultRestartInterval, + Delay: defaultDelayBetweenRestarts, + } +} + // TaskGroup is an atomic unit of placement. Each task group belongs to // a job and may contain any number of tasks. A task group support running // in many replicas using the same configuration.. @@ -913,6 +944,9 @@ type TaskGroup struct { // all the tasks contained. Constraints []*Constraint + //RestartPolicy of a TaskGroup + RestartPolicy *RestartPolicy + // Tasks are the collection of tasks that this task group needs to run Tasks []*Task @@ -940,6 +974,10 @@ func (tg *TaskGroup) Validate() error { } } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + // Check for duplicate tasks tasks := make(map[string]int) for idx, task := range tg.Tasks { @@ -954,6 +992,7 @@ func (tg *TaskGroup) Validate() error { // Validate the tasks for idx, task := range tg.Tasks { + if err := task.Validate(); err != nil { outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) mErr.Errors = append(mErr.Errors, outer) From e2f61e25e9529f439d3a9b7fc836004816d30f6a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 16:32:05 -0700 Subject: [PATCH 32/92] Sending restart policies to the Nomad API --- api/tasks.go | 23 ++++++++++++++++++----- jobspec/parse.go | 3 ++- jobspec/parse_test.go | 5 +++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index c1d5bf2ff..b2516e706 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -1,12 +1,25 @@ package api +import ( + "time" +) + +//RestartPolicy defines how the Nomad client restarts +//tasks in a taskgroup when they fail +type RestartPolicy struct { + Interval time.Duration + Attempts int + Delay time.Duration +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { - Name string - Count int - Constraints []*Constraint - Tasks []*Task - Meta map[string]string + Name string + Count int + Constraints []*Constraint + Tasks []*Task + RestartPolicy *RestartPolicy + Meta map[string]string } // NewTaskGroup creates a new TaskGroup. 
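As context for the restart-policy commits above: the `RestartPolicy.Validate` method added in this series enforces a single invariant, namely that `Attempts` restarts separated by `Delay` must fit inside the rolling `Interval`. The sketch below exercises that check using the `structs.RestartPolicy` type and `NewRestartPolicy` constructor exactly as they appear in the diff; the concrete numbers are illustrative only, not defaults taken from the code.

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// 10 attempts * 10s delay = 100s, which cannot fit in a 1-minute
	// interval, so Validate() should return an error here.
	bad := &structs.RestartPolicy{
		Attempts: 10,
		Delay:    10 * time.Second,
		Interval: 1 * time.Minute,
	}
	fmt.Println(bad.Validate())

	// The service-job default (2 attempts, 15s delay, 1m interval)
	// satisfies the invariant, so Validate() returns nil.
	good := structs.NewRestartPolicy(structs.JobTypeService)
	fmt.Println(good.Validate())
}
```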
diff --git a/jobspec/parse.go b/jobspec/parse.go index c3c71ac9c..548632239 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -202,8 +202,9 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return err } } + g.RestartPolicy = structs.NewRestartPolicy(result.Type) - if err := parseRestartPolicy(structs.NewRestartPolicy(result.Type), o); err != nil { + if err := parseRestartPolicy(g.RestartPolicy, o); err != nil { return err } diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index c3b91e785..e785443b7 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -82,6 +82,11 @@ func TestParse(t *testing.T) { "elb_interval": "10", "elb_checks": "3", }, + RestartPolicy: &structs.RestartPolicy{ + Interval: 10 * time.Minute, + Attempts: 5, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "binstore", From a035dcf2c0d96439bc3fe9bafd26f7039bae0381 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 16:49:08 -0700 Subject: [PATCH 33/92] Re-using toDuration while figuring out staggertime --- jobspec/parse.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/jobspec/parse.go b/jobspec/parse.go index 548632239..1c28d59ee 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -499,19 +499,11 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } for _, key := range []string{"stagger", "Stagger"} { if raw, ok := m[key]; ok { - switch v := raw.(type) { - case string: - dur, err := time.ParseDuration(v) - if err != nil { - return fmt.Errorf("invalid stagger time '%s'", raw) - } - m[key] = dur - case int: - m[key] = time.Duration(v) * time.Second - default: - return fmt.Errorf("invalid type for stagger time '%s'", - raw) + staggerTime, err := toDuration(raw) + if err != nil { + return fmt.Errorf("Invalid stagger time: %v", err) } + m[key] = staggerTime } } From 93cdcb5ac24d8430bd0fc83ad97c0616b087c65f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 18:34:23 -0700 Subject: [PATCH 34/92] Added the restart policies to mocks --- nomad/mock/mock.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 87c426dce..329ecd872 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -1,6 +1,9 @@ package mock -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "github.com/hashicorp/nomad/nomad/structs" + "time" +) func Node() *structs.Node { node := &structs.Node{ @@ -71,6 +74,11 @@ func Job() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 10, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", @@ -131,6 +139,11 @@ func SystemJob() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", From 0d17430306ecf6a535f5786c681e87aa7688ef44 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:06:56 -0700 Subject: [PATCH 35/92] Fixed grammer of comment --- command/init.go | 4 ++-- jobspec/parse.go | 1 - nomad/structs/structs.go | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/command/init.go b/command/init.go index 8827f5e9d..851f366be 100644 --- a/command/init.go +++ b/command/init.go @@ -104,8 +104,8 @@ job "example" { # Defaults to 1 # 
count = 1 - # Restart Policy - This block defines the restart policy for TaskGroups - # attempts defines the number of restarts Nomad will do if Tasks + # Restart Policy - This block defines the restart policy for TaskGroups, + # the attempts value defines the number of restarts Nomad will do if Tasks # in this TaskGroup fails in a rolling window of interval duration # The delay value makes Nomad wait for that duration to restart after a Task # fails or crashes. diff --git a/jobspec/parse.go b/jobspec/parse.go index 1c28d59ee..77f9b819f 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -30,7 +30,6 @@ func Parse(r io.Reader) (*structs.Job, error) { // Parse the buffer obj, err := hcl.Parse(buf.String()) - if err != nil { return nil, fmt.Errorf("error parsing: %s", err) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8afe1c452..a42a8f822 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -992,7 +992,6 @@ func (tg *TaskGroup) Validate() error { // Validate the tasks for idx, task := range tg.Tasks { - if err := task.Validate(); err != nil { outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) mErr.Errors = append(mErr.Errors, outer) From 67c21e4b31b1d4ab6e4bef7f2bbad572cf4cbeef Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:28:56 -0700 Subject: [PATCH 36/92] Added a RestartPolicy to some mocks --- api/compose_test.go | 1 + api/tasks.go | 14 ++++++++++++-- api/tasks_test.go | 5 +++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/api/compose_test.go b/api/compose_test.go index 68801519f..2a509bc55 100644 --- a/api/compose_test.go +++ b/api/compose_test.go @@ -69,6 +69,7 @@ func TestCompose(t *testing.T) { Operand: "=", }, }, + RestartPolicy: NewRestartPolicy(), Tasks: []*Task{ &Task{ Name: "task1", diff --git a/api/tasks.go b/api/tasks.go index b2516e706..3ef918850 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -12,6 +12,14 @@ type RestartPolicy struct { Delay time.Duration } +func NewRestartPolicy() *RestartPolicy { + return &RestartPolicy{ + Attempts: 10, + Interval: 3 * time.Minute, + Delay: 5 * time.Second, + } +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { Name string @@ -24,9 +32,11 @@ type TaskGroup struct { // NewTaskGroup creates a new TaskGroup. 
func NewTaskGroup(name string, count int) *TaskGroup { + restartPolicy := NewRestartPolicy() return &TaskGroup{ - Name: name, - Count: count, + Name: name, + Count: count, + RestartPolicy: restartPolicy, } } diff --git a/api/tasks_test.go b/api/tasks_test.go index 877f84d5c..945fdf9bf 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -8,8 +8,9 @@ import ( func TestTaskGroup_NewTaskGroup(t *testing.T) { grp := NewTaskGroup("grp1", 2) expect := &TaskGroup{ - Name: "grp1", - Count: 2, + Name: "grp1", + Count: 2, + RestartPolicy: NewRestartPolicy(), } if !reflect.DeepEqual(grp, expect) { t.Fatalf("expect: %#v, got: %#v", expect, grp) From 96f946b88e901641ad15ee1d93c6a1c36a6e4f83 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:43:00 -0700 Subject: [PATCH 37/92] Not validating task groups if it's nil in a job --- nomad/structs/structs.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a42a8f822..cf81c6afb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -974,8 +974,10 @@ func (tg *TaskGroup) Validate() error { } } - if err := tg.RestartPolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) + if tg.RestartPolicy != nil { + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } } // Check for duplicate tasks From 6fa5b45c3e3c3bfdcbe81f2ed6554073e2df672a Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 10:15:26 -0700 Subject: [PATCH 38/92] Remove redundant 'all' --- website/source/docs/agent/config.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 6f0b9b0dc..a8c6412a3 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -42,7 +42,7 @@ nodes, unless otherwise specified: as `us-west` and `us-east`. Defaults to `global`. * `datacenter`: Datacenter of the local agent. All members of a datacenter - should all share a local LAN connection. Defaults to `dc1`. + should share a local LAN connection. Defaults to `dc1`. * `name`: The name of the local node. This value is used to identify individual nodes in a given datacenter and must be unique From 4bdaa1bbc0057703ff667d5970e8efae7e490859 Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 11:10:12 -0700 Subject: [PATCH 39/92] appicable -> applicable --- website/source/docs/agent/config.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 6f0b9b0dc..7199e8160 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -103,7 +103,7 @@ nodes, unless otherwise specified: This can be used to advertise a different address to the peers of a server node to support more complex network configurations such as NAT. This configuration is optional, and defaults to the bind address of the specific - network service if it is not provided. This configuration is only appicable + network service if it is not provided. This configuration is only applicable on server nodes. The value is a map of IP addresses and supports the following keys:
From b770d1a7098be7536db7780a98678b8554c4406b Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 11:18:42 -0700 Subject: [PATCH 40/92] leave -> leaving --- website/source/docs/agent/config.html.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 7199e8160..5f186fdac 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -125,10 +125,10 @@ nodes, unless otherwise specified: * `disable_hostname`: A boolean indicating if gauge values should not be prefixed with the local hostname. -* `leave_on_interrupt`: Enables gracefully leave when receiving the +* `leave_on_interrupt`: Enables gracefully leaving when receiving the interrupt signal. By default, the agent will exit forcefully on any signal. -* `leave_on_terminate`: Enables gracefully leave when receiving the +* `leave_on_terminate`: Enables gracefully leaving when receiving the terminate signal. By default, the agent will exit forcefully on any signal. * `enable_syslog`: Enables logging to syslog. This option only work on From 614a01fb4bf8cd8644219ae84b4f23bc4404a46b Mon Sep 17 00:00:00 2001 From: Charlie O'Keefe Date: Mon, 2 Nov 2015 11:19:38 -0700 Subject: [PATCH 41/92] This option only work -> This option only works --- website/source/docs/agent/config.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 7199e8160..f2631ef45 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -131,7 +131,7 @@ nodes, unless otherwise specified: * `leave_on_terminate`: Enables gracefully leave when receiving the terminate signal. By default, the agent will exit forcefully on any signal. -* `enable_syslog`: Enables logging to syslog. This option only work on +* `enable_syslog`: Enables logging to syslog. This option only works on Unix based systems. * `syslog_facility`: Controls the syslog facility that is used. 
By default, From ec819f9761acc65dad3f1df9153e6c5cdbcc7d7a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 13:24:59 -0800 Subject: [PATCH 42/92] Fixing tests to not create a TG without restart policies --- api/tasks.go | 4 ++-- nomad/structs/structs.go | 6 ++---- nomad/structs/structs_test.go | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 3ef918850..2535d5ec5 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -4,8 +4,8 @@ import ( "time" ) -//RestartPolicy defines how the Nomad client restarts -//tasks in a taskgroup when they fail +// RestartPolicy defines how the Nomad client restarts +// tasks in a taskgroup when they fail type RestartPolicy struct { Interval time.Duration Attempts int diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index cf81c6afb..a42a8f822 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -974,10 +974,8 @@ func (tg *TaskGroup) Validate() error { } } - if tg.RestartPolicy != nil { - if err := tg.RestartPolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) - } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) } // Check for duplicate tasks diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index cabf83dfa..1f107b095 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1,11 +1,11 @@ package structs import ( + "github.com/hashicorp/go-multierror" "reflect" "strings" "testing" - - "github.com/hashicorp/go-multierror" + "time" ) func TestJob_Validate(t *testing.T) { @@ -44,11 +44,27 @@ func TestJob_Validate(t *testing.T) { TaskGroups: []*TaskGroup{ &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + }, + &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, - &TaskGroup{}, }, } err = j.Validate() @@ -65,7 +81,13 @@ func TestJob_Validate(t *testing.T) { } func TestTaskGroup_Validate(t *testing.T) { - tg := &TaskGroup{} + tg := &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + } err := tg.Validate() mErr := err.(*multierror.Error) if !strings.Contains(mErr.Errors[0].Error(), "group name") { @@ -86,6 +108,11 @@ func TestTaskGroup_Validate(t *testing.T) { &Task{Name: "web"}, &Task{}, }, + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, } err = tg.Validate() mErr = err.(*multierror.Error) From c7d31e56839f30c95756052029a6e3925cec1d56 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 13:35:51 -0800 Subject: [PATCH 43/92] Declaring Batch and Service default restart policies --- nomad/structs/structs.go | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a42a8f822..f6feaa3de 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,8 +14,17 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to 
region") + BatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } + ServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } ) type MessageType uint8 @@ -914,18 +923,13 @@ func (r *RestartPolicy) Validate() error { } func NewRestartPolicy(jobType string) *RestartPolicy { - defaultDelayBetweenRestarts := 15 * time.Second - defaultAttempts := 15 - var defaultRestartInterval time.Duration - - if jobType == "service" { - defaultRestartInterval = 1 * time.Minute - defaultAttempts = 2 - } - return &RestartPolicy{ - Attempts: defaultAttempts, - Interval: defaultRestartInterval, - Delay: defaultDelayBetweenRestarts, + switch jobType { + case JobTypeService: + return &ServiceJobRestartPolicy + case JobTypeBatch: + return &BatchJobRestartPolicy + default: + return nil } } From 795c786ca51bd6b79e6ea967ba269ebfc865b95e Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 15:04:04 -0800 Subject: [PATCH 44/92] Fixed the tests --- nomad/structs/structs.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f6feaa3de..589781580 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,17 +14,8 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") - BatchJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } - ServiceJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") ) type MessageType uint8 @@ -925,9 +916,16 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService: - return &ServiceJobRestartPolicy + return &RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } case JobTypeBatch: - return &BatchJobRestartPolicy + return &RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } default: return nil } From ef841d5e89f638a7de184643ef5c05e956473f92 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 17:00:17 -0800 Subject: [PATCH 45/92] Introducing vars to create default batch and service restart policies --- nomad/structs/structs.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 589781580..15e58d333 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -16,6 +16,15 @@ import ( var ( ErrNoLeader = fmt.Errorf("No cluster leader") ErrNoRegionPath = fmt.Errorf("No path to region") + defaultServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } + defaultBatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } ) type MessageType uint8 @@ -916,19 +925,13 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService: - return &RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + rp := defaultServiceJobRestartPolicy + return &rp case JobTypeBatch: - return &RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } - default: - return nil + rp := defaultBatchJobRestartPolicy + return &rp } 
+ return nil } // TaskGroup is an atomic unit of placement. Each task group belongs to From 6a56218fb79e7be603d81d27ba43936cb3a81bec Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 17:30:41 -0800 Subject: [PATCH 46/92] Fixed the restart policy syntax --- command/init.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/init.go b/command/init.go index 851f366be..356337ae8 100644 --- a/command/init.go +++ b/command/init.go @@ -110,9 +110,9 @@ job "example" { # The delay value makes Nomad wait for that duration to restart after a Task # fails or crashes. restart { - interval = 5m + interval = "5m" attempts = 10 - delay = 25s + delay = "25s" } # Define a task to run From 3576f489932ede9b12640d5bbfbf44324f0cc527 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 2 Nov 2015 20:28:37 -0800 Subject: [PATCH 47/92] Create Spawn pkg that handles IPC with the spawn-daemon and update exec_linux to use that --- client/driver/exec_test.go | 2 +- client/executor/exec_linux.go | 221 +++++------------------ client/spawn/spawn.go | 322 ++++++++++++++++++++++++++++++++++ client/spawn/spawn_test.go | 252 ++++++++++++++++++++++++++ command/spawn_daemon.go | 47 +++-- helper/discover/discover.go | 10 +- 6 files changed, 657 insertions(+), 197 deletions(-) create mode 100644 client/spawn/spawn.go create mode 100644 client/spawn/spawn_test.go diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index 488847c5c..1bb4adf36 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -293,7 +293,7 @@ func TestExecDriver_Start_Kill_Wait(t *testing.T) { if err == nil { t.Fatal("should err") } - case <-time.After(2 * time.Second): + case <-time.After(8 * time.Second): t.Fatalf("timeout") } } diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index 9c4bcd9a4..be70379d2 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -5,12 +5,9 @@ import ( "encoding/json" "errors" "fmt" - "io" "os" - "os/exec" "os/user" "path/filepath" - "strconv" "strings" "syscall" @@ -18,8 +15,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/driver/args" "github.com/hashicorp/nomad/client/driver/environment" - "github.com/hashicorp/nomad/command" - "github.com/hashicorp/nomad/helper/discover" + "github.com/hashicorp/nomad/client/spawn" "github.com/hashicorp/nomad/nomad/structs" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -53,18 +49,13 @@ type LinuxExecutor struct { // Isolation configurations. groups *cgroupConfig.Cgroup - alloc *allocdir.AllocDir taskName string taskDir string + allocDir string - // Tracking of spawn process. - spawnChild *os.Process - spawnOutputWriter *os.File - spawnOutputReader *os.File - - // Tracking of user process. - exitStatusFile string - userPid int + // Spawn process. + spawn *spawn.Spawner + spawnState string } func (e *LinuxExecutor) Command() *cmd { @@ -82,11 +73,9 @@ func (e *LinuxExecutor) Limit(resources *structs.Resources) error { // execLinuxID contains the necessary information to reattach to an executed // process and cleanup the created cgroups. type ExecLinuxID struct { - Groups *cgroupConfig.Cgroup - SpawnPid int - UserPid int - ExitStatusFile string - TaskDir string + Groups *cgroupConfig.Cgroup + Spawn *spawn.Spawner + TaskDir string } func (e *LinuxExecutor) Open(id string) error { @@ -99,30 +88,22 @@ func (e *LinuxExecutor) Open(id string) error { // Setup the executor. 
e.groups = execID.Groups - e.exitStatusFile = execID.ExitStatusFile - e.userPid = execID.UserPid + e.spawn = execID.Spawn e.taskDir = execID.TaskDir - proc, err := os.FindProcess(execID.SpawnPid) - if proc != nil && err == nil { - e.spawnChild = proc - } - return nil } func (e *LinuxExecutor) ID() (string, error) { - if e.spawnChild == nil { - return "", fmt.Errorf("Process has finished or was never started") + if e.groups == nil || e.spawn == nil || e.taskDir == "" { + return "", fmt.Errorf("LinuxExecutor not properly initialized.") } // Build the ID. id := ExecLinuxID{ - Groups: e.groups, - SpawnPid: e.spawnChild.Pid, - UserPid: e.userPid, - ExitStatusFile: e.exitStatusFile, - TaskDir: e.taskDir, + Groups: e.groups, + Spawn: e.spawn, + TaskDir: e.taskDir, } var buffer bytes.Buffer @@ -170,10 +151,6 @@ func (e *LinuxExecutor) Start() error { e.cmd.SetGID(e.user.Gid) } - if e.alloc == nil { - return errors.New("ConfigureTaskDir() must be called before Start()") - } - // Parse the commands arguments and replace instances of Nomad environment // variables. envVars, err := environment.ParseFromList(e.Cmd.Env) @@ -196,129 +173,42 @@ func (e *LinuxExecutor) Start() error { } e.Cmd.Args = parsed - return e.spawnDaemon() -} + spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) + e.spawn = spawn.NewSpawner(spawnState) + e.spawn.SetCommand(&e.cmd.Cmd) + e.spawn.SetChroot(e.taskDir) + e.spawn.SetLogs(&spawn.Logs{ + Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + Stdin: "/dev/null", + }) -// spawnDaemon executes a double fork to start the user command with proper -// isolation. Stores the child process for use in Wait. -func (e *LinuxExecutor) spawnDaemon() error { - bin, err := discover.NomadExecutable() - if err != nil { - return fmt.Errorf("Failed to determine the nomad executable: %v", err) - } + enterCgroup := func(pid int) error { + // Join the spawn-daemon to the cgroup. + manager := e.getCgroupManager(e.groups) - c := command.DaemonConfig{ - Cmd: e.cmd.Cmd, - Chroot: e.taskDir, - StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), - StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), - StdinFile: "/dev/null", - ExitStatusFile: e.exitStatusFile, - } - - // Serialize the cmd and the cgroup configuration so it can be passed to the - // sub-process. - var buffer bytes.Buffer - enc := json.NewEncoder(&buffer) - if err := enc.Encode(c); err != nil { - return fmt.Errorf("Failed to serialize daemon configuration: %v", err) - } - - // Create a pipe to capture stdout. - if e.spawnOutputReader, e.spawnOutputWriter, err = os.Pipe(); err != nil { - return err - } - - // Call ourselves using a hidden flag. The new instance of nomad will join - // the passed cgroup, forkExec the cmd, and return statuses through stdout. - escaped := strconv.Quote(buffer.String()) - spawn := exec.Command(bin, "spawn-daemon", escaped) - spawn.Stdout = e.spawnOutputWriter - - // Capture its Stdin. - spawnStdIn, err := spawn.StdinPipe() - if err != nil { - return err - } - - if err := spawn.Start(); err != nil { - fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) - } - - // Join the spawn-daemon to the cgroup. - manager := e.getCgroupManager(e.groups) - - // Apply will place the spawn dameon into the created cgroups. 
- if err := manager.Apply(spawn.Process.Pid); err != nil { - errs := new(multierror.Error) - errs = multierror.Append(errs, - fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err)) - - if err := sendAbortCommand(spawnStdIn); err != nil { - errs = multierror.Append(errs, err) + // Apply will place the spawn dameon into the created cgroups. + if err := manager.Apply(pid); err != nil { + return fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err) } - return errs + return nil } - // Tell it to start. - if err := sendStartCommand(spawnStdIn); err != nil { - return err - } - - // Parse the response. - dec := json.NewDecoder(e.spawnOutputReader) - var resp command.SpawnStartStatus - if err := dec.Decode(&resp); err != nil { - return fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) - } - - if resp.ErrorMsg != "" { - return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) - } - - e.userPid = resp.UserPID - e.spawnChild = spawn.Process - return nil -} - -// sendStartCommand sends the necessary command to the spawn-daemon to have it -// start the user process. -func sendStartCommand(w io.Writer) error { - enc := json.NewEncoder(w) - if err := enc.Encode(true); err != nil { - return fmt.Errorf("Failed to serialize start command: %v", err) - } - - return nil -} - -// sendAbortCommand sends the necessary command to the spawn-daemon to have it -// abort starting the user process. This should be invoked if the spawn-daemon -// could not be isolated into a cgroup. -func sendAbortCommand(w io.Writer) error { - enc := json.NewEncoder(w) - if err := enc.Encode(false); err != nil { - return fmt.Errorf("Failed to serialize abort command: %v", err) - } - - return nil + return e.spawn.Spawn(enterCgroup) } // Wait waits til the user process exits and returns an error on non-zero exit // codes. Wait also cleans up the task directory and created cgroups. func (e *LinuxExecutor) Wait() error { - if e.spawnOutputReader != nil { - e.spawnOutputReader.Close() - } - - if e.spawnOutputWriter != nil { - e.spawnOutputWriter.Close() - } - errs := new(multierror.Error) - if err := e.spawnWait(); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Pid, err)) + code, err := e.spawn.Wait() + if err != nil { + errs = multierror.Append(errs, err) + } + + if code != 0 { + errs = multierror.Append(errs, fmt.Errorf("Task exited with code: %d", code)) } if err := e.destroyCgroup(); err != nil { @@ -332,20 +222,6 @@ func (e *LinuxExecutor) Wait() error { return errs.ErrorOrNil() } -// spawnWait waits on the spawn-daemon and can handle the spawn-daemon not being -// a child of this process. -func (e *LinuxExecutor) spawnWait() error { - // TODO: This needs to be able to wait on non-child processes. - state, err := e.spawnChild.Wait() - if err != nil { - return err - } else if !state.Success() { - return fmt.Errorf("exited with non-zero code") - } - - return nil -} - func (e *LinuxExecutor) Shutdown() error { return e.ForceStop() } @@ -353,19 +229,9 @@ func (e *LinuxExecutor) Shutdown() error { // ForceStop immediately exits the user process and cleans up both the task // directory and the cgroups. 
func (e *LinuxExecutor) ForceStop() error { - if e.spawnOutputReader != nil { - e.spawnOutputReader.Close() - } - - if e.spawnOutputWriter != nil { - e.spawnOutputWriter.Close() - } - errs := new(multierror.Error) - if e.groups != nil { - if err := e.destroyCgroup(); err != nil { - errs = multierror.Append(errs, err) - } + if err := e.destroyCgroup(); err != nil { + errs = multierror.Append(errs, err) } if err := e.cleanTaskDir(); err != nil { @@ -381,6 +247,8 @@ func (e *LinuxExecutor) ForceStop() error { // chroot. cleanTaskDir should be called after. func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { e.taskName = taskName + e.allocDir = alloc.AllocDir + taskDir, ok := alloc.TaskDirs[taskName] if !ok { fmt.Errorf("Couldn't find task directory for task %v", taskName) @@ -424,10 +292,6 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) e.Cmd.Env = env.List() - // Store the file path to save the exit status to. - e.exitStatusFile = filepath.Join(alloc.AllocDir, fmt.Sprintf("%s_%s", taskName, "exit_status")) - - e.alloc = alloc return nil } @@ -445,6 +309,7 @@ func (e *LinuxExecutor) pathExists(path string) bool { // should be called when tearing down the task. func (e *LinuxExecutor) cleanTaskDir() error { // Unmount dev. + // TODO: This should check if it is a mount. errs := new(multierror.Error) dev := filepath.Join(e.taskDir, "dev") if e.pathExists(dev) { diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go new file mode 100644 index 000000000..fa75b3940 --- /dev/null +++ b/client/spawn/spawn.go @@ -0,0 +1,322 @@ +package spawn + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "time" + + "github.com/docker/docker/vendor/src/gopkg.in/fsnotify.v1" + "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/command" + "github.com/hashicorp/nomad/helper/discover" +) + +// Spawner is used to start a user command in an isolated fashion that is +// resistent to Nomad agent failure. +type Spawner struct { + spawn *os.Process + SpawnPid int + SpawnPpid int + StateFile string + + // User configuration + UserCmd *exec.Cmd + Logs *Logs + Chroot string +} + +// Logs is used to define the filepaths the user command's logs should be +// redirected to. The files do not need to exist. +type Logs struct { + Stdin, Stdout, Stderr string +} + +// NewSpawner takes a path to a state file. This state file can be used to +// create a new Spawner that can be used to wait on the exit status of a +// process even through Nomad restarts. +func NewSpawner(stateFile string) *Spawner { + return &Spawner{StateFile: stateFile} +} + +// SetCommand sets the user command to spawn. +func (s *Spawner) SetCommand(cmd *exec.Cmd) { + s.UserCmd = cmd +} + +// SetLogs sets the redirection of user command log files. +func (s *Spawner) SetLogs(l *Logs) { + s.Logs = l +} + +// SetChroot puts the user command into a chroot. +func (s *Spawner) SetChroot(root string) { + s.Chroot = root +} + +// Spawn does a double-fork to start and isolate the user command. It takes a +// call-back that is invoked with the pid of the intermediary process. If the +// call back returns an error, the user command is not started and the spawn is +// cancelled. This can be used to put the process into a cgroup or jail and +// cancel starting the user process if that was not successful. 
An error is +// returned if the call-back returns an error or the user-command couldn't be +// started. +func (s *Spawner) Spawn(cb func(pid int) error) error { + bin, err := discover.NomadExecutable() + if err != nil { + return fmt.Errorf("Failed to determine the nomad executable: %v", err) + } + + exitFile, err := os.OpenFile(s.StateFile, os.O_CREATE|os.O_WRONLY, 0666) + defer exitFile.Close() + if err != nil { + return fmt.Errorf("Error opening file to store exit status: %v", err) + } + + config, err := s.spawnConfig() + if err != nil { + return err + } + + spawn := exec.Command(bin, "spawn-daemon", config) + + // Capture stdout + spawnStdout, err := spawn.StdoutPipe() + defer spawnStdout.Close() + if err != nil { + return fmt.Errorf("Failed to capture spawn-daemon stdout: %v", err) + } + + // Capture stdin. + spawnStdin, err := spawn.StdinPipe() + defer spawnStdin.Close() + if err != nil { + return fmt.Errorf("Failed to capture spawn-daemon stdin: %v", err) + } + + if err := spawn.Start(); err != nil { + return fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) + } + + if cb != nil { + cbErr := cb(spawn.Process.Pid) + if cbErr != nil { + errs := new(multierror.Error) + errs = multierror.Append(errs, cbErr) + if err := s.sendAbortCommand(spawnStdin); err != nil { + errs = multierror.Append(errs, err) + } + + return errs + } + } + + if err := s.sendStartCommand(spawnStdin); err != nil { + return err + } + + respCh := make(chan command.SpawnStartStatus, 1) + errCh := make(chan error, 1) + + go func() { + var resp command.SpawnStartStatus + dec := json.NewDecoder(spawnStdout) + if err := dec.Decode(&resp); err != nil { + errCh <- fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) + } + respCh <- resp + }() + + select { + case err := <-errCh: + return err + case resp := <-respCh: + if resp.ErrorMsg != "" { + return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) + } + case <-time.After(5 * time.Second): + return fmt.Errorf("timed out waiting for response") + } + + // Store the spawn process. + s.spawn = spawn.Process + s.SpawnPpid = os.Getpid() + return nil +} + +// spawnConfig returns a serialized config to pass to the Nomad spawn-daemon +// command. +func (s *Spawner) spawnConfig() (string, error) { + if s.UserCmd == nil { + return "", fmt.Errorf("Must specify user command") + } + + config := command.DaemonConfig{ + Cmd: *s.UserCmd, + Chroot: s.Chroot, + ExitStatusFile: s.StateFile, + } + + if s.Logs != nil { + config.StdoutFile = s.Logs.Stdout + config.StdinFile = s.Logs.Stdin + config.StderrFile = s.Logs.Stderr + } + + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(config); err != nil { + return "", fmt.Errorf("Failed to serialize configuration: %v", err) + } + + return strconv.Quote(buffer.String()), nil +} + +// sendStartCommand sends the necessary command to the spawn-daemon to have it +// start the user process. +func (s *Spawner) sendStartCommand(w io.Writer) error { + enc := json.NewEncoder(w) + if err := enc.Encode(true); err != nil { + return fmt.Errorf("Failed to serialize start command: %v", err) + } + + return nil +} + +// sendAbortCommand sends the necessary command to the spawn-daemon to have it +// abort starting the user process. This should be invoked if the spawn-daemon +// could not be isolated into a cgroup. 
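// Taken together, sendStartCommand and sendAbortCommand are one half of a very
// small wire protocol: the Spawner writes a single JSON boolean to the
// spawn-daemon's stdin (true to start, false to abort) and then reads one JSON
// SpawnStartStatus back from its stdout. A hedged sketch of the daemon-side
// half, purely for illustration (the real logic lives in
// command/spawn_daemon.go and also handles log redirection and errors):
//
//	var start bool
//	if err := json.NewDecoder(os.Stdin).Decode(&start); err != nil || !start {
//		os.Exit(1) // aborted before the user command was started
//	}
//	// ... start the user command as userCmd (assumed) ...
//	resp := command.SpawnStartStatus{UserPID: userCmd.Process.Pid}
//	json.NewEncoder(os.Stdout).Encode(&resp)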
+func (s *Spawner) sendAbortCommand(w io.Writer) error { + enc := json.NewEncoder(w) + if err := enc.Encode(false); err != nil { + return fmt.Errorf("Failed to serialize abort command: %v", err) + } + + return nil +} + +// Wait returns the exit code of the user process or an error if the wait +// failed. +func (s *Spawner) Wait() (int, error) { + if os.Getpid() == s.SpawnPpid { + return s.waitAsParent() + } + + return s.waitOnStatusFile() +} + +// waitAsParent waits on the process if the current process was the spawner. +func (s *Spawner) waitAsParent() (int, error) { + if s.SpawnPpid != os.Getpid() { + return -1, fmt.Errorf("not the parent. Spawner parent is %v; current pid is %v", s.SpawnPpid, os.Getpid()) + } + + // Try to reattach to the spawn. + if s.spawn == nil { + // If it can't be reattached, it means the spawn process has exited so + // we should just read its exit file. + var err error + if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil { + return s.waitOnStatusFile() + } + } + + if state, err := s.spawn.Wait(); err != nil { + return -1, err + } else if !state.Exited() { + return -1, fmt.Errorf("Task was killed or crashed") + } + + return s.waitOnStatusFile() +} + +// waitOnStatusFile uses OS level file watching APIs to wait on the status file +// and returns the exit code and possibly an error. +func (s *Spawner) waitOnStatusFile() (int, error) { + // Set up a watcher for the exit status file. + watcher, err := fsnotify.NewWatcher() + if err != nil { + return -1, fmt.Errorf("Failed to create file watcher to read exit code: %v", err) + } + + if err := watcher.Add(s.StateFile); err != nil { + return -1, fmt.Errorf("Failed to watch %v to read exit code: %v", s.StateFile, err) + } + + // Stat to check if it is there to avoid a race condition. + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + // If there is data it means that the file has already been written. + if stat.Size() > 0 { + return s.readExitCode() + } + + // Store the mod time as a way to heartbeat. If the file doesn't get touched + // then we know the spawner has died. This avoids an infinite loop. + prevModTime := stat.ModTime() + + // Wait on watcher. + for { + select { + case event := <-watcher.Events: + if event.Op&fsnotify.Write == fsnotify.Write { + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + if stat.Size() > 0 { + return s.readExitCode() + } + } + case err := <-watcher.Errors: + return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) + case <-time.After(5 * time.Second): + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + modTime := stat.ModTime() + if modTime.Equal(prevModTime) { + return -1, fmt.Errorf("Task is dead and exit code unreadable") + } + + prevModTime = modTime + } + } +} + +// readExitCode parses the state file and returns the exit code of the task. It +// returns an error if the file can't be read. 
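// The file parsed below is the other half of the exit-status contract: once
// the user command finishes, the spawn-daemon serializes a
// command.SpawnExitStatus into StateFile. A rough sketch of that writer side,
// assuming userCmd is the already-started user command (the actual code is
// writeExitStatus in command/spawn_daemon.go):
//
//	code := 0
//	if err := userCmd.Wait(); err != nil {
//		if exitErr, ok := err.(*exec.ExitError); ok {
//			code = exitErr.Sys().(syscall.WaitStatus).ExitStatus()
//		}
//	}
//	json.NewEncoder(exitFile).Encode(command.SpawnExitStatus{ExitCode: code})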
+func (s *Spawner) readExitCode() (int, error) { + f, err := os.Open(s.StateFile) + defer f.Close() + if err != nil { + return -1, fmt.Errorf("Failed to open %v to read exit code: %v", s.StateFile, err) + } + + stat, err := f.Stat() + if err != nil { + return -1, fmt.Errorf("Failed to stat file %v: %v", s.StateFile, err) + } + + if stat.Size() == 0 { + return -1, fmt.Errorf("Empty state file: %v", s.StateFile) + } + + var exitStatus command.SpawnExitStatus + dec := json.NewDecoder(f) + if err := dec.Decode(&exitStatus); err != nil { + return -1, fmt.Errorf("Failed to parse exit status from %v: %v", s.StateFile, err) + } + + return exitStatus.ExitCode, nil +} diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go new file mode 100644 index 000000000..d624f9d9f --- /dev/null +++ b/client/spawn/spawn_test.go @@ -0,0 +1,252 @@ +package spawn + +import ( + "fmt" + "io/ioutil" + "os" + "os/exec" + "runtime" + "strings" + "testing" + "time" +) + +func TestSpawn_NoCmd(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + if err := spawn.Spawn(nil); err == nil { + t.Fatalf("Spawn() with no user command should fail") + } +} + +func TestSpawn_InvalidCmd(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("foo")) + if err := spawn.Spawn(nil); err == nil { + t.Fatalf("Spawn() with no invalid command should fail") + } +} + +func TestSpawn_SetsLogs(t *testing.T) { + // TODO: Figure out why this test fails. If the spawn-daemon directly writes + // to the opened stdout file it works but not the user command. Maybe a + // flush issue? + if runtime.GOOS == "windows" { + t.Skip("Test fails on windows; unknown reason. Skipping") + } + + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + exp := "foo" + spawn.SetCommand(exec.Command("echo", exp)) + + // Create file for stdout. 
+ stdout, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(stdout.Name()) + spawn.SetLogs(&Logs{Stdout: stdout.Name()}) + + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed: %v", err) + } + + if code, err := spawn.Wait(); code != 0 && err != nil { + t.Fatalf("Wait() returned %v, %v; want 0, nil", code, err) + } + + stdout2, err := os.Open(stdout.Name()) + if err != nil { + t.Fatalf("Open() failed: %v", err) + } + + data, err := ioutil.ReadAll(stdout2) + if err != nil { + t.Fatalf("ReadAll() failed: %v", err) + } + + act := strings.TrimSpace(string(data)) + if act != exp { + t.Fatalf("Unexpected data written to stdout; got %v; want %v", act, exp) + } +} + +func TestSpawn_Callback(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "1")) + + called := false + cbErr := fmt.Errorf("ERROR CB") + cb := func(_ int) error { + called = true + return cbErr + } + + if err := spawn.Spawn(cb); err == nil { + t.Fatalf("Spawn(%#v) should have errored; want %v", cb, err, cbErr) + } + + if !called { + t.Fatalf("Spawn(%#v) didn't call callback", cb) + } +} + +func TestSpawn_ParentWaitExited(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("echo", "foo")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + time.Sleep(1 * time.Second) + + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_ParentWait(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_NonParentWaitExited(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("echo", "foo")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + time.Sleep(1 * time.Second) + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_NonParentWait(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + // Force the wait to assume non-parent. 
+ spawn.SpawnPpid = 0 + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_DeadSpawnDaemon(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "5")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 81117ce2e..81f5ca2ca 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "syscall" + "time" ) type SpawnDaemonCommand struct { @@ -108,24 +109,31 @@ func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) { // configureLogs creates the log files and redirects the process // stdin/stderr/stdout to them. If unsuccessful, an error is returned. func (c *SpawnDaemonCommand) configureLogs() error { - stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return fmt.Errorf("Error opening file to redirect stdout: %v", err) + if len(c.config.StdoutFile) != 0 { + stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdout: %v", err) + } + + c.config.Cmd.Stdout = stdo } - stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return fmt.Errorf("Error opening file to redirect stderr: %v", err) + if len(c.config.StderrFile) != 0 { + stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stderr: %v", err) + } + c.config.Cmd.Stderr = stde } - stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return fmt.Errorf("Error opening file to redirect stdin: %v", err) + if len(c.config.StdinFile) != 0 { + stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdin: %v", err) + } + c.config.Cmd.Stdin = stdi } - c.config.Cmd.Stdout = stdo - c.config.Cmd.Stderr = stde - c.config.Cmd.Stdin = stdi return nil } @@ -139,7 +147,7 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Open the file we will be using to write exit codes to. We do this early // to ensure that we don't start the user process when we can't capture its // exit status. - c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_CREATE|os.O_RDWR, 0666) + c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_WRONLY, 0666) if err != nil { return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1) } @@ -177,6 +185,17 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Indicate that the command was started successfully. c.outputStartStatus(nil, 0) + // Start a go routine that touches the exit file periodically. 
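	// (This touch acts as a liveness heartbeat for the Spawner: waitOnStatusFile
	// in client/spawn/spawn.go declares the task dead if the file's mtime has
	// not advanced across its 5-second poll, so the 2-second Chtimes interval
	// below has to stay well inside that window.)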
+ go func() { + for { + select { + case <-time.After(2 * time.Second): + now := time.Now() + os.Chtimes(c.config.ExitStatusFile, now, now) + } + } + }() + // Wait and then output the exit status. return c.writeExitStatus(c.config.Cmd.Wait()) } @@ -192,7 +211,7 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } - if c.config != nil && c.config.Process != nil { + if c.config != nil && c.config.Cmd.Process != nil { startStatus.UserPID = c.config.Process.Pid } diff --git a/helper/discover/discover.go b/helper/discover/discover.go index d90ddb4cc..d172970f7 100644 --- a/helper/discover/discover.go +++ b/helper/discover/discover.go @@ -4,17 +4,19 @@ import ( "fmt" "os" "path/filepath" + "runtime" "github.com/kardianos/osext" ) -const ( - nomadExe = "nomad" -) - // Checks the current executable, then $GOPATH/bin, and finally the CWD, in that // order. If it can't be found, an error is returned. func NomadExecutable() (string, error) { + nomadExe := "nomad" + if runtime.GOOS == "windows" { + nomadExe = "nomad.exe" + } + // Check the current executable. bin, err := osext.Executable() if err != nil { From 84dc194d8a586e177300eb0a33f7aefb922225dd Mon Sep 17 00:00:00 2001 From: Kenjiro Nakayama Date: Wed, 4 Nov 2015 00:06:14 +0900 Subject: [PATCH 48/92] Use const value for AWS metadata URL --- client/fingerprint/env_aws.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 839285a1d..575409bf8 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -15,6 +15,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +// This is where the AWS metadata server normally resides. We hardcode the +// "instance" path as well since it's the only one we access here. 
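// Every lookup below checks the AWS_ENV_URL environment variable before
// falling back to this constant, so the fingerprinter can be exercised off of
// EC2 by pointing it at a stand-in server. A hedged sketch of what a test
// might do (the handler and the metadata key it answers are assumptions, not
// part of this change):
//
//	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
//		fmt.Fprint(w, "m4.large") // e.g. respond to .../instance-type
//	}))
//	defer srv.Close()
//	os.Setenv("AWS_ENV_URL", srv.URL+"/latest/meta-data/")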
+const DEFAULT_AWS_URL = "http//169.254.169.254/latest/meta-data/" + // map of instance type to approximate speed, in Mbits/s // http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797 // which itself cites these sources: @@ -89,7 +93,7 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) } metadataURL := os.Getenv("AWS_ENV_URL") if metadataURL == "" { - metadataURL = "http://169.254.169.254/latest/meta-data/" + metadataURL = DEFAULT_AWS_URL } // assume 2 seconds is enough time for inside AWS network @@ -161,7 +165,7 @@ func isAWS() bool { // provide their own metadataURL := os.Getenv("AWS_ENV_URL") if metadataURL == "" { - metadataURL = "http://169.254.169.254/latest/meta-data/" + metadataURL = DEFAULT_AWS_URL } // assume 2 seconds is enough time for inside AWS network @@ -205,7 +209,7 @@ func (f *EnvAWSFingerprint) linkSpeed() int { // the network speed metadataURL := os.Getenv("AWS_ENV_URL") if metadataURL == "" { - metadataURL = "http://169.254.169.254/latest/meta-data/" + metadataURL = DEFAULT_AWS_URL } // assume 2 seconds is enough time for inside AWS network From 6cf8eeb21618125358a0507a80e5da63ba22981e Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 10:50:30 -0800 Subject: [PATCH 49/92] Small improvements --- client/driver/java_test.go | 2 +- client/executor/exec_linux.go | 14 ++++++++++---- client/spawn/spawn.go | 4 +--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/client/driver/java_test.go b/client/driver/java_test.go index ad8f5e578..eecfc0faf 100644 --- a/client/driver/java_test.go +++ b/client/driver/java_test.go @@ -179,7 +179,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { if err == nil { t.Fatal("should err") } - case <-time.After(2 * time.Second): + case <-time.After(8 * time.Second): t.Fatalf("timeout") } diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index be70379d2..35090be78 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -54,8 +54,7 @@ type LinuxExecutor struct { allocDir string // Spawn process. - spawn *spawn.Spawner - spawnState string + spawn *spawn.Spawner } func (e *LinuxExecutor) Command() *cmd { @@ -180,7 +179,7 @@ func (e *LinuxExecutor) Start() error { e.spawn.SetLogs(&spawn.Logs{ Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), - Stdin: "/dev/null", + Stdin: os.DevNull, }) enterCgroup := func(pid int) error { @@ -309,13 +308,16 @@ func (e *LinuxExecutor) pathExists(path string) bool { // should be called when tearing down the task. func (e *LinuxExecutor) cleanTaskDir() error { // Unmount dev. - // TODO: This should check if it is a mount. errs := new(multierror.Error) dev := filepath.Join(e.taskDir, "dev") if e.pathExists(dev) { if err := syscall.Unmount(dev, 0); err != nil { errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) } + + if err := os.RemoveAll(dev); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err)) + } } // Unmount proc. 
@@ -324,6 +326,10 @@ func (e *LinuxExecutor) cleanTaskDir() error { if err := syscall.Unmount(proc, 0); err != nil { errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) } + + if err := os.RemoveAll(proc); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", dev, err)) + } } return errs.ErrorOrNil() diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index fa75b3940..4b9bb5ddc 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -225,10 +225,8 @@ func (s *Spawner) waitAsParent() (int, error) { } } - if state, err := s.spawn.Wait(); err != nil { + if _, err := s.spawn.Wait(); err != nil { return -1, err - } else if !state.Exited() { - return -1, fmt.Errorf("Task was killed or crashed") } return s.waitOnStatusFile() From 13ea9bc9fff4da16852838da31aec30a635a8339 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 12:47:48 -0800 Subject: [PATCH 50/92] Make a basic executor that can be shared and fix some fingerprinting/tests --- client/driver/java.go | 4 +- client/driver/java_test.go | 6 +- client/executor/exec_basic.go | 107 +++++++++++++++++++++++++++ client/executor/exec_universal.go | 24 ++---- client/testutil/driver_compatible.go | 6 ++ 5 files changed, 125 insertions(+), 22 deletions(-) create mode 100644 client/executor/exec_basic.go diff --git a/client/driver/java.go b/client/driver/java.go index ac2c3c6f3..8aad1dd65 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -38,8 +38,8 @@ func NewJavaDriver(ctx *DriverContext) Driver { func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { // Only enable if we are root when running on non-windows systems. - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { - d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling") + if runtime.GOOS == "linux" && syscall.Geteuid() != 0 { + d.logger.Printf("[DEBUG] driver.java: must run as root user on linux, disabling") return false, nil } diff --git a/client/driver/java_test.go b/client/driver/java_test.go index eecfc0faf..b4f2f2e15 100644 --- a/client/driver/java_test.go +++ b/client/driver/java_test.go @@ -19,7 +19,7 @@ func javaLocated() bool { // The fingerprinter test should always pass, even if Java is not installed. 
func TestJavaDriver_Fingerprint(t *testing.T) { - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) d := NewJavaDriver(testDriverContext("")) node := &structs.Node{ Attributes: make(map[string]string), @@ -93,7 +93,7 @@ func TestJavaDriver_Start_Wait(t *testing.T) { t.Skip("Java not found; skipping") } - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) task := &structs.Task{ Name: "demo-app", Config: map[string]string{ @@ -141,7 +141,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { t.Skip("Java not found; skipping") } - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) task := &structs.Task{ Name: "demo-app", Config: map[string]string{ diff --git a/client/executor/exec_basic.go b/client/executor/exec_basic.go new file mode 100644 index 000000000..81f17d414 --- /dev/null +++ b/client/executor/exec_basic.go @@ -0,0 +1,107 @@ +package executor + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/driver/environment" + "github.com/hashicorp/nomad/nomad/structs" +) + +// BasicExecutor should work everywhere, and as a result does not include +// any resource restrictions or runas capabilities. +type BasicExecutor struct { + cmd +} + +// TODO: Update to use the Spawner. +// TODO: Have raw_exec use this as well. +func NewBasicExecutor() Executor { + return &BasicExecutor{} +} + +func (e *BasicExecutor) Limit(resources *structs.Resources) error { + if resources == nil { + return errNoResources + } + return nil +} + +func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { + taskDir, ok := alloc.TaskDirs[taskName] + if !ok { + return fmt.Errorf("Error finding task dir for (%s)", taskName) + } + e.Dir = taskDir + return nil +} + +func (e *BasicExecutor) Start() error { + // Parse the commands arguments and replace instances of Nomad environment + // variables. + envVars, err := environment.ParseFromList(e.cmd.Env) + if err != nil { + return err + } + + parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) + if err != nil { + return err + } else if len(parsedPath) != 1 { + return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) + } + + e.cmd.Path = parsedPath[0] + combined := strings.Join(e.cmd.Args, " ") + parsed, err := args.ParseAndReplace(combined, envVars.Map()) + if err != nil { + return err + } + e.Cmd.Args = parsed + + // We don't want to call ourself. We want to call Start on our embedded Cmd + return e.cmd.Start() +} + +func (e *BasicExecutor) Open(pid string) error { + pidNum, err := strconv.Atoi(pid) + if err != nil { + return fmt.Errorf("Failed to parse pid %v: %v", pid, err) + } + + process, err := os.FindProcess(pidNum) + if err != nil { + return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) + } + e.Process = process + return nil +} + +func (e *BasicExecutor) Wait() error { + // We don't want to call ourself. 
We want to call Start on our embedded Cmd + return e.cmd.Wait() +} + +func (e *BasicExecutor) ID() (string, error) { + if e.cmd.Process != nil { + return strconv.Itoa(e.cmd.Process.Pid), nil + } else { + return "", fmt.Errorf("Process has finished or was never started") + } +} + +func (e *BasicExecutor) Shutdown() error { + return e.ForceStop() +} + +func (e *BasicExecutor) ForceStop() error { + return e.Process.Kill() +} + +func (e *BasicExecutor) Command() *cmd { + return &e.cmd +} diff --git a/client/executor/exec_universal.go b/client/executor/exec_universal.go index 4979ae3b7..318faea4b 100644 --- a/client/executor/exec_universal.go +++ b/client/executor/exec_universal.go @@ -2,21 +2,11 @@ package executor -import ( - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/nomad/structs" -) +func NewExecutor() Executor { + return &UniversalExecutor{BasicExecutor{}} +} -// UniversalExecutor exists to make the exec driver compile on all operating systems. -type UniversalExecutor struct{} - -func NewExecutor() Executor { return &UniversalExecutor{} } -func (e *UniversalExecutor) Limit(resources *structs.Resources) error { return nil } -func (e *UniversalExecutor) ConfigureTaskDir(string, *allocdir.AllocDir) error { return nil } -func (e *UniversalExecutor) Start() error { return nil } -func (e *UniversalExecutor) Open(pid string) error { return nil } -func (e *UniversalExecutor) Wait() error { return nil } -func (e *UniversalExecutor) ID() (string, error) { return "", nil } -func (e *UniversalExecutor) Shutdown() error { return nil } -func (e *UniversalExecutor) ForceStop() error { return nil } -func (e *UniversalExecutor) Command() *cmd { return nil } +// UniversalExecutor wraps the BasicExecutor +type UniversalExecutor struct { + BasicExecutor +} diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index 94ae6225c..768051e63 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -13,6 +13,12 @@ func ExecCompatible(t *testing.T) { } } +func JavaCompatible(t *testing.T) { + if runtime.GOOS == "linux" && syscall.Geteuid() != 0 { + t.Skip("Test only available when running as root on linux") + } +} + func QemuCompatible(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("Must be on non-windows environments to run test") From fc5b418e7e2377afdeea33b416106e991984c79f Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 12:57:39 -0800 Subject: [PATCH 51/92] Update website --- website/source/docs/drivers/exec.html.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/website/source/docs/drivers/exec.html.md b/website/source/docs/drivers/exec.html.md index dadf28549..e82aa1505 100644 --- a/website/source/docs/drivers/exec.html.md +++ b/website/source/docs/drivers/exec.html.md @@ -11,7 +11,7 @@ description: |- Name: `exec` The `exec` driver is used to simply execute a particular command for a task. -However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation +However, unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation primitives of the operating system to limit the tasks access to resources. While simple, since the `exec` driver can invoke any command, it can be used to call scripts or other wrappers which provide higher level features. 
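As an aside on the driver internals changed in the preceding patches: the
executor package behind this driver exposes a small lifecycle, and the flow a
driver roughly follows is sketched below. This is illustrative only; task,
allocDir, and the literal command are stand-ins rather than code from these
patches.

	func startTask(task *structs.Task, allocDir *allocdir.AllocDir) (string, error) {
		e := executor.NewExecutor()
		e.Command().Path = "/bin/date"
		e.Command().Args = []string{"/bin/date"}

		if err := e.ConfigureTaskDir(task.Name, allocDir); err != nil {
			return "", err
		}
		if err := e.Limit(task.Resources); err != nil {
			return "", err
		}
		if err := e.Start(); err != nil {
			return "", err
		}

		// The returned ID is an opaque handle; persisting it lets a restarted
		// agent recover the task via executor.OpenId(id) and Wait() as before.
		return e.ID()
	}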
@@ -28,9 +28,10 @@ must reference it in the `command` as show in the examples below ## Client Requirements -The `exec` driver can run on all supported operating systems but to provide -proper isolation the client must be run as root on non-Windows operating systems. -Further, to support cgroups, `/sys/fs/cgroups/` must be mounted. +The `exec` driver can only be run when on Linux and running Nomad as root. +`exec` is limited to this configuration because currently isolation of resources +is only guaranteed on Linux. Further the host must have cgroups mounted properly +in order for the driver to work. You must specify a `command` to be executed. Optionally you can specify an `artifact_source` to be downloaded as well. Any `command` is assumed to be present on the @@ -68,8 +69,5 @@ The `exec` driver will set the following client attributes: The resource isolation provided varies by the operating system of the client and the configuration. -On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the +On Linux, Nomad will use cgroups, and a chroot to isolate the resources of a process and as such the Nomad agent must be run as root. - -On Windows, the task driver will just execute the command with no additional -resource isolation. From 2291ea9060b756677733c1585775ea5452419aa4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 13:26:09 -0800 Subject: [PATCH 52/92] Search path --- helper/discover/discover.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/helper/discover/discover.go b/helper/discover/discover.go index d172970f7..8582a0133 100644 --- a/helper/discover/discover.go +++ b/helper/discover/discover.go @@ -3,6 +3,7 @@ package discover import ( "fmt" "os" + "os/exec" "path/filepath" "runtime" @@ -27,6 +28,11 @@ func NomadExecutable() (string, error) { return bin, nil } + // Check the $PATH + if bin, err := exec.LookPath(nomadExe); err == nil { + return bin, nil + } + // Check the $GOPATH. 
bin = filepath.Join(os.Getenv("GOPATH"), "bin", nomadExe) if _, err := os.Stat(bin); err == nil { From a6f9aeb1b1823d9342dba703cbe49de14dcfe2fa Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 3 Nov 2015 13:37:45 -0800 Subject: [PATCH 53/92] Vet errors --- client/spawn/spawn.go | 2 +- client/spawn/spawn_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index 4b9bb5ddc..ac0d8c444 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -10,7 +10,7 @@ import ( "strconv" "time" - "github.com/docker/docker/vendor/src/gopkg.in/fsnotify.v1" + "github.com/go-fsnotify/fsnotify" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/command" "github.com/hashicorp/nomad/helper/discover" diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index d624f9d9f..e8ddfbaf5 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -106,7 +106,7 @@ func TestSpawn_Callback(t *testing.T) { } if err := spawn.Spawn(cb); err == nil { - t.Fatalf("Spawn(%#v) should have errored; want %v", cb, err, cbErr) + t.Fatalf("Spawn(%#v) should have errored; want %v", cb, cbErr) } if !called { From 1870f9b79994213d3c96596ebe0c92d4d70981b3 Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Tue, 3 Nov 2015 15:54:29 -0600 Subject: [PATCH 54/92] go fmt this file --- nomad/structs/structs.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 15e58d333..bfec26fce 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,17 +14,17 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") - defaultServiceJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") + defaultServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } defaultBatchJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } + Delay: 15 * time.Second, + Attempts: 15, + } ) type MessageType uint8 @@ -928,7 +928,7 @@ func NewRestartPolicy(jobType string) *RestartPolicy { rp := defaultServiceJobRestartPolicy return &rp case JobTypeBatch: - rp := defaultBatchJobRestartPolicy + rp := defaultBatchJobRestartPolicy return &rp } return nil From 2a1577ec8823500d82cd9cdf9fbb8c6acb9d3e93 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 4 Nov 2015 11:18:17 -0800 Subject: [PATCH 55/92] nomad/watch: add a note about the Item struct --- nomad/watch/watch.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go index 102e535b2..4e9bafbc9 100644 --- a/nomad/watch/watch.go +++ b/nomad/watch/watch.go @@ -5,7 +5,9 @@ package watch // the underlying state store. // Item describes the scope of a watch. It is used to provide a uniform -// input for subscribe/unsubscribe and notification firing. +// input for subscribe/unsubscribe and notification firing. Specifying +// multiple fields does not place a watch on multiple items. Each Item +// describes exactly one scoped watch. 
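// For example, a consumer interested in both a specific allocation and the
// allocations created by a particular evaluation registers two Items, not one
// Item with both fields set (subscribe stands in for whatever registration
// call the state store exposes; it is not part of this change):
//
//	subscribe(watch.Item{Alloc: allocID})
//	subscribe(watch.Item{AllocEval: evalID})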
type Item struct { Alloc string AllocEval string From 4e88552044d4cc6bb0722febab3f76e0e5f7ed1f Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Wed, 4 Nov 2015 11:22:20 -0800 Subject: [PATCH 56/92] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 786380eef..118e0cfc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.2.0 (Unreleased) + +FEATURES: + + * Blocking queries supported in API [GH-366] + ## 0.1.2 (October 6, 2015) IMPROVEMENTS: From 4958be618c269ec649289740cfb72233d78567bc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 14:50:44 -0800 Subject: [PATCH 57/92] Check if the PID is alive instead of heartbeating through modify time --- client/spawn/spawn.go | 15 ++----------- client/spawn/spawn_posix.go | 14 ++++++++++++ client/spawn/spawn_test.go | 41 ++++++++++++++++++++++++++++++++++- client/spawn/spawn_windows.go | 21 ++++++++++++++++++ command/spawn_daemon.go | 12 ---------- 5 files changed, 77 insertions(+), 26 deletions(-) create mode 100644 client/spawn/spawn_posix.go create mode 100644 client/spawn/spawn_windows.go diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index ac0d8c444..5338b8777 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -256,10 +256,6 @@ func (s *Spawner) waitOnStatusFile() (int, error) { return s.readExitCode() } - // Store the mod time as a way to heartbeat. If the file doesn't get touched - // then we know the spawner has died. This avoids an infinite loop. - prevModTime := stat.ModTime() - // Wait on watcher. for { select { @@ -277,17 +273,10 @@ func (s *Spawner) waitOnStatusFile() (int, error) { case err := <-watcher.Errors: return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) case <-time.After(5 * time.Second): - stat, err := os.Stat(s.StateFile) - if err != nil { - return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) - } - - modTime := stat.ModTime() - if modTime.Equal(prevModTime) { + // Check if the process is still alive. 
+ if !s.Alive() { return -1, fmt.Errorf("Task is dead and exit code unreadable") } - - prevModTime = modTime } } } diff --git a/client/spawn/spawn_posix.go b/client/spawn/spawn_posix.go new file mode 100644 index 000000000..7df381064 --- /dev/null +++ b/client/spawn/spawn_posix.go @@ -0,0 +1,14 @@ +// +build !windows + +package spawn + +import "syscall" + +func (s *Spawner) Alive() bool { + if s.spawn == nil { + return false + } + + err := s.spawn.Signal(syscall.Signal(0)) + return err == nil +} diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index e8ddfbaf5..9553470a0 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -214,7 +214,7 @@ func TestSpawn_NonParentWait(t *testing.T) { } } -func TestSpawn_DeadSpawnDaemon(t *testing.T) { +func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) { f, err := ioutil.TempFile("", "") if err != nil { t.Fatalf("TempFile() failed") @@ -250,3 +250,42 @@ func TestSpawn_DeadSpawnDaemon(t *testing.T) { t.Fatalf("Wait() should have failed: %v", err) } } + +func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "5")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} diff --git a/client/spawn/spawn_windows.go b/client/spawn/spawn_windows.go new file mode 100644 index 000000000..9683dce97 --- /dev/null +++ b/client/spawn/spawn_windows.go @@ -0,0 +1,21 @@ +package spawn + +import "syscall" + +const STILL_ACTIVE = 259 + +func (s *Spawner) Alive() bool { + const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE + h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid)) + if e != nil { + return false + } + + var ec uint32 + e = syscall.GetExitCodeProcess(h, &ec) + if e != nil { + return false + } + + return ec == STILL_ACTIVE +} diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 81f5ca2ca..52ffd8e6c 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -9,7 +9,6 @@ import ( "strconv" "strings" "syscall" - "time" ) type SpawnDaemonCommand struct { @@ -185,17 +184,6 @@ func (c *SpawnDaemonCommand) Run(args []string) int { // Indicate that the command was started successfully. c.outputStartStatus(nil, 0) - // Start a go routine that touches the exit file periodically. - go func() { - for { - select { - case <-time.After(2 * time.Second): - now := time.Now() - os.Chtimes(c.config.ExitStatusFile, now, now) - } - } - }() - // Wait and then output the exit status. 
return c.writeExitStatus(c.config.Cmd.Wait()) } From c94bf6b7c940c0aa4bf8ee05e8efabcb7c4f0b0c Mon Sep 17 00:00:00 2001 From: Kenjiro Nakayama Date: Tue, 3 Nov 2015 23:40:24 +0900 Subject: [PATCH 58/92] Apply SELinux label to allocate directory of docker driver --- client/driver/docker.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index bbd52a9d8..d5031d5d9 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -108,8 +108,10 @@ func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Ta } return []string{ - fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocName), - fmt.Sprintf("%s:%s", local, allocdir.TaskLocal), + // "z" and "Z" option is to allocate directory with SELinux label. + fmt.Sprintf("%s:/%s:rw,z", shared, allocdir.SharedAllocName), + // capital "Z" will label with Multi-Category Security (MCS) labels + fmt.Sprintf("%s:/%s:rw,Z", local, allocdir.TaskLocal), }, nil } From 29d72b7477cf2a21732c0783bafa17a318c5f121 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 16:38:28 -0800 Subject: [PATCH 59/92] Remove file watching --- client/spawn/spawn.go | 52 ++++++++++---------------------------- client/spawn/spawn_test.go | 11 +++++++- 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/client/spawn/spawn.go b/client/spawn/spawn.go index 5338b8777..ef160611e 100644 --- a/client/spawn/spawn.go +++ b/client/spawn/spawn.go @@ -10,7 +10,6 @@ import ( "strconv" "time" - "github.com/go-fsnotify/fsnotify" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/command" "github.com/hashicorp/nomad/helper/discover" @@ -144,6 +143,7 @@ func (s *Spawner) Spawn(cb func(pid int) error) error { // Store the spawn process. s.spawn = spawn.Process + s.SpawnPid = s.spawn.Pid s.SpawnPpid = os.Getpid() return nil } @@ -206,7 +206,7 @@ func (s *Spawner) Wait() (int, error) { return s.waitAsParent() } - return s.waitOnStatusFile() + return s.pollWait() } // waitAsParent waits on the process if the current process was the spawner. @@ -221,7 +221,7 @@ func (s *Spawner) waitAsParent() (int, error) { // we should just read its exit file. var err error if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil { - return s.waitOnStatusFile() + return s.pollWait() } } @@ -229,22 +229,13 @@ func (s *Spawner) waitAsParent() (int, error) { return -1, err } - return s.waitOnStatusFile() + return s.pollWait() } -// waitOnStatusFile uses OS level file watching APIs to wait on the status file -// and returns the exit code and possibly an error. -func (s *Spawner) waitOnStatusFile() (int, error) { - // Set up a watcher for the exit status file. - watcher, err := fsnotify.NewWatcher() - if err != nil { - return -1, fmt.Errorf("Failed to create file watcher to read exit code: %v", err) - } - - if err := watcher.Add(s.StateFile); err != nil { - return -1, fmt.Errorf("Failed to watch %v to read exit code: %v", s.StateFile, err) - } - +// pollWait polls on the spawn daemon to determine when it exits. After it +// exits, it reads the state file and returns the exit code and possibly an +// error. +func (s *Spawner) pollWait() (int, error) { // Stat to check if it is there to avoid a race condition. stat, err := os.Stat(s.StateFile) if err != nil { @@ -256,29 +247,14 @@ func (s *Spawner) waitOnStatusFile() (int, error) { return s.readExitCode() } - // Wait on watcher. 
- for { - select { - case event := <-watcher.Events: - if event.Op&fsnotify.Write == fsnotify.Write { - stat, err := os.Stat(s.StateFile) - if err != nil { - return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) - } - - if stat.Size() > 0 { - return s.readExitCode() - } - } - case err := <-watcher.Errors: - return -1, fmt.Errorf("Failed to watch %v for an exit code: %v", s.StateFile, err) - case <-time.After(5 * time.Second): - // Check if the process is still alive. - if !s.Alive() { - return -1, fmt.Errorf("Task is dead and exit code unreadable") - } + // Read after the process exits. + for _ = range time.Tick(5 * time.Second) { + if !s.Alive() { + break } } + + return s.readExitCode() } // readExitCode parses the state file and returns the exit code of the task. It diff --git a/client/spawn/spawn_test.go b/client/spawn/spawn_test.go index 9553470a0..bbb8c8dca 100644 --- a/client/spawn/spawn_test.go +++ b/client/spawn/spawn_test.go @@ -202,6 +202,15 @@ func TestSpawn_NonParentWait(t *testing.T) { t.Fatalf("Spawn() failed %v", err) } + // Need to wait on the spawner, otherwise it becomes a zombie and the test + // only finishes after the init process cleans it. This speeds that up. + go func() { + time.Sleep(3 * time.Second) + if _, err := spawn.spawn.Wait(); err != nil { + t.FailNow() + } + }() + // Force the wait to assume non-parent. spawn.SpawnPpid = 0 code, err := spawn.Wait() @@ -265,7 +274,7 @@ func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) { } spawn := NewSpawner(f.Name()) - spawn.SetCommand(exec.Command("sleep", "5")) + spawn.SetCommand(exec.Command("sleep", "2")) if err := spawn.Spawn(cb); err != nil { t.Fatalf("Spawn() errored: %v", err) } From 0f1050b1bf471da9fedaf2ddd8550618d7701acc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 16:53:27 -0800 Subject: [PATCH 60/92] Move the executor and spawn package into driver --- client/driver/exec.go | 2 +- client/{ => driver}/executor/exec.go | 0 client/{ => driver}/executor/exec_basic.go | 0 client/{ => driver}/executor/exec_linux.go | 2 +- client/{ => driver}/executor/exec_linux_test.go | 0 client/{ => driver}/executor/exec_universal.go | 0 client/{ => driver}/executor/setuid.go | 0 client/{ => driver}/executor/setuid_windows.go | 0 client/driver/java.go | 2 +- client/{ => driver}/spawn/spawn.go | 0 client/{ => driver}/spawn/spawn_posix.go | 0 client/{ => driver}/spawn/spawn_test.go | 0 client/{ => driver}/spawn/spawn_windows.go | 0 13 files changed, 3 insertions(+), 3 deletions(-) rename client/{ => driver}/executor/exec.go (100%) rename client/{ => driver}/executor/exec_basic.go (100%) rename client/{ => driver}/executor/exec_linux.go (99%) rename client/{ => driver}/executor/exec_linux_test.go (100%) rename client/{ => driver}/executor/exec_universal.go (100%) rename client/{ => driver}/executor/setuid.go (100%) rename client/{ => driver}/executor/setuid_windows.go (100%) rename client/{ => driver}/spawn/spawn.go (100%) rename client/{ => driver}/spawn/spawn_posix.go (100%) rename client/{ => driver}/spawn/spawn_test.go (100%) rename client/{ => driver}/spawn/spawn_windows.go (100%) diff --git a/client/driver/exec.go b/client/driver/exec.go index cbcb85a0a..e48604894 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -12,7 +12,7 @@ import ( "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/executor" + "github.com/hashicorp/nomad/client/driver/executor" 
"github.com/hashicorp/nomad/nomad/structs" ) diff --git a/client/executor/exec.go b/client/driver/executor/exec.go similarity index 100% rename from client/executor/exec.go rename to client/driver/executor/exec.go diff --git a/client/executor/exec_basic.go b/client/driver/executor/exec_basic.go similarity index 100% rename from client/executor/exec_basic.go rename to client/driver/executor/exec_basic.go diff --git a/client/executor/exec_linux.go b/client/driver/executor/exec_linux.go similarity index 99% rename from client/executor/exec_linux.go rename to client/driver/executor/exec_linux.go index 35090be78..1b4b312bf 100644 --- a/client/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -15,7 +15,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/driver/args" "github.com/hashicorp/nomad/client/driver/environment" - "github.com/hashicorp/nomad/client/spawn" + "github.com/hashicorp/nomad/client/driver/spawn" "github.com/hashicorp/nomad/nomad/structs" "github.com/opencontainers/runc/libcontainer/cgroups" diff --git a/client/executor/exec_linux_test.go b/client/driver/executor/exec_linux_test.go similarity index 100% rename from client/executor/exec_linux_test.go rename to client/driver/executor/exec_linux_test.go diff --git a/client/executor/exec_universal.go b/client/driver/executor/exec_universal.go similarity index 100% rename from client/executor/exec_universal.go rename to client/driver/executor/exec_universal.go diff --git a/client/executor/setuid.go b/client/driver/executor/setuid.go similarity index 100% rename from client/executor/setuid.go rename to client/driver/executor/setuid.go diff --git a/client/executor/setuid_windows.go b/client/driver/executor/setuid_windows.go similarity index 100% rename from client/executor/setuid_windows.go rename to client/driver/executor/setuid_windows.go diff --git a/client/driver/java.go b/client/driver/java.go index 8aad1dd65..e7563f6e2 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -14,7 +14,7 @@ import ( "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/executor" + "github.com/hashicorp/nomad/client/driver/executor" "github.com/hashicorp/nomad/nomad/structs" ) diff --git a/client/spawn/spawn.go b/client/driver/spawn/spawn.go similarity index 100% rename from client/spawn/spawn.go rename to client/driver/spawn/spawn.go diff --git a/client/spawn/spawn_posix.go b/client/driver/spawn/spawn_posix.go similarity index 100% rename from client/spawn/spawn_posix.go rename to client/driver/spawn/spawn_posix.go diff --git a/client/spawn/spawn_test.go b/client/driver/spawn/spawn_test.go similarity index 100% rename from client/spawn/spawn_test.go rename to client/driver/spawn/spawn_test.go diff --git a/client/spawn/spawn_windows.go b/client/driver/spawn/spawn_windows.go similarity index 100% rename from client/spawn/spawn_windows.go rename to client/driver/spawn/spawn_windows.go From 8ed031c45e4d2eccc2f5100f0841c91aebd2b1a4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 4 Nov 2015 17:20:52 -0800 Subject: [PATCH 61/92] Get rid of exec.cmd struct and setuid file --- client/driver/executor/exec.go | 16 +----- client/driver/executor/exec_basic.go | 14 ++--- client/driver/executor/exec_linux.go | 68 +++++++++++++----------- client/driver/executor/setuid.go | 41 -------------- client/driver/executor/setuid_windows.go | 13 ----- 5 files changed, 44 insertions(+), 108 
deletions(-) delete mode 100644 client/driver/executor/setuid.go delete mode 100644 client/driver/executor/setuid_windows.go diff --git a/client/driver/executor/exec.go b/client/driver/executor/exec.go index ca104ca3e..b3878ba0c 100644 --- a/client/driver/executor/exec.go +++ b/client/driver/executor/exec.go @@ -70,7 +70,7 @@ type Executor interface { // Command provides access the underlying Cmd struct in case the Executor // interface doesn't expose the functionality you need. - Command() *cmd + Command() *exec.Cmd } // Command is a mirror of exec.Command that returns a platform-specific Executor @@ -100,17 +100,3 @@ func OpenId(id string) (Executor, error) { } return executor, nil } - -// Cmd is an extension of exec.Cmd that incorporates functionality for -// re-attaching to processes, dropping priviledges, etc., based on platform- -// specific implementations. -type cmd struct { - exec.Cmd - - // Resources is used to limit CPU and RAM used by the process, by way of - // cgroups or a similar mechanism. - Resources structs.Resources - - // RunAs may be a username or Uid. The implementation will decide how to use it. - RunAs string -} diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go index 81f17d414..b9bdebd7d 100644 --- a/client/driver/executor/exec_basic.go +++ b/client/driver/executor/exec_basic.go @@ -3,6 +3,7 @@ package executor import ( "fmt" "os" + "os/exec" "strconv" "strings" @@ -15,10 +16,9 @@ import ( // BasicExecutor should work everywhere, and as a result does not include // any resource restrictions or runas capabilities. type BasicExecutor struct { - cmd + cmd exec.Cmd } -// TODO: Update to use the Spawner. // TODO: Have raw_exec use this as well. func NewBasicExecutor() Executor { return &BasicExecutor{} @@ -36,7 +36,7 @@ func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD if !ok { return fmt.Errorf("Error finding task dir for (%s)", taskName) } - e.Dir = taskDir + e.cmd.Dir = taskDir return nil } @@ -61,7 +61,7 @@ func (e *BasicExecutor) Start() error { if err != nil { return err } - e.Cmd.Args = parsed + e.cmd.Args = parsed // We don't want to call ourself. We want to call Start on our embedded Cmd return e.cmd.Start() @@ -77,7 +77,7 @@ func (e *BasicExecutor) Open(pid string) error { if err != nil { return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) } - e.Process = process + e.cmd.Process = process return nil } @@ -99,9 +99,9 @@ func (e *BasicExecutor) Shutdown() error { } func (e *BasicExecutor) ForceStop() error { - return e.Process.Kill() + return e.cmd.Process.Kill() } -func (e *BasicExecutor) Command() *cmd { +func (e *BasicExecutor) Command() *exec.Cmd { return &e.cmd } diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go index 1b4b312bf..a75440ba8 100644 --- a/client/driver/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -6,8 +6,10 @@ import ( "errors" "fmt" "os" + "os/exec" "os/user" "path/filepath" + "strconv" "strings" "syscall" @@ -44,7 +46,7 @@ func NewExecutor() Executor { // Linux executor is designed to run on linux kernel 2.8+. type LinuxExecutor struct { - cmd + cmd exec.Cmd user *user.User // Isolation configurations. 
@@ -57,7 +59,7 @@ type LinuxExecutor struct { spawn *spawn.Spawner } -func (e *LinuxExecutor) Command() *cmd { +func (e *LinuxExecutor) Command() *exec.Cmd { return &e.cmd } @@ -114,45 +116,47 @@ func (e *LinuxExecutor) ID() (string, error) { return buffer.String(), nil } -// runAs takes a user id as a string and looks up the user. It stores the -// results in the executor and returns an error if the user could not be found. +// runAs takes a user id as a string and looks up the user, and sets the command +// to execute as that user. func (e *LinuxExecutor) runAs(userid string) error { - errs := new(multierror.Error) - - // First, try to lookup the user by uid - u, err := user.LookupId(userid) - if err == nil { - e.user = u - return nil - } else { - errs = multierror.Append(errs, err) + u, err := user.Lookup(userid) + if err != nil { + return fmt.Errorf("Failed to identify user %v: %v", userid, err) } - // Lookup failed, so try by username instead - u, err = user.Lookup(userid) - if err == nil { - e.user = u - return nil - } else { - errs = multierror.Append(errs, err) + // Convert the uid and gid + uid, err := strconv.ParseUint(u.Uid, 10, 32) + if err != nil { + return fmt.Errorf("Unable to convert userid to uint32: %s", err) + } + gid, err := strconv.ParseUint(u.Gid, 10, 32) + if err != nil { + return fmt.Errorf("Unable to convert groupid to uint32: %s", err) } - // If we got here we failed to lookup based on id and username, so we'll - // return those errors. - return fmt.Errorf("Failed to identify user to run as: %s", errs) + // Set the command to run as that user and group. + if e.cmd.SysProcAttr == nil { + e.cmd.SysProcAttr = &syscall.SysProcAttr{} + } + if e.cmd.SysProcAttr.Credential == nil { + e.cmd.SysProcAttr.Credential = &syscall.Credential{} + } + e.cmd.SysProcAttr.Credential.Uid = uint32(uid) + e.cmd.SysProcAttr.Credential.Gid = uint32(gid) + + return nil } func (e *LinuxExecutor) Start() error { // Run as "nobody" user so we don't leak root privilege to the spawned // process. - if err := e.runAs("nobody"); err == nil && e.user != nil { - e.cmd.SetUID(e.user.Uid) - e.cmd.SetGID(e.user.Gid) + if err := e.runAs("nobody"); err != nil { + return err } // Parse the commands arguments and replace instances of Nomad environment // variables. - envVars, err := environment.ParseFromList(e.Cmd.Env) + envVars, err := environment.ParseFromList(e.cmd.Env) if err != nil { return err } @@ -165,16 +169,16 @@ func (e *LinuxExecutor) Start() error { } e.cmd.Path = parsedPath[0] - combined := strings.Join(e.Cmd.Args, " ") + combined := strings.Join(e.cmd.Args, " ") parsed, err := args.ParseAndReplace(combined, envVars.Map()) if err != nil { return err } - e.Cmd.Args = parsed + e.cmd.Args = parsed spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) e.spawn = spawn.NewSpawner(spawnState) - e.spawn.SetCommand(&e.cmd.Cmd) + e.spawn.SetCommand(&e.cmd) e.spawn.SetChroot(e.taskDir) e.spawn.SetLogs(&spawn.Logs{ Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), @@ -283,13 +287,13 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD } // Set the tasks AllocDir environment variable. 
- env, err := environment.ParseFromList(e.Cmd.Env) + env, err := environment.ParseFromList(e.cmd.Env) if err != nil { return err } env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) - e.Cmd.Env = env.List() + e.cmd.Env = env.List() return nil } diff --git a/client/driver/executor/setuid.go b/client/driver/executor/setuid.go deleted file mode 100644 index 4793f8e2c..000000000 --- a/client/driver/executor/setuid.go +++ /dev/null @@ -1,41 +0,0 @@ -// +build !windows - -package executor - -import ( - "fmt" - "strconv" - "syscall" -) - -// SetUID changes the Uid for this command (must be set before starting) -func (c *cmd) SetUID(userid string) error { - uid, err := strconv.ParseUint(userid, 10, 32) - if err != nil { - return fmt.Errorf("Unable to convert userid to uint32: %s", err) - } - if c.SysProcAttr == nil { - c.SysProcAttr = &syscall.SysProcAttr{} - } - if c.SysProcAttr.Credential == nil { - c.SysProcAttr.Credential = &syscall.Credential{} - } - c.SysProcAttr.Credential.Uid = uint32(uid) - return nil -} - -// SetGID changes the Gid for this command (must be set before starting) -func (c *cmd) SetGID(groupid string) error { - gid, err := strconv.ParseUint(groupid, 10, 32) - if err != nil { - return fmt.Errorf("Unable to convert groupid to uint32: %s", err) - } - if c.SysProcAttr == nil { - c.SysProcAttr = &syscall.SysProcAttr{} - } - if c.SysProcAttr.Credential == nil { - c.SysProcAttr.Credential = &syscall.Credential{} - } - c.SysProcAttr.Credential.Gid = uint32(gid) - return nil -} diff --git a/client/driver/executor/setuid_windows.go b/client/driver/executor/setuid_windows.go deleted file mode 100644 index 9c18bed53..000000000 --- a/client/driver/executor/setuid_windows.go +++ /dev/null @@ -1,13 +0,0 @@ -package executor - -// SetUID changes the Uid for this command (must be set before starting) -func (c *cmd) SetUID(userid string) error { - // TODO implement something for windows - return nil -} - -// SetGID changes the Gid for this command (must be set before starting) -func (c *cmd) SetGID(groupid string) error { - // TODO implement something for windows - return nil -} From 741965e6d7866336893c695b578dd9d192bdb05c Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Tue, 3 Nov 2015 15:15:40 -0600 Subject: [PATCH 62/92] add getter package, a thin wrapper of go-getter --- client/getter/getter.go | 44 +++++++++++++++ client/getter/getter_test.go | 103 +++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 client/getter/getter.go create mode 100644 client/getter/getter_test.go diff --git a/client/getter/getter.go b/client/getter/getter.go new file mode 100644 index 000000000..a113dd1cb --- /dev/null +++ b/client/getter/getter.go @@ -0,0 +1,44 @@ +package getter + +import ( + "fmt" + "log" + "net/url" + "path" + "path/filepath" + "runtime" + "strings" + "syscall" + + gg "github.com/hashicorp/go-getter" +) + +func GetArtifact(destDir, source, checksum string, logger *log.Logger) (string, error) { + // We use go-getter to support a variety of protocols, but need to change + // file permissions of the resulted download to be executable + + u, err := url.Parse(source) + if err != nil { + return "", err + } + + // look for checksum, apply to URL + if checksum != "" { + source = strings.Join([]string{source, fmt.Sprintf("checksum=%s", checksum)}, "?") + logger.Printf("[DEBUG] Applying checksum to Artifact Source URL, new url: %s", source) + } + + artifactName := path.Base(u.Path) + 
artifactFile := filepath.Join(destDir, artifactName) + if err := gg.GetFile(artifactFile, source); err != nil { + return "", fmt.Errorf("Error downloading artifact: %s", err) + } + + // Add execution permissions to the newly downloaded artifact + if runtime.GOOS != "windows" { + if err := syscall.Chmod(artifactFile, 0755); err != nil { + logger.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err) + } + } + return artifactFile, nil +} diff --git a/client/getter/getter_test.go b/client/getter/getter_test.go new file mode 100644 index 000000000..570b2a8c9 --- /dev/null +++ b/client/getter/getter_test.go @@ -0,0 +1,103 @@ +package getter + +import ( + "fmt" + "io/ioutil" + "log" + "os" + "strings" + "testing" +) + +func TestGetArtifact_basic(t *testing.T) { + + logger := log.New(os.Stderr, "", log.LstdFlags) + + // TODO: Use http.TestServer to serve these files from fixtures dir + passing := []struct { + Source, Checksum string + }{ + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5df51e33871d6b295a60e0f9f6dd573da977", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "md5:a9b14903a8942748e4f8474e11f795d3", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64?checksum=sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + "", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "", + }, + } + + for i, p := range passing { + destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) + if err != nil { + t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) + } + + path, err := GetArtifact(destDir, p.Source, p.Checksum, logger) + if err != nil { + t.Fatalf("TestGetArtifact_basic unexpected failure here: %s", err) + } + + if p.Checksum != "" { + if ok := strings.Contains(path, p.Checksum); ok { + t.Fatalf("path result should not contain the checksum, got: %s", path) + } + } + + // verify artifact exists + if _, err := os.Stat(path); err != nil { + t.Fatalf("source path error: %s", err) + } + } +} + +func TestGetArtifact_fails(t *testing.T) { + + logger := log.New(os.Stderr, "", log.LstdFlags) + + failing := []struct { + Source, Checksum string + }{ + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "sha257:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + // malformed checksum + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + // 404 + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd86", + "", + }, + } + for i, p := range failing { + destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) + if err != nil { + t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) + } + + _, err = GetArtifact(destDir, p.Source, p.Checksum, logger) + if err == nil { + t.Fatalf("TestGetArtifact_basic expected failure, but got none") + } + } +} From add12a6a004e69396bc83c48b6e247af82eeeb83 Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Tue, 3 Nov 2015 15:16:17 -0600 Subject: [PATCH 63/92] refactor drivers to use new getter 
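
Each driver below replaces its inline go-getter / checksum / chmod logic with a
single call into the new client/getter package. A minimal sketch of that shared
path, using the names exactly as they appear in the hunks below (this runs
inside a driver's Start method, with client/getter, client/allocdir and
path/filepath imported as shown in the diff):

    // Download artifact_source into the task's local directory. An optional
    // checksum ("type:value") is appended to the URL and verified by go-getter;
    // GetArtifact returns the path of the fetched file.
    artifactFile, err := getter.GetArtifact(
        filepath.Join(taskDir, allocdir.TaskLocal),
        task.Config["artifact_source"],
        task.Config["checksum"],
        d.logger,
    )
    if err != nil {
        return nil, err
    }

Centralizing the download also puts the "make the artifact executable on
non-Windows" step in one place instead of repeating it in every driver.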
--- client/driver/exec.go | 37 ++-- client/driver/java.go | 27 ++- client/driver/java_test.go | 10 +- client/driver/qemu.go | 44 ++--- client/driver/qemu_test.go | 18 +- client/driver/raw_exec.go | 30 +--- client/getter/getter.go | 65 ++++--- client/getter/getter_test.go | 176 ++++++++++--------- website/source/docs/drivers/exec.html.md | 5 + website/source/docs/drivers/java.html.md | 25 ++- website/source/docs/drivers/qemu.html.md | 10 +- website/source/docs/drivers/raw_exec.html.md | 5 + 12 files changed, 224 insertions(+), 228 deletions(-) diff --git a/client/driver/exec.go b/client/driver/exec.go index e48604894..213bc574f 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -2,17 +2,15 @@ package driver import ( "fmt" - "log" - "path" "path/filepath" "runtime" "syscall" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -55,29 +53,24 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("missing command for exec driver") } + // Create a location to download the artifact. + taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] + if !ok { + return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) + } + // Check if an artificat is specified and attempt to download it source, ok := task.Config["artifact_source"] if ok && source != "" { // Proceed to download an artifact to be executed. - // We use go-getter to support a variety of protocols, but need to change - // file permissions of the resulted download to be executable - - // Create a location to download the artifact. 
- taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] - if !ok { - return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) - } - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - - artifactName := path.Base(source) - artifactFile := filepath.Join(destDir, artifactName) - if err := getter.GetFile(artifactFile, source); err != nil { - return nil, fmt.Errorf("Error downloading artifact for Exec driver: %s", err) - } - - // Add execution permissions to the newly downloaded artifact - if err := syscall.Chmod(artifactFile, 0755); err != nil { - log.Printf("[ERR] driver.exec: Error making artifact executable: %s", err) + _, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } } diff --git a/client/driver/java.go b/client/driver/java.go index e7563f6e2..808bdfe5b 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -4,17 +4,16 @@ import ( "bytes" "fmt" "os/exec" - "path" "path/filepath" "runtime" "strings" "syscall" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -89,26 +88,24 @@ func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, } func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { - // Get the jar source - source, ok := task.Config["jar_source"] - if !ok || source == "" { - return nil, fmt.Errorf("missing jar source for Java Jar driver") - } - taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] if !ok { return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - - // Create a location to download the binary. - jarName := path.Base(source) - jarPath := filepath.Join(destDir, jarName) - if err := getter.GetFile(jarPath, source); err != nil { - return nil, fmt.Errorf("Error downloading source for Java driver: %s", err) + // Proceed to download an artifact to be executed. + path, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } + jarName := filepath.Base(path) + // Get the environment variables. 
envVars := TaskEnvironmentVariables(ctx, task) diff --git a/client/driver/java_test.go b/client/driver/java_test.go index b4f2f2e15..e525d4dce 100644 --- a/client/driver/java_test.go +++ b/client/driver/java_test.go @@ -97,10 +97,8 @@ func TestJavaDriver_Start_Wait(t *testing.T) { task := &structs.Task{ Name: "demo-app", Config: map[string]string{ - "jar_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", - // "jar_source": "https://s3-us-west-2.amazonaws.com/java-jar-thing/demoapp.jar", - // "args": "-d64", - "jvm_options": "-Xmx2048m -Xms256m", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", + "jvm_options": "-Xmx2048m -Xms256m", }, Resources: basicResources, } @@ -145,9 +143,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { task := &structs.Task{ Name: "demo-app", Config: map[string]string{ - "jar_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", - // "jar_source": "https://s3-us-west-2.amazonaws.com/java-jar-thing/demoapp.jar", - // "args": "-d64", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", }, Resources: basicResources, } diff --git a/client/driver/qemu.go b/client/driver/qemu.go index abf6d4dfa..b06c5dd47 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -2,11 +2,8 @@ package driver import ( "bytes" - "crypto/sha256" - "encoding/hex" "encoding/json" "fmt" - "io" "log" "os" "os/exec" @@ -17,9 +14,9 @@ import ( "strings" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -82,7 +79,7 @@ func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, // image and save it to the Drivers Allocation Dir func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { // Get the image source - source, ok := task.Config["image_source"] + source, ok := task.Config["artifact_source"] if !ok || source == "" { return nil, fmt.Errorf("Missing source image Qemu driver") } @@ -99,34 +96,19 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - // Create a location to download the binary. - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - vmID := fmt.Sprintf("qemu-vm-%s-%s", structs.GenerateUUID(), filepath.Base(source)) - vmPath := filepath.Join(destDir, vmID) - if err := getter.GetFile(vmPath, source); err != nil { - return nil, fmt.Errorf("Error downloading artifact for Qemu driver: %s", err) + // Proceed to download an artifact to be executed. 
+ vmPath, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + d.logger.Printf("\n---\nDEBUG\n---\n error here: %s\n---\n", err) + return nil, err } - // compute and check checksum - if check, ok := task.Config["checksum"]; ok { - d.logger.Printf("[DEBUG] Running checksum on (%s)", vmID) - hasher := sha256.New() - file, err := os.Open(vmPath) - if err != nil { - return nil, fmt.Errorf("Failed to open file for checksum") - } - - defer file.Close() - io.Copy(hasher, file) - - sum := hex.EncodeToString(hasher.Sum(nil)) - if sum != check { - return nil, fmt.Errorf( - "Error in Qemu: checksums did not match.\nExpected (%s), got (%s)", - check, - sum) - } - } + vmID := filepath.Base(vmPath) // Parse configuration arguments // Create the base arguments diff --git a/client/driver/qemu_test.go b/client/driver/qemu_test.go index e9a9e5744..7b6ca1bb7 100644 --- a/client/driver/qemu_test.go +++ b/client/driver/qemu_test.go @@ -54,10 +54,10 @@ func TestQemuDriver_Start(t *testing.T) { task := &structs.Task{ Name: "linux", Config: map[string]string{ - "image_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", - "checksum": "a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", - "accelerator": "tcg", - "guest_ports": "22,8080", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", + "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", + "accelerator": "tcg", + "guest_ports": "22,8080", }, Resources: &structs.Resources{ MemoryMB: 512, @@ -103,11 +103,11 @@ func TestQemuDriver_RequiresMemory(t *testing.T) { task := &structs.Task{ Name: "linux", Config: map[string]string{ - "image_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", - "accelerator": "tcg", - "host_port": "8080", - "guest_port": "8081", - "checksum": "a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", + "accelerator": "tcg", + "host_port": "8080", + "guest_port": "8081", + "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", // ssh u/p would be here }, } diff --git a/client/driver/raw_exec.go b/client/driver/raw_exec.go index fd54e1b86..9a2716297 100644 --- a/client/driver/raw_exec.go +++ b/client/driver/raw_exec.go @@ -2,21 +2,18 @@ package driver import ( "fmt" - "log" "os" "os/exec" - "path" "path/filepath" "runtime" "strconv" "strings" - "syscall" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -83,23 +80,14 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl source, ok := task.Config["artifact_source"] if ok && source != "" { // Proceed to download an artifact to be executed. - // We use go-getter to support a variety of protocols, but need to change - // file permissions of the resulted download to be executable - - // Create a location to download the artifact. 
- destDir := filepath.Join(taskDir, allocdir.TaskLocal) - - artifactName := path.Base(source) - artifactFile := filepath.Join(destDir, artifactName) - if err := getter.GetFile(artifactFile, source); err != nil { - return nil, fmt.Errorf("Error downloading artifact for Raw Exec driver: %s", err) - } - - // Add execution permissions to the newly downloaded artifact - if runtime.GOOS != "windows" { - if err := syscall.Chmod(artifactFile, 0755); err != nil { - log.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err) - } + _, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } } diff --git a/client/getter/getter.go b/client/getter/getter.go index a113dd1cb..1a721c3f2 100644 --- a/client/getter/getter.go +++ b/client/getter/getter.go @@ -1,44 +1,43 @@ package getter import ( - "fmt" - "log" - "net/url" - "path" - "path/filepath" - "runtime" - "strings" - "syscall" + "fmt" + "log" + "net/url" + "path" + "path/filepath" + "runtime" + "strings" + "syscall" - gg "github.com/hashicorp/go-getter" + gg "github.com/hashicorp/go-getter" ) func GetArtifact(destDir, source, checksum string, logger *log.Logger) (string, error) { - // We use go-getter to support a variety of protocols, but need to change - // file permissions of the resulted download to be executable + if source == "" { + return "", fmt.Errorf("Source url is empty in Artifact Getter") + } + u, err := url.Parse(source) + if err != nil { + return "", err + } - u, err := url.Parse(source) - if err != nil { - return "", err - } + // if checksum is seperate, apply to source + if checksum != "" { + source = strings.Join([]string{source, fmt.Sprintf("checksum=%s", checksum)}, "?") + logger.Printf("[DEBUG] client.getter: Applying checksum to Artifact Source URL, new url: %s", source) + } - // look for checksum, apply to URL - if checksum != "" { - source = strings.Join([]string{source, fmt.Sprintf("checksum=%s", checksum)}, "?") - logger.Printf("[DEBUG] Applying checksum to Artifact Source URL, new url: %s", source) - } + artifactFile := filepath.Join(destDir, path.Base(u.Path)) + if err := gg.GetFile(artifactFile, source); err != nil { + return "", fmt.Errorf("Error downloading artifact: %s", err) + } - artifactName := path.Base(u.Path) - artifactFile := filepath.Join(destDir, artifactName) - if err := gg.GetFile(artifactFile, source); err != nil { - return "", fmt.Errorf("Error downloading artifact: %s", err) - } - - // Add execution permissions to the newly downloaded artifact - if runtime.GOOS != "windows" { - if err := syscall.Chmod(artifactFile, 0755); err != nil { - logger.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err) - } - } - return artifactFile, nil + // Add execution permissions to the newly downloaded artifact + if runtime.GOOS != "windows" { + if err := syscall.Chmod(artifactFile, 0755); err != nil { + logger.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err) + } + } + return artifactFile, nil } diff --git a/client/getter/getter_test.go b/client/getter/getter_test.go index 570b2a8c9..54eff20c6 100644 --- a/client/getter/getter_test.go +++ b/client/getter/getter_test.go @@ -1,103 +1,111 @@ package getter import ( - "fmt" - "io/ioutil" - "log" - "os" - "strings" - "testing" + "fmt" + "io/ioutil" + "log" + "os" + "strings" + "testing" ) func TestGetArtifact_basic(t *testing.T) { - logger := log.New(os.Stderr, "", log.LstdFlags) + 
logger := log.New(os.Stderr, "", log.LstdFlags) - // TODO: Use http.TestServer to serve these files from fixtures dir - passing := []struct { - Source, Checksum string - }{ - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", - "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5df51e33871d6b295a60e0f9f6dd573da977", - }, - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", - "sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", - }, - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", - "md5:a9b14903a8942748e4f8474e11f795d3", - }, - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64?checksum=sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", - "", - }, - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", - "", - }, - } + // TODO: Use http.TestServer to serve these files from fixtures dir + passing := []struct { + Source, Checksum string + }{ + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5df51e33871d6b295a60e0f9f6dd573da977", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "md5:a9b14903a8942748e4f8474e11f795d3", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64?checksum=sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + "", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "", + }, + } - for i, p := range passing { - destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) - if err != nil { - t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) - } + for i, p := range passing { + destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) + if err != nil { + t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) + } - path, err := GetArtifact(destDir, p.Source, p.Checksum, logger) - if err != nil { - t.Fatalf("TestGetArtifact_basic unexpected failure here: %s", err) - } + path, err := GetArtifact(destDir, p.Source, p.Checksum, logger) + if err != nil { + t.Fatalf("TestGetArtifact_basic unexpected failure here: %s", err) + } - if p.Checksum != "" { - if ok := strings.Contains(path, p.Checksum); ok { - t.Fatalf("path result should not contain the checksum, got: %s", path) - } - } + if p.Checksum != "" { + if ok := strings.Contains(path, p.Checksum); ok { + t.Fatalf("path result should not contain the checksum, got: %s", path) + } + } - // verify artifact exists - if _, err := os.Stat(path); err != nil { - t.Fatalf("source path error: %s", err) - } - } + // verify artifact exists + if _, err := os.Stat(path); err != nil { + t.Fatalf("source path error: %s", err) + } + } } func TestGetArtifact_fails(t *testing.T) { - logger := log.New(os.Stderr, "", log.LstdFlags) + logger := log.New(os.Stderr, "", log.LstdFlags) - failing := []struct { - Source, Checksum string - }{ - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", - "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", - }, - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", - "sha257:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", - }, - // malformed checksum - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", - 
"6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", - }, - // 404 - { - "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd86", - "", - }, - } - for i, p := range failing { - destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) - if err != nil { - t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) - } + failing := []struct { + Source, Checksum string + }{ + { + "", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + "/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "sha257:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + // malformed checksum + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + // 404 + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd86", + "", + }, + } + for i, p := range failing { + destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) + if err != nil { + t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) + } - _, err = GetArtifact(destDir, p.Source, p.Checksum, logger) - if err == nil { - t.Fatalf("TestGetArtifact_basic expected failure, but got none") - } - } + _, err = GetArtifact(destDir, p.Source, p.Checksum, logger) + if err == nil { + t.Fatalf("TestGetArtifact_basic expected failure, but got none") + } + } } diff --git a/website/source/docs/drivers/exec.html.md b/website/source/docs/drivers/exec.html.md index e82aa1505..f897b1ea4 100644 --- a/website/source/docs/drivers/exec.html.md +++ b/website/source/docs/drivers/exec.html.md @@ -24,6 +24,10 @@ The `exec` driver supports the following configuration in the job spec: * `artifact_source` – (Optional) Source location of an executable artifact. Must be accessible from the Nomad client. If you specify an `artifact_source` to be executed, you must reference it in the `command` as show in the examples below +* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image. +The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`, +and the value is the computed checksum. If a checksum is supplied and does not +match the downloaded artifact, the driver will fail to start * `args` - The argument list to the command, space seperated. Optional. ## Client Requirements @@ -53,6 +57,7 @@ To execute a binary specified by `artifact_source`: ``` config { artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin" + checksum = "sha256:abd123445ds4555555555" command = "$NOMAD_TASK_DIR/binary.bin" } ``` diff --git a/website/source/docs/drivers/java.html.md b/website/source/docs/drivers/java.html.md index ecfddb645..f2bbd2b76 100644 --- a/website/source/docs/drivers/java.html.md +++ b/website/source/docs/drivers/java.html.md @@ -18,8 +18,12 @@ HTTP from the Nomad client. The `java` driver supports the following configuration in the job spec: -* `jar_source` - **(Required)** The hosted location of the source Jar file. Must be accessible +* `artifact_source` - **(Required)** The hosted location of the source Jar file. Must be accessible from the Nomad client +* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image. 
+The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`, +and the value is the computed checksum. If a checksum is supplied and does not +match the downloaded artifact, the driver will fail to start * `args` - **(Optional)** The argument list for the `java` command, space separated. @@ -29,10 +33,27 @@ from the Nomad client ## Client Requirements The `java` driver requires Java to be installed and in your systems `$PATH`. -The `jar_source` must be accessible by the node running Nomad. This can be an +The `artifact_source` must be accessible by the node running Nomad. This can be an internal source, private to your cluster, but it must be reachable by the client over HTTP. +## Examples + +A simple config block to run a Java Jar: + +```json +# Define a task to run +task "web" { + # Run a Java Jar + driver = "java" + + config { + artifact_source = "https://dl.dropboxusercontent.com/u/1234/hello.jar" + checksum = "md5:123445555555555" + jvm_options = "-Xmx2048m -Xms256m" + } +``` + ## Client Attributes The `java` driver will set the following client attributes: diff --git a/website/source/docs/drivers/qemu.html.md b/website/source/docs/drivers/qemu.html.md index 3e19076a0..403926b4c 100644 --- a/website/source/docs/drivers/qemu.html.md +++ b/website/source/docs/drivers/qemu.html.md @@ -23,10 +23,12 @@ The `Qemu` driver can execute any regular `qemu` image (e.g. `qcow`, `img`, The `Qemu` driver supports the following configuration in the job spec: -* `image_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible +* `artifact_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible from the Nomad client, via HTTP. -* `checksum` - **(Required)** The SHA256 checksum of the `qemu` image. If the -checksums do not match, the `Qemu` driver will fail to start the image +* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image. +The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`, +and the value is the computed checksum. If a checksum is supplied and does not +match the downloaded artifact, the driver will fail to start * `accelerator` - (Optional) The type of accelerator to use in the invocation. If the host machine has `Qemu` installed with KVM support, users can specify `kvm` for the `accelerator`. Default is `tcg` * `host_port` - **(Required)** Port on the host machine to forward to the guest @@ -38,7 +40,7 @@ in the `Task` specification ## Client Requirements The `Qemu` driver requires Qemu to be installed and in your system's `$PATH`. -The `image_source` must be accessible by the node running Nomad. This can be an +The `artifact_source` must be accessible by the node running Nomad. This can be an internal source, private to your cluster, but it must be reachable by the client over HTTP. diff --git a/website/source/docs/drivers/raw_exec.html.md b/website/source/docs/drivers/raw_exec.html.md index fa67129ba..2dc741887 100644 --- a/website/source/docs/drivers/raw_exec.html.md +++ b/website/source/docs/drivers/raw_exec.html.md @@ -22,6 +22,10 @@ The `raw_exec` driver supports the following configuration in the job spec: * `artifact_source` – (Optional) Source location of an executable artifact. Must be accessible from the Nomad client. 
If you specify an `artifact_source` to be executed, you must reference it in the `command` as show in the examples below +* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image. +The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`, +and the value is the computed checksum. If a checksum is supplied and does not +match the downloaded artifact, the driver will fail to start * `args` - The argument list to the command, space seperated. Optional. ## Client Requirements @@ -57,6 +61,7 @@ To execute a binary specified by `artifact_source`: ``` config { artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin" + checksum = "sha256:133jifjiofu9090fsadjofsdjlk" command = "$NOMAD_TASK_DIR/binary.bin" } ``` From e3376d102c86fb4f2c848294d504e578b4b4d2da Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Wed, 4 Nov 2015 10:16:22 -0600 Subject: [PATCH 64/92] remove debug line --- client/driver/qemu.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/client/driver/qemu.go b/client/driver/qemu.go index b06c5dd47..52b2e78e0 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -104,9 +104,8 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, d.logger, ) if err != nil { - d.logger.Printf("\n---\nDEBUG\n---\n error here: %s\n---\n", err) return nil, err - } + } vmID := filepath.Base(vmPath) From 0fe2ea9b04377d9922e3a7e6d8cbb03d3c18d175 Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Thu, 5 Nov 2015 09:23:45 -0600 Subject: [PATCH 65/92] add checksums to tests, formatting after rebase --- client/driver/exec_test.go | 3 ++- client/driver/java_test.go | 7 ++++--- client/driver/qemu.go | 26 +++++++++++++------------- client/driver/qemu_test.go | 18 +++++++++--------- client/driver/raw_exec.go | 18 +++++++++--------- client/driver/raw_exec_test.go | 5 ++++- 6 files changed, 41 insertions(+), 36 deletions(-) diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index 1bb4adf36..bc8323889 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -123,11 +123,12 @@ func TestExecDriver_Start_Wait(t *testing.T) { func TestExecDriver_Start_Artifact_basic(t *testing.T) { ctestutils.ExecCompatible(t) file := "hi_linux_amd64" + checksum := "sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c" task := &structs.Task{ Name: "sleep", Config: map[string]string{ - "artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file), + "artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s?checksum=%s", file, checksum), "command": filepath.Join("$NOMAD_TASK_DIR", file), }, Resources: basicResources, diff --git a/client/driver/java_test.go b/client/driver/java_test.go index e525d4dce..206cc1c78 100644 --- a/client/driver/java_test.go +++ b/client/driver/java_test.go @@ -97,8 +97,9 @@ func TestJavaDriver_Start_Wait(t *testing.T) { task := &structs.Task{ Name: "demo-app", Config: map[string]string{ - "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", - "jvm_options": "-Xmx2048m -Xms256m", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", + "jvm_options": "-Xmx2048m -Xms256m", + "checksum": "sha256:58d6e8130308d32e197c5108edd4f56ddf1417408f743097c2e662df0f0b17c8", }, Resources: basicResources, } @@ -143,7 +144,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { task := &structs.Task{ Name: "demo-app", 
Config: map[string]string{ - "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", }, Resources: basicResources, } diff --git a/client/driver/qemu.go b/client/driver/qemu.go index 52b2e78e0..0eab4e659 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -16,7 +16,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/getter" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -79,7 +79,7 @@ func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, // image and save it to the Drivers Allocation Dir func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { // Get the image source - source, ok := task.Config["artifact_source"] + source, ok := task.Config["artifact_source"] if !ok || source == "" { return nil, fmt.Errorf("Missing source image Qemu driver") } @@ -96,18 +96,18 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - // Proceed to download an artifact to be executed. - vmPath, err := getter.GetArtifact( - filepath.Join(taskDir, allocdir.TaskLocal), - task.Config["artifact_source"], - task.Config["checksum"], - d.logger, - ) - if err != nil { - return nil, err - } + // Proceed to download an artifact to be executed. + vmPath, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err + } - vmID := filepath.Base(vmPath) + vmID := filepath.Base(vmPath) // Parse configuration arguments // Create the base arguments diff --git a/client/driver/qemu_test.go b/client/driver/qemu_test.go index 7b6ca1bb7..dffdc7bf0 100644 --- a/client/driver/qemu_test.go +++ b/client/driver/qemu_test.go @@ -54,10 +54,10 @@ func TestQemuDriver_Start(t *testing.T) { task := &structs.Task{ Name: "linux", Config: map[string]string{ - "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", - "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", - "accelerator": "tcg", - "guest_ports": "22,8080", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", + "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", + "accelerator": "tcg", + "guest_ports": "22,8080", }, Resources: &structs.Resources{ MemoryMB: 512, @@ -103,11 +103,11 @@ func TestQemuDriver_RequiresMemory(t *testing.T) { task := &structs.Task{ Name: "linux", Config: map[string]string{ - "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", - "accelerator": "tcg", - "host_port": "8080", - "guest_port": "8081", - "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", + "accelerator": "tcg", + "host_port": "8080", + "guest_port": "8081", + "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", // ssh u/p would be here }, } diff --git a/client/driver/raw_exec.go b/client/driver/raw_exec.go index 9a2716297..856f2b7fc 100644 --- a/client/driver/raw_exec.go +++ b/client/driver/raw_exec.go @@ -13,7 
+13,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" - "github.com/hashicorp/nomad/client/getter" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -80,14 +80,14 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl source, ok := task.Config["artifact_source"] if ok && source != "" { // Proceed to download an artifact to be executed. - _, err := getter.GetArtifact( - filepath.Join(taskDir, allocdir.TaskLocal), - task.Config["artifact_source"], - task.Config["checksum"], - d.logger, - ) - if err != nil { - return nil, err + _, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } } diff --git a/client/driver/raw_exec_test.go b/client/driver/raw_exec_test.go index 0a6133df9..1dda991c8 100644 --- a/client/driver/raw_exec_test.go +++ b/client/driver/raw_exec_test.go @@ -94,12 +94,14 @@ func TestRawExecDriver_StartOpen_Wait(t *testing.T) { } func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { - var file string + var file, checksum string switch runtime.GOOS { case "darwin": file = "hi_darwin_amd64" + checksum = "md5:d7f2fdb13b36dcb7407721d78926b335" default: file = "hi_linux_amd64" + checksum = "md5:a9b14903a8942748e4f8474e11f795d3" } task := &structs.Task{ @@ -107,6 +109,7 @@ func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { Config: map[string]string{ "artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file), "command": filepath.Join("$NOMAD_TASK_DIR", file), + "checksum": checksum, }, } driverCtx := testDriverContext(task.Name) From 1d21991e280ffd97e166af2a5922b7e57286abac Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 09:58:57 -0800 Subject: [PATCH 66/92] exec_basic uses Spawner; create shared test harness for executors --- client/driver/executor/exec.go | 12 +- client/driver/executor/exec_basic.go | 84 ++++++-- client/driver/executor/exec_basic_test.go | 7 + client/driver/executor/exec_linux.go | 4 + client/driver/executor/exec_linux_test.go | 210 +------------------- client/driver/executor/test_harness.go | 231 ++++++++++++++++++++++ client/driver/spawn/spawn.go | 2 + 7 files changed, 317 insertions(+), 233 deletions(-) create mode 100644 client/driver/executor/exec_basic_test.go create mode 100644 client/driver/executor/test_harness.go diff --git a/client/driver/executor/exec.go b/client/driver/executor/exec.go index b3878ba0c..8cf076bab 100644 --- a/client/driver/executor/exec.go +++ b/client/driver/executor/exec.go @@ -74,11 +74,16 @@ type Executor interface { } // Command is a mirror of exec.Command that returns a platform-specific Executor -func Command(name string, arg ...string) Executor { +func Command(name string, args ...string) Executor { executor := NewExecutor() - cmd := executor.Command() + SetCommand(executor, name, args) + return executor +} + +func SetCommand(e Executor, name string, args []string) { + cmd := e.Command() cmd.Path = name - cmd.Args = append([]string{name}, arg...) + cmd.Args = append([]string{name}, args...) 
if filepath.Base(name) == name { if lp, err := exec.LookPath(name); err != nil { @@ -87,7 +92,6 @@ func Command(name string, arg ...string) Executor { cmd.Path = lp } } - return executor } // OpenId is similar to executor.Command but will attempt to reopen with the diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go index b9bdebd7d..4b865fbd9 100644 --- a/client/driver/executor/exec_basic.go +++ b/client/driver/executor/exec_basic.go @@ -1,22 +1,29 @@ package executor import ( + "bytes" + "encoding/json" "fmt" "os" "os/exec" - "strconv" + "path/filepath" "strings" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/driver/args" "github.com/hashicorp/nomad/client/driver/environment" + "github.com/hashicorp/nomad/client/driver/spawn" "github.com/hashicorp/nomad/nomad/structs" ) // BasicExecutor should work everywhere, and as a result does not include // any resource restrictions or runas capabilities. type BasicExecutor struct { - cmd exec.Cmd + cmd exec.Cmd + spawn *spawn.Spawner + taskName string + taskDir string + allocDir string } // TODO: Have raw_exec use this as well. @@ -34,9 +41,13 @@ func (e *BasicExecutor) Limit(resources *structs.Resources) error { func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { taskDir, ok := alloc.TaskDirs[taskName] if !ok { - return fmt.Errorf("Error finding task dir for (%s)", taskName) + fmt.Errorf("Couldn't find task directory for task %v", taskName) } e.cmd.Dir = taskDir + + e.taskDir = taskDir + e.taskName = taskName + e.allocDir = alloc.AllocDir return nil } @@ -64,42 +75,73 @@ func (e *BasicExecutor) Start() error { e.cmd.Args = parsed // We don't want to call ourself. We want to call Start on our embedded Cmd - return e.cmd.Start() + spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) + e.spawn = spawn.NewSpawner(spawnState) + e.spawn.SetCommand(&e.cmd) + e.spawn.SetLogs(&spawn.Logs{ + Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + Stdin: os.DevNull, + }) + + return e.spawn.Spawn(nil) } -func (e *BasicExecutor) Open(pid string) error { - pidNum, err := strconv.Atoi(pid) - if err != nil { - return fmt.Errorf("Failed to parse pid %v: %v", pid, err) +func (e *BasicExecutor) Open(id string) error { + var spawn spawn.Spawner + dec := json.NewDecoder(strings.NewReader(id)) + if err := dec.Decode(&spawn); err != nil { + return fmt.Errorf("Failed to parse id: %v", err) } - process, err := os.FindProcess(pidNum) - if err != nil { - return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) - } - e.cmd.Process = process + // Setup the executor. + e.spawn = &spawn return nil } func (e *BasicExecutor) Wait() error { - // We don't want to call ourself. 
We want to call Start on our embedded Cmd - return e.cmd.Wait() + code, err := e.spawn.Wait() + if err != nil { + return err + } + + if code != 0 { + return fmt.Errorf("Task exited with code: %d", code) + } + + return nil } func (e *BasicExecutor) ID() (string, error) { - if e.cmd.Process != nil { - return strconv.Itoa(e.cmd.Process.Pid), nil - } else { - return "", fmt.Errorf("Process has finished or was never started") + if e.spawn == nil { + return "", fmt.Errorf("Process was never started") } + + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(e.spawn); err != nil { + return "", fmt.Errorf("Failed to serialize id: %v", err) + } + + return buffer.String(), nil } func (e *BasicExecutor) Shutdown() error { - return e.ForceStop() + proc, err := os.FindProcess(e.spawn.UserPid) + if err != nil { + return fmt.Errorf("Failed to find user processes %v: %v", e.spawn.UserPid, err) + } + + return proc.Signal(os.Interrupt) } func (e *BasicExecutor) ForceStop() error { - return e.cmd.Process.Kill() + proc, err := os.FindProcess(e.spawn.UserPid) + if err != nil { + return fmt.Errorf("Failed to find user processes %v: %v", e.spawn.UserPid, err) + } + + return proc.Kill() } func (e *BasicExecutor) Command() *exec.Cmd { diff --git a/client/driver/executor/exec_basic_test.go b/client/driver/executor/exec_basic_test.go new file mode 100644 index 000000000..d9eed49f9 --- /dev/null +++ b/client/driver/executor/exec_basic_test.go @@ -0,0 +1,7 @@ +package executor + +import "testing" + +func TestExecutorBasic(t *testing.T) { + testExecutor(t, NewBasicExecutor, nil) +} diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go index a75440ba8..cc428ecd7 100644 --- a/client/driver/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -41,6 +41,10 @@ var ( ) func NewExecutor() Executor { + return NewLinuxExecutor() +} + +func NewLinuxExecutor() Executor { return &LinuxExecutor{} } diff --git a/client/driver/executor/exec_linux_test.go b/client/driver/executor/exec_linux_test.go index 1b8307b02..c0bd2087a 100644 --- a/client/driver/executor/exec_linux_test.go +++ b/client/driver/executor/exec_linux_test.go @@ -1,217 +1,11 @@ package executor import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" "testing" - "time" - - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" ctestutil "github.com/hashicorp/nomad/client/testutil" ) -var ( - constraint = &structs.Resources{ - CPU: 250, - MemoryMB: 256, - Networks: []*structs.NetworkResource{ - &structs.NetworkResource{ - MBits: 50, - DynamicPorts: []string{"http"}, - }, - }, - } -) - -func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) { - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - - allocDir := allocdir.NewAllocDir(filepath.Join(os.TempDir(), alloc.ID)) - if err := allocDir.Build([]*structs.Task{task}); err != nil { - t.Fatalf("allocDir.Build() failed: %v", err) - } - - return task.Name, allocDir -} - -func TestExecutorLinux_Start_Invalid(t *testing.T) { - ctestutil.ExecCompatible(t) - invalid := "/bin/foobar" - e := Command(invalid, "1") - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err == nil { - t.Fatalf("Start(%v) should 
have failed", invalid) - } -} - -func TestExecutorLinux_Start_Wait_Failure_Code(t *testing.T) { - ctestutil.ExecCompatible(t) - e := Command("/bin/date", "-invalid") - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - if err := e.Wait(); err == nil { - t.Fatalf("Wait() should have failed") - } -} - -func TestExecutorLinux_Start_Wait(t *testing.T) { - ctestutil.ExecCompatible(t) - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - - taskDir, ok := alloc.TaskDirs[task] - if !ok { - t.Fatalf("No task directory found for task %v", task) - } - - expected := "hello world" - file := filepath.Join(allocdir.TaskLocal, "output.txt") - absFilePath := filepath.Join(taskDir, file) - cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) - e := Command("/bin/bash", "-c", cmd) - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - if err := e.Wait(); err != nil { - t.Fatalf("Wait() failed: %v", err) - } - - output, err := ioutil.ReadFile(absFilePath) - if err != nil { - t.Fatalf("Couldn't read file %v", absFilePath) - } - - act := string(output) - if act != expected { - t.Fatalf("Command output incorrectly: want %v; got %v", expected, act) - } -} - -func TestExecutorLinux_Start_Kill(t *testing.T) { - ctestutil.ExecCompatible(t) - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - - taskDir, ok := alloc.TaskDirs[task] - if !ok { - t.Fatalf("No task directory found for task %v", task) - } - - filePath := filepath.Join(taskDir, "output") - e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - if err := e.Shutdown(); err != nil { - t.Fatalf("Shutdown() failed: %v", err) - } - - time.Sleep(1500 * time.Millisecond) - - // Check that the file doesn't exist. 
- if _, err := os.Stat(filePath); err == nil { - t.Fatalf("Stat(%v) should have failed: task not killed", filePath) - } -} - -func TestExecutorLinux_Open(t *testing.T) { - ctestutil.ExecCompatible(t) - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - - taskDir, ok := alloc.TaskDirs[task] - if !ok { - t.Fatalf("No task directory found for task %v", task) - } - - expected := "hello world" - file := filepath.Join(allocdir.TaskLocal, "output.txt") - absFilePath := filepath.Join(taskDir, file) - cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) - e := Command("/bin/bash", "-c", cmd) - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - id, err := e.ID() - if err != nil { - t.Fatalf("ID() failed: %v", err) - } - - e2 := NewExecutor() - if err := e2.Open(id); err != nil { - t.Fatalf("Open(%v) failed: %v", id, err) - } - - if err := e2.Wait(); err != nil { - t.Fatalf("Wait() failed: %v", err) - } - - output, err := ioutil.ReadFile(absFilePath) - if err != nil { - t.Fatalf("Couldn't read file %v", absFilePath) - } - - act := string(output) - if act != expected { - t.Fatalf("Command output incorrectly: want %v; got %v", expected, act) - } +func TestExecutorLinux(t *testing.T) { + testExecutor(t, NewLinuxExecutor, ctestutil.ExecCompatible) } diff --git a/client/driver/executor/test_harness.go b/client/driver/executor/test_harness.go new file mode 100644 index 000000000..afdf8610f --- /dev/null +++ b/client/driver/executor/test_harness.go @@ -0,0 +1,231 @@ +package executor + +import ( + "fmt" + "io/ioutil" + "log" + "os" + "path/filepath" + "testing" + "time" + + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + constraint = &structs.Resources{ + CPU: 250, + MemoryMB: 256, + Networks: []*structs.NetworkResource{ + &structs.NetworkResource{ + MBits: 50, + DynamicPorts: []string{"http"}, + }, + }, + } +) + +func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) { + alloc := mock.Alloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + + allocDir := allocdir.NewAllocDir(filepath.Join(os.TempDir(), alloc.ID)) + if err := allocDir.Build([]*structs.Task{task}); err != nil { + log.Panicf("allocDir.Build() failed: %v", err) + } + + return task.Name, allocDir +} + +func testExecutor(t *testing.T, newExecutor func() Executor, compatible func(*testing.T)) { + if compatible != nil { + compatible(t) + } + + command := func(name string, args ...string) Executor { + b := NewExecutor() + SetCommand(b, name, args) + return b + } + + Executor_Start_Invalid(t, command) + Executor_Start_Wait_Failure_Code(t, command) + Executor_Start_Wait(t, command) + Executor_Start_Kill(t, command) + Executor_Open(t, command) +} + +type buildExecCommand func(name string, args ...string) Executor + +func Executor_Start_Invalid(t *testing.T, command buildExecCommand) { + invalid := "/bin/foobar" + e := command(invalid, "1") + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err == nil { + 
log.Panicf("Start(%v) should have failed", invalid) + } +} + +func Executor_Start_Wait_Failure_Code(t *testing.T, command buildExecCommand) { + e := command("/bin/date", "-invalid") + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + if err := e.Wait(); err == nil { + log.Panicf("Wait() should have failed") + } +} + +func Executor_Start_Wait(t *testing.T, command buildExecCommand) { + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + + taskDir, ok := alloc.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "output.txt") + absFilePath := filepath.Join(taskDir, file) + cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) + e := command("/bin/bash", "-c", cmd) + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + if err := e.Wait(); err != nil { + log.Panicf("Wait() failed: %v", err) + } + + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + log.Panicf("Couldn't read file %v", absFilePath) + } + + act := string(output) + if act != expected { + log.Panicf("Command output incorrectly: want %v; got %v", expected, act) + } +} + +func Executor_Start_Kill(t *testing.T, command buildExecCommand) { + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + + taskDir, ok := alloc.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + filePath := filepath.Join(taskDir, "output") + e := command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + if err := e.Shutdown(); err != nil { + log.Panicf("Shutdown() failed: %v", err) + } + + time.Sleep(1500 * time.Millisecond) + + // Check that the file doesn't exist. 
+ if _, err := os.Stat(filePath); err == nil { + log.Panicf("Stat(%v) should have failed: task not killed", filePath) + } +} + +func Executor_Open(t *testing.T, command buildExecCommand) { + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + + taskDir, ok := alloc.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "output.txt") + absFilePath := filepath.Join(taskDir, file) + cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) + e := command("/bin/bash", "-c", cmd) + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + id, err := e.ID() + if err != nil { + log.Panicf("ID() failed: %v", err) + } + + e2 := NewExecutor() + if err := e2.Open(id); err != nil { + log.Panicf("Open(%v) failed: %v", id, err) + } + + if err := e2.Wait(); err != nil { + log.Panicf("Wait() failed: %v", err) + } + + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + log.Panicf("Couldn't read file %v", absFilePath) + } + + act := string(output) + if act != expected { + log.Panicf("Command output incorrectly: want %v; got %v", expected, act) + } +} diff --git a/client/driver/spawn/spawn.go b/client/driver/spawn/spawn.go index ef160611e..b962a1ab4 100644 --- a/client/driver/spawn/spawn.go +++ b/client/driver/spawn/spawn.go @@ -22,6 +22,7 @@ type Spawner struct { SpawnPid int SpawnPpid int StateFile string + UserPid int // User configuration UserCmd *exec.Cmd @@ -137,6 +138,7 @@ func (s *Spawner) Spawn(cb func(pid int) error) error { if resp.ErrorMsg != "" { return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) } + s.UserPid = resp.UserPID case <-time.After(5 * time.Second): return fmt.Errorf("timed out waiting for response") } From 0521562d9cb95bbc3eb2e1d354a15c5807b09e50 Mon Sep 17 00:00:00 2001 From: Shiem Edelbrock Date: Thu, 5 Nov 2015 10:47:41 -0800 Subject: [PATCH 67/92] Added task configuraton options for docker driver - Added `priviliged` option to task config to allow containers to run in priviliged mode. - Added `dns-servers` option to task config to allow containers to use custom DNS servers - Added `search-domains` option to task config to allow containers to use custom dns search domains - Added authentication options (under key namespace `auth.*`) to allow authentication on a task level for docker remote. 
- Updated site docs to reflect changes --- client/driver/docker.go | 37 ++++++++++++++++++++-- website/source/docs/drivers/docker.html.md | 18 +++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index bbd52a9d8..80b31d840 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "log" + "net" "path/filepath" "strconv" "strings" @@ -166,6 +167,32 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (do d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"]) + // set privileged (fallback to false) + hostConfig.Privileged, _ = strconv.ParseBool(task.Config["privileged"]) + + // set DNS servers + dns, ok := task.Config["dns-servers"] + + if ok && dns != "" { + for _, v := range strings.Split(dns, ",") { + ip := strings.TrimSpace(v) + if net.ParseIP(ip) != nil { + hostConfig.DNS = append(hostConfig.DNS, ip) + } else { + d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip) + } + } + } + + // set DNS search domains + dnsSearch, ok := task.Config["search-domains"] + + if ok && dnsSearch != "" { + for _, v := range strings.Split(dnsSearch, ",") { + hostConfig.DNSSearch = append(hostConfig.DNSSearch, strings.TrimSpace(v)) + } + } + mode, ok := task.Config["network_mode"] if !ok || mode == "" { // docker default @@ -303,8 +330,14 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle Repository: repo, Tag: tag, } - // TODO add auth configuration for private repos - authOptions := docker.AuthConfiguration{} + + authOptions := docker.AuthConfiguration{ + Username: task.Config["auth.username"], + Password: task.Config["auth.password"], + Email: task.Config["auth.email"], + ServerAddress: task.Config["auth.server-address"], + } + err = client.PullImage(pullOptions, authOptions) if err != nil { d.logger.Printf("[ERR] driver.docker: pulling container %s", err) diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index 444e7f6ca..d04740028 100644 --- a/website/source/docs/drivers/docker.html.md +++ b/website/source/docs/drivers/docker.html.md @@ -32,6 +32,24 @@ The `docker` driver supports the following configuration in the job specificatio network mode is not supported right now and is reported as an invalid option. +* `privileged` - (optional) Privileged mode gives the container full access to + the host. Valid options are `"true"` and `"false"` (defaults to `"false"`). + +* `dns-servers` - (optional) A comma separated list of DNS servers for the container + to use (e.g. "8.8.8.8,8.8.4.4"). *Docker API v1.10 and above only* + +* `search-domains` - (optional) A comma separated list of DNS search domains for the + container to use. + +**Authentication** +Registry authentication can be set per task with the following authentication +parameters. These options can provide access to private repositories that +utilize the docker remote api (e.g. 
dockerhub, quay.io) + - `auth.username` - (optional) The account username + - `auth.password` - (optional) The account password + - `auth.email` - (optional) The account email + - `auth.server-address` - (optional) The server domain/ip without the protocol + ### Port Mapping Nomad uses port binding to expose services running in containers using the port From a6c06c7279305fa6a24ba473437440d597670ffe Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 3 Nov 2015 09:24:26 -0800 Subject: [PATCH 68/92] Setting the restart policy to AllocRunner and Task Runners --- client/alloc_runner.go | 17 +++++++--- client/task_runner.go | 69 ++++++++++++++++++++++++++++---------- client/task_runner_test.go | 22 +++++++++++- 3 files changed, 84 insertions(+), 24 deletions(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index f41be4558..444a44d1d 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -41,9 +41,10 @@ type AllocRunner struct { dirtyCh chan struct{} - ctx *driver.ExecContext - tasks map[string]*TaskRunner - taskLock sync.RWMutex + ctx *driver.ExecContext + tasks map[string]*TaskRunner + RestartPolicy *structs.RestartPolicy + taskLock sync.RWMutex taskStatus map[string]taskStatus taskStatusLock sync.RWMutex @@ -59,6 +60,7 @@ type AllocRunner struct { // allocRunnerState is used to snapshot the state of the alloc runner type allocRunnerState struct { Alloc *structs.Allocation + RestartPolicy *structs.RestartPolicy TaskStatus map[string]taskStatus Context *driver.ExecContext } @@ -95,6 +97,7 @@ func (r *AllocRunner) RestoreState() error { // Restore fields r.alloc = snap.Alloc + r.RestartPolicy = snap.RestartPolicy r.taskStatus = snap.TaskStatus r.ctx = snap.Context @@ -102,7 +105,7 @@ func (r *AllocRunner) RestoreState() error { var mErr multierror.Error for name := range r.taskStatus { task := &structs.Task{Name: name} - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, r.RestartPolicy) r.tasks[name] = tr if err := tr.RestoreState(); err != nil { r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err) @@ -119,6 +122,7 @@ func (r *AllocRunner) SaveState() error { r.taskStatusLock.RLock() snap := allocRunnerState{ Alloc: r.alloc, + RestartPolicy: r.RestartPolicy, TaskStatus: r.taskStatus, Context: r.ctx, } @@ -279,6 +283,9 @@ func (r *AllocRunner) Run() { return } + // Extract the RestartPolicy from the TG and set it on the alloc + r.RestartPolicy = tg.RestartPolicy + // Create the execution context if r.ctx == nil { allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID)) @@ -301,7 +308,7 @@ func (r *AllocRunner) Run() { // Merge in the task resources task.Resources = alloc.TaskResources[task.Name] - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, r.RestartPolicy) r.tasks[task.Name] = tr go tr.Run() } diff --git a/client/task_runner.go b/client/task_runner.go index 14a45ffc3..d23c8204b 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -8,23 +8,51 @@ import ( "os" "path/filepath" "sync" + "time" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver" "github.com/hashicorp/nomad/nomad/structs" ) +type errorCounter struct { + count int + maxAttempts int + startTime time.Time + interval time.Duration +} + +func 
newErrorCounter(maxAttempts int, interval time.Duration) *errorCounter { + return &errorCounter{maxAttempts: maxAttempts, startTime: time.Now(), interval: interval} +} + +func (c *errorCounter) Increment() { + if c.count <= c.maxAttempts { + c.count = c.count + 1 + } +} + +func (c *errorCounter) shouldRestart() bool { + if time.Now().After(c.startTime.Add(c.interval)) { + c.count = 0 + c.startTime = time.Now() + } + return c.count < c.maxAttempts +} + // TaskRunner is used to wrap a task within an allocation and provide the execution context. type TaskRunner struct { - config *config.Config - updater TaskStateUpdater - logger *log.Logger - ctx *driver.ExecContext - allocID string + config *config.Config + updater TaskStateUpdater + logger *log.Logger + ctx *driver.ExecContext + allocID string + errorCounter *errorCounter - task *structs.Task - updateCh chan *structs.Task - handle driver.DriverHandle + task *structs.Task + restartPolicy *structs.RestartPolicy + updateCh chan *structs.Task + handle driver.DriverHandle destroy bool destroyCh chan struct{} @@ -44,17 +72,22 @@ type TaskStateUpdater func(taskName, status, desc string) // NewTaskRunner is used to create a new task context func NewTaskRunner(logger *log.Logger, config *config.Config, updater TaskStateUpdater, ctx *driver.ExecContext, - allocID string, task *structs.Task) *TaskRunner { + allocID string, task *structs.Task, + restartPolicy *structs.RestartPolicy) *TaskRunner { + + ec := newErrorCounter(restartPolicy.Attempts, restartPolicy.Interval) tc := &TaskRunner{ - config: config, - updater: updater, - logger: logger, - ctx: ctx, - allocID: allocID, - task: task, - updateCh: make(chan *structs.Task, 8), - destroyCh: make(chan struct{}), - waitCh: make(chan struct{}), + config: config, + updater: updater, + logger: logger, + errorCounter: ec, + ctx: ctx, + allocID: allocID, + task: task, + restartPolicy: restartPolicy, + updateCh: make(chan *structs.Task, 8), + destroyCh: make(chan struct{}), + waitCh: make(chan struct{}), } return tc } diff --git a/client/task_runner_test.go b/client/task_runner_test.go index e173b9176..cc49b2b26 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -52,7 +52,8 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) { allocDir.Build([]*structs.Task{task}) ctx := driver.NewExecContext(allocDir) - tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task) + rp := structs.NewRestartPolicy(structs.JobTypeService) + tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, rp) return upd, tr } @@ -154,6 +155,25 @@ func TestTaskRunner_Update(t *testing.T) { }) } +func TestTaskRunner_RestartCounter(t *testing.T) { + rc := newErrorCounter(3, 1*time.Second) + rc.Increment() + rc.Increment() + rc.Increment() + rc.Increment() + rc.Increment() + actual := rc.shouldRestart() + if actual { + t.Fatalf("Expect %v, Actual: %v", false, actual) + } + + time.Sleep(1 * time.Second) + actual = rc.shouldRestart() + if !actual { + t.Fatalf("Expect %v, Actual: %v", false, actual) + } +} + /* TODO: This test is disabled til a follow-up api changes the restore state interface. 
The driver/executor interface will be changed from Open to Cleanup, in which From ff55f2faf9f234c02e8b746bb2d452b077d7f0e7 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 3 Nov 2015 13:56:59 -0800 Subject: [PATCH 69/92] Added the logic to restart Tasks if possible --- client/task_runner.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/client/task_runner.go b/client/task_runner.go index d23c8204b..04c02615e 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -195,6 +195,25 @@ func (r *TaskRunner) startTask() error { return nil } +func (r *TaskRunner) restartTask() (bool, error) { + r.errorCounter.Increment() + if !r.errorCounter.shouldRestart() { + r.logger.Printf("[INFO] client: Not restarting task since it has been started %v times since %v", r.errorCounter.count, r.errorCounter.startTime) + return false, nil + } + + fmt.Printf("[DEBUG] client: Sleeping for %v before restaring task: %v", r.restartPolicy.Delay, r.task.Name) + time.Sleep(r.restartPolicy.Delay) + fmt.Printf("[DEBUG] client: Restarting Task: %v", r.task.Name) + + if err := r.startTask(); err != nil { + r.logger.Printf("[ERR] client: Couldn't re-start task: %v because of error: %v", r.task.Name, err) + return false, err + } + r.logger.Printf("[INFO] client: Successfuly restated Task: %v", r.task.Name) + return true, nil +} + // Run is a long running routine used to manage the task func (r *TaskRunner) Run() { defer close(r.waitCh) @@ -214,6 +233,12 @@ OUTER: select { case err := <-r.handle.WaitCh(): if err != nil { + // Trying to restart the task + if _, err := r.restartTask(); err == nil { + // We have succesfully restarted the task, going + // back to listening to events + continue + } r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", r.task.Name, r.allocID, err) r.setStatus(structs.AllocClientStatusDead, From 2a9dd21a53521723c9c1969e3f5e010af5187043 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 3 Nov 2015 14:44:24 -0800 Subject: [PATCH 70/92] Fixed the log statements --- client/task_runner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/task_runner.go b/client/task_runner.go index 04c02615e..d4cbe583d 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -202,9 +202,9 @@ func (r *TaskRunner) restartTask() (bool, error) { return false, nil } - fmt.Printf("[DEBUG] client: Sleeping for %v before restaring task: %v", r.restartPolicy.Delay, r.task.Name) + r.logger.Printf("[DEBUG] client: Sleeping for %v before restaring task: %v", r.restartPolicy.Delay, r.task.Name) time.Sleep(r.restartPolicy.Delay) - fmt.Printf("[DEBUG] client: Restarting Task: %v", r.task.Name) + r.logger.Printf("[DEBUG] client: Restarting Task: %v", r.task.Name) if err := r.startTask(); err != nil { r.logger.Printf("[ERR] client: Couldn't re-start task: %v because of error: %v", r.task.Name, err) From 830172256d3fa36c1dd4b200f94c6ab30a0efcd3 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 11:31:50 -0800 Subject: [PATCH 71/92] Fix typo --- client/driver/executor/exec_basic.go | 1 - client/driver/executor/test_harness.go | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go index 4b865fbd9..a554acfc1 100644 --- a/client/driver/executor/exec_basic.go +++ b/client/driver/executor/exec_basic.go @@ -74,7 +74,6 @@ func (e *BasicExecutor) Start() error { } e.cmd.Args = parsed - // We don't want to call ourself. 
We want to call Start on our embedded Cmd spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) e.spawn = spawn.NewSpawner(spawnState) e.spawn.SetCommand(&e.cmd) diff --git a/client/driver/executor/test_harness.go b/client/driver/executor/test_harness.go index afdf8610f..9ec8a9b3a 100644 --- a/client/driver/executor/test_harness.go +++ b/client/driver/executor/test_harness.go @@ -39,13 +39,13 @@ func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) { return task.Name, allocDir } -func testExecutor(t *testing.T, newExecutor func() Executor, compatible func(*testing.T)) { +func testExecutor(t *testing.T, buildExecutor func() Executor, compatible func(*testing.T)) { if compatible != nil { compatible(t) } command := func(name string, args ...string) Executor { - b := NewExecutor() + b := buildExecutor() SetCommand(b, name, args) return b } From c7718e662898fc7f839eca66dd0313d1b1271963 Mon Sep 17 00:00:00 2001 From: Clint Date: Thu, 5 Nov 2015 13:50:18 -0600 Subject: [PATCH 72/92] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 118e0cfc8..3fb99bfa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ FEATURES: * Blocking queries supported in API [GH-366] + * Add support for downloading external artifacts to execute for Exec, Raw exec drivers [GH-381] + +BACKWARDS INCOMPATIBILITIES: + + * Qemu and Java driver configurations have been updated to both use `artifact_source` as the source for external images/jars to be ran ## 0.1.2 (October 6, 2015) From 34c942236a9b2ab8ed6921fc22243593ebebb94a Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 11:54:51 -0800 Subject: [PATCH 73/92] Calling incorrect executor build method --- client/driver/executor/test_harness.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/driver/executor/test_harness.go b/client/driver/executor/test_harness.go index 9ec8a9b3a..6eabb556d 100644 --- a/client/driver/executor/test_harness.go +++ b/client/driver/executor/test_harness.go @@ -54,7 +54,7 @@ func testExecutor(t *testing.T, buildExecutor func() Executor, compatible func(* Executor_Start_Wait_Failure_Code(t, command) Executor_Start_Wait(t, command) Executor_Start_Kill(t, command) - Executor_Open(t, command) + Executor_Open(t, command, buildExecutor) } type buildExecCommand func(name string, args ...string) Executor @@ -178,7 +178,7 @@ func Executor_Start_Kill(t *testing.T, command buildExecCommand) { } } -func Executor_Open(t *testing.T, command buildExecCommand) { +func Executor_Open(t *testing.T, command buildExecCommand, newExecutor func() Executor) { task, alloc := mockAllocDir(t) defer alloc.Destroy() @@ -210,7 +210,7 @@ func Executor_Open(t *testing.T, command buildExecCommand) { log.Panicf("ID() failed: %v", err) } - e2 := NewExecutor() + e2 := newExecutor() if err := e2.Open(id); err != nil { log.Panicf("Open(%v) failed: %v", id, err) } From 5b067a3e4fa2bee98096db0ba98155a585c2eb73 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 13:46:02 -0800 Subject: [PATCH 74/92] Merge fix --- client/driver/docker.go | 4 +++- client/driver/exec.go | 4 +++- client/driver/java.go | 4 +++- client/driver/qemu.go | 4 +++- client/driver/raw_exec.go | 4 +++- client/driver/rkt.go | 4 +++- client/fingerprint/arch.go | 1 + client/fingerprint/consul.go | 4 ++++ client/fingerprint/cpu.go | 1 + client/fingerprint/env_aws.go | 1 + client/fingerprint/env_gce.go | 1 + client/fingerprint/fingerprint.go | 17 
+++++++++++++++++ client/fingerprint/host.go | 1 + client/fingerprint/memory.go | 1 + client/fingerprint/network_unix.go | 1 + client/fingerprint/network_windows.go | 1 + client/fingerprint/storage.go | 1 + 17 files changed, 48 insertions(+), 6 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index d5031d5d9..1708c6921 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -13,11 +13,13 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/nomad/structs" ) type DockerDriver struct { DriverContext + fingerprint.StaticFingerprinter } type dockerPID struct { @@ -37,7 +39,7 @@ type dockerHandle struct { } func NewDockerDriver(ctx *DriverContext) Driver { - return &DockerDriver{*ctx} + return &DockerDriver{DriverContext: *ctx} } // dockerClient creates *docker.Client. In test / dev mode we can use ENV vars diff --git a/client/driver/exec.go b/client/driver/exec.go index 213bc574f..4de719c46 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -10,6 +10,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -18,6 +19,7 @@ import ( // features. type ExecDriver struct { DriverContext + fingerprint.StaticFingerprinter } // execHandle is returned from Start/Open as a handle to the PID @@ -29,7 +31,7 @@ type execHandle struct { // NewExecDriver is used to create a new exec driver func NewExecDriver(ctx *DriverContext) Driver { - return &ExecDriver{*ctx} + return &ExecDriver{DriverContext: *ctx} } func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { diff --git a/client/driver/java.go b/client/driver/java.go index 808bdfe5b..1aa2c6d3f 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -21,6 +22,7 @@ import ( // It literally just fork/execs tasks with the java command. 
type JavaDriver struct { DriverContext + fingerprint.StaticFingerprinter } // javaHandle is returned from Start/Open as a handle to the PID @@ -32,7 +34,7 @@ type javaHandle struct { // NewJavaDriver is used to create a new exec driver func NewJavaDriver(ctx *DriverContext) Driver { - return &JavaDriver{*ctx} + return &JavaDriver{DriverContext: *ctx} } func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { diff --git a/client/driver/qemu.go b/client/driver/qemu.go index 0eab4e659..b0d0afc62 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -16,6 +16,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -29,6 +30,7 @@ var ( // planned in the future type QemuDriver struct { DriverContext + fingerprint.StaticFingerprinter } // qemuHandle is returned from Start/Open as a handle to the PID @@ -48,7 +50,7 @@ type qemuPID struct { // NewQemuDriver is used to create a new exec driver func NewQemuDriver(ctx *DriverContext) Driver { - return &QemuDriver{*ctx} + return &QemuDriver{DriverContext: *ctx} } func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { diff --git a/client/driver/raw_exec.go b/client/driver/raw_exec.go index 856f2b7fc..c53e73a2b 100644 --- a/client/driver/raw_exec.go +++ b/client/driver/raw_exec.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -31,6 +32,7 @@ const ( // and this should only be used when explicitly needed. 
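The same two-line pattern repeats across the drivers in this patch: the driver struct embeds fingerprint.StaticFingerprinter, and the constructor switches from a positional composite literal to a named field. A minimal sketch of why the two edits go together, using a hypothetical fakeDriver inside the client/driver package (the real drivers also implement the rest of the Driver interface elsewhere):

type fakeDriver struct {
	DriverContext
	fingerprint.StaticFingerprinter
}

// With two embedded fields the old positional literal `&fakeDriver{*ctx}`
// no longer compiles (too few values), so the field is named explicitly.
func NewFakeDriver(ctx *DriverContext) *fakeDriver {
	return &fakeDriver{DriverContext: *ctx}
}

// The embedded StaticFingerprinter promotes Periodic(), so the driver
// reports itself as non-periodic without any per-driver code:
//
//	periodic, _ := NewFakeDriver(ctx).Periodic() // periodic == false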
type RawExecDriver struct { DriverContext + fingerprint.StaticFingerprinter } // rawExecHandle is returned from Start/Open as a handle to the PID @@ -42,7 +44,7 @@ type rawExecHandle struct { // NewRawExecDriver is used to create a new raw exec driver func NewRawExecDriver(ctx *DriverContext) Driver { - return &RawExecDriver{*ctx} + return &RawExecDriver{DriverContext: *ctx} } func (d *RawExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 456e4e02b..3f1912531 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/nomad/structs" ) @@ -30,6 +31,7 @@ var ( // planned in the future type RktDriver struct { DriverContext + fingerprint.StaticFingerprinter } // rktHandle is returned from Start/Open as a handle to the PID @@ -50,7 +52,7 @@ type rktPID struct { // NewRktDriver is used to create a new exec driver func NewRktDriver(ctx *DriverContext) Driver { - return &RktDriver{*ctx} + return &RktDriver{DriverContext: *ctx} } func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { diff --git a/client/fingerprint/arch.go b/client/fingerprint/arch.go index 869c542eb..16d8c99a8 100644 --- a/client/fingerprint/arch.go +++ b/client/fingerprint/arch.go @@ -10,6 +10,7 @@ import ( // ArchFingerprint is used to fingerprint the architecture type ArchFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index a03dfeec1..a8c054e21 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -63,3 +63,7 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod return true, nil } + +func (f *ConsulFingerprint) Periodic() (bool, time.Duration) { + return false, 15 * time.Second +} diff --git a/client/fingerprint/cpu.go b/client/fingerprint/cpu.go index 3e809397e..5027c8e9a 100644 --- a/client/fingerprint/cpu.go +++ b/client/fingerprint/cpu.go @@ -11,6 +11,7 @@ import ( // CPUFingerprint is used to fingerprint the CPU type CPUFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 575409bf8..f5e26e7cb 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -69,6 +69,7 @@ var ec2InstanceSpeedMap = map[string]int{ // EnvAWSFingerprint is used to fingerprint AWS metadata type EnvAWSFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/env_gce.go b/client/fingerprint/env_gce.go index f721fc36a..faef7deab 100644 --- a/client/fingerprint/env_gce.go +++ b/client/fingerprint/env_gce.go @@ -46,6 +46,7 @@ func lastToken(s string) string { // EnvGCEFingerprint is used to fingerprint GCE metadata type EnvGCEFingerprint struct { + StaticFingerprinter client *http.Client logger *log.Logger metadataURL string diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 4a42057b2..3154aad2b 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -3,11 +3,15 @@ package fingerprint import ( "fmt" "log" + "time" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/nomad/structs" ) +// EmptyDuration is to be used 
by fingerprinters that are not periodic. +const EmptyDuration = time.Duration(0) + // BuiltinFingerprints is a slice containing the key names of all regestered // fingerprints available, to provided an ordered iteration var BuiltinFingerprints = []string{ @@ -59,4 +63,17 @@ type Fingerprint interface { // Fingerprint is used to update properties of the Node, // and returns if the fingerprint was applicable and a potential error. Fingerprint(*config.Config, *structs.Node) (bool, error) + + // Periodic is a mechanism for the fingerprinter to indicate that it should + // be run periodically. The return value is a boolean indicating if it + // should be periodic, and if true, a duration. + Periodic() (bool, time.Duration) +} + +// StaticFingerprinter can be embeded in a struct that has a Fingerprint method +// to make it non-periodic. +type StaticFingerprinter struct{} + +func (s *StaticFingerprinter) Periodic() (bool, time.Duration) { + return false, EmptyDuration } diff --git a/client/fingerprint/host.go b/client/fingerprint/host.go index ac7a347f2..87acac63c 100644 --- a/client/fingerprint/host.go +++ b/client/fingerprint/host.go @@ -14,6 +14,7 @@ import ( // HostFingerprint is used to fingerprint the host type HostFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/memory.go b/client/fingerprint/memory.go index 5af097848..b249bebf5 100644 --- a/client/fingerprint/memory.go +++ b/client/fingerprint/memory.go @@ -11,6 +11,7 @@ import ( // MemoryFingerprint is used to fingerprint the available memory on the node type MemoryFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/network_unix.go b/client/fingerprint/network_unix.go index 4278384e9..9adb5f41b 100644 --- a/client/fingerprint/network_unix.go +++ b/client/fingerprint/network_unix.go @@ -19,6 +19,7 @@ import ( // NetworkFingerprint is used to fingerprint the Network capabilities of a node type NetworkFingerprint struct { + StaticFingerprinter logger *log.Logger interfaceDetector NetworkInterfaceDetector } diff --git a/client/fingerprint/network_windows.go b/client/fingerprint/network_windows.go index 99467bcc8..b438b7292 100644 --- a/client/fingerprint/network_windows.go +++ b/client/fingerprint/network_windows.go @@ -11,6 +11,7 @@ import ( // NetworkFingerprint is used to fingerprint the Network capabilities of a node type NetworkFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/storage.go b/client/fingerprint/storage.go index 6abbe52e4..ead264845 100644 --- a/client/fingerprint/storage.go +++ b/client/fingerprint/storage.go @@ -18,6 +18,7 @@ import ( // StorageFingerprint is used to measure the amount of storage free for // applications that the Nomad agent will run on this machine. 
type StorageFingerprint struct { + StaticFingerprinter logger *log.Logger } From 8e0ab77b1042525cd231d729917da0442aaa2240 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 10:39:52 -0800 Subject: [PATCH 75/92] Add consul fingerprinter to builtin list; sort list; fix bad error in consul fingerprinter --- client/fingerprint/consul.go | 2 +- client/fingerprint/fingerprint.go | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index a8c054e21..01e3a658b 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -48,7 +48,7 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod // If we can't hit this URL consul is probably not running on this machine. info, err := consulClient.Agent().Self() if err != nil { - return false, fmt.Errorf("Failed to query consul for agent status: %s", err) + return false, nil } node.Attributes["consul.server"] = strconv.FormatBool(info["Config"]["Server"].(bool)) diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 3154aad2b..a0139d485 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -16,26 +16,28 @@ const EmptyDuration = time.Duration(0) // fingerprints available, to provided an ordered iteration var BuiltinFingerprints = []string{ "arch", + "consul", "cpu", - "host", - "memory", - "storage", - "network", "env_aws", "env_gce", + "host", + "memory", + "network", + "storage", } // builtinFingerprintMap contains the built in registered fingerprints // which are available, corresponding to a key found in BuiltinFingerprints var builtinFingerprintMap = map[string]Factory{ "arch": NewArchFingerprint, + "consul": NewConsulFingerprint, "cpu": NewCPUFingerprint, - "host": NewHostFingerprint, - "memory": NewMemoryFingerprint, - "storage": NewStorageFingerprint, - "network": NewNetworkFingerprinter, "env_aws": NewEnvAWSFingerprint, "env_gce": NewEnvGCEFingerprint, + "host": NewHostFingerprint, + "memory": NewMemoryFingerprint, + "network": NewNetworkFingerprinter, + "storage": NewStorageFingerprint, } // NewFingerprint is used to instantiate and return a new fingerprint From f43c067b3e62c98afeb7314302f1b3db03669db2 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 13:41:41 -0800 Subject: [PATCH 76/92] Client handles periodic fingerprinters --- client/client.go | 24 ++++++++++++++++++++++++ client/fingerprint/consul.go | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/client/client.go b/client/client.go index 744bad876..2d466a784 100644 --- a/client/client.go +++ b/client/client.go @@ -443,11 +443,35 @@ func (c *Client) fingerprint() error { if applies { applied = append(applied, name) } + p, period := f.Periodic() + if p { + // TODO: If more periodic fingerprinters are added, then + // fingerprintPeriodic should be used to handle all the periodic + // fingerprinters by using a priority queue. + go c.fingerprintPeriodic(name, f, period) + } } c.logger.Printf("[DEBUG] client: applied fingerprints %v", applied) return nil } +// fingerprintPeriodic runs a fingerprinter at the specified duration. If the +// fingerprinter returns an error, the function exits. 
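For contrast with the static case, a fingerprinter opts into the periodic loop below simply by returning true and an interval from Periodic(). A minimal sketch, assuming it lives in the client/fingerprint package with the usual log/time/config/structs imports; the uptimeFingerprint type and the attribute it sets are hypothetical, and only the two method signatures come from the patch:

// uptimeFingerprint refreshes a single node attribute every time it runs.
type uptimeFingerprint struct {
	logger *log.Logger
	start  time.Time
}

func (f *uptimeFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
	if node.Attributes == nil {
		node.Attributes = make(map[string]string)
	}
	node.Attributes["uptime"] = time.Since(f.start).String()
	return true, nil
}

// Returning true here is what makes the client spawn the goroutine below;
// static fingerprinters embed StaticFingerprinter and run only once.
func (f *uptimeFingerprint) Periodic() (bool, time.Duration) {
	return true, 30 * time.Second
}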
+func (c *Client) fingerprintPeriodic(name string, f fingerprint.Fingerprint, d time.Duration) { + c.logger.Printf("[DEBUG] client: periodically fingerprinting %v at duration %v", name, d) + for { + select { + case <-time.After(d): + if _, err := f.Fingerprint(c.config, c.config.Node); err != nil { + c.logger.Printf("[DEBUG] client: disabling periodic fingerprinting for %v: %v", name, err) + return + } + case <-c.shutdownCh: + return + } + } +} + // setupDrivers is used to find the available drivers func (c *Client) setupDrivers() error { var avail []string diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index 01e3a658b..9ae81faf6 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -65,5 +65,5 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod } func (f *ConsulFingerprint) Periodic() (bool, time.Duration) { - return false, 15 * time.Second + return true, 15 * time.Second } From 2e168af9d37211aa0b9d34a0a354bf1887c2b821 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 5 Nov 2015 11:12:31 -0800 Subject: [PATCH 77/92] Cleaned up the logic to calculate restart duration --- client/alloc_runner.go | 19 ++--- client/restarts.go | 81 +++++++++++++++++++ client/restarts_test.go | 39 +++++++++ client/task_runner.go | 160 +++++++++++++++++-------------------- client/task_runner_test.go | 21 +---- 5 files changed, 204 insertions(+), 116 deletions(-) create mode 100644 client/restarts.go create mode 100644 client/restarts_test.go diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 444a44d1d..3129b0fb3 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -59,10 +59,10 @@ type AllocRunner struct { // allocRunnerState is used to snapshot the state of the alloc runner type allocRunnerState struct { - Alloc *structs.Allocation + Alloc *structs.Allocation RestartPolicy *structs.RestartPolicy - TaskStatus map[string]taskStatus - Context *driver.ExecContext + TaskStatus map[string]taskStatus + Context *driver.ExecContext } // NewAllocRunner is used to create a new allocation context @@ -102,10 +102,11 @@ func (r *AllocRunner) RestoreState() error { r.ctx = snap.Context // Restore the task runners + jobType := r.alloc.Job.Type var mErr multierror.Error for name := range r.taskStatus { task := &structs.Task{Name: name} - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, r.RestartPolicy) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, jobType, r.RestartPolicy) r.tasks[name] = tr if err := tr.RestoreState(); err != nil { r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err) @@ -121,10 +122,10 @@ func (r *AllocRunner) RestoreState() error { func (r *AllocRunner) SaveState() error { r.taskStatusLock.RLock() snap := allocRunnerState{ - Alloc: r.alloc, + Alloc: r.alloc, RestartPolicy: r.RestartPolicy, - TaskStatus: r.taskStatus, - Context: r.ctx, + TaskStatus: r.taskStatus, + Context: r.ctx, } err := persistState(r.stateFilePath(), &snap) r.taskStatusLock.RUnlock() @@ -307,8 +308,8 @@ func (r *AllocRunner) Run() { // Merge in the task resources task.Resources = alloc.TaskResources[task.Name] - - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, r.RestartPolicy) + jobType := r.alloc.Job.Type + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, jobType, r.RestartPolicy) r.tasks[task.Name] = tr go tr.Run() } diff 
--git a/client/restarts.go b/client/restarts.go new file mode 100644 index 000000000..a18518473 --- /dev/null +++ b/client/restarts.go @@ -0,0 +1,81 @@ +package client + +import ( + "github.com/hashicorp/nomad/nomad/structs" + "time" +) + +// The errorCounter keeps track of the number of times a process has exited +// It returns the duration after which a task is restarted +// For Batch jobs, the interval is set to zero value since the takss +// will be restarted only upto maxAttempts times +type restartTracker interface { + nextRestart() (bool, time.Duration) + increment() +} + +func newRestartTracker(jobType string, restartPolicy *structs.RestartPolicy) restartTracker { + switch jobType { + case structs.JobTypeService: + return &serviceRestartTracker{ + maxAttempts: restartPolicy.Attempts, + startTime: time.Now(), + interval: restartPolicy.Interval, + delay: restartPolicy.Delay, + } + default: + return &batchRestartTracker{ + maxAttempts: restartPolicy.Attempts, + delay: restartPolicy.Delay, + } + } +} + +type batchRestartTracker struct { + maxAttempts int + delay time.Duration + + count int +} + +func (b *batchRestartTracker) increment() { + b.count = b.count + 1 +} + +func (b *batchRestartTracker) nextRestart() (bool, time.Duration) { + if b.count < b.maxAttempts { + return true, b.delay + } + return false, 0 +} + +type serviceRestartTracker struct { + maxAttempts int + delay time.Duration + interval time.Duration + + count int + startTime time.Time +} + +func (c *serviceRestartTracker) increment() { + if c.count <= c.maxAttempts { + c.count = c.count + 1 + } +} + +func (c *serviceRestartTracker) nextRestart() (bool, time.Duration) { + windowEndTime := c.startTime.Add(c.interval) + now := time.Now() + if now.After(windowEndTime) { + c.count = 0 + c.startTime = time.Now() + return true, c.delay + } + + if c.count < c.maxAttempts { + return true, c.delay + } + + return true, windowEndTime.Sub(now) +} diff --git a/client/restarts_test.go b/client/restarts_test.go new file mode 100644 index 000000000..4cd4b8336 --- /dev/null +++ b/client/restarts_test.go @@ -0,0 +1,39 @@ +package client + +import ( + "github.com/hashicorp/nomad/nomad/structs" + "testing" + "time" +) + +func TestTaskRunner_ServiceRestartCounter(t *testing.T) { + rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: 2, Interval: 2 * time.Minute, Delay: 1 * time.Second}) + rt.increment() + rt.increment() + rt.increment() + rt.increment() + rt.increment() + actual, when := rt.nextRestart() + if !actual { + t.Fatalf("Expect %v, Actual: %v", true, actual) + } +} + +func TestTaskRunner_BatchRestartCounter(t *testing.T) { + rt := newRestartTracker(structs.JobTypeBatch, &structs.RestartPolicy{Attempts: 2, Interval: 1 * time.Second, Delay: 1 * time.Second}) + rt.increment() + rt.increment() + rt.increment() + rt.increment() + rt.increment() + actual, _ := rt.nextRestart() + if actual { + t.Fatalf("Expect %v, Actual: %v", false, actual) + } + + time.Sleep(1 * time.Second) + actual, _ = rt.nextRestart() + if actual { + t.Fatalf("Expect %v, Actual: %v", false, actual) + } +} diff --git a/client/task_runner.go b/client/task_runner.go index d4cbe583d..21649d6c1 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -15,39 +15,14 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -type errorCounter struct { - count int - maxAttempts int - startTime time.Time - interval time.Duration -} - -func newErrorCounter(maxAttempts int, interval time.Duration) *errorCounter { - return 
&errorCounter{maxAttempts: maxAttempts, startTime: time.Now(), interval: interval} -} - -func (c *errorCounter) Increment() { - if c.count <= c.maxAttempts { - c.count = c.count + 1 - } -} - -func (c *errorCounter) shouldRestart() bool { - if time.Now().After(c.startTime.Add(c.interval)) { - c.count = 0 - c.startTime = time.Now() - } - return c.count < c.maxAttempts -} - // TaskRunner is used to wrap a task within an allocation and provide the execution context. type TaskRunner struct { - config *config.Config - updater TaskStateUpdater - logger *log.Logger - ctx *driver.ExecContext - allocID string - errorCounter *errorCounter + config *config.Config + updater TaskStateUpdater + logger *log.Logger + ctx *driver.ExecContext + allocID string + restartTracker restartTracker task *structs.Task restartPolicy *structs.RestartPolicy @@ -72,22 +47,22 @@ type TaskStateUpdater func(taskName, status, desc string) // NewTaskRunner is used to create a new task context func NewTaskRunner(logger *log.Logger, config *config.Config, updater TaskStateUpdater, ctx *driver.ExecContext, - allocID string, task *structs.Task, + allocID string, task *structs.Task, taskType string, restartPolicy *structs.RestartPolicy) *TaskRunner { - ec := newErrorCounter(restartPolicy.Attempts, restartPolicy.Interval) + rt := newRestartTracker(taskType, restartPolicy) tc := &TaskRunner{ - config: config, - updater: updater, - logger: logger, - errorCounter: ec, - ctx: ctx, - allocID: allocID, - task: task, - restartPolicy: restartPolicy, - updateCh: make(chan *structs.Task, 8), - destroyCh: make(chan struct{}), - waitCh: make(chan struct{}), + config: config, + updater: updater, + logger: logger, + restartTracker: rt, + ctx: ctx, + allocID: allocID, + task: task, + restartPolicy: restartPolicy, + updateCh: make(chan *structs.Task, 8), + destroyCh: make(chan struct{}), + waitCh: make(chan struct{}), } return tc } @@ -195,27 +170,9 @@ func (r *TaskRunner) startTask() error { return nil } -func (r *TaskRunner) restartTask() (bool, error) { - r.errorCounter.Increment() - if !r.errorCounter.shouldRestart() { - r.logger.Printf("[INFO] client: Not restarting task since it has been started %v times since %v", r.errorCounter.count, r.errorCounter.startTime) - return false, nil - } - - r.logger.Printf("[DEBUG] client: Sleeping for %v before restaring task: %v", r.restartPolicy.Delay, r.task.Name) - time.Sleep(r.restartPolicy.Delay) - r.logger.Printf("[DEBUG] client: Restarting Task: %v", r.task.Name) - - if err := r.startTask(); err != nil { - r.logger.Printf("[ERR] client: Couldn't re-start task: %v because of error: %v", r.task.Name, err) - return false, err - } - r.logger.Printf("[INFO] client: Successfuly restated Task: %v", r.task.Name) - return true, nil -} - // Run is a long running routine used to manage the task func (r *TaskRunner) Run() { + var err error defer close(r.waitCh) r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')", r.task.Name, r.allocID) @@ -227,31 +184,62 @@ func (r *TaskRunner) Run() { } } + // Monitoring the Driver + err = r.monitorDriver(r.handle.WaitCh(), r.updateCh, r.destroyCh) + for err != nil { + r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", + r.task.Name, r.allocID, err) + r.restartTracker.increment() + shouldRestart, when := r.restartTracker.nextRestart() + if !shouldRestart { + r.logger.Printf("[INFO] Not restarting") + r.setStatus(structs.AllocClientStatusDead, fmt.Sprintf("task failed with: %v", err)) + break + } + + 
r.logger.Printf("[INFO] Restarting Task: %v", r.task.Name) + r.logger.Printf("[DEBUG] Sleeping for %v before restarting Task %v", when, r.task.Name) + ch := time.After(when) + L: + for { + select { + case <-ch: + break L + case <-r.destroyCh: + break L + } + } + r.destroyLock.Lock() + if r.destroy { + r.logger.Printf("[DEBUG] Not restarting task: %v because it's destroyed by user", r.task.Name) + break + } + if err = r.startTask(); err != nil { + r.destroyLock.Unlock() + continue + } + r.destroyLock.Unlock() + err = r.monitorDriver(r.handle.WaitCh(), r.updateCh, r.destroyCh) + } + + // Cleanup after ourselves + r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", + r.task.Name, r.allocID) + r.setStatus(structs.AllocClientStatusDead, + "task completed") + + r.DestroyState() +} + +func (r *TaskRunner) monitorDriver(waitCh chan error, updateCh chan *structs.Task, destroyCh chan struct{}) error { + var err error OUTER: // Wait for updates for { select { - case err := <-r.handle.WaitCh(): - if err != nil { - // Trying to restart the task - if _, err := r.restartTask(); err == nil { - // We have succesfully restarted the task, going - // back to listening to events - continue - } - r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", - r.task.Name, r.allocID, err) - r.setStatus(structs.AllocClientStatusDead, - fmt.Sprintf("task failed with: %v", err)) - } else { - r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", - r.task.Name, r.allocID) - r.setStatus(structs.AllocClientStatusDead, - "task completed") - } + case err = <-waitCh: break OUTER - - case update := <-r.updateCh: + case update := <-updateCh: // Update r.task = update if err := r.handle.Update(update); err != nil { @@ -259,7 +247,7 @@ OUTER: r.task.Name, r.allocID, err) } - case <-r.destroyCh: + case <-destroyCh: // Send the kill signal, and use the WaitCh to block until complete if err := r.handle.Kill(); err != nil { r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc '%s': %v", @@ -267,9 +255,7 @@ OUTER: } } } - - // Cleanup after ourselves - r.DestroyState() + return err } // Update is used to update the task of the context diff --git a/client/task_runner_test.go b/client/task_runner_test.go index cc49b2b26..3d5199670 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -53,7 +53,7 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) { ctx := driver.NewExecContext(allocDir) rp := structs.NewRestartPolicy(structs.JobTypeService) - tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, rp) + tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, structs.JobTypeService, rp) return upd, tr } @@ -155,25 +155,6 @@ func TestTaskRunner_Update(t *testing.T) { }) } -func TestTaskRunner_RestartCounter(t *testing.T) { - rc := newErrorCounter(3, 1*time.Second) - rc.Increment() - rc.Increment() - rc.Increment() - rc.Increment() - rc.Increment() - actual := rc.shouldRestart() - if actual { - t.Fatalf("Expect %v, Actual: %v", false, actual) - } - - time.Sleep(1 * time.Second) - actual = rc.shouldRestart() - if !actual { - t.Fatalf("Expect %v, Actual: %v", false, actual) - } -} - /* TODO: This test is disabled til a follow-up api changes the restore state interface. 
The driver/executor interface will be changed from Open to Cleanup, in which From ac6f7eded5dd0120e6ff28bb683933c0adad0b3d Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 5 Nov 2015 15:26:00 -0800 Subject: [PATCH 78/92] Fixing the test --- client/restarts_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/restarts_test.go b/client/restarts_test.go index 4cd4b8336..8015afd6e 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -13,7 +13,7 @@ func TestTaskRunner_ServiceRestartCounter(t *testing.T) { rt.increment() rt.increment() rt.increment() - actual, when := rt.nextRestart() + actual, _ := rt.nextRestart() if !actual { t.Fatalf("Expect %v, Actual: %v", true, actual) } From cf76e3a4566e209f8bc6efba2ba731498542bca5 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 5 Nov 2015 16:38:19 -0800 Subject: [PATCH 79/92] Passing restart tracker in the task runner --- client/alloc_runner.go | 6 ++++-- client/restarts.go | 30 ++++++++++++------------------ client/restarts_test.go | 20 ++++++++++---------- client/task_runner.go | 16 ++++++---------- client/task_runner_test.go | 3 ++- 5 files changed, 34 insertions(+), 41 deletions(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 3129b0fb3..114d0c8d5 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -106,7 +106,8 @@ func (r *AllocRunner) RestoreState() error { var mErr multierror.Error for name := range r.taskStatus { task := &structs.Task{Name: name} - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, jobType, r.RestartPolicy) + restartTracker := newRestartTracker(jobType, r.RestartPolicy) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, restartTracker) r.tasks[name] = tr if err := tr.RestoreState(); err != nil { r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err) @@ -309,7 +310,8 @@ func (r *AllocRunner) Run() { // Merge in the task resources task.Resources = alloc.TaskResources[task.Name] jobType := r.alloc.Job.Type - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, jobType, r.RestartPolicy) + restartTracker := newRestartTracker(jobType, r.RestartPolicy) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, restartTracker) r.tasks[task.Name] = tr go tr.Run() } diff --git a/client/restarts.go b/client/restarts.go index a18518473..4004f82f8 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -11,7 +11,6 @@ import ( // will be restarted only upto maxAttempts times type restartTracker interface { nextRestart() (bool, time.Duration) - increment() } func newRestartTracker(jobType string, restartPolicy *structs.RestartPolicy) restartTracker { @@ -38,11 +37,8 @@ type batchRestartTracker struct { count int } -func (b *batchRestartTracker) increment() { - b.count = b.count + 1 -} - func (b *batchRestartTracker) nextRestart() (bool, time.Duration) { + b.count += 1 if b.count < b.maxAttempts { return true, b.delay } @@ -58,24 +54,22 @@ type serviceRestartTracker struct { startTime time.Time } -func (c *serviceRestartTracker) increment() { - if c.count <= c.maxAttempts { - c.count = c.count + 1 - } -} - -func (c *serviceRestartTracker) nextRestart() (bool, time.Duration) { - windowEndTime := c.startTime.Add(c.interval) +func (s *serviceRestartTracker) nextRestart() (bool, time.Duration) { + s.count += 1 + windowEndTime := s.startTime.Add(s.interval) now := time.Now() 
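// Illustration with hypothetical policy values (Attempts: 2,
// Interval: 2 * time.Minute, Delay: 15 * time.Second), counting from when
// the tracker was created or last reset:
//   1st failure inside the window:  count=1 < 2       -> (true, 15s): restart after Delay
//   2nd failure inside the window:  count=2, at limit -> (true, time left in the window)
//   any failure after the window:   counter resets    -> (true, 15s) again
// Note the service tracker never refuses a restart; it only stretches the
// delay out to the end of the interval window.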
+ // If the window of restart is over we wait until the delay duration if now.After(windowEndTime) { - c.count = 0 - c.startTime = time.Now() - return true, c.delay + s.count = 0 + s.startTime = time.Now() + return true, s.delay } - if c.count < c.maxAttempts { - return true, c.delay + // If we are within the delay duration and didn't exhaust all retries + if s.count < s.maxAttempts { + return true, s.delay } + // If we exhausted all the retries and are withing the time window return true, windowEndTime.Sub(now) } diff --git a/client/restarts_test.go b/client/restarts_test.go index 8015afd6e..952d33649 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -8,11 +8,11 @@ import ( func TestTaskRunner_ServiceRestartCounter(t *testing.T) { rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: 2, Interval: 2 * time.Minute, Delay: 1 * time.Second}) - rt.increment() - rt.increment() - rt.increment() - rt.increment() - rt.increment() + rt.nextRestart() + rt.nextRestart() + rt.nextRestart() + rt.nextRestart() + rt.nextRestart() actual, _ := rt.nextRestart() if !actual { t.Fatalf("Expect %v, Actual: %v", true, actual) @@ -21,11 +21,11 @@ func TestTaskRunner_ServiceRestartCounter(t *testing.T) { func TestTaskRunner_BatchRestartCounter(t *testing.T) { rt := newRestartTracker(structs.JobTypeBatch, &structs.RestartPolicy{Attempts: 2, Interval: 1 * time.Second, Delay: 1 * time.Second}) - rt.increment() - rt.increment() - rt.increment() - rt.increment() - rt.increment() + rt.nextRestart() + rt.nextRestart() + rt.nextRestart() + rt.nextRestart() + rt.nextRestart() actual, _ := rt.nextRestart() if actual { t.Fatalf("Expect %v, Actual: %v", false, actual) diff --git a/client/task_runner.go b/client/task_runner.go index 21649d6c1..ae97fb3c7 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -24,10 +24,9 @@ type TaskRunner struct { allocID string restartTracker restartTracker - task *structs.Task - restartPolicy *structs.RestartPolicy - updateCh chan *structs.Task - handle driver.DriverHandle + task *structs.Task + updateCh chan *structs.Task + handle driver.DriverHandle destroy bool destroyCh chan struct{} @@ -47,19 +46,16 @@ type TaskStateUpdater func(taskName, status, desc string) // NewTaskRunner is used to create a new task context func NewTaskRunner(logger *log.Logger, config *config.Config, updater TaskStateUpdater, ctx *driver.ExecContext, - allocID string, task *structs.Task, taskType string, - restartPolicy *structs.RestartPolicy) *TaskRunner { + allocID string, task *structs.Task, restartTracker restartTracker) *TaskRunner { - rt := newRestartTracker(taskType, restartPolicy) tc := &TaskRunner{ config: config, updater: updater, logger: logger, - restartTracker: rt, + restartTracker: restartTracker, ctx: ctx, allocID: allocID, task: task, - restartPolicy: restartPolicy, updateCh: make(chan *structs.Task, 8), destroyCh: make(chan struct{}), waitCh: make(chan struct{}), @@ -189,7 +185,6 @@ func (r *TaskRunner) Run() { for err != nil { r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", r.task.Name, r.allocID, err) - r.restartTracker.increment() shouldRestart, when := r.restartTracker.nextRestart() if !shouldRestart { r.logger.Printf("[INFO] Not restarting") @@ -198,6 +193,7 @@ func (r *TaskRunner) Run() { } r.logger.Printf("[INFO] Restarting Task: %v", r.task.Name) + r.setStatus(structs.AllocClientStatusPending, "Task Restarting") r.logger.Printf("[DEBUG] Sleeping for %v before restarting Task %v", when, r.task.Name) 
ch := time.After(when)
 	L:
diff --git a/client/task_runner_test.go b/client/task_runner_test.go
index 3d5199670..7a7242e7b 100644
--- a/client/task_runner_test.go
+++ b/client/task_runner_test.go
@@ -53,7 +53,8 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) {
 	ctx := driver.NewExecContext(allocDir)
 	rp := structs.NewRestartPolicy(structs.JobTypeService)
-	tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, structs.JobTypeService, rp)
+	restartTracker := newRestartTracker(structs.JobTypeService, rp)
+	tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, restartTracker)

 	return upd, tr
 }

From af39146b039ff1a1fa4867cbcba6ce562486a06c Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Thu, 5 Nov 2015 16:39:57 -0800
Subject: [PATCH 80/92] Added the client word to log lines

---
 client/task_runner.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/client/task_runner.go b/client/task_runner.go
index ae97fb3c7..f4ca99c81 100644
--- a/client/task_runner.go
+++ b/client/task_runner.go
@@ -187,14 +187,14 @@ func (r *TaskRunner) Run() {
 			r.task.Name, r.allocID, err)
 		shouldRestart, when := r.restartTracker.nextRestart()
 		if !shouldRestart {
-			r.logger.Printf("[INFO] Not restarting")
+			r.logger.Printf("[INFO] client: Not restarting task: %v ", r.task.Name)
 			r.setStatus(structs.AllocClientStatusDead, fmt.Sprintf("task failed with: %v", err))
 			break
 		}
-		r.logger.Printf("[INFO] Restarting Task: %v", r.task.Name)
+		r.logger.Printf("[INFO] client: Restarting Task: %v", r.task.Name)
 		r.setStatus(structs.AllocClientStatusPending, "Task Restarting")
-		r.logger.Printf("[DEBUG] Sleeping for %v before restarting Task %v", when, r.task.Name)
+		r.logger.Printf("[DEBUG] client: Sleeping for %v before restarting Task %v", when, r.task.Name)
 		ch := time.After(when)
 	L:
 		for {
@@ -207,7 +207,7 @@ func (r *TaskRunner) Run() {
 		}
 		r.destroyLock.Lock()
 		if r.destroy {
-			r.logger.Printf("[DEBUG] Not restarting task: %v because it's destroyed by user", r.task.Name)
+			r.logger.Printf("[DEBUG] client: Not restarting task: %v because it's destroyed by user", r.task.Name)
 			break
 		}
 		if err = r.startTask(); err != nil {

From b8b8fe504cfe2dfa23805cda2afb70a9fb4abac3 Mon Sep 17 00:00:00 2001
From: Shiem Edelbrock
Date: Thu, 5 Nov 2015 16:40:20 -0800
Subject: [PATCH 81/92] Updated `privileged` option, added client `privileged`
 option

- Added error checking on privileged mode.
- Added `docker.privileged.enabled` to client config/fingerprint
---
 client/driver/docker.go                    | 19 +++++++++++++++++--
 website/source/docs/drivers/docker.html.md |  7 +++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/client/driver/docker.go b/client/driver/docker.go
index 80b31d840..bebe74d30 100644
--- a/client/driver/docker.go
+++ b/client/driver/docker.go
@@ -74,6 +74,15 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
 		return false, nil
 	}

+	privileged, err = strconv.ParseBool(d.config.ReadDefault("docker.privileged.enabled", "false"))
+	if err != nil {
+		return false, fmt.Errorf("Unable to parse docker.privileged.enabled: %s", err)
+	}
+	if privileged == true {
+		d.logger.Printf("[DEBUG] driver.docker: privileged containers enabled. 
Only enable if needed") + node.Attributes["docker.privileged.enabled"] = "1" + } + _, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) if err != nil { return false, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) @@ -167,8 +176,14 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (do d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"]) - // set privileged (fallback to false) - hostConfig.Privileged, _ = strconv.ParseBool(task.Config["privileged"]) + // set privileged mode + if v, ok := task.Config["privileged"]; ok { + taskPrivileged, err := strconv.ParseBool(v) + if err != nil { + return hostConfig, fmt.Errorf("Unable to parse boolean value from task config option 'privileged': %s", err) + } + hostConfig.Privileged = taskPrivileged + } // set DNS servers dns, ok := task.Config["dns-servers"] diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index d04740028..c24a171af 100644 --- a/website/source/docs/drivers/docker.html.md +++ b/website/source/docs/drivers/docker.html.md @@ -34,6 +34,8 @@ The `docker` driver supports the following configuration in the job specificatio * `privileged` - (optional) Privileged mode gives the container full access to the host. Valid options are `"true"` and `"false"` (defaults to `"false"`). + In order to use privileged mode, a client with the option + `docker.privileged.enabled = "true"` must be available. * `dns-servers` - (optional) A comma separated list of DNS servers for the container to use (e.g. "8.8.8.8,8.8.4.4"). *Docker API v1.10 and above only* @@ -139,6 +141,11 @@ The `docker` driver has the following configuration options: * `docker.cleanup.image` Defaults to `true`. Changing this to `false` will prevent Nomad from removing images from stopped tasks. +* `docker.privileged.enabled` Defaults to `false`. Changing this to `true` will + allow containers to use "privileged" mode, which gives the containers full access + to the host + + Note: When testing or using the `-dev` flag you can use `DOCKER_HOST`, `DOCKER_TLS_VERIFY`, and `DOCKER_CERT_PATH` to customize Nomad's behavior. In production Nomad will always read `docker.endpoint`. 
From ad44e4eeb591bb626a26469690c6238548678abf Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Thu, 5 Nov 2015 16:48:15 -0800
Subject: [PATCH 82/92] Added some comments to code

---
 client/task_runner.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/client/task_runner.go b/client/task_runner.go
index f4ca99c81..b868968df 100644
--- a/client/task_runner.go
+++ b/client/task_runner.go
@@ -187,7 +187,7 @@ func (r *TaskRunner) Run() {
 			r.task.Name, r.allocID, err)
 		shouldRestart, when := r.restartTracker.nextRestart()
 		if !shouldRestart {
-			r.logger.Printf("[INFO] client: Not restarting task: %v ", r.task.Name)
+			r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.allocID)
 			r.setStatus(structs.AllocClientStatusDead, fmt.Sprintf("task failed with: %v", err))
 			break
 		}
@@ -219,14 +219,14 @@ func (r *TaskRunner) Run() {
 	}

 	// Cleanup after ourselves
-	r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'",
-		r.task.Name, r.allocID)
-	r.setStatus(structs.AllocClientStatusDead,
-		"task completed")
+	r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", r.task.Name, r.allocID)
+	r.setStatus(structs.AllocClientStatusDead, "task completed")
 	r.DestroyState()
 }

+// This function listens to messages from the driver and blocks until the
+// driver exits
 func (r *TaskRunner) monitorDriver(waitCh chan error, updateCh chan *structs.Task, destroyCh chan struct{}) error {
 	var err error
 OUTER:

From 93184d9a458cb09e5c97234605fa07e3d8e1d3c5 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Thu, 5 Nov 2015 17:13:25 -0800
Subject: [PATCH 83/92] Incrementing the counter once we calculate the next
 restart

---
 client/restarts.go      | 12 ++++++++++--
 client/restarts_test.go | 34 +++++++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/client/restarts.go b/client/restarts.go
index 4004f82f8..b06b3f179 100644
--- a/client/restarts.go
+++ b/client/restarts.go
@@ -37,8 +37,12 @@ type batchRestartTracker struct {
 	count int
 }

-func (b *batchRestartTracker) nextRestart() (bool, time.Duration) {
+func (b *batchRestartTracker) increment() {
 	b.count += 1
+}
+
+func (b *batchRestartTracker) nextRestart() (bool, time.Duration) {
+	defer b.increment()
 	if b.count < b.maxAttempts {
 		return true, b.delay
 	}
@@ -54,8 +58,12 @@ type serviceRestartTracker struct {
 	startTime time.Time
 }

-func (s *serviceRestartTracker) nextRestart() (bool, time.Duration) {
+func (s *serviceRestartTracker) increment() {
 	s.count += 1
+}
+
+func (s *serviceRestartTracker) nextRestart() (bool, time.Duration) {
+	defer s.increment()
 	windowEndTime := s.startTime.Add(s.interval)
 	now := time.Now()
 	// If the window of restart is over we wait until the delay duration
diff --git a/client/restarts_test.go b/client/restarts_test.go
index 952d33649..a200d3beb 100644
--- a/client/restarts_test.go
+++ b/client/restarts_test.go
@@ -7,16 +7,32 @@ import (
 )

 func TestTaskRunner_ServiceRestartCounter(t *testing.T) {
-	rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: 2, Interval: 2 * time.Minute, Delay: 1 * time.Second})
-	rt.nextRestart()
-	rt.nextRestart()
-	rt.nextRestart()
-	rt.nextRestart()
-	rt.nextRestart()
-	actual, _ := rt.nextRestart()
-	if !actual {
-		t.Fatalf("Expect %v, Actual: %v", true, actual)
+	interval := 2 * time.Minute
+	delay := 1 * time.Second
+	attempts := 3
+	rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: attempts, Interval: interval, Delay: delay})
+
+	for 
i := 0; i < attempts; i++ { + actual, when := rt.nextRestart() + if !actual { + t.Fatalf("should restart returned %v, actual %v", actual, true) + } + if when != delay { + t.Fatalf("nextRestart() returned %v; want %v", when, delay) + } } + + time.Sleep(1 * time.Second) + for i := 0; i < 3; i++ { + actual, when := rt.nextRestart() + if !actual { + t.Fail() + } + if !(when > delay && when < interval) { + t.Fatalf("nextRestart() returned %v; want less than %v and more than %v", when, interval, delay) + } + } + } func TestTaskRunner_BatchRestartCounter(t *testing.T) { From 64c1a0b18cd49c9393b2bd3bf4451b628d10678d Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 5 Nov 2015 17:30:41 -0800 Subject: [PATCH 84/92] Fixed some tests and refactored logic --- client/alloc_runner.go | 6 ++---- client/restarts.go | 2 +- client/restarts_test.go | 25 +++++++++++++------------ client/task_runner.go | 12 +++--------- 4 files changed, 19 insertions(+), 26 deletions(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 114d0c8d5..c434ef65f 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -102,11 +102,10 @@ func (r *AllocRunner) RestoreState() error { r.ctx = snap.Context // Restore the task runners - jobType := r.alloc.Job.Type var mErr multierror.Error for name := range r.taskStatus { task := &structs.Task{Name: name} - restartTracker := newRestartTracker(jobType, r.RestartPolicy) + restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy) tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, restartTracker) r.tasks[name] = tr if err := tr.RestoreState(); err != nil { @@ -309,8 +308,7 @@ func (r *AllocRunner) Run() { // Merge in the task resources task.Resources = alloc.TaskResources[task.Name] - jobType := r.alloc.Job.Type - restartTracker := newRestartTracker(jobType, r.RestartPolicy) + restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy) tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, restartTracker) r.tasks[task.Name] = tr go tr.Run() diff --git a/client/restarts.go b/client/restarts.go index b06b3f179..4141405f8 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -42,8 +42,8 @@ func (b *batchRestartTracker) increment() { } func (b *batchRestartTracker) nextRestart() (bool, time.Duration) { - defer b.increment() if b.count < b.maxAttempts { + b.increment() return true, b.delay } return false, 0 diff --git a/client/restarts_test.go b/client/restarts_test.go index a200d3beb..e27f10390 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -36,20 +36,21 @@ func TestTaskRunner_ServiceRestartCounter(t *testing.T) { } func TestTaskRunner_BatchRestartCounter(t *testing.T) { - rt := newRestartTracker(structs.JobTypeBatch, &structs.RestartPolicy{Attempts: 2, Interval: 1 * time.Second, Delay: 1 * time.Second}) - rt.nextRestart() - rt.nextRestart() - rt.nextRestart() - rt.nextRestart() - rt.nextRestart() + attempts := 2 + interval := 1 * time.Second + delay := 1 * time.Second + rt := newRestartTracker(structs.JobTypeBatch, &structs.RestartPolicy{Attempts: attempts, Interval: interval, Delay: delay}) + for i := 0; i < attempts; i++ { + shouldRestart, when := rt.nextRestart() + if !shouldRestart { + t.Fatalf("should restart returned %v, actual %v", shouldRestart, true) + } + if when != delay { + t.Fatalf("Delay should be %v, actual: %v", delay, when) + } + } actual, _ := rt.nextRestart() if actual { t.Fatalf("Expect %v, Actual: %v", false, actual) } - - 
time.Sleep(1 * time.Second)
-	actual, _ = rt.nextRestart()
-	if actual {
-		t.Fatalf("Expect %v, Actual: %v", false, actual)
-	}
 }
diff --git a/client/task_runner.go b/client/task_runner.go
index b868968df..a59c72fb8 100644
--- a/client/task_runner.go
+++ b/client/task_runner.go
@@ -195,15 +195,9 @@ func (r *TaskRunner) Run() {
 		r.logger.Printf("[INFO] client: Restarting Task: %v", r.task.Name)
 		r.setStatus(structs.AllocClientStatusPending, "Task Restarting")
 		r.logger.Printf("[DEBUG] client: Sleeping for %v before restarting Task %v", when, r.task.Name)
-		ch := time.After(when)
-	L:
-		for {
-			select {
-			case <-ch:
-				break L
-			case <-r.destroyCh:
-				break L
-			}
+		select {
+		case <-time.After(when):
+		case <-r.destroyCh:
 		}
 		r.destroyLock.Lock()
 		if r.destroy {

From f695f687f8ad21f2c78b8119c5ae2d63809e84fd Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Thu, 5 Nov 2015 17:33:03 -0800
Subject: [PATCH 85/92] Refactored test

---
 client/restarts_test.go | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/client/restarts_test.go b/client/restarts_test.go
index e27f10390..9d5b59bb4 100644
--- a/client/restarts_test.go
+++ b/client/restarts_test.go
@@ -39,7 +39,12 @@ func TestTaskRunner_BatchRestartCounter(t *testing.T) {
 	attempts := 2
 	interval := 1 * time.Second
 	delay := 1 * time.Second
-	rt := newRestartTracker(structs.JobTypeBatch, &structs.RestartPolicy{Attempts: attempts, Interval: interval, Delay: delay})
+	rt := newRestartTracker(structs.JobTypeBatch,
+		&structs.RestartPolicy{Attempts: attempts,
+			Interval: interval,
+			Delay:    delay,
+		},
+	)
 	for i := 0; i < attempts; i++ {
 		shouldRestart, when := rt.nextRestart()
 		if !shouldRestart {

From e2f1599604977774b9b619201a4ea6e0d8611397 Mon Sep 17 00:00:00 2001
From: Shiem Edelbrock
Date: Thu, 5 Nov 2015 17:57:51 -0800
Subject: [PATCH 86/92] Fix build: returned wrong value, and forgot a ":" :/

---
 client/driver/docker.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/client/driver/docker.go b/client/driver/docker.go
index bebe74d30..e9d0cb32e 100644
--- a/client/driver/docker.go
+++ b/client/driver/docker.go
@@ -74,7 +74,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
 		return false, nil
 	}

-	privileged, err = strconv.ParseBool(d.config.ReadDefault("docker.privileged.enabled", "false"))
+	privileged, err := strconv.ParseBool(d.config.ReadDefault("docker.privileged.enabled", "false"))
 	if err != nil {
 		return false, fmt.Errorf("Unable to parse docker.privileged.enabled: %s", err)
 	}
@@ -180,7 +180,7 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (do
 	if v, ok := task.Config["privileged"]; ok {
 		taskPrivileged, err := strconv.ParseBool(v)
 		if err != nil {
-			return hostConfig, fmt.Errorf("Unable to parse boolean value from task config option 'privileged': %s", err)
+			return c, fmt.Errorf("Unable to parse boolean value from task config option 'privileged': %s", err)
 		}
 		hostConfig.Privileged = taskPrivileged
 	}

From fb8f922f2e65417d5ee1f216c3b04610156449ab Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Thu, 5 Nov 2015 18:31:39 -0800
Subject: [PATCH 87/92] Passing in the AllocID to exec context so that it can
 be used in the drivers

---
 client/alloc_runner.go       | 2 +-
 client/driver/driver.go      | 7 +++++--
 client/driver/driver_test.go | 2 +-
 client/task_runner_test.go   | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/client/alloc_runner.go b/client/alloc_runner.go
index c434ef65f..1504900c1 100644
--- a/client/alloc_runner.go
+++ 
b/client/alloc_runner.go @@ -295,7 +295,7 @@ func (r *AllocRunner) Run() { r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup)) return } - r.ctx = driver.NewExecContext(allocDir) + r.ctx = driver.NewExecContext(allocDir, r.alloc.ID) } // Start the task runners diff --git a/client/driver/driver.go b/client/driver/driver.go index dd4fcf43c..e2739e2b8 100644 --- a/client/driver/driver.go +++ b/client/driver/driver.go @@ -100,11 +100,14 @@ type ExecContext struct { // AllocDir contains information about the alloc directory structure. AllocDir *allocdir.AllocDir + + // Alloc ID + AllocID string } // NewExecContext is used to create a new execution context -func NewExecContext(alloc *allocdir.AllocDir) *ExecContext { - return &ExecContext{AllocDir: alloc} +func NewExecContext(alloc *allocdir.AllocDir, allocID string) *ExecContext { + return &ExecContext{AllocDir: alloc, AllocID: allocID} } // TaskEnvironmentVariables converts exec context and task configuration into a diff --git a/client/driver/driver_test.go b/client/driver/driver_test.go index a6f621455..106eeb96b 100644 --- a/client/driver/driver_test.go +++ b/client/driver/driver_test.go @@ -43,7 +43,7 @@ func testDriverContext(task string) *DriverContext { func testDriverExecContext(task *structs.Task, driverCtx *DriverContext) *ExecContext { allocDir := allocdir.NewAllocDir(filepath.Join(driverCtx.config.AllocDir, structs.GenerateUUID())) allocDir.Build([]*structs.Task{task}) - ctx := NewExecContext(allocDir) + ctx := NewExecContext(allocDir, "dummyAllocId") return ctx } diff --git a/client/task_runner_test.go b/client/task_runner_test.go index 7a7242e7b..17709316f 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -51,7 +51,7 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) { allocDir := allocdir.NewAllocDir(filepath.Join(conf.AllocDir, alloc.ID)) allocDir.Build([]*structs.Task{task}) - ctx := driver.NewExecContext(allocDir) + ctx := driver.NewExecContext(allocDir, alloc.ID) rp := structs.NewRestartPolicy(structs.JobTypeService) restartTracker := newRestartTracker(structs.JobTypeService, rp) tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, restartTracker) From 10e19064d947c6cc2345821ec72ebbec6e989a22 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 5 Nov 2015 18:47:16 -0800 Subject: [PATCH 88/92] Make periodic fingerprinting log the error --- client/client.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/client/client.go b/client/client.go index 2d466a784..029ac3954 100644 --- a/client/client.go +++ b/client/client.go @@ -455,16 +455,14 @@ func (c *Client) fingerprint() error { return nil } -// fingerprintPeriodic runs a fingerprinter at the specified duration. If the -// fingerprinter returns an error, the function exits. +// fingerprintPeriodic runs a fingerprinter at the specified duration. 
func (c *Client) fingerprintPeriodic(name string, f fingerprint.Fingerprint, d time.Duration) { c.logger.Printf("[DEBUG] client: periodically fingerprinting %v at duration %v", name, d) for { select { case <-time.After(d): if _, err := f.Fingerprint(c.config, c.config.Node); err != nil { - c.logger.Printf("[DEBUG] client: disabling periodic fingerprinting for %v: %v", name, err) - return + c.logger.Printf("[DEBUG] client: periodic fingerprinting for %v failed: %v", name, err) } case <-c.shutdownCh: return From 325de0ec350c591f95a53bf8147f80ed8430f4cb Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 6 Nov 2015 10:38:54 -0800 Subject: [PATCH 89/92] Docker privileged checks if host enabled it --- client/driver/docker.go | 11 ++++++++++- website/source/docs/drivers/docker.html.md | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index 77f129fc6..a2d614a89 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -181,11 +181,20 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (do d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"]) // set privileged mode + hostPrivileged, err := strconv.ParseBool(d.config.ReadDefault("docker.privileged.enabled", "false")) + if err != nil { + return c, fmt.Errorf("Unable to parse docker.privileged.enabled: %s", err) + } + if v, ok := task.Config["privileged"]; ok { taskPrivileged, err := strconv.ParseBool(v) if err != nil { - return c, fmt.Errorf("Unable to parse boolean value from task config option 'privileged': %s", err) + return c, fmt.Errorf("Unable to parse boolean value from task config option 'privileged': %v", err) } + if taskPrivileged && !hostPrivileged { + return c, fmt.Errorf(`Unable to set privileged flag since "docker.privileged.enabled" is false`) + } + hostConfig.Privileged = taskPrivileged } diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index c24a171af..dfa4c85a5 100644 --- a/website/source/docs/drivers/docker.html.md +++ b/website/source/docs/drivers/docker.html.md @@ -34,8 +34,8 @@ The `docker` driver supports the following configuration in the job specificatio * `privileged` - (optional) Privileged mode gives the container full access to the host. Valid options are `"true"` and `"false"` (defaults to `"false"`). - In order to use privileged mode, a client with the option - `docker.privileged.enabled = "true"` must be available. + Tasks with `privileged` set can only run on Nomad Agents with + `docker.privileged.enabled = "true"`. * `dns-servers` - (optional) A comma separated list of DNS servers for the container to use (e.g. "8.8.8.8,8.8.4.4"). *Docker API v1.10 and above only* @@ -143,7 +143,7 @@ The `docker` driver has the following configuration options: * `docker.privileged.enabled` Defaults to `false`. Changing this to `true` will allow containers to use "privileged" mode, which gives the containers full access - to the host + to the host. 
Note: When testing or using the `-dev` flag you can use `DOCKER_HOST`, From 5ec6aeaa39b9178634470bf4c52422396479c05e Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 6 Nov 2015 10:41:42 -0800 Subject: [PATCH 90/92] Merge --- client/driver/args/args.go | 4 +- client/driver/executor/exec_basic.go | 9 +--- client/driver/executor/exec_linux.go | 9 +--- client/driver/qemu.go | 79 ++++++++-------------------- client/driver/qemu_test.go | 19 +------ 5 files changed, 29 insertions(+), 91 deletions(-) diff --git a/client/driver/args/args.go b/client/driver/args/args.go index b447a7c99..51793bd8b 100644 --- a/client/driver/args/args.go +++ b/client/driver/args/args.go @@ -27,7 +27,7 @@ func ParseAndReplace(args string, env map[string]string) ([]string, error) { replaced := make([]string, len(parsed)) for i, arg := range parsed { - replaced[i] = replaceEnv(arg, env) + replaced[i] = ReplaceEnv(arg, env) } return replaced, nil @@ -36,7 +36,7 @@ func ParseAndReplace(args string, env map[string]string) ([]string, error) { // replaceEnv takes an arg and replaces all occurences of environment variables. // If the variable is found in the passed map it is replaced, otherwise the // original string is returned. -func replaceEnv(arg string, env map[string]string) string { +func ReplaceEnv(arg string, env map[string]string) string { return envRe.ReplaceAllStringFunc(arg, func(arg string) string { stripped := arg[1:] if stripped[0] == '{' { diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go index a554acfc1..ff6258a7e 100644 --- a/client/driver/executor/exec_basic.go +++ b/client/driver/executor/exec_basic.go @@ -59,14 +59,7 @@ func (e *BasicExecutor) Start() error { return err } - parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) - if err != nil { - return err - } else if len(parsedPath) != 1 { - return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) - } - - e.cmd.Path = parsedPath[0] + e.cmd.Path = args.ReplaceEnv(e.cmd.Path, envVars.Map()) combined := strings.Join(e.cmd.Args, " ") parsed, err := args.ParseAndReplace(combined, envVars.Map()) if err != nil { diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go index cc428ecd7..a7bbdd03c 100644 --- a/client/driver/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -165,14 +165,7 @@ func (e *LinuxExecutor) Start() error { return err } - parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) - if err != nil { - return err - } else if len(parsedPath) != 1 { - return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) - } - e.cmd.Path = parsedPath[0] - + e.cmd.Path = args.ReplaceEnv(e.cmd.Path, envVars.Map()) combined := strings.Join(e.cmd.Args, " ") parsed, err := args.ParseAndReplace(combined, envVars.Map()) if err != nil { diff --git a/client/driver/qemu.go b/client/driver/qemu.go index b0d0afc62..79193a217 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -1,11 +1,7 @@ package driver import ( - "bytes" - "encoding/json" "fmt" - "log" - "os" "os/exec" "path/filepath" "regexp" @@ -16,6 +12,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/driver/executor" "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" @@ -35,19 +32,11 @@ type QemuDriver struct { // qemuHandle is returned from Start/Open as a handle to the PID type 
qemuHandle struct { - proc *os.Process - vmID string + cmd executor.Executor waitCh chan error doneCh chan struct{} } -// qemuPID is a struct to map the pid running the process to the vm image on -// disk -type qemuPID struct { - Pid int - VmID string -} - // NewQemuDriver is used to create a new exec driver func NewQemuDriver(ctx *DriverContext) Driver { return &QemuDriver{DriverContext: *ctx} @@ -184,25 +173,25 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, ) } - // Start Qemu - var outBuf, errBuf bytes.Buffer - cmd := exec.Command(args[0], args[1:]...) - cmd.Stdout = &outBuf - cmd.Stderr = &errBuf + // Setup the command + cmd := executor.Command(args[0], args[1:]...) + if err := cmd.Limit(task.Resources); err != nil { + return nil, fmt.Errorf("failed to constrain resources: %s", err) + } + + if err := cmd.ConfigureTaskDir(d.taskName, ctx.AllocDir); err != nil { + return nil, fmt.Errorf("failed to configure task directory: %v", err) + } d.logger.Printf("[DEBUG] Starting QemuVM command: %q", strings.Join(args, " ")) if err := cmd.Start(); err != nil { - return nil, fmt.Errorf( - "Error running QEMU: %s\n\nOutput: %s\n\nError: %s", - err, outBuf.String(), errBuf.String()) + return nil, fmt.Errorf("failed to start command: %v", err) } - d.logger.Printf("[INFO] Started new QemuVM: %s", vmID) // Create and Return Handle h := &qemuHandle{ - proc: cmd.Process, - vmID: vmPath, + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } @@ -212,42 +201,25 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, } func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { - // Parse the handle - pidBytes := []byte(strings.TrimPrefix(handleID, "QEMU:")) - qpid := &qemuPID{} - if err := json.Unmarshal(pidBytes, qpid); err != nil { - return nil, fmt.Errorf("failed to parse Qemu handle '%s': %v", handleID, err) - } - // Find the process - proc, err := os.FindProcess(qpid.Pid) - if proc == nil || err != nil { - return nil, fmt.Errorf("failed to find Qemu PID %d: %v", qpid.Pid, err) + cmd, err := executor.OpenId(handleID) + if err != nil { + return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err) } // Return a driver handle - h := &qemuHandle{ - proc: proc, - vmID: qpid.VmID, + h := &execHandle{ + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } - go h.run() return h, nil } func (h *qemuHandle) ID() string { - // Return a handle to the PID - pid := &qemuPID{ - Pid: h.proc.Pid, - VmID: h.vmID, - } - data, err := json.Marshal(pid) - if err != nil { - log.Printf("[ERR] failed to marshal Qemu PID to JSON: %s", err) - } - return fmt.Sprintf("QEMU:%s", string(data)) + id, _ := h.cmd.ID() + return id } func (h *qemuHandle) WaitCh() chan error { @@ -259,28 +231,23 @@ func (h *qemuHandle) Update(task *structs.Task) error { return nil } -// Kill is used to terminate the task. We send an Interrupt -// and then provide a 5 second grace period before doing a Kill. 
-// // TODO: allow a 'shutdown_command' that can be executed over a ssh connection // to the VM func (h *qemuHandle) Kill() error { - h.proc.Signal(os.Interrupt) + h.cmd.Shutdown() select { case <-h.doneCh: return nil case <-time.After(5 * time.Second): - return h.proc.Kill() + return h.cmd.ForceStop() } } func (h *qemuHandle) run() { - ps, err := h.proc.Wait() + err := h.cmd.Wait() close(h.doneCh) if err != nil { h.waitCh <- err - } else if !ps.Success() { - h.waitCh <- fmt.Errorf("task exited with error") } close(h.waitCh) } diff --git a/client/driver/qemu_test.go b/client/driver/qemu_test.go index dffdc7bf0..0ab60f86d 100644 --- a/client/driver/qemu_test.go +++ b/client/driver/qemu_test.go @@ -2,7 +2,6 @@ package driver import ( "fmt" - "os" "testing" "github.com/hashicorp/nomad/client/config" @@ -11,21 +10,6 @@ import ( ctestutils "github.com/hashicorp/nomad/client/testutil" ) -func TestQemuDriver_Handle(t *testing.T) { - h := &qemuHandle{ - proc: &os.Process{Pid: 123}, - vmID: "vmid", - doneCh: make(chan struct{}), - waitCh: make(chan error, 1), - } - - actual := h.ID() - expected := `QEMU:{"Pid":123,"VmID":"vmid"}` - if actual != expected { - t.Errorf("Expected `%s`, found `%s`", expected, actual) - } -} - // The fingerprinter test should always pass, even if QEMU is not installed. func TestQemuDriver_Fingerprint(t *testing.T) { ctestutils.QemuCompatible(t) @@ -48,7 +32,7 @@ func TestQemuDriver_Fingerprint(t *testing.T) { } } -func TestQemuDriver_Start(t *testing.T) { +func TestQemuDriver_StartOpen_Wait(t *testing.T) { ctestutils.QemuCompatible(t) // TODO: use test server to load from a fixture task := &structs.Task{ @@ -60,6 +44,7 @@ func TestQemuDriver_Start(t *testing.T) { "guest_ports": "22,8080", }, Resources: &structs.Resources{ + CPU: 500, MemoryMB: 512, Networks: []*structs.NetworkResource{ &structs.NetworkResource{ From e49f3383b692480090e8c4cd2e409685570f28e9 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 6 Nov 2015 10:42:49 -0800 Subject: [PATCH 91/92] merge --- client/driver/executor/exec_basic.go | 5 ++ client/driver/raw_exec.go | 105 ++++++--------------------- client/driver/raw_exec_test.go | 27 ++----- 3 files changed, 35 insertions(+), 102 deletions(-) diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go index a554acfc1..fd0f09049 100644 --- a/client/driver/executor/exec_basic.go +++ b/client/driver/executor/exec_basic.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "github.com/hashicorp/nomad/client/allocdir" @@ -131,6 +132,10 @@ func (e *BasicExecutor) Shutdown() error { return fmt.Errorf("Failed to find user processes %v: %v", e.spawn.UserPid, err) } + if runtime.GOOS == "windows" { + return proc.Kill() + } + return proc.Signal(os.Interrupt) } diff --git a/client/driver/raw_exec.go b/client/driver/raw_exec.go index c53e73a2b..12e99b7f4 100644 --- a/client/driver/raw_exec.go +++ b/client/driver/raw_exec.go @@ -2,17 +2,13 @@ package driver import ( "fmt" - "os" - "os/exec" "path/filepath" - "runtime" "strconv" - "strings" "time" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/driver/executor" "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" @@ -21,10 +17,6 @@ import ( const ( // The option that enables this driver in the Config.Options map. 
rawExecConfigOption = "driver.raw_exec.enable" - - // Null files to use as stdin. - unixNull = "/dev/null" - windowsNull = "nul" ) // The RawExecDriver is a privileged version of the exec driver. It provides no @@ -37,7 +29,7 @@ type RawExecDriver struct { // rawExecHandle is returned from Start/Open as a handle to the PID type rawExecHandle struct { - proc *os.Process + cmd executor.Executor waitCh chan error doneCh chan struct{} } @@ -70,7 +62,6 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl if !ok { return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - taskLocal := filepath.Join(taskDir, allocdir.TaskLocal) // Get the command to be ran command, ok := task.Config["command"] @@ -96,65 +87,33 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl // Get the environment variables. envVars := TaskEnvironmentVariables(ctx, task) - // expand NOMAD_TASK_DIR - parsedPath, err := args.ParseAndReplace(command, envVars.Map()) - if err != nil { - return nil, fmt.Errorf("failure to parse arguments in command path: %v", command) - } else if len(parsedPath) != 1 { - return nil, fmt.Errorf("couldn't properly parse command path: %v", command) - } - - cm := parsedPath[0] - // Look for arguments - var cmdArgs []string + var args []string if argRaw, ok := task.Config["args"]; ok { - parsed, err := args.ParseAndReplace(argRaw, envVars.Map()) - if err != nil { - return nil, err - } - cmdArgs = append(cmdArgs, parsed...) + args = append(args, argRaw) } // Setup the command - cmd := exec.Command(cm, cmdArgs...) - cmd.Dir = taskDir - cmd.Env = envVars.List() - - // Capture the stdout/stderr and redirect stdin to /dev/null - stdoutFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stdout", taskName)) - stderrFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stderr", taskName)) - stdinFilename := unixNull - if runtime.GOOS == "windows" { - stdinFilename = windowsNull + cmd := executor.NewBasicExecutor() + executor.SetCommand(cmd, command, args) + if err := cmd.Limit(task.Resources); err != nil { + return nil, fmt.Errorf("failed to constrain resources: %s", err) } - stdo, err := os.OpenFile(stdoutFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return nil, fmt.Errorf("Error opening file to redirect stdout: %v", err) - } + // Populate environment variables + cmd.Command().Env = envVars.List() - stde, err := os.OpenFile(stderrFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err) + if err := cmd.ConfigureTaskDir(d.taskName, ctx.AllocDir); err != nil { + return nil, fmt.Errorf("failed to configure task directory: %v", err) } - stdi, err := os.OpenFile(stdinFilename, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return nil, fmt.Errorf("Error opening file to redirect stdin: %v", err) - } - - cmd.Stdout = stdo - cmd.Stderr = stde - cmd.Stdin = stdi - if err := cmd.Start(); err != nil { return nil, fmt.Errorf("failed to start command: %v", err) } // Return a driver handle - h := &rawExecHandle{ - proc: cmd.Process, + h := &execHandle{ + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } @@ -163,22 +122,15 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl } func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { - // Split the handle - pidStr := strings.TrimPrefix(handleID, "PID:") - pid, err := 
strconv.Atoi(pidStr) - if err != nil { - return nil, fmt.Errorf("failed to parse handle '%s': %v", handleID, err) - } - // Find the process - proc, err := os.FindProcess(pid) - if proc == nil || err != nil { - return nil, fmt.Errorf("failed to find PID %d: %v", pid, err) + cmd := executor.NewBasicExecutor() + if err := cmd.Open(handleID); err != nil { + return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err) } // Return a driver handle - h := &rawExecHandle{ - proc: proc, + h := &execHandle{ + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } @@ -187,8 +139,8 @@ func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, e } func (h *rawExecHandle) ID() string { - // Return a handle to the PID - return fmt.Sprintf("PID:%d", h.proc.Pid) + id, _ := h.cmd.ID() + return id } func (h *rawExecHandle) WaitCh() chan error { @@ -200,30 +152,21 @@ func (h *rawExecHandle) Update(task *structs.Task) error { return nil } -// Kill is used to terminate the task. We send an Interrupt -// and then provide a 5 second grace period before doing a Kill on supported -// OS's, otherwise we kill immediately. func (h *rawExecHandle) Kill() error { - if runtime.GOOS == "windows" { - return h.proc.Kill() - } - - h.proc.Signal(os.Interrupt) + h.cmd.Shutdown() select { case <-h.doneCh: return nil case <-time.After(5 * time.Second): - return h.proc.Kill() + return h.cmd.ForceStop() } } func (h *rawExecHandle) run() { - ps, err := h.proc.Wait() + err := h.cmd.Wait() close(h.doneCh) if err != nil { h.waitCh <- err - } else if !ps.Success() { - h.waitCh <- fmt.Errorf("task exited with error") } close(h.waitCh) } diff --git a/client/driver/raw_exec_test.go b/client/driver/raw_exec_test.go index 1dda991c8..053f29337 100644 --- a/client/driver/raw_exec_test.go +++ b/client/driver/raw_exec_test.go @@ -55,6 +55,7 @@ func TestRawExecDriver_StartOpen_Wait(t *testing.T) { "command": "/bin/sleep", "args": "1", }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) ctx := testDriverExecContext(task, driverCtx) @@ -84,13 +85,6 @@ func TestRawExecDriver_StartOpen_Wait(t *testing.T) { case <-time.After(2 * time.Second): t.Fatalf("timeout") } - - // Check they are both tracking the same PID. - pid1 := handle.(*rawExecHandle).proc.Pid - pid2 := handle2.(*rawExecHandle).proc.Pid - if pid1 != pid2 { - t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2) - } } func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { @@ -111,6 +105,7 @@ func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { "command": filepath.Join("$NOMAD_TASK_DIR", file), "checksum": checksum, }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) ctx := testDriverExecContext(task, driverCtx) @@ -140,13 +135,6 @@ func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { case <-time.After(5 * time.Second): t.Fatalf("timeout") } - - // Check they are both tracking the same PID. 
- pid1 := handle.(*rawExecHandle).proc.Pid - pid2 := handle2.(*rawExecHandle).proc.Pid - if pid1 != pid2 { - t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2) - } } func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) { @@ -165,6 +153,7 @@ func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) { "command": "/bin/bash", "args": fmt.Sprintf("-c '/bin/sleep 1 && %s'", filepath.Join("$NOMAD_TASK_DIR", file)), }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) ctx := testDriverExecContext(task, driverCtx) @@ -194,13 +183,6 @@ func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) { case <-time.After(5 * time.Second): t.Fatalf("timeout") } - - // Check they are both tracking the same PID. - pid1 := handle.(*rawExecHandle).proc.Pid - pid2 := handle2.(*rawExecHandle).proc.Pid - if pid1 != pid2 { - t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2) - } } func TestRawExecDriver_Start_Wait(t *testing.T) { @@ -210,6 +192,7 @@ func TestRawExecDriver_Start_Wait(t *testing.T) { "command": "/bin/sleep", "args": "1", }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) @@ -251,6 +234,7 @@ func TestRawExecDriver_Start_Wait_AllocDir(t *testing.T) { "command": "/bin/bash", "args": fmt.Sprintf(`-c "sleep 1; echo -n %s > $%s/%s"`, string(exp), environment.AllocDir, file), }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) @@ -295,6 +279,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) { "command": "/bin/sleep", "args": "1", }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) From b97cc505a1839cd1cb3dd89b0ca7c70f9fac386a Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 6 Nov 2015 12:38:25 -0800 Subject: [PATCH 92/92] System jobs use Service restart policy --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index bfec26fce..6e57fc58c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -924,7 +924,7 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { - case JobTypeService: + case JobTypeService, JobTypeSystem: rp := defaultServiceJobRestartPolicy return &rp case JobTypeBatch: