Merge branch 'master' into b-vagrantfile

This commit is contained in:
Chris Bednarski
2015-10-27 12:53:25 -07:00
112 changed files with 5563 additions and 686 deletions

21
.travis.yml Normal file
View File

@@ -0,0 +1,21 @@
sudo: false
language: go
go:
- 1.5.1
- tip
matrix:
allow_failures:
- go: tip
branches:
only:
- master
install:
- make bootstrap
script:
- make test

View File

@@ -3,6 +3,7 @@
IMPROVEMENTS:
* Nomad client cleans allocations on exit when in dev mode
* drivers: Use go-getter for artifact retrieval, add artifact support to Exec, Raw Exec drivers [GH-288]
## 0.1.1 (October 5, 2015)

View File

@@ -178,3 +178,22 @@ type AgentMember struct {
DelegateMax uint8
DelegateCur uint8
}
// AgentMembersNameSort implements sort.Interface for []*AgentMember
// based on the Name, DC and Region
type AgentMembersNameSort []*AgentMember
func (a AgentMembersNameSort) Len() int { return len(a) }
func (a AgentMembersNameSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a AgentMembersNameSort) Less(i, j int) bool {
if a[i].Tags["region"] != a[j].Tags["region"] {
return a[i].Tags["region"] < a[j].Tags["region"]
}
if a[i].Tags["dc"] != a[j].Tags["dc"] {
return a[i].Tags["dc"] < a[j].Tags["dc"]
}
return a[i].Name < a[j].Name
}
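For illustration, a minimal sketch of the resulting sort order (region, then datacenter, then name), assuming the api package import path; values mirror the test cases further down and are not part of this commit:

package main

import (
    "fmt"
    "sort"

    "github.com/hashicorp/nomad/api"
)

func main() {
    members := []*api.AgentMember{
        {Name: "nomad-2.vac.us-east", Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
        {Name: "nomad-1.global", Tags: map[string]string{"region": "global", "dc": "dc1"}},
    }
    // Sort by region, then datacenter, then name.
    sort.Sort(api.AgentMembersNameSort(members))
    fmt.Println(members[0].Name) // "nomad-1.global": region "global" sorts before "us-east"
}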

View File

@@ -1,6 +1,8 @@
package api
import (
"reflect"
"sort"
"testing"
"github.com/hashicorp/nomad/testutil"
@@ -154,3 +156,117 @@ func TestAgent_SetServers(t *testing.T) {
t.Fatalf("bad server list: %v", out)
}
}
func (a *AgentMember) String() string {
return "{Name: " + a.Name + " Region: " + a.Tags["region"] + " DC: " + a.Tags["dc"] + "}"
}
func TestAgents_Sort(t *testing.T) {
var sortTests = []struct {
in []*AgentMember
out []*AgentMember
}{
{
[]*AgentMember{
&AgentMember{Name: "nomad-2.vac.us-east",
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
&AgentMember{Name: "nomad-1.global",
Tags: map[string]string{"region": "global", "dc": "dc1"}},
&AgentMember{Name: "nomad-1.vac.us-east",
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
},
[]*AgentMember{
&AgentMember{Name: "nomad-1.global",
Tags: map[string]string{"region": "global", "dc": "dc1"}},
&AgentMember{Name: "nomad-1.vac.us-east",
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
&AgentMember{Name: "nomad-2.vac.us-east",
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
},
},
{
[]*AgentMember{
&AgentMember{Name: "nomad-02.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
&AgentMember{Name: "nomad-02.pal.us-west",
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
&AgentMember{Name: "nomad-01.pal.us-west",
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
&AgentMember{Name: "nomad-01.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
},
[]*AgentMember{
&AgentMember{Name: "nomad-01.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
&AgentMember{Name: "nomad-02.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
&AgentMember{Name: "nomad-01.pal.us-west",
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
&AgentMember{Name: "nomad-02.pal.us-west",
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
},
},
{
[]*AgentMember{
&AgentMember{Name: "nomad-02.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
&AgentMember{Name: "nomad-02.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-01.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
&AgentMember{Name: "nomad-01.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
},
[]*AgentMember{
&AgentMember{Name: "nomad-01.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-02.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-01.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
&AgentMember{Name: "nomad-02.tam.us-east",
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
},
},
{
[]*AgentMember{
&AgentMember{Name: "nomad-02.ber.europe",
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
&AgentMember{Name: "nomad-02.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-01.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-01.ber.europe",
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
},
[]*AgentMember{
&AgentMember{Name: "nomad-01.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-02.ams.europe",
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
&AgentMember{Name: "nomad-01.ber.europe",
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
&AgentMember{Name: "nomad-02.ber.europe",
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
},
},
{
[]*AgentMember{
&AgentMember{Name: "nomad-1.global"},
&AgentMember{Name: "nomad-3.global"},
&AgentMember{Name: "nomad-2.global"},
},
[]*AgentMember{
&AgentMember{Name: "nomad-1.global"},
&AgentMember{Name: "nomad-2.global"},
&AgentMember{Name: "nomad-3.global"},
},
},
}
for _, tt := range sortTests {
sort.Sort(AgentMembersNameSort(tt.in))
if !reflect.DeepEqual(tt.in, tt.out) {
t.Errorf("\necpected: %s\nget : %s", tt.in, tt.out)
}
}
}

View File

@@ -10,6 +10,8 @@ import (
"os"
"strconv"
"time"
"github.com/hashicorp/go-cleanhttp"
)
// QueryOptions are used to parameterize a query
@@ -86,7 +88,7 @@ type Config struct {
func DefaultConfig() *Config {
config := &Config{
Address: "http://127.0.0.1:4646",
HttpClient: http.DefaultClient,
HttpClient: cleanhttp.DefaultClient(),
}
if addr := os.Getenv("NOMAD_ADDR"); addr != "" {
config.Address = addr
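For illustration, a minimal sketch of a caller picking up the new default; the functional change is only that HttpClient is a fresh cleanhttp client rather than the shared http.DefaultClient. api.NewClient is assumed here and is not shown in this diff:

package main

import (
    "log"

    "github.com/hashicorp/nomad/api"
)

func main() {
    cfg := api.DefaultConfig() // HttpClient is now cleanhttp.DefaultClient()
    cfg.Address = "http://127.0.0.1:4646"

    client, err := api.NewClient(cfg) // assumed constructor, not part of this hunk
    if err != nil {
        log.Fatal(err)
    }
    _ = client
}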

View File

@@ -2,6 +2,7 @@ package api
import (
"sort"
"time"
)
const (
@@ -100,6 +101,12 @@ func (j *Jobs) ForceEvaluate(jobID string, q *WriteOptions) (string, *WriteMeta,
return resp.EvalID, wm, nil
}
// UpdateStrategy is for serializing the update strategy for a job.
type UpdateStrategy struct {
Stagger time.Duration
MaxParallel int
}
// Job is used to serialize a job.
type Job struct {
Region string
@@ -111,6 +118,7 @@ type Job struct {
Datacenters []string
Constraints []*Constraint
TaskGroups []*TaskGroup
Update *UpdateStrategy
Meta map[string]string
Status string
StatusDescription string
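For illustration, a hedged sketch of the new Update field in use; only fields visible in this hunk are set, and the values are illustrative:

package main

import (
    "time"

    "github.com/hashicorp/nomad/api"
)

func main() {
    job := &api.Job{
        Region:      "global",
        Datacenters: []string{"dc1"},
        Update: &api.UpdateStrategy{
            Stagger:     30 * time.Second, // delay between updating allocations
            MaxParallel: 1,                // allocations updated at a time
        },
    }
    _ = job
}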

View File

@@ -282,7 +282,11 @@ func (r *AllocRunner) Run() {
// Create the execution context
if r.ctx == nil {
allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID))
allocDir.Build(tg.Tasks)
if err := allocDir.Build(tg.Tasks); err != nil {
r.logger.Printf("[WARN] client: failed to build task directories: %v", err)
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
return
}
r.ctx = driver.NewExecContext(allocDir)
}

View File

@@ -4,12 +4,15 @@ import (
"encoding/json"
"fmt"
"log"
"path/filepath"
"strconv"
"strings"
docker "github.com/fsouza/go-dockerclient"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -37,11 +40,36 @@ func NewDockerDriver(ctx *DriverContext) Driver {
return &DockerDriver{*ctx}
}
// dockerClient creates *docker.Client. In test / dev mode we can use ENV vars
// to connect to the docker daemon. In production mode we will read
// docker.endpoint from the config file.
func (d *DockerDriver) dockerClient() (*docker.Client, error) {
// In dev mode, read DOCKER_* environment variables DOCKER_HOST,
// DOCKER_TLS_VERIFY, and DOCKER_CERT_PATH. This allows you to run tests and
// demo against boot2docker or a VM on OSX and Windows. This falls back on
// the default unix socket on linux if tests are run on linux.
//
// Also note that we need to turn on DevMode in the test configs.
if d.config.DevMode {
return docker.NewClientFromEnv()
}
// In prod mode we'll read the docker.endpoint configuration and fall back
// on the host-specific default. We do not read from the environment.
defaultEndpoint, err := docker.DefaultDockerHost()
if err != nil {
return nil, fmt.Errorf("Unable to determine default docker endpoint: %s", err)
}
dockerEndpoint := d.config.ReadDefault("docker.endpoint", defaultEndpoint)
return docker.NewClient(dockerEndpoint)
}
func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Initialize docker API client
dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
client, err := docker.NewClient(dockerEndpoint)
client, err := d.dockerClient()
if err != nil {
d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon: %v", err)
return false, nil
}
@@ -56,6 +84,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
env, err := client.Version()
if err != nil {
d.logger.Printf("[DEBUG] driver.docker: could not read version from daemon: %v", err)
// Check the "no such file" error if the unix file is missing
if strings.Contains(err.Error(), "no such file") {
return false, nil
@@ -65,18 +94,39 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
// is broken.
return false, err
}
node.Attributes["driver.docker"] = "true"
node.Attributes["driver.docker"] = "1"
node.Attributes["driver.docker.version"] = env.Get("Version")
return true, nil
}
// We have to call this when we create the container AND when we start it so
// we'll make a function.
func createHostConfig(task *structs.Task) *docker.HostConfig {
// hostConfig holds options for the docker container that are unique to this
// machine, such as resource limits and port mappings
return &docker.HostConfig{
func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) {
shared := alloc.SharedDir
local, ok := alloc.TaskDirs[task.Name]
if !ok {
return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name)
}
return []string{
fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocName),
fmt.Sprintf("%s:%s", local, allocdir.TaskLocal),
}, nil
}
// createContainer initializes a struct needed to call docker.client.CreateContainer()
func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (docker.CreateContainerOptions, error) {
var c docker.CreateContainerOptions
if task.Resources == nil {
d.logger.Printf("[ERR] driver.docker: task.Resources is empty")
return c, fmt.Errorf("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.")
}
binds, err := d.containerBinds(ctx.AllocDir, task)
if err != nil {
return c, err
}
hostConfig := &docker.HostConfig{
// Convert MB to bytes. This is an absolute value.
//
// This value represents the total amount of memory a process can use.
@@ -105,40 +155,38 @@ func createHostConfig(task *structs.Task) *docker.HostConfig {
// - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
// - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt
CPUShares: int64(task.Resources.CPU),
}
}
// createContainer initializes a struct needed to call docker.client.CreateContainer()
func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) docker.CreateContainerOptions {
if task.Resources == nil {
panic("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.")
// Binds are used to mount a host volume into the container. We mount a
// local directory for storage and a shared alloc directory that can be
// used to share data between different tasks in the same task group.
Binds: binds,
}
hostConfig := createHostConfig(task)
logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"])
mode, ok := task.Config["network_mode"]
if !ok || mode == "" {
// docker default
logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge")
d.logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge")
mode = "bridge"
}
// Ignore the container mode for now
switch mode {
case "default", "bridge", "none", "host":
logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode)
d.logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode)
default:
logger.Printf("[WARN] invalid setting for network mode %s, defaulting to bridge mode on docker0", mode)
mode = "bridge"
d.logger.Printf("[ERR] driver.docker: invalid setting for network mode: %s", mode)
return c, fmt.Errorf("Invalid setting for network mode: %s", mode)
}
hostConfig.NetworkMode = mode
// Setup port mapping (equivalent to -p on docker CLI). Ports must already be
// exposed in the container.
if len(task.Resources.Networks) == 0 {
logger.Print("[WARN] driver.docker: No networks are available for port mapping")
d.logger.Print("[WARN] driver.docker: No networks are available for port mapping")
} else {
network := task.Resources.Networks[0]
dockerPorts := map[docker.Port][]docker.PortBinding{}
@@ -146,7 +194,7 @@ func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) d
for _, port := range network.ListStaticPorts() {
dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port)
d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port)
}
for label, port := range network.MapDynamicPorts() {
@@ -160,30 +208,47 @@ func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) d
if _, err := strconv.Atoi(label); err == nil {
dockerPorts[docker.Port(label+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
dockerPorts[docker.Port(label+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label)
d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label)
} else {
dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label)
d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label)
}
}
hostConfig.PortBindings = dockerPorts
}
// Create environment variables.
env := TaskEnvironmentVariables(ctx, task)
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
config := &docker.Config{
Env: TaskEnvironmentVariables(ctx, task).List(),
Env: env.List(),
Image: task.Config["image"],
}
rawArgs, hasArgs := task.Config["args"]
parsedArgs, err := args.ParseAndReplace(rawArgs, env.Map())
if err != nil {
return c, err
}
// If the user specified a custom command to run, we'll inject it here.
if command, ok := task.Config["command"]; ok {
config.Cmd = strings.Split(command, " ")
cmd := []string{command}
if hasArgs {
cmd = append(cmd, parsedArgs...)
}
config.Cmd = cmd
} else if hasArgs {
d.logger.Println("[DEBUG] driver.docker: ignoring args because command not specified")
}
return docker.CreateContainerOptions{
Config: config,
HostConfig: hostConfig,
}
}, nil
}
func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
@@ -212,10 +277,9 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
}
// Initialize docker API client
dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
client, err := docker.NewClient(dockerEndpoint)
client, err := d.dockerClient()
if err != nil {
return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err)
return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
}
repo, tag := docker.ParseRepositoryTag(image)
@@ -258,8 +322,13 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
d.logger.Printf("[DEBUG] driver.docker: using image %s", dockerImage.ID)
d.logger.Printf("[INFO] driver.docker: identified image %s as %s", image, dockerImage.ID)
config, err := d.createContainer(ctx, task)
if err != nil {
d.logger.Printf("[ERR] driver.docker: %s", err)
return nil, fmt.Errorf("Failed to create container config for image %s", image)
}
// Create a container
container, err := client.CreateContainer(createContainer(ctx, task, d.logger))
container, err := client.CreateContainer(config)
if err != nil {
d.logger.Printf("[ERR] driver.docker: %s", err)
return nil, fmt.Errorf("Failed to create container from image %s", image)
@@ -309,10 +378,9 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID)
// Initialize docker API client
dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
client, err := docker.NewClient(dockerEndpoint)
client, err := d.dockerClient()
if err != nil {
return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err)
return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
}
// Look for a running container with this ID
@@ -401,6 +469,7 @@ func (h *dockerHandle) Kill() error {
err = h.client.RemoveImage(h.imageID)
if err != nil {
containers, err := h.client.ListContainers(docker.ListContainersOptions{
// The image might be in use by a stopped container, so check everything
All: true,
Filters: map[string][]string{
"image": []string{h.imageID},

View File

@@ -1,14 +1,25 @@
package driver
import (
"fmt"
"io/ioutil"
"os/exec"
"path/filepath"
"reflect"
"testing"
"time"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/nomad/structs"
)
func testDockerDriverContext(task string) *DriverContext {
cfg := testConfig()
cfg.DevMode = true
return NewDriverContext(task, cfg, cfg.Node, testLogger())
}
// dockerLocated looks to see whether docker is available on this system before
// we try to run tests. We'll keep it simple and just check for the CLI.
func dockerLocated() bool {
@@ -33,7 +44,7 @@ func TestDockerDriver_Handle(t *testing.T) {
// The fingerprinter test should always pass, even if Docker is not installed.
func TestDockerDriver_Fingerprint(t *testing.T) {
d := NewDockerDriver(testDriverContext(""))
d := NewDockerDriver(testDockerDriverContext(""))
node := &structs.Node{
Attributes: make(map[string]string),
}
@@ -44,7 +55,7 @@ func TestDockerDriver_Fingerprint(t *testing.T) {
if apply != dockerLocated() {
t.Fatalf("Fingerprinter should detect Docker when it is installed")
}
if node.Attributes["driver.docker"] == "" {
if node.Attributes["driver.docker"] != "1" {
t.Log("Docker not found. The remainder of the docker tests will be skipped.")
}
t.Logf("Found docker version %s", node.Attributes["driver.docker.version"])
@@ -56,14 +67,14 @@ func TestDockerDriver_StartOpen_Wait(t *testing.T) {
}
task := &structs.Task{
Name: "python-demo",
Name: "redis-demo",
Config: map[string]string{
"image": "redis",
},
Resources: basicResources,
}
driverCtx := testDriverContext(task.Name)
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)
@@ -93,10 +104,11 @@ func TestDockerDriver_Start_Wait(t *testing.T) {
}
task := &structs.Task{
Name: "python-demo",
Name: "redis-demo",
Config: map[string]string{
"image": "redis",
"command": "redis-server -v",
"command": "redis-server",
"args": "-v",
},
Resources: &structs.Resources{
MemoryMB: 256,
@@ -104,7 +116,7 @@ func TestDockerDriver_Start_Wait(t *testing.T) {
},
}
driverCtx := testDriverContext(task.Name)
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)
@@ -134,21 +146,77 @@ func TestDockerDriver_Start_Wait(t *testing.T) {
}
}
func TestDockerDriver_Start_Wait_AllocDir(t *testing.T) {
if !dockerLocated() {
t.SkipNow()
}
exp := []byte{'w', 'i', 'n'}
file := "output.txt"
task := &structs.Task{
Name: "redis-demo",
Config: map[string]string{
"image": "redis",
"command": "/bin/bash",
"args": fmt.Sprintf(`-c "sleep 1; echo -n %s > $%s/%s"`, string(exp), environment.AllocDir, file),
},
Resources: &structs.Resources{
MemoryMB: 256,
CPU: 512,
},
}
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
defer handle.Kill()
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
// Check that data was written to the shared alloc directory.
outputFile := filepath.Join(ctx.AllocDir.SharedDir, file)
act, err := ioutil.ReadFile(outputFile)
if err != nil {
t.Fatalf("Couldn't read expected output: %v", err)
}
if !reflect.DeepEqual(act, exp) {
t.Fatalf("Command outputted %v; want %v", act, exp)
}
}
func TestDockerDriver_Start_Kill_Wait(t *testing.T) {
if !dockerLocated() {
t.SkipNow()
}
task := &structs.Task{
Name: "python-demo",
Name: "redis-demo",
Config: map[string]string{
"image": "redis",
"command": "sleep 10",
"command": "/bin/sleep",
"args": "10",
},
Resources: basicResources,
}
driverCtx := testDriverContext(task.Name)
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)
@@ -182,6 +250,7 @@ func TestDockerDriver_Start_Kill_Wait(t *testing.T) {
func taskTemplate() *structs.Task {
return &structs.Task{
Name: "redis-demo",
Config: map[string]string{
"image": "redis",
},
@@ -222,7 +291,7 @@ func TestDocker_StartN(t *testing.T) {
// Let's spin up a bunch of things
var err error
for idx, task := range taskList {
driverCtx := testDriverContext(task.Name)
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)
@@ -236,6 +305,11 @@ func TestDocker_StartN(t *testing.T) {
t.Log("==> All tasks are started. Terminating...")
for idx, handle := range handles {
if handle == nil {
t.Errorf("Bad handle for task #%d", idx+1)
continue
}
err := handle.Kill()
if err != nil {
t.Errorf("Failed stopping task #%d: %s", idx+1, err)
@@ -271,7 +345,7 @@ func TestDocker_StartNVersions(t *testing.T) {
// Let's spin up a bunch of things
var err error
for idx, task := range taskList {
driverCtx := testDriverContext(task.Name)
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)
@@ -285,6 +359,11 @@ func TestDocker_StartNVersions(t *testing.T) {
t.Log("==> All tasks are started. Terminating...")
for idx, handle := range handles {
if handle == nil {
t.Errorf("Bad handle for task #%d", idx+1)
continue
}
err := handle.Kill()
if err != nil {
t.Errorf("Failed stopping task #%d: %s", idx+1, err)
@@ -300,6 +379,7 @@ func TestDockerHostNet(t *testing.T) {
}
task := &structs.Task{
Name: "redis-demo",
Config: map[string]string{
"image": "redis",
"network_mode": "host",
@@ -309,7 +389,7 @@ func TestDockerHostNet(t *testing.T) {
CPU: 512,
},
}
driverCtx := testDriverContext(task.Name)
driverCtx := testDockerDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(driverCtx)

View File

@@ -3,6 +3,7 @@ package driver
import (
"fmt"
"log"
"path/filepath"
"sync"
"github.com/hashicorp/nomad/client/allocdir"
@@ -15,11 +16,12 @@ import (
// BuiltinDrivers contains the built in registered drivers
// which are available for allocation handling
var BuiltinDrivers = map[string]Factory{
"docker": NewDockerDriver,
"exec": NewExecDriver,
"java": NewJavaDriver,
"qemu": NewQemuDriver,
"rkt": NewRktDriver,
"docker": NewDockerDriver,
"exec": NewExecDriver,
"raw_exec": NewRawExecDriver,
"java": NewJavaDriver,
"qemu": NewQemuDriver,
"rkt": NewRktDriver,
}
// NewDriver is used to instantiate and return a new driver
@@ -112,7 +114,13 @@ func TaskEnvironmentVariables(ctx *ExecContext, task *structs.Task) environment.
env.SetMeta(task.Meta)
if ctx.AllocDir != nil {
env.SetAllocDir(ctx.AllocDir.AllocDir)
env.SetAllocDir(ctx.AllocDir.SharedDir)
taskdir, ok := ctx.AllocDir.TaskDirs[task.Name]
if !ok {
// TODO: Update this to return an error
}
env.SetTaskLocalDir(filepath.Join(taskdir, allocdir.TaskLocal))
}
if task.Resources != nil {

View File

@@ -12,6 +12,10 @@ const (
// group.
AllocDir = "NOMAD_ALLOC_DIR"
// The path to the tasks local directory where it can store data that is
// persisted until the alloc is removed.
TaskLocalDir = "NOMAD_TASK_DIR"
// The tasks memory limit in MBs.
MemLimit = "NOMAD_MEMORY_LIMIT"
@@ -30,6 +34,10 @@ const (
MetaPrefix = "NOMAD_META_"
)
var (
nomadVars = []string{AllocDir, TaskLocalDir, MemLimit, CpuLimit, TaskIP, PortPrefix, MetaPrefix}
)
type TaskEnvironment map[string]string
func NewTaskEnivornment() TaskEnvironment {
@@ -70,18 +78,42 @@ func (t TaskEnvironment) SetAllocDir(dir string) {
t[AllocDir] = dir
}
func (t TaskEnvironment) ClearAllocDir() {
delete(t, AllocDir)
}
func (t TaskEnvironment) SetTaskLocalDir(dir string) {
t[TaskLocalDir] = dir
}
func (t TaskEnvironment) ClearTaskLocalDir() {
delete(t, TaskLocalDir)
}
func (t TaskEnvironment) SetMemLimit(limit int) {
t[MemLimit] = strconv.Itoa(limit)
}
func (t TaskEnvironment) ClearMemLimit() {
delete(t, MemLimit)
}
func (t TaskEnvironment) SetCpuLimit(limit int) {
t[CpuLimit] = strconv.Itoa(limit)
}
func (t TaskEnvironment) ClearCpuLimit() {
delete(t, CpuLimit)
}
func (t TaskEnvironment) SetTaskIp(ip string) {
t[TaskIP] = ip
}
func (t TaskEnvironment) ClearTaskIp() {
delete(t, TaskIP)
}
// Takes a map of port labels to their port value.
func (t TaskEnvironment) SetPorts(ports map[string]int) {
for label, port := range ports {
@@ -89,6 +121,14 @@ func (t TaskEnvironment) SetPorts(ports map[string]int) {
}
}
func (t TaskEnvironment) ClearPorts() {
for k, _ := range t {
if strings.HasPrefix(k, PortPrefix) {
delete(t, k)
}
}
}
// Takes a map of meta values to be passed to the task. The keys are capitalized
// when the environment variable is set.
func (t TaskEnvironment) SetMeta(m map[string]string) {
@@ -97,8 +137,28 @@ func (t TaskEnvironment) SetMeta(m map[string]string) {
}
}
func (t TaskEnvironment) ClearMeta() {
for k, _ := range t {
if strings.HasPrefix(k, MetaPrefix) {
delete(t, k)
}
}
}
func (t TaskEnvironment) SetEnvvars(m map[string]string) {
for k, v := range m {
t[k] = v
}
}
func (t TaskEnvironment) ClearEnvvars() {
OUTER:
for k, _ := range t {
for _, nomadPrefix := range nomadVars {
if strings.HasPrefix(k, nomadPrefix) {
continue OUTER
}
}
delete(t, k)
}
}
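For illustration, a minimal sketch of the new setters and clearers in use (NewTaskEnivornment is the package's own constructor spelling; paths and values are illustrative):

package main

import (
    "fmt"

    "github.com/hashicorp/nomad/client/driver/environment"
)

func main() {
    env := environment.NewTaskEnivornment()
    env.SetAllocDir("/alloc")
    env.SetTaskLocalDir("/alloc/task/local")
    env.SetMemLimit(256)
    env.SetEnvvars(map[string]string{"FOO": "bar"})

    fmt.Println(env.List()) // NOMAD_* variables plus FOO=bar

    env.ClearEnvvars() // removes FOO=bar, keeps the NOMAD_-prefixed variables
    fmt.Println(env.List())
}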

View File

@@ -21,7 +21,7 @@ func TestEnvironment_AsList(t *testing.T) {
}
}
func TastEnvironment_ParseFromList(t *testing.T) {
func TestEnvironment_ParseFromList(t *testing.T) {
input := []string{"foo=bar", "BAZ=baM"}
env, err := ParseFromList(input)
if err != nil {
@@ -29,10 +29,44 @@ func TastEnvironment_ParseFromList(t *testing.T) {
}
exp := map[string]string{
"foo": "baz",
"foo": "bar",
"BAZ": "baM",
}
if !reflect.DeepEqual(env, exp) {
t.Fatalf("ParseFromList(%#v) returned %v; want %v", input, env, exp)
if len(env) != len(exp) {
t.Fatalf("ParseFromList(%#v) has length %v; want %v", input, len(env), len(exp))
}
for k, v := range exp {
if actV, ok := env[k]; !ok {
t.Fatalf("ParseFromList(%#v) doesn't contain expected %v", input, k)
} else if actV != v {
t.Fatalf("ParseFromList(%#v) has incorrect value for %v; got %v; want %v", input, k, actV, v)
}
}
}
func TestEnvironment_ClearEnvvars(t *testing.T) {
env := NewTaskEnivornment()
env.SetTaskIp("127.0.0.1")
env.SetEnvvars(map[string]string{"foo": "baz", "bar": "bang"})
act := env.List()
exp := []string{"NOMAD_IP=127.0.0.1", "bar=bang", "foo=baz"}
sort.Strings(act)
sort.Strings(exp)
if !reflect.DeepEqual(act, exp) {
t.Fatalf("env.List() returned %v; want %v", act, exp)
}
// Clear the environment variables.
env.ClearEnvvars()
act = env.List()
exp = []string{"NOMAD_IP=127.0.0.1"}
sort.Strings(act)
sort.Strings(exp)
if !reflect.DeepEqual(act, exp) {
t.Fatalf("env.List() returned %v; want %v", act, exp)
}
}

View File

@@ -2,10 +2,15 @@ package driver
import (
"fmt"
"log"
"path"
"path/filepath"
"runtime"
"syscall"
"time"
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/executor"
"github.com/hashicorp/nomad/nomad/structs"
@@ -41,12 +46,40 @@ func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool,
}
func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
// Get the command
// Get the command to be run
command, ok := task.Config["command"]
if !ok || command == "" {
return nil, fmt.Errorf("missing command for exec driver")
}
// Check if an artifact is specified and attempt to download it
source, ok := task.Config["artifact_source"]
if ok && source != "" {
// Proceed to download an artifact to be executed.
// We use go-getter to support a variety of protocols, but need to change
// file permissions of the resulting download to make it executable
// Create a location to download the artifact.
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
artifactName := path.Base(source)
artifactFile := filepath.Join(destDir, artifactName)
if err := getter.GetFile(artifactFile, source); err != nil {
return nil, fmt.Errorf("Error downloading artifact for Exec driver: %s", err)
}
// Add execution permissions to the newly downloaded artifact
if runtime.GOOS != "windows" {
if err := syscall.Chmod(artifactFile, 0755); err != nil {
log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err)
}
}
}
// Get the environment variables.
envVars := TaskEnvironmentVariables(ctx, task)
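For illustration, a standalone sketch of the go-getter call that replaces the hand-rolled http.Get downloads in these drivers; destination and URL are illustrative:

package main

import (
    "log"

    "github.com/hashicorp/go-getter"
)

func main() {
    dst := "/tmp/local/artifact" // illustrative destination path
    src := "https://example.com/artifact"

    // GetFile supports a variety of sources (http(s), S3, git, local files).
    if err := getter.GetFile(dst, src); err != nil {
        log.Fatalf("Error downloading artifact: %v", err)
    }
}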

View File

@@ -5,10 +5,10 @@ import (
"io/ioutil"
"path/filepath"
"reflect"
"runtime"
"testing"
"time"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/nomad/structs"
@@ -86,7 +86,7 @@ func TestExecDriver_Start_Wait(t *testing.T) {
Name: "sleep",
Config: map[string]string{
"command": "/bin/sleep",
"args": "1",
"args": "2",
},
Resources: basicResources,
}
@@ -116,11 +116,109 @@ func TestExecDriver_Start_Wait(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(2 * time.Second):
case <-time.After(4 * time.Second):
t.Fatalf("timeout")
}
}
func TestExecDriver_Start_Artifact_basic(t *testing.T) {
ctestutils.ExecCompatible(t)
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
"command": filepath.Join("$NOMAD_TASK_DIR", file),
},
Resources: basicResources,
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Update should be a no-op
err = handle.Update(task)
if err != nil {
t.Fatalf("err: %v", err)
}
// Task should terminate quickly
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
}
func TestExecDriver_Start_Artifact_expanded(t *testing.T) {
ctestutils.ExecCompatible(t)
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
"command": "/bin/bash",
"args": fmt.Sprintf("-c '/bin/sleep 1 && %s'", filepath.Join("$NOMAD_TASK_DIR", file)),
},
Resources: basicResources,
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Update should be a no-op
err = handle.Update(task)
if err != nil {
t.Fatalf("err: %v", err)
}
// Task should terminate quickly
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
}
func TestExecDriver_Start_Wait_AllocDir(t *testing.T) {
ctestutils.ExecCompatible(t)
@@ -159,7 +257,7 @@ func TestExecDriver_Start_Wait_AllocDir(t *testing.T) {
}
// Check that data was written to the shared alloc directory.
outputFile := filepath.Join(ctx.AllocDir.AllocDir, allocdir.SharedAllocName, file)
outputFile := filepath.Join(ctx.AllocDir.SharedDir, file)
act, err := ioutil.ReadFile(outputFile)
if err != nil {
t.Fatalf("Couldn't read expected output: %v", err)

View File

@@ -3,9 +3,6 @@ package driver
import (
"bytes"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
@@ -14,6 +11,7 @@ import (
"syscall"
"time"
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/executor"
@@ -69,7 +67,7 @@ func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool,
}
if infoString == "" {
d.logger.Println("[WARN] Error parsing Java version information, aborting")
d.logger.Println("[WARN] driver.java: error parsing Java version information, aborting")
return false, nil
}
@@ -97,44 +95,33 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, fmt.Errorf("missing jar source for Java Jar driver")
}
// Attempt to download the thing
// Should be extracted to some kind of Http Fetcher
// Right now, assume publicly accessible HTTP url
resp, err := http.Get(source)
if err != nil {
return nil, fmt.Errorf("Error downloading source for Java driver: %s", err)
}
// Get the tasks local directory.
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
// Create a location to download the binary.
fName := path.Base(source)
fPath := filepath.Join(taskLocal, fName)
f, err := os.OpenFile(fPath, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to download to: %s", err)
}
defer f.Close()
defer resp.Body.Close()
// Copy remote file to local directory for execution
// TODO: a retry of sort if io.Copy fails, for large binaries
_, ioErr := io.Copy(f, resp.Body)
if ioErr != nil {
return nil, fmt.Errorf("Error copying jar from source: %s", ioErr)
jarName := path.Base(source)
jarPath := filepath.Join(destDir, jarName)
if err := getter.GetFile(jarPath, source); err != nil {
return nil, fmt.Errorf("Error downloading source for Java driver: %s", err)
}
// Get the environment variables.
envVars := TaskEnvironmentVariables(ctx, task)
args := []string{}
// Look for jvm options
jvm_options, ok := task.Config["jvm_options"]
if ok && jvm_options != "" {
d.logger.Printf("[DEBUG] driver.java: found JVM options: %s", jvm_options)
args = append(args, jvm_options)
}
// Build the argument list.
args := []string{"-jar", filepath.Join(allocdir.TaskLocal, fName)}
args = append(args, "-jar", filepath.Join(allocdir.TaskLocal, jarName))
if argRaw, ok := task.Config["args"]; ok {
args = append(args, argRaw)
}

View File

@@ -100,6 +100,7 @@ func TestJavaDriver_Start_Wait(t *testing.T) {
"jar_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar",
// "jar_source": "https://s3-us-west-2.amazonaws.com/java-jar-thing/demoapp.jar",
// "args": "-d64",
"jvm_options": "-Xmx2048m -Xms256m",
},
Resources: basicResources,
}

View File

@@ -8,7 +8,6 @@ import (
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"path/filepath"
@@ -19,6 +18,7 @@ import (
"syscall"
"time"
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
@@ -94,45 +94,25 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, fmt.Errorf("Missing required Task Resource: Memory")
}
// Attempt to download the thing
// Should be extracted to some kind of Http Fetcher
// Right now, assume publicly accessible HTTP url
resp, err := http.Get(source)
if err != nil {
return nil, fmt.Errorf("Error downloading source for Qemu driver: %s", err)
}
// Get the tasks local directory.
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
// Create a location in the local directory to download and store the image.
// TODO: Caching
// Create a location to download the binary.
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
vmID := fmt.Sprintf("qemu-vm-%s-%s", structs.GenerateUUID(), filepath.Base(source))
fPath := filepath.Join(taskLocal, vmID)
vmPath, err := os.OpenFile(fPath, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to download to: %s", err)
}
defer vmPath.Close()
defer resp.Body.Close()
// Copy remote file to local AllocDir for execution
// TODO: a retry of sort if io.Copy fails, for large binaries
_, ioErr := io.Copy(vmPath, resp.Body)
if ioErr != nil {
return nil, fmt.Errorf("Error copying Qemu image from source: %s", ioErr)
vmPath := filepath.Join(destDir, vmID)
if err := getter.GetFile(vmPath, source); err != nil {
return nil, fmt.Errorf("Error downloading artifact for Qemu driver: %s", err)
}
// compute and check checksum
if check, ok := task.Config["checksum"]; ok {
d.logger.Printf("[DEBUG] Running checksum on (%s)", vmID)
hasher := sha256.New()
file, err := os.Open(vmPath.Name())
file, err := os.Open(vmPath)
if err != nil {
return nil, fmt.Errorf("Failed to open file for checksum")
}
@@ -163,7 +143,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
"-machine", "type=pc,accel=" + accelerator,
"-name", vmID,
"-m", mem,
"-drive", "file=" + vmPath.Name(),
"-drive", "file=" + vmPath,
"-nodefconfig",
"-nodefaults",
"-nographic",
@@ -240,7 +220,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
// Create and Return Handle
h := &qemuHandle{
proc: cmd.Process,
vmID: vmPath.Name(),
vmID: vmPath,
doneCh: make(chan struct{}),
waitCh: make(chan error, 1),
}
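For illustration, a standalone sketch of the checksum step (the hunk above is truncated): hash the downloaded image and compare it with the configured value. The hex-encoded comparison is an assumption about the config format.

package main

import (
    "crypto/sha256"
    "encoding/hex"
    "fmt"
    "io"
    "os"
)

// verifyChecksum hashes the file at path and compares it to want (hex-encoded).
func verifyChecksum(path, want string) error {
    f, err := os.Open(path)
    if err != nil {
        return fmt.Errorf("Failed to open file for checksum: %v", err)
    }
    defer f.Close()

    h := sha256.New()
    if _, err := io.Copy(h, f); err != nil {
        return err
    }
    if got := hex.EncodeToString(h.Sum(nil)); got != want {
        return fmt.Errorf("checksum mismatch: got %s, want %s", got, want)
    }
    return nil
}

func main() {
    // Illustrative values only.
    if err := verifyChecksum("/tmp/qemu-image.img", "expected-sha256-hex"); err != nil {
        fmt.Println(err)
    }
}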

View File

@@ -3,7 +3,6 @@ package driver
import (
"fmt"
"os"
"os/exec"
"testing"
"github.com/hashicorp/nomad/client/config"
@@ -12,14 +11,6 @@ import (
ctestutils "github.com/hashicorp/nomad/client/testutil"
)
// qemuLocated looks to see whether qemu binaries are available on this system
// before we try to run tests. We may need to tweak this for cross-OS support
// but I think this should work on *nix at least.
func qemuLocated() bool {
_, err := exec.Command("qemu-x86_64", "-version").CombinedOutput()
return err == nil
}
func TestQemuDriver_Handle(t *testing.T) {
h := &qemuHandle{
proc: &os.Process{Pid: 123},
@@ -58,10 +49,7 @@ func TestQemuDriver_Fingerprint(t *testing.T) {
}
func TestQemuDriver_Start(t *testing.T) {
if !qemuLocated() {
t.Skip("QEMU not found; skipping")
}
ctestutils.QemuCompatible(t)
// TODO: use test server to load from a fixture
task := &structs.Task{
Name: "linux",
@@ -110,10 +98,7 @@ func TestQemuDriver_Start(t *testing.T) {
}
func TestQemuDriver_RequiresMemory(t *testing.T) {
if !qemuLocated() {
t.Skip("QEMU not found; skipping")
}
ctestutils.QemuCompatible(t)
// TODO: use test server to load from a fixture
task := &structs.Task{
Name: "linux",
@@ -136,5 +121,4 @@ func TestQemuDriver_RequiresMemory(t *testing.T) {
if err == nil {
t.Fatalf("Expected error when not specifying memory")
}
}

239
client/driver/raw_exec.go Normal file
View File

@@ -0,0 +1,239 @@
package driver
import (
"fmt"
"log"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
"time"
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// The option that enables this driver in the Config.Options map.
rawExecConfigOption = "driver.raw_exec.enable"
// Null files to use as stdin.
unixNull = "/dev/null"
windowsNull = "nul"
)
// The RawExecDriver is a privileged version of the exec driver. It provides no
// resource isolation and just fork/execs. The Exec driver should be preferred
// and this should only be used when explicitly needed.
type RawExecDriver struct {
DriverContext
}
// rawExecHandle is returned from Start/Open as a handle to the PID
type rawExecHandle struct {
proc *os.Process
waitCh chan error
doneCh chan struct{}
}
// NewRawExecDriver is used to create a new raw exec driver
func NewRawExecDriver(ctx *DriverContext) Driver {
return &RawExecDriver{*ctx}
}
func (d *RawExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Check that the user has explicitly enabled this executor.
enabled, err := strconv.ParseBool(cfg.ReadDefault(rawExecConfigOption, "false"))
if err != nil {
return false, fmt.Errorf("Failed to parse %v option: %v", rawExecConfigOption, err)
}
if enabled {
d.logger.Printf("[WARN] driver.raw_exec: raw exec is enabled. Only enable if needed")
node.Attributes["driver.raw_exec"] = "1"
return true, nil
}
return false, nil
}
func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
// Get the tasks local directory.
taskName := d.DriverContext.taskName
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
// Get the command to be run
command, ok := task.Config["command"]
if !ok || command == "" {
return nil, fmt.Errorf("missing command for Raw Exec driver")
}
// Check if an artifact is specified and attempt to download it
source, ok := task.Config["artifact_source"]
if ok && source != "" {
// Proceed to download an artifact to be executed.
// We use go-getter to support a variety of protocols, but need to change
// file permissions of the resulting download to make it executable
// Create a location to download the artifact.
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
artifactName := path.Base(source)
artifactFile := filepath.Join(destDir, artifactName)
if err := getter.GetFile(artifactFile, source); err != nil {
return nil, fmt.Errorf("Error downloading artifact for Raw Exec driver: %s", err)
}
// Add execution permissions to the newly downloaded artifact
if runtime.GOOS != "windows" {
if err := syscall.Chmod(artifactFile, 0755); err != nil {
log.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err)
}
}
}
// Get the environment variables.
envVars := TaskEnvironmentVariables(ctx, task)
// expand NOMAD_TASK_DIR
parsedPath, err := args.ParseAndReplace(command, envVars.Map())
if err != nil {
return nil, fmt.Errorf("failure to parse arguments in command path: %v", command)
} else if len(parsedPath) != 1 {
return nil, fmt.Errorf("couldn't properly parse command path: %v", command)
}
cm := parsedPath[0]
// Look for arguments
var cmdArgs []string
if argRaw, ok := task.Config["args"]; ok {
parsed, err := args.ParseAndReplace(argRaw, envVars.Map())
if err != nil {
return nil, err
}
cmdArgs = append(cmdArgs, parsed...)
}
// Setup the command
cmd := exec.Command(cm, cmdArgs...)
cmd.Dir = taskDir
cmd.Env = envVars.List()
// Capture the stdout/stderr and redirect stdin to /dev/null
stdoutFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stdout", taskName))
stderrFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stderr", taskName))
stdinFilename := unixNull
if runtime.GOOS == "windows" {
stdinFilename = windowsNull
}
stdo, err := os.OpenFile(stdoutFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to redirect stdout: %v", err)
}
stde, err := os.OpenFile(stderrFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err)
}
stdi, err := os.OpenFile(stdinFilename, os.O_CREATE|os.O_RDONLY, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to redirect stdin: %v", err)
}
cmd.Stdout = stdo
cmd.Stderr = stde
cmd.Stdin = stdi
if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("failed to start command: %v", err)
}
// Return a driver handle
h := &rawExecHandle{
proc: cmd.Process,
doneCh: make(chan struct{}),
waitCh: make(chan error, 1),
}
go h.run()
return h, nil
}
func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
// Split the handle
pidStr := strings.TrimPrefix(handleID, "PID:")
pid, err := strconv.Atoi(pidStr)
if err != nil {
return nil, fmt.Errorf("failed to parse handle '%s': %v", handleID, err)
}
// Find the process
proc, err := os.FindProcess(pid)
if proc == nil || err != nil {
return nil, fmt.Errorf("failed to find PID %d: %v", pid, err)
}
// Return a driver handle
h := &rawExecHandle{
proc: proc,
doneCh: make(chan struct{}),
waitCh: make(chan error, 1),
}
go h.run()
return h, nil
}
func (h *rawExecHandle) ID() string {
// Return a handle to the PID
return fmt.Sprintf("PID:%d", h.proc.Pid)
}
func (h *rawExecHandle) WaitCh() chan error {
return h.waitCh
}
func (h *rawExecHandle) Update(task *structs.Task) error {
// Update is not possible
return nil
}
// Kill is used to terminate the task. We send an Interrupt
// and then provide a 5 second grace period before doing a Kill on supported
// OS's, otherwise we kill immediately.
func (h *rawExecHandle) Kill() error {
if runtime.GOOS == "windows" {
return h.proc.Kill()
}
h.proc.Signal(os.Interrupt)
select {
case <-h.doneCh:
return nil
case <-time.After(5 * time.Second):
return h.proc.Kill()
}
}
func (h *rawExecHandle) run() {
ps, err := h.proc.Wait()
close(h.doneCh)
if err != nil {
h.waitCh <- err
} else if !ps.Success() {
h.waitCh <- fmt.Errorf("task exited with error")
}
close(h.waitCh)
}
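For illustration, a minimal sketch of enabling the new driver on a client (it mirrors the fingerprint test further down): the driver only registers itself when driver.raw_exec.enable is "true" in the client's Options map.

package main

import (
    "fmt"

    "github.com/hashicorp/nomad/client/config"
)

func main() {
    cfg := &config.Config{
        Options: map[string]string{
            "driver.raw_exec.enable": "true", // opt in explicitly; disabled by default
        },
    }
    fmt.Println(cfg.ReadDefault("driver.raw_exec.enable", "false"))
}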

View File

@@ -0,0 +1,327 @@
package driver
import (
"fmt"
"io/ioutil"
"path/filepath"
"reflect"
"runtime"
"testing"
"time"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/nomad/structs"
)
func TestRawExecDriver_Fingerprint(t *testing.T) {
d := NewRawExecDriver(testDriverContext(""))
node := &structs.Node{
Attributes: make(map[string]string),
}
// Disable raw exec.
cfg := &config.Config{Options: map[string]string{rawExecConfigOption: "false"}}
apply, err := d.Fingerprint(cfg, node)
if err != nil {
t.Fatalf("err: %v", err)
}
if apply {
t.Fatalf("should not apply")
}
if node.Attributes["driver.raw_exec"] != "" {
t.Fatalf("driver incorrectly enabled")
}
// Enable raw exec.
cfg.Options[rawExecConfigOption] = "true"
apply, err = d.Fingerprint(cfg, node)
if err != nil {
t.Fatalf("err: %v", err)
}
if !apply {
t.Fatalf("should apply")
}
if node.Attributes["driver.raw_exec"] != "1" {
t.Fatalf("driver not enabled")
}
}
func TestRawExecDriver_StartOpen_Wait(t *testing.T) {
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"command": "/bin/sleep",
"args": "1",
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewRawExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Attempt to open
handle2, err := d.Open(ctx, handle.ID())
if err != nil {
t.Fatalf("err: %v", err)
}
if handle2 == nil {
t.Fatalf("missing handle")
}
// Task should terminate quickly
select {
case <-handle2.WaitCh():
case <-time.After(2 * time.Second):
t.Fatalf("timeout")
}
// Check they are both tracking the same PID.
pid1 := handle.(*rawExecHandle).proc.Pid
pid2 := handle2.(*rawExecHandle).proc.Pid
if pid1 != pid2 {
t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2)
}
}
func TestRawExecDriver_Start_Artifact_basic(t *testing.T) {
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
"command": filepath.Join("$NOMAD_TASK_DIR", file),
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewRawExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Attempt to open
handle2, err := d.Open(ctx, handle.ID())
if err != nil {
t.Fatalf("err: %v", err)
}
if handle2 == nil {
t.Fatalf("missing handle")
}
// Task should terminate quickly
select {
case <-handle2.WaitCh():
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
// Check they are both tracking the same PID.
pid1 := handle.(*rawExecHandle).proc.Pid
pid2 := handle2.(*rawExecHandle).proc.Pid
if pid1 != pid2 {
t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2)
}
}
func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) {
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
"command": "/bin/bash",
"args": fmt.Sprintf("-c '/bin/sleep 1 && %s'", filepath.Join("$NOMAD_TASK_DIR", file)),
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewRawExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Attempt to open
handle2, err := d.Open(ctx, handle.ID())
if err != nil {
t.Fatalf("err: %v", err)
}
if handle2 == nil {
t.Fatalf("missing handle")
}
// Task should terminate quickly
select {
case <-handle2.WaitCh():
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
// Check they are both tracking the same PID.
pid1 := handle.(*rawExecHandle).proc.Pid
pid2 := handle2.(*rawExecHandle).proc.Pid
if pid1 != pid2 {
t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2)
}
}
func TestRawExecDriver_Start_Wait(t *testing.T) {
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"command": "/bin/sleep",
"args": "1",
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewRawExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Update should be a no-op
err = handle.Update(task)
if err != nil {
t.Fatalf("err: %v", err)
}
// Task should terminate quickly
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(2 * time.Second):
t.Fatalf("timeout")
}
}
func TestRawExecDriver_Start_Wait_AllocDir(t *testing.T) {
exp := []byte{'w', 'i', 'n'}
file := "output.txt"
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"command": "/bin/bash",
"args": fmt.Sprintf(`-c "sleep 1; echo -n %s > $%s/%s"`, string(exp), environment.AllocDir, file),
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewRawExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
// Task should terminate quickly
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(2 * time.Second):
t.Fatalf("timeout")
}
// Check that data was written to the shared alloc directory.
outputFile := filepath.Join(ctx.AllocDir.SharedDir, file)
act, err := ioutil.ReadFile(outputFile)
if err != nil {
t.Fatalf("Couldn't read expected output: %v", err)
}
if !reflect.DeepEqual(act, exp) {
t.Fatalf("Command outputted %v; want %v", act, exp)
}
}
func TestRawExecDriver_Start_Kill_Wait(t *testing.T) {
task := &structs.Task{
Name: "sleep",
Config: map[string]string{
"command": "/bin/sleep",
"args": "1",
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
defer ctx.AllocDir.Destroy()
d := NewRawExecDriver(driverCtx)
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
go func() {
time.Sleep(100 * time.Millisecond)
err := handle.Kill()
if err != nil {
t.Fatalf("err: %v", err)
}
}()
// Task should terminate quickly
select {
case err := <-handle.WaitCh():
if err == nil {
t.Fatal("should err")
}
case <-time.After(2 * time.Second):
t.Fatalf("timeout")
}
}

View File

@@ -7,19 +7,22 @@ import (
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"syscall"
"time"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/nomad/structs"
)
var (
reRktVersion = regexp.MustCompile("rkt version ([\\d\\.]+).+")
reAppcVersion = regexp.MustCompile("appc version ([\\d\\.]+).+")
reRktVersion = regexp.MustCompile(`rkt version (\d[.\d]+)`)
reAppcVersion = regexp.MustCompile(`appc version (\d[.\d]+)`)
)
// RktDriver is a driver for running images via Rkt
@@ -32,7 +35,7 @@ type RktDriver struct {
// rktHandle is returned from Start/Open as a handle to the PID
type rktHandle struct {
proc *os.Process
name string
image string
logger *log.Logger
waitCh chan error
doneCh chan struct{}
@@ -41,8 +44,8 @@ type rktHandle struct {
// rktPID is a struct to map the pid running the process to the vm image on
// disk
type rktPID struct {
Pid int
Name string
Pid int
Image string
}
// NewRktDriver is used to create a new exec driver
@@ -64,13 +67,13 @@ func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, e
out := strings.TrimSpace(string(outBytes))
rktMatches := reRktVersion.FindStringSubmatch(out)
appcMatches := reRktVersion.FindStringSubmatch(out)
appcMatches := reAppcVersion.FindStringSubmatch(out)
if len(rktMatches) != 2 || len(appcMatches) != 2 {
return false, fmt.Errorf("Unable to parse Rkt version string: %#v", rktMatches)
}
node.Attributes["driver.rkt"] = "true"
node.Attributes["driver.rkt.version"] = rktMatches[0]
node.Attributes["driver.rkt"] = "1"
node.Attributes["driver.rkt.version"] = rktMatches[1]
node.Attributes["driver.rkt.appc.version"] = appcMatches[1]
return true, nil
@@ -78,61 +81,104 @@ func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, e
// Run an existing Rkt image.
func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
trust_prefix, ok := task.Config["trust_prefix"]
if !ok || trust_prefix == "" {
return nil, fmt.Errorf("Missing trust prefix for rkt")
// Validate the task configuration.
img, ok := task.Config["image"]
if !ok || img == "" {
return nil, fmt.Errorf("Missing ACI image for rkt")
}
// Get the tasks local directory.
taskName := d.DriverContext.taskName
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
// Add the given trust prefix
var outBuf, errBuf bytes.Buffer
cmd := exec.Command("rkt", "trust", fmt.Sprintf("--prefix=%s", trust_prefix))
cmd.Stdout = &outBuf
cmd.Stderr = &errBuf
d.logger.Printf("[DEBUG] driver.rkt: starting rkt command: %q", cmd.Args)
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf(
"Error running rkt: %s\n\nOutput: %s\n\nError: %s",
err, outBuf.String(), errBuf.String())
}
d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trust_prefix)
name, ok := task.Config["name"]
if !ok || name == "" {
return nil, fmt.Errorf("Missing ACI name for rkt")
trust_prefix, trust_cmd := task.Config["trust_prefix"]
if trust_cmd {
var outBuf, errBuf bytes.Buffer
cmd := exec.Command("rkt", "trust", fmt.Sprintf("--prefix=%s", trust_prefix))
cmd.Stdout = &outBuf
cmd.Stderr = &errBuf
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("Error running rkt trust: %s\n\nOutput: %s\n\nError: %s",
err, outBuf.String(), errBuf.String())
}
d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trust_prefix)
}
exec_cmd, ok := task.Config["exec"]
if !ok || exec_cmd == "" {
d.logger.Printf("[WARN] driver.rkt: could not find a command to execute in the ACI, the default command will be executed")
// Build the command.
var cmd_args []string
// Inject the environment variables.
envVars := TaskEnvironmentVariables(ctx, task)
// Clear the task directories as they are not currently supported.
envVars.ClearTaskLocalDir()
envVars.ClearAllocDir()
for k, v := range envVars.Map() {
cmd_args = append(cmd_args, fmt.Sprintf("--set-env=%v=%v", k, v))
}
// Run the ACI
var aoutBuf, aerrBuf bytes.Buffer
run_cmd := []string{
"rkt",
"run",
"--mds-register=false",
name,
// Disable signature verification if the trust command was not run.
if !trust_cmd {
cmd_args = append(cmd_args, "--insecure-skip-verify")
}
if exec_cmd != "" {
splitted := strings.Fields(exec_cmd)
run_cmd = append(run_cmd, "--exec=", splitted[0], "--")
run_cmd = append(run_cmd, splitted[1:]...)
run_cmd = append(run_cmd, "---")
// Append the run command.
cmd_args = append(cmd_args, "run", "--mds-register=false", img)
// Check if the user has overridden the exec command.
if exec_cmd, ok := task.Config["command"]; ok {
cmd_args = append(cmd_args, fmt.Sprintf("--exec=%v", exec_cmd))
}
acmd := exec.Command(run_cmd[0], run_cmd[1:]...)
acmd.Stdout = &aoutBuf
acmd.Stderr = &aerrBuf
d.logger.Printf("[DEBUG] driver:rkt: starting rkt command: %q", acmd.Args)
if err := acmd.Start(); err != nil {
return nil, fmt.Errorf(
"Error running rkt: %s\n\nOutput: %s\n\nError: %s",
err, aoutBuf.String(), aerrBuf.String())
// Add user passed arguments.
if userArgs, ok := task.Config["args"]; ok {
parsed, err := args.ParseAndReplace(userArgs, envVars.Map())
if err != nil {
return nil, err
}
// Need to start arguments with "--"
if len(parsed) > 0 {
cmd_args = append(cmd_args, "--")
}
for _, arg := range parsed {
cmd_args = append(cmd_args, fmt.Sprintf("%v", arg))
}
}
d.logger.Printf("[DEBUG] driver.rkt: started ACI: %q", name)
// Create files to capture stdout and stderr.
stdoutFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stdout", taskName))
stderrFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stderr", taskName))
stdo, err := os.OpenFile(stdoutFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to redirect stdout: %v", err)
}
stde, err := os.OpenFile(stderrFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err)
}
cmd := exec.Command("rkt", cmd_args...)
cmd.Stdout = stdo
cmd.Stderr = stde
if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("Error running rkt: %v", err)
}
d.logger.Printf("[DEBUG] driver.rkt: started ACI %q with: %v", img, cmd.Args)
h := &rktHandle{
proc: acmd.Process,
name: name,
proc: cmd.Process,
image: img,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan error, 1),
@@ -158,7 +204,7 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
// Return a driver handle
h := &rktHandle{
proc: proc,
name: qpid.Name,
image: qpid.Image,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan error, 1),
@@ -171,8 +217,8 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
func (h *rktHandle) ID() string {
// Return a handle to the PID
pid := &rktPID{
Pid: h.proc.Pid,
Name: h.name,
Pid: h.proc.Pid,
Image: h.image,
}
data, err := json.Marshal(pid)
if err != nil {
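
For readability, here is a rough sketch of the argument list the new Start path assembles before invoking rkt. The values are illustrative only (taken from the tests below), and the real driver also redirects stdout/stderr to the task-local files shown above:

package main

import (
	"fmt"
	"os/exec"
)

func main() {
	// Hypothetical task config values; one --set-env flag is added per
	// injected Nomad environment variable.
	cmdArgs := []string{
		"--set-env=NOMAD_META_FOO=bar",
		"--insecure-skip-verify", // only added when no trust_prefix was configured
		"run",
		"--mds-register=false",
		"coreos.com/etcd:v2.0.4", // the "image" config value
		"--exec=/etcd",           // the optional "command" override
		"--",                     // user-supplied "args" follow the separator
		"--version",
	}
	cmd := exec.Command("rkt", cmdArgs...)
	fmt.Println(cmd.Args)
}
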

View File

@@ -2,26 +2,44 @@ package driver
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
ctestutils "github.com/hashicorp/nomad/client/testutil"
)
func TestRktVersionRegex(t *testing.T) {
input_rkt := "rkt version 0.8.1"
input_appc := "appc version 1.2.0"
expected_rkt := "0.8.1"
expected_appc := "1.2.0"
rktMatches := reRktVersion.FindStringSubmatch(input_rkt)
appcMatches := reAppcVersion.FindStringSubmatch(input_appc)
if rktMatches[1] != expected_rkt {
fmt.Printf("Test failed; got %q; want %q\n", rktMatches[1], expected_rkt)
}
if appcMatches[1] != expected_appc {
fmt.Printf("Test failed; got %q; want %q\n", appcMatches[1], expected_appc)
}
}
func TestRktDriver_Handle(t *testing.T) {
h := &rktHandle{
proc: &os.Process{Pid: 123},
name: "foo",
image: "foo",
doneCh: make(chan struct{}),
waitCh: make(chan error, 1),
}
actual := h.ID()
expected := `Rkt:{"Pid":123,"Name":"foo"}`
expected := `Rkt:{"Pid":123,"Image":"foo"}`
if actual != expected {
t.Errorf("Expected `%s`, found `%s`", expected, actual)
}
@@ -41,7 +59,7 @@ func TestRktDriver_Fingerprint(t *testing.T) {
if !apply {
t.Fatalf("should apply")
}
if node.Attributes["driver.rkt"] == "" {
if node.Attributes["driver.rkt"] != "1" {
t.Fatalf("Missing Rkt driver")
}
if node.Attributes["driver.rkt.version"] == "" {
@@ -59,8 +77,8 @@ func TestRktDriver_Start(t *testing.T) {
Name: "etcd",
Config: map[string]string{
"trust_prefix": "coreos.com/etcd",
"name": "coreos.com/etcd:v2.0.4",
"exec": "/etcd --version",
"image": "coreos.com/etcd:v2.0.4",
"command": "/etcd",
},
}
@@ -98,8 +116,9 @@ func TestRktDriver_Start_Wait(t *testing.T) {
Name: "etcd",
Config: map[string]string{
"trust_prefix": "coreos.com/etcd",
"name": "coreos.com/etcd:v2.0.4",
"exec": "/etcd --version",
"image": "coreos.com/etcd:v2.0.4",
"command": "/etcd",
"args": "--version",
},
}
@@ -132,3 +151,94 @@ func TestRktDriver_Start_Wait(t *testing.T) {
t.Fatalf("timeout")
}
}
func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Config: map[string]string{
"image": "coreos.com/etcd:v2.0.4",
"command": "/etcd",
"args": "--version",
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
d := NewRktDriver(driverCtx)
defer ctx.AllocDir.Destroy()
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
defer handle.Kill()
// Update should be a no-op
err = handle.Update(task)
if err != nil {
t.Fatalf("err: %v", err)
}
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
}
func TestRktDriver_Start_Wait_Logs(t *testing.T) {
ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Config: map[string]string{
"trust_prefix": "coreos.com/etcd",
"image": "coreos.com/etcd:v2.0.4",
"command": "/etcd",
"args": "--version",
},
}
driverCtx := testDriverContext(task.Name)
ctx := testDriverExecContext(task, driverCtx)
d := NewRktDriver(driverCtx)
defer ctx.AllocDir.Destroy()
handle, err := d.Start(ctx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
if handle == nil {
t.Fatalf("missing handle")
}
defer handle.Kill()
select {
case err := <-handle.WaitCh():
if err != nil {
t.Fatalf("err: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timeout")
}
taskDir, ok := ctx.AllocDir.TaskDirs[task.Name]
if !ok {
t.Fatalf("Could not find task directory for task: %v", task)
}
stdout := filepath.Join(taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", task.Name))
data, err := ioutil.ReadFile(stdout)
if err != nil {
t.Fatalf("Failed to read tasks stdout: %v", err)
}
if len(data) == 0 {
t.Fatal("Task's stdout is empty")
}
}

View File

@@ -112,7 +112,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
// Mount dev
dev := filepath.Join(taskDir, "dev")
if err := os.Mkdir(dev, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", dev)
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
}
if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
@@ -122,7 +122,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
// Mount proc
proc := filepath.Join(taskDir, "proc")
if err := os.Mkdir(proc, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", proc)
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
}
if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
@@ -135,6 +135,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
return err
}
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
e.Cmd.Env = env.List()
e.alloc = alloc
@@ -195,7 +196,11 @@ func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error {
e.groups.MemorySwap = int64(-1)
}
if resources.CPU > 0.0 {
if resources.CPU != 0 {
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
// given process will have at least that amount of resources, but likely
@@ -261,6 +266,14 @@ func (e *LinuxExecutor) Start() error {
return err
}
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
if err != nil {
return err
} else if len(parsedPath) != 1 {
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
}
e.cmd.Path = parsedPath[0]
combined := strings.Join(e.Cmd.Args, " ")
parsed, err := args.ParseAndReplace(combined, envVars.Map())
if err != nil {
@@ -542,6 +555,11 @@ func (e *LinuxExecutor) destroyCgroup() error {
multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err))
continue
}
if _, err := process.Wait(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err))
continue
}
}
// Remove the cgroup.
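
As a quick worked example of the share scaling described in the comment above (1 share per MHz, so 1024 shares is roughly 1 GHz); the numbers are illustrative and this is not the executor's actual code path:

package main

import "fmt"

func main() {
	// Illustrative only: a task asking for resources.CPU = 500 (MHz).
	cpu := 500
	if cpu < 2 {
		panic("resources.CPU must be equal to or greater than 2")
	}
	shares := int64(cpu) // 1 share per MHz => 500 shares, roughly half of 1 GHz
	fmt.Printf("cpu.shares = %d\n", shares)
}
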

View File

@@ -6,8 +6,11 @@ import (
"fmt"
"os"
"strconv"
"strings"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -29,11 +32,37 @@ func (e *UniversalExecutor) Limit(resources *structs.Resources) error {
}
func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
// No-op
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
return fmt.Errorf("Error finding task dir for (%s)", taskName)
}
e.Dir = taskDir
return nil
}
func (e *UniversalExecutor) Start() error {
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.cmd.Env)
if err != nil {
return err
}
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
if err != nil {
return err
} else if len(parsedPath) != 1 {
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
}
e.cmd.Path = parsedPath[0]
combined := strings.Join(e.cmd.Args, " ")
parsed, err := args.ParseAndReplace(combined, envVars.Map())
if err != nil {
return err
}
e.Cmd.Args = parsed
// We don't want to call ourself. We want to call Start on our embedded Cmd
return e.cmd.Start()
}
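
A minimal usage sketch of the interpolation step both executors now share, assuming the ParseAndReplace(string, map[string]string) ([]string, error) signature implied above and the $VAR syntax used in the raw_exec test; the environment map and values are made up:

package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/client/driver/args"
)

func main() {
	// Hypothetical environment map; the executors build theirs from
	// environment.ParseFromList(e.cmd.Env).
	env := map[string]string{
		"NOMAD_TASK_DIR": "/opt/alloc/task/local",
	}

	// Replace Nomad variables in the joined argument string; each element of
	// the result becomes one entry of Cmd.Args.
	parsed, err := args.ParseAndReplace("--dir $NOMAD_TASK_DIR --verbose", env)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(parsed)
}
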

View File

@@ -10,6 +10,7 @@ import (
"strings"
"time"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -62,12 +63,12 @@ var ec2InstanceSpeedMap = map[string]int{
"d2.8xlarge": 10000,
}
// EnvAWSFingerprint is used to fingerprint the CPU
// EnvAWSFingerprint is used to fingerprint AWS metadata
type EnvAWSFingerprint struct {
logger *log.Logger
}
// NewEnvAWSFingerprint is used to create a CPU fingerprint
// NewEnvAWSFingerprint is used to create a fingerprint from AWS metadata
func NewEnvAWSFingerprint(logger *log.Logger) Fingerprint {
f := &EnvAWSFingerprint{logger: logger}
return f
@@ -93,7 +94,8 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node)
// assume 2 seconds is enough time for inside AWS network
client := &http.Client{
Timeout: 2 * time.Second,
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
keys := []string{
@@ -164,7 +166,8 @@ func isAWS() bool {
// assume 2 seconds is enough time for inside AWS network
client := &http.Client{
Timeout: 2 * time.Second,
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
// Query the metadata URL for the ami-id, to verify we're on AWS
@@ -207,7 +210,8 @@ func (f *EnvAWSFingerprint) linkSpeed() int {
// assume 2 seconds is enough time for inside AWS network
client := &http.Client{
Timeout: 2 * time.Second,
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
res, err := client.Get(metadataURL + "instance-type")

View File

@@ -0,0 +1,231 @@
package fingerprint
import (
"encoding/json"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"regexp"
"strconv"
"strings"
"time"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// This is where the GCE metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_GCE_URL = "http://169.254.169.254/computeMetadata/v1/instance/"
type GCEMetadataNetworkInterface struct {
AccessConfigs []struct {
ExternalIp string
Type string
}
ForwardedIps []string
Ip string
Network string
}
type ReqError struct {
StatusCode int
}
func (e ReqError) Error() string {
return http.StatusText(e.StatusCode)
}
func lastToken(s string) string {
index := strings.LastIndex(s, "/")
return s[index+1:]
}
// EnvGCEFingerprint is used to fingerprint GCE metadata
type EnvGCEFingerprint struct {
client *http.Client
logger *log.Logger
metadataURL string
}
// NewEnvGCEFingerprint is used to create a fingerprint from GCE metadata
func NewEnvGCEFingerprint(logger *log.Logger) Fingerprint {
// Read the internal metadata URL from the environment, allowing test files to
// provide their own
metadataURL := os.Getenv("GCE_ENV_URL")
if metadataURL == "" {
metadataURL = DEFAULT_GCE_URL
}
// assume 2 seconds is enough time for inside GCE network
client := &http.Client{
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
return &EnvGCEFingerprint{
client: client,
logger: logger,
metadataURL: metadataURL,
}
}
func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error) {
reqUrl := f.metadataURL + attribute
if recursive {
reqUrl = reqUrl + "?recursive=true"
}
parsedUrl, err := url.Parse(reqUrl)
if err != nil {
return "", err
}
req := &http.Request{
Method: "GET",
URL: parsedUrl,
Header: http.Header{
"Metadata-Flavor": []string{"Google"},
},
}
res, err := f.client.Do(req)
if err != nil {
return "", err
}
resp, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
f.logger.Printf("[ERR]: fingerprint.env_gce: Error reading response body for GCE %s", attribute)
return "", err
}
if res.StatusCode >= 400 {
return "", ReqError{res.StatusCode}
}
return string(resp), nil
}
func checkError(err error, logger *log.Logger, desc string) error {
// If it's a URL error, assume we're not actually in a GCE environment.
// To the outer layers, this isn't an error so return nil.
if _, ok := err.(*url.Error); ok {
logger.Printf("[ERR] fingerprint.env_gce: Error querying GCE " + desc + ", skipping")
return nil
}
// Otherwise pass the error through.
return err
}
func (f *EnvGCEFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
if !f.isGCE() {
return false, nil
}
if node.Links == nil {
node.Links = make(map[string]string)
}
keys := []string{
"hostname",
"id",
"cpu-platform",
"scheduling/automatic-restart",
"scheduling/on-host-maintenance",
}
for _, k := range keys {
value, err := f.Get(k, false)
if err != nil {
return false, checkError(err, f.logger, k)
}
// assume we want blank entries
key := strings.Replace(k, "/", ".", -1)
node.Attributes["platform.gce."+key] = strings.Trim(string(value), "\n")
}
// These keys need everything before the final slash removed to be usable.
keys = []string{
"machine-type",
"zone",
}
for _, k := range keys {
value, err := f.Get(k, false)
if err != nil {
return false, checkError(err, f.logger, k)
}
node.Attributes["platform.gce."+k] = strings.Trim(lastToken(value), "\n")
}
// Get internal and external IPs (if they exist)
value, err := f.Get("network-interfaces/", true)
var interfaces []GCEMetadataNetworkInterface
if err := json.Unmarshal([]byte(value), &interfaces); err != nil {
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding network interface information: %s", err.Error())
}
for _, intf := range interfaces {
prefix := "platform.gce.network." + lastToken(intf.Network)
node.Attributes[prefix] = "true"
node.Attributes[prefix+".ip"] = strings.Trim(intf.Ip, "\n")
for index, accessConfig := range intf.AccessConfigs {
node.Attributes[prefix+".external-ip."+strconv.Itoa(index)] = accessConfig.ExternalIp
}
}
var tagList []string
value, err = f.Get("tags", false)
if err != nil {
return false, checkError(err, f.logger, "tags")
}
if err := json.Unmarshal([]byte(value), &tagList); err != nil {
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance tags: %s", err.Error())
}
for _, tag := range tagList {
node.Attributes["platform.gce.tag."+tag] = "true"
}
var attrDict map[string]string
value, err = f.Get("attributes/", true)
if err != nil {
return false, checkError(err, f.logger, "attributes/")
}
if err := json.Unmarshal([]byte(value), &attrDict); err != nil {
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance attributes: %s", err.Error())
}
for k, v := range attrDict {
node.Attributes["platform.gce.attr."+k] = strings.Trim(v, "\n")
}
// populate Links
node.Links["gce"] = node.Attributes["platform.gce.id"]
return true, nil
}
func (f *EnvGCEFingerprint) isGCE() bool {
// TODO: better way to detect GCE?
// Query the metadata url for the machine type, to verify we're on GCE
machineType, err := f.Get("machine-type", false)
if err != nil {
if re, ok := err.(ReqError); !ok || re.StatusCode != 404 {
// If it wasn't a 404 error, print an error message.
f.logger.Printf("[ERR] fingerprint.env_gce: Error querying GCE Metadata URL, skipping")
}
return false
}
match, err := regexp.MatchString("projects/.+/machineTypes/.+", machineType)
if !match {
return false
}
return true
}

View File

@@ -0,0 +1,193 @@
package fingerprint
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
func TestGCEFingerprint_nonGCE(t *testing.T) {
os.Setenv("GCE_ENV_URL", "http://127.0.0.1/computeMetadata/v1/instance/")
f := NewEnvGCEFingerprint(testLogger())
node := &structs.Node{
Attributes: make(map[string]string),
}
ok, err := f.Fingerprint(&config.Config{}, node)
if err != nil {
t.Fatalf("err: %v", err)
}
if ok {
t.Fatalf("Should be false without test server")
}
}
func testFingerprint_GCE(t *testing.T, withExternalIp bool) {
node := &structs.Node{
Attributes: make(map[string]string),
}
// configure mock server with fixture routes, data
routes := routes{}
if err := json.Unmarshal([]byte(GCE_routes), &routes); err != nil {
t.Fatalf("Failed to unmarshal JSON in GCE ENV test: %s", err)
}
networkEndpoint := &endpoint{
Uri: "/computeMetadata/v1/instance/network-interfaces/?recursive=true",
ContentType: "application/json",
}
if withExternalIp {
networkEndpoint.Body = `[{"accessConfigs":[{"externalIp":"104.44.55.66","type":"ONE_TO_ONE_NAT"},{"externalIp":"104.44.55.67","type":"ONE_TO_ONE_NAT"}],"forwardedIps":[],"ip":"10.240.0.5","network":"projects/555555/networks/default"}]`
} else {
networkEndpoint.Body = `[{"accessConfigs":[],"forwardedIps":[],"ip":"10.240.0.5","network":"projects/555555/networks/default"}]`
}
routes.Endpoints = append(routes.Endpoints, networkEndpoint)
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
value, ok := r.Header["Metadata-Flavor"]
if !ok {
t.Fatal("Metadata-Flavor not present in HTTP request header")
}
if value[0] != "Google" {
t.Fatalf("Expected Metadata-Flavor Google, saw %s", value[0])
}
found := false
for _, e := range routes.Endpoints {
if r.RequestURI == e.Uri {
w.Header().Set("Content-Type", e.ContentType)
fmt.Fprintln(w, e.Body)
found = true
}
}
if !found {
w.WriteHeader(404)
}
}))
defer ts.Close()
os.Setenv("GCE_ENV_URL", ts.URL+"/computeMetadata/v1/instance/")
f := NewEnvGCEFingerprint(testLogger())
ok, err := f.Fingerprint(&config.Config{}, node)
if err != nil {
t.Fatalf("err: %v", err)
}
if !ok {
t.Fatalf("should apply")
}
keys := []string{
"platform.gce.id",
"platform.gce.hostname",
"platform.gce.zone",
"platform.gce.machine-type",
"platform.gce.zone",
"platform.gce.tag.abc",
"platform.gce.tag.def",
"platform.gce.attr.ghi",
"platform.gce.attr.jkl",
}
for _, k := range keys {
assertNodeAttributeContains(t, node, k)
}
if len(node.Links) == 0 {
t.Fatalf("Empty links for Node in GCE Fingerprint test")
}
// Make sure Links contains the GCE ID.
for _, k := range []string{"gce"} {
assertNodeLinksContains(t, node, k)
}
assertNodeAttributeEquals(t, node, "platform.gce.id", "12345")
assertNodeAttributeEquals(t, node, "platform.gce.hostname", "instance-1.c.project.internal")
assertNodeAttributeEquals(t, node, "platform.gce.zone", "us-central1-f")
assertNodeAttributeEquals(t, node, "platform.gce.machine-type", "n1-standard-1")
assertNodeAttributeEquals(t, node, "platform.gce.network.default", "true")
assertNodeAttributeEquals(t, node, "platform.gce.network.default.ip", "10.240.0.5")
if withExternalIp {
assertNodeAttributeEquals(t, node, "platform.gce.network.default.external-ip.0", "104.44.55.66")
assertNodeAttributeEquals(t, node, "platform.gce.network.default.external-ip.1", "104.44.55.67")
} else if _, ok := node.Attributes["platform.gce.network.default.external-ip.0"]; ok {
t.Fatal("platform.gce.network.default.external-ip is set without an external IP")
}
assertNodeAttributeEquals(t, node, "platform.gce.scheduling.automatic-restart", "TRUE")
assertNodeAttributeEquals(t, node, "platform.gce.scheduling.on-host-maintenance", "MIGRATE")
assertNodeAttributeEquals(t, node, "platform.gce.cpu-platform", "Intel Ivy Bridge")
assertNodeAttributeEquals(t, node, "platform.gce.tag.abc", "true")
assertNodeAttributeEquals(t, node, "platform.gce.tag.def", "true")
assertNodeAttributeEquals(t, node, "platform.gce.attr.ghi", "111")
assertNodeAttributeEquals(t, node, "platform.gce.attr.jkl", "222")
}
const GCE_routes = `
{
"endpoints": [
{
"uri": "/computeMetadata/v1/instance/id",
"content-type": "text/plain",
"body": "12345"
},
{
"uri": "/computeMetadata/v1/instance/hostname",
"content-type": "text/plain",
"body": "instance-1.c.project.internal"
},
{
"uri": "/computeMetadata/v1/instance/zone",
"content-type": "text/plain",
"body": "projects/555555/zones/us-central1-f"
},
{
"uri": "/computeMetadata/v1/instance/machine-type",
"content-type": "text/plain",
"body": "projects/555555/machineTypes/n1-standard-1"
},
{
"uri": "/computeMetadata/v1/instance/tags",
"content-type": "application/json",
"body": "[\"abc\", \"def\"]"
},
{
"uri": "/computeMetadata/v1/instance/attributes/?recursive=true",
"content-type": "application/json",
"body": "{\"ghi\":\"111\",\"jkl\":\"222\"}"
},
{
"uri": "/computeMetadata/v1/instance/scheduling/automatic-restart",
"content-type": "text/plain",
"body": "TRUE"
},
{
"uri": "/computeMetadata/v1/instance/scheduling/on-host-maintenance",
"content-type": "text/plain",
"body": "MIGRATE"
},
{
"uri": "/computeMetadata/v1/instance/cpu-platform",
"content-type": "text/plain",
"body": "Intel Ivy Bridge"
}
]
}
`
func TestFingerprint_GCEWithExternalIp(t *testing.T) {
testFingerprint_GCE(t, true)
}
func TestFingerprint_GCEWithoutExternalIp(t *testing.T) {
testFingerprint_GCE(t, false)
}

View File

@@ -18,6 +18,7 @@ var BuiltinFingerprints = []string{
"storage",
"network",
"env_aws",
"env_gce",
}
// builtinFingerprintMap contains the built in registered fingerprints
@@ -30,6 +31,7 @@ var builtinFingerprintMap = map[string]Factory{
"storage": NewStorageFingerprint,
"network": NewNetworkFingerprinter,
"env_aws": NewEnvAWSFingerprint,
"env_gce": NewEnvGCEFingerprint,
}
// NewFingerprint is used to instantiate and return a new fingerprint

View File

@@ -5,6 +5,7 @@ import (
"log"
"os/exec"
"runtime"
"strings"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
@@ -40,7 +41,7 @@ func (f *HostFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (b
if err != nil {
return false, fmt.Errorf("Failed to run uname: %s", err)
}
node.Attributes["kernel.version"] = string(out)
node.Attributes["kernel.version"] = strings.Trim(string(out), "\n")
}
node.Attributes["hostname"] = hostInfo.Hostname

View File

@@ -33,25 +33,36 @@ func NewNetworkFingerprinter(logger *log.Logger) Fingerprint {
func (f *NetworkFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// newNetwork is populated and added to the Node's resources
newNetwork := &structs.NetworkResource{}
defaultDevice := ""
// eth0 is the default device for Linux, and en0 is default for OS X
defaultDevice := "eth0"
if "darwin" == runtime.GOOS {
defaultDevice = "en0"
}
// User-defined override for the default interface
// 1. Use user-defined network device
// 2. Use first interface found in the system for non-dev mode. (dev mode uses lo by default.)
if cfg.NetworkInterface != "" {
defaultDevice = cfg.NetworkInterface
} else {
intfs, err := net.Interfaces()
if err != nil {
return false, err
}
for _, i := range intfs {
if (i.Flags&net.FlagUp != 0) && (i.Flags&(net.FlagLoopback|net.FlagPointToPoint) == 0) {
if ip := f.ipAddress(i.Name); ip != "" {
defaultDevice = i.Name
node.Attributes["network.ip-address"] = ip
newNetwork.IP = ip
newNetwork.CIDR = newNetwork.IP + "/32"
break
}
}
}
}
newNetwork.Device = defaultDevice
if ip := f.ipAddress(defaultDevice); ip != "" {
node.Attributes["network.ip-address"] = ip
newNetwork.IP = ip
newNetwork.CIDR = newNetwork.IP + "/32"
if defaultDevice != "" {
newNetwork.Device = defaultDevice
} else {
return false, fmt.Errorf("Unable to determine IP on network interface %v", defaultDevice)
return false, fmt.Errorf("Unable to find any network interface which has IP address")
}
if throughput := f.linkSpeed(defaultDevice); throughput > 0 {

View File

@@ -14,8 +14,13 @@ func ExecCompatible(t *testing.T) {
}
func QemuCompatible(t *testing.T) {
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
t.Skip("Must be root on non-windows environments to run test")
if runtime.GOOS == "windows" {
t.Skip("Must be on non-windows environments to run test")
}
// else see if qemu exists
_, err := exec.Command("qemu-system-x86_64", "-version").CombinedOutput()
if err != nil {
t.Skip("Must have Qemu installed for Qemu specific tests to run")
}
}

View File

@@ -220,6 +220,7 @@ func DevConfig() *Config {
conf.DevMode = true
conf.EnableDebug = true
conf.DisableAnonymousSignature = true
conf.Client.NetworkInterface = "lo"
return conf
}

View File

@@ -2,6 +2,7 @@ package command
import (
"fmt"
"sort"
"strings"
)
@@ -103,6 +104,20 @@ func (c *NodeStatusCommand) Run(args []string) int {
return 1
}
m := node.Attributes
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
sort.Strings(keys)
var attributes []string
for _, k := range keys {
if k != "" {
attributes = append(attributes, fmt.Sprintf("%s:%s", k, m[k]))
}
}
// Format the output
basic := []string{
fmt.Sprintf("ID|%s", node.ID),
@@ -111,6 +126,7 @@ func (c *NodeStatusCommand) Run(args []string) int {
fmt.Sprintf("Datacenter|%s", node.Datacenter),
fmt.Sprintf("Drain|%v", node.Drain),
fmt.Sprintf("Status|%s", node.Status),
fmt.Sprintf("Attributes|%s", strings.Join(attributes, ", ")),
}
var allocs []string

View File

@@ -2,6 +2,7 @@ package command
import (
"fmt"
"sort"
"strings"
"github.com/hashicorp/nomad/api"
@@ -68,6 +69,9 @@ func (c *ServerMembersCommand) Run(args []string) int {
return 1
}
// Sort the members
sort.Sort(api.AgentMembersNameSort(mem))
// Format the list
var out []string
if detailed {

View File

@@ -4,7 +4,7 @@
$script = <<SCRIPT
# Update apt and get dependencies
sudo apt-get update
sudo apt-get install -y unzip curl wget
sudo apt-get install -y unzip curl wget vim
# Download Nomad
echo Fetching Nomad...

View File

@@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
@@ -242,6 +243,34 @@ func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error {
m["hard"] = true
}
// If "version" is provided, set the operand
// to "version" and the value to the "RTarget"
if constraint, ok := m[structs.ConstraintVersion]; ok {
m["Operand"] = structs.ConstraintVersion
m["RTarget"] = constraint
}
// If "regexp" is provided, set the operand
// to "regexp" and the value to the "RTarget"
if constraint, ok := m[structs.ConstraintRegex]; ok {
m["Operand"] = structs.ConstraintRegex
m["RTarget"] = constraint
}
if value, ok := m[structs.ConstraintDistinctHosts]; ok {
enabled, err := strconv.ParseBool(value.(string))
if err != nil {
return err
}
// If it is not enabled, skip the constraint.
if !enabled {
continue
}
m["Operand"] = structs.ConstraintDistinctHosts
}
// Build the constraint
var c structs.Constraint
if err := mapstructure.WeakDecode(m, &c); err != nil {

View File

@@ -152,6 +152,64 @@ func TestParse(t *testing.T) {
false,
},
{
"version-constraint.hcl",
&structs.Job{
ID: "foo",
Name: "foo",
Priority: 50,
Region: "global",
Type: "service",
Constraints: []*structs.Constraint{
&structs.Constraint{
Hard: true,
LTarget: "$attr.kernel.version",
RTarget: "~> 3.2",
Operand: structs.ConstraintVersion,
},
},
},
false,
},
{
"regexp-constraint.hcl",
&structs.Job{
ID: "foo",
Name: "foo",
Priority: 50,
Region: "global",
Type: "service",
Constraints: []*structs.Constraint{
&structs.Constraint{
Hard: true,
LTarget: "$attr.kernel.version",
RTarget: "[0-9.]+",
Operand: structs.ConstraintRegex,
},
},
},
false,
},
{
"distinctHosts-constraint.hcl",
&structs.Job{
ID: "foo",
Name: "foo",
Priority: 50,
Region: "global",
Type: "service",
Constraints: []*structs.Constraint{
&structs.Constraint{
Hard: true,
Operand: structs.ConstraintDistinctHosts,
},
},
},
false,
},
{
"specify-job.hcl",
&structs.Job{

View File

@@ -0,0 +1,5 @@
job "foo" {
constraint {
distinct_hosts = "true"
}
}

View File

@@ -0,0 +1,6 @@
job "foo" {
constraint {
attribute = "$attr.kernel.version"
regexp = "[0-9.]+"
}
}

View File

@@ -0,0 +1,6 @@
job "foo" {
constraint {
attribute = "$attr.kernel.version"
version = "~> 3.2"
}
}

View File

@@ -2,6 +2,7 @@ package nomad
import (
"container/heap"
"errors"
"fmt"
"math/rand"
"sync"
@@ -18,6 +19,17 @@ const (
failedQueue = "_failed"
)
var (
// ErrNotOutstanding is returned if an evaluation is not outstanding
ErrNotOutstanding = errors.New("evaluation is not outstanding")
// ErrTokenMismatch is returned if the outstanding eval has a different token
ErrTokenMismatch = errors.New("evaluation token does not match")
// ErrNackTimeoutReached is returned if an expired evaluation is reset
ErrNackTimeoutReached = errors.New("evaluation nack timeout reached")
)
// EvalBroker is used to manage brokering of evaluations. When an evaluation is
// created, due to a change in a job specification or a node, we put it into the
// broker. The broker sorts evaluations by priority and scheduler type. This
@@ -381,6 +393,24 @@ func (b *EvalBroker) Outstanding(evalID string) (string, bool) {
return unack.Token, true
}
// OutstandingReset resets the Nack timer for the EvalID if the
// token matches and the eval is outstanding
func (b *EvalBroker) OutstandingReset(evalID, token string) error {
b.l.RLock()
defer b.l.RUnlock()
unack, ok := b.unack[evalID]
if !ok {
return ErrNotOutstanding
}
if unack.Token != token {
return ErrTokenMismatch
}
if !unack.NackTimer.Reset(b.nackTimeout) {
return ErrNackTimeoutReached
}
return nil
}
// Ack is used to positively acknowledge handling an evaluation
func (b *EvalBroker) Ack(evalID, token string) error {
b.l.Lock()

View File

@@ -89,6 +89,20 @@ func TestEvalBroker_Enqueue_Dequeue_Nack_Ack(t *testing.T) {
t.Fatalf("Bad: %#v %#v", token, tokenOut)
}
// OutstandingReset should verify the token
err = b.OutstandingReset("nope", "foo")
if err != ErrNotOutstanding {
t.Fatalf("err: %v", err)
}
err = b.OutstandingReset(out.ID, "foo")
if err != ErrTokenMismatch {
t.Fatalf("err: %v", err)
}
err = b.OutstandingReset(out.ID, tokenOut)
if err != nil {
t.Fatalf("err: %v", err)
}
// Check the stats
stats = b.Stats()
if stats.TotalReady != 0 {
@@ -560,6 +574,50 @@ func TestEvalBroker_Nack_Timeout(t *testing.T) {
}
}
// Ensure the Nack timer can be reset, delaying redelivery
func TestEvalBroker_Nack_TimeoutReset(t *testing.T) {
b := testBroker(t, 5*time.Millisecond)
b.SetEnabled(true)
// Enqueue
eval := mock.Eval()
err := b.Enqueue(eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Dequeue
out, token, err := b.Dequeue(defaultSched, time.Second)
start := time.Now()
if err != nil {
t.Fatalf("err: %v", err)
}
if out != eval {
t.Fatalf("bad: %v", out)
}
// Reset in 2 milliseconds
time.Sleep(2 * time.Millisecond)
if err := b.OutstandingReset(out.ID, token); err != nil {
t.Fatalf("err: %v", err)
}
// Dequeue, should block on Nack timer
out, _, err = b.Dequeue(defaultSched, time.Second)
end := time.Now()
if err != nil {
t.Fatalf("err: %v", err)
}
if out != eval {
t.Fatalf("bad: %v", out)
}
// Check the nack timer
if diff := end.Sub(start); diff < 7*time.Millisecond {
t.Fatalf("bad: %#v", diff)
}
}
func TestEvalBroker_DeliveryLimit(t *testing.T) {
b := testBroker(t, 0)
b.SetEnabled(true)

View File

@@ -134,12 +134,8 @@ func (e *Eval) Update(args *structs.EvalUpdateRequest,
eval := args.Evals[0]
// Verify the evaluation is outstanding, and that the tokens match.
token, ok := e.srv.evalBroker.Outstanding(eval.ID)
if !ok {
return fmt.Errorf("evaluation is not outstanding")
}
if args.EvalToken != token {
return fmt.Errorf("evaluation token does not match")
if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
return err
}
// Update via Raft
@@ -168,12 +164,8 @@ func (e *Eval) Create(args *structs.EvalUpdateRequest,
eval := args.Evals[0]
// Verify the parent evaluation is outstanding, and that the tokens match.
token, ok := e.srv.evalBroker.Outstanding(eval.PreviousEval)
if !ok {
return fmt.Errorf("previous evaluation is not outstanding")
}
if args.EvalToken != token {
return fmt.Errorf("previous evaluation token does not match")
if err := e.srv.evalBroker.OutstandingReset(eval.PreviousEval, args.EvalToken); err != nil {
return err
}
// Look for the eval

View File

@@ -224,7 +224,9 @@ func TestEvalEndpoint_Update(t *testing.T) {
}
func TestEvalEndpoint_Create(t *testing.T) {
s1 := testServer(t, nil)
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer s1.Shutdown()
codec := rpcClient(t, s1)

View File

@@ -11,7 +11,9 @@ import (
)
func TestJobEndpoint_Register(t *testing.T) {
s1 := testServer(t, nil)
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
@@ -78,7 +80,9 @@ func TestJobEndpoint_Register(t *testing.T) {
}
func TestJobEndpoint_Register_Existing(t *testing.T) {
s1 := testServer(t, nil)
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
@@ -162,7 +166,9 @@ func TestJobEndpoint_Register_Existing(t *testing.T) {
}
func TestJobEndpoint_Evaluate(t *testing.T) {
s1 := testServer(t, nil)
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
@@ -231,7 +237,9 @@ func TestJobEndpoint_Evaluate(t *testing.T) {
}
func TestJobEndpoint_Deregister(t *testing.T) {
s1 := testServer(t, nil)
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)

View File

@@ -228,6 +228,7 @@ func TestLeader_EvalBroker_Reset(t *testing.T) {
defer s3.Shutdown()
servers := []*Server{s1, s2, s3}
testJoin(t, s1, s2, s3)
testutil.WaitForLeader(t, s1.RPC)
for _, s := range servers {
testutil.WaitForResult(func() (bool, error) {

View File

@@ -80,6 +80,9 @@ func Job() *structs.Job {
"command": "/bin/date",
"args": "+%s",
},
Env: map[string]string{
"FOO": "bar",
},
Resources: &structs.Resources{
CPU: 500,
MemoryMB: 256,
@@ -109,6 +112,59 @@ func Job() *structs.Job {
return job
}
func SystemJob() *structs.Job {
job := &structs.Job{
Region: "global",
ID: structs.GenerateUUID(),
Name: "my-job",
Type: structs.JobTypeSystem,
Priority: 100,
AllAtOnce: false,
Datacenters: []string{"dc1"},
Constraints: []*structs.Constraint{
&structs.Constraint{
Hard: true,
LTarget: "$attr.kernel.name",
RTarget: "linux",
Operand: "=",
},
},
TaskGroups: []*structs.TaskGroup{
&structs.TaskGroup{
Name: "web",
Count: 1,
Tasks: []*structs.Task{
&structs.Task{
Name: "web",
Driver: "exec",
Config: map[string]string{
"command": "/bin/date",
"args": "+%s",
},
Resources: &structs.Resources{
CPU: 500,
MemoryMB: 256,
Networks: []*structs.NetworkResource{
&structs.NetworkResource{
MBits: 50,
DynamicPorts: []string{"http"},
},
},
},
},
},
},
},
Meta: map[string]string{
"owner": "armon",
},
Status: structs.JobStatusPending,
CreateIndex: 42,
ModifyIndex: 99,
}
return job
}
func Eval() *structs.Evaluation {
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),

View File

@@ -154,7 +154,10 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct
}
// Check if we should trigger evaluations
if structs.ShouldDrainNode(args.Status) {
initToReady := node.Status == structs.NodeStatusInit && args.Status == structs.NodeStatusReady
terminalToReady := node.Status == structs.NodeStatusDown && args.Status == structs.NodeStatusReady
transitionToReady := initToReady || terminalToReady
if structs.ShouldDrainNode(args.Status) || transitionToReady {
evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
if err != nil {
n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
@@ -271,7 +274,7 @@ func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUp
return nil
}
// GetNode is used to request information about a specific ndoe
// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
reply *structs.SingleNodeResponse) error {
if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
@@ -312,7 +315,7 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest,
return nil
}
// GetAllocs is used to request allocations for a specific ndoe
// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
reply *structs.NodeAllocsResponse) error {
if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
@@ -447,8 +450,18 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6
return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
}
sysJobsIter, err := snap.JobsByScheduler("system")
if err != nil {
return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
}
var sysJobs []*structs.Job
for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
sysJobs = append(sysJobs, job.(*structs.Job))
}
// Fast-path if nothing to do
if len(allocs) == 0 {
if len(allocs) == 0 && len(sysJobs) == 0 {
return nil, 0, nil
}
@@ -479,6 +492,29 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6
evalIDs = append(evalIDs, eval.ID)
}
// Create an evaluation for each system job.
for _, job := range sysJobs {
// Still dedup on JobID as the node may already have the system job.
if _, ok := jobIDs[job.ID]; ok {
continue
}
jobIDs[job.ID] = struct{}{}
// Create a new eval
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,
Type: job.Type,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
NodeID: nodeID,
NodeModifyIndex: nodeIndex,
Status: structs.EvalStatusPending,
}
evals = append(evals, eval)
evalIDs = append(evalIDs, eval.ID)
}
// Create the Raft transaction
update := &structs.EvalUpdateRequest{
Evals: evals,

View File

@@ -149,6 +149,87 @@ func TestClientEndpoint_UpdateStatus(t *testing.T) {
}
}
func TestClientEndpoint_UpdateStatus_GetEvals(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Register a system job.
job := mock.SystemJob()
state := s1.fsm.State()
if err := state.UpsertJob(1, job); err != nil {
t.Fatalf("err: %v", err)
}
// Create the register request
node := mock.Node()
node.Status = structs.NodeStatusInit
reg := &structs.NodeRegisterRequest{
Node: node,
WriteRequest: structs.WriteRequest{Region: "global"},
}
// Fetch the response
var resp structs.NodeUpdateResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil {
t.Fatalf("err: %v", err)
}
// Check for heartbeat interval
ttl := resp.HeartbeatTTL
if ttl < s1.config.MinHeartbeatTTL || ttl > 2*s1.config.MinHeartbeatTTL {
t.Fatalf("bad: %#v", ttl)
}
// Update the status
update := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: "global"},
}
var resp2 structs.NodeUpdateResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", update, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if resp2.Index == 0 {
t.Fatalf("bad index: %d", resp2.Index)
}
// Check for an eval caused by the system job.
if len(resp2.EvalIDs) != 1 {
t.Fatalf("expected one eval; got %#v", resp2.EvalIDs)
}
evalID := resp2.EvalIDs[0]
eval, err := state.EvalByID(evalID)
if err != nil {
t.Fatalf("could not get eval %v", evalID)
}
if eval.Type != "system" {
t.Fatalf("unexpected eval type; got %v; want %q", eval.Type, "system")
}
// Check for heartbeat interval
ttl = resp2.HeartbeatTTL
if ttl < s1.config.MinHeartbeatTTL || ttl > 2*s1.config.MinHeartbeatTTL {
t.Fatalf("bad: %#v", ttl)
}
// Check for the node in the FSM
out, err := state.NodeByID(node.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if out == nil {
t.Fatalf("expected node")
}
if out.ModifyIndex != resp2.Index {
t.Fatalf("index mis-match")
}
}
func TestClientEndpoint_UpdateStatus_HeartbeatOnly(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -476,8 +557,13 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) {
// Inject fake evaluations
alloc := mock.Alloc()
state := s1.fsm.State()
err := state.UpsertAllocs(1, []*structs.Allocation{alloc})
if err != nil {
if err := state.UpsertAllocs(1, []*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
// Inject a fake system job.
job := mock.SystemJob()
if err := state.UpsertJob(1, job); err != nil {
t.Fatalf("err: %v", err)
}
@@ -489,47 +575,69 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) {
if index == 0 {
t.Fatalf("bad: %d", index)
}
if len(ids) != 1 {
if len(ids) != 2 {
t.Fatalf("bad: %s", ids)
}
// Lookup the evaluation
eval, err := state.EvalByID(ids[0])
if err != nil {
t.Fatalf("err: %v", err)
}
if eval == nil {
t.Fatalf("expected eval")
}
if eval.CreateIndex != index {
t.Fatalf("index mis-match")
// Lookup the evaluations
evalByType := make(map[string]*structs.Evaluation, 2)
for _, id := range ids {
eval, err := state.EvalByID(id)
if err != nil {
t.Fatalf("err: %v", err)
}
if eval == nil {
t.Fatalf("expected eval")
}
if old, ok := evalByType[eval.Type]; ok {
t.Fatalf("multiple evals of the same type: %v and %v", old, eval)
}
evalByType[eval.Type] = eval
}
if eval.Priority != alloc.Job.Priority {
t.Fatalf("bad: %#v", eval)
if len(evalByType) != 2 {
t.Fatalf("Expected a service and system job; got %#v", evalByType)
}
if eval.Type != alloc.Job.Type {
t.Fatalf("bad: %#v", eval)
}
if eval.TriggeredBy != structs.EvalTriggerNodeUpdate {
t.Fatalf("bad: %#v", eval)
}
if eval.JobID != alloc.JobID {
t.Fatalf("bad: %#v", eval)
}
if eval.NodeID != alloc.NodeID {
t.Fatalf("bad: %#v", eval)
}
if eval.NodeModifyIndex != 1 {
t.Fatalf("bad: %#v", eval)
}
if eval.Status != structs.EvalStatusPending {
t.Fatalf("bad: %#v", eval)
// Ensure the evals are correct.
for schedType, eval := range evalByType {
expPriority := alloc.Job.Priority
expJobID := alloc.JobID
if schedType == "system" {
expPriority = job.Priority
expJobID = job.ID
}
if eval.CreateIndex != index {
t.Fatalf("CreateIndex mis-match on type %v: %#v", schedType, eval)
}
if eval.TriggeredBy != structs.EvalTriggerNodeUpdate {
t.Fatalf("TriggeredBy incorrect on type %v: %#v", schedType, eval)
}
if eval.NodeID != alloc.NodeID {
t.Fatalf("NodeID incorrect on type %v: %#v", schedType, eval)
}
if eval.NodeModifyIndex != 1 {
t.Fatalf("NodeModifyIndex incorrect on type %v: %#v", schedType, eval)
}
if eval.Status != structs.EvalStatusPending {
t.Fatalf("Status incorrect on type %v: %#v", schedType, eval)
}
if eval.Priority != expPriority {
t.Fatalf("Priority incorrect on type %v: %#v", schedType, eval)
}
if eval.JobID != expJobID {
t.Fatalf("JobID incorrect on type %v: %#v", schedType, eval)
}
}
}
func TestClientEndpoint_Evaluate(t *testing.T) {
s1 := testServer(t, nil)
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)

View File

@@ -7,12 +7,41 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/raft"
)
// planApply is a long lived goroutine that reads plan allocations from
// the plan queue, determines if they can be applied safely and applies
// them via Raft.
//
// Naively, we could simply dequeue a plan, verify, apply and then respond.
// However, the plan application is bounded by the Raft apply time and
// subject to some latency. This creates a stall condition, where we are
// not evaluating, but simply waiting for a transaction to apply.
//
// To avoid this, we overlap verification with apply. This means once
// we've verified plan N we attempt to apply it. However, while waiting
// for apply, we begin to verify plan N+1 under the assumption that plan
// N has succeeded.
//
// In this sense, we track two parallel versions of the world. One is
// the pessimistic one driven by the Raft log which is replicated. The
// other is optimistic and assumes our transactions will succeed. In the
// happy path, this lets us do productive work during the latency of
// apply.
//
// In the unhappy path (Raft transaction fails), effectively we only
// wasted work during a time we would have been waiting anyways. However,
// in anticipation of this case we cannot respond to the plan until
// the Raft log is updated. This means our schedulers will stall,
// but there are many of those and only a single plan verifier.
//
func (s *Server) planApply() {
// waitCh is used to track an outstanding application while snap
// holds an optimistic state which includes that plan application.
var waitCh chan struct{}
var snap *state.StateSnapshot
for {
// Pull the next pending plan, exit if we are no longer leader
pending, err := s.planQueue.Dequeue(0)
@@ -21,26 +50,30 @@ func (s *Server) planApply() {
}
// Verify the evaluation is outstanding, and that the tokens match.
token, ok := s.evalBroker.Outstanding(pending.plan.EvalID)
if !ok {
s.logger.Printf("[ERR] nomad: plan received for non-outstanding evaluation %s",
pending.plan.EvalID)
pending.respond(nil, fmt.Errorf("evaluation is not outstanding"))
continue
}
if pending.plan.EvalToken != token {
s.logger.Printf("[ERR] nomad: plan received for evaluation %s with wrong token",
pending.plan.EvalID)
pending.respond(nil, fmt.Errorf("evaluation token does not match"))
if err := s.evalBroker.OutstandingReset(pending.plan.EvalID, pending.plan.EvalToken); err != nil {
s.logger.Printf("[ERR] nomad: plan rejected for evaluation %s: %v",
pending.plan.EvalID, err)
pending.respond(nil, err)
continue
}
// Check if our last plan has completed
select {
case <-waitCh:
waitCh = nil
snap = nil
default:
}
// Snapshot the state so that we have a consistent view of the world
snap, err := s.fsm.State().Snapshot()
if err != nil {
s.logger.Printf("[ERR] nomad: failed to snapshot state: %v", err)
pending.respond(nil, err)
continue
// if no snapshot is available
if waitCh == nil || snap == nil {
snap, err = s.fsm.State().Snapshot()
if err != nil {
s.logger.Printf("[ERR] nomad: failed to snapshot state: %v", err)
pending.respond(nil, err)
continue
}
}
// Evaluate the plan
@@ -51,25 +84,40 @@ func (s *Server) planApply() {
continue
}
// Apply the plan if there is anything to do
if !result.IsNoOp() {
allocIndex, err := s.applyPlan(result)
// Fast-path the response if there is nothing to do
if result.IsNoOp() {
pending.respond(result, nil)
continue
}
// Ensure any parallel apply is complete before starting the next one.
// This also limits how out of date our snapshot can be.
if waitCh != nil {
<-waitCh
snap, err = s.fsm.State().Snapshot()
if err != nil {
s.logger.Printf("[ERR] nomad: failed to apply plan: %v", err)
s.logger.Printf("[ERR] nomad: failed to snapshot state: %v", err)
pending.respond(nil, err)
continue
}
result.AllocIndex = allocIndex
}
// Respond to the plan
pending.respond(result, nil)
// Dispatch the Raft transaction for the plan
future, err := s.applyPlan(result, snap)
if err != nil {
s.logger.Printf("[ERR] nomad: failed to submit plan: %v", err)
pending.respond(nil, err)
continue
}
// Respond to the plan in async
waitCh = make(chan struct{})
go s.asyncPlanWait(waitCh, future, result, pending)
}
}
// applyPlan is used to dispatch the Raft apply for the plan and optimistically update the given snapshot
func (s *Server) applyPlan(result *structs.PlanResult) (uint64, error) {
defer metrics.MeasureSince([]string{"nomad", "plan", "apply"}, time.Now())
func (s *Server) applyPlan(result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) {
req := structs.AllocUpdateRequest{}
for _, updateList := range result.NodeUpdate {
req.Alloc = append(req.Alloc, updateList...)
@@ -79,8 +127,38 @@ func (s *Server) applyPlan(result *structs.PlanResult) (uint64, error) {
}
req.Alloc = append(req.Alloc, result.FailedAllocs...)
_, index, err := s.raftApply(structs.AllocUpdateRequestType, &req)
return index, err
// Dispatch the Raft transaction
future, err := s.raftApplyFuture(structs.AllocUpdateRequestType, &req)
if err != nil {
return nil, err
}
// Optimistically apply to our state view
if snap != nil {
nextIdx := s.raft.AppliedIndex() + 1
if err := snap.UpsertAllocs(nextIdx, req.Alloc); err != nil {
return future, err
}
}
return future, nil
}
// asyncPlanWait is used to apply and respond to a plan async
func (s *Server) asyncPlanWait(waitCh chan struct{}, future raft.ApplyFuture,
result *structs.PlanResult, pending *pendingPlan) {
defer metrics.MeasureSince([]string{"nomad", "plan", "apply"}, time.Now())
defer close(waitCh)
// Wait for the plan to apply
if err := future.Error(); err != nil {
s.logger.Printf("[ERR] nomad: failed to apply plan: %v", err)
pending.respond(nil, err)
return
}
// Respond to the plan
result.AllocIndex = future.Index()
pending.respond(result, nil)
}
// evaluatePlan is used to determine what portions of a plan
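
To make the overlap described in the comment block above concrete, here is a stripped-down, purely illustrative sketch of the pattern (none of the types or timings are Nomad's): plan N+1 is evaluated while the apply for plan N is still in flight, and the wait on waitCh bounds how stale the optimistic snapshot can get before the next apply is dispatched:

package main

import (
	"fmt"
	"time"
)

type plan int

// apply simulates dispatching a Raft transaction and returns a channel that
// is closed once the (simulated) apply completes.
func apply(p plan) chan struct{} {
	done := make(chan struct{})
	go func() {
		time.Sleep(50 * time.Millisecond) // simulated Raft apply latency
		close(done)
	}()
	return done
}

// evaluate simulates verifying a plan against an optimistic snapshot.
func evaluate(p plan) { fmt.Println("evaluated plan", p) }

func main() {
	var waitCh chan struct{}
	for p := plan(1); p <= 3; p++ {
		evaluate(p) // verification of plan p overlaps the previous apply
		if waitCh != nil {
			<-waitCh // ensure the previous apply finished before dispatching the next
		}
		waitCh = apply(p)
	}
	<-waitCh // drain the final apply
}
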

View File

@@ -7,8 +7,17 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/raft"
)
// planWaitFuture is used to wait for the Raft future to complete
func planWaitFuture(future raft.ApplyFuture) (uint64, error) {
if err := future.Error(); err != nil {
return 0, err
}
return future.Index(), nil
}
func testRegisterNode(t *testing.T, s *Server, n *structs.Node) {
// Create the register request
req := &structs.NodeRegisterRequest{
@@ -45,8 +54,25 @@ func TestPlanApply_applyPlan(t *testing.T) {
FailedAllocs: []*structs.Allocation{allocFail},
}
// Snapshot the state
snap, err := s1.State().Snapshot()
if err != nil {
t.Fatalf("err: %v", err)
}
// Apply the plan
index, err := s1.applyPlan(plan)
future, err := s1.applyPlan(plan, snap)
if err != nil {
t.Fatalf("err: %v", err)
}
// Verify our optimistic snapshot is updated
if out, err := snap.AllocByID(alloc.ID); err != nil || out == nil {
t.Fatalf("bad: %v %v", out, err)
}
// Check plan does apply cleanly
index, err := planWaitFuture(future)
if err != nil {
t.Fatalf("err: %v", err)
}
@@ -86,8 +112,25 @@ func TestPlanApply_applyPlan(t *testing.T) {
},
}
// Snapshot the state
snap, err = s1.State().Snapshot()
if err != nil {
t.Fatalf("err: %v", err)
}
// Apply the plan
index, err = s1.applyPlan(plan)
future, err = s1.applyPlan(plan, snap)
if err != nil {
t.Fatalf("err: %v", err)
}
// Check that our optimistic view is updated
if out, _ := snap.AllocByID(allocEvict.ID); out.DesiredStatus != structs.AllocDesiredStatusEvict {
t.Fatalf("bad: %#v", out)
}
// Verify plan applies cleanly
index, err = planWaitFuture(future)
if err != nil {
t.Fatalf("err: %v", err)
}

View File

@@ -13,6 +13,7 @@ import (
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/raft"
"github.com/hashicorp/yamux"
)
@@ -225,12 +226,11 @@ func (s *Server) forwardRegion(region, method string, args interface{}, reply in
return s.connPool.RPC(region, server.Addr, server.Version, method, args, reply)
}
// raftApply is used to encode a message, run it through raft, and return
// the FSM response along with any errors
func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
// raftApplyFuture is used to encode a message, run it through raft, and return the Raft future.
func (s *Server) raftApplyFuture(t structs.MessageType, msg interface{}) (raft.ApplyFuture, error) {
buf, err := structs.Encode(t, msg)
if err != nil {
return nil, 0, fmt.Errorf("Failed to encode request: %v", err)
return nil, fmt.Errorf("Failed to encode request: %v", err)
}
// Warn if the command is very large
@@ -239,10 +239,19 @@ func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{},
}
future := s.raft.Apply(buf, enqueueLimit)
return future, nil
}
// raftApply is used to encode a message, run it through raft, and return
// the FSM response along with any errors
func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
future, err := s.raftApplyFuture(t, msg)
if err != nil {
return nil, 0, err
}
if err := future.Error(); err != nil {
return nil, 0, err
}
return future.Response(), future.Index(), nil
}

View File

@@ -37,6 +37,8 @@ func TestRPC_forwardRegion(t *testing.T) {
})
defer s2.Shutdown()
testJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
var out struct{}
err := s1.forwardRegion("region2", "Status.Ping", struct{}{}, &out)

View File

@@ -44,9 +44,9 @@ func testServer(t *testing.T, cb func(*Config)) *Server {
config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
// Tighten the Raft timing
config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond
config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond
config.RaftConfig.ElectionTimeout = 40 * time.Millisecond
config.RaftConfig.LeaderLeaseTimeout = 50 * time.Millisecond
config.RaftConfig.HeartbeatTimeout = 50 * time.Millisecond
config.RaftConfig.ElectionTimeout = 50 * time.Millisecond
config.RaftTimeout = 500 * time.Millisecond
// Invoke the callback if any


@@ -91,6 +91,15 @@ func jobTableSchema() *memdb.TableSchema {
Lowercase: true,
},
},
"type": &memdb.IndexSchema{
Name: "type",
AllowMissing: false,
Unique: false,
Indexer: &memdb.StringFieldIndex{
Field: "Type",
Lowercase: false,
},
},
},
}
}


@@ -399,6 +399,19 @@ func (s *StateStore) Jobs() (memdb.ResultIterator, error) {
return iter, nil
}
// JobsByScheduler returns an iterator over all the jobs with the specific
// scheduler type.
func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) {
txn := s.db.Txn(false)
// Return an iterator for jobs with the specific type.
iter, err := txn.Get("jobs", "type", schedulerType)
if err != nil {
return nil, err
}
return iter, nil
}
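For reference, a hedged usage sketch of the new index; the helper below is hypothetical and simply drains the go-memdb iterator in the usual Next-until-nil loop:

// listSystemJobIDs shows how a caller might consume JobsByScheduler.
func listSystemJobIDs(store *StateStore) ([]string, error) {
	iter, err := store.JobsByScheduler(structs.JobTypeSystem)
	if err != nil {
		return nil, err
	}
	var ids []string
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		ids = append(ids, raw.(*structs.Job).ID)
	}
	return ids, nil
}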
// UpsertEvals is used to upsert a set of evaluations
func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
txn := s.db.Txn(true)


@@ -348,6 +348,73 @@ func TestStateStore_Jobs(t *testing.T) {
}
}
func TestStateStore_JobsByScheduler(t *testing.T) {
state := testStateStore(t)
var serviceJobs []*structs.Job
var sysJobs []*structs.Job
for i := 0; i < 10; i++ {
job := mock.Job()
serviceJobs = append(serviceJobs, job)
err := state.UpsertJob(1000+uint64(i), job)
if err != nil {
t.Fatalf("err: %v", err)
}
}
for i := 0; i < 10; i++ {
job := mock.SystemJob()
sysJobs = append(sysJobs, job)
err := state.UpsertJob(2000+uint64(i), job)
if err != nil {
t.Fatalf("err: %v", err)
}
}
iter, err := state.JobsByScheduler("service")
if err != nil {
t.Fatalf("err: %v", err)
}
var outService []*structs.Job
for {
raw := iter.Next()
if raw == nil {
break
}
outService = append(outService, raw.(*structs.Job))
}
iter, err = state.JobsByScheduler("system")
if err != nil {
t.Fatalf("err: %v", err)
}
var outSystem []*structs.Job
for {
raw := iter.Next()
if raw == nil {
break
}
outSystem = append(outSystem, raw.(*structs.Job))
}
sort.Sort(JobIDSort(serviceJobs))
sort.Sort(JobIDSort(sysJobs))
sort.Sort(JobIDSort(outService))
sort.Sort(JobIDSort(outSystem))
if !reflect.DeepEqual(serviceJobs, outService) {
t.Fatalf("bad: %#v %#v", serviceJobs, outService)
}
if !reflect.DeepEqual(sysJobs, outSystem) {
t.Fatalf("bad: %#v %#v", sysJobs, outSystem)
}
}
func TestStateStore_RestoreJob(t *testing.T) {
state := testStateStore(t)


@@ -4,11 +4,13 @@ import (
"bytes"
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-version"
)
var (
@@ -697,6 +699,7 @@ const (
JobTypeCore = "_core"
JobTypeService = "service"
JobTypeBatch = "batch"
JobTypeSystem = "system"
)
const (
@@ -809,6 +812,12 @@ func (j *Job) Validate() error {
if len(j.TaskGroups) == 0 {
mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
}
for idx, constr := range j.Constraints {
if err := constr.Validate(); err != nil {
outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
mErr.Errors = append(mErr.Errors, outer)
}
}
// Check for duplicate task groups
taskGroups := make(map[string]int)
@@ -820,6 +829,12 @@ func (j *Job) Validate() error {
} else {
taskGroups[tg.Name] = idx
}
if j.Type == "system" && tg.Count != 1 {
mErr.Errors = append(mErr.Errors,
fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler",
idx+1, tg.Count))
}
}
// Validate the task group
@@ -918,6 +933,12 @@ func (tg *TaskGroup) Validate() error {
if len(tg.Tasks) == 0 {
mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
}
for idx, constr := range tg.Constraints {
if err := constr.Validate(); err != nil {
outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
mErr.Errors = append(mErr.Errors, outer)
}
}
// Check for duplicate tasks
tasks := make(map[string]int)
@@ -997,9 +1018,21 @@ func (t *Task) Validate() error {
if t.Resources == nil {
mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
}
for idx, constr := range t.Constraints {
if err := constr.Validate(); err != nil {
outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
mErr.Errors = append(mErr.Errors, outer)
}
}
return mErr.ErrorOrNil()
}
const (
ConstraintDistinctHosts = "distinct_hosts"
ConstraintRegex = "regexp"
ConstraintVersion = "version"
)
// Constraints are used to restrict placement options in the case of
// a hard constraint, and used to prefer a placement in the case of
// a soft constraint.
@@ -1015,6 +1048,26 @@ func (c *Constraint) String() string {
return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
}
func (c *Constraint) Validate() error {
var mErr multierror.Error
if c.Operand == "" {
mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
}
// Perform additional validation based on operand
switch c.Operand {
case ConstraintRegex:
if _, err := regexp.Compile(c.RTarget); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
}
case ConstraintVersion:
if _, err := version.NewConstraint(c.RTarget); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
}
}
return mErr.ErrorOrNil()
}
const (
AllocDesiredStatusRun = "run" // Allocation should run
AllocDesiredStatusStop = "stop" // Allocation should stop


@@ -125,6 +125,43 @@ func TestTask_Validate(t *testing.T) {
}
}
func TestConstraint_Validate(t *testing.T) {
c := &Constraint{}
err := c.Validate()
mErr := err.(*multierror.Error)
if !strings.Contains(mErr.Errors[0].Error(), "Missing constraint operand") {
t.Fatalf("err: %s", err)
}
c = &Constraint{
LTarget: "$attr.kernel.name",
RTarget: "linux",
Operand: "=",
}
err = c.Validate()
if err != nil {
t.Fatalf("err: %v", err)
}
// Perform additional regexp validation
c.Operand = ConstraintRegex
c.RTarget = "(foo"
err = c.Validate()
mErr = err.(*multierror.Error)
if !strings.Contains(mErr.Errors[0].Error(), "missing closing") {
t.Fatalf("err: %s", err)
}
// Perform version validation
c.Operand = ConstraintVersion
c.RTarget = "~> foo"
err = c.Validate()
mErr = err.(*multierror.Error)
if !strings.Contains(mErr.Errors[0].Error(), "Malformed constraint") {
t.Fatalf("err: %s", err)
}
}
func TestResource_NetIndex(t *testing.T) {
r := &Resources{
Networks: []*NetworkResource{


@@ -52,7 +52,12 @@ func TestWorker_dequeueEvaluation(t *testing.T) {
// Create the evaluation
eval1 := mock.Eval()
s1.evalBroker.Enqueue(eval1)
testutil.WaitForResult(func() (bool, error) {
err := s1.evalBroker.Enqueue(eval1)
return err == nil, err
}, func(err error) {
t.Fatalf("err: %v", err)
})
// Create a worker
w := &Worker{srv: s1, logger: s1.logger}
@@ -82,7 +87,12 @@ func TestWorker_dequeueEvaluation_paused(t *testing.T) {
// Create the evaluation
eval1 := mock.Eval()
s1.evalBroker.Enqueue(eval1)
testutil.WaitForResult(func() (bool, error) {
err := s1.evalBroker.Enqueue(eval1)
return err == nil, err
}, func(err error) {
t.Fatalf("err: %v", err)
})
// Create a worker
w := &Worker{srv: s1, logger: s1.logger}
@@ -153,7 +163,12 @@ func TestWorker_sendAck(t *testing.T) {
// Create the evaluation
eval1 := mock.Eval()
s1.evalBroker.Enqueue(eval1)
testutil.WaitForResult(func() (bool, error) {
err := s1.evalBroker.Enqueue(eval1)
return err == nil, err
}, func(err error) {
t.Fatalf("err: %v", err)
})
// Create a worker
w := &Worker{srv: s1, logger: s1.logger}


@@ -2,7 +2,9 @@ package scheduler
import (
"log"
"regexp"
"github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -27,10 +29,36 @@ type Context interface {
// which is the existing allocations, removing evictions, and
// adding any planned placements.
ProposedAllocs(nodeID string) ([]*structs.Allocation, error)
// RegexpCache is a cache of regular expressions
RegexpCache() map[string]*regexp.Regexp
// ConstraintCache is a cache of version constraints
ConstraintCache() map[string]version.Constraints
}
// EvalCache is used to cache certain things during an evaluation
type EvalCache struct {
reCache map[string]*regexp.Regexp
constraintCache map[string]version.Constraints
}
func (e *EvalCache) RegexpCache() map[string]*regexp.Regexp {
if e.reCache == nil {
e.reCache = make(map[string]*regexp.Regexp)
}
return e.reCache
}
func (e *EvalCache) ConstraintCache() map[string]version.Constraints {
if e.constraintCache == nil {
e.constraintCache = make(map[string]version.Constraints)
}
return e.constraintCache
}
// EvalContext is a Context used during an Evaluation
type EvalContext struct {
EvalCache
state State
plan *structs.Plan
logger *log.Logger


@@ -9,7 +9,7 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
func testContext(t *testing.T) (*state.StateStore, *EvalContext) {
func testContext(t testing.TB) (*state.StateStore, *EvalContext) {
state, err := state.NewStateStore(os.Stderr)
if err != nil {
t.Fatalf("err: %v", err)


@@ -3,8 +3,11 @@ package scheduler
import (
"fmt"
"reflect"
"regexp"
"strconv"
"strings"
"github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -127,14 +130,126 @@ func (iter *DriverIterator) Reset() {
func (iter *DriverIterator) hasDrivers(option *structs.Node) bool {
for driver := range iter.drivers {
driverStr := fmt.Sprintf("driver.%s", driver)
_, ok := option.Attributes[driverStr]
value, ok := option.Attributes[driverStr]
if !ok {
return false
}
enabled, err := strconv.ParseBool(value)
if err != nil {
iter.ctx.Logger().
Printf("[WARN] scheduler.DriverIterator: node %v has invalid driver setting %v: %v",
option.ID, driverStr, value)
return false
}
if !enabled {
return false
}
}
return true
}
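Driver attributes are now expected to carry a boolean-parseable value rather than merely exist. A quick illustration of which values pass, assuming nothing beyond Go's strconv.ParseBool semantics:

// Values strconv.ParseBool accepts ("1", "t", "true", "True", ...) enable the
// driver; "0"/"false" variants disable it, and anything unparseable causes the
// node to be filtered with a warning.
for _, v := range []string{"1", "true", "False", "0", "enabled"} {
	b, err := strconv.ParseBool(v)
	fmt.Printf("%q -> enabled=%v err=%v\n", v, b, err) // "enabled" fails to parse
}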
// ProposedAllocConstraintIterator is a FeasibleIterator which returns nodes that
// match constraints that are not static such as Node attributes but are
// affected by proposed alloc placements. Examples are distinct_hosts and
// tenancy constraints. This is used to filter on job and task group
// constraints.
type ProposedAllocConstraintIterator struct {
ctx Context
source FeasibleIterator
tg *structs.TaskGroup
job *structs.Job
// Store whether the Job or TaskGroup has a distinct_hosts constraint so
// they don't have to be calculated every time Next() is called.
tgDistinctHosts bool
jobDistinctHosts bool
}
// NewProposedAllocConstraintIterator creates a ProposedAllocConstraintIterator
// from a source.
func NewProposedAllocConstraintIterator(ctx Context, source FeasibleIterator) *ProposedAllocConstraintIterator {
iter := &ProposedAllocConstraintIterator{
ctx: ctx,
source: source,
}
return iter
}
func (iter *ProposedAllocConstraintIterator) SetTaskGroup(tg *structs.TaskGroup) {
iter.tg = tg
iter.tgDistinctHosts = iter.hasDistinctHostsConstraint(tg.Constraints)
}
func (iter *ProposedAllocConstraintIterator) SetJob(job *structs.Job) {
iter.job = job
iter.jobDistinctHosts = iter.hasDistinctHostsConstraint(job.Constraints)
}
func (iter *ProposedAllocConstraintIterator) hasDistinctHostsConstraint(constraints []*structs.Constraint) bool {
for _, con := range constraints {
if con.Operand == structs.ConstraintDistinctHosts {
return true
}
}
return false
}
func (iter *ProposedAllocConstraintIterator) Next() *structs.Node {
for {
// Get the next option from the source
option := iter.source.Next()
// Hot-path if the option is nil or there are no distinct_hosts constraints.
if option == nil || !(iter.jobDistinctHosts || iter.tgDistinctHosts) {
return option
}
if !iter.satisfiesDistinctHosts(option) {
iter.ctx.Metrics().FilterNode(option, structs.ConstraintDistinctHosts)
continue
}
return option
}
}
// satisfiesDistinctHosts checks if the node satisfies a distinct_hosts
// constraint either specified at the job level or the TaskGroup level.
func (iter *ProposedAllocConstraintIterator) satisfiesDistinctHosts(option *structs.Node) bool {
// Check if there is no constraint set.
if !(iter.jobDistinctHosts || iter.tgDistinctHosts) {
return true
}
// Get the proposed allocations
proposed, err := iter.ctx.ProposedAllocs(option.ID)
if err != nil {
iter.ctx.Logger().Printf(
"[ERR] scheduler.dynamic-constraint: failed to get proposed allocations: %v", err)
return false
}
// Skip the node if the task group has already been allocated on it.
for _, alloc := range proposed {
// If the job has a distinct_hosts constraint we only need an alloc
// collision on the JobID but if the constraint is on the TaskGroup then
// we need both a job and TaskGroup collision.
jobCollision := alloc.JobID == iter.job.ID
taskCollision := alloc.TaskGroup == iter.tg.Name
if iter.jobDistinctHosts && jobCollision || jobCollision && taskCollision {
return false
}
}
return true
}
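Since && binds tighter than || in Go, the collision test above is equivalent to the small helper below, shown only to make the two cases explicit (the function is hypothetical, not part of the diff):

// distinctHostsCollision mirrors the check in satisfiesDistinctHosts: a
// job-level constraint collides on any alloc of the same job, while a
// task-group-level constraint needs the same job and the same task group.
func distinctHostsCollision(jobDistinct, jobCollision, taskCollision bool) bool {
	return (jobDistinct && jobCollision) || (jobCollision && taskCollision)
}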
func (iter *ProposedAllocConstraintIterator) Reset() {
iter.source.Reset()
}
// ConstraintIterator is a FeasibleIterator which returns nodes
// that match a given set of constraints. This is used to filter
// on job, task group, and task constraints.
@@ -204,7 +319,7 @@ func (iter *ConstraintIterator) meetsConstraint(constraint *structs.Constraint,
}
// Check if satisfied
return checkConstraint(constraint.Operand, lVal, rVal)
return checkConstraint(iter.ctx, constraint.Operand, lVal, rVal)
}
// resolveConstraintTarget is used to resolve the LTarget and RTarget of a Constraint
@@ -241,19 +356,129 @@ func resolveConstraintTarget(target string, node *structs.Node) (interface{}, bo
}
// checkConstraint checks if a constraint is satisfied
func checkConstraint(operand string, lVal, rVal interface{}) bool {
func checkConstraint(ctx Context, operand string, lVal, rVal interface{}) bool {
// Check for constraints not handled by this iterator.
switch operand {
case structs.ConstraintDistinctHosts:
return true
default:
break
}
switch operand {
case "=", "==", "is":
return reflect.DeepEqual(lVal, rVal)
case "!=", "not":
return !reflect.DeepEqual(lVal, rVal)
case "<", "<=", ">", ">=":
// TODO: Implement
return false
case "contains":
// TODO: Implement
return false
return checkLexicalOrder(operand, lVal, rVal)
case structs.ConstraintVersion:
return checkVersionConstraint(ctx, lVal, rVal)
case structs.ConstraintRegex:
return checkRegexpConstraint(ctx, lVal, rVal)
default:
return false
}
}
// checkLexicalOrder is used to check for lexical ordering
func checkLexicalOrder(op string, lVal, rVal interface{}) bool {
// Ensure the values are strings
lStr, ok := lVal.(string)
if !ok {
return false
}
rStr, ok := rVal.(string)
if !ok {
return false
}
switch op {
case "<":
return lStr < rStr
case "<=":
return lStr <= rStr
case ">":
return lStr > rStr
case ">=":
return lStr >= rStr
default:
return false
}
}
// checkVersionConstraint is used to compare a version on the
// left hand side with a set of constraints on the right hand side
func checkVersionConstraint(ctx Context, lVal, rVal interface{}) bool {
// Parse the version
var versionStr string
switch v := lVal.(type) {
case string:
versionStr = v
case int:
versionStr = fmt.Sprintf("%d", v)
default:
return false
}
// Parse the version string
vers, err := version.NewVersion(versionStr)
if err != nil {
return false
}
// Constraint must be a string
constraintStr, ok := rVal.(string)
if !ok {
return false
}
// Check the cache for a match
cache := ctx.ConstraintCache()
constraints := cache[constraintStr]
// Parse the constraints
if constraints == nil {
constraints, err = version.NewConstraint(constraintStr)
if err != nil {
return false
}
cache[constraintStr] = constraints
}
// Check the constraints against the version
return constraints.Check(vers)
}
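The right-hand side is a hashicorp/go-version constraint string. As a hedged aside (my reading of go-version, not something stated in this diff), the pessimistic operator used in the tests behaves roughly like a bounded range:

// "~> 1.0" is approximately ">= 1.0, < 2.0" in hashicorp/go-version.
c, _ := version.NewConstraint("~> 1.0")
for _, s := range []string{"1.2.3", "2.0.1"} {
	v, _ := version.NewVersion(s)
	fmt.Printf("%s satisfies ~> 1.0: %v\n", s, c.Check(v)) // true, then false
}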
// checkRegexpConstraint is used to compare a value on the
// left hand side with a regexp on the right hand side
func checkRegexpConstraint(ctx Context, lVal, rVal interface{}) bool {
// Ensure left-hand is string
lStr, ok := lVal.(string)
if !ok {
return false
}
// Regexp must be a string
regexpStr, ok := rVal.(string)
if !ok {
return false
}
// Check the cache
cache := ctx.RegexpCache()
re := cache[regexpStr]
// Parse the regexp
if re == nil {
var err error
re, err = regexp.Compile(regexpStr)
if err != nil {
return false
}
cache[regexpStr] = re
}
// Look for a match
return re.MatchString(lStr)
}


@@ -82,11 +82,14 @@ func TestDriverIterator(t *testing.T) {
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
}
static := NewStaticIterator(ctx, nodes)
nodes[0].Attributes["driver.foo"] = "2"
nodes[2].Attributes["driver.foo"] = "2"
nodes[0].Attributes["driver.foo"] = "1"
nodes[1].Attributes["driver.foo"] = "0"
nodes[2].Attributes["driver.foo"] = "true"
nodes[3].Attributes["driver.foo"] = "False"
drivers := map[string]struct{}{
"exec": struct{}{},
@@ -244,15 +247,315 @@ func TestCheckConstraint(t *testing.T) {
lVal: "foo", rVal: "bar",
result: true,
},
{
op: structs.ConstraintVersion,
lVal: "1.2.3", rVal: "~> 1.0",
result: true,
},
{
op: structs.ConstraintRegex,
lVal: "foobarbaz", rVal: "[\\w]+",
result: true,
},
{
op: "<",
lVal: "foo", rVal: "bar",
result: false,
},
}
for _, tc := range cases {
if res := checkConstraint(tc.op, tc.lVal, tc.rVal); res != tc.result {
_, ctx := testContext(t)
if res := checkConstraint(ctx, tc.op, tc.lVal, tc.rVal); res != tc.result {
t.Fatalf("TC: %#v, Result: %v", tc, res)
}
}
}
func TestCheckLexicalOrder(t *testing.T) {
type tcase struct {
op string
lVal, rVal interface{}
result bool
}
cases := []tcase{
{
op: "<",
lVal: "bar", rVal: "foo",
result: true,
},
{
op: "<=",
lVal: "foo", rVal: "foo",
result: true,
},
{
op: ">",
lVal: "bar", rVal: "foo",
result: false,
},
{
op: ">=",
lVal: "bar", rVal: "bar",
result: true,
},
{
op: ">",
lVal: 1, rVal: "foo",
result: false,
},
}
for _, tc := range cases {
if res := checkLexicalOrder(tc.op, tc.lVal, tc.rVal); res != tc.result {
t.Fatalf("TC: %#v, Result: %v", tc, res)
}
}
}
func TestCheckVersionConstraint(t *testing.T) {
type tcase struct {
lVal, rVal interface{}
result bool
}
cases := []tcase{
{
lVal: "1.2.3", rVal: "~> 1.0",
result: true,
},
{
lVal: "1.2.3", rVal: ">= 1.0, < 1.4",
result: true,
},
{
lVal: "2.0.1", rVal: "~> 1.0",
result: false,
},
{
lVal: "1.4", rVal: ">= 1.0, < 1.4",
result: false,
},
{
lVal: 1, rVal: "~> 1.0",
result: true,
},
}
for _, tc := range cases {
_, ctx := testContext(t)
if res := checkVersionConstraint(ctx, tc.lVal, tc.rVal); res != tc.result {
t.Fatalf("TC: %#v, Result: %v", tc, res)
}
}
}
func TestCheckRegexpConstraint(t *testing.T) {
type tcase struct {
lVal, rVal interface{}
result bool
}
cases := []tcase{
{
lVal: "foobar", rVal: "bar",
result: true,
},
{
lVal: "foobar", rVal: "^foo",
result: true,
},
{
lVal: "foobar", rVal: "^bar",
result: false,
},
{
lVal: "zipzap", rVal: "foo",
result: false,
},
{
lVal: 1, rVal: "foo",
result: false,
},
}
for _, tc := range cases {
_, ctx := testContext(t)
if res := checkRegexpConstraint(ctx, tc.lVal, tc.rVal); res != tc.result {
t.Fatalf("TC: %#v, Result: %v", tc, res)
}
}
}
func TestProposedAllocConstraint_JobDistinctHosts(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
}
static := NewStaticIterator(ctx, nodes)
// Create a job with a distinct_hosts constraint and two task groups.
tg1 := &structs.TaskGroup{Name: "bar"}
tg2 := &structs.TaskGroup{Name: "baz"}
job := &structs.Job{
ID: "foo",
Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
TaskGroups: []*structs.TaskGroup{tg1, tg2},
}
proposed := NewProposedAllocConstraintIterator(ctx, static)
proposed.SetTaskGroup(tg1)
proposed.SetJob(job)
out := collectFeasible(proposed)
if len(out) != 4 {
t.Fatalf("Bad: %#v", out)
}
selected := make(map[string]struct{}, 4)
for _, option := range out {
if _, ok := selected[option.ID]; ok {
t.Fatalf("selected node %v for more than one alloc", option)
}
selected[option.ID] = struct{}{}
}
}
func TestProposedAllocConstraint_JobDistinctHosts_Infeasible(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
}
static := NewStaticIterator(ctx, nodes)
// Create a job with a distinct_hosts constraint and two task groups.
tg1 := &structs.TaskGroup{Name: "bar"}
tg2 := &structs.TaskGroup{Name: "baz"}
job := &structs.Job{
ID: "foo",
Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
TaskGroups: []*structs.TaskGroup{tg1, tg2},
}
// Add allocs placing tg1 on node1 and tg2 on node2. This should make the
// job unsatisfiable.
plan := ctx.Plan()
plan.NodeAllocation[nodes[0].ID] = []*structs.Allocation{
&structs.Allocation{
TaskGroup: tg1.Name,
JobID: job.ID,
},
// Should be ignored as it is a different job.
&structs.Allocation{
TaskGroup: tg2.Name,
JobID: "ignore 2",
},
}
plan.NodeAllocation[nodes[1].ID] = []*structs.Allocation{
&structs.Allocation{
TaskGroup: tg2.Name,
JobID: job.ID,
},
// Should be ignored as it is a different job.
&structs.Allocation{
TaskGroup: tg1.Name,
JobID: "ignore 2",
},
}
proposed := NewProposedAllocConstraintIterator(ctx, static)
proposed.SetTaskGroup(tg1)
proposed.SetJob(job)
out := collectFeasible(proposed)
if len(out) != 0 {
t.Fatalf("Bad: %#v", out)
}
}
func TestProposedAllocConstraint_JobDistinctHosts_InfeasibleCount(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
}
static := NewStaticIterator(ctx, nodes)
// Create a job with a distinct_hosts constraint and three task groups.
tg1 := &structs.TaskGroup{Name: "bar"}
tg2 := &structs.TaskGroup{Name: "baz"}
tg3 := &structs.TaskGroup{Name: "bam"}
job := &structs.Job{
ID: "foo",
Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
TaskGroups: []*structs.TaskGroup{tg1, tg2, tg3},
}
proposed := NewProposedAllocConstraintIterator(ctx, static)
proposed.SetTaskGroup(tg1)
proposed.SetJob(job)
// It should not be able to place 3 task groups with only two nodes.
out := collectFeasible(proposed)
if len(out) != 2 {
t.Fatalf("Bad: %#v", out)
}
}
func TestProposedAllocConstraint_TaskGroupDistinctHosts(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
}
static := NewStaticIterator(ctx, nodes)
// Create a task group with a distinct_hosts constraint.
taskGroup := &structs.TaskGroup{
Name: "example",
Constraints: []*structs.Constraint{
{Operand: structs.ConstraintDistinctHosts},
},
}
// Add a planned alloc to node1.
plan := ctx.Plan()
plan.NodeAllocation[nodes[0].ID] = []*structs.Allocation{
&structs.Allocation{
TaskGroup: taskGroup.Name,
JobID: "foo",
},
}
// Add a planned alloc to node2 with the same task group name but a
// different job.
plan.NodeAllocation[nodes[1].ID] = []*structs.Allocation{
&structs.Allocation{
TaskGroup: taskGroup.Name,
JobID: "bar",
},
}
proposed := NewProposedAllocConstraintIterator(ctx, static)
proposed.SetTaskGroup(taskGroup)
proposed.SetJob(&structs.Job{ID: "foo"})
out := collectFeasible(proposed)
if len(out) != 1 {
t.Fatalf("Bad: %#v", out)
}
// Expect it to skip the first node as there is a previous alloc on it for
// the same task group.
if out[0] != nodes[1] {
t.Fatalf("Bad: %v", out)
}
}
func collectFeasible(iter FeasibleIterator) (out []*structs.Node) {
for {
next := iter.Next()


@@ -82,18 +82,6 @@ func NewBatchScheduler(logger *log.Logger, state State, planner Planner) Schedul
return s
}
// setStatus is used to update the status of the evaluation
func (s *GenericScheduler) setStatus(status, desc string) error {
s.logger.Printf("[DEBUG] sched: %#v: setting status to %s", s.eval, status)
newEval := s.eval.Copy()
newEval.Status = status
newEval.StatusDescription = desc
if s.nextEval != nil {
newEval.NextEval = s.nextEval.ID
}
return s.planner.UpdateEval(newEval)
}
// Process is used to handle a single evaluation
func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
// Store the evaluation
@@ -106,7 +94,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return s.setStatus(structs.EvalStatusFailed, desc)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
}
// Retry up to the maxScheduleAttempts
@@ -116,13 +104,13 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
}
if err := retryMax(limit, s.process); err != nil {
if statusErr, ok := err.(*SetStatusError); ok {
return s.setStatus(statusErr.EvalStatus, err.Error())
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
}
return err
}
// Update the status to complete
return s.setStatus(structs.EvalStatusComplete, "")
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
}
// process is wrapped in retryMax to iteratively run the handler until we have no
@@ -143,7 +131,7 @@ func (s *GenericScheduler) process() (bool, error) {
s.ctx = NewEvalContext(s.state, s.plan, s.logger)
// Construct the placement stack
s.stack = NewGenericStack(s.batch, s.ctx, nil)
s.stack = NewGenericStack(s.batch, s.ctx)
if s.job != nil {
s.stack.SetJob(s.job)
}
@@ -231,7 +219,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
}
// Attempt to do the upgrades in place
diff.update = s.inplaceUpdate(diff.update)
diff.update = inplaceUpdate(s.ctx, s.eval, s.job, s.stack, diff.update)
// Check if a rolling upgrade strategy is being used
limit := len(diff.update) + len(diff.migrate)
@@ -240,10 +228,10 @@ func (s *GenericScheduler) computeJobAllocs() error {
}
// Treat migrations as an eviction and a new placement.
s.evictAndPlace(diff, diff.migrate, allocMigrating, &limit)
s.limitReached = evictAndPlace(s.ctx, diff, diff.migrate, allocMigrating, &limit)
// Treat non in-place updates as an eviction and new placement.
s.evictAndPlace(diff, diff.update, allocUpdating, &limit)
s.limitReached = evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit)
// Nothing remaining to do if placement is not required
if len(diff.place) == 0 {
@@ -254,101 +242,6 @@ func (s *GenericScheduler) computeJobAllocs() error {
return s.computePlacements(diff.place)
}
// evictAndPlace is used to mark allocations for evicts and add them to the placement queue
func (s *GenericScheduler) evictAndPlace(diff *diffResult, allocs []allocTuple, desc string, limit *int) {
n := len(allocs)
for i := 0; i < n && i < *limit; i++ {
a := allocs[i]
s.plan.AppendUpdate(a.Alloc, structs.AllocDesiredStatusStop, desc)
diff.place = append(diff.place, a)
}
if n <= *limit {
*limit -= n
} else {
*limit = 0
s.limitReached = true
}
}
// inplaceUpdate attempts to update allocations in-place where possible.
func (s *GenericScheduler) inplaceUpdate(updates []allocTuple) []allocTuple {
n := len(updates)
inplace := 0
for i := 0; i < n; i++ {
// Get the update
update := updates[i]
// Check if the task drivers or config has changed, requires
// a rolling upgrade since that cannot be done in-place.
existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)
if tasksUpdated(update.TaskGroup, existing) {
continue
}
// Get the existing node
node, err := s.state.NodeByID(update.Alloc.NodeID)
if err != nil {
s.logger.Printf("[ERR] sched: %#v failed to get node '%s': %v",
s.eval, update.Alloc.NodeID, err)
continue
}
if node == nil {
continue
}
// Set the existing node as the base set
s.stack.SetNodes([]*structs.Node{node})
// Stage an eviction of the current allocation
s.plan.AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop,
allocInPlace)
// Attempt to match the task group
option, size := s.stack.Select(update.TaskGroup)
// Pop the allocation
s.plan.PopUpdate(update.Alloc)
// Skip if we could not do an in-place update
if option == nil {
continue
}
// Restore the network offers from the existing allocation.
// We do not allow network resources (reserved/dynamic ports)
// to be updated. This is guarded in taskUpdated, so we can
// safely restore those here.
for task, resources := range option.TaskResources {
existing := update.Alloc.TaskResources[task]
resources.Networks = existing.Networks
}
// Create a shallow copy
newAlloc := new(structs.Allocation)
*newAlloc = *update.Alloc
// Update the allocation
newAlloc.EvalID = s.eval.ID
newAlloc.Job = s.job
newAlloc.Resources = size
newAlloc.TaskResources = option.TaskResources
newAlloc.Metrics = s.ctx.Metrics()
newAlloc.DesiredStatus = structs.AllocDesiredStatusRun
newAlloc.ClientStatus = structs.AllocClientStatusPending
s.plan.AppendAlloc(newAlloc)
// Remove this allocation from the slice
updates[i] = updates[n-1]
i--
n--
inplace++
}
if len(updates) > 0 {
s.logger.Printf("[DEBUG] sched: %#v: %d in-place updates of %d", s.eval, inplace, len(updates))
}
return updates[:n]
}
// computePlacements computes placements for allocations
func (s *GenericScheduler) computePlacements(place []allocTuple) error {
// Get the base nodes


@@ -22,7 +22,7 @@ func TestServiceSched_JobRegister(t *testing.T) {
job := mock.Job()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create a mock evaluation to deregister the job
// Create a mock evaluation to register the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,
@@ -71,7 +71,7 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
job := mock.Job()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create a mock evaluation to deregister the job
// Create a mock evaluation to register the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,
@@ -550,7 +550,7 @@ func TestServiceSched_RetryLimit(t *testing.T) {
job := mock.Job()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create a mock evaluation to deregister the job
// Create a mock evaluation to register the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,


@@ -13,6 +13,7 @@ import (
var BuiltinSchedulers = map[string]Factory{
"service": NewServiceScheduler,
"batch": NewBatchScheduler,
"system": NewSystemScheduler,
}
// NewScheduler is used to instantiate and return a new scheduler


@@ -35,20 +35,21 @@ type Stack interface {
// GenericStack is the Stack used for the Generic scheduler. It is
// designed to make better placement decisions at the cost of performance.
type GenericStack struct {
batch bool
ctx Context
source *StaticIterator
jobConstraint *ConstraintIterator
taskGroupDrivers *DriverIterator
taskGroupConstraint *ConstraintIterator
binPack *BinPackIterator
jobAntiAff *JobAntiAffinityIterator
limit *LimitIterator
maxScore *MaxScoreIterator
batch bool
ctx Context
source *StaticIterator
jobConstraint *ConstraintIterator
taskGroupDrivers *DriverIterator
taskGroupConstraint *ConstraintIterator
proposedAllocConstraint *ProposedAllocConstraintIterator
binPack *BinPackIterator
jobAntiAff *JobAntiAffinityIterator
limit *LimitIterator
maxScore *MaxScoreIterator
}
// NewGenericStack constructs a stack used for selecting service placements
func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *GenericStack {
func NewGenericStack(batch bool, ctx Context) *GenericStack {
// Create a new stack
s := &GenericStack{
batch: batch,
@@ -58,7 +59,7 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
// Create the source iterator. We randomize the order we visit nodes
// to reduce collisions between schedulers and to do a basic load
// balancing across eligible nodes.
s.source = NewRandomIterator(ctx, baseNodes)
s.source = NewRandomIterator(ctx, nil)
// Attach the job constraints. The job is filled in later.
s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)
@@ -69,8 +70,11 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
// Filter on task group constraints second
s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)
// Filter on constraints that are affected by proposed allocations.
s.proposedAllocConstraint = NewProposedAllocConstraintIterator(ctx, s.taskGroupConstraint)
// Upgrade from feasible to rank iterator
rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)
rankSource := NewFeasibleRankIterator(ctx, s.proposedAllocConstraint)
// Apply the bin packing, this depends on the resources needed
// by a particular task group. Only enable eviction for the service
@@ -92,11 +96,6 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
// Select the node with the maximum score for placement
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
// Set the nodes if given
if len(baseNodes) != 0 {
s.SetNodes(baseNodes)
}
return s
}
@@ -109,7 +108,7 @@ func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
// Apply a limit function. This is to avoid scanning *every* possible node.
// For batch jobs we only need to evaluate 2 options and depend on the
// powwer of two choices. For services jobs we need to visit "enough".
// power of two choices. For services jobs we need to visit "enough".
// Using a log of the total number of nodes is a good restriction, with
// at least 2 as the floor
limit := 2
@@ -124,6 +123,7 @@ func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
func (s *GenericStack) SetJob(job *structs.Job) {
s.jobConstraint.SetConstraints(job.Constraints)
s.proposedAllocConstraint.SetJob(job)
s.binPack.SetPriority(job.Priority)
s.jobAntiAff.SetJob(job.ID)
}
@@ -134,21 +134,13 @@ func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Reso
s.ctx.Reset()
start := time.Now()
// Collect the constraints, drivers and resources required by each
// sub-task to aggregate the TaskGroup totals
constr := make([]*structs.Constraint, 0, len(tg.Constraints))
drivers := make(map[string]struct{})
size := new(structs.Resources)
constr = append(constr, tg.Constraints...)
for _, task := range tg.Tasks {
drivers[task.Driver] = struct{}{}
constr = append(constr, task.Constraints...)
size.Add(task.Resources)
}
// Get the task groups constraints.
tgConstr := taskGroupConstraints(tg)
// Update the parameters of iterators
s.taskGroupDrivers.SetDrivers(drivers)
s.taskGroupConstraint.SetConstraints(constr)
s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
s.proposedAllocConstraint.SetTaskGroup(tg)
s.binPack.SetTasks(tg.Tasks)
// Find the node with the max score
@@ -163,5 +155,83 @@ func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Reso
// Store the compute time
s.ctx.Metrics().AllocationTime = time.Since(start)
return option, size
return option, tgConstr.size
}
// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
type SystemStack struct {
ctx Context
source *StaticIterator
jobConstraint *ConstraintIterator
taskGroupDrivers *DriverIterator
taskGroupConstraint *ConstraintIterator
binPack *BinPackIterator
}
// NewSystemStack constructs a stack used for selecting system job placements
func NewSystemStack(ctx Context) *SystemStack {
// Create a new stack
s := &SystemStack{ctx: ctx}
// Create the source iterator. We visit nodes in a linear order because we
// have to evaluate on all nodes.
s.source = NewStaticIterator(ctx, nil)
// Attach the job constraints. The job is filled in later.
s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)
// Filter on task group drivers first as they are faster
s.taskGroupDrivers = NewDriverIterator(ctx, s.jobConstraint, nil)
// Filter on task group constraints second
s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)
// Upgrade from feasible to rank iterator
rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)
// Apply the bin packing, this depends on the resources needed
// by a particular task group. Enable eviction as system jobs are high
// priority.
s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
return s
}
func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
// Update the set of base nodes
s.source.SetNodes(baseNodes)
}
func (s *SystemStack) SetJob(job *structs.Job) {
s.jobConstraint.SetConstraints(job.Constraints)
s.binPack.SetPriority(job.Priority)
}
func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
// Reset the binpack selector and context
s.binPack.Reset()
s.ctx.Reset()
start := time.Now()
// Get the task groups constraints.
tgConstr := taskGroupConstraints(tg)
// Update the parameters of iterators
s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
s.binPack.SetTasks(tg.Tasks)
// Get the next option that satisfies the constraints.
option := s.binPack.Next()
// Ensure that the task resources were specified
if option != nil && len(option.TaskResources) != len(tg.Tasks) {
for _, task := range tg.Tasks {
option.SetTaskResources(task, task.Resources)
}
}
// Store the compute time
s.ctx.Metrics().AllocationTime = time.Since(start)
return option, tgConstr.size
}
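Unlike the generic stack, which ranks many candidate nodes and keeps the best, the system stack is meant to be driven one node at a time. A hypothetical sketch of the intended call pattern, mirroring how computePlacements later in this diff uses it:

// placeEverywhere shows the per-node drive of a SystemStack: set a single
// node, Select, and record the placement when one is found.
func placeEverywhere(ctx Context, job *structs.Job, tg *structs.TaskGroup, nodes []*structs.Node) []*RankedNode {
	stack := NewSystemStack(ctx)
	stack.SetJob(job)
	var placed []*RankedNode
	for _, node := range nodes {
		stack.SetNodes([]*structs.Node{node})
		if option, _ := stack.Select(tg); option != nil {
			placed = append(placed, option)
		}
	}
	return placed
}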


@@ -10,7 +10,7 @@ import (
func TestServiceStack_SetNodes(t *testing.T) {
_, ctx := testContext(t)
stack := NewGenericStack(false, ctx, nil)
stack := NewGenericStack(false, ctx)
nodes := []*structs.Node{
mock.Node(),
@@ -37,7 +37,7 @@ func TestServiceStack_SetNodes(t *testing.T) {
func TestServiceStack_SetJob(t *testing.T) {
_, ctx := testContext(t)
stack := NewGenericStack(false, ctx, nil)
stack := NewGenericStack(false, ctx)
job := mock.Job()
stack.SetJob(job)
@@ -55,7 +55,8 @@ func TestServiceStack_Select_Size(t *testing.T) {
nodes := []*structs.Node{
mock.Node(),
}
stack := NewGenericStack(false, ctx, nodes)
stack := NewGenericStack(false, ctx)
stack.SetNodes(nodes)
job := mock.Job()
stack.SetJob(job)
@@ -85,7 +86,8 @@ func TestServiceStack_Select_MetricsReset(t *testing.T) {
mock.Node(),
mock.Node(),
}
stack := NewGenericStack(false, ctx, nodes)
stack := NewGenericStack(false, ctx)
stack.SetNodes(nodes)
job := mock.Job()
stack.SetJob(job)
@@ -120,7 +122,8 @@ func TestServiceStack_Select_DriverFilter(t *testing.T) {
zero := nodes[0]
zero.Attributes["driver.foo"] = "1"
stack := NewGenericStack(false, ctx, nodes)
stack := NewGenericStack(false, ctx)
stack.SetNodes(nodes)
job := mock.Job()
job.TaskGroups[0].Tasks[0].Driver = "foo"
@@ -145,7 +148,8 @@ func TestServiceStack_Select_ConstraintFilter(t *testing.T) {
zero := nodes[0]
zero.Attributes["kernel.name"] = "freebsd"
stack := NewGenericStack(false, ctx, nodes)
stack := NewGenericStack(false, ctx)
stack.SetNodes(nodes)
job := mock.Job()
job.Constraints[0].RTarget = "freebsd"
@@ -182,7 +186,8 @@ func TestServiceStack_Select_BinPack_Overflow(t *testing.T) {
one := nodes[1]
one.Reserved = one.Resources
stack := NewGenericStack(false, ctx, nodes)
stack := NewGenericStack(false, ctx)
stack.SetNodes(nodes)
job := mock.Job()
stack.SetJob(job)
@@ -207,3 +212,209 @@ func TestServiceStack_Select_BinPack_Overflow(t *testing.T) {
t.Fatalf("bad: %#v", met)
}
}
func TestSystemStack_SetNodes(t *testing.T) {
_, ctx := testContext(t)
stack := NewSystemStack(ctx)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
}
stack.SetNodes(nodes)
out := collectFeasible(stack.source)
if !reflect.DeepEqual(out, nodes) {
t.Fatalf("bad: %#v", out)
}
}
func TestSystemStack_SetJob(t *testing.T) {
_, ctx := testContext(t)
stack := NewSystemStack(ctx)
job := mock.Job()
stack.SetJob(job)
if stack.binPack.priority != job.Priority {
t.Fatalf("bad")
}
if !reflect.DeepEqual(stack.jobConstraint.constraints, job.Constraints) {
t.Fatalf("bad")
}
}
func TestSystemStack_Select_Size(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{mock.Node()}
stack := NewSystemStack(ctx)
stack.SetNodes(nodes)
job := mock.Job()
stack.SetJob(job)
node, size := stack.Select(job.TaskGroups[0])
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
if size == nil {
t.Fatalf("missing size")
}
if size.CPU != 500 || size.MemoryMB != 256 {
t.Fatalf("bad: %#v", size)
}
met := ctx.Metrics()
if met.AllocationTime == 0 {
t.Fatalf("missing time")
}
}
func TestSystemStack_Select_MetricsReset(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
}
stack := NewSystemStack(ctx)
stack.SetNodes(nodes)
job := mock.Job()
stack.SetJob(job)
n1, _ := stack.Select(job.TaskGroups[0])
m1 := ctx.Metrics()
if n1 == nil {
t.Fatalf("missing node %#v", m1)
}
if m1.NodesEvaluated != 1 {
t.Fatalf("should only be 1")
}
n2, _ := stack.Select(job.TaskGroups[0])
m2 := ctx.Metrics()
if n2 == nil {
t.Fatalf("missing node %#v", m2)
}
// If we don't reset, this would be 2
if m2.NodesEvaluated != 1 {
t.Fatalf("should only be 1")
}
}
func TestSystemStack_Select_DriverFilter(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
}
zero := nodes[0]
zero.Attributes["driver.foo"] = "1"
stack := NewSystemStack(ctx)
stack.SetNodes(nodes)
job := mock.Job()
job.TaskGroups[0].Tasks[0].Driver = "foo"
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
if node.Node != zero {
t.Fatalf("bad")
}
zero.Attributes["driver.foo"] = "0"
stack = NewSystemStack(ctx)
stack.SetNodes(nodes)
stack.SetJob(job)
node, _ = stack.Select(job.TaskGroups[0])
if node != nil {
t.Fatalf("node not filtered %#v", node)
}
}
func TestSystemStack_Select_ConstraintFilter(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
}
zero := nodes[1]
zero.Attributes["kernel.name"] = "freebsd"
stack := NewSystemStack(ctx)
stack.SetNodes(nodes)
job := mock.Job()
job.Constraints[0].RTarget = "freebsd"
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
if node.Node != zero {
t.Fatalf("bad")
}
met := ctx.Metrics()
if met.NodesFiltered != 1 {
t.Fatalf("bad: %#v", met)
}
if met.ClassFiltered["linux-medium-pci"] != 1 {
t.Fatalf("bad: %#v", met)
}
if met.ConstraintFiltered["$attr.kernel.name = freebsd"] != 1 {
t.Fatalf("bad: %#v", met)
}
}
func TestSystemStack_Select_BinPack_Overflow(t *testing.T) {
_, ctx := testContext(t)
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
}
zero := nodes[0]
zero.Reserved = zero.Resources
one := nodes[1]
stack := NewSystemStack(ctx)
stack.SetNodes(nodes)
job := mock.Job()
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
if node.Node != one {
t.Fatalf("bad")
}
met := ctx.Metrics()
if met.NodesExhausted != 1 {
t.Fatalf("bad: %#v", met)
}
if met.ClassExhausted["linux-medium-pci"] != 1 {
t.Fatalf("bad: %#v", met)
}
if len(met.Scores) != 1 {
t.Fatalf("bad: %#v", met)
}
}

265
scheduler/system_sched.go Normal file

@@ -0,0 +1,265 @@
package scheduler
import (
"fmt"
"log"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// maxSystemScheduleAttempts is used to limit the number of times
// we will attempt to schedule if we continue to hit conflicts for system
// jobs.
maxSystemScheduleAttempts = 5
// allocNodeTainted is the status used when stopping an alloc because its
// node is tainted.
allocNodeTainted = "system alloc not needed as node is tainted"
)
// SystemScheduler is used for 'system' jobs. This scheduler is
// designed for services that should be run on every client.
type SystemScheduler struct {
logger *log.Logger
state State
planner Planner
eval *structs.Evaluation
job *structs.Job
plan *structs.Plan
ctx *EvalContext
stack *SystemStack
nodes []*structs.Node
limitReached bool
nextEval *structs.Evaluation
}
// NewSystemScheduler is a factory function to instantiate a new system
// scheduler.
func NewSystemScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
return &SystemScheduler{
logger: logger,
state: state,
planner: planner,
}
}
// Process is used to handle a single evaluation.
func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
// Store the evaluation
s.eval = eval
// Verify the evaluation trigger reason is understood
switch eval.TriggeredBy {
case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,
structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate:
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
}
// Retry up to the maxSystemScheduleAttempts
if err := retryMax(maxSystemScheduleAttempts, s.process); err != nil {
if statusErr, ok := err.(*SetStatusError); ok {
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
}
return err
}
// Update the status to complete
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
}
// process is wrapped in retryMax to iteratively run the handler until we have no
// further work or we've made the maximum number of attempts.
func (s *SystemScheduler) process() (bool, error) {
// Lookup the Job by ID
var err error
s.job, err = s.state.JobByID(s.eval.JobID)
if err != nil {
return false, fmt.Errorf("failed to get job '%s': %v",
s.eval.JobID, err)
}
// Get the ready nodes in the required datacenters
if s.job != nil {
s.nodes, err = readyNodesInDCs(s.state, s.job.Datacenters)
if err != nil {
return false, fmt.Errorf("failed to get ready nodes: %v", err)
}
}
// Create a plan
s.plan = s.eval.MakePlan(s.job)
// Create an evaluation context
s.ctx = NewEvalContext(s.state, s.plan, s.logger)
// Construct the placement stack
s.stack = NewSystemStack(s.ctx)
if s.job != nil {
s.stack.SetJob(s.job)
}
// Compute the target job allocations
if err := s.computeJobAllocs(); err != nil {
s.logger.Printf("[ERR] sched: %#v: %v", s.eval, err)
return false, err
}
// If the plan is a no-op, we can bail
if s.plan.IsNoOp() {
return true, nil
}
// If the limit of placements was reached we need to create an evaluation
// to pickup from here after the stagger period.
if s.limitReached && s.nextEval == nil {
s.nextEval = s.eval.NextRollingEval(s.job.Update.Stagger)
if err := s.planner.CreateEval(s.nextEval); err != nil {
s.logger.Printf("[ERR] sched: %#v failed to make next eval for rolling update: %v", s.eval, err)
return false, err
}
s.logger.Printf("[DEBUG] sched: %#v: rolling update limit reached, next eval '%s' created", s.eval, s.nextEval.ID)
}
// Submit the plan
result, newState, err := s.planner.SubmitPlan(s.plan)
if err != nil {
return false, err
}
// If we got a state refresh, try again since we have stale data
if newState != nil {
s.logger.Printf("[DEBUG] sched: %#v: refresh forced", s.eval)
s.state = newState
return false, nil
}
// Try again if the plan was not fully committed, potential conflict
fullCommit, expected, actual := result.FullCommit(s.plan)
if !fullCommit {
s.logger.Printf("[DEBUG] sched: %#v: attempted %d placements, %d placed",
s.eval, expected, actual)
return false, nil
}
// Success!
return true, nil
}
// computeJobAllocs is used to reconcile differences between the job,
// existing allocations and node status to update the allocations.
func (s *SystemScheduler) computeJobAllocs() error {
// Lookup the allocations by JobID
allocs, err := s.state.AllocsByJob(s.eval.JobID)
if err != nil {
return fmt.Errorf("failed to get allocs for job '%s': %v",
s.eval.JobID, err)
}
// Filter out the allocations in a terminal state
allocs = structs.FilterTerminalAllocs(allocs)
// Determine the tainted nodes containing job allocs
tainted, err := taintedNodes(s.state, allocs)
if err != nil {
return fmt.Errorf("failed to get tainted nodes for job '%s': %v",
s.eval.JobID, err)
}
// Diff the required and existing allocations
diff := diffSystemAllocs(s.job, s.nodes, tainted, allocs)
s.logger.Printf("[DEBUG] sched: %#v: %#v", s.eval, diff)
// Add all the allocs to stop
for _, e := range diff.stop {
s.plan.AppendUpdate(e.Alloc, structs.AllocDesiredStatusStop, allocNotNeeded)
}
// Attempt to do the upgrades in place
diff.update = inplaceUpdate(s.ctx, s.eval, s.job, s.stack, diff.update)
// Check if a rolling upgrade strategy is being used
limit := len(diff.update)
if s.job != nil && s.job.Update.Rolling() {
limit = s.job.Update.MaxParallel
}
// Treat non in-place updates as an eviction and new placement.
s.limitReached = evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit)
// Nothing remaining to do if placement is not required
if len(diff.place) == 0 {
return nil
}
// Compute the placements
return s.computePlacements(diff.place)
}
// computePlacements computes placements for allocations
func (s *SystemScheduler) computePlacements(place []allocTuple) error {
nodeByID := make(map[string]*structs.Node, len(s.nodes))
for _, node := range s.nodes {
nodeByID[node.ID] = node
}
// Track the failed task groups so that we can coalesce
// the failures together to avoid creating many failed allocs.
failedTG := make(map[*structs.TaskGroup]*structs.Allocation)
nodes := make([]*structs.Node, 1)
for _, missing := range place {
node, ok := nodeByID[missing.Alloc.NodeID]
if !ok {
return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)
}
// Update the set of placement nodes
nodes[0] = node
s.stack.SetNodes(nodes)
// Attempt to match the task group
option, size := s.stack.Select(missing.TaskGroup)
if option == nil {
// Check if this task group has already failed
if alloc, ok := failedTG[missing.TaskGroup]; ok {
alloc.Metrics.CoalescedFailures += 1
continue
}
}
// Create an allocation for this
alloc := &structs.Allocation{
ID: structs.GenerateUUID(),
EvalID: s.eval.ID,
Name: missing.Name,
JobID: s.job.ID,
Job: s.job,
TaskGroup: missing.TaskGroup.Name,
Resources: size,
Metrics: s.ctx.Metrics(),
}
// Set fields based on if we found an allocation option
if option != nil {
alloc.NodeID = option.Node.ID
alloc.TaskResources = option.TaskResources
alloc.DesiredStatus = structs.AllocDesiredStatusRun
alloc.ClientStatus = structs.AllocClientStatusPending
s.plan.AppendAlloc(alloc)
} else {
alloc.DesiredStatus = structs.AllocDesiredStatusFailed
alloc.DesiredDescription = "failed to find a node for placement"
alloc.ClientStatus = structs.AllocClientStatusFailed
s.plan.AppendFailed(alloc)
failedTG[missing.TaskGroup] = alloc
}
}
return nil
}


@@ -0,0 +1,651 @@
package scheduler
import (
"testing"
"time"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
)
func TestSystemSched_JobRegister(t *testing.T) {
h := NewHarness(t)
// Create some nodes
for i := 0; i < 10; i++ {
node := mock.Node()
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Create a job
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create a mock evaluation to register the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan allocated
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
planned = append(planned, allocList...)
}
if len(planned) != 10 {
t.Fatalf("bad: %#v", plan)
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure all allocations placed
if len(out) != 10 {
t.Fatalf("bad: %#v", out)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_JobRegister_AddNode(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
var allocs []*structs.Allocation
for _, node := range nodes {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
allocs = append(allocs, alloc)
}
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Add a new node.
node := mock.Node()
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
// Create a mock evaluation to deal with the node update
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan had no node updates
var update []*structs.Allocation
for _, updateList := range plan.NodeUpdate {
update = append(update, updateList...)
}
if len(update) != 0 {
t.Log(len(update))
t.Fatalf("bad: %#v", plan)
}
// Ensure the plan allocated on the new node
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
planned = append(planned, allocList...)
}
if len(planned) != 1 {
t.Fatalf("bad: %#v", plan)
}
// Ensure it allocated on the right node
if _, ok := plan.NodeAllocation[node.ID]; !ok {
t.Fatalf("allocated on wrong node: %#v", plan)
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure all allocations placed
out = structs.FilterTerminalAllocs(out)
if len(out) != 11 {
t.Fatalf("bad: %#v", out)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_JobRegister_AllocFail(t *testing.T) {
h := NewHarness(t)
// Create NO nodes
// Create a job
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create a mock evaluation to register the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure no plan as this should be a no-op.
if len(h.Plans) != 0 {
t.Fatalf("bad: %#v", h.Plans)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_JobModify(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
var allocs []*structs.Allocation
for _, node := range nodes {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
allocs = append(allocs, alloc)
}
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Add a few terminal status allocations, these should be ignored
var terminal []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = nodes[i].ID
alloc.Name = "my-job.web[0]"
alloc.DesiredStatus = structs.AllocDesiredStatusFailed
terminal = append(terminal, alloc)
}
noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))
// Update the job
job2 := mock.SystemJob()
job2.ID = job.ID
// Update the task, such that it cannot be done in-place
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))
// Create a mock evaluation to deal with the job update
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan evicted all allocs
var update []*structs.Allocation
for _, updateList := range plan.NodeUpdate {
update = append(update, updateList...)
}
if len(update) != len(allocs) {
t.Fatalf("bad: %#v", plan)
}
// Ensure the plan allocated
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
planned = append(planned, allocList...)
}
if len(planned) != 10 {
t.Fatalf("bad: %#v", plan)
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure all allocations placed
out = structs.FilterTerminalAllocs(out)
if len(out) != 10 {
t.Fatalf("bad: %#v", out)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_JobModify_Rolling(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
var allocs []*structs.Allocation
for _, node := range nodes {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
allocs = append(allocs, alloc)
}
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Update the job
job2 := mock.SystemJob()
job2.ID = job.ID
job2.Update = structs.UpdateStrategy{
Stagger: 30 * time.Second,
MaxParallel: 5,
}
// Update the task, such that it cannot be done in-place
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))
// Create a mock evaluation to trigger the job update
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan evicted only MaxParallel
var update []*structs.Allocation
for _, updateList := range plan.NodeUpdate {
update = append(update, updateList...)
}
if len(update) != job2.Update.MaxParallel {
t.Fatalf("bad: %#v", plan)
}
// Ensure the plan allocated
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
planned = append(planned, allocList...)
}
if len(planned) != job2.Update.MaxParallel {
t.Fatalf("bad: %#v", plan)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
// Ensure a follow up eval was created
eval = h.Evals[0]
if eval.NextEval == "" {
t.Fatalf("missing next eval")
}
// Check for create
if len(h.CreateEvals) == 0 {
t.Fatalf("missing created eval")
}
create := h.CreateEvals[0]
if eval.NextEval != create.ID {
t.Fatalf("ID mismatch")
}
if create.PreviousEval != eval.ID {
t.Fatalf("missing previous eval")
}
if create.TriggeredBy != structs.EvalTriggerRollingUpdate {
t.Fatalf("bad: %#v", create)
}
}
func TestSystemSched_JobModify_InPlace(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
var allocs []*structs.Allocation
for _, node := range nodes {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
allocs = append(allocs, alloc)
}
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Update the job
job2 := mock.SystemJob()
job2.ID = job.ID
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))
// Create a mock evaluation to trigger the job update
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan did not evict any allocs
var update []*structs.Allocation
for _, updateList := range plan.NodeUpdate {
update = append(update, updateList...)
}
if len(update) != 0 {
t.Fatalf("bad: %#v", plan)
}
// Ensure the plan updated the existing allocs
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
planned = append(planned, allocList...)
}
if len(planned) != 10 {
t.Fatalf("bad: %#v", plan)
}
for _, p := range planned {
if p.Job != job2 {
t.Fatalf("should update job")
}
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure all allocations placed
if len(out) != 10 {
t.Fatalf("bad: %#v", out)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
// Verify the network did not change
for _, alloc := range out {
for _, resources := range alloc.TaskResources {
if resources.Networks[0].ReservedPorts[0] != 5000 {
t.Fatalf("bad: %#v", alloc)
}
}
}
}
func TestSystemSched_JobDeregister(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations
job := mock.SystemJob()
var allocs []*structs.Allocation
for _, node := range nodes {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
allocs = append(allocs, alloc)
}
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Create a mock evaluation to deregister the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerJobDeregister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan evicted the job from all nodes.
for _, node := range nodes {
if len(plan.NodeUpdate[node.ID]) != 1 {
t.Fatalf("bad: %#v", plan)
}
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure no remaining allocations
out = structs.FilterTerminalAllocs(out)
if len(out) != 0 {
t.Fatalf("bad: %#v", out)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_NodeDrain(t *testing.T) {
h := NewHarness(t)
// Register a draining node
node := mock.Node()
node.Drain = true
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
// Generate a fake job allocated on that node.
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = "my-job.web[0]"
noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))
// Create a mock evaluation to deal with drain
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
NodeID: node.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a single plan
if len(h.Plans) != 1 {
t.Fatalf("bad: %#v", h.Plans)
}
plan := h.Plans[0]
// Ensure the plan evicted all allocs
if len(plan.NodeUpdate[node.ID]) != 1 {
t.Fatalf("bad: %#v", plan)
}
// Ensure the plan updated the allocation.
var planned []*structs.Allocation
for _, allocList := range plan.NodeUpdate {
planned = append(planned, allocList...)
}
if len(planned) != 1 {
t.Log(len(planned))
t.Fatalf("bad: %#v", plan)
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure the allocation is stopped
if planned[0].DesiredStatus != structs.AllocDesiredStatusStop {
t.Fatalf("bad: %#v", out)
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestSystemSched_RetryLimit(t *testing.T) {
h := NewHarness(t)
h.Planner = &RejectPlan{h}
// Create some nodes
for i := 0; i < 10; i++ {
node := mock.Node()
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Create a job
job := mock.SystemJob()
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
// Create a mock evaluation to register the job
eval := &structs.Evaluation{
ID: structs.GenerateUUID(),
Priority: job.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
}
// Process the evaluation
err := h.Process(NewSystemScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure multiple plans
if len(h.Plans) == 0 {
t.Fatalf("bad: %#v", h.Plans)
}
// Lookup the allocations by JobID
out, err := h.State.AllocsByJob(job.ID)
noErr(t, err)
// Ensure no allocations placed
if len(out) != 0 {
t.Fatalf("bad: %#v", out)
}
// Should hit the retry limit
h.AssertEvalStatus(t, structs.EvalStatusFailed)
}

View File

@@ -2,6 +2,7 @@ package scheduler
import (
"fmt"
"log"
"math/rand"
"reflect"
@@ -19,6 +20,10 @@ type allocTuple struct {
// a job requires. This is used to do the count expansion.
func materializeTaskGroups(job *structs.Job) map[string]*structs.TaskGroup {
out := make(map[string]*structs.TaskGroup)
if job == nil {
return out
}
for _, tg := range job.TaskGroups {
for i := 0; i < tg.Count; i++ {
name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i)
@@ -38,6 +43,14 @@ func (d *diffResult) GoString() string {
len(d.place), len(d.update), len(d.migrate), len(d.stop), len(d.ignore))
}
func (d *diffResult) Append(other *diffResult) {
d.place = append(d.place, other.place...)
d.update = append(d.update, other.update...)
d.migrate = append(d.migrate, other.migrate...)
d.stop = append(d.stop, other.stop...)
d.ignore = append(d.ignore, other.ignore...)
}
// diffAllocs is used to do a set difference between the target allocations
// and the existing allocations. This returns 5 sets of results, the list of
// named task groups that need to be placed (no existing allocation), the
@@ -117,6 +130,48 @@ func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
return result
}
// diffSystemAllocs is like diffAllocs however, the allocations in the
// diffResult contain the specific nodeID they should be allocated on.
func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[string]bool,
allocs []*structs.Allocation) *diffResult {
// Build a mapping of nodes to all their allocs.
nodeAllocs := make(map[string][]*structs.Allocation, len(allocs))
for _, alloc := range allocs {
nallocs := append(nodeAllocs[alloc.NodeID], alloc)
nodeAllocs[alloc.NodeID] = nallocs
}
for _, node := range nodes {
if _, ok := nodeAllocs[node.ID]; !ok {
nodeAllocs[node.ID] = nil
}
}
// Create the required task groups.
required := materializeTaskGroups(job)
result := &diffResult{}
for nodeID, allocs := range nodeAllocs {
diff := diffAllocs(job, taintedNodes, required, allocs)
// Mark the alloc as being for a specific node.
for i := range diff.place {
alloc := &diff.place[i]
alloc.Alloc = &structs.Allocation{NodeID: nodeID}
}
// Migrate does not apply to system jobs and instead should be marked as
// stop because if a node is tainted, the job is invalid on that node.
diff.stop = append(diff.stop, diff.migrate...)
diff.migrate = nil
result.Append(diff)
}
return result
}
// readyNodesInDCs returns all the ready nodes in the given datacenters
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
// Index the DCs
@@ -208,7 +263,7 @@ func shuffleNodes(nodes []*structs.Node) {
}
// tasksUpdated does a diff between task groups to see if the
// tasks, their drivers or config have updated.
// tasks, their drivers, environment variables or config have updated.
func tasksUpdated(a, b *structs.TaskGroup) bool {
// If the number of tasks do not match, clearly there is an update
if len(a.Tasks) != len(b.Tasks) {
@@ -227,6 +282,9 @@ func tasksUpdated(a, b *structs.TaskGroup) bool {
if !reflect.DeepEqual(at.Config, bt.Config) {
return true
}
if !reflect.DeepEqual(at.Env, bt.Env) {
return true
}
// Inspect the network to see if the dynamic ports are different
if len(at.Resources.Networks) != len(bt.Resources.Networks) {
@@ -242,3 +300,148 @@ func tasksUpdated(a, b *structs.TaskGroup) bool {
}
return false
}
// setStatus is used to update the status of the evaluation
func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
newEval := eval.Copy()
newEval.Status = status
newEval.StatusDescription = desc
if nextEval != nil {
newEval.NextEval = nextEval.ID
}
return planner.UpdateEval(newEval)
}
// inplaceUpdate attempts to update allocations in-place where possible.
func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
stack Stack, updates []allocTuple) []allocTuple {
n := len(updates)
inplace := 0
for i := 0; i < n; i++ {
// Get the update
update := updates[i]
// Check if the task drivers or config has changed, requires
// a rolling upgrade since that cannot be done in-place.
existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)
if tasksUpdated(update.TaskGroup, existing) {
continue
}
// Get the existing node
node, err := ctx.State().NodeByID(update.Alloc.NodeID)
if err != nil {
ctx.Logger().Printf("[ERR] sched: %#v failed to get node '%s': %v",
eval, update.Alloc.NodeID, err)
continue
}
if node == nil {
continue
}
// Set the existing node as the base set
stack.SetNodes([]*structs.Node{node})
// Stage an eviction of the current allocation. This is done so that
// the current allocation is discounted when checking for feasibility.
// Otherwise we would be trying to fit the task's current resources and
// updated resources. After select is called we can remove the evict.
ctx.Plan().AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop,
allocInPlace)
// Attempt to match the task group
option, size := stack.Select(update.TaskGroup)
// Pop the allocation
ctx.Plan().PopUpdate(update.Alloc)
// Skip if we could not do an in-place update
if option == nil {
continue
}
// Restore the network offers from the existing allocation.
// We do not allow network resources (reserved/dynamic ports)
// to be updated. This is guarded in tasksUpdated, so we can
// safely restore those here.
for task, resources := range option.TaskResources {
existing := update.Alloc.TaskResources[task]
resources.Networks = existing.Networks
}
// Create a shallow copy
newAlloc := new(structs.Allocation)
*newAlloc = *update.Alloc
// Update the allocation
newAlloc.EvalID = eval.ID
newAlloc.Job = job
newAlloc.Resources = size
newAlloc.TaskResources = option.TaskResources
newAlloc.Metrics = ctx.Metrics()
newAlloc.DesiredStatus = structs.AllocDesiredStatusRun
newAlloc.ClientStatus = structs.AllocClientStatusPending
ctx.Plan().AppendAlloc(newAlloc)
// Remove this allocation from the slice
updates[i] = updates[n-1]
i--
n--
inplace++
}
if len(updates) > 0 {
ctx.Logger().Printf("[DEBUG] sched: %#v: %d in-place updates of %d", eval, inplace, len(updates))
}
return updates[:n]
}
// evictAndPlace is used to mark allocations for evicts and add them to the
// placement queue. evictAndPlace modifies both the diffResult and the
// limit. It returns true if the limit has been reached.
func evictAndPlace(ctx Context, diff *diffResult, allocs []allocTuple, desc string, limit *int) bool {
n := len(allocs)
for i := 0; i < n && i < *limit; i++ {
a := allocs[i]
ctx.Plan().AppendUpdate(a.Alloc, structs.AllocDesiredStatusStop, desc)
diff.place = append(diff.place, a)
}
if n <= *limit {
*limit -= n
return false
}
*limit = 0
return true
}
// tgConstrainTuple is used to store the total constraints of a task group.
type tgConstrainTuple struct {
// Holds the combined constraints of the task group and all its sub-tasks.
constraints []*structs.Constraint
// The set of required drivers within the task group.
drivers map[string]struct{}
// The combined resources of all tasks within the task group.
size *structs.Resources
}
// taskGroupConstraints collects the constraints, drivers and resources required by each
// sub-task to aggregate the TaskGroup totals
func taskGroupConstraints(tg *structs.TaskGroup) tgConstrainTuple {
c := tgConstrainTuple{
constraints: make([]*structs.Constraint, 0, len(tg.Constraints)),
drivers: make(map[string]struct{}),
size: new(structs.Resources),
}
c.constraints = append(c.constraints, tg.Constraints...)
for _, task := range tg.Tasks {
c.drivers[task.Driver] = struct{}{}
c.constraints = append(c.constraints, task.Constraints...)
c.size.Add(task.Resources)
}
return c
}

View File

@@ -2,6 +2,7 @@ package scheduler
import (
"fmt"
"log"
"os"
"reflect"
"testing"
@@ -109,6 +110,80 @@ func TestDiffAllocs(t *testing.T) {
}
}
func TestDiffSystemAllocs(t *testing.T) {
job := mock.SystemJob()
// Create three alive nodes.
nodes := []*structs.Node{{ID: "foo"}, {ID: "bar"}, {ID: "baz"}}
// The "old" job has a previous modify index
oldJob := new(structs.Job)
*oldJob = *job
oldJob.ModifyIndex -= 1
tainted := map[string]bool{
"dead": true,
"baz": false,
}
allocs := []*structs.Allocation{
// Update allocation on baz
&structs.Allocation{
ID: structs.GenerateUUID(),
NodeID: "baz",
Name: "my-job.web[0]",
Job: oldJob,
},
// Ignore allocation on bar
&structs.Allocation{
ID: structs.GenerateUUID(),
NodeID: "bar",
Name: "my-job.web[0]",
Job: job,
},
// Stop allocation on dead.
&structs.Allocation{
ID: structs.GenerateUUID(),
NodeID: "dead",
Name: "my-job.web[0]",
},
}
diff := diffSystemAllocs(job, nodes, tainted, allocs)
place := diff.place
update := diff.update
migrate := diff.migrate
stop := diff.stop
ignore := diff.ignore
// We should update the first alloc
if len(update) != 1 || update[0].Alloc != allocs[0] {
t.Fatalf("bad: %#v", update)
}
// We should ignore the second alloc
if len(ignore) != 1 || ignore[0].Alloc != allocs[1] {
t.Fatalf("bad: %#v", ignore)
}
// We should stop the third alloc
if len(stop) != 1 || stop[0].Alloc != allocs[2] {
t.Fatalf("bad: %#v", stop)
}
// There should be no migrates.
if len(migrate) != 0 {
t.Fatalf("bad: %#v", migrate)
}
// We should place 1
if len(place) != 1 {
t.Fatalf("bad: %#v", place)
}
}
func TestReadyNodesInDCs(t *testing.T) {
state, err := state.NewStateStore(os.Stderr)
if err != nil {
@@ -213,18 +288,25 @@ func TestTaintedNodes(t *testing.T) {
}
func TestShuffleNodes(t *testing.T) {
// Use a large number of nodes to make the probability of shuffling to the
// original order very low.
nodes := []*structs.Node{
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
mock.Node(),
}
orig := make([]*structs.Node, len(nodes))
copy(orig, nodes)
shuffleNodes(nodes)
if reflect.DeepEqual(nodes, orig) {
t.Fatalf("shoudl not match")
t.Fatalf("should not match")
}
}
@@ -265,4 +347,304 @@ func TestTasksUpdated(t *testing.T) {
if !tasksUpdated(j1.TaskGroups[0], j6.TaskGroups[0]) {
t.Fatalf("bad")
}
j7 := mock.Job()
j7.TaskGroups[0].Tasks[0].Env["NEW_ENV"] = "NEW_VALUE"
if !tasksUpdated(j1.TaskGroups[0], j7.TaskGroups[0]) {
t.Fatalf("bad")
}
}
func TestEvictAndPlace_LimitLessThanAllocs(t *testing.T) {
_, ctx := testContext(t)
allocs := []allocTuple{
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
}
diff := &diffResult{}
limit := 2
if !evictAndPlace(ctx, diff, allocs, "", &limit) {
t.Fatal("evictAndReplace() should have returned true")
}
if limit != 0 {
t.Fatalf("evictAndReplace() should decremented limit; got %v; want 0", limit)
}
if len(diff.place) != 2 {
t.Fatalf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)
}
}
func TestEvictAndPlace_LimitEqualToAllocs(t *testing.T) {
_, ctx := testContext(t)
allocs := []allocTuple{
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
}
diff := &diffResult{}
limit := 4
if evictAndPlace(ctx, diff, allocs, "", &limit) {
t.Fatal("evictAndReplace() should have returned false")
}
if limit != 0 {
t.Fatalf("evictAndReplace() should decremented limit; got %v; want 0", limit)
}
if len(diff.place) != 4 {
t.Fatalf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)
}
}
func TestSetStatus(t *testing.T) {
h := NewHarness(t)
logger := log.New(os.Stderr, "", log.LstdFlags)
eval := mock.Eval()
status := "a"
desc := "b"
if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
if len(h.Evals) != 1 {
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
}
newEval := h.Evals[0]
if newEval.ID != eval.ID || newEval.Status != status || newEval.StatusDescription != desc {
t.Fatalf("setStatus() submited invalid eval: %v", newEval)
}
h = NewHarness(t)
next := mock.Eval()
if err := setStatus(logger, h, eval, next, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
if len(h.Evals) != 1 {
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
}
newEval = h.Evals[0]
if newEval.NextEval != next.ID {
t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
}
}
func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {
state, ctx := testContext(t)
eval := mock.Eval()
job := mock.Job()
node := mock.Node()
noErr(t, state.UpsertNode(1000, node))
// Register an alloc
alloc := &structs.Allocation{
ID: structs.GenerateUUID(),
EvalID: eval.ID,
NodeID: node.ID,
JobID: job.ID,
Job: job,
Resources: &structs.Resources{
CPU: 2048,
MemoryMB: 2048,
},
DesiredStatus: structs.AllocDesiredStatusRun,
}
alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources}
noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
// Create a new task group that prevents in-place updates.
tg := &structs.TaskGroup{}
*tg = *job.TaskGroups[0]
task := &structs.Task{Name: "FOO"}
tg.Tasks = nil
tg.Tasks = append(tg.Tasks, task)
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
stack := NewGenericStack(false, ctx)
// Do the inplace update.
unplaced := inplaceUpdate(ctx, eval, job, stack, updates)
if len(unplaced) != 1 {
t.Fatal("inplaceUpdate incorrectly did an inplace update")
}
if len(ctx.plan.NodeAllocation) != 0 {
t.Fatal("inplaceUpdate incorrectly did an inplace update")
}
}
func TestInplaceUpdate_NoMatch(t *testing.T) {
state, ctx := testContext(t)
eval := mock.Eval()
job := mock.Job()
node := mock.Node()
noErr(t, state.UpsertNode(1000, node))
// Register an alloc
alloc := &structs.Allocation{
ID: structs.GenerateUUID(),
EvalID: eval.ID,
NodeID: node.ID,
JobID: job.ID,
Job: job,
Resources: &structs.Resources{
CPU: 2048,
MemoryMB: 2048,
},
DesiredStatus: structs.AllocDesiredStatusRun,
}
alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources}
noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
// Create a new task group that requires too much resources.
tg := &structs.TaskGroup{}
*tg = *job.TaskGroups[0]
resource := &structs.Resources{CPU: 9999}
tg.Tasks[0].Resources = resource
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
stack := NewGenericStack(false, ctx)
// Do the inplace update.
unplaced := inplaceUpdate(ctx, eval, job, stack, updates)
if len(unplaced) != 1 {
t.Fatal("inplaceUpdate incorrectly did an inplace update")
}
if len(ctx.plan.NodeAllocation) != 0 {
t.Fatal("inplaceUpdate incorrectly did an inplace update")
}
}
func TestInplaceUpdate_Success(t *testing.T) {
state, ctx := testContext(t)
eval := mock.Eval()
job := mock.Job()
node := mock.Node()
noErr(t, state.UpsertNode(1000, node))
// Register an alloc
alloc := &structs.Allocation{
ID: structs.GenerateUUID(),
EvalID: eval.ID,
NodeID: node.ID,
JobID: job.ID,
Job: job,
Resources: &structs.Resources{
CPU: 2048,
MemoryMB: 2048,
},
DesiredStatus: structs.AllocDesiredStatusRun,
}
alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources}
noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
// Create a new task group that updates the resources.
tg := &structs.TaskGroup{}
*tg = *job.TaskGroups[0]
resource := &structs.Resources{CPU: 737}
tg.Tasks[0].Resources = resource
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
stack := NewGenericStack(false, ctx)
stack.SetJob(job)
// Do the inplace update.
unplaced := inplaceUpdate(ctx, eval, job, stack, updates)
if len(unplaced) != 0 {
t.Fatal("inplaceUpdate did not do an inplace update")
}
if len(ctx.plan.NodeAllocation) != 1 {
t.Fatal("inplaceUpdate did not do an inplace update")
}
}
func TestEvictAndPlace_LimitGreaterThanAllocs(t *testing.T) {
_, ctx := testContext(t)
allocs := []allocTuple{
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
}
diff := &diffResult{}
limit := 6
if evictAndPlace(ctx, diff, allocs, "", &limit) {
t.Fatal("evictAndReplace() should have returned false")
}
if limit != 2 {
t.Fatalf("evictAndReplace() should decremented limit; got %v; want 2", limit)
}
if len(diff.place) != 4 {
t.Fatalf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)
}
}
func TestTaskGroupConstraints(t *testing.T) {
constr := &structs.Constraint{Hard: true}
constr2 := &structs.Constraint{LTarget: "foo"}
constr3 := &structs.Constraint{Weight: 10}
tg := &structs.TaskGroup{
Name: "web",
Count: 10,
Constraints: []*structs.Constraint{constr},
Tasks: []*structs.Task{
&structs.Task{
Driver: "exec",
Resources: &structs.Resources{
CPU: 500,
MemoryMB: 256,
},
Constraints: []*structs.Constraint{constr2},
},
&structs.Task{
Driver: "docker",
Resources: &structs.Resources{
CPU: 500,
MemoryMB: 256,
},
Constraints: []*structs.Constraint{constr3},
},
},
}
// Build the expected values.
expConstr := []*structs.Constraint{constr, constr2, constr3}
expDrivers := map[string]struct{}{"exec": struct{}{}, "docker": struct{}{}}
expSize := &structs.Resources{
CPU: 1000,
MemoryMB: 512,
}
actConstrains := taskGroupConstraints(tg)
if !reflect.DeepEqual(actConstrains.constraints, expConstr) {
t.Fatalf("taskGroupConstraints(%v) returned %v; want %v", tg, actConstrains.constraints, expConstr)
}
if !reflect.DeepEqual(actConstrains.drivers, expDrivers) {
t.Fatalf("taskGroupConstraints(%v) returned %v; want %v", tg, actConstrains.drivers, expDrivers)
}
if !reflect.DeepEqual(actConstrains.size, expSize) {
t.Fatalf("taskGroupConstraints(%v) returned %v; want %v", tg, actConstrains.size, expSize)
}
}

View File

@@ -44,6 +44,7 @@ gox \
-arch="${XC_ARCH}" \
-osarch="!linux/arm !darwin/386" \
-ldflags "-X main.GitCommit ${GIT_COMMIT}${GIT_DIRTY}" \
-cgo \
-output "pkg/{{.OS}}_{{.Arch}}/nomad" \
.

View File

@@ -10,4 +10,4 @@ go build -o $TEMPDIR/nomad || exit 1
# Run the tests
echo "--> Running tests"
go list ./... | PATH=$TEMPDIR:$PATH xargs -n1 go test -timeout=40s
go list ./... | PATH=$TEMPDIR:$PATH xargs -n1 go test -cover -timeout=40s

View File

@@ -16,7 +16,8 @@ while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
DIR="$( cd -P "$( dirname "$SOURCE" )/.." && pwd )"
# Copy into tmpdir
cp -R $DIR/website/ $DEPLOY/
shopt -s dotglob
cp -r $DIR/website/* $DEPLOY/
# Change into that directory
pushd $DEPLOY &>/dev/null
@@ -25,6 +26,7 @@ pushd $DEPLOY &>/dev/null
touch .gitignore
echo ".sass-cache" >> .gitignore
echo "build" >> .gitignore
echo "vendor" >> .gitignore
# Add everything
git init -q .

View File

@@ -22,6 +22,8 @@ import (
"os/exec"
"sync/atomic"
"testing"
"github.com/hashicorp/go-cleanhttp"
)
// offset is used to atomically increment the port numbers.
@@ -156,8 +158,7 @@ func NewTestServer(t *testing.T, cb ServerConfigCallback) *TestServer {
t.Fatalf("err: %s", err)
}
var client *http.Client
client = http.DefaultClient
client := cleanhttp.DefaultClient()
server := &TestServer{
Config: nomadConfig,

1
website/.ruby-version Normal file
View File

@@ -0,0 +1 @@
2.2.2

View File

@@ -1,12 +1,12 @@
GIT
remote: git://github.com/hashicorp/middleman-hashicorp.git
revision: 76f0f284ad44cea0457484ea83467192f02daf87
revision: 15cbda0cf1d963fa71292dee921229e7ee618272
specs:
middleman-hashicorp (0.1.0)
middleman-hashicorp (0.2.0)
bootstrap-sass (~> 3.3)
builder (~> 3.2)
less (~> 2.6)
middleman (~> 3.3)
middleman (~> 3.4)
middleman-livereload (~> 3.4)
middleman-minify-html (~> 3.4)
middleman-syntax (~> 2.0)
@@ -21,21 +21,25 @@ GIT
GEM
remote: https://rubygems.org/
specs:
activesupport (4.1.12)
i18n (~> 0.6, >= 0.6.9)
activesupport (4.2.4)
i18n (~> 0.7)
json (~> 1.7, >= 1.7.7)
minitest (~> 5.1)
thread_safe (~> 0.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
autoprefixer-rails (5.2.1)
autoprefixer-rails (6.0.3)
execjs
json
bootstrap-sass (3.3.5.1)
autoprefixer-rails (>= 5.0.0.1)
sass (>= 3.3.0)
builder (3.2.2)
celluloid (0.16.0)
timers (~> 4.0.0)
capybara (2.4.4)
mime-types (>= 1.16)
nokogiri (>= 1.3.3)
rack (>= 1.0.0)
rack-test (>= 0.5.4)
xpath (~> 2.0)
chunky_png (1.3.4)
coffee-script (2.4.1)
coffee-script-source
@@ -59,52 +63,50 @@ GEM
eventmachine (>= 0.12.9)
http_parser.rb (~> 0.6.0)
erubis (2.7.0)
eventmachine (1.0.7)
execjs (2.5.2)
eventmachine (1.0.8)
execjs (2.6.0)
ffi (1.9.10)
git-version-bump (0.15.1)
haml (4.0.6)
haml (4.0.7)
tilt
hike (1.2.3)
hitimes (1.2.2)
hooks (0.4.0)
uber (~> 0.0.4)
hooks (0.4.1)
uber (~> 0.0.14)
htmlcompressor (0.2.0)
http_parser.rb (0.6.0)
i18n (0.7.0)
json (1.8.3)
kramdown (1.8.0)
kramdown (1.9.0)
less (2.6.0)
commonjs (~> 0.2.7)
libv8 (3.16.14.11)
listen (2.10.1)
celluloid (~> 0.16.0)
libv8 (3.16.14.13)
listen (3.0.3)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
middleman (3.3.12)
middleman (3.4.0)
coffee-script (~> 2.2)
compass (>= 1.0.0, < 2.0.0)
compass-import-once (= 1.0.5)
execjs (~> 2.0)
haml (>= 4.0.5)
kramdown (~> 1.2)
middleman-core (= 3.3.12)
middleman-core (= 3.4.0)
middleman-sprockets (>= 3.1.2)
sass (>= 3.4.0, < 4.0)
uglifier (~> 2.5)
middleman-core (3.3.12)
activesupport (~> 4.1.0)
middleman-core (3.4.0)
activesupport (~> 4.1)
bundler (~> 1.1)
capybara (~> 2.4.4)
erubis
hooks (~> 0.3)
i18n (~> 0.7.0)
listen (>= 2.7.9, < 3.0)
listen (~> 3.0.3)
padrino-helpers (~> 0.12.3)
rack (>= 1.4.5, < 2.0)
rack-test (~> 0.6.2)
thor (>= 0.15.2, < 2.0)
tilt (~> 1.4.1, < 2.0)
middleman-livereload (3.4.2)
middleman-livereload (3.4.3)
em-websocket (~> 0.5.1)
middleman-core (>= 3.3)
rack-livereload (~> 0.3.15)
@@ -119,8 +121,12 @@ GEM
middleman-syntax (2.0.0)
middleman-core (~> 3.2)
rouge (~> 1.0)
minitest (5.7.0)
mime-types (2.6.2)
mini_portile (0.6.2)
minitest (5.8.1)
multi_json (1.11.2)
nokogiri (1.6.6.2)
mini_portile (~> 0.6.0)
padrino-helpers (0.12.5)
i18n (~> 0.6, >= 0.6.7)
padrino-support (= 0.12.5)
@@ -128,7 +134,7 @@ GEM
padrino-support (0.12.5)
activesupport (>= 3.1)
rack (1.6.4)
rack-contrib (1.3.0)
rack-contrib (1.4.0)
git-version-bump (~> 0.15)
rack (~> 1.4)
rack-livereload (0.3.16)
@@ -136,16 +142,16 @@ GEM
rack-protection (1.5.3)
rack
rack-rewrite (1.5.1)
rack-ssl-enforcer (0.2.8)
rack-ssl-enforcer (0.2.9)
rack-test (0.6.3)
rack (>= 1.0)
rb-fsevent (0.9.5)
rb-fsevent (0.9.6)
rb-inotify (0.9.5)
ffi (>= 0.5.0)
redcarpet (3.3.2)
redcarpet (3.3.3)
ref (2.0.0)
rouge (1.9.1)
sass (3.4.16)
rouge (1.10.1)
sass (3.4.19)
sprockets (2.12.4)
hike (~> 1.2)
multi_json (~> 1.0)
@@ -159,21 +165,21 @@ GEM
therubyracer (0.12.2)
libv8 (~> 3.16.14.0)
ref
thin (1.6.3)
thin (1.6.4)
daemons (~> 1.0, >= 1.0.9)
eventmachine (~> 1.0)
eventmachine (~> 1.0, >= 1.0.4)
rack (~> 1.0)
thor (0.19.1)
thread_safe (0.3.5)
tilt (1.4.1)
timers (4.0.1)
hitimes
tzinfo (1.2.2)
thread_safe (~> 0.1)
uber (0.0.13)
uglifier (2.7.1)
uber (0.0.15)
uglifier (2.7.2)
execjs (>= 0.3.0)
json (>= 1.8.0)
xpath (2.0.0)
nokogiri (~> 1.3)
PLATFORMS
ruby

10
website/Makefile Normal file
View File

@@ -0,0 +1,10 @@
all: build
init:
bundle
dev: init
bundle exec middleman server
build: init
bundle exec middleman build

View File

@@ -12,13 +12,7 @@ requests like any normal GitHub project, and we'll merge it in.
## Running the Site Locally
Running the site locally is simple. Clone this repo and run the following
commands:
```
$ bundle
$ bundle exec middleman server
```
Running the site locally is simple. Clone this repo and run `make dev`.
Then open up `http://localhost:4567`. Note that for some URLs you may need to append
".html" to make them work (in the navigation).

View File

@@ -1,22 +1,9 @@
#-------------------------------------------------------------------------
# Configure Middleman
#-------------------------------------------------------------------------
helpers do
def livestream_active?
# Must set key for date
ENV["LIVESTREAM_ACTIVE"].present?
end
end
set :base_url, "https://www.nomadproject.io/"
activate :hashicorp do |h|
h.version = ENV["NOMAD_VERSION"]
h.bintray_enabled = ENV["BINTRAY_ENABLED"] == "1"
h.bintray_repo = "mitchellh/nomad"
h.bintray_user = "mitchellh"
h.bintray_key = ENV["BINTRAY_API_KEY"]
h.name = "nomad"
h.version = "0.1.2"
h.github_slug = "hashicorp/nomad"
h.minify_javascript = false
end

View File

@@ -207,8 +207,8 @@ configured on server nodes.
option is not required and has no default.
* <a id="meta">`meta`</a>: This is a key/value mapping of metadata pairs. This
is a free-form map and can contain any string values.
* `options`: This is a key/value mapping of internal configuration for clients,
such as for driver configuration.
* <a id="options">`options`</a>: This is a key/value mapping of internal
configuration for clients, such as for driver configuration.
* <a id="network_interface">`network_interface`</a>: This is a string to force
network fingerprinting to use a specific network interface
* <a id="network_speed">`network_speed`</a>: This is an int that sets the

View File

@@ -9,7 +9,7 @@ description: >
# Command: init
The `init` command creates an example [job specification](/docs/jobspec/) in the current
directory that demonstrates some common configurations for tasks, tasks groups,
directory that demonstrates some common configurations for tasks, task groups,
runtime constraints, and resource allocation.
Please refer to the [jobspec](/docs/jobspec/) and [drivers](/docs/drivers/)

View File

@@ -16,7 +16,7 @@ The `status` command displays status information for jobs.
nomad status [options] [job]
```
This command accepts an option job ID as the sole argument. If the job ID is
This command accepts an optional job ID as the sole argument. If the job ID is
provided, information about the specific job is queried and displayed. If the ID
is omitted, the command lists out all of the existing jobs and a few of the most
useful status fields for each.

View File

@@ -20,7 +20,7 @@ nomad stop [options] <job>
The stop command requires a single argument, specifying the job ID to
cancel.
Upon successful deregistraion, an interactive monitor session will start to
Upon successful deregistration, an interactive monitor session will start to
display log lines as the job unwinds its allocations and completes shutting
down. The monitor will exit once all allocations are stopped and the job has
reached a terminal state. It is safe to exit the monitor early using ctrl+c.

View File

@@ -20,7 +20,7 @@ nomad version
## Output
This command prints both the version number as well as the exact commit SHA used
during the build. The SHA may also have a the string `+CHANGES` appended to the
during the build. The SHA may also have the string `+CHANGES` appended to the
end, indicating that local, uncommitted changes were detected at build time.
## Examples

View File

@@ -23,10 +23,14 @@ The `docker` driver supports the following configuration in the job specificatio
* `command` - (Optional) The command to run when starting the container.
* `args` - (Optional) Arguments to the optional `command`. If no `command` is
present, `args` are ignored.
* `network_mode` - (Optional) The network mode to be used for the container.
Valid options are `default`, `bridge`, `host` or `none`. If nothing is
specified, the container will start in `bridge` mode. The `container`
network mode is not supported right now.
network mode is not supported right now and is reported as an invalid
option.
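As a rough sketch, a task `config` block combining the keys above might look like the following; the `image` key and all values are illustrative assumptions rather than an excerpt from a tested job:

```
config {
# Illustrative values only; `image` is assumed to be the key naming the container image.
image = "redis:latest"
command = "redis-server"
network_mode = "host"
}
```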
### Port Mapping
@@ -47,8 +51,8 @@ port mapping will still be able to make outbound network connections.
Typically when you create a Docker container you configure the service to start
listening on a port (or ports) when you start the container. For example, redis
starts listening on `6379` when you `Docker run redis`. Nomad supports this by
mapping the random port to the port inside the container.
starts listening on `6379` when you `docker run redis`. Nomad can support this by
mapping a random port on the host machine to the port inside the container.
You need to tell Nomad which ports your container is using so Nomad can map
allocated ports for you. You do so by specifying a **numeric port value** for
@@ -111,12 +115,24 @@ The `docker` driver has the following configuration options:
* `docker.endpoint` - Defaults to `unix:///var/run/docker.sock`. You will need
to customize this if you use a non-standard socket (http or another location).
* `docker.cleanup.container` - Defaults to `true`. Changing this to `false` will
prevent Nomad from removing containers from stopped tasks.
* `docker.cleanup.image` - Defaults to `true`. Changing this to `false` will
prevent Nomad from removing images from stopped tasks.
Note: When testing or using the `-dev` flag you can use `DOCKER_HOST`,
`DOCKER_TLS_VERIFY`, and `DOCKER_CERT_PATH` to customize Nomad's behavior. In
production Nomad will always read `docker.endpoint`.
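As a sketch, these settings would be supplied through the client `options` map described in the agent configuration documentation; the surrounding agent configuration is omitted and the values are illustrative:

```
options = {
# Keep containers and images from stopped tasks around for debugging.
docker.cleanup.container = "false"
docker.cleanup.image = "false"
}
```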
## Client Attributes
The `docker` driver will set the following client attributes:
* `driver.Docker` - This will be set to "1", indicating the
* `driver.docker` - This will be set to "1", indicating the
driver is available.
* `driver.docker.version` - This will be set to the version of the
Docker server.
## Resource Isolation
@@ -125,7 +141,7 @@ The `docker` driver will set the following client attributes:
Nomad limits containers' CPU based on CPU shares. CPU shares allow containers to
burst past their CPU limits. CPU limits will only be imposed when there is
contention for resources. When the host is under load your process may be
throttled to stabilize QOS depending how how many shares it has. You can see how
throttled to stabilize QOS depending on how many shares it has. You can see how
many CPU shares are available to your process by reading `NOMAD_CPU_LIMIT`. 1000
shares are approximately equal to 1Ghz.

View File

@@ -6,21 +6,24 @@ description: |-
The Exec task driver is used to run binaries using OS isolation primitives.
---
# Fork/Exec Driver
# Isolated Fork/Exec Driver
Name: `exec`
The `exec` driver is used to simply execute a particular command for a task.
This is the simplest driver and is extremely flexible. In particlar, because
it can invoke any command, it can be used to call scripts or other wrappers
which provide higher level features.
However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation
primitives of the operating system to limit the tasks access to resources. While
simple, since the `exec` driver can invoke any command, it can be used to call
scripts or other wrappers which provide higher level features.
## Task Configuration
The `exec` driver supports the following configuration in the job spec:
* `command` - The command to execute. Must be provided.
* `command` - (Required) The command to execute. Must be provided.
* `artifact_source` (Optional) Source location of an executable artifact. Must be accessible
from the Nomad client. If you specify an `artifact_source` to be executed, you
must reference it in the `command` as shown in the examples below.
* `args` - (Optional) The argument list to the command, space separated.
## Client Requirements
@@ -29,6 +32,30 @@ The `exec` driver can run on all supported operating systems but to provide
proper isolation the client must be run as root on non-Windows operating systems.
Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
You must specify a `command` to be executed. Optionally you can specify an
`artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
running client or provided by a downloaded artifact.
## Examples
To run a binary present on the Node:
```
config {
command = "/bin/sleep"
args = 1
}
```
To execute a binary specified by `artifact_source`:
```
config {
artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin"
command = "$NOMAD_TASK_DIR/binary.bin"
}
```
## Client Attributes
The `exec` driver will set the following client attributes:

View File

@@ -19,9 +19,12 @@ HTTP from the Nomad client.
The `java` driver supports the following configuration in the job spec:
* `jar_source` - **(Required)** The hosted location of the source Jar file. Must be accessible
from the Nomad client, via HTTP
from the Nomad client
* `args` - (Optional) The argument list for the `java` command, space separated.
* `args` - **(Optional)** The argument list for the `java` command, space separated.
* `jvm_options` - **(Optional)** JVM options to be passed while invoking java. These options
are passed through as-is and are not validated by Nomad.
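For illustration, a `config` sketch using these keys; the jar URL, JVM flag, and argument string are placeholders and their exact formats are assumptions, not taken from a verified example:

```
config {
jar_source = "https://internal.example.com/my-service.jar"
jvm_options = "-Xmx256m"
args = "--port 8080"
}
```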
## Client Requirements
@@ -49,6 +52,6 @@ On Linux, Nomad will attempt to use cgroups, namespaces, and chroot
to isolate the resources of a process. If the Nomad agent is not
running as root many of these mechanisms cannot be used.
As a baseline, the Java jars will be ran inside a Java Virtual Machine,
As a baseline, the Java jars will be run inside a Java Virtual Machine,
providing a minimum amount of isolation.

View File

@@ -25,8 +25,8 @@ The `Qemu` driver supports the following configuration in the job spec:
* `image_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible
from the Nomad client, via HTTP.
* `checksum` - **(Required)** The MD5 checksum of the `qemu` image. If the
checksums do not match, the `Qemu` diver will fail to start the image
* `checksum` - **(Required)** The SHA256 checksum of the `qemu` image. If the
checksums do not match, the `Qemu` driver will fail to start the image
* `accelerator` - (Optional) The type of accelerator to use in the invocation.
If the host machine has `Qemu` installed with KVM support, users can specify `kvm` for the `accelerator`. Default is `tcg`
* `host_port` - **(Required)** Port on the host machine to forward to the guest
@@ -37,7 +37,7 @@ in the `Task` specification
## Client Requirements
The `Qemu` driver requires Qemu to be installed and in your systems `$PATH`.
The `Qemu` driver requires Qemu to be installed and in your system's `$PATH`.
The `image_source` must be accessible by the node running Nomad. This can be an
internal source, private to your cluster, but it must be reachable by the client
over HTTP.
@@ -48,7 +48,7 @@ The `Qemu` driver will set the following client attributes:
* `driver.qemu` - Set to `1` if Qemu is found on the host node. Nomad determines
this by executing `qemu-system-x86_64 -version` on the host and parsing the output
* `driver.qemu.version` - Version of `qemu-system-x86_64, ex: `2.4.0`
* `driver.qemu.version` - Version of `qemu-system-x86_64`, ex: `2.4.0`
## Resource Isolation
@@ -57,6 +57,6 @@ workloads. Nomad can use Qemu KVM's hardware-assisted virtualization to deliver
better performance.
Virtualization provides the highest level of isolation for workloads that
require additional security, and resources use is constrained by the Qemu
require additional security, and resource use is constrained by the Qemu
hypervisor rather than the host kernel. VM network traffic still flows through
the host's interface(s).
the host's interface(s).

View File

@@ -0,0 +1,73 @@
---
layout: "docs"
page_title: "Drivers: Raw Exec"
sidebar_current: "docs-drivers-raw-exec"
description: |-
The Raw Exec task driver simply fork/execs and provides no isolation.
---
# Raw Fork/Exec Driver
Name: `raw_exec`
The `raw_exec` driver is used to execute a command for a task without any
isolation. Further, the task is started as the same user as the Nomad process.
As such, it should be used with extreme care and is disabled by default.
## Task Configuration
The `raw_exec` driver supports the following configuration in the job spec:
* `command` - (Required) The command to execute. Must be provided.
* `artifact_source` (Optional) Source location of an executable artifact. Must be accessible
from the Nomad client. If you specify an `artifact_source` to be executed, you
must reference it in the `command` as shown in the examples below.
* `args` - (Optional) The argument list to the command, space separated.
## Client Requirements
The `raw_exec` driver can run on all supported operating systems. It is however
disabled by default. In order to be enabled, the Nomad client configuration must
explicitly enable the `raw_exec` driver in the
[options](../agent/config.html#options) field:
```
options = {
driver.raw_exec.enable = "1"
}
```
You must specify a `command` to be executed. Optionally you can specify an
`artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
running client or provided by a downloaded artifact.
## Examples
To run a binary present on the Node:
```
config {
command = "/bin/sleep"
args = 1
}
```
To execute a binary specified by `artifact_source`:
```
config {
artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin"
command = "$NOMAD_TASK_DIR/binary.bin"
}
```
## Client Attributes
The `raw_exec` driver will set the following client attributes:
* `driver.raw_exec` - This will be set to "1", indicating the
driver is available.
## Resource Isolation
The `raw_exec` driver provides no isolation.

View File

@@ -18,10 +18,20 @@ containers.
The `Rkt` driver supports the following configuration in the job spec:
* `trust_prefix` - **(Required)** The trust prefix to be passed to rkt. Must be reachable from
the box running the nomad agent.
* `name` - **(Required)** Fully qualified name of an image to run using rkt
* `exec` - **(Optional**) A command to execute on the ACI
* `trust_prefix` - **(Optional)** The trust prefix to be passed to rkt. Must be reachable from
the box running the nomad agent. If not specified, the image is run without
verifying the image signature.
* `image` - **(Required)** The image to run which may be specified by name,
hash, ACI address or docker registry.
* `command` - **(Optional)** A command to execute on the ACI.
* `args` - **(Optional)** A string of args to pass into the image.
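For illustration, a `config` sketch using these keys; the trust prefix, image, and command values are placeholders:

```
config {
trust_prefix = "coreos.com/etcd"
image = "coreos.com/etcd:v2.0.4"
command = "/etcd"
}
```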
## Task Directories
The `Rkt` driver does not currently support mounting the `alloc/` and `local/`
directories. It is currently blocked by this [Rkt
issue](https://github.com/coreos/rkt/issues/761). As such the corresponding
[environment variables](/docs/jobspec/environment.html#task_dir) are not set.
## Client Requirements
@@ -34,7 +44,7 @@ over HTTP.
The `Rkt` driver will set the following client attributes:
* `driver.rkt` - Set to `true` if Rkt is found on the host node. Nomad determines
* `driver.rkt` - Set to `1` if Rkt is found on the host node. Nomad determines
this by executing `rkt version` on the host and parsing the output
* `driver.rkt.version` - Version of `rkt` eg: `0.8.1`
* `driver.rkt.appc.version` - Version of `appc` that `rkt` is using eg: `0.8.1`

View File

@@ -52,7 +52,7 @@ be specified using the `?region=` query parameter.
"driver.java.vm": "Java HotSpot(TM) 64-Bit Server VM (build 25.5-b02, mixed mode)",
"hostname": "Armons-MacBook-Air.local",
"kernel.name": "darwin",
"kernel.version": "14.4.0\n",
"kernel.version": "14.4.0",
"memory.totalbytes": "8589934592",
"os.name": "darwin",
"os.version": "14.4.0",

View File

@@ -121,15 +121,15 @@ specified by the job. Resource utilization is maximized by bin packing, in which
the scheduling tries to make use of all the resources of a machine without
exhausting any dimension. Job constraints can be used to ensure an application is
running in an appropriate environment. Constraints can be technical requirements based
on hardware features such as architecture, availability of GPUs, or software features
on hardware features such as architecture and availability of GPUs, or software features
like operating system and kernel version, or they can be business constraints like
ensuring PCI compliant workloads run on appropriate servers.
# Getting in Depth
This has been a brief high-level overview of the architecture of Nomad. There
are more details available for each of the sub-systems. The [scheduler design](/docs/internals/scheduling.html),
[consensus protocol](/docs/internals/consensus.html), and [gossip protocol](/docs/internals/gossip.html)
are more details available for each of the sub-systems. The [consensus protocol](/docs/internals/consensus.html),
[gossip protocol](/docs/internals/gossip.html), and [scheduler design](/docs/internals/scheduling.html)
are all documented in more detail.
For other details, either consult the code, ask in IRC or reach out to the mailing list.

View File

@@ -45,7 +45,7 @@ same sequence of logs must result in the same state, meaning behavior must be de
For Nomad's purposes, all server nodes are in the peer set of the local region.
* **Quorum** - A quorum is a majority of members from a peer set: for a set of size `n`,
quorum requires at least `(n/2)+1` members.
quorum requires at least `(n/2)+1` members.
For example, if there are 5 members in the peer set, we would need 3 nodes
to form a quorum. If a quorum of nodes is unavailable for any reason, the
cluster becomes *unavailable* and no new logs can be committed.

View File

@@ -53,10 +53,11 @@ and ensure at least once delivery.
Nomad servers run scheduling workers, defaulting to one per CPU core, which are used to
process evaluations. The workers dequeue evaluations from the broker, and then invoke
the appropriate schedule as specified by the job. Nomad ships with a `service` scheduler
the appropriate scheduler as specified by the job. Nomad ships with a `service` scheduler
that optimizes for long-lived services, a `batch` scheduler that is used for fast placement
of batch jobs, and a `core` scheduler which is used for internal maintenance. Nomad can
be extended to support custom schedulers as well.
of batch jobs, a `system` scheduler that is used to run jobs on every node,
and a `core` scheduler which is used for internal maintenance.
Nomad can be extended to support custom schedulers as well.
Schedulers are responsible for processing an evaluation and generating an allocation _plan_.
The plan is the set of allocations to evict, update, or create. The specific logic used to
@@ -75,8 +76,8 @@ and density of applications, but is also augmented by affinity and anti-affinity
Once the scheduler has ranked enough nodes, the highest ranking node is selected and
added to the allocation plan.
When planning is complete, the scheduler submits the plan to the leader and
gets added to the plan queue. The plan queue manages pending plans, provides priority
When planning is complete, the scheduler submits the plan to the leader which adds
the plan to the plan queue. The plan queue manages pending plans, provides priority
ordering, and allows Nomad to handle concurrency races. Multiple schedulers are running
in parallel without locking or reservations, making Nomad optimistically concurrent.
As a result, schedulers might overlap work on the same node and cause resource

View File

@@ -17,7 +17,7 @@ environment variables.
When you request resources for a job, Nomad creates a resource offer. The final
resources for your job are not determined until it is scheduled. Nomad will
tell you which resources have been allocated after evaulation and placement.
tell you which resources have been allocated after evaluation and placement.
### CPU and Memory
@@ -28,7 +28,7 @@ the memory limit to inform how large your in-process cache should be, or to
decide when to flush buffers to disk.
Both CPU and memory are presented as integers. The unit for CPU limit is
`1024 = 1Ghz`. The unit for memory `1 = 1 megabytes`.
`1024 = 1Ghz`. The unit for memory is `1 = 1 megabytes`.
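For example, a task asking for roughly half a core and 256 megabytes of memory would declare something like the following minimal sketch of a `resources` block:

```
resources {
cpu = 500     # about half a core, given 1024 = 1Ghz
memory = 256  # 256 megabytes
}
```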
Writing your applications to adjust to these values at runtime provides greater
scheduling flexibility since you can adjust the resource allocations in your
@@ -56,6 +56,27 @@ exported as environment variables for consistency, e.g. `NOMAD_PORT_5000`.
Please see the relevant driver documentation for details.
<a id="task_dir">### Task Directories</a>
Nomad makes the following two directories available to tasks:
* `alloc/`: This directory is shared across all tasks in a task group and can be
used to store data that needs to be used by multiple tasks, such as a log
shipper.
* `local/`: This directory is private to each task. It can be used to store
arbitrary data that shouldn't be shared by tasks in the task group.
Both these directories are persisted until the allocation is removed, which
occurs hours after all the tasks in the task group enter terminal states. This
gives time to view the data produced by tasks.
Depending on the driver and operating system being targeted, the directories are
made available in various ways. For example, on `docker` the directories are
bind mounted into the container, while with `exec` on Linux the directories are mounted into the
chroot. Regardless of how the directories are made available, the path to the
directories can be read through the following environment variables:
`NOMAD_ALLOC_DIR` and `NOMAD_TASK_DIR`.
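As a small sketch, a task could reference these variables in its arguments; the driver, command, and file names below are illustrative and assume the `exec` driver's string-style `args`:

```
config {
command = "/bin/cp"
args = "$NOMAD_TASK_DIR/input.dat $NOMAD_ALLOC_DIR/input.dat"
}
```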
## Meta
The job specification also allows you to specify a `meta` block to supply arbitrary

View File

@@ -9,8 +9,7 @@ description: |-
# Job Specification
Jobs can be specified either in [HCL](https://github.com/hashicorp/hcl) or JSON.
HCL is meant to strike a balance between human readable and editable, as well
as being machine-friendly.
HCL is meant to strike a balance between human readable and editable, and machine-friendly.
For machine-friendliness, Nomad can also read JSON configurations. In general, we recommend
using the HCL syntax.
@@ -29,6 +28,9 @@ job "my-service" {
# Spread tasks between us-west-1 and us-east-1
datacenters = ["us-west-1", "us-east-1"]
# run this job globally
type = "system"
# Rolling updates should be sequential
update {
stagger = "30s"
@@ -132,7 +134,7 @@ The `job` object supports the following keys:
a task group of the same name.
* `type` - Specifies the job type and switches which scheduler
is used. Nomad provides the `service` and `batch` schedulers,
is used. Nomad provides the `service`, `system` and `batch` schedulers,
and defaults to `service`.
* `update` - Specifies the task update strategy. This requires providing
@@ -218,11 +220,35 @@ The `constraint` object supports the following keys:
to true. Soft constraints are not currently supported.
* `operator` - Specifies the comparison operator. Defaults to equality,
and can be `=`, `==`, `is`, `!=`, `not`.
and can be `=`, `==`, `is`, `!=`, `not`, `>`, `>=`, `<`, `<=`. The
ordering is compared lexically.
* `value` - Specifies the value to compare the attribute against.
This can be a literal value or another attribute.
* `version` - Specifies a version constraint against the attribute.
This sets the operator to "version" and the `value` to what is
specified. This supports a comma-separated list of constraints,
including the pessimistic operator. See the
[go-version](https://github.com/hashicorp/go-version) repository
for examples.
* `regexp` - Specifies a regular expression constraint against
the attribute. This sets the operator to "regexp" and the `value`
to the regular expression.
* `distinct_hosts` - `distinct_hosts` accepts a boolean `true`. The default is
`false`.
When `distinct_hosts` is `true` at the Job level, each instance of all Task
Groups specified in the job is placed on a separate host.
When `distinct_hosts` is `true` at the Task Group level with count > 1, each
instance of a Task Group is placed on a separate host. Different task groups in
the same job _may_ be co-scheduled.
Tasks within a task group are always co-scheduled.
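For illustration, two constraint sketches using the keys above; the `attribute` key name is assumed (it is not shown in this excerpt), the attribute names follow the interpreted variables in the table below, and the values are placeholders:

```
# Restrict placement to Linux nodes.
constraint {
attribute = "$attr.kernel.name"
value = "linux"
}

# Require a minimum kernel version via the version operator.
constraint {
attribute = "$attr.kernel.version"
version = ">= 3.19"
}
```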
Below is a table documenting the variables that can be interpreted:
<table class="table table-bordered table-striped">

View File

@@ -9,40 +9,51 @@ description: |-
<h1>Download Nomad</h1>
<section class="downloads">
<div class="description row">
<div class="col-md-12">
<p>
Below are all available downloads for the latest version of Nomad
(<%= latest_version %>). Please download the proper package for your
operating system and architecture. You can find SHA256 checksums
for packages <a href="https://dl.bintray.com/mitchellh/nomad/nomad_<%= latest_version %>_SHA256SUMS?direct">here</a>. You can verify the SHA256 checksums using <a href="https://hashicorp.com/security.html">our PGP public key</a> and the <a href="https://dl.bintray.com/mitchellh/nomad/nomad_<%= latest_version %>_SHA256SUMS.sig?direct">SHA256SUMs signature file</a>.
</p>
</div>
</div>
<% product_versions.each do |os, versions| %>
<div class="row">
<div class="col-md-12 download">
<div class="icon pull-left"><%= system_icon(os) %>
</div>
<div class="details">
<h2 class="os-name"><%= os %></h2>
<ul>
<% versions.each do |url| %>
<li><a href="<%= url %>"><%= arch_for_filename(url) %></a></li>
<% end %>
</ul>
<div class="clearfix">
</div>
</div>
</div>
</div>
<% end %>
<div class="description row">
<div class="col-md-12">
<p>
Below are the available downloads for the latest version of Nomad
(<%= latest_version %>). Please download the proper package for your
operating system and architecture.
</p>
<p>
You can find the
<a href="https://releases.hashicorp.com/nomad/<%= latest_version %>/nomad_<%= latest_version %>_SHA256SUMS">
SHA256 checksums for Nomad <%= latest_version %>
</a>
online and you can
<a href="https://releases.hashicorp.com/nomad/<%= latest_version %>/nomad_<%= latest_version %>_SHA256SUMS.sig">
verify the checksums signature file
</a>
which has been signed using <a href="https://hashicorp.com/security.html" target="_TOP">HashiCorp's GPG key</a>.
You can also <a href="https://releases.hashicorp.com/nomad" target="_TOP">download older versions of Nomad</a> from the releases service.
</p>
</div>
</div>
<div class="row">
<div class="col-md-12 poweredby">
<a href='http://www.bintray.com'>
<img src='https://www.bintray.com/docs/images/poweredByBintray_ColorTransparent.png'>
</a>
</div>
</div>
<% product_versions.each do |os, arches| %>
<% next if os == "web" %>
<div class="row">
<div class="col-md-12 download">
<div class="icon pull-left"><%= system_icon(os) %></div>
<div class="details">
<h2 class="os-name"><%= pretty_os(os) %></h2>
<ul>
<% arches.each do |arch, url| %>
<li><a href="<%= url %>"><%= pretty_arch(arch) %></a></li>
<% end %>
</ul>
<div class="clearfix"></div>
</div>
</div>
</div>
<% end %>
<div class="row">
<div class="col-md-12 poweredby">
<a href="https://www.fastly.com?utm_source=hashicorp" target="_TOP">
<%= image_tag "fastly_logo.png" %>
</a>
</div>
</div>
</section>

Some files were not shown because too many files have changed in this diff.