Merge branch 'master' of https://github.com/hashicorp/nomad into f-docker-driver-options

* 'master' of https://github.com/hashicorp/nomad: (59 commits)
  Move the executor and spawn package into driver
  Remove file watching
  Check if the PID is alive instead of heartbeating through modify time
  Update CHANGELOG.md
  nomad/watch: add a note about the Item struct
  go fmt this file
  Vet errors
  Search path
  Update website
  Make a basic executor that can be shared and fix some fingerprinting/tests
  Small improvements
  Use const value for AWS metadata URL
  Create Spawn pkg that handles IPC with the spawn-daemon and update exec_linux to use that
  Fixed the restart policy syntax
  Introducing vars to create default batch and service restart policies
  Fixed the tests
  Declaring Batch and Service default restart policies
  Fixing tests to not create a TG without restart policies
  This option only work -> This option only works
  leave -> leaving
  ...
Shiem Edelbrock committed 2015-11-05 10:48:09 -08:00
62 changed files with 3447 additions and 1416 deletions

View File

@@ -1,3 +1,9 @@
## 0.2.0 (Unreleased)
FEATURES:
* Blocking queries supported in API [GH-366]
## 0.1.2 (October 6, 2015)
IMPROVEMENTS:

View File

@@ -69,6 +69,7 @@ func TestCompose(t *testing.T) {
Operand: "=",
},
},
RestartPolicy: NewRestartPolicy(),
Tasks: []*Task{
&Task{
Name: "task1",

View File

@@ -1,19 +1,42 @@
package api
import (
"time"
)
// RestartPolicy defines how the Nomad client restarts
// tasks in a taskgroup when they fail
type RestartPolicy struct {
Interval time.Duration
Attempts int
Delay time.Duration
}
func NewRestartPolicy() *RestartPolicy {
return &RestartPolicy{
Attempts: 10,
Interval: 3 * time.Minute,
Delay: 5 * time.Second,
}
}
// TaskGroup is the unit of scheduling.
type TaskGroup struct {
Name string
Count int
Constraints []*Constraint
Tasks []*Task
Meta map[string]string
Name string
Count int
Constraints []*Constraint
Tasks []*Task
RestartPolicy *RestartPolicy
Meta map[string]string
}
// NewTaskGroup creates a new TaskGroup.
func NewTaskGroup(name string, count int) *TaskGroup {
restartPolicy := NewRestartPolicy()
return &TaskGroup{
Name: name,
Count: count,
Name: name,
Count: count,
RestartPolicy: restartPolicy,
}
}
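
As a quick, hypothetical illustration of the new field (not part of this change), a caller of the api package could swap the defaults returned by NewRestartPolicy for a tighter policy; the group name and values below are examples only:

    grp := NewTaskGroup("cache", 3)
    // Replace the default 10 attempts / 3m interval / 5s delay for this group.
    grp.RestartPolicy = &RestartPolicy{
        Attempts: 3,
        Interval: 5 * time.Minute,
        Delay:    25 * time.Second,
    }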

View File

@@ -8,8 +8,9 @@ import (
func TestTaskGroup_NewTaskGroup(t *testing.T) {
grp := NewTaskGroup("grp1", 2)
expect := &TaskGroup{
Name: "grp1",
Count: 2,
Name: "grp1",
Count: 2,
RestartPolicy: NewRestartPolicy(),
}
if !reflect.DeepEqual(grp, expect) {
t.Fatalf("expect: %#v, got: %#v", expect, grp)

View File

@@ -12,7 +12,7 @@ import (
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/executor"
"github.com/hashicorp/nomad/client/driver/executor"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -35,8 +35,11 @@ func NewExecDriver(ctx *DriverContext) Driver {
}
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
// Only enable if we are root on linux.
if runtime.GOOS != "linux" {
d.logger.Printf("[DEBUG] driver.exec: only available on linux, disabling")
return false, nil
} else if syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling")
return false, nil
}
@@ -73,10 +76,8 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
}
// Add execution permissions to the newly downloaded artifact
if runtime.GOOS != "windows" {
if err := syscall.Chmod(artifactFile, 0755); err != nil {
log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err)
}
if err := syscall.Chmod(artifactFile, 0755); err != nil {
log.Printf("[ERR] driver.exec: Error making artifact executable: %s", err)
}
}

View File

@@ -5,7 +5,6 @@ import (
"io/ioutil"
"path/filepath"
"reflect"
"runtime"
"testing"
"time"
@@ -123,13 +122,7 @@ func TestExecDriver_Start_Wait(t *testing.T) {
func TestExecDriver_Start_Artifact_basic(t *testing.T) {
ctestutils.ExecCompatible(t)
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
file := "hi_linux_amd64"
task := &structs.Task{
Name: "sleep",
@@ -172,13 +165,7 @@ func TestExecDriver_Start_Artifact_basic(t *testing.T) {
func TestExecDriver_Start_Artifact_expanded(t *testing.T) {
ctestutils.ExecCompatible(t)
var file string
switch runtime.GOOS {
case "darwin":
file = "hi_darwin_amd64"
default:
file = "hi_linux_amd64"
}
file := "hi_linux_amd64"
task := &structs.Task{
Name: "sleep",
@@ -306,7 +293,7 @@ func TestExecDriver_Start_Kill_Wait(t *testing.T) {
if err == nil {
t.Fatal("should err")
}
case <-time.After(2 * time.Second):
case <-time.After(8 * time.Second):
t.Fatalf("timeout")
}
}

View File

@@ -1,5 +1,3 @@
// +build !linux
package executor
import (
@@ -14,24 +12,26 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
func NewExecutor() Executor {
return &UniversalExecutor{}
}
// UniversalExecutor should work everywhere, and as a result does not include
// BasicExecutor should work everywhere, and as a result does not include
// any resource restrictions or runas capabilities.
type UniversalExecutor struct {
type BasicExecutor struct {
cmd
}
func (e *UniversalExecutor) Limit(resources *structs.Resources) error {
// TODO: Update to use the Spawner.
// TODO: Have raw_exec use this as well.
func NewBasicExecutor() Executor {
return &BasicExecutor{}
}
func (e *BasicExecutor) Limit(resources *structs.Resources) error {
if resources == nil {
return errNoResources
}
return nil
}
func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
return fmt.Errorf("Error finding task dir for (%s)", taskName)
@@ -40,7 +40,7 @@ func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.Al
return nil
}
func (e *UniversalExecutor) Start() error {
func (e *BasicExecutor) Start() error {
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.cmd.Env)
@@ -67,7 +67,7 @@ func (e *UniversalExecutor) Start() error {
return e.cmd.Start()
}
func (e *UniversalExecutor) Open(pid string) error {
func (e *BasicExecutor) Open(pid string) error {
pidNum, err := strconv.Atoi(pid)
if err != nil {
return fmt.Errorf("Failed to parse pid %v: %v", pid, err)
@@ -81,12 +81,12 @@ func (e *UniversalExecutor) Open(pid string) error {
return nil
}
func (e *UniversalExecutor) Wait() error {
func (e *BasicExecutor) Wait() error {
// We don't want to call ourselves. We want to call Wait on our embedded Cmd
return e.cmd.Wait()
}
func (e *UniversalExecutor) ID() (string, error) {
func (e *BasicExecutor) ID() (string, error) {
if e.cmd.Process != nil {
return strconv.Itoa(e.cmd.Process.Pid), nil
} else {
@@ -94,14 +94,14 @@ func (e *UniversalExecutor) ID() (string, error) {
}
}
func (e *UniversalExecutor) Shutdown() error {
func (e *BasicExecutor) Shutdown() error {
return e.ForceStop()
}
func (e *UniversalExecutor) ForceStop() error {
func (e *BasicExecutor) ForceStop() error {
return e.Process.Kill()
}
func (e *UniversalExecutor) Command() *cmd {
func (e *BasicExecutor) Command() *cmd {
return &e.cmd
}

View File

@@ -0,0 +1,422 @@
package executor
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"os/user"
"path/filepath"
"strings"
"syscall"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/client/driver/spawn"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
var (
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
chrootEnv = map[string]string{
"/bin": "/bin",
"/etc": "/etc",
"/lib": "/lib",
"/lib32": "/lib32",
"/lib64": "/lib64",
"/usr/bin": "/usr/bin",
"/usr/lib": "/usr/lib",
}
)
func NewExecutor() Executor {
return &LinuxExecutor{}
}
// Linux executor is designed to run on linux kernel 2.8+.
type LinuxExecutor struct {
cmd
user *user.User
// Isolation configurations.
groups *cgroupConfig.Cgroup
taskName string
taskDir string
allocDir string
// Spawn process.
spawn *spawn.Spawner
}
func (e *LinuxExecutor) Command() *cmd {
return &e.cmd
}
func (e *LinuxExecutor) Limit(resources *structs.Resources) error {
if resources == nil {
return errNoResources
}
return e.configureCgroups(resources)
}
// ExecLinuxID contains the necessary information to reattach to an executed
// process and clean up the created cgroups.
type ExecLinuxID struct {
Groups *cgroupConfig.Cgroup
Spawn *spawn.Spawner
TaskDir string
}
func (e *LinuxExecutor) Open(id string) error {
// De-serialize the ID.
dec := json.NewDecoder(strings.NewReader(id))
var execID ExecLinuxID
if err := dec.Decode(&execID); err != nil {
return fmt.Errorf("Failed to parse id: %v", err)
}
// Setup the executor.
e.groups = execID.Groups
e.spawn = execID.Spawn
e.taskDir = execID.TaskDir
return nil
}
func (e *LinuxExecutor) ID() (string, error) {
if e.groups == nil || e.spawn == nil || e.taskDir == "" {
return "", fmt.Errorf("LinuxExecutor not properly initialized.")
}
// Build the ID.
id := ExecLinuxID{
Groups: e.groups,
Spawn: e.spawn,
TaskDir: e.taskDir,
}
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
if err := enc.Encode(id); err != nil {
return "", fmt.Errorf("Failed to serialize id: %v", err)
}
return buffer.String(), nil
}
// runAs takes a user id as a string and looks up the user. It stores the
// results in the executor and returns an error if the user could not be found.
func (e *LinuxExecutor) runAs(userid string) error {
errs := new(multierror.Error)
// First, try to lookup the user by uid
u, err := user.LookupId(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// Lookup failed, so try by username instead
u, err = user.Lookup(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// If we got here we failed to lookup based on id and username, so we'll
// return those errors.
return fmt.Errorf("Failed to identify user to run as: %s", errs)
}
func (e *LinuxExecutor) Start() error {
// Run as "nobody" user so we don't leak root privilege to the spawned
// process.
if err := e.runAs("nobody"); err == nil && e.user != nil {
e.cmd.SetUID(e.user.Uid)
e.cmd.SetGID(e.user.Gid)
}
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
if err != nil {
return err
} else if len(parsedPath) != 1 {
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
}
e.cmd.Path = parsedPath[0]
combined := strings.Join(e.Cmd.Args, " ")
parsed, err := args.ParseAndReplace(combined, envVars.Map())
if err != nil {
return err
}
e.Cmd.Args = parsed
spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status"))
e.spawn = spawn.NewSpawner(spawnState)
e.spawn.SetCommand(&e.cmd.Cmd)
e.spawn.SetChroot(e.taskDir)
e.spawn.SetLogs(&spawn.Logs{
Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)),
Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)),
Stdin: os.DevNull,
})
enterCgroup := func(pid int) error {
// Join the spawn-daemon to the cgroup.
manager := e.getCgroupManager(e.groups)
// Apply will place the spawn daemon into the created cgroups.
if err := manager.Apply(pid); err != nil {
return fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err)
}
return nil
}
return e.spawn.Spawn(enterCgroup)
}
// Wait waits until the user process exits and returns an error on non-zero exit
// codes. Wait also cleans up the task directory and created cgroups.
func (e *LinuxExecutor) Wait() error {
errs := new(multierror.Error)
code, err := e.spawn.Wait()
if err != nil {
errs = multierror.Append(errs, err)
}
if code != 0 {
errs = multierror.Append(errs, fmt.Errorf("Task exited with code: %d", code))
}
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
func (e *LinuxExecutor) Shutdown() error {
return e.ForceStop()
}
// ForceStop immediately exits the user process and cleans up both the task
// directory and the cgroups.
func (e *LinuxExecutor) ForceStop() error {
errs := new(multierror.Error)
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
// Task Directory related functions.
// ConfigureTaskDir creates the necessary directory structure for a proper
// chroot. cleanTaskDir should be called after.
func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
e.taskName = taskName
e.allocDir = alloc.AllocDir
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
fmt.Errorf("Couldn't find task directory for task %v", taskName)
}
e.taskDir = taskDir
if err := alloc.MountSharedDir(taskName); err != nil {
return err
}
if err := alloc.Embed(taskName, chrootEnv); err != nil {
return err
}
// Mount dev
dev := filepath.Join(taskDir, "dev")
if err := os.Mkdir(dev, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
}
if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
}
// Mount proc
proc := filepath.Join(taskDir, "proc")
if err := os.Mkdir(proc, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
}
if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
}
// Set the tasks AllocDir environment variable.
env, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
e.Cmd.Env = env.List()
return nil
}
// pathExists is a helper function to check if the path exists.
func (e *LinuxExecutor) pathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return false
}
}
return true
}
// cleanTaskDir is an idempotent operation to clean the task directory and
// should be called when tearing down the task.
func (e *LinuxExecutor) cleanTaskDir() error {
// Unmount dev.
errs := new(multierror.Error)
dev := filepath.Join(e.taskDir, "dev")
if e.pathExists(dev) {
if err := syscall.Unmount(dev, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
}
if err := os.RemoveAll(dev); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err))
}
}
// Unmount proc.
proc := filepath.Join(e.taskDir, "proc")
if e.pathExists(proc) {
if err := syscall.Unmount(proc, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
}
if err := os.RemoveAll(proc); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", dev, err))
}
}
return errs.ErrorOrNil()
}
// Cgroup related functions.
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error {
e.groups = &cgroupConfig.Cgroup{}
e.groups.Name = structs.GenerateUUID()
// TODO: verify this is needed for things like network access
e.groups.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024)
// Disable swap to avoid issues on the machine
e.groups.MemorySwap = int64(-1)
}
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
e.groups.CpuShares = int64(resources.CPU)
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
e.groups.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
// destroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host.
func (e *LinuxExecutor) destroyCgroup() error {
if e.groups == nil {
return errors.New("Can't destroy: cgroup configuration empty")
}
manager := e.getCgroupManager(e.groups)
pids, err := manager.GetPids()
if err != nil {
return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err)
}
errs := new(multierror.Error)
for _, pid := range pids {
process, err := os.FindProcess(pid)
if err != nil {
multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err))
continue
}
if err := process.Kill(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err))
continue
}
}
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err))
}
if len(errs.Errors) != 0 {
return fmt.Errorf("Failed to destroy cgroup: %v", errs)
}
return nil
}
// getCgroupManager returns the correct libcontainer cgroup manager.
func (e *LinuxExecutor) getCgroupManager(groups *cgroupConfig.Cgroup) cgroups.Manager {
var manager cgroups.Manager
manager = &cgroupFs.Manager{Cgroups: groups}
if systemd.UseSystemd() {
manager = &systemd.Manager{Cgroups: groups}
}
return manager
}
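
To make the Resources-to-cgroup mapping above concrete, here is a hedged, package-internal sketch (it reads the unexported groups field, so it would have to live in package executor); the resource values are illustrative, and the 1-CPU-share-per-MHz scale is taken from the comments in the executor this file replaces:

    func ExampleLinuxExecutor_Limit() {
        res := &structs.Resources{CPU: 500, MemoryMB: 256, IOPS: 100}
        e := NewExecutor().(*LinuxExecutor)
        if err := e.Limit(res); err != nil {
            // CPU < 2 or IOPS outside [10, 1000] would surface here.
            fmt.Println("Limit failed:", err)
            return
        }
        // configureCgroups has populated:
        //   e.groups.CpuShares   == 500               (1 share per MHz)
        //   e.groups.Memory      == 256 * 1024 * 1024 (bytes)
        //   e.groups.MemorySwap  == -1                (swap disabled)
        //   e.groups.BlkioWeight == 100
        fmt.Println(e.groups.CpuShares, e.groups.Memory, e.groups.BlkioWeight)
    }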

View File

@@ -139,11 +139,6 @@ func TestExecutorLinux_Start_Kill(t *testing.T) {
filePath := filepath.Join(taskDir, "output")
e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath)
// This test can only be run if cgroups are enabled.
if !e.(*LinuxExecutor).cgroupEnabled {
t.SkipNow()
}
if err := e.Limit(constraint); err != nil {
t.Fatalf("Limit() failed: %v", err)
}
@@ -178,13 +173,11 @@ func TestExecutorLinux_Open(t *testing.T) {
t.Fatalf("No task directory found for task %v", task)
}
filePath := filepath.Join(taskDir, "output")
e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath)
// This test can only be run if cgroups are enabled.
if !e.(*LinuxExecutor).cgroupEnabled {
t.SkipNow()
}
expected := "hello world"
file := filepath.Join(allocdir.TaskLocal, "output.txt")
absFilePath := filepath.Join(taskDir, file)
cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file)
e := Command("/bin/bash", "-c", cmd)
if err := e.Limit(constraint); err != nil {
t.Fatalf("Limit() failed: %v", err)
@@ -203,14 +196,22 @@ func TestExecutorLinux_Open(t *testing.T) {
t.Fatalf("ID() failed: %v", err)
}
if _, err := OpenId(id); err == nil {
t.Fatalf("Open(%v) should have failed", id)
e2 := NewExecutor()
if err := e2.Open(id); err != nil {
t.Fatalf("Open(%v) failed: %v", id, err)
}
time.Sleep(1500 * time.Millisecond)
if err := e2.Wait(); err != nil {
t.Fatalf("Wait() failed: %v", err)
}
// Check that the file doesn't exist, open should have killed the process.
if _, err := os.Stat(filePath); err == nil {
t.Fatalf("Stat(%v) should have failed: task not killed", filePath)
output, err := ioutil.ReadFile(absFilePath)
if err != nil {
t.Fatalf("Couldn't read file %v", absFilePath)
}
act := string(output)
if act != expected {
t.Fatalf("Command output incorrectly: want %v; got %v", expected, act)
}
}

View File

@@ -0,0 +1,12 @@
// +build !linux
package executor
func NewExecutor() Executor {
return &UniversalExecutor{BasicExecutor{}}
}
// UniversalExecutor wraps the BasicExecutor
type UniversalExecutor struct {
BasicExecutor
}

View File

@@ -14,7 +14,7 @@ import (
"github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/executor"
"github.com/hashicorp/nomad/client/driver/executor"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -38,8 +38,8 @@ func NewJavaDriver(ctx *DriverContext) Driver {
func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling")
if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.java: must run as root user on linux, disabling")
return false, nil
}

View File

@@ -19,7 +19,7 @@ func javaLocated() bool {
// The fingerprinter test should always pass, even if Java is not installed.
func TestJavaDriver_Fingerprint(t *testing.T) {
ctestutils.ExecCompatible(t)
ctestutils.JavaCompatible(t)
d := NewJavaDriver(testDriverContext(""))
node := &structs.Node{
Attributes: make(map[string]string),
@@ -93,7 +93,7 @@ func TestJavaDriver_Start_Wait(t *testing.T) {
t.Skip("Java not found; skipping")
}
ctestutils.ExecCompatible(t)
ctestutils.JavaCompatible(t)
task := &structs.Task{
Name: "demo-app",
Config: map[string]string{
@@ -141,7 +141,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) {
t.Skip("Java not found; skipping")
}
ctestutils.ExecCompatible(t)
ctestutils.JavaCompatible(t)
task := &structs.Task{
Name: "demo-app",
Config: map[string]string{
@@ -179,7 +179,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) {
if err == nil {
t.Fatal("should err")
}
case <-time.After(2 * time.Second):
case <-time.After(8 * time.Second):
t.Fatalf("timeout")
}

View File

@@ -0,0 +1,285 @@
package spawn
import (
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/command"
"github.com/hashicorp/nomad/helper/discover"
)
// Spawner is used to start a user command in an isolated fashion that is
// resistant to Nomad agent failure.
type Spawner struct {
spawn *os.Process
SpawnPid int
SpawnPpid int
StateFile string
// User configuration
UserCmd *exec.Cmd
Logs *Logs
Chroot string
}
// Logs is used to define the filepaths the user command's logs should be
// redirected to. The files do not need to exist.
type Logs struct {
Stdin, Stdout, Stderr string
}
// NewSpawner takes a path to a state file. This state file can be used to
// create a new Spawner that can be used to wait on the exit status of a
// process even through Nomad restarts.
func NewSpawner(stateFile string) *Spawner {
return &Spawner{StateFile: stateFile}
}
// SetCommand sets the user command to spawn.
func (s *Spawner) SetCommand(cmd *exec.Cmd) {
s.UserCmd = cmd
}
// SetLogs sets the redirection of user command log files.
func (s *Spawner) SetLogs(l *Logs) {
s.Logs = l
}
// SetChroot puts the user command into a chroot.
func (s *Spawner) SetChroot(root string) {
s.Chroot = root
}
// Spawn does a double-fork to start and isolate the user command. It takes a
// call-back that is invoked with the pid of the intermediary process. If the
// call back returns an error, the user command is not started and the spawn is
// cancelled. This can be used to put the process into a cgroup or jail and
// cancel starting the user process if that was not successful. An error is
// returned if the call-back returns an error or the user-command couldn't be
// started.
func (s *Spawner) Spawn(cb func(pid int) error) error {
bin, err := discover.NomadExecutable()
if err != nil {
return fmt.Errorf("Failed to determine the nomad executable: %v", err)
}
exitFile, err := os.OpenFile(s.StateFile, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("Error opening file to store exit status: %v", err)
}
defer exitFile.Close()
config, err := s.spawnConfig()
if err != nil {
return err
}
spawn := exec.Command(bin, "spawn-daemon", config)
// Capture stdout
spawnStdout, err := spawn.StdoutPipe()
if err != nil {
return fmt.Errorf("Failed to capture spawn-daemon stdout: %v", err)
}
defer spawnStdout.Close()
// Capture stdin.
spawnStdin, err := spawn.StdinPipe()
if err != nil {
return fmt.Errorf("Failed to capture spawn-daemon stdin: %v", err)
}
defer spawnStdin.Close()
if err := spawn.Start(); err != nil {
return fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err)
}
if cb != nil {
cbErr := cb(spawn.Process.Pid)
if cbErr != nil {
errs := new(multierror.Error)
errs = multierror.Append(errs, cbErr)
if err := s.sendAbortCommand(spawnStdin); err != nil {
errs = multierror.Append(errs, err)
}
return errs
}
}
if err := s.sendStartCommand(spawnStdin); err != nil {
return err
}
respCh := make(chan command.SpawnStartStatus, 1)
errCh := make(chan error, 1)
go func() {
var resp command.SpawnStartStatus
dec := json.NewDecoder(spawnStdout)
if err := dec.Decode(&resp); err != nil {
errCh <- fmt.Errorf("Failed to parse spawn-daemon start response: %v", err)
}
respCh <- resp
}()
select {
case err := <-errCh:
return err
case resp := <-respCh:
if resp.ErrorMsg != "" {
return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg)
}
case <-time.After(5 * time.Second):
return fmt.Errorf("timed out waiting for response")
}
// Store the spawn process.
s.spawn = spawn.Process
s.SpawnPid = s.spawn.Pid
s.SpawnPpid = os.Getpid()
return nil
}
// spawnConfig returns a serialized config to pass to the Nomad spawn-daemon
// command.
func (s *Spawner) spawnConfig() (string, error) {
if s.UserCmd == nil {
return "", fmt.Errorf("Must specify user command")
}
config := command.DaemonConfig{
Cmd: *s.UserCmd,
Chroot: s.Chroot,
ExitStatusFile: s.StateFile,
}
if s.Logs != nil {
config.StdoutFile = s.Logs.Stdout
config.StdinFile = s.Logs.Stdin
config.StderrFile = s.Logs.Stderr
}
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
if err := enc.Encode(config); err != nil {
return "", fmt.Errorf("Failed to serialize configuration: %v", err)
}
return strconv.Quote(buffer.String()), nil
}
// sendStartCommand sends the necessary command to the spawn-daemon to have it
// start the user process.
func (s *Spawner) sendStartCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(true); err != nil {
return fmt.Errorf("Failed to serialize start command: %v", err)
}
return nil
}
// sendAbortCommand sends the necessary command to the spawn-daemon to have it
// abort starting the user process. This should be invoked if the spawn-daemon
// could not be isolated into a cgroup.
func (s *Spawner) sendAbortCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(false); err != nil {
return fmt.Errorf("Failed to serialize abort command: %v", err)
}
return nil
}
// Wait returns the exit code of the user process or an error if the wait
// failed.
func (s *Spawner) Wait() (int, error) {
if os.Getpid() == s.SpawnPpid {
return s.waitAsParent()
}
return s.pollWait()
}
// waitAsParent waits on the process if the current process was the spawner.
func (s *Spawner) waitAsParent() (int, error) {
if s.SpawnPpid != os.Getpid() {
return -1, fmt.Errorf("not the parent. Spawner parent is %v; current pid is %v", s.SpawnPpid, os.Getpid())
}
// Try to reattach to the spawn.
if s.spawn == nil {
// If it can't be reattached, it means the spawn process has exited so
// we should just read its exit file.
var err error
if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil {
return s.pollWait()
}
}
if _, err := s.spawn.Wait(); err != nil {
return -1, err
}
return s.pollWait()
}
// pollWait polls on the spawn daemon to determine when it exits. After it
// exits, it reads the state file and returns the exit code and possibly an
// error.
func (s *Spawner) pollWait() (int, error) {
// Stat to check if it is there to avoid a race condition.
stat, err := os.Stat(s.StateFile)
if err != nil {
return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err)
}
// If there is data it means that the file has already been written.
if stat.Size() > 0 {
return s.readExitCode()
}
// Read after the process exits.
for _ = range time.Tick(5 * time.Second) {
if !s.Alive() {
break
}
}
return s.readExitCode()
}
// readExitCode parses the state file and returns the exit code of the task. It
// returns an error if the file can't be read.
func (s *Spawner) readExitCode() (int, error) {
f, err := os.Open(s.StateFile)
if err != nil {
return -1, fmt.Errorf("Failed to open %v to read exit code: %v", s.StateFile, err)
}
defer f.Close()
stat, err := f.Stat()
if err != nil {
return -1, fmt.Errorf("Failed to stat file %v: %v", s.StateFile, err)
}
if stat.Size() == 0 {
return -1, fmt.Errorf("Empty state file: %v", s.StateFile)
}
var exitStatus command.SpawnExitStatus
dec := json.NewDecoder(f)
if err := dec.Decode(&exitStatus); err != nil {
return -1, fmt.Errorf("Failed to parse exit status from %v: %v", s.StateFile, err)
}
return exitStatus.ExitCode, nil
}
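
Putting this file together, a hypothetical caller of the Spawner might look like the following sketch. The paths and command are invented for illustration, and it assumes a nomad binary is discoverable on the host, since Spawn shells out to `nomad spawn-daemon`:

    package main

    import (
        "log"
        "os"
        "os/exec"

        "github.com/hashicorp/nomad/client/driver/spawn"
    )

    func main() {
        s := spawn.NewSpawner("/tmp/example_exit_status")
        s.SetCommand(exec.Command("/bin/echo", "hello"))
        s.SetLogs(&spawn.Logs{
            Stdout: "/tmp/example.stdout",
            Stderr: "/tmp/example.stderr",
            Stdin:  os.DevNull,
        })

        // The callback receives the spawn-daemon's pid before the user command
        // starts; returning an error aborts the spawn (the Linux executor uses
        // this hook to join the daemon to its cgroup).
        if err := s.Spawn(func(pid int) error {
            log.Printf("spawn-daemon running as pid %d", pid)
            return nil
        }); err != nil {
            log.Fatalf("Spawn() failed: %v", err)
        }

        // Wait blocks until the user command exits; if the agent restarts, a
        // re-created Spawner falls back to polling the exit-status file.
        code, err := s.Wait()
        log.Printf("user command exited with code %d (err: %v)", code, err)
    }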

View File

@@ -0,0 +1,14 @@
// +build !windows
package spawn
import "syscall"
func (s *Spawner) Alive() bool {
if s.spawn == nil {
return false
}
err := s.spawn.Signal(syscall.Signal(0))
return err == nil
}

View File

@@ -0,0 +1,300 @@
package spawn
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"runtime"
"strings"
"testing"
"time"
)
func TestSpawn_NoCmd(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
if err := spawn.Spawn(nil); err == nil {
t.Fatalf("Spawn() with no user command should fail")
}
}
func TestSpawn_InvalidCmd(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("foo"))
if err := spawn.Spawn(nil); err == nil {
t.Fatalf("Spawn() with no invalid command should fail")
}
}
func TestSpawn_SetsLogs(t *testing.T) {
// TODO: Figure out why this test fails. If the spawn-daemon directly writes
// to the opened stdout file it works but not the user command. Maybe a
// flush issue?
if runtime.GOOS == "windows" {
t.Skip("Test fails on windows; unknown reason. Skipping")
}
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
exp := "foo"
spawn.SetCommand(exec.Command("echo", exp))
// Create file for stdout.
stdout, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(stdout.Name())
spawn.SetLogs(&Logs{Stdout: stdout.Name()})
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed: %v", err)
}
if code, err := spawn.Wait(); code != 0 && err != nil {
t.Fatalf("Wait() returned %v, %v; want 0, nil", code, err)
}
stdout2, err := os.Open(stdout.Name())
if err != nil {
t.Fatalf("Open() failed: %v", err)
}
data, err := ioutil.ReadAll(stdout2)
if err != nil {
t.Fatalf("ReadAll() failed: %v", err)
}
act := strings.TrimSpace(string(data))
if act != exp {
t.Fatalf("Unexpected data written to stdout; got %v; want %v", act, exp)
}
}
func TestSpawn_Callback(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "1"))
called := false
cbErr := fmt.Errorf("ERROR CB")
cb := func(_ int) error {
called = true
return cbErr
}
if err := spawn.Spawn(cb); err == nil {
t.Fatalf("Spawn(%#v) should have errored; want %v", cb, cbErr)
}
if !called {
t.Fatalf("Spawn(%#v) didn't call callback", cb)
}
}
func TestSpawn_ParentWaitExited(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("echo", "foo"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
time.Sleep(1 * time.Second)
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_ParentWait(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "2"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_NonParentWaitExited(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("echo", "foo"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
time.Sleep(1 * time.Second)
// Force the wait to assume non-parent.
spawn.SpawnPpid = 0
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_NonParentWait(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "2"))
if err := spawn.Spawn(nil); err != nil {
t.Fatalf("Spawn() failed %v", err)
}
// Need to wait on the spawner, otherwise it becomes a zombie and the test
// only finishes after the init process cleans it. This speeds that up.
go func() {
time.Sleep(3 * time.Second)
if _, err := spawn.spawn.Wait(); err != nil {
t.FailNow()
}
}()
// Force the wait to assume non-parent.
spawn.SpawnPpid = 0
code, err := spawn.Wait()
if err != nil {
t.Fatalf("Wait() failed %v", err)
}
if code != 0 {
t.Fatalf("Wait() returned %v; want 0", code)
}
}
func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
var spawnPid int
cb := func(pid int) error {
spawnPid = pid
return nil
}
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "5"))
if err := spawn.Spawn(cb); err != nil {
t.Fatalf("Spawn() errored: %v", err)
}
proc, err := os.FindProcess(spawnPid)
if err != nil {
t.FailNow()
}
if err := proc.Kill(); err != nil {
t.FailNow()
}
if _, err := proc.Wait(); err != nil {
t.FailNow()
}
if _, err := spawn.Wait(); err == nil {
t.Fatalf("Wait() should have failed: %v", err)
}
}
func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) {
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("TempFile() failed")
}
defer os.Remove(f.Name())
var spawnPid int
cb := func(pid int) error {
spawnPid = pid
return nil
}
spawn := NewSpawner(f.Name())
spawn.SetCommand(exec.Command("sleep", "2"))
if err := spawn.Spawn(cb); err != nil {
t.Fatalf("Spawn() errored: %v", err)
}
proc, err := os.FindProcess(spawnPid)
if err != nil {
t.FailNow()
}
if err := proc.Kill(); err != nil {
t.FailNow()
}
if _, err := proc.Wait(); err != nil {
t.FailNow()
}
// Force the wait to assume non-parent.
spawn.SpawnPpid = 0
if _, err := spawn.Wait(); err == nil {
t.Fatalf("Wait() should have failed: %v", err)
}
}

View File

@@ -0,0 +1,21 @@
package spawn
import "syscall"
const STILL_ACTIVE = 259
func (s *Spawner) Alive() bool {
const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE
h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid))
if e != nil {
return false
}
var ec uint32
e = syscall.GetExitCodeProcess(h, &ec)
if e != nil {
return false
}
return ec == STILL_ACTIVE
}

View File

@@ -1,579 +0,0 @@
package executor
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/args"
"github.com/hashicorp/nomad/client/driver/environment"
"github.com/hashicorp/nomad/command"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/nomad/structs"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
const (
cgroupMount = "/sys/fs/cgroup"
)
var (
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
chrootEnv = map[string]string{
"/bin": "/bin",
"/etc": "/etc",
"/lib": "/lib",
"/lib32": "/lib32",
"/lib64": "/lib64",
"/usr/bin": "/usr/bin",
"/usr/lib": "/usr/lib",
}
)
func NewExecutor() Executor {
e := LinuxExecutor{}
// TODO: In a follow-up PR make it so this only happens once per client.
// Fingerprinting shouldn't happen per task.
// Check that cgroups are available.
if _, err := os.Stat(cgroupMount); err == nil {
e.cgroupEnabled = true
}
return &e
}
// Linux executor is designed to run on linux kernel 2.8+.
type LinuxExecutor struct {
cmd
user *user.User
// Finger print capabilities.
cgroupEnabled bool
// Isolation configurations.
groups *cgroupConfig.Cgroup
alloc *allocdir.AllocDir
taskName string
taskDir string
// Tracking of child process.
spawnChild exec.Cmd
spawnOutputWriter *os.File
spawnOutputReader *os.File
// Track whether there are filesystems mounted in the task dir.
mounts bool
}
func (e *LinuxExecutor) Limit(resources *structs.Resources) error {
if resources == nil {
return errNoResources
}
if e.cgroupEnabled {
return e.configureCgroups(resources)
}
return nil
}
func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
e.taskName = taskName
taskDir, ok := alloc.TaskDirs[taskName]
if !ok {
fmt.Errorf("Couldn't find task directory for task %v", taskName)
}
e.taskDir = taskDir
if err := alloc.MountSharedDir(taskName); err != nil {
return err
}
if err := alloc.Embed(taskName, chrootEnv); err != nil {
return err
}
// Mount dev
dev := filepath.Join(taskDir, "dev")
if err := os.Mkdir(dev, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
}
if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
}
// Mount proc
proc := filepath.Join(taskDir, "proc")
if err := os.Mkdir(proc, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
}
if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
}
// Set the tasks AllocDir environment variable.
env, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
e.Cmd.Env = env.List()
e.alloc = alloc
e.mounts = true
return nil
}
func (e *LinuxExecutor) cleanTaskDir() error {
if e.alloc == nil {
return errors.New("ConfigureTaskDir() must be called before Start()")
}
if !e.mounts {
return nil
}
// Unmount dev.
errs := new(multierror.Error)
dev := filepath.Join(e.taskDir, "dev")
if err := syscall.Unmount(dev, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
}
// Unmount proc.
proc := filepath.Join(e.taskDir, "proc")
if err := syscall.Unmount(proc, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
}
e.mounts = false
return errs.ErrorOrNil()
}
func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error {
if !e.cgroupEnabled {
return nil
}
e.groups = &cgroupConfig.Cgroup{}
// Groups will be created in a hierarchy according to the resource being
// constrained, current session, and then this unique name. Restraints are
// then placed in the corresponding files.
// Ex: restricting a process to 2048Mhz CPU and 2MB of memory:
// $ cat /sys/fs/cgroup/cpu/user/1000.user/4.session/<uuid>/cpu.shares
// 2048
// $ cat /sys/fs/cgroup/memory/user/1000.user/4.session/<uuid>/memory.limit_in_bytes
// 2097152
e.groups.Name = structs.GenerateUUID()
// TODO: verify this is needed for things like network access
e.groups.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024)
// Disable swap to avoid issues on the machine
e.groups.MemorySwap = int64(-1)
}
if resources.CPU != 0 {
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
// given process will have at least that amount of resources, but likely
// more since it is (probably) rare that the machine will run at 100%
// CPU. This scale will cease to work if a node is overprovisioned.
e.groups.CpuShares = int64(resources.CPU)
}
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
e.groups.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
func (e *LinuxExecutor) runAs(userid string) error {
errs := new(multierror.Error)
// First, try to lookup the user by uid
u, err := user.LookupId(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// Lookup failed, so try by username instead
u, err = user.Lookup(userid)
if err == nil {
e.user = u
return nil
} else {
errs = multierror.Append(errs, err)
}
// If we got here we failed to lookup based on id and username, so we'll
// return those errors.
return fmt.Errorf("Failed to identify user to run as: %s", errs)
}
func (e *LinuxExecutor) Start() error {
// Run as "nobody" user so we don't leak root privilege to the
// spawned process.
if err := e.runAs("nobody"); err == nil && e.user != nil {
e.cmd.SetUID(e.user.Uid)
e.cmd.SetGID(e.user.Gid)
}
if e.alloc == nil {
return errors.New("ConfigureTaskDir() must be called before Start()")
}
// Parse the commands arguments and replace instances of Nomad environment
// variables.
envVars, err := environment.ParseFromList(e.Cmd.Env)
if err != nil {
return err
}
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
if err != nil {
return err
} else if len(parsedPath) != 1 {
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
}
e.cmd.Path = parsedPath[0]
combined := strings.Join(e.Cmd.Args, " ")
parsed, err := args.ParseAndReplace(combined, envVars.Map())
if err != nil {
return err
}
e.Cmd.Args = parsed
return e.spawnDaemon()
}
// spawnDaemon executes a double fork to start the user command with proper
// isolation. Stores the child process for use in Wait.
func (e *LinuxExecutor) spawnDaemon() error {
bin, err := discover.NomadExecutable()
if err != nil {
return fmt.Errorf("Failed to determine the nomad executable: %v", err)
}
// Serialize the cmd and the cgroup configuration so it can be passed to the
// sub-process.
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
c := command.DaemonConfig{
Cmd: e.cmd.Cmd,
Chroot: e.taskDir,
StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)),
StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)),
StdinFile: "/dev/null",
}
if err := enc.Encode(c); err != nil {
return fmt.Errorf("Failed to serialize daemon configuration: %v", err)
}
// Create a pipe to capture Stdout.
pr, pw, err := os.Pipe()
if err != nil {
return err
}
e.spawnOutputWriter = pw
e.spawnOutputReader = pr
// Call ourselves using a hidden flag. The new instance of nomad will join
// the passed cgroup, forkExec the cmd, and output status codes through
// Stdout.
escaped := strconv.Quote(buffer.String())
spawn := exec.Command(bin, "spawn-daemon", escaped)
spawn.Stdout = e.spawnOutputWriter
// Capture its Stdin.
spawnStdIn, err := spawn.StdinPipe()
if err != nil {
return err
}
if err := spawn.Start(); err != nil {
fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err)
}
// Join the spawn-daemon to the cgroup.
if e.groups != nil {
manager := cgroupFs.Manager{}
manager.Cgroups = e.groups
// Apply will place the current pid into the tasks file for each of the
// created cgroups:
// /sys/fs/cgroup/memory/user/1000.user/4.session/<uuid>/tasks
//
// Apply requires superuser permissions, and may fail if Nomad is not run with
// the required permissions
if err := manager.Apply(spawn.Process.Pid); err != nil {
errs := new(multierror.Error)
errs = multierror.Append(errs, fmt.Errorf("Failed to join spawn-daemon to the cgroup (config => %+v): %v", manager.Cgroups, err))
if err := sendAbortCommand(spawnStdIn); err != nil {
errs = multierror.Append(errs, err)
}
return errs
}
}
// Tell it to start.
if err := sendStartCommand(spawnStdIn); err != nil {
return err
}
// Parse the response.
dec := json.NewDecoder(e.spawnOutputReader)
var resp command.SpawnStartStatus
if err := dec.Decode(&resp); err != nil {
return fmt.Errorf("Failed to parse spawn-daemon start response: %v", err)
}
if resp.ErrorMsg != "" {
return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg)
}
e.spawnChild = *spawn
return nil
}
func sendStartCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(true); err != nil {
return fmt.Errorf("Failed to serialize start command: %v", err)
}
return nil
}
func sendAbortCommand(w io.Writer) error {
enc := json.NewEncoder(w)
if err := enc.Encode(false); err != nil {
return fmt.Errorf("Failed to serialize abort command: %v", err)
}
return nil
}
// Open's behavior is to kill all processes associated with the id and return an
// error. This is done because it is not possible to re-attach to the
// spawn-daemon's stdout to retrieve status messages.
func (e *LinuxExecutor) Open(id string) error {
parts := strings.SplitN(id, ":", 2)
if len(parts) != 2 {
return fmt.Errorf("Invalid id: %v", id)
}
switch parts[0] {
case "PID":
pid, err := strconv.Atoi(parts[1])
if err != nil {
return fmt.Errorf("Invalid id: failed to parse pid %v", parts[1])
}
process, err := os.FindProcess(pid)
if err != nil {
return fmt.Errorf("Failed to find Pid %v: %v", pid, err)
}
if err := process.Kill(); err != nil {
return fmt.Errorf("Failed to kill Pid %v: %v", pid, err)
}
case "CGROUP":
if !e.cgroupEnabled {
return errors.New("Passed a a cgroup identifier, but cgroups are disabled")
}
// De-serialize the cgroup configuration.
dec := json.NewDecoder(strings.NewReader(parts[1]))
var groups cgroupConfig.Cgroup
if err := dec.Decode(&groups); err != nil {
return fmt.Errorf("Failed to parse cgroup configuration: %v", err)
}
e.groups = &groups
if err := e.destroyCgroup(); err != nil {
return err
}
// TODO: cleanTaskDir is a little more complicated here because the OS
// may have already unmounted in the case of a restart. Need to scan.
default:
return fmt.Errorf("Invalid id type: %v", parts[0])
}
return errors.New("Could not re-open to id (intended).")
}
func (e *LinuxExecutor) Wait() error {
if e.spawnChild.Process == nil {
return errors.New("Can not find child to wait on")
}
defer e.spawnOutputWriter.Close()
defer e.spawnOutputReader.Close()
errs := new(multierror.Error)
if err := e.spawnChild.Wait(); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Process.Pid, err))
}
// If they fork/exec and then exit, wait will return but they will be still
// running processes so we need to kill the full cgroup.
if e.groups != nil {
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
// If cgroups are used, the ID is the cgroup structure. Otherwise, it is the
// PID of the spawn-daemon process. An error is returned if the process was
// never started.
func (e *LinuxExecutor) ID() (string, error) {
if e.spawnChild.Process != nil {
if e.cgroupEnabled && e.groups != nil {
// Serialize the cgroup structure so it can be undone on subsequent
// opens.
var buffer bytes.Buffer
enc := json.NewEncoder(&buffer)
if err := enc.Encode(e.groups); err != nil {
return "", fmt.Errorf("Failed to serialize daemon configuration: %v", err)
}
return fmt.Sprintf("CGROUP:%v", buffer.String()), nil
}
return fmt.Sprintf("PID:%d", e.spawnChild.Process.Pid), nil
}
return "", fmt.Errorf("Process has finished or was never started")
}
func (e *LinuxExecutor) Shutdown() error {
return e.ForceStop()
}
func (e *LinuxExecutor) ForceStop() error {
if e.spawnOutputReader != nil {
e.spawnOutputReader.Close()
}
if e.spawnOutputWriter != nil {
e.spawnOutputWriter.Close()
}
// If the task is not running inside a cgroup then just the spawn-daemon child is killed.
// TODO: Find a good way to kill the children of the spawn-daemon.
if e.groups == nil {
if err := e.spawnChild.Process.Kill(); err != nil {
return fmt.Errorf("Failed to kill child (%v): %v", e.spawnChild.Process.Pid, err)
}
return nil
}
errs := new(multierror.Error)
if e.groups != nil {
if err := e.destroyCgroup(); err != nil {
errs = multierror.Append(errs, err)
}
}
if err := e.cleanTaskDir(); err != nil {
errs = multierror.Append(errs, err)
}
return errs.ErrorOrNil()
}
func (e *LinuxExecutor) destroyCgroup() error {
if e.groups == nil {
return errors.New("Can't destroy: cgroup configuration empty")
}
manager := cgroupFs.Manager{}
manager.Cgroups = e.groups
pids, err := manager.GetPids()
if err != nil {
return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err)
}
errs := new(multierror.Error)
for _, pid := range pids {
process, err := os.FindProcess(pid)
if err != nil {
multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err))
continue
}
if err := process.Kill(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err))
continue
}
if _, err := process.Wait(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err))
continue
}
}
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err))
}
if len(errs.Errors) != 0 {
return fmt.Errorf("Failed to destroy cgroup: %v", errs)
}
return nil
}
func (e *LinuxExecutor) Command() *cmd {
return &e.cmd
}

View File

@@ -15,6 +15,10 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// This is where the AWS metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_AWS_URL = "http://169.254.169.254/latest/meta-data/"
// map of instance type to approximate speed, in Mbits/s
// http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797
// which itself cites these sources:
@@ -89,7 +93,7 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node)
}
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = "http://169.254.169.254/latest/meta-data/"
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
@@ -161,7 +165,7 @@ func isAWS() bool {
// provide their own
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = "http://169.254.169.254/latest/meta-data/"
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
@@ -205,7 +209,7 @@ func (f *EnvAWSFingerprint) linkSpeed() int {
// the network speed
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = "http://169.254.169.254/latest/meta-data/"
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
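
Because every call site checks AWS_ENV_URL before falling back to DEFAULT_AWS_URL, a test can point the fingerprinter at a local stub server. A hedged sketch, assuming the usual net/http, net/http/httptest, os and testing imports; the test name and served value are invented for illustration:

    func TestEnvAWSFingerprint_stubMetadata(t *testing.T) {
        ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            // Whatever metadata key is requested, answer with a canned value.
            fmt.Fprint(w, "m3.large")
        }))
        defer ts.Close()

        os.Setenv("AWS_ENV_URL", ts.URL+"/latest/meta-data/")
        defer os.Setenv("AWS_ENV_URL", "")

        // ... construct an EnvAWSFingerprint and call Fingerprint as usual ...
    }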

View File

@@ -8,8 +8,14 @@ import (
)
func ExecCompatible(t *testing.T) {
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
t.Skip("Must be root on non-windows environments to run test")
if runtime.GOOS != "linux" || syscall.Geteuid() != 0 {
t.Skip("Test only available running as root on linux")
}
}
func JavaCompatible(t *testing.T) {
if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
t.Skip("Test only available when running as root on linux")
}
}

View File

@@ -104,6 +104,17 @@ job "example" {
# Defaults to 1
# count = 1
# Restart Policy - This block defines the restart policy for TaskGroups.
# The attempts value defines the number of restarts Nomad will perform for
# Tasks in this TaskGroup within a rolling window of the interval duration.
# The delay value makes Nomad wait for that duration before restarting a Task
# after it fails or crashes.
restart {
interval = "5m"
attempts = 10
delay = "25s"
}
# Define a task to run
task "redis" {
# Use Docker to run the task.

View File

@@ -2,19 +2,19 @@ package command
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
)
type SpawnDaemonCommand struct {
Meta
}
// Status of executing the user's command.
type SpawnStartStatus struct {
// ErrorMsg will be empty if the user command was started successfully.
// Otherwise it will have an error message.
ErrorMsg string
config *DaemonConfig
exitFile io.WriteCloser
}
func (c *SpawnDaemonCommand) Help() string {
@@ -23,15 +23,15 @@ Usage: nomad spawn-daemon [options] <daemon_config>
INTERNAL ONLY
Spawns a daemon process optionally inside a cgroup. The required daemon_config is a json
encoding of the DaemonConfig struct containing the isolation configuration and command to run.
SpawnStartStatus is json serialized to Stdout upon running the user command or if any error
prevents its execution. If there is no error, the process waits on the users
command and then json serializes SpawnExitStatus to Stdout after its termination.
General Options:
` + generalOptionsUsage()
Spawns a daemon process by double forking. The required daemon_config is a
json encoding of the DaemonConfig struct containing the isolation
configuration and command to run. SpawnStartStatus is json serialized to
stdout upon running the user command or if any error prevents its execution.
If there is no error, the process waits on the user's command. Once the user
command exits, the exit code is written to a file specified in the
daemon_config and this process exits with the same exit status as the user
command.
`
return strings.TrimSpace(helpText)
}
@@ -40,6 +40,154 @@ func (c *SpawnDaemonCommand) Synopsis() string {
return "Spawn a daemon command with configurable isolation."
}
// Status of executing the user's command.
type SpawnStartStatus struct {
// The PID of the user's command.
UserPID int
// ErrorMsg will be empty if the user command was started successfully.
// Otherwise it will have an error message.
ErrorMsg string
}
// Exit status of the user's command.
type SpawnExitStatus struct {
// The exit code of the user's command.
ExitCode int
}
// Configuration for the command to start as a daemon.
type DaemonConfig struct {
exec.Cmd
// The filepath to write the exit status to.
ExitStatusFile string
// The paths, if not /dev/null, must be either in the tasks root directory
// or in the shared alloc directory.
StdoutFile string
StdinFile string
StderrFile string
// An optional path specifying the directory to chroot the process in.
Chroot string
}
// Whether to start the user command or abort.
type TaskStart bool
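
The help text and the types above describe a small JSON protocol between the Spawner and `nomad spawn-daemon`. The following self-contained sketch shows the shape of each message; the pid and values are invented for illustration:

    package main

    import (
        "encoding/json"
        "fmt"

        "github.com/hashicorp/nomad/command"
    )

    func main() {
        // 1. The Spawner passes a strconv.Quote'd JSON DaemonConfig as the single
        //    CLI argument (see Spawner.spawnConfig), then writes a bare JSON bool
        //    (TaskStart) to the daemon's stdin: true to start, false to abort.

        // 2. The daemon answers on stdout with a SpawnStartStatus:
        start, _ := json.Marshal(command.SpawnStartStatus{UserPID: 1234})
        fmt.Println(string(start)) // {"UserPID":1234,"ErrorMsg":""}

        // 3. After the user command exits, it writes a SpawnExitStatus to the
        //    configured ExitStatusFile and exits with the same code:
        exit, _ := json.Marshal(command.SpawnExitStatus{ExitCode: 0})
        fmt.Println(string(exit)) // {"ExitCode":0}
    }
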
// parseConfig reads the DaemonConfig from the passed arguments. If not
// successful, an error is returned.
func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) {
flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
if err := flags.Parse(args); err != nil {
return nil, fmt.Errorf("failed to parse args: %v", err)
}
// Check that we got json input.
args = flags.Args()
if len(args) != 1 {
return nil, fmt.Errorf("incorrect number of args; got %v; want 1", len(args))
}
jsonInput, err := strconv.Unquote(args[0])
if err != nil {
return nil, fmt.Errorf("Failed to unquote json input: %v", err)
}
// De-serialize the passed command.
var config DaemonConfig
dec := json.NewDecoder(strings.NewReader(jsonInput))
if err := dec.Decode(&config); err != nil {
return nil, err
}
return &config, nil
}
// configureLogs creates the log files and redirects the process
// stdin/stderr/stdout to them. If unsuccessful, an error is returned.
func (c *SpawnDaemonCommand) configureLogs() error {
if len(c.config.StdoutFile) != 0 {
stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return fmt.Errorf("Error opening file to redirect stdout: %v", err)
}
c.config.Cmd.Stdout = stdo
}
if len(c.config.StderrFile) != 0 {
stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return fmt.Errorf("Error opening file to redirect stderr: %v", err)
}
c.config.Cmd.Stderr = stde
}
if len(c.config.StdinFile) != 0 {
stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666)
if err != nil {
return fmt.Errorf("Error opening file to redirect stdin: %v", err)
}
c.config.Cmd.Stdin = stdi
}
return nil
}
func (c *SpawnDaemonCommand) Run(args []string) int {
var err error
c.config, err = c.parseConfig(args)
if err != nil {
return c.outputStartStatus(err, 1)
}
// Open the file we will be using to write exit codes to. We do this early
// to ensure that we don't start the user process when we can't capture its
// exit status.
c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_WRONLY, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1)
}
// Isolate the user process.
if err := c.isolateCmd(); err != nil {
return c.outputStartStatus(err, 1)
}
// Redirect logs.
if err := c.configureLogs(); err != nil {
return c.outputStartStatus(err, 1)
}
// Chroot jail the process and set its working directory.
c.configureChroot()
// Wait to get the start command.
var start TaskStart
dec := json.NewDecoder(os.Stdin)
if err := dec.Decode(&start); err != nil {
return c.outputStartStatus(err, 1)
}
// Aborted by Nomad process.
if !start {
return 0
}
// Spawn the user process.
if err := c.config.Cmd.Start(); err != nil {
return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1)
}
// Indicate that the command was started successfully.
c.outputStartStatus(nil, 0)
// Wait and then output the exit status.
return c.writeExitStatus(c.config.Cmd.Wait())
}
// outputStartStatus is a helper function that outputs a SpawnStartStatus to
// Stdout with the passed error, which may be nil to indicate no error. It
// returns the passed status.
@@ -51,6 +199,36 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int {
startStatus.ErrorMsg = err.Error()
}
if c.config != nil && c.config.Cmd.Process != nil {
startStatus.UserPID = c.config.Process.Pid
}
enc.Encode(startStatus)
return status
}
// writeExitStatus takes in the error result from calling wait and writes out
// the exit status to a file. It returns the same exit status as the user
// command.
func (c *SpawnDaemonCommand) writeExitStatus(exit error) int {
// Parse the exit code.
exitStatus := &SpawnExitStatus{}
if exit != nil {
// Default to exit code 1 if we cannot get the actual exit code.
exitStatus.ExitCode = 1
if exiterr, ok := exit.(*exec.ExitError); ok {
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
exitStatus.ExitCode = status.ExitStatus()
}
}
}
if c.exitFile != nil {
enc := json.NewEncoder(c.exitFile)
enc.Encode(exitStatus)
c.exitFile.Close()
}
return exitStatus.ExitCode
}
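For orientation, here is a hedged sketch of the parent side of this handshake: create the exit-status file, invoke spawn-daemon with a Go-quoted JSON DaemonConfig, send the TaskStart message on stdin, and read the SpawnStartStatus back from stdout. How the exec driver actually builds the config is not part of this diff, so the "nomad" binary path, the config JSON, and the helper names below are illustrative only.

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"strconv"
)

// startStatus mirrors SpawnStartStatus; only the fields read here.
type startStatus struct {
	UserPID  int
	ErrorMsg string
}

// spawnDaemon is a sketch, not driver code: it launches `nomad spawn-daemon`
// and performs the stdin/stdout handshake described above.
func spawnDaemon(configJSON, exitStatusFile string) (int, error) {
	// spawn-daemon opens the exit-status file O_WRONLY without O_CREATE,
	// so the parent must create it beforehand.
	f, err := os.Create(exitStatusFile)
	if err != nil {
		return 0, err
	}
	f.Close()

	// parseConfig calls strconv.Unquote on the argument, so the JSON must be
	// passed as a single Go-quoted string.
	cmd := exec.Command("nomad", "spawn-daemon", strconv.Quote(configJSON))
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return 0, err
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return 0, err
	}
	if err := cmd.Start(); err != nil {
		return 0, err
	}

	// TaskStart: true starts the user command, false aborts it.
	if err := json.NewEncoder(stdin).Encode(true); err != nil {
		return 0, err
	}

	// SpawnStartStatus is written to stdout once the user command is running
	// (or as soon as an error prevents it).
	var status startStatus
	if err := json.NewDecoder(stdout).Decode(&status); err != nil {
		return 0, err
	}
	if status.ErrorMsg != "" {
		return 0, fmt.Errorf("spawn-daemon failed: %s", status.ErrorMsg)
	}
	// The user command's exit code can later be read from exitStatusFile,
	// which receives a JSON SpawnExitStatus when the command terminates.
	return status.UserPID, nil
}

func main() {
	// A real config also carries the command to run (the embedded exec.Cmd
	// fields); this minimal config exists only to exercise the handshake.
	pid, err := spawnDaemon(`{"ExitStatusFile":"/tmp/exit-status"}`, "/tmp/exit-status")
	fmt.Println(pid, err)
}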


@@ -0,0 +1,4 @@
package command
// No chroot on darwin.
func (c *SpawnDaemonCommand) configureChroot() {}


@@ -1,115 +1,16 @@
package command
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
)
import "syscall"
// Configuration for the command to start as a daemon.
type DaemonConfig struct {
exec.Cmd
// configureChroot enters the user command into a chroot if specified in the
// config and on an OS that supports Chroots.
func (c *SpawnDaemonCommand) configureChroot() {
if len(c.config.Chroot) != 0 {
if c.config.Cmd.SysProcAttr == nil {
c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{}
}
// The paths, if not /dev/null, must be either in the task's root directory
// or in the shared alloc directory.
StdoutFile string
StdinFile string
StderrFile string
Chroot string
}
// Whether to start the user command or abort.
type TaskStart bool
func (c *SpawnDaemonCommand) Run(args []string) int {
flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
if err := flags.Parse(args); err != nil {
return 1
}
// Check that we got json input.
args = flags.Args()
if len(args) != 1 {
c.Ui.Error(c.Help())
return 1
}
jsonInput, err := strconv.Unquote(args[0])
if err != nil {
return c.outputStartStatus(fmt.Errorf("Failed to unquote json input: %v", err), 1)
}
// De-serialize the passed command.
var cmd DaemonConfig
dec := json.NewDecoder(strings.NewReader(jsonInput))
if err := dec.Decode(&cmd); err != nil {
return c.outputStartStatus(err, 1)
}
// Isolate the user process.
if _, err := syscall.Setsid(); err != nil {
return c.outputStartStatus(fmt.Errorf("Failed setting sid: %v", err), 1)
}
syscall.Umask(0)
// Redirect logs.
stdo, err := os.OpenFile(cmd.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdout: %v", err), 1)
}
stde, err := os.OpenFile(cmd.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stderr: %v", err), 1)
}
stdi, err := os.OpenFile(cmd.StdinFile, os.O_CREATE|os.O_RDONLY, 0666)
if err != nil {
return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdin: %v", err), 1)
}
cmd.Cmd.Stdout = stdo
cmd.Cmd.Stderr = stde
cmd.Cmd.Stdin = stdi
// Chroot jail the process and set its working directory.
if cmd.Cmd.SysProcAttr == nil {
cmd.Cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.Cmd.SysProcAttr.Chroot = cmd.Chroot
cmd.Cmd.Dir = "/"
// Wait to get the start command.
var start TaskStart
dec = json.NewDecoder(os.Stdin)
if err := dec.Decode(&start); err != nil {
return c.outputStartStatus(err, 1)
}
if !start {
return 0
}
// Spawn the user process.
if err := cmd.Cmd.Start(); err != nil {
return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1)
}
// Indicate that the command was started successfully.
c.outputStartStatus(nil, 0)
// Wait and then output the exit status.
if err := cmd.Wait(); err != nil {
return 1
}
return 0
c.config.Cmd.SysProcAttr.Chroot = c.config.Chroot
c.config.Cmd.Dir = "/"
}
}


@@ -0,0 +1,48 @@
package command
import (
"bytes"
"encoding/json"
"fmt"
"io"
"os/exec"
"testing"
)
type nopCloser struct {
io.ReadWriter
}
func (n *nopCloser) Close() error {
return nil
}
func TestSpawnDaemon_WriteExitStatus(t *testing.T) {
// Check if there is python.
path, err := exec.LookPath("python")
if err != nil {
t.Skip("python not detected")
}
var b bytes.Buffer
daemon := &SpawnDaemonCommand{exitFile: &nopCloser{&b}}
code := 3
cmd := exec.Command(path, "./test-resources/exiter.py", fmt.Sprintf("%d", code))
err = cmd.Run()
actual := daemon.writeExitStatus(err)
if actual != code {
t.Fatalf("writeExitStatus(%v) returned %v; want %v", err, actual, code)
}
// Decode the exit status that was written to the buffer.
var exitStatus SpawnExitStatus
dec := json.NewDecoder(&b)
if err := dec.Decode(&exitStatus); err != nil {
t.Fatalf("failed to decode exit status: %v", err)
}
if exitStatus.ExitCode != code {
t.Fatalf("writeExitStatus(%v) wrote exit status %v; want %v", err, exitStatus.ExitCode, code)
}
}


@@ -1,9 +0,0 @@
// +build !linux
package command
import "errors"
func (c *SpawnDaemonCommand) Run(args []string) int {
return c.outputStartStatus(errors.New("spawn-daemon not supported"), 1)
}


@@ -0,0 +1,16 @@
// +build !windows
package command
import "syscall"
// isolateCmd sets the session id for the process and the umask.
func (c *SpawnDaemonCommand) isolateCmd() error {
if c.config.Cmd.SysProcAttr == nil {
c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{}
}
c.config.Cmd.SysProcAttr.Setsid = true
syscall.Umask(0)
return nil
}


@@ -0,0 +1,7 @@
// +build windows
package command
// No isolation on Windows.
func (c *SpawnDaemonCommand) isolateCmd() error { return nil }
func (c *SpawnDaemonCommand) configureChroot() {}


@@ -0,0 +1,3 @@
import sys
sys.exit(int(sys.argv[1]))


@@ -3,18 +3,21 @@ package discover
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"github.com/kardianos/osext"
)
const (
nomadExe = "nomad"
)
// Checks the current executable, then $PATH, then $GOPATH/bin, and finally
// the CWD, in that order. If it can't be found, an error is returned.
func NomadExecutable() (string, error) {
nomadExe := "nomad"
if runtime.GOOS == "windows" {
nomadExe = "nomad.exe"
}
// Check the current executable.
bin, err := osext.Executable()
if err != nil {
@@ -25,6 +28,11 @@ func NomadExecutable() (string, error) {
return bin, nil
}
// Check the $PATH
if bin, err := exec.LookPath(nomadExe); err == nil {
return bin, nil
}
// Check the $GOPATH.
bin = filepath.Join(os.Getenv("GOPATH"), "bin", nomadExe)
if _, err := os.Stat(bin); err == nil {


@@ -124,7 +124,7 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error {
}
}
// If we have tasks outside, do those
// If we have tasks outside, create TaskGroups for them
if o := obj.Get("task", false); o != nil {
var tasks []*structs.Task
if err := parseTasks(&tasks, o); err != nil {
@@ -134,9 +134,10 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error {
result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2)
for i, t := range tasks {
result.TaskGroups[i] = &structs.TaskGroup{
Name: t.Name,
Count: 1,
Tasks: []*structs.Task{t},
Name: t.Name,
Count: 1,
Tasks: []*structs.Task{t},
RestartPolicy: structs.NewRestartPolicy(result.Type),
}
}
}
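To make that mapping concrete, a standalone task is wrapped exactly as if it had been declared in its own single-count group. Below is a small sketch of the equivalent helper; the function name and the `jobspec` package name are assumptions for illustration, the real code is the loop above.

package jobspec // assumed package name; it is not visible in this diff

import "github.com/hashicorp/nomad/nomad/structs"

// wrapStandaloneTask mirrors what parseJob does for a task declared outside
// any group: a synthetic group named after the task, with count 1 and the
// job-type default restart policy.
func wrapStandaloneTask(jobType string, t *structs.Task) *structs.TaskGroup {
	return &structs.TaskGroup{
		Name:          t.Name,
		Count:         1,
		Tasks:         []*structs.Task{t},
		RestartPolicy: structs.NewRestartPolicy(jobType),
	}
}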
@@ -180,6 +181,7 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error {
delete(m, "constraint")
delete(m, "meta")
delete(m, "task")
delete(m, "restart")
// Default count to 1 if not specified
if _, ok := m["count"]; !ok {
@@ -199,6 +201,11 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error {
return err
}
}
g.RestartPolicy = structs.NewRestartPolicy(result.Type)
if err := parseRestartPolicy(g.RestartPolicy, o); err != nil {
return err
}
// Parse out meta fields. These are in HCL as a list so we need
// to iterate over them and merge them.
@@ -228,6 +235,42 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error {
return nil
}
func parseRestartPolicy(result *structs.RestartPolicy, obj *hclobj.Object) error {
var restartHclObj *hclobj.Object
var m map[string]interface{}
if restartHclObj = obj.Get("restart", false); restartHclObj == nil {
return nil
}
if err := hcl.DecodeObject(&m, restartHclObj); err != nil {
return err
}
if delay, ok := m["delay"]; ok {
d, err := toDuration(delay)
if err != nil {
return fmt.Errorf("Invalid Delay time in restart policy: %v", err)
}
result.Delay = d
}
if interval, ok := m["interval"]; ok {
i, err := toDuration(interval)
if err != nil {
return fmt.Errorf("Invalid Interval time in restart policy: %v", err)
}
result.Interval = i
}
if attempts, ok := m["attempts"]; ok {
a, err := toInteger(attempts)
if err != nil {
return fmt.Errorf("Invalid value in attempts: %v", err)
}
result.Attempts = a
}
return nil
}
func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error {
for _, o := range obj.Elem(false) {
var m map[string]interface{}
@@ -455,19 +498,11 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error {
}
for _, key := range []string{"stagger", "Stagger"} {
if raw, ok := m[key]; ok {
switch v := raw.(type) {
case string:
dur, err := time.ParseDuration(v)
if err != nil {
return fmt.Errorf("invalid stagger time '%s'", raw)
}
m[key] = dur
case int:
m[key] = time.Duration(v) * time.Second
default:
return fmt.Errorf("invalid type for stagger time '%s'",
raw)
staggerTime, err := toDuration(raw)
if err != nil {
return fmt.Errorf("Invalid stagger time: %v", err)
}
m[key] = staggerTime
}
}
@@ -477,3 +512,35 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error {
}
return nil
}
func toDuration(value interface{}) (time.Duration, error) {
var dur time.Duration
var err error
switch v := value.(type) {
case string:
dur, err = time.ParseDuration(v)
case int:
dur = time.Duration(v) * time.Second
default:
err = fmt.Errorf("Invalid time %v", value)
}
return dur, err
}
func toInteger(value interface{}) (int, error) {
var integer int
var err error
switch v := value.(type) {
case string:
var i int64
i, err = strconv.ParseInt(v, 10, 32)
integer = int(i)
case int:
integer = v
default:
err = fmt.Errorf("Value: %v can't be parsed as an int", value)
}
return integer, err
}
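Since these helpers back both the restart block and the update stagger, here is a quick sketch of the coercions they are meant to perform. It assumes it sits in the same package as the parser (the package name is not visible in this diff; `jobspec` is assumed).

package jobspec

import (
	"testing"
	"time"
)

// TestCoercionHelpers_Sketch documents the intended behaviour: strings go
// through time.ParseDuration / strconv.ParseInt, bare integers are read as
// seconds for durations and passed through for integers, anything else errors.
func TestCoercionHelpers_Sketch(t *testing.T) {
	if d, err := toDuration("10m"); err != nil || d != 10*time.Minute {
		t.Fatalf("toDuration(%q) = %v, %v", "10m", d, err)
	}
	if d, err := toDuration(30); err != nil || d != 30*time.Second {
		t.Fatalf("toDuration(30) = %v, %v", d, err)
	}
	if n, err := toInteger("5"); err != nil || n != 5 {
		t.Fatalf("toInteger(%q) = %v, %v", "5", n, err)
	}
	if _, err := toDuration(1.5); err == nil {
		t.Fatalf("expected an error for unsupported types")
	}
}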


@@ -48,6 +48,11 @@ func TestParse(t *testing.T) {
&structs.TaskGroup{
Name: "outside",
Count: 1,
RestartPolicy: &structs.RestartPolicy{
Attempts: 2,
Interval: 1 * time.Minute,
Delay: 15 * time.Second,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "outside",
@@ -77,6 +82,11 @@ func TestParse(t *testing.T) {
"elb_interval": "10",
"elb_checks": "3",
},
RestartPolicy: &structs.RestartPolicy{
Interval: 10 * time.Minute,
Attempts: 5,
Delay: 15 * time.Second,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "binstore",


@@ -31,6 +31,11 @@ job "binstore-storagelocker" {
group "binsl" {
count = 5
restart {
attempts = 5
interval = "10m"
delay = "15s"
}
task "binstore" {
driver = "docker"
config {


@@ -5,6 +5,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// Alloc endpoint is used for manipulating allocations
@@ -19,35 +20,45 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "list"}, time.Now())
// Capture all the allocations
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Allocs()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "allocs"}),
run: func() error {
// Capture all the allocations
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Allocs()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
alloc := raw.(*structs.Allocation)
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
var allocs []*structs.AllocListStub
for {
raw := iter.Next()
if raw == nil {
break
}
alloc := raw.(*structs.Allocation)
allocs = append(allocs, alloc.Stub())
}
reply.Allocations = allocs
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return a.srv.blockingRPC(&opts)
}
// GetAlloc is used to lookup a particular allocation
@@ -58,30 +69,38 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_alloc"}, time.Now())
// Lookup the allocation
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.AllocByID(args.AllocID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Alloc: args.AllocID}),
run: func() error {
// Lookup the allocation
snap, err := a.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.AllocByID(args.AllocID)
if err != nil {
return err
}
// Setup the output
if out != nil {
reply.Alloc = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
}
// Setup the output
reply.Alloc = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
a.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return a.srv.blockingRPC(&opts)
}


@@ -3,6 +3,7 @@ package nomad
import (
"reflect"
"testing"
"time"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/mock"
@@ -44,6 +45,74 @@ func TestAllocEndpoint_List(t *testing.T) {
}
}
func TestAllocEndpoint_List_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the alloc
alloc := mock.Alloc()
// Upsert alloc triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertAllocs(2, []*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.AllocListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 1,
},
}
start := time.Now()
var resp structs.AllocListResponse
if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 2 {
t.Fatalf("Bad index: %d %d", resp.Index, 2)
}
if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID {
t.Fatalf("bad: %#v", resp.Allocations)
}
// Client updates trigger watches
alloc2 := mock.Alloc()
alloc2.ID = alloc.ID
alloc2.ClientStatus = structs.AllocClientStatusRunning
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpdateAllocFromClient(3, alloc2); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 2
start = time.Now()
var resp2 structs.AllocListResponse
if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 3 {
t.Fatalf("Bad index: %d %d", resp2.Index, 3)
}
if len(resp2.Allocations) != 1 || resp2.Allocations[0].ID != alloc.ID ||
resp2.Allocations[0].ClientStatus != structs.AllocClientStatusRunning {
t.Fatalf("bad: %#v", resp2.Allocations)
}
}
func TestAllocEndpoint_GetAlloc(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -75,3 +144,55 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) {
t.Fatalf("bad: %#v", resp.Alloc)
}
}
func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the allocs
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
// First create an unrelated alloc
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Create the alloc we are watching later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertAllocs(200, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the alloc
get := &structs.AllocSpecificRequest{
AllocID: alloc2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.SingleAllocResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Alloc.GetAlloc", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID {
t.Fatalf("bad: %#v", resp.Alloc)
}
}


@@ -6,6 +6,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
const (
@@ -26,32 +27,40 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now())
// Look for the job
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.EvalByID(args.EvalID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Eval: args.EvalID}),
run: func() error {
// Look for the job
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.EvalByID(args.EvalID)
if err != nil {
return err
}
// Setup the output
if out != nil {
reply.Eval = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
}
// Setup the output
reply.Eval = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return e.srv.blockingRPC(&opts)
}
// Dequeue is used to dequeue a pending evaluation
@@ -219,35 +228,45 @@ func (e *Eval) List(args *structs.EvalListRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now())
// Scan all the evaluations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Evals()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "evals"}),
run: func() error {
// Scan all the evaluations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Evals()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
eval := raw.(*structs.Evaluation)
reply.Evaluations = append(reply.Evaluations, eval)
}
var evals []*structs.Evaluation
for {
raw := iter.Next()
if raw == nil {
break
}
eval := raw.(*structs.Evaluation)
evals = append(evals, eval)
}
reply.Evaluations = evals
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the evals table
index, err := snap.Index("evals")
if err != nil {
return err
}
reply.Index = index
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return e.srv.blockingRPC(&opts)
}
// Allocations is used to list the allocations for an evaluation
@@ -258,32 +277,40 @@ func (e *Eval) Allocations(args *structs.EvalSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now())
// Capture the allocations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByEval(args.EvalID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{AllocEval: args.EvalID}),
run: func() error {
// Capture the allocations
snap, err := e.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByEval(args.EvalID)
if err != nil {
return err
}
// Convert to a stub
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Convert to a stub
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
e.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return e.srv.blockingRPC(&opts)
}


@@ -51,6 +51,83 @@ func TestEvalEndpoint_GetEval(t *testing.T) {
}
}
func TestEvalEndpoint_GetEval_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the evals
eval1 := mock.Eval()
eval2 := mock.Eval()
// First create an unrelated eval
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertEvals(100, []*structs.Evaluation{eval1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert the eval we are watching later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertEvals(200, []*structs.Evaluation{eval2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the eval
req := &structs.EvalSpecificRequest{
EvalID: eval2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.SingleEvalResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Eval == nil || resp.Eval.ID != eval2.ID {
t.Fatalf("bad: %#v", resp.Eval)
}
// Eval delete triggers watches
time.AfterFunc(100*time.Millisecond, func() {
err := state.DeleteEval(300, []string{eval2.ID}, []string{})
if err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 250
var resp2 structs.SingleEvalResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 300 {
t.Fatalf("Bad index: %d %d", resp2.Index, 300)
}
if resp2.Eval != nil {
t.Fatalf("bad: %#v", resp2.Eval)
}
}
func TestEvalEndpoint_Dequeue(t *testing.T) {
s1 := testServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
@@ -334,6 +411,70 @@ func TestEvalEndpoint_List(t *testing.T) {
}
}
func TestEvalEndpoint_List_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the eval
eval := mock.Eval()
// Upsert eval triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertEvals(2, []*structs.Evaluation{eval}); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.EvalListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 1,
},
}
start := time.Now()
var resp structs.EvalListResponse
if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 2 {
t.Fatalf("Bad index: %d %d", resp.Index, 2)
}
if len(resp.Evaluations) != 1 || resp.Evaluations[0].ID != eval.ID {
t.Fatalf("bad: %#v", resp.Evaluations)
}
// Eval deletion triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteEval(3, []string{eval.ID}, nil); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 2
start = time.Now()
var resp2 structs.EvalListResponse
if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 3 {
t.Fatalf("Bad index: %d %d", resp2.Index, 3)
}
if len(resp2.Evaluations) != 0 {
t.Fatalf("bad: %#v", resp2.Evaluations)
}
}
func TestEvalEndpoint_Allocations(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -368,3 +509,55 @@ func TestEvalEndpoint_Allocations(t *testing.T) {
t.Fatalf("bad: %#v", resp.Allocations)
}
}
func TestEvalEndpoint_Allocations_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the allocs
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
// Upsert an unrelated alloc first
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert an alloc which will trigger the watch later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertAllocs(200, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the eval
get := &structs.EvalSpecificRequest{
EvalID: alloc2.EvalID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.EvalAllocationsResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Eval.Allocations", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID {
t.Fatalf("bad: %#v", resp.Allocations)
}
}


@@ -6,6 +6,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// Job endpoint is used for job interactions
@@ -180,32 +181,41 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now())
// Look for the job
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.JobByID(args.JobID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Job: args.JobID}),
run: func() error {
// Setup the output
if out != nil {
reply.Job = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
}
// Look for the job
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.JobByID(args.JobID)
if err != nil {
return err
}
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Setup the output
reply.Job = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return j.srv.blockingRPC(&opts)
}
// List is used to list the jobs registered in the system
@@ -216,35 +226,45 @@ func (j *Job) List(args *structs.JobListRequest,
}
defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now())
// Capture all the jobs
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Jobs()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "jobs"}),
run: func() error {
// Capture all the jobs
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Jobs()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
job := raw.(*structs.Job)
reply.Jobs = append(reply.Jobs, job.Stub())
}
var jobs []*structs.JobListStub
for {
raw := iter.Next()
if raw == nil {
break
}
job := raw.(*structs.Job)
jobs = append(jobs, job.Stub())
}
reply.Jobs = jobs
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the jobs table
index, err := snap.Index("jobs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return j.srv.blockingRPC(&opts)
}
// Allocations is used to list the allocations for a job
@@ -255,34 +275,43 @@ func (j *Job) Allocations(args *structs.JobSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now())
// Capture the allocations
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByJob(args.JobID)
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{AllocJob: args.JobID}),
run: func() error {
// Capture the allocations
snap, err := j.srv.fsm.State().Snapshot()
if err != nil {
return err
}
allocs, err := snap.AllocsByJob(args.JobID)
if err != nil {
return err
}
// Convert to stubs
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Convert to stubs
if len(allocs) > 0 {
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
for _, alloc := range allocs {
reply.Allocations = append(reply.Allocations, alloc.Stub())
}
}
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the allocs table
index, err := snap.Index("allocs")
if err != nil {
return err
}
reply.Index = index
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
j.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return j.srv.blockingRPC(&opts)
}
// Evaluations is used to list the evaluations for a job


@@ -3,6 +3,7 @@ package nomad
import (
"reflect"
"testing"
"time"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/mock"
@@ -363,6 +364,80 @@ func TestJobEndpoint_GetJob(t *testing.T) {
}
}
func TestJobEndpoint_GetJob_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the jobs
job1 := mock.Job()
job2 := mock.Job()
// Upsert a job we are not interested in first.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertJob(100, job1); err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert another job later which should trigger the watch.
time.AfterFunc(200*time.Millisecond, func() {
if err := state.UpsertJob(200, job2); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.JobSpecificRequest{
JobID: job2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
start := time.Now()
var resp structs.SingleJobResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Job == nil || resp.Job.ID != job2.ID {
t.Fatalf("bad: %#v", resp.Job)
}
// Job delete fires watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteJob(300, job2.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 250
start = time.Now()
var resp2 structs.SingleJobResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 300 {
t.Fatalf("Bad index: %d %d", resp2.Index, 300)
}
if resp2.Job != nil {
t.Fatalf("bad: %#v", resp2.Job)
}
}
func TestJobEndpoint_ListJobs(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -397,6 +472,70 @@ func TestJobEndpoint_ListJobs(t *testing.T) {
}
}
func TestJobEndpoint_ListJobs_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the job
job := mock.Job()
// Upsert job triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertJob(100, job); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.JobListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
start := time.Now()
var resp structs.JobListResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 100 {
t.Fatalf("Bad index: %d %d", resp.Index, 100)
}
if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID {
t.Fatalf("bad: %#v", resp.Jobs)
}
// Job deletion triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteJob(200, job.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 150
start = time.Now()
var resp2 structs.JobListResponse
if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 200 {
t.Fatalf("Bad index: %d %d", resp2.Index, 200)
}
if len(resp2.Jobs) != 0 {
t.Fatalf("bad: %#v", resp2.Jobs)
}
}
func TestJobEndpoint_Allocations(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -432,6 +571,59 @@ func TestJobEndpoint_Allocations(t *testing.T) {
}
}
func TestJobEndpoint_Allocations_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the register request
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
alloc2.JobID = "job1"
state := s1.fsm.State()
// First upsert an unrelated alloc
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc1})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert an alloc for the job we are interested in later
time.AfterFunc(200*time.Millisecond, func() {
err := state.UpsertAllocs(200, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the job's allocations
get := &structs.JobSpecificRequest{
JobID: "job1",
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.JobAllocationsResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Job.Allocations", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" {
t.Fatalf("bad: %#v", resp.Allocations)
}
}
func TestJobEndpoint_Evaluations(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()


@@ -1,6 +1,9 @@
package mock
import "github.com/hashicorp/nomad/nomad/structs"
import (
"github.com/hashicorp/nomad/nomad/structs"
"time"
)
func Node() *structs.Node {
node := &structs.Node{
@@ -71,6 +74,11 @@ func Job() *structs.Job {
&structs.TaskGroup{
Name: "web",
Count: 10,
RestartPolicy: &structs.RestartPolicy{
Attempts: 3,
Interval: 10 * time.Minute,
Delay: 1 * time.Minute,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "web",
@@ -131,6 +139,11 @@ func SystemJob() *structs.Job {
&structs.TaskGroup{
Name: "web",
Count: 1,
RestartPolicy: &structs.RestartPolicy{
Attempts: 3,
Interval: 10 * time.Minute,
Delay: 1 * time.Minute,
},
Tasks: []*structs.Task{
&structs.Task{
Name: "web",


@@ -6,6 +6,7 @@ import (
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// Node endpoint is used for client interactions
@@ -282,37 +283,45 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest,
}
defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())
// Verify the arguments
if args.NodeID == "" {
return fmt.Errorf("missing node ID")
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Node: args.NodeID}),
run: func() error {
// Verify the arguments
if args.NodeID == "" {
return fmt.Errorf("missing node ID")
}
// Look for the node
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.NodeByID(args.NodeID)
if err != nil {
return err
}
// Look for the node
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
out, err := snap.NodeByID(args.NodeID)
if err != nil {
return err
}
// Setup the output
if out != nil {
reply.Node = out
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
}
// Setup the output
reply.Node = out
if out != nil {
reply.Index = out.ModifyIndex
} else {
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
}
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return n.srv.blockingRPC(&opts)
}
// GetAllocs is used to request allocations for a specific node
@@ -330,9 +339,9 @@ func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
allocWatch: args.NodeID,
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}),
run: func() error {
// Look for the node
snap, err := n.srv.fsm.State().Snapshot()
@@ -404,35 +413,45 @@ func (n *Node) List(args *structs.NodeListRequest,
}
defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())
// Capture all the nodes
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Nodes()
if err != nil {
return err
}
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
watch: watch.NewItems(watch.Item{Table: "nodes"}),
run: func() error {
// Capture all the nodes
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.Nodes()
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
node := raw.(*structs.Node)
reply.Nodes = append(reply.Nodes, node.Stub())
}
var nodes []*structs.NodeListStub
for {
raw := iter.Next()
if raw == nil {
break
}
node := raw.(*structs.Node)
nodes = append(nodes, node.Stub())
}
reply.Nodes = nodes
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
// Use the last index that affected the nodes table
index, err := snap.Index("nodes")
if err != nil {
return err
}
reply.Index = index
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
// Set the query response
n.srv.setQueryMeta(&reply.QueryMeta)
return nil
}}
return n.srv.blockingRPC(&opts)
}
// createNodeEvals is used to create evaluations for each alloc on a node.


@@ -371,6 +371,107 @@ func TestClientEndpoint_GetNode(t *testing.T) {
}
}
func TestClientEndpoint_GetNode_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the node
node1 := mock.Node()
node2 := mock.Node()
// First create an unrelated node.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertNode(100, node1); err != nil {
t.Fatalf("err: %v", err)
}
})
// Upsert the node we are watching later
time.AfterFunc(200*time.Millisecond, func() {
if err := state.UpsertNode(200, node2); err != nil {
t.Fatalf("err: %v", err)
}
})
// Lookup the node
req := &structs.NodeSpecificRequest{
NodeID: node2.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 50,
},
}
var resp structs.SingleNodeResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 200*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 200 {
t.Fatalf("Bad index: %d %d", resp.Index, 200)
}
if resp.Node == nil || resp.Node.ID != node2.ID {
t.Fatalf("bad: %#v", resp.Node)
}
// Node update triggers watches
time.AfterFunc(100*time.Millisecond, func() {
nodeUpdate := mock.Node()
nodeUpdate.ID = node2.ID
nodeUpdate.Status = structs.NodeStatusDown
if err := state.UpsertNode(300, nodeUpdate); err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 250
var resp2 structs.SingleNodeResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 300 {
t.Fatalf("Bad index: %d %d", resp2.Index, 300)
}
if resp2.Node == nil || resp2.Node.Status != structs.NodeStatusDown {
t.Fatalf("bad: %#v", resp2.Node)
}
// Node delete triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteNode(400, node2.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 350
var resp3 structs.SingleNodeResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp3); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp3)
}
if resp3.Index != 400 {
t.Fatalf("Bad index: %d %d", resp3.Index, 400)
}
if resp3.Node != nil {
t.Fatalf("bad: %#v", resp3.Node)
}
}
func TestClientEndpoint_GetAllocs(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
@@ -457,16 +558,15 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) {
alloc.NodeID = node.ID
state := s1.fsm.State()
start := time.Now()
go func() {
time.Sleep(100 * time.Millisecond)
time.AfterFunc(100*time.Millisecond, func() {
err := state.UpsertAllocs(100, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
}
}()
})
// Lookup the allocs in a blocking query
get := &structs.NodeSpecificRequest{
req := &structs.NodeSpecificRequest{
NodeID: node.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
@@ -475,7 +575,7 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) {
},
}
var resp2 structs.NodeAllocsResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", get, &resp2); err != nil {
if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
@@ -491,6 +591,34 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) {
if len(resp2.Allocs) != 1 || resp2.Allocs[0].ID != alloc.ID {
t.Fatalf("bad: %#v", resp2.Allocs)
}
// Alloc updates fire watches
time.AfterFunc(100*time.Millisecond, func() {
allocUpdate := mock.Alloc()
allocUpdate.NodeID = alloc.NodeID
allocUpdate.ID = alloc.ID
allocUpdate.ClientStatus = structs.AllocClientStatusRunning
err := state.UpdateAllocFromClient(200, allocUpdate)
if err != nil {
t.Fatalf("err: %v", err)
}
})
req.QueryOptions.MinQueryIndex = 150
var resp3 structs.NodeAllocsResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp3); err != nil {
t.Fatalf("err: %v", err)
}
if time.Since(start) < 100*time.Millisecond {
t.Fatalf("too fast")
}
if resp3.Index != 200 {
t.Fatalf("Bad index: %d %d", resp3.Index, 200)
}
if len(resp3.Allocs) != 1 || resp3.Allocs[0].ClientStatus != structs.AllocClientStatusRunning {
t.Fatalf("bad: %#v", resp3.Allocs[0])
}
}
func TestClientEndpoint_UpdateAlloc(t *testing.T) {
@@ -752,3 +880,115 @@ func TestClientEndpoint_ListNodes(t *testing.T) {
t.Fatalf("bad: %#v", resp2.Nodes[0])
}
}
func TestClientEndpoint_ListNodes_Blocking(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
state := s1.fsm.State()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the node
node := mock.Node()
// Node upsert triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpsertNode(2, node); err != nil {
t.Fatalf("err: %v", err)
}
})
req := &structs.NodeListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
MinQueryIndex: 1,
},
}
start := time.Now()
var resp structs.NodeListResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp)
}
if resp.Index != 2 {
t.Fatalf("Bad index: %d %d", resp.Index, 2)
}
if len(resp.Nodes) != 1 || resp.Nodes[0].ID != node.ID {
t.Fatalf("bad: %#v", resp.Nodes)
}
// Node drain updates trigger watches.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpdateNodeDrain(3, node.ID, true); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 2
var resp2 structs.NodeListResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp2)
}
if resp2.Index != 3 {
t.Fatalf("Bad index: %d %d", resp2.Index, 3)
}
if len(resp2.Nodes) != 1 || !resp2.Nodes[0].Drain {
t.Fatalf("bad: %#v", resp2.Nodes)
}
// Node status update triggers watches
time.AfterFunc(100*time.Millisecond, func() {
if err := state.UpdateNodeStatus(4, node.ID, structs.NodeStatusDown); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 3
var resp3 structs.NodeListResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp3); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp3)
}
if resp3.Index != 4 {
t.Fatalf("Bad index: %d %d", resp3.Index, 4)
}
if len(resp3.Nodes) != 1 || resp3.Nodes[0].Status != structs.NodeStatusDown {
t.Fatalf("bad: %#v", resp3.Nodes)
}
// Node delete triggers watches.
time.AfterFunc(100*time.Millisecond, func() {
if err := state.DeleteNode(5, node.ID); err != nil {
t.Fatalf("err: %v", err)
}
})
req.MinQueryIndex = 4
var resp4 structs.NodeListResponse
start = time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp4); err != nil {
t.Fatalf("err: %v", err)
}
if elapsed := time.Since(start); elapsed < 100*time.Millisecond {
t.Fatalf("should block (returned in %s) %#v", elapsed, resp4)
}
if resp4.Index != 5 {
t.Fatalf("Bad index: %d %d", resp4.Index, 5)
}
if len(resp4.Nodes) != 0 {
t.Fatalf("bad: %#v", resp4.Nodes)
}
}


@@ -13,6 +13,7 @@ import (
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
"github.com/hashicorp/raft"
"github.com/hashicorp/yamux"
)
@@ -268,10 +269,10 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) {
// blockingOptions is used to parameterize blockingRPC
type blockingOptions struct {
queryOpts *structs.QueryOptions
queryMeta *structs.QueryMeta
allocWatch string
run func() error
queryOpts *structs.QueryOptions
queryMeta *structs.QueryMeta
watch watch.Items
run func() error
}
// blockingRPC is used for queries that need to wait for a
@@ -306,17 +307,13 @@ func (s *Server) blockingRPC(opts *blockingOptions) error {
state = s.fsm.State()
defer func() {
timeout.Stop()
if opts.allocWatch != "" {
state.StopWatchAllocs(opts.allocWatch, notifyCh)
}
state.StopWatch(opts.watch, notifyCh)
}()
REGISTER_NOTIFY:
// Register the notification channel. This may be done
// multiple times if we have not reached the target wait index.
if opts.allocWatch != "" {
state.WatchAllocs(opts.allocWatch, notifyCh)
}
state.Watch(opts.watch, notifyCh)
RUN_QUERY:
// Update the query meta data
@@ -327,7 +324,7 @@ RUN_QUERY:
err := opts.run()
// Check for minimum query time
if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
select {
case <-notifyCh:
goto REGISTER_NOTIFY
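The condition change above (blocking whenever the caller sets MinQueryIndex, rather than only when the reply index is non-zero) is easier to see in isolation. Below is a self-contained toy, not Nomad code, that mirrors the control flow of blockingRPC: register a watcher, run the query, and if the result has not passed MinQueryIndex, park on the notification channel and re-run when woken or when the timeout fires. All names here are illustrative.

package main

import (
	"fmt"
	"sync"
	"time"
)

// toyState stands in for the state store: a single index plus one-shot
// watchers that are notified (and dropped) whenever the index changes.
type toyState struct {
	mu       sync.Mutex
	index    uint64
	watchers []chan struct{}
}

func (s *toyState) watch(ch chan struct{}) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.watchers = append(s.watchers, ch)
}

func (s *toyState) bump() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.index++
	for _, ch := range s.watchers {
		select {
		case ch <- struct{}{}:
		default:
		}
	}
	s.watchers = nil
}

// blockingQuery mirrors blockingRPC's loop: if MinQueryIndex is unset or the
// data has moved past it, return immediately; otherwise wait for a
// notification (re-registering each time) or the timeout.
func blockingQuery(s *toyState, minIndex uint64, maxWait time.Duration) uint64 {
	timeout := time.After(maxWait)
	notify := make(chan struct{}, 1)
	for {
		s.watch(notify) // REGISTER_NOTIFY

		s.mu.Lock() // RUN_QUERY
		index := s.index
		s.mu.Unlock()

		if minIndex == 0 || index > minIndex {
			return index
		}
		select {
		case <-notify:
			// Something changed; run the query again.
		case <-timeout:
			return index
		}
	}
}

func main() {
	s := &toyState{index: 5}
	go func() {
		time.Sleep(100 * time.Millisecond)
		s.bump() // moves the index to 6 and wakes the blocked query
	}()
	fmt.Println(blockingQuery(s, 5, time.Second)) // prints 6
}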


@@ -8,8 +8,16 @@ import (
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
Key string
Value uint64
}
// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
@@ -23,45 +31,6 @@ type StateStore struct {
watch *stateWatch
}
// StateSnapshot is used to provide a point-in-time snapshot
type StateSnapshot struct {
StateStore
}
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions
type StateRestore struct {
txn *memdb.Txn
watch *stateWatch
allocNodes map[string]struct{}
}
// Abort is used to abort the restore operation
func (s *StateRestore) Abort() {
s.txn.Abort()
}
// Commit is used to commit the restore operation
func (s *StateRestore) Commit() {
s.txn.Defer(func() { s.watch.notifyAllocs(s.allocNodes) })
s.txn.Commit()
}
// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
Key string
Value uint64
}
// stateWatch holds shared state for watching updates. This is
// outside of StateStore so it can be shared with snapshots.
type stateWatch struct {
allocs map[string]*NotifyGroup
allocLock sync.Mutex
}
// NewStateStore is used to create a new state store
func NewStateStore(logOutput io.Writer) (*StateStore, error) {
// Create the MemDB
@@ -70,16 +39,11 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) {
return nil, fmt.Errorf("state store setup failed: %v", err)
}
// Create the watch entry
watch := &stateWatch{
allocs: make(map[string]*NotifyGroup),
}
// Create the state store
s := &StateStore{
logger: log.New(logOutput, "", log.LstdFlags),
db: db,
watch: watch,
watch: newStateWatch(),
}
return s, nil
}
@@ -104,55 +68,21 @@ func (s *StateStore) Snapshot() (*StateSnapshot, error) {
func (s *StateStore) Restore() (*StateRestore, error) {
txn := s.db.Txn(true)
r := &StateRestore{
txn: txn,
watch: s.watch,
allocNodes: make(map[string]struct{}),
txn: txn,
watch: s.watch,
items: watch.NewItems(),
}
return r, nil
}
// WatchAllocs is used to subscribe a channel to changes in allocations for a node
func (s *StateStore) WatchAllocs(node string, notify chan struct{}) {
s.watch.allocLock.Lock()
defer s.watch.allocLock.Unlock()
// Check for an existing notify group
if grp, ok := s.watch.allocs[node]; ok {
grp.Wait(notify)
return
}
// Create new notify group
grp := &NotifyGroup{}
grp.Wait(notify)
s.watch.allocs[node] = grp
// Watch subscribes a channel to a set of watch items.
func (s *StateStore) Watch(items watch.Items, notify chan struct{}) {
s.watch.watch(items, notify)
}
// StopWatchAllocs is used to unsubscribe a channel from changes in allocations
func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) {
s.watch.allocLock.Lock()
defer s.watch.allocLock.Unlock()
// Check for an existing notify group
if grp, ok := s.watch.allocs[node]; ok {
grp.Clear(notify)
if grp.Empty() {
delete(s.watch.allocs, node)
}
}
}
// notifyAllocs is used to notify any node alloc listeners of a change
func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) {
w.allocLock.Lock()
defer w.allocLock.Unlock()
for node := range nodes {
if grp, ok := w.allocs[node]; ok {
grp.Notify()
delete(w.allocs, node)
}
}
// StopWatch unsubscribes a channel from a set of watch items.
func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) {
s.watch.stopWatch(items, notify)
}
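A hedged sketch of how a consumer of the new subscription API is expected to use it, following the same pattern blockingRPC does. The package name and the helper function are assumptions; the NewItems/Add/Watch/StopWatch calls are the ones introduced above.

package state // assumed package name; not visible in this diff

import "github.com/hashicorp/nomad/nomad/watch"

// waitForNode blocks until a write touches the nodes table or the given node
// (for example UpsertNode or DeleteNode above), or until stop is closed.
func waitForNode(s *StateStore, nodeID string, stop <-chan struct{}) {
	items := watch.NewItems()
	items.Add(watch.Item{Table: "nodes"})
	items.Add(watch.Item{Node: nodeID})

	notify := make(chan struct{}, 1)
	s.Watch(items, notify)
	defer s.StopWatch(items, notify)

	select {
	case <-notify:
		// Fired via txn.Defer(func() { s.watch.notify(watcher) }) on commit.
	case <-stop:
	}
}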
// UpsertNode is used to register a node or update a node definition
@@ -162,6 +92,10 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: node.ID})
// Check if the node already exists
existing, err := txn.First("nodes", "id", node.ID)
if err != nil {
@@ -187,6 +121,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -196,6 +131,10 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: nodeID})
// Lookup the node
existing, err := txn.First("nodes", "id", nodeID)
if err != nil {
@@ -213,6 +152,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -222,6 +162,10 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: nodeID})
// Lookup the node
existing, err := txn.First("nodes", "id", nodeID)
if err != nil {
@@ -248,6 +192,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -257,6 +202,10 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "nodes"})
watcher.Add(watch.Item{Node: nodeID})
// Lookup the node
existing, err := txn.First("nodes", "id", nodeID)
if err != nil {
@@ -283,6 +232,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -319,6 +269,10 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "jobs"})
watcher.Add(watch.Item{Job: job.ID})
// Check if the job already exists
existing, err := txn.First("jobs", "id", job.ID)
if err != nil {
@@ -342,6 +296,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -351,6 +306,10 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error {
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "jobs"})
watcher.Add(watch.Item{Job: jobID})
// Lookup the node
existing, err := txn.First("jobs", "id", jobID)
if err != nil {
@@ -368,6 +327,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -417,13 +377,18 @@ func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "evals"})
// Do a nested upsert
for _, eval := range evals {
watcher.Add(watch.Item{Eval: eval.ID})
if err := s.nestedUpsertEval(txn, index, eval); err != nil {
return err
}
}
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -459,7 +424,9 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct
func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
txn := s.db.Txn(true)
defer txn.Abort()
nodes := make(map[string]struct{})
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "evals"})
watcher.Add(watch.Item{Table: "allocs"})
for _, eval := range evals {
existing, err := txn.First("evals", "id", eval)
@@ -472,6 +439,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e
if err := txn.Delete("evals", existing); err != nil {
return fmt.Errorf("eval delete failed: %v", err)
}
watcher.Add(watch.Item{Eval: eval})
}
for _, alloc := range allocs {
@@ -482,10 +450,14 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e
if existing == nil {
continue
}
nodes[existing.(*structs.Allocation).NodeID] = struct{}{}
if err := txn.Delete("allocs", existing); err != nil {
return fmt.Errorf("alloc delete failed: %v", err)
}
realAlloc := existing.(*structs.Allocation)
watcher.Add(watch.Item{Alloc: realAlloc.ID})
watcher.Add(watch.Item{AllocEval: realAlloc.EvalID})
watcher.Add(watch.Item{AllocJob: realAlloc.JobID})
watcher.Add(watch.Item{AllocNode: realAlloc.NodeID})
}
// Update the indexes
@@ -495,7 +467,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e
if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notifyAllocs(nodes) })
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -557,6 +530,13 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati
txn := s.db.Txn(true)
defer txn.Abort()
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "allocs"})
watcher.Add(watch.Item{Alloc: alloc.ID})
watcher.Add(watch.Item{AllocEval: alloc.EvalID})
watcher.Add(watch.Item{AllocJob: alloc.JobID})
watcher.Add(watch.Item{AllocNode: alloc.NodeID})
// Look for existing alloc
existing, err := txn.First("allocs", "id", alloc.ID)
if err != nil {
@@ -590,8 +570,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati
return fmt.Errorf("index update failed: %v", err)
}
nodes := map[string]struct{}{alloc.NodeID: struct{}{}}
txn.Defer(func() { s.watch.notifyAllocs(nodes) })
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -601,7 +580,9 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati
func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
txn := s.db.Txn(true)
defer txn.Abort()
nodes := make(map[string]struct{})
watcher := watch.NewItems()
watcher.Add(watch.Item{Table: "allocs"})
// Handle the allocations
for _, alloc := range allocs {
@@ -620,10 +601,14 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er
alloc.ClientStatus = exist.ClientStatus
alloc.ClientDescription = exist.ClientDescription
}
nodes[alloc.NodeID] = struct{}{}
if err := txn.Insert("allocs", alloc); err != nil {
return fmt.Errorf("alloc insert failed: %v", err)
}
watcher.Add(watch.Item{Alloc: alloc.ID})
watcher.Add(watch.Item{AllocEval: alloc.EvalID})
watcher.Add(watch.Item{AllocJob: alloc.JobID})
watcher.Add(watch.Item{AllocNode: alloc.NodeID})
}
// Update the indexes
@@ -631,7 +616,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er
return fmt.Errorf("index update failed: %v", err)
}
txn.Defer(func() { s.watch.notifyAllocs(nodes) })
txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
}
@@ -753,8 +738,35 @@ func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
return iter, nil
}
// StateSnapshot is used to provide a point-in-time snapshot
type StateSnapshot struct {
StateStore
}
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions
type StateRestore struct {
txn *memdb.Txn
watch *stateWatch
items watch.Items
}
// Abort is used to abort the restore operation
func (s *StateRestore) Abort() {
s.txn.Abort()
}
// Commit is used to commit the restore operation
func (s *StateRestore) Commit() {
s.txn.Defer(func() { s.watch.notify(s.items) })
s.txn.Commit()
}
// NodeRestore is used to restore a node
func (r *StateRestore) NodeRestore(node *structs.Node) error {
r.items.Add(watch.Item{Table: "nodes"})
r.items.Add(watch.Item{Node: node.ID})
if err := r.txn.Insert("nodes", node); err != nil {
return fmt.Errorf("node insert failed: %v", err)
}
@@ -763,6 +775,8 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error {
// JobRestore is used to restore a job
func (r *StateRestore) JobRestore(job *structs.Job) error {
r.items.Add(watch.Item{Table: "jobs"})
r.items.Add(watch.Item{Job: job.ID})
if err := r.txn.Insert("jobs", job); err != nil {
return fmt.Errorf("job insert failed: %v", err)
}
@@ -771,6 +785,8 @@ func (r *StateRestore) JobRestore(job *structs.Job) error {
// EvalRestore is used to restore an evaluation
func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
r.items.Add(watch.Item{Table: "evals"})
r.items.Add(watch.Item{Eval: eval.ID})
if err := r.txn.Insert("evals", eval); err != nil {
return fmt.Errorf("eval insert failed: %v", err)
}
@@ -779,7 +795,11 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
// AllocRestore is used to restore an allocation
func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
r.allocNodes[alloc.NodeID] = struct{}{}
r.items.Add(watch.Item{Table: "allocs"})
r.items.Add(watch.Item{Alloc: alloc.ID})
r.items.Add(watch.Item{AllocEval: alloc.EvalID})
r.items.Add(watch.Item{AllocJob: alloc.JobID})
r.items.Add(watch.Item{AllocNode: alloc.NodeID})
if err := r.txn.Insert("allocs", alloc); err != nil {
return fmt.Errorf("alloc insert failed: %v", err)
}
@@ -793,3 +813,59 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
}
return nil
}
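
Restore, the per-object restore helpers, and Commit above form one large memdb transaction whose accumulated watch items are notified exactly once at commit time. A minimal sketch of that flow, assuming it also lives in the state package; restoreSketch and its inputs are illustrative only:

```go
package state

import "github.com/hashicorp/nomad/nomad/structs"

// restoreSketch replays nodes and jobs through a single restore
// transaction; the accumulated watch items fire once on Commit.
func restoreSketch(state *StateStore, nodes []*structs.Node, jobs []*structs.Job) error {
	restore, err := state.Restore()
	if err != nil {
		return err
	}
	for _, node := range nodes {
		if err := restore.NodeRestore(node); err != nil {
			restore.Abort() // discard the whole transaction, no items fire
			return err
		}
	}
	for _, job := range jobs {
		if err := restore.JobRestore(job); err != nil {
			restore.Abort()
			return err
		}
	}
	restore.Commit() // defers a single notify over every item added above
	return nil
}
```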
// stateWatch holds shared state for watching updates. This is
// outside of StateStore so it can be shared with snapshots.
type stateWatch struct {
items map[watch.Item]*NotifyGroup
l sync.Mutex
}
// newStateWatch creates a new stateWatch for change notification.
func newStateWatch() *stateWatch {
return &stateWatch{
items: make(map[watch.Item]*NotifyGroup),
}
}
// watch subscribes a channel to the given watch items.
func (w *stateWatch) watch(items watch.Items, ch chan struct{}) {
w.l.Lock()
defer w.l.Unlock()
for item, _ := range items {
grp, ok := w.items[item]
if !ok {
grp = new(NotifyGroup)
w.items[item] = grp
}
grp.Wait(ch)
}
}
// stopWatch unsubscribes a channel from the given watch items.
func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) {
w.l.Lock()
defer w.l.Unlock()
for item, _ := range items {
if grp, ok := w.items[item]; ok {
grp.Clear(ch)
if grp.Empty() {
delete(w.items, item)
}
}
}
}
// notify is used to fire notifications on the given watch items.
func (w *stateWatch) notify(items watch.Items) {
w.l.Lock()
defer w.l.Unlock()
for wi, _ := range items {
if grp, ok := w.items[wi]; ok {
grp.Notify()
}
}
}
View File
@@ -8,6 +8,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/watch"
)
func testStateStore(t *testing.T) *StateStore {
@@ -25,6 +26,11 @@ func TestStateStore_UpsertNode_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -46,12 +52,19 @@ func TestStateStore_UpsertNode_Node(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_DeleteNode_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -78,12 +91,19 @@ func TestStateStore_DeleteNode_Node(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpdateNodeStatus_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -113,12 +133,19 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpdateNodeDrain_Node(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
err := state.UpsertNode(1000, node)
if err != nil {
t.Fatalf("err: %v", err)
@@ -148,6 +175,8 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_Nodes(t *testing.T) {
@@ -188,18 +217,22 @@ func TestStateStore_Nodes(t *testing.T) {
func TestStateStore_RestoreNode(t *testing.T) {
state := testStateStore(t)
node := mock.Node()
notify := setupNotifyTest(
state,
watch.Item{Table: "nodes"},
watch.Item{Node: node.ID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
node := mock.Node()
err = restore.NodeRestore(node)
if err != nil {
t.Fatalf("err: %v", err)
}
restore.Commit()
out, err := state.NodeByID(node.ID)
@@ -210,12 +243,19 @@ func TestStateStore_RestoreNode(t *testing.T) {
if !reflect.DeepEqual(out, node) {
t.Fatalf("Bad: %#v %#v", out, node)
}
notify.verify(t)
}
func TestStateStore_UpsertJob_Job(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -237,12 +277,19 @@ func TestStateStore_UpsertJob_Job(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpdateUpsertJob_Job(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -278,12 +325,19 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_DeleteJob_Job(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -310,6 +364,8 @@ func TestStateStore_DeleteJob_Job(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_Jobs(t *testing.T) {
@@ -417,18 +473,22 @@ func TestStateStore_JobsByScheduler(t *testing.T) {
func TestStateStore_RestoreJob(t *testing.T) {
state := testStateStore(t)
job := mock.Job()
notify := setupNotifyTest(
state,
watch.Item{Table: "jobs"},
watch.Item{Job: job.ID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
job := mock.Job()
err = restore.JobRestore(job)
if err != nil {
t.Fatalf("err: %v", err)
}
restore.Commit()
out, err := state.JobByID(job.ID)
@@ -439,6 +499,8 @@ func TestStateStore_RestoreJob(t *testing.T) {
if !reflect.DeepEqual(out, job) {
t.Fatalf("Bad: %#v %#v", out, job)
}
notify.verify(t)
}
func TestStateStore_Indexes(t *testing.T) {
@@ -503,6 +565,11 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) {
state := testStateStore(t)
eval := mock.Eval()
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Eval: eval.ID})
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
if err != nil {
t.Fatalf("err: %v", err)
@@ -524,6 +591,8 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) {
@@ -535,6 +604,11 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) {
t.Fatalf("err: %v", err)
}
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Eval: eval.ID})
eval2 := mock.Eval()
eval2.ID = eval.ID
err = state.UpsertEvals(1001, []*structs.Evaluation{eval2})
@@ -565,40 +639,54 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_DeleteEval_Eval(t *testing.T) {
state := testStateStore(t)
eval := mock.Eval()
eval1 := mock.Eval()
eval2 := mock.Eval()
alloc := mock.Alloc()
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
err := state.UpsertEvals(1000, []*structs.Evaluation{eval, eval2})
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Table: "allocs"},
watch.Item{Eval: eval1.ID},
watch.Item{Eval: eval2.ID},
watch.Item{Alloc: alloc1.ID},
watch.Item{Alloc: alloc2.ID},
watch.Item{AllocEval: alloc1.EvalID},
watch.Item{AllocEval: alloc2.EvalID},
watch.Item{AllocJob: alloc1.JobID},
watch.Item{AllocJob: alloc2.JobID},
watch.Item{AllocNode: alloc1.NodeID},
watch.Item{AllocNode: alloc2.NodeID})
err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2})
if err != nil {
t.Fatalf("err: %v", err)
}
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc1, alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
notify1 := make(chan struct{}, 1)
state.WatchAllocs(alloc.NodeID, notify1)
err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID})
err = state.DeleteEval(1002, []string{eval1.ID, eval2.ID}, []string{alloc1.ID, alloc2.ID})
if err != nil {
t.Fatalf("err: %v", err)
}
out, err := state.EvalByID(eval.ID)
out, err := state.EvalByID(eval1.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if out != nil {
t.Fatalf("bad: %#v %#v", eval, out)
t.Fatalf("bad: %#v %#v", eval1, out)
}
out, err = state.EvalByID(eval2.ID)
@@ -607,16 +695,16 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) {
}
if out != nil {
t.Fatalf("bad: %#v %#v", eval, out)
t.Fatalf("bad: %#v %#v", eval1, out)
}
outA, err := state.AllocByID(alloc.ID)
outA, err := state.AllocByID(alloc1.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if out != nil {
t.Fatalf("bad: %#v %#v", alloc, outA)
t.Fatalf("bad: %#v %#v", alloc1, outA)
}
outA, err = state.AllocByID(alloc2.ID)
@@ -625,7 +713,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) {
}
if out != nil {
t.Fatalf("bad: %#v %#v", alloc, outA)
t.Fatalf("bad: %#v %#v", alloc1, outA)
}
index, err := state.Index("evals")
@@ -644,11 +732,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) {
t.Fatalf("bad: %d", index)
}
select {
case <-notify1:
default:
t.Fatalf("should be notified")
}
notify.verify(t)
}
func TestStateStore_EvalsByJob(t *testing.T) {
@@ -720,34 +804,48 @@ func TestStateStore_Evals(t *testing.T) {
func TestStateStore_RestoreEval(t *testing.T) {
state := testStateStore(t)
eval := mock.Eval()
notify := setupNotifyTest(
state,
watch.Item{Table: "evals"},
watch.Item{Eval: eval.ID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
job := mock.Eval()
err = restore.EvalRestore(job)
err = restore.EvalRestore(eval)
if err != nil {
t.Fatalf("err: %v", err)
}
restore.Commit()
out, err := state.EvalByID(job.ID)
out, err := state.EvalByID(eval.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if !reflect.DeepEqual(out, job) {
t.Fatalf("Bad: %#v %#v", out, job)
if !reflect.DeepEqual(out, eval) {
t.Fatalf("Bad: %#v %#v", out, eval)
}
notify.verify(t)
}
func TestStateStore_UpdateAllocFromClient(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc.ID},
watch.Item{AllocEval: alloc.EvalID},
watch.Item{AllocJob: alloc.JobID},
watch.Item{AllocNode: alloc.NodeID})
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
@@ -779,12 +877,22 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_UpsertAlloc_Alloc(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc.ID},
watch.Item{AllocEval: alloc.EvalID},
watch.Item{AllocJob: alloc.JobID},
watch.Item{AllocNode: alloc.NodeID})
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
@@ -806,35 +914,8 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) {
if index != 1000 {
t.Fatalf("bad: %d", index)
}
}
func TestStateStore_WatchAllocs(t *testing.T) {
state := testStateStore(t)
notify1 := make(chan struct{}, 1)
notify2 := make(chan struct{}, 1)
state.WatchAllocs("foo", notify1)
state.WatchAllocs("foo", notify2)
state.StopWatchAllocs("foo", notify2)
alloc := mock.Alloc()
alloc.NodeID = "foo"
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
}
select {
case <-notify1:
default:
t.Fatalf("should be notified")
}
select {
case <-notify2:
t.Fatalf("should not be notified")
default:
}
notify.verify(t)
}
func TestStateStore_UpdateAlloc_Alloc(t *testing.T) {
@@ -849,6 +930,15 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) {
alloc2 := mock.Alloc()
alloc2.ID = alloc.ID
alloc2.NodeID = alloc.NodeID + ".new"
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc2.ID},
watch.Item{AllocEval: alloc2.EvalID},
watch.Item{AllocJob: alloc2.JobID},
watch.Item{AllocNode: alloc2.NodeID})
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2})
if err != nil {
t.Fatalf("err: %v", err)
@@ -877,6 +967,8 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) {
if index != 1001 {
t.Fatalf("bad: %d", index)
}
notify.verify(t)
}
func TestStateStore_EvictAlloc_Alloc(t *testing.T) {
@@ -1008,13 +1100,21 @@ func TestStateStore_Allocs(t *testing.T) {
func TestStateStore_RestoreAlloc(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
notify := setupNotifyTest(
state,
watch.Item{Table: "allocs"},
watch.Item{Alloc: alloc.ID},
watch.Item{AllocEval: alloc.EvalID},
watch.Item{AllocJob: alloc.JobID},
watch.Item{AllocNode: alloc.NodeID})
restore, err := state.Restore()
if err != nil {
t.Fatalf("err: %v", err)
}
alloc := mock.Alloc()
err = restore.AllocRestore(alloc)
if err != nil {
t.Fatalf("err: %v", err)
@@ -1030,6 +1130,87 @@ func TestStateStore_RestoreAlloc(t *testing.T) {
if !reflect.DeepEqual(out, alloc) {
t.Fatalf("Bad: %#v %#v", out, alloc)
}
notify.verify(t)
}
func TestStateWatch_watch(t *testing.T) {
sw := newStateWatch()
notify1 := make(chan struct{}, 1)
notify2 := make(chan struct{}, 1)
notify3 := make(chan struct{}, 1)
// Notifications trigger subscribed channels
sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify1)
sw.watch(watch.NewItems(watch.Item{Table: "bar"}), notify2)
sw.watch(watch.NewItems(watch.Item{Table: "baz"}), notify3)
items := watch.NewItems()
items.Add(watch.Item{Table: "foo"})
items.Add(watch.Item{Table: "bar"})
sw.notify(items)
if len(notify1) != 1 {
t.Fatalf("should notify")
}
if len(notify2) != 1 {
t.Fatalf("should notify")
}
if len(notify3) != 0 {
t.Fatalf("should not notify")
}
}
func TestStateWatch_stopWatch(t *testing.T) {
sw := newStateWatch()
notify := make(chan struct{})
// First subscribe
sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify)
// Unsubscribe stop notifications
sw.stopWatch(watch.NewItems(watch.Item{Table: "foo"}), notify)
// Check that the group was removed
if _, ok := sw.items[watch.Item{Table: "foo"}]; ok {
t.Fatalf("should remove group")
}
// Check that we are not notified
sw.notify(watch.NewItems(watch.Item{Table: "foo"}))
if len(notify) != 0 {
t.Fatalf("should not notify")
}
}
// setupNotifyTest takes a state store and a set of watch items, then creates
// and subscribes a notification channel for each item.
func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest {
var n notifyTest
for _, item := range items {
ch := make(chan struct{}, 1)
state.Watch(watch.NewItems(item), ch)
n = append(n, &notifyTestCase{item, ch})
}
return n
}
// notifyTestCase is used to set up and verify watch triggers.
type notifyTestCase struct {
item watch.Item
ch chan struct{}
}
// notifyTest is a suite of notifyTestCases.
type notifyTest []*notifyTestCase
// verify ensures that each channel received a notification.
func (n notifyTest) verify(t *testing.T) {
for _, tcase := range n {
if len(tcase.ch) != 1 {
t.Fatalf("should notify %#v", tcase.item)
}
}
}
// NodeIDSort is used to sort nodes by ID
View File
@@ -14,8 +14,17 @@ import (
)
var (
ErrNoLeader = fmt.Errorf("No cluster leader")
ErrNoRegionPath = fmt.Errorf("No path to region")
ErrNoLeader = fmt.Errorf("No cluster leader")
ErrNoRegionPath = fmt.Errorf("No path to region")
defaultServiceJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second,
Attempts: 2,
Interval: 1 * time.Minute,
}
defaultBatchJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second,
Attempts: 15,
}
)
type MessageType uint8
@@ -898,6 +907,33 @@ func (u *UpdateStrategy) Rolling() bool {
return u.Stagger > 0 && u.MaxParallel > 0
}
// RestartPolicy influences how Nomad restarts Tasks when they
// crash or fail.
type RestartPolicy struct {
Attempts int
Interval time.Duration
Delay time.Duration
}
func (r *RestartPolicy) Validate() error {
if time.Duration(r.Attempts)*r.Delay > r.Interval {
return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)
}
return nil
}
func NewRestartPolicy(jobType string) *RestartPolicy {
switch jobType {
case JobTypeService:
rp := defaultServiceJobRestartPolicy
return &rp
case JobTypeBatch:
rp := defaultBatchJobRestartPolicy
return &rp
}
return nil
}
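
Validate above rejects a policy whose restarts cannot fit in its window: Attempts * Delay must not exceed Interval. A minimal sketch of that arithmetic, assuming it sits in the structs package; the function name and the literal durations are illustrative only:

```go
package structs

import (
	"fmt"
	"time"
)

// restartPolicySketch walks the Validate arithmetic: a policy is rejected
// when Attempts * Delay exceeds Interval, because that many restarts could
// never fit inside the window.
func restartPolicySketch() error {
	ok := &RestartPolicy{
		Attempts: 2,
		Delay:    15 * time.Second,
		Interval: 1 * time.Minute, // 2 * 15s = 30s <= 1m, accepted
	}
	if err := ok.Validate(); err != nil {
		return err
	}

	bad := &RestartPolicy{
		Attempts: 10,
		Delay:    10 * time.Second,
		Interval: 30 * time.Second, // 10 * 10s = 100s > 30s, rejected
	}
	if err := bad.Validate(); err == nil {
		return fmt.Errorf("expected the policy to fail validation")
	}
	return nil
}
```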
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
@@ -913,6 +949,9 @@ type TaskGroup struct {
// all the tasks contained.
Constraints []*Constraint
//RestartPolicy of a TaskGroup
RestartPolicy *RestartPolicy
// Tasks are the collection of tasks that this task group needs to run
Tasks []*Task
@@ -940,6 +979,10 @@ func (tg *TaskGroup) Validate() error {
}
}
if err := tg.RestartPolicy.Validate(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
// Check for duplicate tasks
tasks := make(map[string]int)
for idx, task := range tg.Tasks {
View File
@@ -1,11 +1,11 @@
package structs
import (
"github.com/hashicorp/go-multierror"
"reflect"
"strings"
"testing"
"github.com/hashicorp/go-multierror"
"time"
)
func TestJob_Validate(t *testing.T) {
@@ -44,11 +44,27 @@ func TestJob_Validate(t *testing.T) {
TaskGroups: []*TaskGroup{
&TaskGroup{
Name: "web",
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
},
&TaskGroup{
Name: "web",
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
},
&TaskGroup{
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
},
&TaskGroup{},
},
}
err = j.Validate()
@@ -65,7 +81,13 @@ func TestJob_Validate(t *testing.T) {
}
func TestTaskGroup_Validate(t *testing.T) {
tg := &TaskGroup{}
tg := &TaskGroup{
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
}
err := tg.Validate()
mErr := err.(*multierror.Error)
if !strings.Contains(mErr.Errors[0].Error(), "group name") {
@@ -86,6 +108,11 @@ func TestTaskGroup_Validate(t *testing.T) {
&Task{Name: "web"},
&Task{},
},
RestartPolicy: &RestartPolicy{
Interval: 5 * time.Minute,
Delay: 10 * time.Second,
Attempts: 10,
},
}
err = tg.Validate()
mErr = err.(*multierror.Error)
nomad/watch/watch.go Normal file
View File
@@ -0,0 +1,38 @@
package watch
// The watch package provides a means of describing a watch for a blocking
// query. It is exported so it may be shared between Nomad's RPC layer and
// the underlying state store.
// Item describes the scope of a watch. It is used to provide a uniform
// input for subscribe/unsubscribe and notification firing. Specifying
// multiple fields does not place a watch on multiple items. Each Item
// describes exactly one scoped watch.
type Item struct {
Alloc string
AllocEval string
AllocJob string
AllocNode string
Eval string
Job string
Node string
Table string
}
// Items is a helper used to construct a set of watch items. It deduplicates
// the items as they are added using map keys.
type Items map[Item]struct{}
// NewItems creates a new Items set and adds the given items.
func NewItems(items ...Item) Items {
wi := make(Items)
for _, item := range items {
wi.Add(item)
}
return wi
}
// Add adds an item to the watch set.
func (wi Items) Add(i Item) {
wi[i] = struct{}{}
}
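
As the comments above note, an Item with several fields set is a single composite key rather than several separate watches, and Items dedupes on that exact key. A small sketch, assuming it sits in this watch package; the function name and the "node-1" value are illustrative only:

```go
package watch

import "fmt"

// itemsSketch shows that an Item with several fields set is one composite
// key (not several watches), and that Items dedupes on that exact key.
func itemsSketch() {
	items := NewItems(
		Item{Table: "allocs"},
		Item{Table: "allocs", Node: "node-1"}, // a different key than above
	)
	items.Add(Item{Table: "allocs"}) // exact duplicate, deduped by the map

	fmt.Println(len(items)) // prints 2
}
```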
nomad/watch/watch_test.go Normal file
View File
@@ -0,0 +1,31 @@
package watch
import (
"testing"
)
func TestWatchItems(t *testing.T) {
// Creates an empty set of items
wi := NewItems()
if len(wi) != 0 {
t.Fatalf("expect 0 items, got: %#v", wi)
}
// Creates a new set of supplied items
wi = NewItems(Item{Table: "foo"})
if len(wi) != 1 {
t.Fatalf("expected 1 item, got: %#v", wi)
}
// Adding items works
wi.Add(Item{Node: "bar"})
if len(wi) != 2 {
t.Fatalf("expected 2 items, got: %#v", wi)
}
// Adding duplicates auto-dedupes
wi.Add(Item{Table: "foo"})
if len(wi) != 2 {
t.Fatalf("expected 2 items, got: %#v", wi)
}
}
View File
@@ -42,7 +42,7 @@ nodes, unless otherwise specified:
as `us-west` and `us-east`. Defaults to `global`.
* `datacenter`: Datacenter of the local agent. All members of a datacenter
should all share a local LAN connection. Defaults to `dc1`.
should share a local LAN connection. Defaults to `dc1`.
* <a id="name">`name`</a>: The name of the local node. This value is used to
identify individual nodes in a given datacenter and must be unique
@@ -103,7 +103,7 @@ nodes, unless otherwise specified:
This can be used to advertise a different address to the peers of a server
node to support more complex network configurations such as NAT. This
configuration is optional, and defaults to the bind address of the specific
network service if it is not provided. This configuration is only appicable
network service if it is not provided. This configuration is only applicable
on server nodes. The value is a map of IP addresses and supports the
following keys:
<br>
@@ -125,13 +125,13 @@ nodes, unless otherwise specified:
* `disable_hostname`: A boolean indicating if gauge values should not be
prefixed with the local hostname.
* `leave_on_interrupt`: Enables gracefully leave when receiving the
* `leave_on_interrupt`: Enables gracefully leaving when receiving the
interrupt signal. By default, the agent will exit forcefully on any signal.
* `leave_on_terminate`: Enables gracefully leave when receiving the
* `leave_on_terminate`: Enables gracefully leaving when receiving the
terminate signal. By default, the agent will exit forcefully on any signal.
* `enable_syslog`: Enables logging to syslog. This option only work on
* `enable_syslog`: Enables logging to syslog. This option only works on
Unix based systems.
* `syslog_facility`: Controls the syslog facility that is used. By default,
View File
@@ -11,7 +11,7 @@ description: |-
Name: `exec`
The `exec` driver is used to simply execute a particular command for a task.
However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation
However, unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation
primitives of the operating system to limit the task's access to resources. While
simple, since the `exec` driver can invoke any command, it can be used to call
scripts or other wrappers which provide higher level features.
@@ -28,9 +28,10 @@ must reference it in the `command` as show in the examples below
## Client Requirements
The `exec` driver can run on all supported operating systems but to provide
proper isolation the client must be run as root on non-Windows operating systems.
Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
The `exec` driver can only be run on Linux, with Nomad running as root.
`exec` is limited to this configuration because resource isolation is
currently only guaranteed on Linux. Further, the host must have cgroups mounted
properly in order for the driver to work.
You must specify a `command` to be executed. Optionally you can specify an
`artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
@@ -68,8 +69,5 @@ The `exec` driver will set the following client attributes:
The resource isolation provided varies by the operating system of
the client and the configuration.
On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the
On Linux, Nomad will use cgroups, and a chroot to isolate the
resources of a process and as such the Nomad agent must be run as root.
On Windows, the task driver will just execute the command with no additional
resource isolation.
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -179,4 +184,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -56,4 +61,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -3,7 +3,7 @@ layout: "http"
page_title: "HTTP API: /v1/evaluation"
sidebar_current: "docs-http-eval-"
description: |-
The '/1/evaluation' endpoint is used to query a specific evaluation.
The '/v1/evaluation' endpoint is used to query a specific evaluation.
---
# /v1/evaluation
@@ -17,7 +17,7 @@ be specified using the `?region=` query parameter.
<dl>
<dt>Description</dt>
<dd>
Lists all the evaluations.
Query a specific evaluation.
</dd>
<dt>Method</dt>
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -57,9 +62,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/evaluation/\<ID\>/allocations
## GET
<dl>
<dt>Description</dt>
<dd>
@@ -77,6 +79,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -102,4 +109,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -59,4 +64,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,7 @@ The API is modeled closely on the underlying data model. Use the links to the le
documentation about specific endpoints. There are also "Agent" APIs which interact with
a specific agent and not the broader cluster used for administration.
<a name="blocking-queries"></a>
## Blocking Queries
Certain endpoints support a feature called a "blocking query." A blocking query
View File
@@ -6,7 +6,7 @@ description: |-
The '/v1/job' endpoint is used for CRUD on a single job.
---
# /v1/job/\<ID\>
# /v1/job
The `job` endpoint is used for CRUD on a single job. By default, the agent's local
region is used; another region can be specified using the `?region=` query parameter.
@@ -30,6 +30,11 @@ region is used; another region can be specified using the `?region=` query param
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -136,6 +141,105 @@ region is used; another region can be specified using the `?region=` query param
</dd>
</dl>
<dl>
<dt>Description</dt>
<dd>
Query the allocations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/allocations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "3575ba9d-7a12-0c96-7b28-add168c67984",
"EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Name": "binstore-storagelocker.binsl[0]",
"NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b",
"JobID": "binstore-storagelocker",
"TaskGroup": "binsl",
"DesiredStatus": "run",
"DesiredDescription": "",
"ClientStatus": "running",
"ClientDescription": "",
"CreateIndex": 16,
"ModifyIndex": 16
},
...
]
```
</dd>
</dl>
<dl>
<dt>Description</dt>
<dd>
Query the evaluations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/evaluations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Priority": 50,
"Type": "service",
"TriggeredBy": "job-register",
"JobID": "binstore-storagelocker",
"JobModifyIndex": 14,
"NodeID": "",
"NodeModifyIndex": 0,
"Status": "complete",
"StatusDescription": "",
"Wait": 0,
"NextEval": "",
"PreviousEval": "",
"CreateIndex": 15,
"ModifyIndex": 17
},
...
]
```
</dd>
</dl>
## PUT / POST
<dl>
@@ -177,6 +281,38 @@ region is used; another region can be specified using the `?region=` query param
</dd>
</dl>
<dl>
<dt>Description</dt>
<dd>
Creates a new evaluation for the given job. This can be used to force
run the scheduling logic if necessary.
</dd>
<dt>Method</dt>
<dd>PUT or POST</dd>
<dt>URL</dt>
<dd>`/v1/job/<ID>/evaluate`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
{
"EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac",
"EvalCreateIndex": 35,
"JobModifyIndex": 34,
}
```
</dd>
</dl>
## DELETE
<dl>
@@ -209,134 +345,3 @@ region is used; another region can be specified using the `?region=` query param
</dd>
</dl>
# /v1/job/\<ID\>/allocations
## GET
<dl>
<dt>Description</dt>
<dd>
Query the allocations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/allocations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "3575ba9d-7a12-0c96-7b28-add168c67984",
"EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Name": "binstore-storagelocker.binsl[0]",
"NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b",
"JobID": "binstore-storagelocker",
"TaskGroup": "binsl",
"DesiredStatus": "run",
"DesiredDescription": "",
"ClientStatus": "running",
"ClientDescription": "",
"CreateIndex": 16,
"ModifyIndex": 16
},
...
]
```
</dd>
</dl>
# /v1/job/\<ID\>/evaluate
## PUT / POST
<dl>
<dt>Description</dt>
<dd>
Creates a new evaluation for the given job. This can be used to force
run the scheduling logic if necessary.
</dd>
<dt>Method</dt>
<dd>PUT or POST</dd>
<dt>URL</dt>
<dd>`/v1/job/<ID>/evaluate`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
{
"EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac",
"EvalCreateIndex": 35,
"JobModifyIndex": 34,
}
```
</dd>
</dl>
# /v1/job/\<ID\>/evaluations
## GET
<dl>
<dt>Description</dt>
<dd>
Query the evaluations belonging to a single job.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/job/<id>/evaluations`</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>
```javascript
[
{
"ID": "151accaa-1ac6-90fe-d427-313e70ccbb88",
"Priority": 50,
"Type": "service",
"TriggeredBy": "job-register",
"JobID": "binstore-storagelocker",
"JobModifyIndex": 14,
"NodeID": "",
"NodeModifyIndex": 0,
"Status": "complete",
"StatusDescription": "",
"Wait": 0,
"NextEval": "",
"PreviousEval": "",
"CreateIndex": 15,
"ModifyIndex": 17
},
...
]
```
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ another region can be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -93,4 +98,3 @@ another region can be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -82,9 +87,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/node/\<ID\>/allocations
## GET
<dl>
<dt>Description</dt>
<dd>
@@ -102,6 +104,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -128,7 +135,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/node/\<ID\>/evaluate
## PUT / POST
<dl>
@@ -163,9 +169,6 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
# /v1/node/\<ID\>/drain
## PUT / POST
<dl>
<dt>Description</dt>
<dd>
@@ -175,7 +178,7 @@ be specified using the `?region=` query parameter.
</dd>
<dt>Method</dt>
<dd>PUT or POSt</dd>
<dd>PUT or POST</dd>
<dt>URL</dt>
<dd>`/v1/node/<ID>/drain`</dd>
@@ -205,4 +208,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>
View File
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter.
None
</dd>
<dt>Blocking Queries</dt>
<dd>
[Supported](/docs/http/index.html#blocking-queries)
</dd>
<dt>Returns</dt>
<dd>
@@ -53,5 +58,3 @@ be specified using the `?region=` query parameter.
</dd>
</dl>