diff --git a/CHANGELOG.md b/CHANGELOG.md index 786380eef..3fb99bfa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## 0.2.0 (Unreleased) + +FEATURES: + + * Blocking queries supported in API [GH-366] + * Add support for downloading external artifacts to execute for Exec, Raw exec drivers [GH-381] + +BACKWARDS INCOMPATIBILITIES: + + * Qemu and Java driver configurations have been updated to both use `artifact_source` as the source for external images/jars to be ran + ## 0.1.2 (October 6, 2015) IMPROVEMENTS: diff --git a/api/compose_test.go b/api/compose_test.go index 68801519f..2a509bc55 100644 --- a/api/compose_test.go +++ b/api/compose_test.go @@ -69,6 +69,7 @@ func TestCompose(t *testing.T) { Operand: "=", }, }, + RestartPolicy: NewRestartPolicy(), Tasks: []*Task{ &Task{ Name: "task1", diff --git a/api/tasks.go b/api/tasks.go index c1d5bf2ff..2535d5ec5 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -1,19 +1,42 @@ package api +import ( + "time" +) + +// RestartPolicy defines how the Nomad client restarts +// tasks in a taskgroup when they fail +type RestartPolicy struct { + Interval time.Duration + Attempts int + Delay time.Duration +} + +func NewRestartPolicy() *RestartPolicy { + return &RestartPolicy{ + Attempts: 10, + Interval: 3 * time.Minute, + Delay: 5 * time.Second, + } +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { - Name string - Count int - Constraints []*Constraint - Tasks []*Task - Meta map[string]string + Name string + Count int + Constraints []*Constraint + Tasks []*Task + RestartPolicy *RestartPolicy + Meta map[string]string } // NewTaskGroup creates a new TaskGroup. func NewTaskGroup(name string, count int) *TaskGroup { + restartPolicy := NewRestartPolicy() return &TaskGroup{ - Name: name, - Count: count, + Name: name, + Count: count, + RestartPolicy: restartPolicy, } } diff --git a/api/tasks_test.go b/api/tasks_test.go index 877f84d5c..945fdf9bf 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -8,8 +8,9 @@ import ( func TestTaskGroup_NewTaskGroup(t *testing.T) { grp := NewTaskGroup("grp1", 2) expect := &TaskGroup{ - Name: "grp1", - Count: 2, + Name: "grp1", + Count: 2, + RestartPolicy: NewRestartPolicy(), } if !reflect.DeepEqual(grp, expect) { t.Fatalf("expect: %#v, got: %#v", expect, grp) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index f41be4558..1504900c1 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -41,9 +41,10 @@ type AllocRunner struct { dirtyCh chan struct{} - ctx *driver.ExecContext - tasks map[string]*TaskRunner - taskLock sync.RWMutex + ctx *driver.ExecContext + tasks map[string]*TaskRunner + RestartPolicy *structs.RestartPolicy + taskLock sync.RWMutex taskStatus map[string]taskStatus taskStatusLock sync.RWMutex @@ -58,9 +59,10 @@ type AllocRunner struct { // allocRunnerState is used to snapshot the state of the alloc runner type allocRunnerState struct { - Alloc *structs.Allocation - TaskStatus map[string]taskStatus - Context *driver.ExecContext + Alloc *structs.Allocation + RestartPolicy *structs.RestartPolicy + TaskStatus map[string]taskStatus + Context *driver.ExecContext } // NewAllocRunner is used to create a new allocation context @@ -95,6 +97,7 @@ func (r *AllocRunner) RestoreState() error { // Restore fields r.alloc = snap.Alloc + r.RestartPolicy = snap.RestartPolicy r.taskStatus = snap.TaskStatus r.ctx = snap.Context @@ -102,7 +105,8 @@ func (r *AllocRunner) RestoreState() error { var mErr multierror.Error for name := range r.taskStatus { task := &structs.Task{Name: 
name} - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task) + restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, restartTracker) r.tasks[name] = tr if err := tr.RestoreState(); err != nil { r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err) @@ -118,9 +122,10 @@ func (r *AllocRunner) RestoreState() error { func (r *AllocRunner) SaveState() error { r.taskStatusLock.RLock() snap := allocRunnerState{ - Alloc: r.alloc, - TaskStatus: r.taskStatus, - Context: r.ctx, + Alloc: r.alloc, + RestartPolicy: r.RestartPolicy, + TaskStatus: r.taskStatus, + Context: r.ctx, } err := persistState(r.stateFilePath(), &snap) r.taskStatusLock.RUnlock() @@ -279,6 +284,9 @@ func (r *AllocRunner) Run() { return } + // Extract the RestartPolicy from the TG and set it on the alloc + r.RestartPolicy = tg.RestartPolicy + // Create the execution context if r.ctx == nil { allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID)) @@ -287,7 +295,7 @@ func (r *AllocRunner) Run() { r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup)) return } - r.ctx = driver.NewExecContext(allocDir) + r.ctx = driver.NewExecContext(allocDir, r.alloc.ID) } // Start the task runners @@ -300,8 +308,8 @@ func (r *AllocRunner) Run() { // Merge in the task resources task.Resources = alloc.TaskResources[task.Name] - - tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task) + restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy) + tr := NewTaskRunner(r.logger, r.config, r.setTaskStatus, r.ctx, r.alloc.ID, task, restartTracker) r.tasks[task.Name] = tr go tr.Run() } diff --git a/client/client.go b/client/client.go index 744bad876..029ac3954 100644 --- a/client/client.go +++ b/client/client.go @@ -443,11 +443,33 @@ func (c *Client) fingerprint() error { if applies { applied = append(applied, name) } + p, period := f.Periodic() + if p { + // TODO: If more periodic fingerprinters are added, then + // fingerprintPeriodic should be used to handle all the periodic + // fingerprinters by using a priority queue. + go c.fingerprintPeriodic(name, f, period) + } } c.logger.Printf("[DEBUG] client: applied fingerprints %v", applied) return nil } +// fingerprintPeriodic runs a fingerprinter at the specified duration. 
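+// It re-runs the fingerprint on every tick and stops once the client's shutdownCh is closed.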
+func (c *Client) fingerprintPeriodic(name string, f fingerprint.Fingerprint, d time.Duration) { + c.logger.Printf("[DEBUG] client: periodically fingerprinting %v at duration %v", name, d) + for { + select { + case <-time.After(d): + if _, err := f.Fingerprint(c.config, c.config.Node); err != nil { + c.logger.Printf("[DEBUG] client: periodic fingerprinting for %v failed: %v", name, err) + } + case <-c.shutdownCh: + return + } + } +} + // setupDrivers is used to find the available drivers func (c *Client) setupDrivers() error { var avail []string diff --git a/client/driver/args/args.go b/client/driver/args/args.go index b447a7c99..51793bd8b 100644 --- a/client/driver/args/args.go +++ b/client/driver/args/args.go @@ -27,7 +27,7 @@ func ParseAndReplace(args string, env map[string]string) ([]string, error) { replaced := make([]string, len(parsed)) for i, arg := range parsed { - replaced[i] = replaceEnv(arg, env) + replaced[i] = ReplaceEnv(arg, env) } return replaced, nil @@ -36,7 +36,7 @@ func ParseAndReplace(args string, env map[string]string) ([]string, error) { // replaceEnv takes an arg and replaces all occurences of environment variables. // If the variable is found in the passed map it is replaced, otherwise the // original string is returned. -func replaceEnv(arg string, env map[string]string) string { +func ReplaceEnv(arg string, env map[string]string) string { return envRe.ReplaceAllStringFunc(arg, func(arg string) string { stripped := arg[1:] if stripped[0] == '{' { diff --git a/client/driver/docker.go b/client/driver/docker.go index bbd52a9d8..a2d614a89 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "log" + "net" "path/filepath" "strconv" "strings" @@ -13,11 +14,13 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/nomad/structs" ) type DockerDriver struct { DriverContext + fingerprint.StaticFingerprinter } type dockerPID struct { @@ -37,7 +40,7 @@ type dockerHandle struct { } func NewDockerDriver(ctx *DriverContext) Driver { - return &DockerDriver{*ctx} + return &DockerDriver{DriverContext: *ctx} } // dockerClient creates *docker.Client. In test / dev mode we can use ENV vars @@ -73,6 +76,15 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool return false, nil } + privileged, err := strconv.ParseBool(d.config.ReadDefault("docker.privileged.enabled", "false")) + if err != nil { + return false, fmt.Errorf("Unable to parse docker.privileged.enabled: %s", err) + } + if privileged == true { + d.logger.Printf("[DEBUG] driver.docker: privileged containers enabled. Only enable if needed") + node.Attributes["docker.privileged.enabled"] = "1" + } + _, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) if err != nil { return false, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) @@ -108,8 +120,10 @@ func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Ta } return []string{ - fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocName), - fmt.Sprintf("%s:%s", local, allocdir.TaskLocal), + // "z" and "Z" option is to allocate directory with SELinux label. 
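+		// lowercase "z" applies a shared SELinux content label so the alloc dir stays usable across containers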
+ fmt.Sprintf("%s:/%s:rw,z", shared, allocdir.SharedAllocName), + // capital "Z" will label with Multi-Category Security (MCS) labels + fmt.Sprintf("%s:/%s:rw,Z", local, allocdir.TaskLocal), }, nil } @@ -166,6 +180,47 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (do d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"]) + // set privileged mode + hostPrivileged, err := strconv.ParseBool(d.config.ReadDefault("docker.privileged.enabled", "false")) + if err != nil { + return c, fmt.Errorf("Unable to parse docker.privileged.enabled: %s", err) + } + + if v, ok := task.Config["privileged"]; ok { + taskPrivileged, err := strconv.ParseBool(v) + if err != nil { + return c, fmt.Errorf("Unable to parse boolean value from task config option 'privileged': %v", err) + } + if taskPrivileged && !hostPrivileged { + return c, fmt.Errorf(`Unable to set privileged flag since "docker.privileged.enabled" is false`) + } + + hostConfig.Privileged = taskPrivileged + } + + // set DNS servers + dns, ok := task.Config["dns-servers"] + + if ok && dns != "" { + for _, v := range strings.Split(dns, ",") { + ip := strings.TrimSpace(v) + if net.ParseIP(ip) != nil { + hostConfig.DNS = append(hostConfig.DNS, ip) + } else { + d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip) + } + } + } + + // set DNS search domains + dnsSearch, ok := task.Config["search-domains"] + + if ok && dnsSearch != "" { + for _, v := range strings.Split(dnsSearch, ",") { + hostConfig.DNSSearch = append(hostConfig.DNSSearch, strings.TrimSpace(v)) + } + } + mode, ok := task.Config["network_mode"] if !ok || mode == "" { // docker default @@ -303,8 +358,14 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle Repository: repo, Tag: tag, } - // TODO add auth configuration for private repos - authOptions := docker.AuthConfiguration{} + + authOptions := docker.AuthConfiguration{ + Username: task.Config["auth.username"], + Password: task.Config["auth.password"], + Email: task.Config["auth.email"], + ServerAddress: task.Config["auth.server-address"], + } + err = client.PullImage(pullOptions, authOptions) if err != nil { d.logger.Printf("[ERR] driver.docker: pulling container %s", err) diff --git a/client/driver/driver.go b/client/driver/driver.go index dd4fcf43c..e2739e2b8 100644 --- a/client/driver/driver.go +++ b/client/driver/driver.go @@ -100,11 +100,14 @@ type ExecContext struct { // AllocDir contains information about the alloc directory structure. 
AllocDir *allocdir.AllocDir + + // Alloc ID + AllocID string } // NewExecContext is used to create a new execution context -func NewExecContext(alloc *allocdir.AllocDir) *ExecContext { - return &ExecContext{AllocDir: alloc} +func NewExecContext(alloc *allocdir.AllocDir, allocID string) *ExecContext { + return &ExecContext{AllocDir: alloc, AllocID: allocID} } // TaskEnvironmentVariables converts exec context and task configuration into a diff --git a/client/driver/driver_test.go b/client/driver/driver_test.go index a6f621455..106eeb96b 100644 --- a/client/driver/driver_test.go +++ b/client/driver/driver_test.go @@ -43,7 +43,7 @@ func testDriverContext(task string) *DriverContext { func testDriverExecContext(task *structs.Task, driverCtx *DriverContext) *ExecContext { allocDir := allocdir.NewAllocDir(filepath.Join(driverCtx.config.AllocDir, structs.GenerateUUID())) allocDir.Build([]*structs.Task{task}) - ctx := NewExecContext(allocDir) + ctx := NewExecContext(allocDir, "dummyAllocId") return ctx } diff --git a/client/driver/exec.go b/client/driver/exec.go index 0324cad68..4de719c46 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -2,17 +2,16 @@ package driver import ( "fmt" - "log" - "path" "path/filepath" "runtime" "syscall" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/executor" + "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -20,6 +19,7 @@ import ( // features. type ExecDriver struct { DriverContext + fingerprint.StaticFingerprinter } // execHandle is returned from Start/Open as a handle to the PID @@ -31,12 +31,15 @@ type execHandle struct { // NewExecDriver is used to create a new exec driver func NewExecDriver(ctx *DriverContext) Driver { - return &ExecDriver{*ctx} + return &ExecDriver{DriverContext: *ctx} } func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { - // Only enable if we are root when running on non-windows systems. - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + // Only enable if we are root on linux. + if runtime.GOOS != "linux" { + d.logger.Printf("[DEBUG] driver.exec: only available on linux, disabling") + return false, nil + } else if syscall.Geteuid() != 0 { d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling") return false, nil } @@ -52,31 +55,24 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("missing command for exec driver") } + // Create a location to download the artifact. + taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] + if !ok { + return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) + } + // Check if an artificat is specified and attempt to download it source, ok := task.Config["artifact_source"] if ok && source != "" { // Proceed to download an artifact to be executed. - // We use go-getter to support a variety of protocols, but need to change - // file permissions of the resulted download to be executable - - // Create a location to download the artifact. 
- taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] - if !ok { - return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) - } - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - - artifactName := path.Base(source) - artifactFile := filepath.Join(destDir, artifactName) - if err := getter.GetFile(artifactFile, source); err != nil { - return nil, fmt.Errorf("Error downloading artifact for Exec driver: %s", err) - } - - // Add execution permissions to the newly downloaded artifact - if runtime.GOOS != "windows" { - if err := syscall.Chmod(artifactFile, 0755); err != nil { - log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err) - } + _, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } } diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index ba8745176..bc8323889 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "path/filepath" "reflect" - "runtime" "testing" "time" @@ -123,18 +122,13 @@ func TestExecDriver_Start_Wait(t *testing.T) { func TestExecDriver_Start_Artifact_basic(t *testing.T) { ctestutils.ExecCompatible(t) - var file string - switch runtime.GOOS { - case "darwin": - file = "hi_darwin_amd64" - default: - file = "hi_linux_amd64" - } + file := "hi_linux_amd64" + checksum := "sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c" task := &structs.Task{ Name: "sleep", Config: map[string]string{ - "artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file), + "artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s?checksum=%s", file, checksum), "command": filepath.Join("$NOMAD_TASK_DIR", file), }, Resources: basicResources, @@ -172,13 +166,7 @@ func TestExecDriver_Start_Artifact_basic(t *testing.T) { func TestExecDriver_Start_Artifact_expanded(t *testing.T) { ctestutils.ExecCompatible(t) - var file string - switch runtime.GOOS { - case "darwin": - file = "hi_darwin_amd64" - default: - file = "hi_linux_amd64" - } + file := "hi_linux_amd64" task := &structs.Task{ Name: "sleep", @@ -306,7 +294,7 @@ func TestExecDriver_Start_Kill_Wait(t *testing.T) { if err == nil { t.Fatal("should err") } - case <-time.After(2 * time.Second): + case <-time.After(8 * time.Second): t.Fatalf("timeout") } } diff --git a/client/executor/exec.go b/client/driver/executor/exec.go similarity index 85% rename from client/executor/exec.go rename to client/driver/executor/exec.go index ca104ca3e..8cf076bab 100644 --- a/client/executor/exec.go +++ b/client/driver/executor/exec.go @@ -70,15 +70,20 @@ type Executor interface { // Command provides access the underlying Cmd struct in case the Executor // interface doesn't expose the functionality you need. - Command() *cmd + Command() *exec.Cmd } // Command is a mirror of exec.Command that returns a platform-specific Executor -func Command(name string, arg ...string) Executor { +func Command(name string, args ...string) Executor { executor := NewExecutor() - cmd := executor.Command() + SetCommand(executor, name, args) + return executor +} + +func SetCommand(e Executor, name string, args []string) { + cmd := e.Command() cmd.Path = name - cmd.Args = append([]string{name}, arg...) + cmd.Args = append([]string{name}, args...) 
if filepath.Base(name) == name { if lp, err := exec.LookPath(name); err != nil { @@ -87,7 +92,6 @@ func Command(name string, arg ...string) Executor { cmd.Path = lp } } - return executor } // OpenId is similar to executor.Command but will attempt to reopen with the @@ -100,17 +104,3 @@ func OpenId(id string) (Executor, error) { } return executor, nil } - -// Cmd is an extension of exec.Cmd that incorporates functionality for -// re-attaching to processes, dropping priviledges, etc., based on platform- -// specific implementations. -type cmd struct { - exec.Cmd - - // Resources is used to limit CPU and RAM used by the process, by way of - // cgroups or a similar mechanism. - Resources structs.Resources - - // RunAs may be a username or Uid. The implementation will decide how to use it. - RunAs string -} diff --git a/client/driver/executor/exec_basic.go b/client/driver/executor/exec_basic.go new file mode 100644 index 000000000..8f2d6fb2a --- /dev/null +++ b/client/driver/executor/exec_basic.go @@ -0,0 +1,146 @@ +package executor + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/driver/environment" + "github.com/hashicorp/nomad/client/driver/spawn" + "github.com/hashicorp/nomad/nomad/structs" +) + +// BasicExecutor should work everywhere, and as a result does not include +// any resource restrictions or runas capabilities. +type BasicExecutor struct { + cmd exec.Cmd + spawn *spawn.Spawner + taskName string + taskDir string + allocDir string +} + +// TODO: Have raw_exec use this as well. +func NewBasicExecutor() Executor { + return &BasicExecutor{} +} + +func (e *BasicExecutor) Limit(resources *structs.Resources) error { + if resources == nil { + return errNoResources + } + return nil +} + +func (e *BasicExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { + taskDir, ok := alloc.TaskDirs[taskName] + if !ok { + fmt.Errorf("Couldn't find task directory for task %v", taskName) + } + e.cmd.Dir = taskDir + + e.taskDir = taskDir + e.taskName = taskName + e.allocDir = alloc.AllocDir + return nil +} + +func (e *BasicExecutor) Start() error { + // Parse the commands arguments and replace instances of Nomad environment + // variables. + envVars, err := environment.ParseFromList(e.cmd.Env) + if err != nil { + return err + } + + e.cmd.Path = args.ReplaceEnv(e.cmd.Path, envVars.Map()) + combined := strings.Join(e.cmd.Args, " ") + parsed, err := args.ParseAndReplace(combined, envVars.Map()) + if err != nil { + return err + } + e.cmd.Args = parsed + + spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) + e.spawn = spawn.NewSpawner(spawnState) + e.spawn.SetCommand(&e.cmd) + e.spawn.SetLogs(&spawn.Logs{ + Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + Stdin: os.DevNull, + }) + + return e.spawn.Spawn(nil) +} + +func (e *BasicExecutor) Open(id string) error { + var spawn spawn.Spawner + dec := json.NewDecoder(strings.NewReader(id)) + if err := dec.Decode(&spawn); err != nil { + return fmt.Errorf("Failed to parse id: %v", err) + } + + // Setup the executor. 
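+	// Re-attaching to a BasicExecutor only needs the de-serialized spawner state.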
+ e.spawn = &spawn + return nil +} + +func (e *BasicExecutor) Wait() error { + code, err := e.spawn.Wait() + if err != nil { + return err + } + + if code != 0 { + return fmt.Errorf("Task exited with code: %d", code) + } + + return nil +} + +func (e *BasicExecutor) ID() (string, error) { + if e.spawn == nil { + return "", fmt.Errorf("Process was never started") + } + + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(e.spawn); err != nil { + return "", fmt.Errorf("Failed to serialize id: %v", err) + } + + return buffer.String(), nil +} + +func (e *BasicExecutor) Shutdown() error { + proc, err := os.FindProcess(e.spawn.UserPid) + if err != nil { + return fmt.Errorf("Failed to find user processes %v: %v", e.spawn.UserPid, err) + } + + if runtime.GOOS == "windows" { + return proc.Kill() + } + + return proc.Signal(os.Interrupt) +} + +func (e *BasicExecutor) ForceStop() error { + proc, err := os.FindProcess(e.spawn.UserPid) + if err != nil { + return fmt.Errorf("Failed to find user processes %v: %v", e.spawn.UserPid, err) + } + + return proc.Kill() +} + +func (e *BasicExecutor) Command() *exec.Cmd { + return &e.cmd +} diff --git a/client/driver/executor/exec_basic_test.go b/client/driver/executor/exec_basic_test.go new file mode 100644 index 000000000..d9eed49f9 --- /dev/null +++ b/client/driver/executor/exec_basic_test.go @@ -0,0 +1,7 @@ +package executor + +import "testing" + +func TestExecutorBasic(t *testing.T) { + testExecutor(t, NewBasicExecutor, nil) +} diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go new file mode 100644 index 000000000..a7bbdd03c --- /dev/null +++ b/client/driver/executor/exec_linux.go @@ -0,0 +1,423 @@ +package executor + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "os/user" + "path/filepath" + "strconv" + "strings" + "syscall" + + "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/driver/environment" + "github.com/hashicorp/nomad/client/driver/spawn" + "github.com/hashicorp/nomad/nomad/structs" + + "github.com/opencontainers/runc/libcontainer/cgroups" + cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + // A mapping of directories on the host OS to attempt to embed inside each + // task's chroot. + chrootEnv = map[string]string{ + "/bin": "/bin", + "/etc": "/etc", + "/lib": "/lib", + "/lib32": "/lib32", + "/lib64": "/lib64", + "/usr/bin": "/usr/bin", + "/usr/lib": "/usr/lib", + } +) + +func NewExecutor() Executor { + return NewLinuxExecutor() +} + +func NewLinuxExecutor() Executor { + return &LinuxExecutor{} +} + +// Linux executor is designed to run on linux kernel 2.8+. +type LinuxExecutor struct { + cmd exec.Cmd + user *user.User + + // Isolation configurations. + groups *cgroupConfig.Cgroup + taskName string + taskDir string + allocDir string + + // Spawn process. + spawn *spawn.Spawner +} + +func (e *LinuxExecutor) Command() *exec.Cmd { + return &e.cmd +} + +func (e *LinuxExecutor) Limit(resources *structs.Resources) error { + if resources == nil { + return errNoResources + } + + return e.configureCgroups(resources) +} + +// execLinuxID contains the necessary information to reattach to an executed +// process and cleanup the created cgroups. 
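+// It is serialized to JSON by ID() and decoded again by Open() when re-attaching.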
+type ExecLinuxID struct { + Groups *cgroupConfig.Cgroup + Spawn *spawn.Spawner + TaskDir string +} + +func (e *LinuxExecutor) Open(id string) error { + // De-serialize the ID. + dec := json.NewDecoder(strings.NewReader(id)) + var execID ExecLinuxID + if err := dec.Decode(&execID); err != nil { + return fmt.Errorf("Failed to parse id: %v", err) + } + + // Setup the executor. + e.groups = execID.Groups + e.spawn = execID.Spawn + e.taskDir = execID.TaskDir + + return nil +} + +func (e *LinuxExecutor) ID() (string, error) { + if e.groups == nil || e.spawn == nil || e.taskDir == "" { + return "", fmt.Errorf("LinuxExecutor not properly initialized.") + } + + // Build the ID. + id := ExecLinuxID{ + Groups: e.groups, + Spawn: e.spawn, + TaskDir: e.taskDir, + } + + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(id); err != nil { + return "", fmt.Errorf("Failed to serialize id: %v", err) + } + + return buffer.String(), nil +} + +// runAs takes a user id as a string and looks up the user, and sets the command +// to execute as that user. +func (e *LinuxExecutor) runAs(userid string) error { + u, err := user.Lookup(userid) + if err != nil { + return fmt.Errorf("Failed to identify user %v: %v", userid, err) + } + + // Convert the uid and gid + uid, err := strconv.ParseUint(u.Uid, 10, 32) + if err != nil { + return fmt.Errorf("Unable to convert userid to uint32: %s", err) + } + gid, err := strconv.ParseUint(u.Gid, 10, 32) + if err != nil { + return fmt.Errorf("Unable to convert groupid to uint32: %s", err) + } + + // Set the command to run as that user and group. + if e.cmd.SysProcAttr == nil { + e.cmd.SysProcAttr = &syscall.SysProcAttr{} + } + if e.cmd.SysProcAttr.Credential == nil { + e.cmd.SysProcAttr.Credential = &syscall.Credential{} + } + e.cmd.SysProcAttr.Credential.Uid = uint32(uid) + e.cmd.SysProcAttr.Credential.Gid = uint32(gid) + + return nil +} + +func (e *LinuxExecutor) Start() error { + // Run as "nobody" user so we don't leak root privilege to the spawned + // process. + if err := e.runAs("nobody"); err != nil { + return err + } + + // Parse the commands arguments and replace instances of Nomad environment + // variables. + envVars, err := environment.ParseFromList(e.cmd.Env) + if err != nil { + return err + } + + e.cmd.Path = args.ReplaceEnv(e.cmd.Path, envVars.Map()) + combined := strings.Join(e.cmd.Args, " ") + parsed, err := args.ParseAndReplace(combined, envVars.Map()) + if err != nil { + return err + } + e.cmd.Args = parsed + + spawnState := filepath.Join(e.allocDir, fmt.Sprintf("%s_%s", e.taskName, "exit_status")) + e.spawn = spawn.NewSpawner(spawnState) + e.spawn.SetCommand(&e.cmd) + e.spawn.SetChroot(e.taskDir) + e.spawn.SetLogs(&spawn.Logs{ + Stdout: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), + Stderr: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), + Stdin: os.DevNull, + }) + + enterCgroup := func(pid int) error { + // Join the spawn-daemon to the cgroup. + manager := e.getCgroupManager(e.groups) + + // Apply will place the spawn dameon into the created cgroups. + if err := manager.Apply(pid); err != nil { + return fmt.Errorf("Failed to join spawn-daemon to the cgroup (%+v): %v", e.groups, err) + } + + return nil + } + + return e.spawn.Spawn(enterCgroup) +} + +// Wait waits til the user process exits and returns an error on non-zero exit +// codes. Wait also cleans up the task directory and created cgroups. 
+func (e *LinuxExecutor) Wait() error { + errs := new(multierror.Error) + code, err := e.spawn.Wait() + if err != nil { + errs = multierror.Append(errs, err) + } + + if code != 0 { + errs = multierror.Append(errs, fmt.Errorf("Task exited with code: %d", code)) + } + + if err := e.destroyCgroup(); err != nil { + errs = multierror.Append(errs, err) + } + + if err := e.cleanTaskDir(); err != nil { + errs = multierror.Append(errs, err) + } + + return errs.ErrorOrNil() +} + +func (e *LinuxExecutor) Shutdown() error { + return e.ForceStop() +} + +// ForceStop immediately exits the user process and cleans up both the task +// directory and the cgroups. +func (e *LinuxExecutor) ForceStop() error { + errs := new(multierror.Error) + if err := e.destroyCgroup(); err != nil { + errs = multierror.Append(errs, err) + } + + if err := e.cleanTaskDir(); err != nil { + errs = multierror.Append(errs, err) + } + + return errs.ErrorOrNil() +} + +// Task Directory related functions. + +// ConfigureTaskDir creates the necessary directory structure for a proper +// chroot. cleanTaskDir should be called after. +func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { + e.taskName = taskName + e.allocDir = alloc.AllocDir + + taskDir, ok := alloc.TaskDirs[taskName] + if !ok { + fmt.Errorf("Couldn't find task directory for task %v", taskName) + } + e.taskDir = taskDir + + if err := alloc.MountSharedDir(taskName); err != nil { + return err + } + + if err := alloc.Embed(taskName, chrootEnv); err != nil { + return err + } + + // Mount dev + dev := filepath.Join(taskDir, "dev") + if err := os.Mkdir(dev, 0777); err != nil { + return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) + } + + if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) + } + + // Mount proc + proc := filepath.Join(taskDir, "proc") + if err := os.Mkdir(proc, 0777); err != nil { + return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) + } + + if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) + } + + // Set the tasks AllocDir environment variable. + env, err := environment.ParseFromList(e.cmd.Env) + if err != nil { + return err + } + env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) + env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) + e.cmd.Env = env.List() + + return nil +} + +// pathExists is a helper function to check if the path exists. +func (e *LinuxExecutor) pathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + return false + } + } + return true +} + +// cleanTaskDir is an idempotent operation to clean the task directory and +// should be called when tearing down the task. +func (e *LinuxExecutor) cleanTaskDir() error { + // Unmount dev. + errs := new(multierror.Error) + dev := filepath.Join(e.taskDir, "dev") + if e.pathExists(dev) { + if err := syscall.Unmount(dev, 0); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) + } + + if err := os.RemoveAll(dev); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err)) + } + } + + // Unmount proc. 
+ proc := filepath.Join(e.taskDir, "proc") + if e.pathExists(proc) { + if err := syscall.Unmount(proc, 0); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) + } + + if err := os.RemoveAll(proc); err != nil { + errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", dev, err)) + } + } + + return errs.ErrorOrNil() +} + +// Cgroup related functions. + +// configureCgroups converts a Nomad Resources specification into the equivalent +// cgroup configuration. It returns an error if the resources are invalid. +func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { + e.groups = &cgroupConfig.Cgroup{} + e.groups.Name = structs.GenerateUUID() + + // TODO: verify this is needed for things like network access + e.groups.AllowAllDevices = true + + if resources.MemoryMB > 0 { + // Total amount of memory allowed to consume + e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) + // Disable swap to avoid issues on the machine + e.groups.MemorySwap = int64(-1) + } + + if resources.CPU < 2 { + return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) + } + + // Set the relative CPU shares for this cgroup. + e.groups.CpuShares = int64(resources.CPU) + + if resources.IOPS != 0 { + // Validate it is in an acceptable range. + if resources.IOPS < 10 || resources.IOPS > 1000 { + return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) + } + + e.groups.BlkioWeight = uint16(resources.IOPS) + } + + return nil +} + +// destroyCgroup kills all processes in the cgroup and removes the cgroup +// configuration from the host. +func (e *LinuxExecutor) destroyCgroup() error { + if e.groups == nil { + return errors.New("Can't destroy: cgroup configuration empty") + } + + manager := e.getCgroupManager(e.groups) + pids, err := manager.GetPids() + if err != nil { + return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err) + } + + errs := new(multierror.Error) + for _, pid := range pids { + process, err := os.FindProcess(pid) + if err != nil { + multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err)) + continue + } + + if err := process.Kill(); err != nil { + multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err)) + continue + } + } + + // Remove the cgroup. + if err := manager.Destroy(); err != nil { + multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err)) + } + + if len(errs.Errors) != 0 { + return fmt.Errorf("Failed to destroy cgroup: %v", errs) + } + + return nil +} + +// getCgroupManager returns the correct libcontainer cgroup manager. 
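+// The systemd manager is used when the host manages cgroups through systemd; otherwise the plain cgroup fs manager is returned.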
+func (e *LinuxExecutor) getCgroupManager(groups *cgroupConfig.Cgroup) cgroups.Manager { + var manager cgroups.Manager + manager = &cgroupFs.Manager{Cgroups: groups} + if systemd.UseSystemd() { + manager = &systemd.Manager{Cgroups: groups} + } + return manager +} diff --git a/client/driver/executor/exec_linux_test.go b/client/driver/executor/exec_linux_test.go new file mode 100644 index 000000000..c0bd2087a --- /dev/null +++ b/client/driver/executor/exec_linux_test.go @@ -0,0 +1,11 @@ +package executor + +import ( + "testing" + + ctestutil "github.com/hashicorp/nomad/client/testutil" +) + +func TestExecutorLinux(t *testing.T) { + testExecutor(t, NewLinuxExecutor, ctestutil.ExecCompatible) +} diff --git a/client/driver/executor/exec_universal.go b/client/driver/executor/exec_universal.go new file mode 100644 index 000000000..318faea4b --- /dev/null +++ b/client/driver/executor/exec_universal.go @@ -0,0 +1,12 @@ +// +build !linux + +package executor + +func NewExecutor() Executor { + return &UniversalExecutor{BasicExecutor{}} +} + +// UniversalExecutor wraps the BasicExecutor +type UniversalExecutor struct { + BasicExecutor +} diff --git a/client/driver/executor/test_harness.go b/client/driver/executor/test_harness.go new file mode 100644 index 000000000..6eabb556d --- /dev/null +++ b/client/driver/executor/test_harness.go @@ -0,0 +1,231 @@ +package executor + +import ( + "fmt" + "io/ioutil" + "log" + "os" + "path/filepath" + "testing" + "time" + + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + constraint = &structs.Resources{ + CPU: 250, + MemoryMB: 256, + Networks: []*structs.NetworkResource{ + &structs.NetworkResource{ + MBits: 50, + DynamicPorts: []string{"http"}, + }, + }, + } +) + +func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) { + alloc := mock.Alloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + + allocDir := allocdir.NewAllocDir(filepath.Join(os.TempDir(), alloc.ID)) + if err := allocDir.Build([]*structs.Task{task}); err != nil { + log.Panicf("allocDir.Build() failed: %v", err) + } + + return task.Name, allocDir +} + +func testExecutor(t *testing.T, buildExecutor func() Executor, compatible func(*testing.T)) { + if compatible != nil { + compatible(t) + } + + command := func(name string, args ...string) Executor { + b := buildExecutor() + SetCommand(b, name, args) + return b + } + + Executor_Start_Invalid(t, command) + Executor_Start_Wait_Failure_Code(t, command) + Executor_Start_Wait(t, command) + Executor_Start_Kill(t, command) + Executor_Open(t, command, buildExecutor) +} + +type buildExecCommand func(name string, args ...string) Executor + +func Executor_Start_Invalid(t *testing.T, command buildExecCommand) { + invalid := "/bin/foobar" + e := command(invalid, "1") + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err == nil { + log.Panicf("Start(%v) should have failed", invalid) + } +} + +func Executor_Start_Wait_Failure_Code(t *testing.T, command buildExecCommand) { + e := command("/bin/date", "-invalid") + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + if err := e.ConfigureTaskDir(task, alloc); err != nil { + 
log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + if err := e.Wait(); err == nil { + log.Panicf("Wait() should have failed") + } +} + +func Executor_Start_Wait(t *testing.T, command buildExecCommand) { + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + + taskDir, ok := alloc.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "output.txt") + absFilePath := filepath.Join(taskDir, file) + cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) + e := command("/bin/bash", "-c", cmd) + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + if err := e.Wait(); err != nil { + log.Panicf("Wait() failed: %v", err) + } + + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + log.Panicf("Couldn't read file %v", absFilePath) + } + + act := string(output) + if act != expected { + log.Panicf("Command output incorrectly: want %v; got %v", expected, act) + } +} + +func Executor_Start_Kill(t *testing.T, command buildExecCommand) { + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + + taskDir, ok := alloc.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + filePath := filepath.Join(taskDir, "output") + e := command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + if err := e.Shutdown(); err != nil { + log.Panicf("Shutdown() failed: %v", err) + } + + time.Sleep(1500 * time.Millisecond) + + // Check that the file doesn't exist. 
+ if _, err := os.Stat(filePath); err == nil { + log.Panicf("Stat(%v) should have failed: task not killed", filePath) + } +} + +func Executor_Open(t *testing.T, command buildExecCommand, newExecutor func() Executor) { + task, alloc := mockAllocDir(t) + defer alloc.Destroy() + + taskDir, ok := alloc.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "output.txt") + absFilePath := filepath.Join(taskDir, file) + cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) + e := command("/bin/bash", "-c", cmd) + + if err := e.Limit(constraint); err != nil { + log.Panicf("Limit() failed: %v", err) + } + + if err := e.ConfigureTaskDir(task, alloc); err != nil { + log.Panicf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) + } + + if err := e.Start(); err != nil { + log.Panicf("Start() failed: %v", err) + } + + id, err := e.ID() + if err != nil { + log.Panicf("ID() failed: %v", err) + } + + e2 := newExecutor() + if err := e2.Open(id); err != nil { + log.Panicf("Open(%v) failed: %v", id, err) + } + + if err := e2.Wait(); err != nil { + log.Panicf("Wait() failed: %v", err) + } + + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + log.Panicf("Couldn't read file %v", absFilePath) + } + + act := string(output) + if act != expected { + log.Panicf("Command output incorrectly: want %v; got %v", expected, act) + } +} diff --git a/client/driver/java.go b/client/driver/java.go index ac2c3c6f3..1aa2c6d3f 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -4,17 +4,17 @@ import ( "bytes" "fmt" "os/exec" - "path" "path/filepath" "runtime" "strings" "syscall" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/executor" + "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -22,6 +22,7 @@ import ( // It literally just fork/execs tasks with the java command. type JavaDriver struct { DriverContext + fingerprint.StaticFingerprinter } // javaHandle is returned from Start/Open as a handle to the PID @@ -33,13 +34,13 @@ type javaHandle struct { // NewJavaDriver is used to create a new exec driver func NewJavaDriver(ctx *DriverContext) Driver { - return &JavaDriver{*ctx} + return &JavaDriver{DriverContext: *ctx} } func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { // Only enable if we are root when running on non-windows systems. 
- if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { - d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling") + if runtime.GOOS == "linux" && syscall.Geteuid() != 0 { + d.logger.Printf("[DEBUG] driver.java: must run as root user on linux, disabling") return false, nil } @@ -89,26 +90,24 @@ func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, } func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { - // Get the jar source - source, ok := task.Config["jar_source"] - if !ok || source == "" { - return nil, fmt.Errorf("missing jar source for Java Jar driver") - } - taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] if !ok { return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - - // Create a location to download the binary. - jarName := path.Base(source) - jarPath := filepath.Join(destDir, jarName) - if err := getter.GetFile(jarPath, source); err != nil { - return nil, fmt.Errorf("Error downloading source for Java driver: %s", err) + // Proceed to download an artifact to be executed. + path, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } + jarName := filepath.Base(path) + // Get the environment variables. envVars := TaskEnvironmentVariables(ctx, task) diff --git a/client/driver/java_test.go b/client/driver/java_test.go index ad8f5e578..206cc1c78 100644 --- a/client/driver/java_test.go +++ b/client/driver/java_test.go @@ -19,7 +19,7 @@ func javaLocated() bool { // The fingerprinter test should always pass, even if Java is not installed. 
func TestJavaDriver_Fingerprint(t *testing.T) { - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) d := NewJavaDriver(testDriverContext("")) node := &structs.Node{ Attributes: make(map[string]string), @@ -93,14 +93,13 @@ func TestJavaDriver_Start_Wait(t *testing.T) { t.Skip("Java not found; skipping") } - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) task := &structs.Task{ Name: "demo-app", Config: map[string]string{ - "jar_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", - // "jar_source": "https://s3-us-west-2.amazonaws.com/java-jar-thing/demoapp.jar", - // "args": "-d64", - "jvm_options": "-Xmx2048m -Xms256m", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", + "jvm_options": "-Xmx2048m -Xms256m", + "checksum": "sha256:58d6e8130308d32e197c5108edd4f56ddf1417408f743097c2e662df0f0b17c8", }, Resources: basicResources, } @@ -141,13 +140,11 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { t.Skip("Java not found; skipping") } - ctestutils.ExecCompatible(t) + ctestutils.JavaCompatible(t) task := &structs.Task{ Name: "demo-app", Config: map[string]string{ - "jar_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", - // "jar_source": "https://s3-us-west-2.amazonaws.com/java-jar-thing/demoapp.jar", - // "args": "-d64", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar", }, Resources: basicResources, } @@ -179,7 +176,7 @@ func TestJavaDriver_Start_Kill_Wait(t *testing.T) { if err == nil { t.Fatal("should err") } - case <-time.After(2 * time.Second): + case <-time.After(8 * time.Second): t.Fatalf("timeout") } diff --git a/client/driver/qemu.go b/client/driver/qemu.go index abf6d4dfa..79193a217 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -1,14 +1,7 @@ package driver import ( - "bytes" - "crypto/sha256" - "encoding/hex" - "encoding/json" "fmt" - "io" - "log" - "os" "os/exec" "path/filepath" "regexp" @@ -17,9 +10,11 @@ import ( "strings" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) @@ -32,26 +27,19 @@ var ( // planned in the future type QemuDriver struct { DriverContext + fingerprint.StaticFingerprinter } // qemuHandle is returned from Start/Open as a handle to the PID type qemuHandle struct { - proc *os.Process - vmID string + cmd executor.Executor waitCh chan error doneCh chan struct{} } -// qemuPID is a struct to map the pid running the process to the vm image on -// disk -type qemuPID struct { - Pid int - VmID string -} - // NewQemuDriver is used to create a new exec driver func NewQemuDriver(ctx *DriverContext) Driver { - return &QemuDriver{*ctx} + return &QemuDriver{DriverContext: *ctx} } func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { @@ -82,7 +70,7 @@ func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, // image and save it to the Drivers Allocation Dir func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { // Get the image source - source, ok := task.Config["image_source"] + source, ok := task.Config["artifact_source"] if !ok || source == "" { return nil, fmt.Errorf("Missing source image Qemu driver") } @@ -99,34 +87,18 @@ func (d *QemuDriver) 
Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - // Create a location to download the binary. - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - vmID := fmt.Sprintf("qemu-vm-%s-%s", structs.GenerateUUID(), filepath.Base(source)) - vmPath := filepath.Join(destDir, vmID) - if err := getter.GetFile(vmPath, source); err != nil { - return nil, fmt.Errorf("Error downloading artifact for Qemu driver: %s", err) + // Proceed to download an artifact to be executed. + vmPath, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } - // compute and check checksum - if check, ok := task.Config["checksum"]; ok { - d.logger.Printf("[DEBUG] Running checksum on (%s)", vmID) - hasher := sha256.New() - file, err := os.Open(vmPath) - if err != nil { - return nil, fmt.Errorf("Failed to open file for checksum") - } - - defer file.Close() - io.Copy(hasher, file) - - sum := hex.EncodeToString(hasher.Sum(nil)) - if sum != check { - return nil, fmt.Errorf( - "Error in Qemu: checksums did not match.\nExpected (%s), got (%s)", - check, - sum) - } - } + vmID := filepath.Base(vmPath) // Parse configuration arguments // Create the base arguments @@ -201,25 +173,25 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, ) } - // Start Qemu - var outBuf, errBuf bytes.Buffer - cmd := exec.Command(args[0], args[1:]...) - cmd.Stdout = &outBuf - cmd.Stderr = &errBuf + // Setup the command + cmd := executor.Command(args[0], args[1:]...) + if err := cmd.Limit(task.Resources); err != nil { + return nil, fmt.Errorf("failed to constrain resources: %s", err) + } + + if err := cmd.ConfigureTaskDir(d.taskName, ctx.AllocDir); err != nil { + return nil, fmt.Errorf("failed to configure task directory: %v", err) + } d.logger.Printf("[DEBUG] Starting QemuVM command: %q", strings.Join(args, " ")) if err := cmd.Start(); err != nil { - return nil, fmt.Errorf( - "Error running QEMU: %s\n\nOutput: %s\n\nError: %s", - err, outBuf.String(), errBuf.String()) + return nil, fmt.Errorf("failed to start command: %v", err) } - d.logger.Printf("[INFO] Started new QemuVM: %s", vmID) // Create and Return Handle h := &qemuHandle{ - proc: cmd.Process, - vmID: vmPath, + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } @@ -229,42 +201,25 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, } func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { - // Parse the handle - pidBytes := []byte(strings.TrimPrefix(handleID, "QEMU:")) - qpid := &qemuPID{} - if err := json.Unmarshal(pidBytes, qpid); err != nil { - return nil, fmt.Errorf("failed to parse Qemu handle '%s': %v", handleID, err) - } - // Find the process - proc, err := os.FindProcess(qpid.Pid) - if proc == nil || err != nil { - return nil, fmt.Errorf("failed to find Qemu PID %d: %v", qpid.Pid, err) + cmd, err := executor.OpenId(handleID) + if err != nil { + return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err) } // Return a driver handle - h := &qemuHandle{ - proc: proc, - vmID: qpid.VmID, + h := &execHandle{ + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } - go h.run() return h, nil } func (h *qemuHandle) ID() string { - // Return a handle to the PID - pid := &qemuPID{ - Pid: h.proc.Pid, - VmID: h.vmID, - } - data, err := 
json.Marshal(pid) - if err != nil { - log.Printf("[ERR] failed to marshal Qemu PID to JSON: %s", err) - } - return fmt.Sprintf("QEMU:%s", string(data)) + id, _ := h.cmd.ID() + return id } func (h *qemuHandle) WaitCh() chan error { @@ -276,28 +231,23 @@ func (h *qemuHandle) Update(task *structs.Task) error { return nil } -// Kill is used to terminate the task. We send an Interrupt -// and then provide a 5 second grace period before doing a Kill. -// // TODO: allow a 'shutdown_command' that can be executed over a ssh connection // to the VM func (h *qemuHandle) Kill() error { - h.proc.Signal(os.Interrupt) + h.cmd.Shutdown() select { case <-h.doneCh: return nil case <-time.After(5 * time.Second): - return h.proc.Kill() + return h.cmd.ForceStop() } } func (h *qemuHandle) run() { - ps, err := h.proc.Wait() + err := h.cmd.Wait() close(h.doneCh) if err != nil { h.waitCh <- err - } else if !ps.Success() { - h.waitCh <- fmt.Errorf("task exited with error") } close(h.waitCh) } diff --git a/client/driver/qemu_test.go b/client/driver/qemu_test.go index e9a9e5744..0ab60f86d 100644 --- a/client/driver/qemu_test.go +++ b/client/driver/qemu_test.go @@ -2,7 +2,6 @@ package driver import ( "fmt" - "os" "testing" "github.com/hashicorp/nomad/client/config" @@ -11,21 +10,6 @@ import ( ctestutils "github.com/hashicorp/nomad/client/testutil" ) -func TestQemuDriver_Handle(t *testing.T) { - h := &qemuHandle{ - proc: &os.Process{Pid: 123}, - vmID: "vmid", - doneCh: make(chan struct{}), - waitCh: make(chan error, 1), - } - - actual := h.ID() - expected := `QEMU:{"Pid":123,"VmID":"vmid"}` - if actual != expected { - t.Errorf("Expected `%s`, found `%s`", expected, actual) - } -} - // The fingerprinter test should always pass, even if QEMU is not installed. func TestQemuDriver_Fingerprint(t *testing.T) { ctestutils.QemuCompatible(t) @@ -48,18 +32,19 @@ func TestQemuDriver_Fingerprint(t *testing.T) { } } -func TestQemuDriver_Start(t *testing.T) { +func TestQemuDriver_StartOpen_Wait(t *testing.T) { ctestutils.QemuCompatible(t) // TODO: use test server to load from a fixture task := &structs.Task{ Name: "linux", Config: map[string]string{ - "image_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", - "checksum": "a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", - "accelerator": "tcg", - "guest_ports": "22,8080", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", + "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", + "accelerator": "tcg", + "guest_ports": "22,8080", }, Resources: &structs.Resources{ + CPU: 500, MemoryMB: 512, Networks: []*structs.NetworkResource{ &structs.NetworkResource{ @@ -103,11 +88,11 @@ func TestQemuDriver_RequiresMemory(t *testing.T) { task := &structs.Task{ Name: "linux", Config: map[string]string{ - "image_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", - "accelerator": "tcg", - "host_port": "8080", - "guest_port": "8081", - "checksum": "a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", + "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", + "accelerator": "tcg", + "host_port": "8080", + "guest_port": "8081", + "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", // ssh u/p would be here }, } diff --git a/client/driver/raw_exec.go b/client/driver/raw_exec.go index fd54e1b86..12e99b7f4 100644 --- a/client/driver/raw_exec.go +++ b/client/driver/raw_exec.go @@ -2,31 
+2,21 @@ package driver import ( "fmt" - "log" - "os" - "os/exec" - "path" "path/filepath" - "runtime" "strconv" - "strings" - "syscall" "time" - "github.com/hashicorp/go-getter" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/driver/executor" + "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/getter" "github.com/hashicorp/nomad/nomad/structs" ) const ( // The option that enables this driver in the Config.Options map. rawExecConfigOption = "driver.raw_exec.enable" - - // Null files to use as stdin. - unixNull = "/dev/null" - windowsNull = "nul" ) // The RawExecDriver is a privileged version of the exec driver. It provides no @@ -34,18 +24,19 @@ const ( // and this should only be used when explicitly needed. type RawExecDriver struct { DriverContext + fingerprint.StaticFingerprinter } // rawExecHandle is returned from Start/Open as a handle to the PID type rawExecHandle struct { - proc *os.Process + cmd executor.Executor waitCh chan error doneCh chan struct{} } // NewRawExecDriver is used to create a new raw exec driver func NewRawExecDriver(ctx *DriverContext) Driver { - return &RawExecDriver{*ctx} + return &RawExecDriver{DriverContext: *ctx} } func (d *RawExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { @@ -71,7 +62,6 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl if !ok { return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) } - taskLocal := filepath.Join(taskDir, allocdir.TaskLocal) // Get the command to be ran command, ok := task.Config["command"] @@ -83,88 +73,47 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl source, ok := task.Config["artifact_source"] if ok && source != "" { // Proceed to download an artifact to be executed. - // We use go-getter to support a variety of protocols, but need to change - // file permissions of the resulted download to be executable - - // Create a location to download the artifact. - destDir := filepath.Join(taskDir, allocdir.TaskLocal) - - artifactName := path.Base(source) - artifactFile := filepath.Join(destDir, artifactName) - if err := getter.GetFile(artifactFile, source); err != nil { - return nil, fmt.Errorf("Error downloading artifact for Raw Exec driver: %s", err) - } - - // Add execution permissions to the newly downloaded artifact - if runtime.GOOS != "windows" { - if err := syscall.Chmod(artifactFile, 0755); err != nil { - log.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err) - } + _, err := getter.GetArtifact( + filepath.Join(taskDir, allocdir.TaskLocal), + task.Config["artifact_source"], + task.Config["checksum"], + d.logger, + ) + if err != nil { + return nil, err } } // Get the environment variables. 
envVars := TaskEnvironmentVariables(ctx, task) - // expand NOMAD_TASK_DIR - parsedPath, err := args.ParseAndReplace(command, envVars.Map()) - if err != nil { - return nil, fmt.Errorf("failure to parse arguments in command path: %v", command) - } else if len(parsedPath) != 1 { - return nil, fmt.Errorf("couldn't properly parse command path: %v", command) - } - - cm := parsedPath[0] - // Look for arguments - var cmdArgs []string + var args []string if argRaw, ok := task.Config["args"]; ok { - parsed, err := args.ParseAndReplace(argRaw, envVars.Map()) - if err != nil { - return nil, err - } - cmdArgs = append(cmdArgs, parsed...) + args = append(args, argRaw) } // Setup the command - cmd := exec.Command(cm, cmdArgs...) - cmd.Dir = taskDir - cmd.Env = envVars.List() - - // Capture the stdout/stderr and redirect stdin to /dev/null - stdoutFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stdout", taskName)) - stderrFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stderr", taskName)) - stdinFilename := unixNull - if runtime.GOOS == "windows" { - stdinFilename = windowsNull + cmd := executor.NewBasicExecutor() + executor.SetCommand(cmd, command, args) + if err := cmd.Limit(task.Resources); err != nil { + return nil, fmt.Errorf("failed to constrain resources: %s", err) } - stdo, err := os.OpenFile(stdoutFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return nil, fmt.Errorf("Error opening file to redirect stdout: %v", err) - } + // Populate environment variables + cmd.Command().Env = envVars.List() - stde, err := os.OpenFile(stderrFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err) + if err := cmd.ConfigureTaskDir(d.taskName, ctx.AllocDir); err != nil { + return nil, fmt.Errorf("failed to configure task directory: %v", err) } - stdi, err := os.OpenFile(stdinFilename, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return nil, fmt.Errorf("Error opening file to redirect stdin: %v", err) - } - - cmd.Stdout = stdo - cmd.Stderr = stde - cmd.Stdin = stdi - if err := cmd.Start(); err != nil { return nil, fmt.Errorf("failed to start command: %v", err) } // Return a driver handle - h := &rawExecHandle{ - proc: cmd.Process, + h := &execHandle{ + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } @@ -173,22 +122,15 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl } func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { - // Split the handle - pidStr := strings.TrimPrefix(handleID, "PID:") - pid, err := strconv.Atoi(pidStr) - if err != nil { - return nil, fmt.Errorf("failed to parse handle '%s': %v", handleID, err) - } - // Find the process - proc, err := os.FindProcess(pid) - if proc == nil || err != nil { - return nil, fmt.Errorf("failed to find PID %d: %v", pid, err) + cmd := executor.NewBasicExecutor() + if err := cmd.Open(handleID); err != nil { + return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err) } // Return a driver handle - h := &rawExecHandle{ - proc: proc, + h := &execHandle{ + cmd: cmd, doneCh: make(chan struct{}), waitCh: make(chan error, 1), } @@ -197,8 +139,8 @@ func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, e } func (h *rawExecHandle) ID() string { - // Return a handle to the PID - return fmt.Sprintf("PID:%d", h.proc.Pid) + id, _ := h.cmd.ID() + return id } func (h *rawExecHandle) WaitCh() chan error { @@ -210,30 +152,21 @@ func (h 
*rawExecHandle) Update(task *structs.Task) error { return nil } -// Kill is used to terminate the task. We send an Interrupt -// and then provide a 5 second grace period before doing a Kill on supported -// OS's, otherwise we kill immediately. func (h *rawExecHandle) Kill() error { - if runtime.GOOS == "windows" { - return h.proc.Kill() - } - - h.proc.Signal(os.Interrupt) + h.cmd.Shutdown() select { case <-h.doneCh: return nil case <-time.After(5 * time.Second): - return h.proc.Kill() + return h.cmd.ForceStop() } } func (h *rawExecHandle) run() { - ps, err := h.proc.Wait() + err := h.cmd.Wait() close(h.doneCh) if err != nil { h.waitCh <- err - } else if !ps.Success() { - h.waitCh <- fmt.Errorf("task exited with error") } close(h.waitCh) } diff --git a/client/driver/raw_exec_test.go b/client/driver/raw_exec_test.go index 0a6133df9..053f29337 100644 --- a/client/driver/raw_exec_test.go +++ b/client/driver/raw_exec_test.go @@ -55,6 +55,7 @@ func TestRawExecDriver_StartOpen_Wait(t *testing.T) { "command": "/bin/sleep", "args": "1", }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) ctx := testDriverExecContext(task, driverCtx) @@ -84,22 +85,17 @@ func TestRawExecDriver_StartOpen_Wait(t *testing.T) { case <-time.After(2 * time.Second): t.Fatalf("timeout") } - - // Check they are both tracking the same PID. - pid1 := handle.(*rawExecHandle).proc.Pid - pid2 := handle2.(*rawExecHandle).proc.Pid - if pid1 != pid2 { - t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2) - } } func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { - var file string + var file, checksum string switch runtime.GOOS { case "darwin": file = "hi_darwin_amd64" + checksum = "md5:d7f2fdb13b36dcb7407721d78926b335" default: file = "hi_linux_amd64" + checksum = "md5:a9b14903a8942748e4f8474e11f795d3" } task := &structs.Task{ @@ -107,7 +103,9 @@ func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { Config: map[string]string{ "artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file), "command": filepath.Join("$NOMAD_TASK_DIR", file), + "checksum": checksum, }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) ctx := testDriverExecContext(task, driverCtx) @@ -137,13 +135,6 @@ func TestRawExecDriver_Start_Artifact_basic(t *testing.T) { case <-time.After(5 * time.Second): t.Fatalf("timeout") } - - // Check they are both tracking the same PID. - pid1 := handle.(*rawExecHandle).proc.Pid - pid2 := handle2.(*rawExecHandle).proc.Pid - if pid1 != pid2 { - t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2) - } } func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) { @@ -162,6 +153,7 @@ func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) { "command": "/bin/bash", "args": fmt.Sprintf("-c '/bin/sleep 1 && %s'", filepath.Join("$NOMAD_TASK_DIR", file)), }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) ctx := testDriverExecContext(task, driverCtx) @@ -191,13 +183,6 @@ func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) { case <-time.After(5 * time.Second): t.Fatalf("timeout") } - - // Check they are both tracking the same PID. 
- pid1 := handle.(*rawExecHandle).proc.Pid - pid2 := handle2.(*rawExecHandle).proc.Pid - if pid1 != pid2 { - t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2) - } } func TestRawExecDriver_Start_Wait(t *testing.T) { @@ -207,6 +192,7 @@ func TestRawExecDriver_Start_Wait(t *testing.T) { "command": "/bin/sleep", "args": "1", }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) @@ -248,6 +234,7 @@ func TestRawExecDriver_Start_Wait_AllocDir(t *testing.T) { "command": "/bin/bash", "args": fmt.Sprintf(`-c "sleep 1; echo -n %s > $%s/%s"`, string(exp), environment.AllocDir, file), }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) @@ -292,6 +279,7 @@ func TestRawExecDriver_Start_Kill_Wait(t *testing.T) { "command": "/bin/sleep", "args": "1", }, + Resources: basicResources, } driverCtx := testDriverContext(task.Name) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 456e4e02b..3f1912531 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/args" + "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/nomad/structs" ) @@ -30,6 +31,7 @@ var ( // planned in the future type RktDriver struct { DriverContext + fingerprint.StaticFingerprinter } // rktHandle is returned from Start/Open as a handle to the PID @@ -50,7 +52,7 @@ type rktPID struct { // NewRktDriver is used to create a new exec driver func NewRktDriver(ctx *DriverContext) Driver { - return &RktDriver{*ctx} + return &RktDriver{DriverContext: *ctx} } func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { diff --git a/client/driver/spawn/spawn.go b/client/driver/spawn/spawn.go new file mode 100644 index 000000000..b962a1ab4 --- /dev/null +++ b/client/driver/spawn/spawn.go @@ -0,0 +1,287 @@ +package spawn + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "time" + + "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/command" + "github.com/hashicorp/nomad/helper/discover" +) + +// Spawner is used to start a user command in an isolated fashion that is +// resistent to Nomad agent failure. +type Spawner struct { + spawn *os.Process + SpawnPid int + SpawnPpid int + StateFile string + UserPid int + + // User configuration + UserCmd *exec.Cmd + Logs *Logs + Chroot string +} + +// Logs is used to define the filepaths the user command's logs should be +// redirected to. The files do not need to exist. +type Logs struct { + Stdin, Stdout, Stderr string +} + +// NewSpawner takes a path to a state file. This state file can be used to +// create a new Spawner that can be used to wait on the exit status of a +// process even through Nomad restarts. +func NewSpawner(stateFile string) *Spawner { + return &Spawner{StateFile: stateFile} +} + +// SetCommand sets the user command to spawn. +func (s *Spawner) SetCommand(cmd *exec.Cmd) { + s.UserCmd = cmd +} + +// SetLogs sets the redirection of user command log files. +func (s *Spawner) SetLogs(l *Logs) { + s.Logs = l +} + +// SetChroot puts the user command into a chroot. +func (s *Spawner) SetChroot(root string) { + s.Chroot = root +} + +// Spawn does a double-fork to start and isolate the user command. It takes a +// call-back that is invoked with the pid of the intermediary process. 
If the +// call back returns an error, the user command is not started and the spawn is +// cancelled. This can be used to put the process into a cgroup or jail and +// cancel starting the user process if that was not successful. An error is +// returned if the call-back returns an error or the user-command couldn't be +// started. +func (s *Spawner) Spawn(cb func(pid int) error) error { + bin, err := discover.NomadExecutable() + if err != nil { + return fmt.Errorf("Failed to determine the nomad executable: %v", err) + } + + exitFile, err := os.OpenFile(s.StateFile, os.O_CREATE|os.O_WRONLY, 0666) + defer exitFile.Close() + if err != nil { + return fmt.Errorf("Error opening file to store exit status: %v", err) + } + + config, err := s.spawnConfig() + if err != nil { + return err + } + + spawn := exec.Command(bin, "spawn-daemon", config) + + // Capture stdout + spawnStdout, err := spawn.StdoutPipe() + defer spawnStdout.Close() + if err != nil { + return fmt.Errorf("Failed to capture spawn-daemon stdout: %v", err) + } + + // Capture stdin. + spawnStdin, err := spawn.StdinPipe() + defer spawnStdin.Close() + if err != nil { + return fmt.Errorf("Failed to capture spawn-daemon stdin: %v", err) + } + + if err := spawn.Start(); err != nil { + return fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) + } + + if cb != nil { + cbErr := cb(spawn.Process.Pid) + if cbErr != nil { + errs := new(multierror.Error) + errs = multierror.Append(errs, cbErr) + if err := s.sendAbortCommand(spawnStdin); err != nil { + errs = multierror.Append(errs, err) + } + + return errs + } + } + + if err := s.sendStartCommand(spawnStdin); err != nil { + return err + } + + respCh := make(chan command.SpawnStartStatus, 1) + errCh := make(chan error, 1) + + go func() { + var resp command.SpawnStartStatus + dec := json.NewDecoder(spawnStdout) + if err := dec.Decode(&resp); err != nil { + errCh <- fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) + } + respCh <- resp + }() + + select { + case err := <-errCh: + return err + case resp := <-respCh: + if resp.ErrorMsg != "" { + return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) + } + s.UserPid = resp.UserPID + case <-time.After(5 * time.Second): + return fmt.Errorf("timed out waiting for response") + } + + // Store the spawn process. + s.spawn = spawn.Process + s.SpawnPid = s.spawn.Pid + s.SpawnPpid = os.Getpid() + return nil +} + +// spawnConfig returns a serialized config to pass to the Nomad spawn-daemon +// command. +func (s *Spawner) spawnConfig() (string, error) { + if s.UserCmd == nil { + return "", fmt.Errorf("Must specify user command") + } + + config := command.DaemonConfig{ + Cmd: *s.UserCmd, + Chroot: s.Chroot, + ExitStatusFile: s.StateFile, + } + + if s.Logs != nil { + config.StdoutFile = s.Logs.Stdout + config.StdinFile = s.Logs.Stdin + config.StderrFile = s.Logs.Stderr + } + + var buffer bytes.Buffer + enc := json.NewEncoder(&buffer) + if err := enc.Encode(config); err != nil { + return "", fmt.Errorf("Failed to serialize configuration: %v", err) + } + + return strconv.Quote(buffer.String()), nil +} + +// sendStartCommand sends the necessary command to the spawn-daemon to have it +// start the user process. 
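+// The exchange with the daemon is a single JSON-encoded boolean written to
+// its stdin: true tells it to start the user command, false (sent by
+// sendAbortCommand below) tells it to abort.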
+func (s *Spawner) sendStartCommand(w io.Writer) error { + enc := json.NewEncoder(w) + if err := enc.Encode(true); err != nil { + return fmt.Errorf("Failed to serialize start command: %v", err) + } + + return nil +} + +// sendAbortCommand sends the necessary command to the spawn-daemon to have it +// abort starting the user process. This should be invoked if the spawn-daemon +// could not be isolated into a cgroup. +func (s *Spawner) sendAbortCommand(w io.Writer) error { + enc := json.NewEncoder(w) + if err := enc.Encode(false); err != nil { + return fmt.Errorf("Failed to serialize abort command: %v", err) + } + + return nil +} + +// Wait returns the exit code of the user process or an error if the wait +// failed. +func (s *Spawner) Wait() (int, error) { + if os.Getpid() == s.SpawnPpid { + return s.waitAsParent() + } + + return s.pollWait() +} + +// waitAsParent waits on the process if the current process was the spawner. +func (s *Spawner) waitAsParent() (int, error) { + if s.SpawnPpid != os.Getpid() { + return -1, fmt.Errorf("not the parent. Spawner parent is %v; current pid is %v", s.SpawnPpid, os.Getpid()) + } + + // Try to reattach to the spawn. + if s.spawn == nil { + // If it can't be reattached, it means the spawn process has exited so + // we should just read its exit file. + var err error + if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil { + return s.pollWait() + } + } + + if _, err := s.spawn.Wait(); err != nil { + return -1, err + } + + return s.pollWait() +} + +// pollWait polls on the spawn daemon to determine when it exits. After it +// exits, it reads the state file and returns the exit code and possibly an +// error. +func (s *Spawner) pollWait() (int, error) { + // Stat to check if it is there to avoid a race condition. + stat, err := os.Stat(s.StateFile) + if err != nil { + return -1, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err) + } + + // If there is data it means that the file has already been written. + if stat.Size() > 0 { + return s.readExitCode() + } + + // Read after the process exits. + for _ = range time.Tick(5 * time.Second) { + if !s.Alive() { + break + } + } + + return s.readExitCode() +} + +// readExitCode parses the state file and returns the exit code of the task. It +// returns an error if the file can't be read. 
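+// The file is expected to hold a JSON-encoded command.SpawnExitStatus,
+// presumably written by the spawn-daemon once the user command exits.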
+func (s *Spawner) readExitCode() (int, error) { + f, err := os.Open(s.StateFile) + defer f.Close() + if err != nil { + return -1, fmt.Errorf("Failed to open %v to read exit code: %v", s.StateFile, err) + } + + stat, err := f.Stat() + if err != nil { + return -1, fmt.Errorf("Failed to stat file %v: %v", s.StateFile, err) + } + + if stat.Size() == 0 { + return -1, fmt.Errorf("Empty state file: %v", s.StateFile) + } + + var exitStatus command.SpawnExitStatus + dec := json.NewDecoder(f) + if err := dec.Decode(&exitStatus); err != nil { + return -1, fmt.Errorf("Failed to parse exit status from %v: %v", s.StateFile, err) + } + + return exitStatus.ExitCode, nil +} diff --git a/client/driver/spawn/spawn_posix.go b/client/driver/spawn/spawn_posix.go new file mode 100644 index 000000000..7df381064 --- /dev/null +++ b/client/driver/spawn/spawn_posix.go @@ -0,0 +1,14 @@ +// +build !windows + +package spawn + +import "syscall" + +func (s *Spawner) Alive() bool { + if s.spawn == nil { + return false + } + + err := s.spawn.Signal(syscall.Signal(0)) + return err == nil +} diff --git a/client/driver/spawn/spawn_test.go b/client/driver/spawn/spawn_test.go new file mode 100644 index 000000000..bbb8c8dca --- /dev/null +++ b/client/driver/spawn/spawn_test.go @@ -0,0 +1,300 @@ +package spawn + +import ( + "fmt" + "io/ioutil" + "os" + "os/exec" + "runtime" + "strings" + "testing" + "time" +) + +func TestSpawn_NoCmd(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + if err := spawn.Spawn(nil); err == nil { + t.Fatalf("Spawn() with no user command should fail") + } +} + +func TestSpawn_InvalidCmd(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("foo")) + if err := spawn.Spawn(nil); err == nil { + t.Fatalf("Spawn() with no invalid command should fail") + } +} + +func TestSpawn_SetsLogs(t *testing.T) { + // TODO: Figure out why this test fails. If the spawn-daemon directly writes + // to the opened stdout file it works but not the user command. Maybe a + // flush issue? + if runtime.GOOS == "windows" { + t.Skip("Test fails on windows; unknown reason. Skipping") + } + + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + exp := "foo" + spawn.SetCommand(exec.Command("echo", exp)) + + // Create file for stdout. 
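+	// SetLogs below points the user command's stdout at this file, so the
+	// test can read back what echo wrote.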
+ stdout, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(stdout.Name()) + spawn.SetLogs(&Logs{Stdout: stdout.Name()}) + + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed: %v", err) + } + + if code, err := spawn.Wait(); code != 0 && err != nil { + t.Fatalf("Wait() returned %v, %v; want 0, nil", code, err) + } + + stdout2, err := os.Open(stdout.Name()) + if err != nil { + t.Fatalf("Open() failed: %v", err) + } + + data, err := ioutil.ReadAll(stdout2) + if err != nil { + t.Fatalf("ReadAll() failed: %v", err) + } + + act := strings.TrimSpace(string(data)) + if act != exp { + t.Fatalf("Unexpected data written to stdout; got %v; want %v", act, exp) + } +} + +func TestSpawn_Callback(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "1")) + + called := false + cbErr := fmt.Errorf("ERROR CB") + cb := func(_ int) error { + called = true + return cbErr + } + + if err := spawn.Spawn(cb); err == nil { + t.Fatalf("Spawn(%#v) should have errored; want %v", cb, cbErr) + } + + if !called { + t.Fatalf("Spawn(%#v) didn't call callback", cb) + } +} + +func TestSpawn_ParentWaitExited(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("echo", "foo")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + time.Sleep(1 * time.Second) + + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_ParentWait(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_NonParentWaitExited(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("echo", "foo")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + time.Sleep(1 * time.Second) + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_NonParentWait(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(nil); err != nil { + t.Fatalf("Spawn() failed %v", err) + } + + // Need to wait on the spawner, otherwise it becomes a zombie and the test + // only finishes after the init process cleans it. This speeds that up. + go func() { + time.Sleep(3 * time.Second) + if _, err := spawn.spawn.Wait(); err != nil { + t.FailNow() + } + }() + + // Force the wait to assume non-parent. 
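+	// Wait() only calls waitAsParent when SpawnPpid matches our own pid, so
+	// zeroing it forces the pollWait path.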
+ spawn.SpawnPpid = 0 + code, err := spawn.Wait() + if err != nil { + t.Fatalf("Wait() failed %v", err) + } + + if code != 0 { + t.Fatalf("Wait() returned %v; want 0", code) + } +} + +func TestSpawn_DeadSpawnDaemon_Parent(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "5")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} + +func TestSpawn_DeadSpawnDaemon_NonParent(t *testing.T) { + f, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("TempFile() failed") + } + defer os.Remove(f.Name()) + + var spawnPid int + cb := func(pid int) error { + spawnPid = pid + return nil + } + + spawn := NewSpawner(f.Name()) + spawn.SetCommand(exec.Command("sleep", "2")) + if err := spawn.Spawn(cb); err != nil { + t.Fatalf("Spawn() errored: %v", err) + } + + proc, err := os.FindProcess(spawnPid) + if err != nil { + t.FailNow() + } + + if err := proc.Kill(); err != nil { + t.FailNow() + } + + if _, err := proc.Wait(); err != nil { + t.FailNow() + } + + // Force the wait to assume non-parent. + spawn.SpawnPpid = 0 + if _, err := spawn.Wait(); err == nil { + t.Fatalf("Wait() should have failed: %v", err) + } +} diff --git a/client/driver/spawn/spawn_windows.go b/client/driver/spawn/spawn_windows.go new file mode 100644 index 000000000..9683dce97 --- /dev/null +++ b/client/driver/spawn/spawn_windows.go @@ -0,0 +1,21 @@ +package spawn + +import "syscall" + +const STILL_ACTIVE = 259 + +func (s *Spawner) Alive() bool { + const da = syscall.STANDARD_RIGHTS_READ | syscall.PROCESS_QUERY_INFORMATION | syscall.SYNCHRONIZE + h, e := syscall.OpenProcess(da, false, uint32(s.SpawnPid)) + if e != nil { + return false + } + + var ec uint32 + e = syscall.GetExitCodeProcess(h, &ec) + if e != nil { + return false + } + + return ec == STILL_ACTIVE +} diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go deleted file mode 100644 index ceb178063..000000000 --- a/client/executor/exec_linux.go +++ /dev/null @@ -1,579 +0,0 @@ -package executor - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io" - "os" - "os/exec" - "os/user" - "path/filepath" - "strconv" - "strings" - "syscall" - - "github.com/hashicorp/go-multierror" - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/driver/args" - "github.com/hashicorp/nomad/client/driver/environment" - "github.com/hashicorp/nomad/command" - "github.com/hashicorp/nomad/helper/discover" - "github.com/hashicorp/nomad/nomad/structs" - - cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" - cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" -) - -const ( - cgroupMount = "/sys/fs/cgroup" -) - -var ( - // A mapping of directories on the host OS to attempt to embed inside each - // task's chroot. 
- chrootEnv = map[string]string{ - "/bin": "/bin", - "/etc": "/etc", - "/lib": "/lib", - "/lib32": "/lib32", - "/lib64": "/lib64", - "/usr/bin": "/usr/bin", - "/usr/lib": "/usr/lib", - } -) - -func NewExecutor() Executor { - e := LinuxExecutor{} - - // TODO: In a follow-up PR make it so this only happens once per client. - // Fingerprinting shouldn't happen per task. - - // Check that cgroups are available. - if _, err := os.Stat(cgroupMount); err == nil { - e.cgroupEnabled = true - } - - return &e -} - -// Linux executor is designed to run on linux kernel 2.8+. -type LinuxExecutor struct { - cmd - user *user.User - - // Finger print capabilities. - cgroupEnabled bool - - // Isolation configurations. - groups *cgroupConfig.Cgroup - alloc *allocdir.AllocDir - taskName string - taskDir string - - // Tracking of child process. - spawnChild exec.Cmd - spawnOutputWriter *os.File - spawnOutputReader *os.File - - // Track whether there are filesystems mounted in the task dir. - mounts bool -} - -func (e *LinuxExecutor) Limit(resources *structs.Resources) error { - if resources == nil { - return errNoResources - } - - if e.cgroupEnabled { - return e.configureCgroups(resources) - } - - return nil -} - -func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { - e.taskName = taskName - taskDir, ok := alloc.TaskDirs[taskName] - if !ok { - fmt.Errorf("Couldn't find task directory for task %v", taskName) - } - e.taskDir = taskDir - - if err := alloc.MountSharedDir(taskName); err != nil { - return err - } - - if err := alloc.Embed(taskName, chrootEnv); err != nil { - return err - } - - // Mount dev - dev := filepath.Join(taskDir, "dev") - if err := os.Mkdir(dev, 0777); err != nil { - return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) - } - - if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) - } - - // Mount proc - proc := filepath.Join(taskDir, "proc") - if err := os.Mkdir(proc, 0777); err != nil { - return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) - } - - if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) - } - - // Set the tasks AllocDir environment variable. - env, err := environment.ParseFromList(e.Cmd.Env) - if err != nil { - return err - } - env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) - env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) - e.Cmd.Env = env.List() - - e.alloc = alloc - e.mounts = true - return nil -} - -func (e *LinuxExecutor) cleanTaskDir() error { - if e.alloc == nil { - return errors.New("ConfigureTaskDir() must be called before Start()") - } - - if !e.mounts { - return nil - } - - // Unmount dev. - errs := new(multierror.Error) - dev := filepath.Join(e.taskDir, "dev") - if err := syscall.Unmount(dev, 0); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) - } - - // Unmount proc. 
- proc := filepath.Join(e.taskDir, "proc") - if err := syscall.Unmount(proc, 0); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) - } - - e.mounts = false - return errs.ErrorOrNil() -} - -func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { - if !e.cgroupEnabled { - return nil - } - - e.groups = &cgroupConfig.Cgroup{} - - // Groups will be created in a heiarchy according to the resource being - // constrained, current session, and then this unique name. Restraints are - // then placed in the corresponding files. - // Ex: restricting a process to 2048Mhz CPU and 2MB of memory: - // $ cat /sys/fs/cgroup/cpu/user/1000.user/4.session//cpu.shares - // 2028 - // $ cat /sys/fs/cgroup/memory/user/1000.user/4.session//memory.limit_in_bytes - // 2097152 - e.groups.Name = structs.GenerateUUID() - - // TODO: verify this is needed for things like network access - e.groups.AllowAllDevices = true - - if resources.MemoryMB > 0 { - // Total amount of memory allowed to consume - e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) - // Disable swap to avoid issues on the machine - e.groups.MemorySwap = int64(-1) - } - - if resources.CPU != 0 { - if resources.CPU < 2 { - return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) - } - - // Set the relative CPU shares for this cgroup. - // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any - // given process will have at least that amount of resources, but likely - // more since it is (probably) rare that the machine will run at 100% - // CPU. This scale will cease to work if a node is overprovisioned. - e.groups.CpuShares = int64(resources.CPU) - } - - if resources.IOPS != 0 { - // Validate it is in an acceptable range. - if resources.IOPS < 10 || resources.IOPS > 1000 { - return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) - } - - e.groups.BlkioWeight = uint16(resources.IOPS) - } - - return nil -} - -func (e *LinuxExecutor) runAs(userid string) error { - errs := new(multierror.Error) - - // First, try to lookup the user by uid - u, err := user.LookupId(userid) - if err == nil { - e.user = u - return nil - } else { - errs = multierror.Append(errs, err) - } - - // Lookup failed, so try by username instead - u, err = user.Lookup(userid) - if err == nil { - e.user = u - return nil - } else { - errs = multierror.Append(errs, err) - } - - // If we got here we failed to lookup based on id and username, so we'll - // return those errors. - return fmt.Errorf("Failed to identify user to run as: %s", errs) -} - -func (e *LinuxExecutor) Start() error { - // Run as "nobody" user so we don't leak root privilege to the - // spawned process. - if err := e.runAs("nobody"); err == nil && e.user != nil { - e.cmd.SetUID(e.user.Uid) - e.cmd.SetGID(e.user.Gid) - } - - if e.alloc == nil { - return errors.New("ConfigureTaskDir() must be called before Start()") - } - - // Parse the commands arguments and replace instances of Nomad environment - // variables. 
- envVars, err := environment.ParseFromList(e.Cmd.Env) - if err != nil { - return err - } - - parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) - if err != nil { - return err - } else if len(parsedPath) != 1 { - return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) - } - e.cmd.Path = parsedPath[0] - - combined := strings.Join(e.Cmd.Args, " ") - parsed, err := args.ParseAndReplace(combined, envVars.Map()) - if err != nil { - return err - } - e.Cmd.Args = parsed - - return e.spawnDaemon() -} - -// spawnDaemon executes a double fork to start the user command with proper -// isolation. Stores the child process for use in Wait. -func (e *LinuxExecutor) spawnDaemon() error { - bin, err := discover.NomadExecutable() - if err != nil { - return fmt.Errorf("Failed to determine the nomad executable: %v", err) - } - - // Serialize the cmd and the cgroup configuration so it can be passed to the - // sub-process. - var buffer bytes.Buffer - enc := json.NewEncoder(&buffer) - - c := command.DaemonConfig{ - Cmd: e.cmd.Cmd, - Chroot: e.taskDir, - StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), - StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), - StdinFile: "/dev/null", - } - if err := enc.Encode(c); err != nil { - return fmt.Errorf("Failed to serialize daemon configuration: %v", err) - } - - // Create a pipe to capture Stdout. - pr, pw, err := os.Pipe() - if err != nil { - return err - } - e.spawnOutputWriter = pw - e.spawnOutputReader = pr - - // Call ourselves using a hidden flag. The new instance of nomad will join - // the passed cgroup, forkExec the cmd, and output status codes through - // Stdout. - escaped := strconv.Quote(buffer.String()) - spawn := exec.Command(bin, "spawn-daemon", escaped) - spawn.Stdout = e.spawnOutputWriter - - // Capture its Stdin. - spawnStdIn, err := spawn.StdinPipe() - if err != nil { - return err - } - - if err := spawn.Start(); err != nil { - fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) - } - - // Join the spawn-daemon to the cgroup. - if e.groups != nil { - manager := cgroupFs.Manager{} - manager.Cgroups = e.groups - - // Apply will place the current pid into the tasks file for each of the - // created cgroups: - // /sys/fs/cgroup/memory/user/1000.user/4.session//tasks - // - // Apply requires superuser permissions, and may fail if Nomad is not run with - // the required permissions - if err := manager.Apply(spawn.Process.Pid); err != nil { - errs := new(multierror.Error) - errs = multierror.Append(errs, fmt.Errorf("Failed to join spawn-daemon to the cgroup (config => %+v): %v", manager.Cgroups, err)) - - if err := sendAbortCommand(spawnStdIn); err != nil { - errs = multierror.Append(errs, err) - } - - return errs - } - } - - // Tell it to start. - if err := sendStartCommand(spawnStdIn); err != nil { - return err - } - - // Parse the response. 
- dec := json.NewDecoder(e.spawnOutputReader) - var resp command.SpawnStartStatus - if err := dec.Decode(&resp); err != nil { - return fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) - } - - if resp.ErrorMsg != "" { - return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) - } - - e.spawnChild = *spawn - return nil -} - -func sendStartCommand(w io.Writer) error { - enc := json.NewEncoder(w) - if err := enc.Encode(true); err != nil { - return fmt.Errorf("Failed to serialize start command: %v", err) - } - - return nil -} - -func sendAbortCommand(w io.Writer) error { - enc := json.NewEncoder(w) - if err := enc.Encode(false); err != nil { - return fmt.Errorf("Failed to serialize abort command: %v", err) - } - - return nil -} - -// Open's behavior is to kill all processes associated with the id and return an -// error. This is done because it is not possible to re-attach to the -// spawn-daemon's stdout to retrieve status messages. -func (e *LinuxExecutor) Open(id string) error { - parts := strings.SplitN(id, ":", 2) - if len(parts) != 2 { - return fmt.Errorf("Invalid id: %v", id) - } - - switch parts[0] { - case "PID": - pid, err := strconv.Atoi(parts[1]) - if err != nil { - return fmt.Errorf("Invalid id: failed to parse pid %v", parts[1]) - } - - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("Failed to find Pid %v: %v", pid, err) - } - - if err := process.Kill(); err != nil { - return fmt.Errorf("Failed to kill Pid %v: %v", pid, err) - } - case "CGROUP": - if !e.cgroupEnabled { - return errors.New("Passed a a cgroup identifier, but cgroups are disabled") - } - - // De-serialize the cgroup configuration. - dec := json.NewDecoder(strings.NewReader(parts[1])) - var groups cgroupConfig.Cgroup - if err := dec.Decode(&groups); err != nil { - return fmt.Errorf("Failed to parse cgroup configuration: %v", err) - } - - e.groups = &groups - if err := e.destroyCgroup(); err != nil { - return err - } - // TODO: cleanTaskDir is a little more complicated here because the OS - // may have already unmounted in the case of a restart. Need to scan. - default: - return fmt.Errorf("Invalid id type: %v", parts[0]) - } - - return errors.New("Could not re-open to id (intended).") -} - -func (e *LinuxExecutor) Wait() error { - if e.spawnChild.Process == nil { - return errors.New("Can not find child to wait on") - } - - defer e.spawnOutputWriter.Close() - defer e.spawnOutputReader.Close() - - errs := new(multierror.Error) - if err := e.spawnChild.Wait(); err != nil { - errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Process.Pid, err)) - } - - // If they fork/exec and then exit, wait will return but they will be still - // running processes so we need to kill the full cgroup. - if e.groups != nil { - if err := e.destroyCgroup(); err != nil { - errs = multierror.Append(errs, err) - } - } - - if err := e.cleanTaskDir(); err != nil { - errs = multierror.Append(errs, err) - } - - return errs.ErrorOrNil() -} - -// If cgroups are used, the ID is the cgroup structurue. Otherwise, it is the -// PID of the spawn-daemon process. An error is returned if the process was -// never started. -func (e *LinuxExecutor) ID() (string, error) { - if e.spawnChild.Process != nil { - if e.cgroupEnabled && e.groups != nil { - // Serialize the cgroup structure so it can be undone on suabsequent - // opens. 
- var buffer bytes.Buffer - enc := json.NewEncoder(&buffer) - if err := enc.Encode(e.groups); err != nil { - return "", fmt.Errorf("Failed to serialize daemon configuration: %v", err) - } - - return fmt.Sprintf("CGROUP:%v", buffer.String()), nil - } - - return fmt.Sprintf("PID:%d", e.spawnChild.Process.Pid), nil - } - - return "", fmt.Errorf("Process has finished or was never started") -} - -func (e *LinuxExecutor) Shutdown() error { - return e.ForceStop() -} - -func (e *LinuxExecutor) ForceStop() error { - if e.spawnOutputReader != nil { - e.spawnOutputReader.Close() - } - - if e.spawnOutputWriter != nil { - e.spawnOutputWriter.Close() - } - - // If the task is not running inside a cgroup then just the spawn-daemon child is killed. - // TODO: Find a good way to kill the children of the spawn-daemon. - if e.groups == nil { - if err := e.spawnChild.Process.Kill(); err != nil { - return fmt.Errorf("Failed to kill child (%v): %v", e.spawnChild.Process.Pid, err) - } - - return nil - } - - errs := new(multierror.Error) - if e.groups != nil { - if err := e.destroyCgroup(); err != nil { - errs = multierror.Append(errs, err) - } - } - - if err := e.cleanTaskDir(); err != nil { - errs = multierror.Append(errs, err) - } - - return errs.ErrorOrNil() -} - -func (e *LinuxExecutor) destroyCgroup() error { - if e.groups == nil { - return errors.New("Can't destroy: cgroup configuration empty") - } - - manager := cgroupFs.Manager{} - manager.Cgroups = e.groups - pids, err := manager.GetPids() - if err != nil { - return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err) - } - - errs := new(multierror.Error) - for _, pid := range pids { - process, err := os.FindProcess(pid) - if err != nil { - multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err)) - continue - } - - if err := process.Kill(); err != nil { - multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err)) - continue - } - - if _, err := process.Wait(); err != nil { - multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err)) - continue - } - } - - // Remove the cgroup. 
- if err := manager.Destroy(); err != nil { - multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err)) - } - - if len(errs.Errors) != 0 { - return fmt.Errorf("Failed to destroy cgroup: %v", errs) - } - - return nil -} - -func (e *LinuxExecutor) Command() *cmd { - return &e.cmd -} diff --git a/client/executor/exec_linux_test.go b/client/executor/exec_linux_test.go deleted file mode 100644 index 8f33b0da4..000000000 --- a/client/executor/exec_linux_test.go +++ /dev/null @@ -1,216 +0,0 @@ -package executor - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "testing" - "time" - - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - - ctestutil "github.com/hashicorp/nomad/client/testutil" -) - -var ( - constraint = &structs.Resources{ - CPU: 250, - MemoryMB: 256, - Networks: []*structs.NetworkResource{ - &structs.NetworkResource{ - MBits: 50, - DynamicPorts: []string{"http"}, - }, - }, - } -) - -func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) { - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - - allocDir := allocdir.NewAllocDir(filepath.Join(os.TempDir(), alloc.ID)) - if err := allocDir.Build([]*structs.Task{task}); err != nil { - t.Fatalf("allocDir.Build() failed: %v", err) - } - - return task.Name, allocDir -} - -func TestExecutorLinux_Start_Invalid(t *testing.T) { - ctestutil.ExecCompatible(t) - invalid := "/bin/foobar" - e := Command(invalid, "1") - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err == nil { - t.Fatalf("Start(%v) should have failed", invalid) - } -} - -func TestExecutorLinux_Start_Wait_Failure_Code(t *testing.T) { - ctestutil.ExecCompatible(t) - e := Command("/bin/date", "-invalid") - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - if err := e.Wait(); err == nil { - t.Fatalf("Wait() should have failed") - } -} - -func TestExecutorLinux_Start_Wait(t *testing.T) { - ctestutil.ExecCompatible(t) - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - - taskDir, ok := alloc.TaskDirs[task] - if !ok { - t.Fatalf("No task directory found for task %v", task) - } - - expected := "hello world" - file := filepath.Join(allocdir.TaskLocal, "output.txt") - absFilePath := filepath.Join(taskDir, file) - cmd := fmt.Sprintf(`"%v \"%v\" > %v"`, "/bin/sleep 1 ; echo -n", expected, file) - e := Command("/bin/bash", "-c", cmd) - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - if err := e.Wait(); err != nil { - t.Fatalf("Wait() failed: %v", err) - } - - output, err := ioutil.ReadFile(absFilePath) - if err != nil { - t.Fatalf("Couldn't read file %v", absFilePath) - } - - act := string(output) - if act != expected { - t.Fatalf("Command 
output incorrectly: want %v; got %v", expected, act) - } -} - -func TestExecutorLinux_Start_Kill(t *testing.T) { - ctestutil.ExecCompatible(t) - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - - taskDir, ok := alloc.TaskDirs[task] - if !ok { - t.Fatalf("No task directory found for task %v", task) - } - - filePath := filepath.Join(taskDir, "output") - e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - - // This test can only be run if cgroups are enabled. - if !e.(*LinuxExecutor).cgroupEnabled { - t.SkipNow() - } - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - if err := e.Shutdown(); err != nil { - t.Fatalf("Shutdown() failed: %v", err) - } - - time.Sleep(1500 * time.Millisecond) - - // Check that the file doesn't exist. - if _, err := os.Stat(filePath); err == nil { - t.Fatalf("Stat(%v) should have failed: task not killed", filePath) - } -} - -func TestExecutorLinux_Open(t *testing.T) { - ctestutil.ExecCompatible(t) - task, alloc := mockAllocDir(t) - defer alloc.Destroy() - - taskDir, ok := alloc.TaskDirs[task] - if !ok { - t.Fatalf("No task directory found for task %v", task) - } - - filePath := filepath.Join(taskDir, "output") - e := Command("/bin/bash", "-c", "sleep 1 ; echo \"failure\" > "+filePath) - - // This test can only be run if cgroups are enabled. - if !e.(*LinuxExecutor).cgroupEnabled { - t.SkipNow() - } - - if err := e.Limit(constraint); err != nil { - t.Fatalf("Limit() failed: %v", err) - } - - if err := e.ConfigureTaskDir(task, alloc); err != nil { - t.Fatalf("ConfigureTaskDir(%v, %v) failed: %v", task, alloc, err) - } - - if err := e.Start(); err != nil { - t.Fatalf("Start() failed: %v", err) - } - - id, err := e.ID() - if err != nil { - t.Fatalf("ID() failed: %v", err) - } - - if _, err := OpenId(id); err == nil { - t.Fatalf("Open(%v) should have failed", id) - } - - time.Sleep(1500 * time.Millisecond) - - // Check that the file doesn't exist, open should have killed the process. - if _, err := os.Stat(filePath); err == nil { - t.Fatalf("Stat(%v) should have failed: task not killed", filePath) - } -} diff --git a/client/executor/exec_universal.go b/client/executor/exec_universal.go deleted file mode 100644 index 6b1977d10..000000000 --- a/client/executor/exec_universal.go +++ /dev/null @@ -1,107 +0,0 @@ -// +build !linux - -package executor - -import ( - "fmt" - "os" - "strconv" - "strings" - - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/driver/args" - "github.com/hashicorp/nomad/client/driver/environment" - "github.com/hashicorp/nomad/nomad/structs" -) - -func NewExecutor() Executor { - return &UniversalExecutor{} -} - -// UniversalExecutor should work everywhere, and as a result does not include -// any resource restrictions or runas capabilities. 
-type UniversalExecutor struct { - cmd -} - -func (e *UniversalExecutor) Limit(resources *structs.Resources) error { - if resources == nil { - return errNoResources - } - return nil -} - -func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { - taskDir, ok := alloc.TaskDirs[taskName] - if !ok { - return fmt.Errorf("Error finding task dir for (%s)", taskName) - } - e.Dir = taskDir - return nil -} - -func (e *UniversalExecutor) Start() error { - // Parse the commands arguments and replace instances of Nomad environment - // variables. - envVars, err := environment.ParseFromList(e.cmd.Env) - if err != nil { - return err - } - - parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map()) - if err != nil { - return err - } else if len(parsedPath) != 1 { - return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path) - } - - e.cmd.Path = parsedPath[0] - combined := strings.Join(e.cmd.Args, " ") - parsed, err := args.ParseAndReplace(combined, envVars.Map()) - if err != nil { - return err - } - e.Cmd.Args = parsed - - // We don't want to call ourself. We want to call Start on our embedded Cmd - return e.cmd.Start() -} - -func (e *UniversalExecutor) Open(pid string) error { - pidNum, err := strconv.Atoi(pid) - if err != nil { - return fmt.Errorf("Failed to parse pid %v: %v", pid, err) - } - - process, err := os.FindProcess(pidNum) - if err != nil { - return fmt.Errorf("Failed to reopen pid %d: %v", pidNum, err) - } - e.Process = process - return nil -} - -func (e *UniversalExecutor) Wait() error { - // We don't want to call ourself. We want to call Start on our embedded Cmd - return e.cmd.Wait() -} - -func (e *UniversalExecutor) ID() (string, error) { - if e.cmd.Process != nil { - return strconv.Itoa(e.cmd.Process.Pid), nil - } else { - return "", fmt.Errorf("Process has finished or was never started") - } -} - -func (e *UniversalExecutor) Shutdown() error { - return e.ForceStop() -} - -func (e *UniversalExecutor) ForceStop() error { - return e.Process.Kill() -} - -func (e *UniversalExecutor) Command() *cmd { - return &e.cmd -} diff --git a/client/executor/setuid.go b/client/executor/setuid.go deleted file mode 100644 index 4793f8e2c..000000000 --- a/client/executor/setuid.go +++ /dev/null @@ -1,41 +0,0 @@ -// +build !windows - -package executor - -import ( - "fmt" - "strconv" - "syscall" -) - -// SetUID changes the Uid for this command (must be set before starting) -func (c *cmd) SetUID(userid string) error { - uid, err := strconv.ParseUint(userid, 10, 32) - if err != nil { - return fmt.Errorf("Unable to convert userid to uint32: %s", err) - } - if c.SysProcAttr == nil { - c.SysProcAttr = &syscall.SysProcAttr{} - } - if c.SysProcAttr.Credential == nil { - c.SysProcAttr.Credential = &syscall.Credential{} - } - c.SysProcAttr.Credential.Uid = uint32(uid) - return nil -} - -// SetGID changes the Gid for this command (must be set before starting) -func (c *cmd) SetGID(groupid string) error { - gid, err := strconv.ParseUint(groupid, 10, 32) - if err != nil { - return fmt.Errorf("Unable to convert groupid to uint32: %s", err) - } - if c.SysProcAttr == nil { - c.SysProcAttr = &syscall.SysProcAttr{} - } - if c.SysProcAttr.Credential == nil { - c.SysProcAttr.Credential = &syscall.Credential{} - } - c.SysProcAttr.Credential.Gid = uint32(gid) - return nil -} diff --git a/client/executor/setuid_windows.go b/client/executor/setuid_windows.go deleted file mode 100644 index 9c18bed53..000000000 --- a/client/executor/setuid_windows.go +++ /dev/null @@ 
-1,13 +0,0 @@ -package executor - -// SetUID changes the Uid for this command (must be set before starting) -func (c *cmd) SetUID(userid string) error { - // TODO implement something for windows - return nil -} - -// SetGID changes the Gid for this command (must be set before starting) -func (c *cmd) SetGID(groupid string) error { - // TODO implement something for windows - return nil -} diff --git a/client/fingerprint/arch.go b/client/fingerprint/arch.go index 869c542eb..16d8c99a8 100644 --- a/client/fingerprint/arch.go +++ b/client/fingerprint/arch.go @@ -10,6 +10,7 @@ import ( // ArchFingerprint is used to fingerprint the architecture type ArchFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index a03dfeec1..9ae81faf6 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -48,7 +48,7 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod // If we can't hit this URL consul is probably not running on this machine. info, err := consulClient.Agent().Self() if err != nil { - return false, fmt.Errorf("Failed to query consul for agent status: %s", err) + return false, nil } node.Attributes["consul.server"] = strconv.FormatBool(info["Config"]["Server"].(bool)) @@ -63,3 +63,7 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod return true, nil } + +func (f *ConsulFingerprint) Periodic() (bool, time.Duration) { + return true, 15 * time.Second +} diff --git a/client/fingerprint/cpu.go b/client/fingerprint/cpu.go index 3e809397e..5027c8e9a 100644 --- a/client/fingerprint/cpu.go +++ b/client/fingerprint/cpu.go @@ -11,6 +11,7 @@ import ( // CPUFingerprint is used to fingerprint the CPU type CPUFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 839285a1d..f5e26e7cb 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -15,6 +15,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +// This is where the AWS metadata server normally resides. We hardcode the +// "instance" path as well since it's the only one we access here. 
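+// The fingerprinting code below consults the AWS_ENV_URL environment variable
+// first and only falls back to this default when it is unset.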
+const DEFAULT_AWS_URL = "http//169.254.169.254/latest/meta-data/" + // map of instance type to approximate speed, in Mbits/s // http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797 // which itself cites these sources: @@ -65,6 +69,7 @@ var ec2InstanceSpeedMap = map[string]int{ // EnvAWSFingerprint is used to fingerprint AWS metadata type EnvAWSFingerprint struct { + StaticFingerprinter logger *log.Logger } @@ -89,7 +94,7 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) } metadataURL := os.Getenv("AWS_ENV_URL") if metadataURL == "" { - metadataURL = "http://169.254.169.254/latest/meta-data/" + metadataURL = DEFAULT_AWS_URL } // assume 2 seconds is enough time for inside AWS network @@ -161,7 +166,7 @@ func isAWS() bool { // provide their own metadataURL := os.Getenv("AWS_ENV_URL") if metadataURL == "" { - metadataURL = "http://169.254.169.254/latest/meta-data/" + metadataURL = DEFAULT_AWS_URL } // assume 2 seconds is enough time for inside AWS network @@ -205,7 +210,7 @@ func (f *EnvAWSFingerprint) linkSpeed() int { // the network speed metadataURL := os.Getenv("AWS_ENV_URL") if metadataURL == "" { - metadataURL = "http://169.254.169.254/latest/meta-data/" + metadataURL = DEFAULT_AWS_URL } // assume 2 seconds is enough time for inside AWS network diff --git a/client/fingerprint/env_gce.go b/client/fingerprint/env_gce.go index f721fc36a..faef7deab 100644 --- a/client/fingerprint/env_gce.go +++ b/client/fingerprint/env_gce.go @@ -46,6 +46,7 @@ func lastToken(s string) string { // EnvGCEFingerprint is used to fingerprint GCE metadata type EnvGCEFingerprint struct { + StaticFingerprinter client *http.Client logger *log.Logger metadataURL string diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 4a42057b2..a0139d485 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -3,35 +3,41 @@ package fingerprint import ( "fmt" "log" + "time" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/nomad/structs" ) +// EmptyDuration is to be used by fingerprinters that are not periodic. +const EmptyDuration = time.Duration(0) + // BuiltinFingerprints is a slice containing the key names of all regestered // fingerprints available, to provided an ordered iteration var BuiltinFingerprints = []string{ "arch", + "consul", "cpu", - "host", - "memory", - "storage", - "network", "env_aws", "env_gce", + "host", + "memory", + "network", + "storage", } // builtinFingerprintMap contains the built in registered fingerprints // which are available, corresponding to a key found in BuiltinFingerprints var builtinFingerprintMap = map[string]Factory{ "arch": NewArchFingerprint, + "consul": NewConsulFingerprint, "cpu": NewCPUFingerprint, - "host": NewHostFingerprint, - "memory": NewMemoryFingerprint, - "storage": NewStorageFingerprint, - "network": NewNetworkFingerprinter, "env_aws": NewEnvAWSFingerprint, "env_gce": NewEnvGCEFingerprint, + "host": NewHostFingerprint, + "memory": NewMemoryFingerprint, + "network": NewNetworkFingerprinter, + "storage": NewStorageFingerprint, } // NewFingerprint is used to instantiate and return a new fingerprint @@ -59,4 +65,17 @@ type Fingerprint interface { // Fingerprint is used to update properties of the Node, // and returns if the fingerprint was applicable and a potential error. 
Fingerprint(*config.Config, *structs.Node) (bool, error) + + // Periodic is a mechanism for the fingerprinter to indicate that it should + // be run periodically. The return value is a boolean indicating if it + // should be periodic, and if true, a duration. + Periodic() (bool, time.Duration) +} + +// StaticFingerprinter can be embeded in a struct that has a Fingerprint method +// to make it non-periodic. +type StaticFingerprinter struct{} + +func (s *StaticFingerprinter) Periodic() (bool, time.Duration) { + return false, EmptyDuration } diff --git a/client/fingerprint/host.go b/client/fingerprint/host.go index ac7a347f2..87acac63c 100644 --- a/client/fingerprint/host.go +++ b/client/fingerprint/host.go @@ -14,6 +14,7 @@ import ( // HostFingerprint is used to fingerprint the host type HostFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/memory.go b/client/fingerprint/memory.go index 5af097848..b249bebf5 100644 --- a/client/fingerprint/memory.go +++ b/client/fingerprint/memory.go @@ -11,6 +11,7 @@ import ( // MemoryFingerprint is used to fingerprint the available memory on the node type MemoryFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/fingerprint/network.go b/client/fingerprint/network.go index 593f7b790..23ce44554 100644 --- a/client/fingerprint/network.go +++ b/client/fingerprint/network.go @@ -17,6 +17,7 @@ import ( // NetworkFingerprint is used to fingerprint the Network capabilities of a node type NetworkFingerprint struct { + StaticFingerprinter logger *log.Logger interfaceDetector NetworkInterfaceDetector } diff --git a/client/fingerprint/storage.go b/client/fingerprint/storage.go index 6abbe52e4..ead264845 100644 --- a/client/fingerprint/storage.go +++ b/client/fingerprint/storage.go @@ -18,6 +18,7 @@ import ( // StorageFingerprint is used to measure the amount of storage free for // applications that the Nomad agent will run on this machine. 
type StorageFingerprint struct { + StaticFingerprinter logger *log.Logger } diff --git a/client/getter/getter.go b/client/getter/getter.go new file mode 100644 index 000000000..1a721c3f2 --- /dev/null +++ b/client/getter/getter.go @@ -0,0 +1,43 @@ +package getter + +import ( + "fmt" + "log" + "net/url" + "path" + "path/filepath" + "runtime" + "strings" + "syscall" + + gg "github.com/hashicorp/go-getter" +) + +func GetArtifact(destDir, source, checksum string, logger *log.Logger) (string, error) { + if source == "" { + return "", fmt.Errorf("Source url is empty in Artifact Getter") + } + u, err := url.Parse(source) + if err != nil { + return "", err + } + + // if checksum is seperate, apply to source + if checksum != "" { + source = strings.Join([]string{source, fmt.Sprintf("checksum=%s", checksum)}, "?") + logger.Printf("[DEBUG] client.getter: Applying checksum to Artifact Source URL, new url: %s", source) + } + + artifactFile := filepath.Join(destDir, path.Base(u.Path)) + if err := gg.GetFile(artifactFile, source); err != nil { + return "", fmt.Errorf("Error downloading artifact: %s", err) + } + + // Add execution permissions to the newly downloaded artifact + if runtime.GOOS != "windows" { + if err := syscall.Chmod(artifactFile, 0755); err != nil { + logger.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err) + } + } + return artifactFile, nil +} diff --git a/client/getter/getter_test.go b/client/getter/getter_test.go new file mode 100644 index 000000000..54eff20c6 --- /dev/null +++ b/client/getter/getter_test.go @@ -0,0 +1,111 @@ +package getter + +import ( + "fmt" + "io/ioutil" + "log" + "os" + "strings" + "testing" +) + +func TestGetArtifact_basic(t *testing.T) { + + logger := log.New(os.Stderr, "", log.LstdFlags) + + // TODO: Use http.TestServer to serve these files from fixtures dir + passing := []struct { + Source, Checksum string + }{ + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5df51e33871d6b295a60e0f9f6dd573da977", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "md5:a9b14903a8942748e4f8474e11f795d3", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64?checksum=sha256:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + "", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "", + }, + } + + for i, p := range passing { + destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) + if err != nil { + t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) + } + + path, err := GetArtifact(destDir, p.Source, p.Checksum, logger) + if err != nil { + t.Fatalf("TestGetArtifact_basic unexpected failure here: %s", err) + } + + if p.Checksum != "" { + if ok := strings.Contains(path, p.Checksum); ok { + t.Fatalf("path result should not contain the checksum, got: %s", path) + } + } + + // verify artifact exists + if _, err := os.Stat(path); err != nil { + t.Fatalf("source path error: %s", err) + } + } +} + +func TestGetArtifact_fails(t *testing.T) { + + logger := log.New(os.Stderr, "", log.LstdFlags) + + failing := []struct { + Source, Checksum string + }{ + { + "", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + "/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + 
"https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_darwin_amd64", + "sha256:66aa0f05fc0cfcf1e5ed8cc5307b5d", + }, + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "sha257:6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + // malformed checksum + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd64", + "6f99b4c5184726e601ecb062500aeb9537862434dfe1898dbe5c68d9f50c179c", + }, + // 404 + { + "https://dl.dropboxusercontent.com/u/47675/jar_thing/hi_linux_amd86", + "", + }, + } + for i, p := range failing { + destDir, err := ioutil.TempDir("", fmt.Sprintf("nomad-test-%d", i)) + if err != nil { + t.Fatalf("Error in TestGetArtifact_basic makeing TempDir: %s", err) + } + + _, err = GetArtifact(destDir, p.Source, p.Checksum, logger) + if err == nil { + t.Fatalf("TestGetArtifact_basic expected failure, but got none") + } + } +} diff --git a/client/restarts.go b/client/restarts.go new file mode 100644 index 000000000..4141405f8 --- /dev/null +++ b/client/restarts.go @@ -0,0 +1,83 @@ +package client + +import ( + "github.com/hashicorp/nomad/nomad/structs" + "time" +) + +// The errorCounter keeps track of the number of times a process has exited +// It returns the duration after which a task is restarted +// For Batch jobs, the interval is set to zero value since the takss +// will be restarted only upto maxAttempts times +type restartTracker interface { + nextRestart() (bool, time.Duration) +} + +func newRestartTracker(jobType string, restartPolicy *structs.RestartPolicy) restartTracker { + switch jobType { + case structs.JobTypeService: + return &serviceRestartTracker{ + maxAttempts: restartPolicy.Attempts, + startTime: time.Now(), + interval: restartPolicy.Interval, + delay: restartPolicy.Delay, + } + default: + return &batchRestartTracker{ + maxAttempts: restartPolicy.Attempts, + delay: restartPolicy.Delay, + } + } +} + +type batchRestartTracker struct { + maxAttempts int + delay time.Duration + + count int +} + +func (b *batchRestartTracker) increment() { + b.count += 1 +} + +func (b *batchRestartTracker) nextRestart() (bool, time.Duration) { + if b.count < b.maxAttempts { + b.increment() + return true, b.delay + } + return false, 0 +} + +type serviceRestartTracker struct { + maxAttempts int + delay time.Duration + interval time.Duration + + count int + startTime time.Time +} + +func (s *serviceRestartTracker) increment() { + s.count += 1 +} + +func (s *serviceRestartTracker) nextRestart() (bool, time.Duration) { + defer s.increment() + windowEndTime := s.startTime.Add(s.interval) + now := time.Now() + // If the window of restart is over we wait until the delay duration + if now.After(windowEndTime) { + s.count = 0 + s.startTime = time.Now() + return true, s.delay + } + + // If we are within the delay duration and didn't exhaust all retries + if s.count < s.maxAttempts { + return true, s.delay + } + + // If we exhausted all the retries and are withing the time window + return true, windowEndTime.Sub(now) +} diff --git a/client/restarts_test.go b/client/restarts_test.go new file mode 100644 index 000000000..9d5b59bb4 --- /dev/null +++ b/client/restarts_test.go @@ -0,0 +1,61 @@ +package client + +import ( + "github.com/hashicorp/nomad/nomad/structs" + "testing" + "time" +) + +func TestTaskRunner_ServiceRestartCounter(t *testing.T) { + interval := 2 * time.Minute + delay := 1 * time.Second + attempts := 3 + rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: attempts, Interval: interval, Delay: 
delay}) + + for i := 0; i < attempts; i++ { + actual, when := rt.nextRestart() + if !actual { + t.Fatalf("should restart returned %v, actual %v", actual, true) + } + if when != delay { + t.Fatalf("nextRestart() returned %v; want %v", when, delay) + } + } + + time.Sleep(1 * time.Second) + for i := 0; i < 3; i++ { + actual, when := rt.nextRestart() + if !actual { + t.Fail() + } + if !(when > delay && when < interval) { + t.Fatalf("nextRestart() returned %v; want less than %v and more than %v", when, interval, delay) + } + } + +} + +func TestTaskRunner_BatchRestartCounter(t *testing.T) { + attempts := 2 + interval := 1 * time.Second + delay := 1 * time.Second + rt := newRestartTracker(structs.JobTypeBatch, + &structs.RestartPolicy{Attempts: attempts, + Interval: interval, + Delay: delay, + }, + ) + for i := 0; i < attempts; i++ { + shouldRestart, when := rt.nextRestart() + if !shouldRestart { + t.Fatalf("should restart returned %v, actual %v", shouldRestart, true) + } + if when != delay { + t.Fatalf("Delay should be %v, actual: %v", delay, when) + } + } + actual, _ := rt.nextRestart() + if actual { + t.Fatalf("Expect %v, Actual: %v", false, actual) + } +} diff --git a/client/task_runner.go b/client/task_runner.go index 14a45ffc3..a59c72fb8 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "sync" + "time" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver" @@ -16,11 +17,12 @@ import ( // TaskRunner is used to wrap a task within an allocation and provide the execution context. type TaskRunner struct { - config *config.Config - updater TaskStateUpdater - logger *log.Logger - ctx *driver.ExecContext - allocID string + config *config.Config + updater TaskStateUpdater + logger *log.Logger + ctx *driver.ExecContext + allocID string + restartTracker restartTracker task *structs.Task updateCh chan *structs.Task @@ -44,17 +46,19 @@ type TaskStateUpdater func(taskName, status, desc string) // NewTaskRunner is used to create a new task context func NewTaskRunner(logger *log.Logger, config *config.Config, updater TaskStateUpdater, ctx *driver.ExecContext, - allocID string, task *structs.Task) *TaskRunner { + allocID string, task *structs.Task, restartTracker restartTracker) *TaskRunner { + tc := &TaskRunner{ - config: config, - updater: updater, - logger: logger, - ctx: ctx, - allocID: allocID, - task: task, - updateCh: make(chan *structs.Task, 8), - destroyCh: make(chan struct{}), - waitCh: make(chan struct{}), + config: config, + updater: updater, + logger: logger, + restartTracker: restartTracker, + ctx: ctx, + allocID: allocID, + task: task, + updateCh: make(chan *structs.Task, 8), + destroyCh: make(chan struct{}), + waitCh: make(chan struct{}), } return tc } @@ -164,6 +168,7 @@ func (r *TaskRunner) startTask() error { // Run is a long running routine used to manage the task func (r *TaskRunner) Run() { + var err error defer close(r.waitCh) r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')", r.task.Name, r.allocID) @@ -175,25 +180,56 @@ func (r *TaskRunner) Run() { } } + // Monitoring the Driver + err = r.monitorDriver(r.handle.WaitCh(), r.updateCh, r.destroyCh) + for err != nil { + r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", + r.task.Name, r.allocID, err) + shouldRestart, when := r.restartTracker.nextRestart() + if !shouldRestart { + r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.allocID) + 
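Outside these tests, the tracker is meant to be consulted after every task exit: nextRestart reports whether to retry and how long to wait first. Below is a hedged sketch of that call pattern using only newRestartTracker and nextRestart from the new client/restarts.go; the policy values and the runOnce callback are placeholders, not part of the change.

```go
package client

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

// sketchRestartLoop shows the intended call pattern: after every task exit,
// ask the tracker whether to retry and how long to wait before doing so.
func sketchRestartLoop(runOnce func() error) {
	policy := &structs.RestartPolicy{
		Attempts: 3,
		Interval: 5 * time.Minute,
		Delay:    10 * time.Second,
	}
	rt := newRestartTracker(structs.JobTypeBatch, policy)

	for err := runOnce(); err != nil; err = runOnce() {
		shouldRestart, wait := rt.nextRestart()
		if !shouldRestart {
			// The batch tracker gives up after Attempts failures.
			fmt.Printf("giving up after repeated failures: %v\n", err)
			return
		}
		fmt.Printf("task failed (%v); restarting in %s\n", err, wait)
		time.Sleep(wait)
	}
}
```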
r.setStatus(structs.AllocClientStatusDead, fmt.Sprintf("task failed with: %v", err)) + break + } + + r.logger.Printf("[INFO] client: Restarting Task: %v", r.task.Name) + r.setStatus(structs.AllocClientStatusPending, "Task Restarting") + r.logger.Printf("[DEBUG] client: Sleeping for %v before restarting Task %v", when, r.task.Name) + select { + case <-time.After(when): + case <-r.destroyCh: + } + r.destroyLock.Lock() + if r.destroy { + r.logger.Printf("[DEBUG] client: Not restarting task: %v because it's destroyed by user", r.task.Name) + break + } + if err = r.startTask(); err != nil { + r.destroyLock.Unlock() + continue + } + r.destroyLock.Unlock() + err = r.monitorDriver(r.handle.WaitCh(), r.updateCh, r.destroyCh) + } + + // Cleanup after ourselves + r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", r.task.Name, r.allocID) + r.setStatus(structs.AllocClientStatusDead, "task completed") + + r.DestroyState() +} + +// This functions listens to messages from the driver and blocks until the +// driver exits +func (r *TaskRunner) monitorDriver(waitCh chan error, updateCh chan *structs.Task, destroyCh chan struct{}) error { + var err error OUTER: // Wait for updates for { select { - case err := <-r.handle.WaitCh(): - if err != nil { - r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", - r.task.Name, r.allocID, err) - r.setStatus(structs.AllocClientStatusDead, - fmt.Sprintf("task failed with: %v", err)) - } else { - r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", - r.task.Name, r.allocID) - r.setStatus(structs.AllocClientStatusDead, - "task completed") - } + case err = <-waitCh: break OUTER - - case update := <-r.updateCh: + case update := <-updateCh: // Update r.task = update if err := r.handle.Update(update); err != nil { @@ -201,7 +237,7 @@ OUTER: r.task.Name, r.allocID, err) } - case <-r.destroyCh: + case <-destroyCh: // Send the kill signal, and use the WaitCh to block until complete if err := r.handle.Kill(); err != nil { r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc '%s': %v", @@ -209,9 +245,7 @@ OUTER: } } } - - // Cleanup after ourselves - r.DestroyState() + return err } // Update is used to update the task of the context diff --git a/client/task_runner_test.go b/client/task_runner_test.go index e173b9176..17709316f 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -51,8 +51,10 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) { allocDir := allocdir.NewAllocDir(filepath.Join(conf.AllocDir, alloc.ID)) allocDir.Build([]*structs.Task{task}) - ctx := driver.NewExecContext(allocDir) - tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task) + ctx := driver.NewExecContext(allocDir, alloc.ID) + rp := structs.NewRestartPolicy(structs.JobTypeService) + restartTracker := newRestartTracker(structs.JobTypeService, rp) + tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, restartTracker) return upd, tr } diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index d73d62f33..996fca131 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -8,8 +8,14 @@ import ( ) func ExecCompatible(t *testing.T) { - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { - t.Skip("Must be root on non-windows environments to run test") + if runtime.GOOS != "linux" || syscall.Geteuid() != 0 { + t.Skip("Test only available running as root on linux") + } +} + +func JavaCompatible(t *testing.T) { 
+ if runtime.GOOS == "linux" && syscall.Geteuid() != 0 { + t.Skip("Test only available when running as root on linux") } } diff --git a/command/init.go b/command/init.go index 0b9be934b..356337ae8 100644 --- a/command/init.go +++ b/command/init.go @@ -104,6 +104,17 @@ job "example" { # Defaults to 1 # count = 1 + # Restart Policy - This block defines the restart policy for TaskGroups, + # the attempts value defines the number of restarts Nomad will do if Tasks + # in this TaskGroup fails in a rolling window of interval duration + # The delay value makes Nomad wait for that duration to restart after a Task + # fails or crashes. + restart { + interval = "5m" + attempts = 10 + delay = "25s" + } + # Define a task to run task "redis" { # Use Docker to run the task. diff --git a/command/spawn_daemon.go b/command/spawn_daemon.go index 3ca825d41..52ffd8e6c 100644 --- a/command/spawn_daemon.go +++ b/command/spawn_daemon.go @@ -2,19 +2,19 @@ package command import ( "encoding/json" + "fmt" + "io" "os" + "os/exec" + "strconv" "strings" + "syscall" ) type SpawnDaemonCommand struct { Meta -} - -// Status of executing the user's command. -type SpawnStartStatus struct { - // ErrorMsg will be empty if the user command was started successfully. - // Otherwise it will have an error message. - ErrorMsg string + config *DaemonConfig + exitFile io.WriteCloser } func (c *SpawnDaemonCommand) Help() string { @@ -23,15 +23,15 @@ Usage: nomad spawn-daemon [options] INTERNAL ONLY - Spawns a daemon process optionally inside a cgroup. The required daemon_config is a json - encoding of the DaemonConfig struct containing the isolation configuration and command to run. - SpawnStartStatus is json serialized to Stdout upon running the user command or if any error - prevents its execution. If there is no error, the process waits on the users - command and then json serializes SpawnExitStatus to Stdout after its termination. - -General Options: - - ` + generalOptionsUsage() + Spawns a daemon process by double forking. The required daemon_config is a + json encoding of the DaemonConfig struct containing the isolation + configuration and command to run. SpawnStartStatus is json serialized to + stdout upon running the user command or if any error prevents its execution. + If there is no error, the process waits on the users command. Once the user + command exits, the exit code is written to a file specified in the + daemon_config and this process exits with the same exit status as the user + command. + ` return strings.TrimSpace(helpText) } @@ -40,6 +40,154 @@ func (c *SpawnDaemonCommand) Synopsis() string { return "Spawn a daemon command with configurable isolation." } +// Status of executing the user's command. +type SpawnStartStatus struct { + // The PID of the user's command. + UserPID int + + // ErrorMsg will be empty if the user command was started successfully. + // Otherwise it will have an error message. + ErrorMsg string +} + +// Exit status of the user's command. +type SpawnExitStatus struct { + // The exit code of the user's command. + ExitCode int +} + +// Configuration for the command to start as a daemon. +type DaemonConfig struct { + exec.Cmd + + // The filepath to write the exit status to. + ExitStatusFile string + + // The paths, if not /dev/null, must be either in the tasks root directory + // or in the shared alloc directory. + StdoutFile string + StdinFile string + StderrFile string + + // An optional path specifying the directory to chroot the process in. 
+ Chroot string +} + +// Whether to start the user command or abort. +type TaskStart bool + +// parseConfig reads the DaemonConfig from the passed arguments. If not +// successful, an error is returned. +func (c *SpawnDaemonCommand) parseConfig(args []string) (*DaemonConfig, error) { + flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + if err := flags.Parse(args); err != nil { + return nil, fmt.Errorf("failed to parse args: %v", err) + } + + // Check that we got json input. + args = flags.Args() + if len(args) != 1 { + return nil, fmt.Errorf("incorrect number of args; got %v; want 1", len(args)) + } + jsonInput, err := strconv.Unquote(args[0]) + if err != nil { + return nil, fmt.Errorf("Failed to unquote json input: %v", err) + } + + // De-serialize the passed command. + var config DaemonConfig + dec := json.NewDecoder(strings.NewReader(jsonInput)) + if err := dec.Decode(&config); err != nil { + return nil, err + } + + return &config, nil +} + +// configureLogs creates the log files and redirects the process +// stdin/stderr/stdout to them. If unsuccessful, an error is returned. +func (c *SpawnDaemonCommand) configureLogs() error { + if len(c.config.StdoutFile) != 0 { + stdo, err := os.OpenFile(c.config.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdout: %v", err) + } + + c.config.Cmd.Stdout = stdo + } + + if len(c.config.StderrFile) != 0 { + stde, err := os.OpenFile(c.config.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stderr: %v", err) + } + c.config.Cmd.Stderr = stde + } + + if len(c.config.StdinFile) != 0 { + stdi, err := os.OpenFile(c.config.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) + if err != nil { + return fmt.Errorf("Error opening file to redirect stdin: %v", err) + } + c.config.Cmd.Stdin = stdi + } + + return nil +} + +func (c *SpawnDaemonCommand) Run(args []string) int { + var err error + c.config, err = c.parseConfig(args) + if err != nil { + return c.outputStartStatus(err, 1) + } + + // Open the file we will be using to write exit codes to. We do this early + // to ensure that we don't start the user process when we can't capture its + // exit status. + c.exitFile, err = os.OpenFile(c.config.ExitStatusFile, os.O_WRONLY, 0666) + if err != nil { + return c.outputStartStatus(fmt.Errorf("Error opening file to store exit status: %v", err), 1) + } + + // Isolate the user process. + if err := c.isolateCmd(); err != nil { + return c.outputStartStatus(err, 1) + } + + // Redirect logs. + if err := c.configureLogs(); err != nil { + return c.outputStartStatus(err, 1) + } + + // Chroot jail the process and set its working directory. + c.configureChroot() + + // Wait to get the start command. + var start TaskStart + dec := json.NewDecoder(os.Stdin) + if err := dec.Decode(&start); err != nil { + return c.outputStartStatus(err, 1) + } + + // Aborted by Nomad process. + if !start { + return 0 + } + + // Spawn the user process. + if err := c.config.Cmd.Start(); err != nil { + return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1) + } + + // Indicate that the command was started successfully. + c.outputStartStatus(nil, 0) + + // Wait and then output the exit status. 
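Once the daemonized command exits, writeExitStatus (below) serializes a SpawnExitStatus into ExitStatusFile, so the supervising driver can recover the exit code by decoding that JSON. A small sketch of the read side follows; readExitStatus is an invented helper, only the SpawnExitStatus shape comes from this change.

```go
package command

import (
	"encoding/json"
	"fmt"
	"os"
)

// readExitStatus decodes the SpawnExitStatus JSON that the spawn-daemon
// process writes to its exit status file once the user command terminates.
func readExitStatus(path string) (int, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, fmt.Errorf("Error opening exit status file: %v", err)
	}
	defer f.Close()

	var status SpawnExitStatus
	if err := json.NewDecoder(f).Decode(&status); err != nil {
		return 0, fmt.Errorf("Error decoding exit status: %v", err)
	}
	return status.ExitCode, nil
}
```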
+ return c.writeExitStatus(c.config.Cmd.Wait()) +} + // outputStartStatus is a helper function that outputs a SpawnStartStatus to // Stdout with the passed error, which may be nil to indicate no error. It // returns the passed status. @@ -51,6 +199,36 @@ func (c *SpawnDaemonCommand) outputStartStatus(err error, status int) int { startStatus.ErrorMsg = err.Error() } + if c.config != nil && c.config.Cmd.Process != nil { + startStatus.UserPID = c.config.Process.Pid + } + enc.Encode(startStatus) return status } + +// writeExitStatus takes in the error result from calling wait and writes out +// the exit status to a file. It returns the same exit status as the user +// command. +func (c *SpawnDaemonCommand) writeExitStatus(exit error) int { + // Parse the exit code. + exitStatus := &SpawnExitStatus{} + if exit != nil { + // Default to exit code 1 if we can not get the actual exit code. + exitStatus.ExitCode = 1 + + if exiterr, ok := exit.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + exitStatus.ExitCode = status.ExitStatus() + } + } + } + + if c.exitFile != nil { + enc := json.NewEncoder(c.exitFile) + enc.Encode(exitStatus) + c.exitFile.Close() + } + + return exitStatus.ExitCode +} diff --git a/command/spawn_daemon_darwin.go b/command/spawn_daemon_darwin.go new file mode 100644 index 000000000..f3fe8484a --- /dev/null +++ b/command/spawn_daemon_darwin.go @@ -0,0 +1,4 @@ +package command + +// No chroot on darwin. +func (c *SpawnDaemonCommand) configureChroot() {} diff --git a/command/spawn_daemon_linux.go b/command/spawn_daemon_linux.go index 3e9ceaa3e..512ec645f 100644 --- a/command/spawn_daemon_linux.go +++ b/command/spawn_daemon_linux.go @@ -1,115 +1,16 @@ package command -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "strconv" - "strings" - "syscall" -) +import "syscall" -// Configuration for the command to start as a daemon. -type DaemonConfig struct { - exec.Cmd +// configureChroot enters the user command into a chroot if specified in the +// config and on an OS that supports Chroots. +func (c *SpawnDaemonCommand) configureChroot() { + if len(c.config.Chroot) != 0 { + if c.config.Cmd.SysProcAttr == nil { + c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } - // The paths, if not /dev/null, must be either in the tasks root directory - // or in the shared alloc directory. - StdoutFile string - StdinFile string - StderrFile string - - Chroot string -} - -// Whether to start the user command or abort. -type TaskStart bool - -func (c *SpawnDaemonCommand) Run(args []string) int { - flags := c.Meta.FlagSet("spawn-daemon", FlagSetClient) - flags.Usage = func() { c.Ui.Output(c.Help()) } - - if err := flags.Parse(args); err != nil { - return 1 - } - - // Check that we got json input. - args = flags.Args() - if len(args) != 1 { - c.Ui.Error(c.Help()) - return 1 - } - jsonInput, err := strconv.Unquote(args[0]) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Failed to unquote json input: %v", err), 1) - } - - // De-serialize the passed command. - var cmd DaemonConfig - dec := json.NewDecoder(strings.NewReader(jsonInput)) - if err := dec.Decode(&cmd); err != nil { - return c.outputStartStatus(err, 1) - } - - // Isolate the user process. - if _, err := syscall.Setsid(); err != nil { - return c.outputStartStatus(fmt.Errorf("Failed setting sid: %v", err), 1) - } - - syscall.Umask(0) - - // Redirect logs. 
- stdo, err := os.OpenFile(cmd.StdoutFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdout: %v", err), 1) - } - - stde, err := os.OpenFile(cmd.StderrFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stderr: %v", err), 1) - } - - stdi, err := os.OpenFile(cmd.StdinFile, os.O_CREATE|os.O_RDONLY, 0666) - if err != nil { - return c.outputStartStatus(fmt.Errorf("Error opening file to redirect Stdin: %v", err), 1) - } - - cmd.Cmd.Stdout = stdo - cmd.Cmd.Stderr = stde - cmd.Cmd.Stdin = stdi - - // Chroot jail the process and set its working directory. - if cmd.Cmd.SysProcAttr == nil { - cmd.Cmd.SysProcAttr = &syscall.SysProcAttr{} - } - - cmd.Cmd.SysProcAttr.Chroot = cmd.Chroot - cmd.Cmd.Dir = "/" - - // Wait to get the start command. - var start TaskStart - dec = json.NewDecoder(os.Stdin) - if err := dec.Decode(&start); err != nil { - return c.outputStartStatus(err, 1) - } - - if !start { - return 0 - } - - // Spawn the user process. - if err := cmd.Cmd.Start(); err != nil { - return c.outputStartStatus(fmt.Errorf("Error starting user command: %v", err), 1) - } - - // Indicate that the command was started successfully. - c.outputStartStatus(nil, 0) - - // Wait and then output the exit status. - if err := cmd.Wait(); err != nil { - return 1 - } - - return 0 + c.config.Cmd.SysProcAttr.Chroot = c.config.Chroot + c.config.Cmd.Dir = "/" + } } diff --git a/command/spawn_daemon_test.go b/command/spawn_daemon_test.go new file mode 100644 index 000000000..5bfd6ad5a --- /dev/null +++ b/command/spawn_daemon_test.go @@ -0,0 +1,48 @@ +package command + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os/exec" + "testing" +) + +type nopCloser struct { + io.ReadWriter +} + +func (n *nopCloser) Close() error { + return nil +} + +func TestSpawnDaemon_WriteExitStatus(t *testing.T) { + // Check if there is python. + path, err := exec.LookPath("python") + if err != nil { + t.Skip("python not detected") + } + + var b bytes.Buffer + daemon := &SpawnDaemonCommand{exitFile: &nopCloser{&b}} + + code := 3 + cmd := exec.Command(path, "./test-resources/exiter.py", fmt.Sprintf("%d", code)) + err = cmd.Run() + actual := daemon.writeExitStatus(err) + if actual != code { + t.Fatalf("writeExitStatus(%v) returned %v; want %v", err, actual, code) + } + + // De-serialize the passed command. + var exitStatus SpawnExitStatus + dec := json.NewDecoder(&b) + if err := dec.Decode(&exitStatus); err != nil { + t.Fatalf("failed to decode exit status: %v", err) + } + + if exitStatus.ExitCode != code { + t.Fatalf("writeExitStatus(%v) wrote exit status %v; want %v", err, exitStatus.ExitCode, code) + } +} diff --git a/command/spawn_daemon_universal.go b/command/spawn_daemon_universal.go deleted file mode 100644 index 5083af5f3..000000000 --- a/command/spawn_daemon_universal.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !linux - -package command - -import "errors" - -func (c *SpawnDaemonCommand) Run(args []string) int { - return c.outputStartStatus(errors.New("spawn-daemon not supported"), 1) -} diff --git a/command/spawn_daemon_unix.go b/command/spawn_daemon_unix.go new file mode 100644 index 000000000..981e52596 --- /dev/null +++ b/command/spawn_daemon_unix.go @@ -0,0 +1,16 @@ +// +build !windows + +package command + +import "syscall" + +// isolateCmd sets the session id for the process and the umask. 
+func (c *SpawnDaemonCommand) isolateCmd() error { + if c.config.Cmd.SysProcAttr == nil { + c.config.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } + + c.config.Cmd.SysProcAttr.Setsid = true + syscall.Umask(0) + return nil +} diff --git a/command/spawn_daemon_windows.go b/command/spawn_daemon_windows.go new file mode 100644 index 000000000..bb2d63ed8 --- /dev/null +++ b/command/spawn_daemon_windows.go @@ -0,0 +1,7 @@ +// build !linux !darwin + +package command + +// No isolation on Windows. +func (c *SpawnDaemonCommand) isolateCmd() error { return nil } +func (c *SpawnDaemonCommand) configureChroot() {} diff --git a/command/test-resources/exiter.py b/command/test-resources/exiter.py new file mode 100644 index 000000000..90e66b98c --- /dev/null +++ b/command/test-resources/exiter.py @@ -0,0 +1,3 @@ +import sys + +sys.exit(int(sys.argv[1])) diff --git a/helper/discover/discover.go b/helper/discover/discover.go index d90ddb4cc..8582a0133 100644 --- a/helper/discover/discover.go +++ b/helper/discover/discover.go @@ -3,18 +3,21 @@ package discover import ( "fmt" "os" + "os/exec" "path/filepath" + "runtime" "github.com/kardianos/osext" ) -const ( - nomadExe = "nomad" -) - // Checks the current executable, then $GOPATH/bin, and finally the CWD, in that // order. If it can't be found, an error is returned. func NomadExecutable() (string, error) { + nomadExe := "nomad" + if runtime.GOOS == "windows" { + nomadExe = "nomad.exe" + } + // Check the current executable. bin, err := osext.Executable() if err != nil { @@ -25,6 +28,11 @@ func NomadExecutable() (string, error) { return bin, nil } + // Check the $PATH + if bin, err := exec.LookPath(nomadExe); err == nil { + return bin, nil + } + // Check the $GOPATH. bin = filepath.Join(os.Getenv("GOPATH"), "bin", nomadExe) if _, err := os.Stat(bin); err == nil { diff --git a/jobspec/parse.go b/jobspec/parse.go index f63ac5294..77f9b819f 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -124,7 +124,7 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { } } - // If we have tasks outside, do those + // If we have tasks outside, create TaskGroups for them if o := obj.Get("task", false); o != nil { var tasks []*structs.Task if err := parseTasks(&tasks, o); err != nil { @@ -134,9 +134,10 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2) for i, t := range tasks { result.TaskGroups[i] = &structs.TaskGroup{ - Name: t.Name, - Count: 1, - Tasks: []*structs.Task{t}, + Name: t.Name, + Count: 1, + Tasks: []*structs.Task{t}, + RestartPolicy: structs.NewRestartPolicy(result.Type), } } } @@ -180,6 +181,7 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { delete(m, "constraint") delete(m, "meta") delete(m, "task") + delete(m, "restart") // Default count to 1 if not specified if _, ok := m["count"]; !ok { @@ -199,6 +201,11 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return err } } + g.RestartPolicy = structs.NewRestartPolicy(result.Type) + + if err := parseRestartPolicy(g.RestartPolicy, o); err != nil { + return err + } // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. 
@@ -228,6 +235,42 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return nil } +func parseRestartPolicy(result *structs.RestartPolicy, obj *hclobj.Object) error { + var restartHclObj *hclobj.Object + var m map[string]interface{} + if restartHclObj = obj.Get("restart", false); restartHclObj == nil { + return nil + } + if err := hcl.DecodeObject(&m, restartHclObj); err != nil { + return err + } + + if delay, ok := m["delay"]; ok { + d, err := toDuration(delay) + if err != nil { + return fmt.Errorf("Invalid Delay time in restart policy: %v", err) + } + result.Delay = d + } + + if interval, ok := m["interval"]; ok { + i, err := toDuration(interval) + if err != nil { + return fmt.Errorf("Invalid Interval time in restart policy: %v", err) + } + result.Interval = i + } + + if attempts, ok := m["attempts"]; ok { + a, err := toInteger(attempts) + if err != nil { + return fmt.Errorf("Invalid value in attempts: %v", err) + } + result.Attempts = a + } + return nil +} + func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error { for _, o := range obj.Elem(false) { var m map[string]interface{} @@ -455,19 +498,11 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } for _, key := range []string{"stagger", "Stagger"} { if raw, ok := m[key]; ok { - switch v := raw.(type) { - case string: - dur, err := time.ParseDuration(v) - if err != nil { - return fmt.Errorf("invalid stagger time '%s'", raw) - } - m[key] = dur - case int: - m[key] = time.Duration(v) * time.Second - default: - return fmt.Errorf("invalid type for stagger time '%s'", - raw) + staggerTime, err := toDuration(raw) + if err != nil { + return fmt.Errorf("Invalid stagger time: %v", err) } + m[key] = staggerTime } } @@ -477,3 +512,35 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } return nil } + +func toDuration(value interface{}) (time.Duration, error) { + var dur time.Duration + var err error + switch v := value.(type) { + case string: + dur, err = time.ParseDuration(v) + case int: + dur = time.Duration(v) * time.Second + default: + err = fmt.Errorf("Invalid time %s", value) + } + + return dur, err +} + +func toInteger(value interface{}) (int, error) { + var integer int + var err error + switch v := value.(type) { + case string: + var i int64 + i, err = strconv.ParseInt(v, 10, 32) + integer = int(i) + case int: + integer = v + default: + err = fmt.Errorf("Value: %v can't be parsed into int", value) + } + + return integer, err +} diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index f91789ddb..e785443b7 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -48,6 +48,11 @@ func TestParse(t *testing.T) { &structs.TaskGroup{ Name: "outside", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 2, + Interval: 1 * time.Minute, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "outside", @@ -77,6 +82,11 @@ func TestParse(t *testing.T) { "elb_interval": "10", "elb_checks": "3", }, + RestartPolicy: &structs.RestartPolicy{ + Interval: 10 * time.Minute, + Attempts: 5, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "binstore", diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 941272b2d..bf81a6ae7 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -31,6 +31,11 @@ job "binstore-storagelocker" { group "binsl" { count = 5 + restart { + attempts = 5 + interval = "10m" + delay = "15s" + } task "binstore" 
{ driver = "docker" config { diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 53b630480..c07d5549d 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -5,6 +5,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Alloc endpoint is used for manipulating allocations @@ -19,35 +20,45 @@ func (a *Alloc) List(args *structs.AllocListRequest, reply *structs.AllocListRes } defer metrics.MeasureSince([]string{"nomad", "alloc", "list"}, time.Now()) - // Capture all the allocations - snap, err := a.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Allocs() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "allocs"}), + run: func() error { + // Capture all the allocations + snap, err := a.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Allocs() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - alloc := raw.(*structs.Allocation) - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } + var allocs []*structs.AllocListStub + for { + raw := iter.Next() + if raw == nil { + break + } + alloc := raw.(*structs.Allocation) + allocs = append(allocs, alloc.Stub()) + } + reply.Allocations = allocs - // Use the last index that affected the jobs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - a.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + a.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return a.srv.blockingRPC(&opts) } // GetAlloc is used to lookup a particular allocation @@ -58,30 +69,38 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "alloc", "get_alloc"}, time.Now()) - // Lookup the allocation - snap, err := a.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.AllocByID(args.AllocID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Alloc: args.AllocID}), + run: func() error { + // Lookup the allocation + snap, err := a.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.AllocByID(args.AllocID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Alloc = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + reply.Alloc = out + if out != nil { + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - a.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + a.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return a.srv.blockingRPC(&opts) } diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 
8076b64d6..bcab0a387 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -3,6 +3,7 @@ package nomad import ( "reflect" "testing" + "time" "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/mock" @@ -44,6 +45,74 @@ func TestAllocEndpoint_List(t *testing.T) { } } +func TestAllocEndpoint_List_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the alloc + alloc := mock.Alloc() + + // Upsert alloc triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertAllocs(2, []*structs.Allocation{alloc}); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.AllocListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID { + t.Fatalf("bad: %#v", resp.Allocations) + } + + // Client updates trigger watches + alloc2 := mock.Alloc() + alloc2.ID = alloc.ID + alloc2.ClientStatus = structs.AllocClientStatusRunning + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateAllocFromClient(3, alloc2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(codec, "Alloc.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Allocations) != 1 || resp.Allocations[0].ID != alloc.ID || + resp2.Allocations[0].ClientStatus != structs.AllocClientStatusRunning { + t.Fatalf("bad: %#v", resp2.Allocations) + } +} + func TestAllocEndpoint_GetAlloc(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() @@ -75,3 +144,55 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) { t.Fatalf("bad: %#v", resp.Alloc) } } + +func TestAllocEndpoint_GetAlloc_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the allocs + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + + // First create an unrelated alloc + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Create the alloc we are watching later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the jobs + get := &structs.AllocSpecificRequest{ + AllocID: alloc2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + var resp structs.SingleAllocResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Alloc.GetAlloc", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := 
time.Since(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) + } + if resp.Alloc == nil || resp.Alloc.ID != alloc2.ID { + t.Fatalf("bad: %#v", resp.Alloc) + } +} diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 0dce98a52..bc74e85f3 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) const ( @@ -26,32 +27,40 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now()) - // Look for the job - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.EvalByID(args.EvalID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Eval: args.EvalID}), + run: func() error { + // Look for the job + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.EvalByID(args.EvalID) + if err != nil { + return err + } - // Setup the output - if out != nil { - reply.Eval = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("evals") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + reply.Eval = out + if out != nil { + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("evals") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } // Dequeue is used to dequeue a pending evaluation @@ -219,35 +228,45 @@ func (e *Eval) List(args *structs.EvalListRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now()) - // Scan all the evaluations - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Evals() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "evals"}), + run: func() error { + // Scan all the evaluations + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Evals() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - eval := raw.(*structs.Evaluation) - reply.Evaluations = append(reply.Evaluations, eval) - } + var evals []*structs.Evaluation + for { + raw := iter.Next() + if raw == nil { + break + } + eval := raw.(*structs.Evaluation) + evals = append(evals, eval) + } + reply.Evaluations = evals - // Use the last index that affected the jobs table - index, err := snap.Index("evals") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("evals") + if err != nil { + return err + } + reply.Index = index - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + 
}} + return e.srv.blockingRPC(&opts) } // Allocations is used to list the allocations for an evaluation @@ -258,32 +277,40 @@ func (e *Eval) Allocations(args *structs.EvalSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now()) - // Capture the allocations - snap, err := e.srv.fsm.State().Snapshot() - if err != nil { - return err - } - allocs, err := snap.AllocsByEval(args.EvalID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocEval: args.EvalID}), + run: func() error { + // Capture the allocations + snap, err := e.srv.fsm.State().Snapshot() + if err != nil { + return err + } + allocs, err := snap.AllocsByEval(args.EvalID) + if err != nil { + return err + } - // Convert to a stub - if len(allocs) > 0 { - reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) - for _, alloc := range allocs { - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } - } + // Convert to a stub + if len(allocs) > 0 { + reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) + for _, alloc := range allocs { + reply.Allocations = append(reply.Allocations, alloc.Stub()) + } + } - // Use the last index that affected the allocs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the allocs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - e.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + e.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return e.srv.blockingRPC(&opts) } diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index eb61ea3d0..55782a031 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -51,6 +51,83 @@ func TestEvalEndpoint_GetEval(t *testing.T) { } } +func TestEvalEndpoint_GetEval_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the evals + eval1 := mock.Eval() + eval2 := mock.Eval() + + // First create an unrelated eval + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertEvals(100, []*structs.Evaluation{eval1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert the eval we are watching later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertEvals(200, []*structs.Evaluation{eval2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the eval + req := &structs.EvalSpecificRequest{ + EvalID: eval2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + var resp structs.SingleEvalResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) + } + if resp.Eval == nil || resp.Eval.ID != eval2.ID { + t.Fatalf("bad: %#v", resp.Eval) + } + + // Eval delete triggers watches + time.AfterFunc(100*time.Millisecond, func() { + err := state.DeleteEval(300, []string{eval2.ID}, []string{}) + if err != nil { + t.Fatalf("err: %v", err) + 
} + }) + + req.QueryOptions.MinQueryIndex = 250 + var resp2 structs.SingleEvalResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Eval != nil { + t.Fatalf("bad: %#v", resp2.Eval) + } +} + func TestEvalEndpoint_Dequeue(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue @@ -334,6 +411,70 @@ func TestEvalEndpoint_List(t *testing.T) { } } +func TestEvalEndpoint_List_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the ieval + eval := mock.Eval() + + // Upsert eval triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertEvals(2, []*structs.Evaluation{eval}); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.EvalListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Evaluations) != 1 || resp.Evaluations[0].ID != eval.ID { + t.Fatalf("bad: %#v", resp.Evaluations) + } + + // Eval deletion triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteEval(3, []string{eval.ID}, nil); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + start = time.Now() + var resp2 structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(codec, "Eval.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Evaluations) != 0 { + t.Fatalf("bad: %#v", resp2.Evaluations) + } +} + func TestEvalEndpoint_Allocations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() @@ -368,3 +509,55 @@ func TestEvalEndpoint_Allocations(t *testing.T) { t.Fatalf("bad: %#v", resp.Allocations) } } + +func TestEvalEndpoint_Allocations_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the allocs + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + + // Upsert an unrelated alloc first + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert an alloc which will trigger the watch later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the eval + get := &structs.EvalSpecificRequest{ + EvalID: alloc2.EvalID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + var 
resp structs.EvalAllocationsResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Eval.Allocations", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].ID != alloc2.ID { + t.Fatalf("bad: %#v", resp.Allocations) + } +} diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 63d31eb3c..e961428e4 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Job endpoint is used for job interactions @@ -180,32 +181,41 @@ func (j *Job) GetJob(args *structs.JobSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now()) - // Look for the job - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.JobByID(args.JobID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Job: args.JobID}), + run: func() error { - // Setup the output - if out != nil { - reply.Job = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("jobs") - if err != nil { - return err - } - reply.Index = index - } + // Look for the job + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.JobByID(args.JobID) + if err != nil { + return err + } - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Setup the output + reply.Job = out + if out != nil { + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("jobs") + if err != nil { + return err + } + reply.Index = index + } + + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return j.srv.blockingRPC(&opts) } // List is used to list the jobs registered in the system @@ -216,35 +226,45 @@ func (j *Job) List(args *structs.JobListRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now()) - // Capture all the jobs - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Jobs() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "jobs"}), + run: func() error { + // Capture all the jobs + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Jobs() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - job := raw.(*structs.Job) - reply.Jobs = append(reply.Jobs, job.Stub()) - } + var jobs []*structs.JobListStub + for { + raw := iter.Next() + if raw == nil { + break + } + job := raw.(*structs.Job) + jobs = append(jobs, job.Stub()) + } + reply.Jobs = jobs - // Use the last index that affected the jobs table - index, err := snap.Index("jobs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("jobs") + if err != 
nil { + return err + } + reply.Index = index - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return j.srv.blockingRPC(&opts) } // Allocations is used to list the allocations for a job @@ -255,34 +275,43 @@ func (j *Job) Allocations(args *structs.JobSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now()) - // Capture the allocations - snap, err := j.srv.fsm.State().Snapshot() - if err != nil { - return err - } - allocs, err := snap.AllocsByJob(args.JobID) - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocJob: args.JobID}), + run: func() error { + // Capture the allocations + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + allocs, err := snap.AllocsByJob(args.JobID) + if err != nil { + return err + } - // Convert to stubs - if len(allocs) > 0 { - reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) - for _, alloc := range allocs { - reply.Allocations = append(reply.Allocations, alloc.Stub()) - } - } + // Convert to stubs + if len(allocs) > 0 { + reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) + for _, alloc := range allocs { + reply.Allocations = append(reply.Allocations, alloc.Stub()) + } + } - // Use the last index that affected the allocs table - index, err := snap.Index("allocs") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the allocs table + index, err := snap.Index("allocs") + if err != nil { + return err + } + reply.Index = index - // Set the query response - j.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + j.srv.setQueryMeta(&reply.QueryMeta) + return nil + + }} + return j.srv.blockingRPC(&opts) } // Evaluations is used to list the evaluations for a job diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index e43ed3ba2..c12e5b463 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -3,6 +3,7 @@ package nomad import ( "reflect" "testing" + "time" "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/mock" @@ -363,6 +364,80 @@ func TestJobEndpoint_GetJob(t *testing.T) { } } +func TestJobEndpoint_GetJob_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the jobs + job1 := mock.Job() + job2 := mock.Job() + + // Upsert a job we are not interested in first. + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertJob(100, job1); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert another job later which should trigger the watch. 
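All of these endpoints now funnel through blockingRPC with a watch item and honor MinQueryIndex: a caller long-polls by passing back the index from its previous response. Below is a hedged sketch of that loop against Job.List, using only the request and response types exercised in these tests; the call parameter stands in for whatever RPC codec the caller already has (the tests use msgpackrpc.CallWithCodec) and is not an API introduced here.

```go
package nomad

import (
	"log"

	"github.com/hashicorp/nomad/nomad/structs"
)

// watchJobs long-polls the Job.List endpoint. The call argument is a
// placeholder for however the caller issues RPCs.
func watchJobs(call func(method string, args, reply interface{}) error, logger *log.Logger) error {
	req := &structs.JobListRequest{
		QueryOptions: structs.QueryOptions{
			Region:        "global",
			MinQueryIndex: 0,
		},
	}

	for {
		var resp structs.JobListResponse
		if err := call("Job.List", req, &resp); err != nil {
			return err
		}

		logger.Printf("[DEBUG] saw %d jobs at index %d", len(resp.Jobs), resp.Index)

		// Only results newer than this index will unblock the next call;
		// the server watches the jobs table and returns once it advances.
		req.MinQueryIndex = resp.Index
	}
}
```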
+ time.AfterFunc(200*time.Millisecond, func() { + if err := state.UpsertJob(200, job2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.JobSpecificRequest{ + JobID: job2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + start := time.Now() + var resp structs.SingleJobResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) + } + if resp.Job == nil || resp.Job.ID != job2.ID { + t.Fatalf("bad: %#v", resp.Job) + } + + // Job delete fires watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteJob(300, job2.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + start = time.Now() + + var resp2 structs.SingleJobResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.GetJob", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Job != nil { + t.Fatalf("bad: %#v", resp2.Job) + } +} + func TestJobEndpoint_ListJobs(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() @@ -397,6 +472,70 @@ func TestJobEndpoint_ListJobs(t *testing.T) { } } +func TestJobEndpoint_ListJobs_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the job + job := mock.Job() + + // Upsert job triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertJob(100, job); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.JobListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + start := time.Now() + var resp structs.JobListResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 100 { + t.Fatalf("Bad index: %d %d", resp.Index, 100) + } + if len(resp.Jobs) != 1 || resp.Jobs[0].ID != job.ID { + t.Fatalf("bad: %#v", resp.Jobs) + } + + // Job deletion triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteJob(200, job.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 150 + start = time.Now() + var resp2 structs.JobListResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 200 { + t.Fatalf("Bad index: %d %d", resp2.Index, 200) + } + if len(resp2.Jobs) != 0 { + t.Fatalf("bad: %#v", resp2.Jobs) + } +} + func TestJobEndpoint_Allocations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() @@ -432,6 +571,59 @@ func TestJobEndpoint_Allocations(t *testing.T) { } } +func TestJobEndpoint_Allocations_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + codec := 
rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + alloc1 := mock.Alloc() + alloc2 := mock.Alloc() + alloc2.JobID = "job1" + state := s1.fsm.State() + + // First upsert an unrelated alloc + time.AfterFunc(100*time.Millisecond, func() { + err := state.UpsertAllocs(100, []*structs.Allocation{alloc1}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert an alloc for the job we are interested in later + time.AfterFunc(200*time.Millisecond, func() { + err := state.UpsertAllocs(200, []*structs.Allocation{alloc2}) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the jobs + get := &structs.JobSpecificRequest{ + JobID: "job1", + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + var resp structs.JobAllocationsResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Job.Allocations", get, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) + } + if len(resp.Allocations) != 1 || resp.Allocations[0].JobID != "job1" { + t.Fatalf("bad: %#v", resp.Allocations) + } +} + func TestJobEndpoint_Evaluations(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 87c426dce..329ecd872 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -1,6 +1,9 @@ package mock -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "github.com/hashicorp/nomad/nomad/structs" + "time" +) func Node() *structs.Node { node := &structs.Node{ @@ -71,6 +74,11 @@ func Job() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 10, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", @@ -131,6 +139,11 @@ func SystemJob() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 9ce14aadd..5bd600380 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -6,6 +6,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) // Node endpoint is used for client interactions @@ -282,37 +283,45 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) - // Verify the arguments - if args.NodeID == "" { - return fmt.Errorf("missing node ID") - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Node: args.NodeID}), + run: func() error { + // Verify the arguments + if args.NodeID == "" { + return fmt.Errorf("missing node ID") + } - // Look for the node - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return err - } - out, err := snap.NodeByID(args.NodeID) - if err != nil { - return err - } + // Look for the node + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + out, err := snap.NodeByID(args.NodeID) + if err != nil { + return err + } - // Setup the output - if out != nil { - 
reply.Node = out - reply.Index = out.ModifyIndex - } else { - // Use the last index that affected the nodes table - index, err := snap.Index("nodes") - if err != nil { - return err - } - reply.Index = index - } + // Setup the output + reply.Node = out + if out != nil { + reply.Index = out.ModifyIndex + } else { + // Use the last index that affected the nodes table + index, err := snap.Index("nodes") + if err != nil { + return err + } + reply.Index = index + } - // Set the query response - n.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + n.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return n.srv.blockingRPC(&opts) } // GetAllocs is used to request allocations for a specific node @@ -330,9 +339,9 @@ func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, // Setup the blocking query opts := blockingOptions{ - queryOpts: &args.QueryOptions, - queryMeta: &reply.QueryMeta, - allocWatch: args.NodeID, + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), run: func() error { // Look for the node snap, err := n.srv.fsm.State().Snapshot() @@ -404,35 +413,45 @@ func (n *Node) List(args *structs.NodeListRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) - // Capture all the nodes - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.Nodes() - if err != nil { - return err - } + // Setup the blocking query + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + watch: watch.NewItems(watch.Item{Table: "nodes"}), + run: func() error { + // Capture all the nodes + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + iter, err := snap.Nodes() + if err != nil { + return err + } - for { - raw := iter.Next() - if raw == nil { - break - } - node := raw.(*structs.Node) - reply.Nodes = append(reply.Nodes, node.Stub()) - } + var nodes []*structs.NodeListStub + for { + raw := iter.Next() + if raw == nil { + break + } + node := raw.(*structs.Node) + nodes = append(nodes, node.Stub()) + } + reply.Nodes = nodes - // Use the last index that affected the jobs table - index, err := snap.Index("nodes") - if err != nil { - return err - } - reply.Index = index + // Use the last index that affected the jobs table + index, err := snap.Index("nodes") + if err != nil { + return err + } + reply.Index = index - // Set the query response - n.srv.setQueryMeta(&reply.QueryMeta) - return nil + // Set the query response + n.srv.setQueryMeta(&reply.QueryMeta) + return nil + }} + return n.srv.blockingRPC(&opts) } // createNodeEvals is used to create evaluations for each alloc on a node. diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 62f4a4959..74b154655 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -371,6 +371,107 @@ func TestClientEndpoint_GetNode(t *testing.T) { } } +func TestClientEndpoint_GetNode_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the node + node1 := mock.Node() + node2 := mock.Node() + + // First create an unrelated node. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertNode(100, node1); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Upsert the node we are watching later + time.AfterFunc(200*time.Millisecond, func() { + if err := state.UpsertNode(200, node2); err != nil { + t.Fatalf("err: %v", err) + } + }) + + // Lookup the node + req := &structs.NodeSpecificRequest{ + NodeID: node2.ID, + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 50, + }, + } + var resp structs.SingleNodeResponse + start := time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 200*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 200 { + t.Fatalf("Bad index: %d %d", resp.Index, 200) + } + if resp.Node == nil || resp.Node.ID != node2.ID { + t.Fatalf("bad: %#v", resp.Node) + } + + // Node update triggers watches + time.AfterFunc(100*time.Millisecond, func() { + nodeUpdate := mock.Node() + nodeUpdate.ID = node2.ID + nodeUpdate.Status = structs.NodeStatusDown + if err := state.UpsertNode(300, nodeUpdate); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 250 + var resp2 structs.SingleNodeResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp2.Index != 300 { + t.Fatalf("Bad index: %d %d", resp2.Index, 300) + } + if resp2.Node == nil || resp2.Node.Status != structs.NodeStatusDown { + t.Fatalf("bad: %#v", resp2.Node) + } + + // Node delete triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteNode(400, node2.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 350 + var resp3 structs.SingleNodeResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp3.Index != 400 { + t.Fatalf("Bad index: %d %d", resp2.Index, 400) + } + if resp3.Node != nil { + t.Fatalf("bad: %#v", resp3.Node) + } +} + func TestClientEndpoint_GetAllocs(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() @@ -457,16 +558,15 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { alloc.NodeID = node.ID state := s1.fsm.State() start := time.Now() - go func() { - time.Sleep(100 * time.Millisecond) + time.AfterFunc(100*time.Millisecond, func() { err := state.UpsertAllocs(100, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) } - }() + }) // Lookup the allocs in a blocking query - get := &structs.NodeSpecificRequest{ + req := &structs.NodeSpecificRequest{ NodeID: node.ID, QueryOptions: structs.QueryOptions{ Region: "global", @@ -475,7 +575,7 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { }, } var resp2 structs.NodeAllocsResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", get, &resp2); err != nil { + if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp2); err != nil { t.Fatalf("err: %v", err) } @@ -491,6 +591,34 @@ func TestClientEndpoint_GetAllocs_Blocking(t *testing.T) { if 
len(resp2.Allocs) != 1 || resp2.Allocs[0].ID != alloc.ID { t.Fatalf("bad: %#v", resp2.Allocs) } + + // Alloc updates fire watches + time.AfterFunc(100*time.Millisecond, func() { + allocUpdate := mock.Alloc() + allocUpdate.NodeID = alloc.NodeID + allocUpdate.ID = alloc.ID + allocUpdate.ClientStatus = structs.AllocClientStatusRunning + err := state.UpdateAllocFromClient(200, allocUpdate) + if err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.QueryOptions.MinQueryIndex = 150 + var resp3 structs.NodeAllocsResponse + if err := msgpackrpc.CallWithCodec(codec, "Node.GetAllocs", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if time.Since(start) < 100*time.Millisecond { + t.Fatalf("too fast") + } + if resp3.Index != 200 { + t.Fatalf("Bad index: %d %d", resp3.Index, 200) + } + if len(resp3.Allocs) != 1 || resp3.Allocs[0].ClientStatus != structs.AllocClientStatusRunning { + t.Fatalf("bad: %#v", resp3.Allocs[0]) + } } func TestClientEndpoint_UpdateAlloc(t *testing.T) { @@ -752,3 +880,115 @@ func TestClientEndpoint_ListNodes(t *testing.T) { t.Fatalf("bad: %#v", resp2.Nodes[0]) } } + +func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + state := s1.fsm.State() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the node + node := mock.Node() + + // Node upsert triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpsertNode(2, node); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req := &structs.NodeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + MinQueryIndex: 1, + }, + } + start := time.Now() + var resp structs.NodeListResponse + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp) + } + if resp.Index != 2 { + t.Fatalf("Bad index: %d %d", resp.Index, 2) + } + if len(resp.Nodes) != 1 || resp.Nodes[0].ID != node.ID { + t.Fatalf("bad: %#v", resp.Nodes) + } + + // Node drain updates trigger watches. 
+ time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateNodeDrain(3, node.ID, true); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 2 + var resp2 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) + } + if resp2.Index != 3 { + t.Fatalf("Bad index: %d %d", resp2.Index, 3) + } + if len(resp2.Nodes) != 1 || !resp2.Nodes[0].Drain { + t.Fatalf("bad: %#v", resp2.Nodes) + } + + // Node status update triggers watches + time.AfterFunc(100*time.Millisecond, func() { + if err := state.UpdateNodeStatus(4, node.ID, structs.NodeStatusDown); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 3 + var resp3 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp3); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) + } + if resp3.Index != 4 { + t.Fatalf("Bad index: %d %d", resp3.Index, 4) + } + if len(resp3.Nodes) != 1 || resp3.Nodes[0].Status != structs.NodeStatusDown { + t.Fatalf("bad: %#v", resp3.Nodes) + } + + // Node delete triggers watches. + time.AfterFunc(100*time.Millisecond, func() { + if err := state.DeleteNode(5, node.ID); err != nil { + t.Fatalf("err: %v", err) + } + }) + + req.MinQueryIndex = 4 + var resp4 structs.NodeListResponse + start = time.Now() + if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp4); err != nil { + t.Fatalf("err: %v", err) + } + + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { + t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) + } + if resp4.Index != 5 { + t.Fatalf("Bad index: %d %d", resp4.Index, 5) + } + if len(resp4.Nodes) != 0 { + t.Fatalf("bad: %#v", resp4.Nodes) + } +} diff --git a/nomad/rpc.go b/nomad/rpc.go index 074dec0d6..21f9c9dc6 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" "github.com/hashicorp/raft" "github.com/hashicorp/yamux" ) @@ -268,10 +269,10 @@ func (s *Server) setQueryMeta(m *structs.QueryMeta) { // blockingOptions is used to parameterize blockingRPC type blockingOptions struct { - queryOpts *structs.QueryOptions - queryMeta *structs.QueryMeta - allocWatch string - run func() error + queryOpts *structs.QueryOptions + queryMeta *structs.QueryMeta + watch watch.Items + run func() error } // blockingRPC is used for queries that need to wait for a @@ -306,17 +307,13 @@ func (s *Server) blockingRPC(opts *blockingOptions) error { state = s.fsm.State() defer func() { timeout.Stop() - if opts.allocWatch != "" { - state.StopWatchAllocs(opts.allocWatch, notifyCh) - } + state.StopWatch(opts.watch, notifyCh) }() REGISTER_NOTIFY: // Register the notification channel. This may be done // multiple times if we have not reached the target wait index. 
- if opts.allocWatch != "" { - state.WatchAllocs(opts.allocWatch, notifyCh) - } + state.Watch(opts.watch, notifyCh) RUN_QUERY: // Update the query meta data @@ -327,7 +324,7 @@ RUN_QUERY: err := opts.run() // Check for minimum query time - if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { + if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { select { case <-notifyCh: goto REGISTER_NOTIFY diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 22487234b..30ee87259 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -8,8 +8,16 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) +// IndexEntry is used with the "index" table +// for managing the latest Raft index affecting a table. +type IndexEntry struct { + Key string + Value uint64 +} + // The StateStore is responsible for maintaining all the Nomad // state. It is manipulated by the FSM which maintains consistency // through the use of Raft. The goals of the StateStore are to provide @@ -23,45 +31,6 @@ type StateStore struct { watch *stateWatch } -// StateSnapshot is used to provide a point-in-time snapshot -type StateSnapshot struct { - StateStore -} - -// StateRestore is used to optimize the performance when -// restoring state by only using a single large transaction -// instead of thousands of sub transactions -type StateRestore struct { - txn *memdb.Txn - watch *stateWatch - allocNodes map[string]struct{} -} - -// Abort is used to abort the restore operation -func (s *StateRestore) Abort() { - s.txn.Abort() -} - -// Commit is used to commit the restore operation -func (s *StateRestore) Commit() { - s.txn.Defer(func() { s.watch.notifyAllocs(s.allocNodes) }) - s.txn.Commit() -} - -// IndexEntry is used with the "index" table -// for managing the latest Raft index affecting a table. -type IndexEntry struct { - Key string - Value uint64 -} - -// stateWatch holds shared state for watching updates. This is -// outside of StateStore so it can be shared with snapshots. 
-type stateWatch struct { - allocs map[string]*NotifyGroup - allocLock sync.Mutex -} - // NewStateStore is used to create a new state store func NewStateStore(logOutput io.Writer) (*StateStore, error) { // Create the MemDB @@ -70,16 +39,11 @@ func NewStateStore(logOutput io.Writer) (*StateStore, error) { return nil, fmt.Errorf("state store setup failed: %v", err) } - // Create the watch entry - watch := &stateWatch{ - allocs: make(map[string]*NotifyGroup), - } - // Create the state store s := &StateStore{ logger: log.New(logOutput, "", log.LstdFlags), db: db, - watch: watch, + watch: newStateWatch(), } return s, nil } @@ -104,55 +68,21 @@ func (s *StateStore) Snapshot() (*StateSnapshot, error) { func (s *StateStore) Restore() (*StateRestore, error) { txn := s.db.Txn(true) r := &StateRestore{ - txn: txn, - watch: s.watch, - allocNodes: make(map[string]struct{}), + txn: txn, + watch: s.watch, + items: watch.NewItems(), } return r, nil } -// WatchAllocs is used to subscribe a channel to changes in allocations for a node -func (s *StateStore) WatchAllocs(node string, notify chan struct{}) { - s.watch.allocLock.Lock() - defer s.watch.allocLock.Unlock() - - // Check for an existing notify group - if grp, ok := s.watch.allocs[node]; ok { - grp.Wait(notify) - return - } - - // Create new notify group - grp := &NotifyGroup{} - grp.Wait(notify) - s.watch.allocs[node] = grp +// Watch subscribes a channel to a set of watch items. +func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { + s.watch.watch(items, notify) } -// StopWatchAllocs is used to unsubscribe a channel from changes in allocations -func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) { - s.watch.allocLock.Lock() - defer s.watch.allocLock.Unlock() - - // Check for an existing notify group - if grp, ok := s.watch.allocs[node]; ok { - grp.Clear(notify) - if grp.Empty() { - delete(s.watch.allocs, node) - } - } -} - -// notifyAllocs is used to notify any node alloc listeners of a change -func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { - w.allocLock.Lock() - defer w.allocLock.Unlock() - - for node := range nodes { - if grp, ok := w.allocs[node]; ok { - grp.Notify() - delete(w.allocs, node) - } - } +// StopWatch unsubscribes a channel from a set of watch items. 
+func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { + s.watch.stopWatch(items, notify) } // UpsertNode is used to register a node or update a node definition @@ -162,6 +92,10 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: node.ID}) + // Check if the node already exists existing, err := txn.First("nodes", "id", node.ID) if err != nil { @@ -187,6 +121,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -196,6 +131,10 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -213,6 +152,7 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -222,6 +162,10 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -248,6 +192,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -257,6 +202,10 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) + // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -283,6 +232,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -319,6 +269,10 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: job.ID}) + // Check if the job already exists existing, err := txn.First("jobs", "id", job.ID) if err != nil { @@ -342,6 +296,7 @@ func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -351,6 +306,10 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: jobID}) + // Lookup the node existing, err := txn.First("jobs", "id", jobID) if err != nil { @@ -368,6 +327,7 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("index update failed: %v", err) } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -417,13 +377,18 @@ 
func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) erro txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "evals"}) + // Do a nested upsert for _, eval := range evals { + watcher.Add(watch.Item{Eval: eval.ID}) if err := s.nestedUpsertEval(txn, index, eval); err != nil { return err } } + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -459,7 +424,9 @@ func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *struct func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { txn := s.db.Txn(true) defer txn.Abort() - nodes := make(map[string]struct{}) + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "evals"}) + watcher.Add(watch.Item{Table: "allocs"}) for _, eval := range evals { existing, err := txn.First("evals", "id", eval) @@ -472,6 +439,7 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Delete("evals", existing); err != nil { return fmt.Errorf("eval delete failed: %v", err) } + watcher.Add(watch.Item{Eval: eval}) } for _, alloc := range allocs { @@ -482,10 +450,14 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if existing == nil { continue } - nodes[existing.(*structs.Allocation).NodeID] = struct{}{} if err := txn.Delete("allocs", existing); err != nil { return fmt.Errorf("alloc delete failed: %v", err) } + realAlloc := existing.(*structs.Allocation) + watcher.Add(watch.Item{Alloc: realAlloc.ID}) + watcher.Add(watch.Item{AllocEval: realAlloc.EvalID}) + watcher.Add(watch.Item{AllocJob: realAlloc.JobID}) + watcher.Add(watch.Item{AllocNode: realAlloc.NodeID}) } // Update the indexes @@ -495,7 +467,8 @@ func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) e if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -557,6 +530,13 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati txn := s.db.Txn(true) defer txn.Abort() + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "allocs"}) + watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocEval: alloc.EvalID}) + watcher.Add(watch.Item{AllocJob: alloc.JobID}) + watcher.Add(watch.Item{AllocNode: alloc.NodeID}) + // Look for existing alloc existing, err := txn.First("allocs", "id", alloc.ID) if err != nil { @@ -590,8 +570,7 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati return fmt.Errorf("index update failed: %v", err) } - nodes := map[string]struct{}{alloc.NodeID: struct{}{}} - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -601,7 +580,9 @@ func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocati func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { txn := s.db.Txn(true) defer txn.Abort() - nodes := make(map[string]struct{}) + + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "allocs"}) // Handle the allocations for _, alloc := range allocs { @@ -620,10 +601,14 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er alloc.ClientStatus = exist.ClientStatus alloc.ClientDescription = exist.ClientDescription } - 
nodes[alloc.NodeID] = struct{}{} if err := txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } + + watcher.Add(watch.Item{Alloc: alloc.ID}) + watcher.Add(watch.Item{AllocEval: alloc.EvalID}) + watcher.Add(watch.Item{AllocJob: alloc.JobID}) + watcher.Add(watch.Item{AllocNode: alloc.NodeID}) } // Update the indexes @@ -631,7 +616,7 @@ func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) er return fmt.Errorf("index update failed: %v", err) } - txn.Defer(func() { s.watch.notifyAllocs(nodes) }) + txn.Defer(func() { s.watch.notify(watcher) }) txn.Commit() return nil } @@ -753,8 +738,35 @@ func (s *StateStore) Indexes() (memdb.ResultIterator, error) { return iter, nil } +// StateSnapshot is used to provide a point-in-time snapshot +type StateSnapshot struct { + StateStore +} + +// StateRestore is used to optimize the performance when +// restoring state by only using a single large transaction +// instead of thousands of sub transactions +type StateRestore struct { + txn *memdb.Txn + watch *stateWatch + items watch.Items +} + +// Abort is used to abort the restore operation +func (s *StateRestore) Abort() { + s.txn.Abort() +} + +// Commit is used to commit the restore operation +func (s *StateRestore) Commit() { + s.txn.Defer(func() { s.watch.notify(s.items) }) + s.txn.Commit() +} + // NodeRestore is used to restore a node func (r *StateRestore) NodeRestore(node *structs.Node) error { + r.items.Add(watch.Item{Table: "nodes"}) + r.items.Add(watch.Item{Node: node.ID}) if err := r.txn.Insert("nodes", node); err != nil { return fmt.Errorf("node insert failed: %v", err) } @@ -763,6 +775,8 @@ func (r *StateRestore) NodeRestore(node *structs.Node) error { // JobRestore is used to restore a job func (r *StateRestore) JobRestore(job *structs.Job) error { + r.items.Add(watch.Item{Table: "jobs"}) + r.items.Add(watch.Item{Job: job.ID}) if err := r.txn.Insert("jobs", job); err != nil { return fmt.Errorf("job insert failed: %v", err) } @@ -771,6 +785,8 @@ func (r *StateRestore) JobRestore(job *structs.Job) error { // EvalRestore is used to restore an evaluation func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { + r.items.Add(watch.Item{Table: "evals"}) + r.items.Add(watch.Item{Eval: eval.ID}) if err := r.txn.Insert("evals", eval); err != nil { return fmt.Errorf("eval insert failed: %v", err) } @@ -779,7 +795,11 @@ func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { // AllocRestore is used to restore an allocation func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { - r.allocNodes[alloc.NodeID] = struct{}{} + r.items.Add(watch.Item{Table: "allocs"}) + r.items.Add(watch.Item{Alloc: alloc.ID}) + r.items.Add(watch.Item{AllocEval: alloc.EvalID}) + r.items.Add(watch.Item{AllocJob: alloc.JobID}) + r.items.Add(watch.Item{AllocNode: alloc.NodeID}) if err := r.txn.Insert("allocs", alloc); err != nil { return fmt.Errorf("alloc insert failed: %v", err) } @@ -793,3 +813,59 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { } return nil } + +// stateWatch holds shared state for watching updates. This is +// outside of StateStore so it can be shared with snapshots. +type stateWatch struct { + items map[watch.Item]*NotifyGroup + l sync.Mutex +} + +// newStateWatch creates a new stateWatch for change notification. +func newStateWatch() *stateWatch { + return &stateWatch{ + items: make(map[watch.Item]*NotifyGroup), + } +} + +// watch subscribes a channel to the given watch items. 
+func (w *stateWatch) watch(items watch.Items, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + for item, _ := range items { + grp, ok := w.items[item] + if !ok { + grp = new(NotifyGroup) + w.items[item] = grp + } + grp.Wait(ch) + } +} + +// stopWatch unsubscribes a channel from the given watch items. +func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) { + w.l.Lock() + defer w.l.Unlock() + + for item, _ := range items { + if grp, ok := w.items[item]; ok { + grp.Clear(ch) + if grp.Empty() { + delete(w.items, item) + } + } + } +} + +// notify is used to fire notifications on the given watch items. +func (w *stateWatch) notify(items watch.Items) { + w.l.Lock() + defer w.l.Unlock() + + for wi, _ := range items { + if grp, ok := w.items[wi]; ok { + grp.Notify() + } + } +} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 1c4b60238..5e1021e55 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/watch" ) func testStateStore(t *testing.T) *StateStore { @@ -25,6 +26,11 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -46,12 +52,19 @@ func TestStateStore_UpsertNode_Node(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteNode_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -78,12 +91,19 @@ func TestStateStore_DeleteNode_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -113,12 +133,19 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { state := testStateStore(t) node := mock.Node() + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) + err := state.UpsertNode(1000, node) if err != nil { t.Fatalf("err: %v", err) @@ -148,6 +175,8 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Nodes(t *testing.T) { @@ -188,18 +217,22 @@ func TestStateStore_Nodes(t *testing.T) { func TestStateStore_RestoreNode(t *testing.T) { state := testStateStore(t) + node := mock.Node() + + notify := setupNotifyTest( + state, + watch.Item{Table: "nodes"}, + watch.Item{Node: node.ID}) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - node := mock.Node() err = restore.NodeRestore(node) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() out, err := state.NodeByID(node.ID) @@ -210,12 +243,19 @@ func TestStateStore_RestoreNode(t *testing.T) { if !reflect.DeepEqual(out, node) { 
t.Fatalf("Bad: %#v %#v", out, node) } + + notify.verify(t) } func TestStateStore_UpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -237,12 +277,19 @@ func TestStateStore_UpsertJob_Job(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -278,12 +325,19 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteJob_Job(t *testing.T) { state := testStateStore(t) job := mock.Job() + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) + err := state.UpsertJob(1000, job) if err != nil { t.Fatalf("err: %v", err) @@ -310,6 +364,8 @@ func TestStateStore_DeleteJob_Job(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Jobs(t *testing.T) { @@ -417,18 +473,22 @@ func TestStateStore_JobsByScheduler(t *testing.T) { func TestStateStore_RestoreJob(t *testing.T) { state := testStateStore(t) + job := mock.Job() + + notify := setupNotifyTest( + state, + watch.Item{Table: "jobs"}, + watch.Item{Job: job.ID}) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - job := mock.Job() err = restore.JobRestore(job) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() out, err := state.JobByID(job.ID) @@ -439,6 +499,8 @@ func TestStateStore_RestoreJob(t *testing.T) { if !reflect.DeepEqual(out, job) { t.Fatalf("Bad: %#v %#v", out, job) } + + notify.verify(t) } func TestStateStore_Indexes(t *testing.T) { @@ -503,6 +565,11 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { state := testStateStore(t) eval := mock.Eval() + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) + err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) if err != nil { t.Fatalf("err: %v", err) @@ -524,6 +591,8 @@ func TestStateStore_UpsertEvals_Eval(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { @@ -535,6 +604,11 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { t.Fatalf("err: %v", err) } + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) + eval2 := mock.Eval() eval2.ID = eval.ID err = state.UpsertEvals(1001, []*structs.Evaluation{eval2}) @@ -565,40 +639,54 @@ func TestStateStore_Update_UpsertEvals_Eval(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_DeleteEval_Eval(t *testing.T) { state := testStateStore(t) - eval := mock.Eval() + eval1 := mock.Eval() eval2 := mock.Eval() - alloc := mock.Alloc() + alloc1 := mock.Alloc() alloc2 := mock.Alloc() - err := state.UpsertEvals(1000, []*structs.Evaluation{eval, eval2}) + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Table: "allocs"}, + watch.Item{Eval: eval1.ID}, + watch.Item{Eval: eval2.ID}, + watch.Item{Alloc: alloc1.ID}, + watch.Item{Alloc: alloc2.ID}, + watch.Item{AllocEval: 
alloc1.EvalID}, + watch.Item{AllocEval: alloc2.EvalID}, + watch.Item{AllocJob: alloc1.JobID}, + watch.Item{AllocJob: alloc2.JobID}, + watch.Item{AllocNode: alloc1.NodeID}, + watch.Item{AllocNode: alloc2.NodeID}) + + err := state.UpsertEvals(1000, []*structs.Evaluation{eval1, eval2}) if err != nil { t.Fatalf("err: %v", err) } - err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2}) + err = state.UpsertAllocs(1001, []*structs.Allocation{alloc1, alloc2}) if err != nil { t.Fatalf("err: %v", err) } - notify1 := make(chan struct{}, 1) - state.WatchAllocs(alloc.NodeID, notify1) - - err = state.DeleteEval(1002, []string{eval.ID, eval2.ID}, []string{alloc.ID, alloc2.ID}) + err = state.DeleteEval(1002, []string{eval1.ID, eval2.ID}, []string{alloc1.ID, alloc2.ID}) if err != nil { t.Fatalf("err: %v", err) } - out, err := state.EvalByID(eval.ID) + out, err := state.EvalByID(eval1.ID) if err != nil { t.Fatalf("err: %v", err) } if out != nil { - t.Fatalf("bad: %#v %#v", eval, out) + t.Fatalf("bad: %#v %#v", eval1, out) } out, err = state.EvalByID(eval2.ID) @@ -607,16 +695,16 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } if out != nil { - t.Fatalf("bad: %#v %#v", eval, out) + t.Fatalf("bad: %#v %#v", eval1, out) } - outA, err := state.AllocByID(alloc.ID) + outA, err := state.AllocByID(alloc1.ID) if err != nil { t.Fatalf("err: %v", err) } if out != nil { - t.Fatalf("bad: %#v %#v", alloc, outA) + t.Fatalf("bad: %#v %#v", alloc1, outA) } outA, err = state.AllocByID(alloc2.ID) @@ -625,7 +713,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { } if out != nil { - t.Fatalf("bad: %#v %#v", alloc, outA) + t.Fatalf("bad: %#v %#v", alloc1, outA) } index, err := state.Index("evals") @@ -644,11 +732,7 @@ func TestStateStore_DeleteEval_Eval(t *testing.T) { t.Fatalf("bad: %d", index) } - select { - case <-notify1: - default: - t.Fatalf("should be notified") - } + notify.verify(t) } func TestStateStore_EvalsByJob(t *testing.T) { @@ -720,34 +804,48 @@ func TestStateStore_Evals(t *testing.T) { func TestStateStore_RestoreEval(t *testing.T) { state := testStateStore(t) + eval := mock.Eval() + + notify := setupNotifyTest( + state, + watch.Item{Table: "evals"}, + watch.Item{Eval: eval.ID}) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - job := mock.Eval() - err = restore.EvalRestore(job) + err = restore.EvalRestore(eval) if err != nil { t.Fatalf("err: %v", err) } - restore.Commit() - out, err := state.EvalByID(job.ID) + out, err := state.EvalByID(eval.ID) if err != nil { t.Fatalf("err: %v", err) } - if !reflect.DeepEqual(out, job) { - t.Fatalf("Bad: %#v %#v", out, job) + if !reflect.DeepEqual(out, eval) { + t.Fatalf("Bad: %#v %#v", out, eval) } + + notify.verify(t) } func TestStateStore_UpdateAllocFromClient(t *testing.T) { state := testStateStore(t) - alloc := mock.Alloc() + + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -779,12 +877,22 @@ func TestStateStore_UpdateAllocFromClient(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { state := testStateStore(t) - alloc := mock.Alloc() + + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: 
alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) + err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -806,35 +914,8 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { if index != 1000 { t.Fatalf("bad: %d", index) } -} -func TestStateStore_WatchAllocs(t *testing.T) { - state := testStateStore(t) - - notify1 := make(chan struct{}, 1) - notify2 := make(chan struct{}, 1) - state.WatchAllocs("foo", notify1) - state.WatchAllocs("foo", notify2) - state.StopWatchAllocs("foo", notify2) - - alloc := mock.Alloc() - alloc.NodeID = "foo" - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc}) - if err != nil { - t.Fatalf("err: %v", err) - } - - select { - case <-notify1: - default: - t.Fatalf("should be notified") - } - - select { - case <-notify2: - t.Fatalf("should not be notified") - default: - } + notify.verify(t) } func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { @@ -849,6 +930,15 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { alloc2 := mock.Alloc() alloc2.ID = alloc.ID alloc2.NodeID = alloc.NodeID + ".new" + + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc2.ID}, + watch.Item{AllocEval: alloc2.EvalID}, + watch.Item{AllocJob: alloc2.JobID}, + watch.Item{AllocNode: alloc2.NodeID}) + err = state.UpsertAllocs(1001, []*structs.Allocation{alloc2}) if err != nil { t.Fatalf("err: %v", err) @@ -877,6 +967,8 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { if index != 1001 { t.Fatalf("bad: %d", index) } + + notify.verify(t) } func TestStateStore_EvictAlloc_Alloc(t *testing.T) { @@ -1008,13 +1100,21 @@ func TestStateStore_Allocs(t *testing.T) { func TestStateStore_RestoreAlloc(t *testing.T) { state := testStateStore(t) + alloc := mock.Alloc() + + notify := setupNotifyTest( + state, + watch.Item{Table: "allocs"}, + watch.Item{Alloc: alloc.ID}, + watch.Item{AllocEval: alloc.EvalID}, + watch.Item{AllocJob: alloc.JobID}, + watch.Item{AllocNode: alloc.NodeID}) restore, err := state.Restore() if err != nil { t.Fatalf("err: %v", err) } - alloc := mock.Alloc() err = restore.AllocRestore(alloc) if err != nil { t.Fatalf("err: %v", err) @@ -1030,6 +1130,87 @@ func TestStateStore_RestoreAlloc(t *testing.T) { if !reflect.DeepEqual(out, alloc) { t.Fatalf("Bad: %#v %#v", out, alloc) } + + notify.verify(t) +} + +func TestStateWatch_watch(t *testing.T) { + sw := newStateWatch() + notify1 := make(chan struct{}, 1) + notify2 := make(chan struct{}, 1) + notify3 := make(chan struct{}, 1) + + // Notifications trigger subscribed channels + sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify1) + sw.watch(watch.NewItems(watch.Item{Table: "bar"}), notify2) + sw.watch(watch.NewItems(watch.Item{Table: "baz"}), notify3) + + items := watch.NewItems() + items.Add(watch.Item{Table: "foo"}) + items.Add(watch.Item{Table: "bar"}) + + sw.notify(items) + if len(notify1) != 1 { + t.Fatalf("should notify") + } + if len(notify2) != 1 { + t.Fatalf("should notify") + } + if len(notify3) != 0 { + t.Fatalf("should not notify") + } +} + +func TestStateWatch_stopWatch(t *testing.T) { + sw := newStateWatch() + notify := make(chan struct{}) + + // First subscribe + sw.watch(watch.NewItems(watch.Item{Table: "foo"}), notify) + + // Unsubscribe stop notifications + sw.stopWatch(watch.NewItems(watch.Item{Table: "foo"}), notify) + + // Check that the group was removed + if _, ok := sw.items[watch.Item{Table: "foo"}]; ok { + t.Fatalf("should remove group") + } + + // Check that 
we are not notified + sw.notify(watch.NewItems(watch.Item{Table: "foo"})) + if len(notify) != 0 { + t.Fatalf("should not notify") + } +} + +// setupNotifyTest takes a state store and a set of watch items, then creates +// and subscribes a notification channel for each item. +func setupNotifyTest(state *StateStore, items ...watch.Item) notifyTest { + var n notifyTest + for _, item := range items { + ch := make(chan struct{}, 1) + state.Watch(watch.NewItems(item), ch) + n = append(n, ¬ifyTestCase{item, ch}) + } + return n +} + +// notifyTestCase is used to set up and verify watch triggers. +type notifyTestCase struct { + item watch.Item + ch chan struct{} +} + +// notifyTest is a suite of notifyTestCases. +type notifyTest []*notifyTestCase + +// verify ensures that each channel received a notification. +func (n notifyTest) verify(t *testing.T) { + for _, tcase := range n { + if len(tcase.ch) != 1 { + t.Fatalf("should notify %#v", tcase.item) + } + } } // NodeIDSort is used to sort nodes by ID diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f5d20552a..6e57fc58c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,8 +14,17 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") + defaultServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } + defaultBatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } ) type MessageType uint8 @@ -898,6 +907,33 @@ func (u *UpdateStrategy) Rolling() bool { return u.Stagger > 0 && u.MaxParallel > 0 } +// RestartPolicy influences how Nomad restarts Tasks when they +// crash or fail. +type RestartPolicy struct { + Attempts int + Interval time.Duration + Delay time.Duration +} + +func (r *RestartPolicy) Validate() error { + if time.Duration(r.Attempts)*r.Delay > r.Interval { + return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) + } + return nil +} + +func NewRestartPolicy(jobType string) *RestartPolicy { + switch jobType { + case JobTypeService, JobTypeSystem: + rp := defaultServiceJobRestartPolicy + return &rp + case JobTypeBatch: + rp := defaultBatchJobRestartPolicy + return &rp + } + return nil +} + // TaskGroup is an atomic unit of placement. Each task group belongs to // a job and may contain any number of tasks. A task group support running // in many replicas using the same configuration.. @@ -913,6 +949,9 @@ type TaskGroup struct { // all the tasks contained. 
Constraints []*Constraint + //RestartPolicy of a TaskGroup + RestartPolicy *RestartPolicy + // Tasks are the collection of tasks that this task group needs to run Tasks []*Task @@ -940,6 +979,10 @@ func (tg *TaskGroup) Validate() error { } } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + // Check for duplicate tasks tasks := make(map[string]int) for idx, task := range tg.Tasks { diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index cabf83dfa..1f107b095 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1,11 +1,11 @@ package structs import ( + "github.com/hashicorp/go-multierror" "reflect" "strings" "testing" - - "github.com/hashicorp/go-multierror" + "time" ) func TestJob_Validate(t *testing.T) { @@ -44,11 +44,27 @@ func TestJob_Validate(t *testing.T) { TaskGroups: []*TaskGroup{ &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + }, + &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, - &TaskGroup{}, }, } err = j.Validate() @@ -65,7 +81,13 @@ func TestJob_Validate(t *testing.T) { } func TestTaskGroup_Validate(t *testing.T) { - tg := &TaskGroup{} + tg := &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + } err := tg.Validate() mErr := err.(*multierror.Error) if !strings.Contains(mErr.Errors[0].Error(), "group name") { @@ -86,6 +108,11 @@ func TestTaskGroup_Validate(t *testing.T) { &Task{Name: "web"}, &Task{}, }, + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, } err = tg.Validate() mErr = err.(*multierror.Error) diff --git a/nomad/watch/watch.go b/nomad/watch/watch.go new file mode 100644 index 000000000..4e9bafbc9 --- /dev/null +++ b/nomad/watch/watch.go @@ -0,0 +1,38 @@ +package watch + +// The watch package provides a means of describing a watch for a blocking +// query. It is exported so it may be shared between Nomad's RPC layer and +// the underlying state store. + +// Item describes the scope of a watch. It is used to provide a uniform +// input for subscribe/unsubscribe and notification firing. Specifying +// multiple fields does not place a watch on multiple items. Each Item +// describes exactly one scoped watch. +type Item struct { + Alloc string + AllocEval string + AllocJob string + AllocNode string + Eval string + Job string + Node string + Table string +} + +// Items is a helper used to construct a set of watchItems. It deduplicates +// the items as they are added using map keys. +type Items map[Item]struct{} + +// NewItems creates a new Items set and adds the given items. +func NewItems(items ...Item) Items { + wi := make(Items) + for _, item := range items { + wi.Add(item) + } + return wi +} + +// Add adds an item to the watch set. 
+func (wi Items) Add(i Item) { + wi[i] = struct{}{} +} diff --git a/nomad/watch/watch_test.go b/nomad/watch/watch_test.go new file mode 100644 index 000000000..9a8901aa8 --- /dev/null +++ b/nomad/watch/watch_test.go @@ -0,0 +1,31 @@ +package watch + +import ( + "testing" +) + +func TestWatchItems(t *testing.T) { + // Creates an empty set of items + wi := NewItems() + if len(wi) != 0 { + t.Fatalf("expect 0 items, got: %#v", wi) + } + + // Creates a new set of supplied items + wi = NewItems(Item{Table: "foo"}) + if len(wi) != 1 { + t.Fatalf("expected 1 item, got: %#v", wi) + } + + // Adding items works + wi.Add(Item{Node: "bar"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) + } + + // Adding duplicates auto-dedupes + wi.Add(Item{Table: "foo"}) + if len(wi) != 2 { + t.Fatalf("expected 2 items, got: %#v", wi) + } +} diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 6f0b9b0dc..d548b7e67 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -42,7 +42,7 @@ nodes, unless otherwise specified: as `us-west` and `us-east`. Defaults to `global`. * `datacenter`: Datacenter of the local agent. All members of a datacenter - should all share a local LAN connection. Defaults to `dc1`. + should share a local LAN connection. Defaults to `dc1`. * `name`: The name of the local node. This value is used to identify individual nodes in a given datacenter and must be unique @@ -103,7 +103,7 @@ nodes, unless otherwise specified: This can be used to advertise a different address to the peers of a server node to support more complex network configurations such as NAT. This configuration is optional, and defaults to the bind address of the specific - network service if it is not provided. This configuration is only appicable + network service if it is not provided. This configuration is only applicable on server nodes. The value is a map of IP addresses and supports the following keys:
@@ -125,13 +125,13 @@ nodes, unless otherwise specified: * `disable_hostname`: A boolean indicating if gauge values should not be prefixed with the local hostname. -* `leave_on_interrupt`: Enables gracefully leave when receiving the +* `leave_on_interrupt`: Enables gracefully leaving when receiving the interrupt signal. By default, the agent will exit forcefully on any signal. -* `leave_on_terminate`: Enables gracefully leave when receiving the +* `leave_on_terminate`: Enables gracefully leaving when receiving the terminate signal. By default, the agent will exit forcefully on any signal. -* `enable_syslog`: Enables logging to syslog. This option only work on +* `enable_syslog`: Enables logging to syslog. This option only works on Unix based systems. * `syslog_facility`: Controls the syslog facility that is used. By default, diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index 444e7f6ca..dfa4c85a5 100644 --- a/website/source/docs/drivers/docker.html.md +++ b/website/source/docs/drivers/docker.html.md @@ -32,6 +32,26 @@ The `docker` driver supports the following configuration in the job specificatio network mode is not supported right now and is reported as an invalid option. +* `privileged` - (optional) Privileged mode gives the container full access to + the host. Valid options are `"true"` and `"false"` (defaults to `"false"`). + Tasks with `privileged` set can only run on Nomad Agents with + `docker.privileged.enabled = "true"`. + +* `dns-servers` - (optional) A comma separated list of DNS servers for the container + to use (e.g. "8.8.8.8,8.8.4.4"). *Docker API v1.10 and above only* + +* `search-domains` - (optional) A comma separated list of DNS search domains for the + container to use. + +**Authentication** +Registry authentication can be set per task with the following authentication +parameters. These options can provide access to private repositories that +utilize the docker remote api (e.g. dockerhub, quay.io) + - `auth.username` - (optional) The account username + - `auth.password` - (optional) The account password + - `auth.email` - (optional) The account email + - `auth.server-address` - (optional) The server domain/ip without the protocol + ### Port Mapping Nomad uses port binding to expose services running in containers using the port @@ -121,6 +141,11 @@ The `docker` driver has the following configuration options: * `docker.cleanup.image` Defaults to `true`. Changing this to `false` will prevent Nomad from removing images from stopped tasks. +* `docker.privileged.enabled` Defaults to `false`. Changing this to `true` will + allow containers to use "privileged" mode, which gives the containers full access + to the host. + + Note: When testing or using the `-dev` flag you can use `DOCKER_HOST`, `DOCKER_TLS_VERIFY`, and `DOCKER_CERT_PATH` to customize Nomad's behavior. In production Nomad will always read `docker.endpoint`. diff --git a/website/source/docs/drivers/exec.html.md b/website/source/docs/drivers/exec.html.md index dadf28549..f897b1ea4 100644 --- a/website/source/docs/drivers/exec.html.md +++ b/website/source/docs/drivers/exec.html.md @@ -11,7 +11,7 @@ description: |- Name: `exec` The `exec` driver is used to simply execute a particular command for a task. -However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation +However, unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation primitives of the operating system to limit the tasks access to resources. 
While simple, since the `exec` driver can invoke any command, it can be used
to call scripts or other wrappers which provide higher level features.
@@ -24,13 +24,18 @@ The `exec` driver supports the following configuration in the job spec:

* `artifact_source` – (Optional) Source location of an executable artifact. Must be accessible
from the Nomad client. If you specify an `artifact_source` to be executed, you
must reference it in the `command` as show in the examples below
+* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image.
+The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`,
+and the value is the computed checksum. If a checksum is supplied and does not
+match the downloaded artifact, the driver will fail to start.
* `args` - The argument list to the command, space seperated. Optional.

## Client Requirements

-The `exec` driver can run on all supported operating systems but to provide
-proper isolation the client must be run as root on non-Windows operating systems.
-Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
+The `exec` driver can only be run on Linux, and the Nomad agent must be run as root.
+`exec` is limited to this configuration because resource isolation is currently
+only guaranteed on Linux. Further, the host must have cgroups mounted properly
+in order for the driver to work.

You must specify a `command` to be executed. Optionally you can specify an
`artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
@@ -52,6 +57,7 @@ To execute a binary specified by `artifact_source`:

```
config {
  artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin"
+  checksum = "sha256:abd123445ds4555555555"
  command = "$NOMAD_TASK_DIR/binary.bin"
}
```
@@ -68,8 +74,5 @@ The `exec` driver will set the following client attributes:

The resource isolation provided varies by the operating system of the client
and the configuration.

-On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the
+On Linux, Nomad will use cgroups and a chroot to isolate the
resources of a process and as such the Nomad agent must be run as root.
-
-On Windows, the task driver will just execute the command with no additional
-resource isolation.
diff --git a/website/source/docs/drivers/java.html.md b/website/source/docs/drivers/java.html.md
index ecfddb645..f2bbd2b76 100644
--- a/website/source/docs/drivers/java.html.md
+++ b/website/source/docs/drivers/java.html.md
@@ -18,8 +18,12 @@ HTTP from the Nomad client.

The `java` driver supports the following configuration in the job spec:

-* `jar_source` - **(Required)** The hosted location of the source Jar file. Must be accessible
+* `artifact_source` - **(Required)** The hosted location of the source Jar file. Must be accessible
from the Nomad client
+* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image.
+The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`,
+and the value is the computed checksum. If a checksum is supplied and does not
+match the downloaded artifact, the driver will fail to start.
* `args` - **(Optional)** The argument list for the `java` command, space separated.

@@ -29,10 +33,28 @@ from the Nomad client

## Client Requirements

The `java` driver requires Java to be installed and in your systems `$PATH`.
-The `jar_source` must be accessible by the node running Nomad. This can be an
+The `artifact_source` must be accessible by the node running Nomad.
This can be an
internal source, private to your cluster, but it must be reachable by the client
over HTTP.

+## Examples
+
+A simple config block to run a Java Jar:
+
+```
+# Define a task to run
+task "web" {
+  # Run a Java Jar
+  driver = "java"
+
+  config {
+    artifact_source = "https://dl.dropboxusercontent.com/u/1234/hello.jar"
+    checksum = "md5:123445555555555"
+    jvm_options = "-Xmx2048m -Xms256m"
+  }
+}
+```
+
## Client Attributes

The `java` driver will set the following client attributes:
diff --git a/website/source/docs/drivers/qemu.html.md b/website/source/docs/drivers/qemu.html.md
index 3e19076a0..403926b4c 100644
--- a/website/source/docs/drivers/qemu.html.md
+++ b/website/source/docs/drivers/qemu.html.md
@@ -23,10 +23,12 @@ The `Qemu` driver can execute any regular `qemu` image (e.g. `qcow`, `img`,

The `Qemu` driver supports the following configuration in the job spec:

-* `image_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible
+* `artifact_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible
from the Nomad client, via HTTP.
-* `checksum` - **(Required)** The SHA256 checksum of the `qemu` image. If the
-checksums do not match, the `Qemu` driver will fail to start the image
+* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image.
+The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`,
+and the value is the computed checksum. If a checksum is supplied and does not
+match the downloaded artifact, the driver will fail to start.
* `accelerator` - (Optional) The type of accelerator to use in the invocation. If the host machine
has `Qemu` installed with KVM support, users can specify `kvm` for the `accelerator`. Default is `tcg`
* `host_port` - **(Required)** Port on the host machine to forward to the guest
@@ -38,7 +40,7 @@ in the `Task` specification

## Client Requirements

The `Qemu` driver requires Qemu to be installed and in your system's `$PATH`.
-The `image_source` must be accessible by the node running Nomad. This can be an
+The `artifact_source` must be accessible by the node running Nomad. This can be an
internal source, private to your cluster, but it must be reachable by the client
over HTTP.
diff --git a/website/source/docs/drivers/raw_exec.html.md b/website/source/docs/drivers/raw_exec.html.md
index fa67129ba..2dc741887 100644
--- a/website/source/docs/drivers/raw_exec.html.md
+++ b/website/source/docs/drivers/raw_exec.html.md
@@ -22,6 +22,10 @@ The `raw_exec` driver supports the following configuration in the job spec:

* `artifact_source` – (Optional) Source location of an executable artifact. Must be accessible
from the Nomad client. If you specify an `artifact_source` to be executed, you
must reference it in the `command` as show in the examples below
+* `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image.
+The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`,
+and the value is the computed checksum. If a checksum is supplied and does not
+match the downloaded artifact, the driver will fail to start.
* `args` - The argument list to the command, space seperated. Optional.
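The `checksum` bullets added across these driver pages all describe the same `type:value` format. A short sketch of how the two common forms look in a `raw_exec` (or `exec`) config block, with a made-up URL and made-up hash values:

```
config {
  artifact_source = "https://example.com/binary.bin"

  # "<type>:<value>" - type is any of md5, sha1, sha256, or sha512, as described above
  # checksum = "md5:3b5d5c3712955042212316173ccf37be"
  checksum = "sha256:9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"

  command = "$NOMAD_TASK_DIR/binary.bin"

  # Space-separated argument string, per the `args` option above
  args = "-port 8080"
}
```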
## Client Requirements @@ -57,6 +61,7 @@ To execute a binary specified by `artifact_source`: ``` config { artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin" + checksum = "sha256:133jifjiofu9090fsadjofsdjlk" command = "$NOMAD_TASK_DIR/binary.bin" } ``` diff --git a/website/source/docs/http/alloc.html.md b/website/source/docs/http/alloc.html.md index 3c224fd54..822858a8c 100644 --- a/website/source/docs/http/alloc.html.md +++ b/website/source/docs/http/alloc.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -179,4 +184,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/allocs.html.md b/website/source/docs/http/allocs.html.md index 44ad8aa7e..b59a4f204 100644 --- a/website/source/docs/http/allocs.html.md +++ b/website/source/docs/http/allocs.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -56,4 +61,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/eval.html.md b/website/source/docs/http/eval.html.md index cba43900c..87e048209 100644 --- a/website/source/docs/http/eval.html.md +++ b/website/source/docs/http/eval.html.md @@ -3,7 +3,7 @@ layout: "http" page_title: "HTTP API: /v1/evaluation" sidebar_current: "docs-http-eval-" description: |- - The '/1/evaluation' endpoint is used to query a specific evaluation. + The '/v1/evaluation' endpoint is used to query a specific evaluation. --- # /v1/evaluation @@ -17,7 +17,7 @@ be specified using the `?region=` query parameter.
Description
- Lists all the evaluations. + Query a specific evaluation.
Method
@@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -57,9 +62,6 @@ be specified using the `?region=` query parameter.
-# /v1/evaluation/\/allocations -## GET -
Description
@@ -77,6 +79,11 @@ be specified using the `?region=` query parameter. None
+
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -102,4 +109,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/evals.html.md b/website/source/docs/http/evals.html.md index 3bc22da8f..23d98cc95 100644 --- a/website/source/docs/http/evals.html.md +++ b/website/source/docs/http/evals.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -59,4 +64,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/index.html.md b/website/source/docs/http/index.html.md index 671d19fa5..7ed3f0dbd 100644 --- a/website/source/docs/http/index.html.md +++ b/website/source/docs/http/index.html.md @@ -31,6 +31,7 @@ The API is modeled closely on the underlying data model. Use the links to the le documentation about specific endpoints. There are also "Agent" APIs which interact with a specific agent and not the broader cluster used for administration. + ## Blocking Queries Certain endpoints support a feature called a "blocking query." A blocking query diff --git a/website/source/docs/http/job.html.md b/website/source/docs/http/job.html.md index 211963e6a..cbf0f5097 100644 --- a/website/source/docs/http/job.html.md +++ b/website/source/docs/http/job.html.md @@ -6,7 +6,7 @@ description: |- The '/1/job' endpoint is used for CRUD on a single job. --- -# /v1/job/\ +# /v1/job The `job` endpoint is used for CRUD on a single job. By default, the agent's local region is used; another region can be specified using the `?region=` query parameter. @@ -30,6 +30,11 @@ region is used; another region can be specified using the `?region=` query param None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -136,6 +141,105 @@ region is used; another region can be specified using the `?region=` query param
+
+
Description
+
+ Query the allocations belonging to a single job. +
+ +
Method
+
GET
+ +
URL
+
`/v1/job//allocations`
+ +
Parameters
+
+ None +
+ +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+ +
Returns
+
+ + ```javascript + [ + { + "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", + "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", + "Name": "binstore-storagelocker.binsl[0]", + "NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b", + "JobID": "binstore-storagelocker", + "TaskGroup": "binsl", + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "running", + "ClientDescription": "", + "CreateIndex": 16, + "ModifyIndex": 16 + }, + ... + ] + ``` + +
+
+ +
+
Description
+
+ Query the evaluations belonging to a single job. +
+ +
Method
+
GET
+ +
URL
+
`/v1/job//evaluations`
+ +
Parameters
+
+ None +
+ +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+ +
Returns
+
+ + ```javascript + [ + { + "ID": "151accaa-1ac6-90fe-d427-313e70ccbb88", + "Priority": 50, + "Type": "service", + "TriggeredBy": "job-register", + "JobID": "binstore-storagelocker", + "JobModifyIndex": 14, + "NodeID": "", + "NodeModifyIndex": 0, + "Status": "complete", + "StatusDescription": "", + "Wait": 0, + "NextEval": "", + "PreviousEval": "", + "CreateIndex": 15, + "ModifyIndex": 17 + }, + ... + ] + ``` + +
+
+ ## PUT / POST
@@ -177,6 +281,38 @@ region is used; another region can be specified using the `?region=` query param
+
+
Description
+
+ Creates a new evaluation for the given job. This can be used to force + run the scheduling logic if necessary. +
+ +
Method
+
PUT or POST
+ +
URL
+
`/v1/job//evaluate`
+ +
Parameters
+
+ None +
+ +
Returns
+
+ + ```javascript + { + "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", + "EvalCreateIndex": 35, + "JobModifyIndex": 34, + } + ``` + +
+
+ ## DELETE
@@ -209,134 +345,3 @@ region is used; another region can be specified using the `?region=` query param
- -# /v1/job/\/allocations -## GET - -
-
Description
-
- Query the allocations belonging to a single job. -
- -
Method
-
GET
- -
URL
-
`/v1/job//allocations`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - [ - { - "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", - "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Name": "binstore-storagelocker.binsl[0]", - "NodeID": "a703c3ca-5ff8-11e5-9213-970ee8879d1b", - "JobID": "binstore-storagelocker", - "TaskGroup": "binsl", - "DesiredStatus": "run", - "DesiredDescription": "", - "ClientStatus": "running", - "ClientDescription": "", - "CreateIndex": 16, - "ModifyIndex": 16 - }, - ... - ] - ``` - -
-
- -# /v1/job/\/evaluate -## PUT / POST - -
-
Description
-
- Creates a new evaluation for the given job. This can be used to force - run the scheduling logic if necessary. -
- -
Method
-
PUT or POST
- -
URL
-
`/v1/job//evaluate`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - { - "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", - "EvalCreateIndex": 35, - "JobModifyIndex": 34, - } - ``` - -
-
- -# /v1/job/\/evaluations -## GET - -
-
Description
-
- Query the evaluations belonging to a single job. -
- -
Method
-
GET
- -
URL
-
`/v1/job//evaluations`
- -
Parameters
-
- None -
- -
Returns
-
- - ```javascript - [ - { - "ID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Priority": 50, - "Type": "service", - "TriggeredBy": "job-register", - "JobID": "binstore-storagelocker", - "JobModifyIndex": 14, - "NodeID": "", - "NodeModifyIndex": 0, - "Status": "complete", - "StatusDescription": "", - "Wait": 0, - "NextEval": "", - "PreviousEval": "", - "CreateIndex": 15, - "ModifyIndex": 17 - }, - ... - ] - ``` - -
-
- diff --git a/website/source/docs/http/jobs.html.md b/website/source/docs/http/jobs.html.md index f724ce0ac..8f098b1ca 100644 --- a/website/source/docs/http/jobs.html.md +++ b/website/source/docs/http/jobs.html.md @@ -31,6 +31,11 @@ another region can be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -93,4 +98,3 @@ another region can be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/node.html.md b/website/source/docs/http/node.html.md index f16131f97..df09426d6 100644 --- a/website/source/docs/http/node.html.md +++ b/website/source/docs/http/node.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -82,9 +87,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/allocations -## GET -
Description
@@ -102,6 +104,11 @@ be specified using the `?region=` query parameter. None
+
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -128,7 +135,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/evaluate ## PUT / POST
@@ -163,9 +169,6 @@ be specified using the `?region=` query parameter.
-# /v1/node/\/drain -## PUT / POST -
Description
@@ -175,7 +178,7 @@ be specified using the `?region=` query parameter.
Method
-
PUT or POSt
+
PUT or POST
URL
`/v1/node//drain`
@@ -205,4 +208,3 @@ be specified using the `?region=` query parameter.
- diff --git a/website/source/docs/http/nodes.html.md b/website/source/docs/http/nodes.html.md index 36fa96fcd..b8e2b91a9 100644 --- a/website/source/docs/http/nodes.html.md +++ b/website/source/docs/http/nodes.html.md @@ -31,6 +31,11 @@ be specified using the `?region=` query parameter. None +
Blocking Queries
+
+ [Supported](/docs/http/index.html#blocking-queries) +
+
Returns
@@ -53,5 +58,3 @@ be specified using the `?region=` query parameter.
- -