diff --git a/.travis.yml b/.travis.yml index ee85997c8..3bd56f296 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ go: - tip env: - - TRAVIS_RUN=true DOCKER_VERSION=1.9.1 + - DOCKER_VERSION=1.9.1 matrix: allow_failures: diff --git a/CHANGELOG.md b/CHANGELOG.md index 75b6cc33c..f53be37bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ BACKWARDS INCOMPATIBILITIES: * core: Improved restart policy with more user configuration [GH-594] * core/cli: Print short identifiers [GH-675] + * core/consul: Validate service name doesn't include period [GH-770] * core/jobspec: Variables/constraints interpreted using ${} notation [GH-675] * client: Environment variable containing address for each allocated port [GH-704] @@ -26,6 +27,7 @@ IMPROVEMENTS: * core/client: Client pulls minimum set of required allocations [GH-731] * core/jobspec: Default task resources and validation [GH-739] * cli: Output of agent-info is sorted [GH-617] + * cli: Eval monitor detects zero wait condition [GH-776] * cli: Ability to navigate allocation directories [GH-709] * client: Handle updates to tasks Restart Policy and KillTimeout [GH-751] * client: Create a tmp/ directory inside each task directory [GH-757] @@ -34,6 +36,8 @@ IMPROVEMENTS: * drivers: Interpret Nomad variables in environment variables/args [GH-653] * driver/rkt: Add support for CPU/Memory isolation [GH-610] * driver/rkt: Add support for mounting alloc/task directory [GH-645] + * driver/docker: Support for .dockercfg based auth for private registries + [GH-773] BUG FIXES: * core: Node drain could only be partially applied [GH-750] @@ -42,6 +46,7 @@ BUG FIXES: * client: Handle non-200 codes when parsing AWS metadata [GH-614] * client: Cleanup of the allocation directory [GH-755] * client: Unmounted of shared alloc dir when client is rebooted [GH-755] + * client/consul: Service name changes handled properly [GH-766] * driver/rkt: handle broader format of rkt version outputs [GH-745] * driver/qemu: failed to load image and kvm accelerator fixes [GH-656] diff --git a/README.md b/README.md index 57c646b90..67d7d8442 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Developing Nomad If you wish to work on Nomad itself or any of its built-in systems, you will first need [Go](https://www.golang.org) installed on your -machine (version 1.4+ is *required*). +machine (version 1.5+ is *required*). **Developing with Vagrant** There is an included Vagrantfile that can help bootstrap the process. 
The diff --git a/Vagrantfile b/Vagrantfile index 83e87d8d2..b5733f353 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -62,6 +62,8 @@ sudo usermod -aG docker vagrant # Setup Nomad for development cd /opt/gopath/src/github.com/hashicorp/nomad && make updatedeps +# Install gox +go get github.com/mitchellh/gox # CD into the nomad working directory when we login to the VM grep "cd /opt/gopath/src/github.com/hashicorp/nomad" ~/.profile || echo "cd /opt/gopath/src/github.com/hashicorp/nomad" >> ~/.profile diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 928b0105b..f1c33fb72 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -364,8 +364,6 @@ func (r *AllocRunner) Run() { continue } - // Merge in the task resources - task.Resources = alloc.TaskResources[task.Name] tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.alloc, task, r.consulService) r.tasks[task.Name] = tr @@ -392,22 +390,6 @@ OUTER: r.taskLock.RLock() for _, task := range tg.Tasks { tr := r.tasks[task.Name] - - // Merge in the task resources - task.Resources = update.TaskResources[task.Name] - FOUND: - for _, updateGroup := range update.Job.TaskGroups { - if tg.Name != updateGroup.Name { - continue - } - for _, updateTask := range updateGroup.Tasks { - if updateTask.Name != task.Name { - continue - } - task.Services = updateTask.Services - break FOUND - } - } tr.Update(update) } r.taskLock.RUnlock() diff --git a/client/config/config.go b/client/config/config.go index 9e07bc12e..0d59a1b9a 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -57,6 +57,12 @@ type Config struct { // Node provides the base node Node *structs.Node + // ExecutorMaxPort defines the highest port a plugin process can use + ExecutorMaxPort int + + // ExecutorMinPort defines the lowest port a plugin process can use + ExecutorMinPort int + // Options provides arbitrary key-value configuration for nomad internals, // like fingerprinters and drivers. The format is: // diff --git a/client/consul.go b/client/consul.go index a5a015504..f0462486f 100644 --- a/client/consul.go +++ b/client/consul.go @@ -76,6 +76,7 @@ type ConsulService struct { trackedTasks map[string]*trackedTask serviceStates map[string]string + allocToService map[string][]string trackedTskLock sync.Mutex } @@ -130,12 +131,13 @@ func NewConsulService(config *consulServiceConfig) (*ConsulService, error) { } consulService := ConsulService{ - client: &consulApiClient{client: c}, - logger: config.logger, - node: config.node, - trackedTasks: make(map[string]*trackedTask), - serviceStates: make(map[string]string), - shutdownCh: make(chan struct{}), + client: &consulApiClient{client: c}, + logger: config.logger, + node: config.node, + trackedTasks: make(map[string]*trackedTask), + serviceStates: make(map[string]string), + allocToService: make(map[string][]string), + shutdownCh: make(chan struct{}), } return &consulService, nil @@ -148,8 +150,18 @@ func (c *ConsulService) Register(task *structs.Task, alloc *structs.Allocation) c.trackedTskLock.Lock() tt := &trackedTask{task: task, alloc: alloc} c.trackedTasks[fmt.Sprintf("%s-%s", alloc.ID, task.Name)] = tt + + // Delete any previously registered service as the same alloc is being + // re-registered. + for _, service := range c.allocToService[alloc.ID] { + delete(c.serviceStates, service) + } c.trackedTskLock.Unlock() + for _, service := range task.Services { + // Track the services this alloc is registering. 
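
Below is a minimal, self-contained sketch of the per-allocation service bookkeeping the ConsulService changes above introduce (an allocToService index consulted on re-registration). The type and method names are illustrative; the real implementation keys serviceStates by service ID and fills in the hashes during registration.

```go
package main

import (
	"fmt"
	"sync"
)

// serviceTracker is a pared-down stand-in for the bookkeeping ConsulService
// gains above: it remembers which service names each allocation registered so
// a re-registration can first drop any stale per-service state.
type serviceTracker struct {
	mu             sync.Mutex
	allocToService map[string][]string
	serviceStates  map[string]string // service -> last synced hash (set elsewhere)
}

func newServiceTracker() *serviceTracker {
	return &serviceTracker{
		allocToService: make(map[string][]string),
		serviceStates:  make(map[string]string),
	}
}

// register records the services for an alloc, clearing state left over from a
// previous registration of the same alloc.
func (s *serviceTracker) register(allocID string, services []string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	for _, old := range s.allocToService[allocID] {
		delete(s.serviceStates, old)
	}
	s.allocToService[allocID] = append([]string(nil), services...)
}

// deregister forgets everything known about an alloc.
func (s *serviceTracker) deregister(allocID string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	for _, old := range s.allocToService[allocID] {
		delete(s.serviceStates, old)
	}
	delete(s.allocToService, allocID)
}

func main() {
	t := newServiceTracker()
	t.register("alloc-1", []string{"web", "cache"})
	t.register("alloc-1", []string{"web"}) // drops the stale "cache" entry
	fmt.Println(t.allocToService["alloc-1"])
	t.deregister("alloc-1")
}
```
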
+ c.allocToService[alloc.ID] = append(c.allocToService[alloc.ID], service.Name) + c.logger.Printf("[INFO] consul: registering service %s with consul.", service.Name) if err := c.registerService(service, task, alloc); err != nil { mErr.Errors = append(mErr.Errors, err) @@ -165,6 +177,7 @@ func (c *ConsulService) Deregister(task *structs.Task, alloc *structs.Allocation var mErr multierror.Error c.trackedTskLock.Lock() delete(c.trackedTasks, fmt.Sprintf("%s-%s", alloc.ID, task.Name)) + delete(c.allocToService, alloc.ID) c.trackedTskLock.Unlock() for _, service := range task.Services { serviceID := alloc.Services[service.Name] @@ -229,14 +242,14 @@ func (c *ConsulService) performSync() { // Add new services which Consul agent isn't aware of knownServices[serviceID] = struct{}{} if _, ok := consulServices[serviceID]; !ok { - c.printLogMessage("[INFO] consul: registering service %s with consul.", service.Name) + c.printLogMessage("[INFO] consul: perform sync, registering service %s with consul.", service.Name) c.registerService(service, trackedTask.task, trackedTask.alloc) continue } // If a service has changed, re-register it with Consul agent if service.Hash() != c.serviceStates[serviceID] { - c.printLogMessage("[INFO] consul: reregistering service %s with consul.", service.Name) + c.printLogMessage("[INFO] consul: perform sync hash change, reregistering service %s with consul.", service.Name) c.registerService(service, trackedTask.task, trackedTask.alloc) continue } @@ -268,7 +281,7 @@ func (c *ConsulService) performSync() { for _, consulService := range consulServices { if _, ok := knownServices[consulService.ID]; !ok { delete(c.serviceStates, consulService.ID) - c.printLogMessage("[INFO] consul: deregistering service %v with consul", consulService.Service) + c.printLogMessage("[INFO] consul: perform sync, deregistering service %v with consul", consulService.Service) c.deregisterService(consulService.ID) } } diff --git a/client/driver/docker.go b/client/driver/docker.go index 78601817c..a71e9f060 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -5,6 +5,7 @@ import ( "fmt" "log" "net" + "os" "path/filepath" "strconv" "strings" @@ -448,6 +449,17 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle } } + if authConfig := d.config.Read("docker.auth.config"); authConfig != "" { + if f, err := os.Open(authConfig); err == nil { + defer f.Close() + if authConfigurations, err := docker.NewAuthConfigurations(f); err == nil { + if authConfiguration, ok := authConfigurations.Configs[repo]; ok { + authOptions = authConfiguration + } + } + } + } + err = client.PullImage(pullOptions, authOptions) if err != nil { d.logger.Printf("[ERR] driver.docker: failed pulling container %s:%s: %s", repo, tag, err) diff --git a/client/driver/exec.go b/client/driver/exec.go index a1ca8ded6..6b1f4f7a7 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -17,6 +17,8 @@ import ( "github.com/hashicorp/nomad/helper/discover" "github.com/hashicorp/nomad/nomad/structs" "github.com/mitchellh/mapstructure" + + cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" ) // ExecDriver fork/execs tasks using as many of the underlying OS's isolation @@ -36,6 +38,7 @@ type ExecDriverConfig struct { type execHandle struct { pluginClient *plugin.Client executor executor.Executor + groups *cgroupConfig.Cgroup userPid int killTimeout time.Duration logger *log.Logger @@ -102,7 +105,7 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, 
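
For reference, here is a hedged sketch of the `.dockercfg`-based lookup the docker driver hunk above performs with go-dockerclient. The helper name, config path, and registry key are illustrative; the file is expected to be the registry-keyed dockercfg JSON with a base64 `auth` field.

```go
package main

import (
	"fmt"
	"os"

	docker "github.com/fsouza/go-dockerclient"
)

// authForRepo loads a dockercfg-format file (registry URL -> {"auth": base64
// "user:password", "email": ...}) and returns the credentials stored for the
// given repository, if any. It mirrors the lookup in the docker driver above;
// the helper name is illustrative.
func authForRepo(cfgPath, repo string) (docker.AuthConfiguration, bool) {
	f, err := os.Open(cfgPath)
	if err != nil {
		return docker.AuthConfiguration{}, false
	}
	defer f.Close()

	auths, err := docker.NewAuthConfigurations(f)
	if err != nil {
		return docker.AuthConfiguration{}, false
	}
	auth, ok := auths.Configs[repo]
	return auth, ok
}

func main() {
	// Hypothetical path and registry key; docker.auth.config would point at
	// the same kind of file.
	if auth, ok := authForRepo("/etc/nomad/dockercfg.json", "https://index.docker.io/v1/"); ok {
		fmt.Println("found credentials for", auth.ServerAddress)
	} else {
		fmt.Println("no credentials found; pulling anonymously")
	}
}
```
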
if err != nil { return nil, fmt.Errorf("unable to find the nomad binary: %v", err) } - pluginLogFile := filepath.Join(ctx.AllocDir.AllocDir, "plugin.out") + pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name)) pluginConfig := &plugin.ClientConfig{ Cmd: exec.Command(bin, "executor", pluginLogFile), } @@ -126,13 +129,14 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, pluginClient.Kill() return nil, fmt.Errorf("error starting process via the plugin: %v", err) } - d.logger.Printf("started process via plugin with pid: %v", ps.Pid) + d.logger.Printf("[DEBUG] driver.exec: started process via plugin with pid: %v", ps.Pid) // Return a driver handle h := &execHandle{ pluginClient: pluginClient, userPid: ps.Pid, executor: exec, + groups: &ps.IsolationConfig, killTimeout: d.DriverContext.KillTimeout(task), logger: d.logger, doneCh: make(chan struct{}), @@ -145,6 +149,7 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, type execId struct { KillTimeout time.Duration UserPid int + Groups *cgroupConfig.Cgroup PluginConfig *ExecutorReattachConfig } @@ -154,15 +159,17 @@ func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) } - reattachConfig := id.PluginConfig.PluginConfig() pluginConfig := &plugin.ClientConfig{ - Reattach: reattachConfig, + Reattach: id.PluginConfig.PluginConfig(), } executor, client, err := createExecutor(pluginConfig, d.config.LogOutput) if err != nil { - d.logger.Println("[ERROR] error connecting to plugin so destroying plugin pid and user pid") + d.logger.Println("[ERROR] driver.exec: error connecting to plugin so destroying plugin pid and user pid") if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil { - d.logger.Printf("[ERROR] error destroying plugin and userpid: %v", e) + d.logger.Printf("[ERROR] driver.exec: error destroying plugin and userpid: %v", e) + } + if e := destroyCgroup(id.Groups); e != nil { + d.logger.Printf("[ERROR] driver.exec: %v", e) } return nil, fmt.Errorf("error connecting to plugin: %v", err) } @@ -172,6 +179,7 @@ func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro pluginClient: client, executor: executor, userPid: id.UserPid, + groups: id.Groups, logger: d.logger, killTimeout: id.KillTimeout, doneCh: make(chan struct{}), @@ -186,6 +194,7 @@ func (h *execHandle) ID() string { KillTimeout: h.killTimeout, PluginConfig: NewExecutorReattachConfig(h.pluginClient.ReattachConfig()), UserPid: h.userPid, + Groups: h.groups, } data, err := json.Marshal(id) diff --git a/client/driver/exec_test.go b/client/driver/exec_test.go index e7189f1fc..047584db0 100644 --- a/client/driver/exec_test.go +++ b/client/driver/exec_test.go @@ -1,10 +1,13 @@ package driver import ( + "encoding/json" "fmt" "io/ioutil" + "os" "path/filepath" "reflect" + "syscall" "testing" "time" @@ -75,6 +78,62 @@ func TestExecDriver_StartOpen_Wait(t *testing.T) { handle2.Kill() } +func TestExecDriver_KillUserPid_OnPluginReconnectFailure(t *testing.T) { + t.Parallel() + ctestutils.ExecCompatible(t) + task := &structs.Task{ + Name: "sleep", + Config: map[string]interface{}{ + "command": "/bin/sleep", + "args": []string{"1000000"}, + }, + Resources: basicResources, + } + + driverCtx, execCtx := testDriverContexts(task) + defer execCtx.AllocDir.Destroy() + d := NewExecDriver(driverCtx) + + handle, err := d.Start(execCtx, task) + defer handle.Kill() + if err != nil { + 
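
The exec driver now round-trips its reattach state (including the cgroup settings) through the JSON handle ID. The following simplified sketch shows that round trip; the struct here is a stand-in, not the real execId, just enough to illustrate the marshal/unmarshal flow.

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// handleID is a simplified stand-in for the driver's execId: everything a
// restarted client needs to reattach to (or clean up after) a task.
type handleID struct {
	KillTimeout time.Duration
	UserPid     int
	CgroupName  string // the real struct carries the full cgroup config
	PluginPid   int
}

func main() {
	id := handleID{
		KillTimeout: 5 * time.Second,
		UserPid:     4321,
		CgroupName:  "/nomad/task-example",
		PluginPid:   4300,
	}

	// The driver stores this JSON string as the opaque handle ID...
	data, err := json.Marshal(id)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))

	// ...and Open() parses it back before deciding whether to reattach or
	// destroy the leftover cgroup and processes.
	var parsed handleID
	if err := json.Unmarshal(data, &parsed); err != nil {
		panic(err)
	}
	fmt.Printf("user pid %d, cgroup %s\n", parsed.UserPid, parsed.CgroupName)
}
```
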
t.Fatalf("err: %v", err) + } + if handle == nil { + t.Fatalf("missing handle") + } + + id := &execId{} + if err := json.Unmarshal([]byte(handle.ID()), id); err != nil { + t.Fatalf("Failed to parse handle '%s': %v", handle.ID(), err) + } + pluginPid := id.PluginConfig.Pid + proc, err := os.FindProcess(pluginPid) + if err != nil { + t.Fatalf("can't find plugin pid: %v", pluginPid) + } + if err := proc.Kill(); err != nil { + t.Fatalf("can't kill plugin pid: %v", err) + } + + // Attempt to open + handle2, err := d.Open(execCtx, handle.ID()) + if err == nil { + t.Fatalf("expected error") + } + if handle2 != nil { + handle2.Kill() + t.Fatalf("expected handle2 to be nil") + } + // Test if the userpid is still present + userProc, err := os.FindProcess(id.UserPid) + + err = userProc.Signal(syscall.Signal(0)) + if err != nil { + t.Fatalf("expected user process to die") + } +} + func TestExecDriver_Start_Wait(t *testing.T) { t.Parallel() ctestutils.ExecCompatible(t) diff --git a/client/driver/executor/executor.go b/client/driver/executor/executor.go index 408668010..1d92a013b 100644 --- a/client/driver/executor/executor.go +++ b/client/driver/executor/executor.go @@ -8,10 +8,12 @@ import ( "os/exec" "path/filepath" "runtime" + "strings" "sync" "syscall" "time" + "github.com/hashicorp/go-multierror" cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" "github.com/hashicorp/nomad/client/allocdir" @@ -20,35 +22,56 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -// ExecutorContext is a wrapper to hold context to configure the command user -// wants to run +// ExecutorContext holds context to configure the command user +// wants to run and isolate it type ExecutorContext struct { - TaskEnv *env.TaskEnvironment - AllocDir *allocdir.AllocDir - TaskName string - TaskResources *structs.Resources - LogConfig *structs.LogConfig - FSIsolation bool - ResourceLimits bool + // TaskEnv holds information about the environment of a Task + TaskEnv *env.TaskEnvironment + + // AllocDir is the handle to do operations on the alloc dir of + // the task + AllocDir *allocdir.AllocDir + + // TaskName is the name of the Task + TaskName string + + // TaskResources are the resource constraints for the Task + TaskResources *structs.Resources + + // FSIsolation is a flag for drivers to impose file system + // isolation on certain platforms + FSIsolation bool + + // ResourceLimits is a flag for drivers to impose resource + // contraints on a Task on certain platforms + ResourceLimits bool + + // UnprivilegedUser is a flag for drivers to make the process + // run as nobody UnprivilegedUser bool + + // LogConfig provides the configuration related to log rotation + LogConfig *structs.LogConfig } -// ExecCommand is a wrapper to hold the user command +// ExecCommand holds the user command and args. It's a lightweight replacement +// of exec.Cmd for serialization purposes. type ExecCommand struct { Cmd string Args []string } -// ProcessState holds information about the state of -// a user process +// ProcessState holds information about the state of a user process. 
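
The reconnect-failure test above probes the user pid after the plugin dies. On Unix the conventional liveness probe is signal 0, as in this small sketch (the helper name is mine):

```go
package main

import (
	"fmt"
	"os"
	"syscall"
)

// pidAlive reports whether a process with the given pid exists and is
// signalable by us. Signal 0 performs the existence and permission checks
// without actually delivering a signal.
func pidAlive(pid int) bool {
	proc, err := os.FindProcess(pid) // always succeeds on Unix
	if err != nil {
		return false
	}
	return proc.Signal(syscall.Signal(0)) == nil
}

func main() {
	fmt.Println("current process alive:", pidAlive(os.Getpid()))
	// A pid that almost certainly does not exist on a default pid_max.
	fmt.Println("bogus pid alive:", pidAlive(999999))
}
```
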
type ProcessState struct { - Pid int - ExitCode int - Time time.Time + Pid int + ExitCode int + Signal int + IsolationConfig cgroupConfig.Cgroup + Time time.Time } // Executor is the interface which allows a driver to launch and supervise -// a process user wants to run +// a process type Executor interface { LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error) Wait() (*ProcessState, error) @@ -83,17 +106,22 @@ func NewExecutor(logger *log.Logger) Executor { // LaunchCmd launches a process and returns it's state. It also configures an // applies isolation on certain platforms. func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error) { - e.logger.Printf("[INFO] executor: launching command %v", command.Cmd) + e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, "")) e.ctx = ctx + // configuring the task dir if err := e.configureTaskDir(); err != nil { return nil, err } + + // configuring the chroot, cgroup and enters the plugin process in the + // chroot if err := e.configureIsolation(); err != nil { return nil, err } + // setting the user of the process if e.ctx.UnprivilegedUser { if err := e.runAs("nobody"); err != nil { return nil, err @@ -103,7 +131,9 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext logFileSize := int64(ctx.LogConfig.MaxFileSizeMB * 1024 * 1024) stdor, stdow := io.Pipe() - lro, err := logrotator.NewLogRotator(filepath.Join(e.taskDir, allocdir.TaskLocal), fmt.Sprintf("%v.stdout", ctx.TaskName), ctx.LogConfig.MaxFiles, logFileSize, e.logger) + lro, err := logrotator.NewLogRotator(filepath.Join(e.taskDir, allocdir.TaskLocal), + fmt.Sprintf("%v.stdout", ctx.TaskName), ctx.LogConfig.MaxFiles, + logFileSize, e.logger) if err != nil { return nil, fmt.Errorf("error creating log rotator for stdout of task %v", err) } @@ -112,7 +142,9 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext go lro.Start(stdor) stder, stdew := io.Pipe() - lre, err := logrotator.NewLogRotator(filepath.Join(e.taskDir, allocdir.TaskLocal), fmt.Sprintf("%v.stderr", ctx.TaskName), ctx.LogConfig.MaxFiles, logFileSize, e.logger) + lre, err := logrotator.NewLogRotator(filepath.Join(e.taskDir, allocdir.TaskLocal), + fmt.Sprintf("%v.stderr", ctx.TaskName), ctx.LogConfig.MaxFiles, + logFileSize, e.logger) if err != nil { return nil, fmt.Errorf("error creating log rotator for stderr of task %v", err) } @@ -120,8 +152,9 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext e.lre = lre go lre.Start(stder) + // setting the env, path and args for the command + e.ctx.TaskEnv.Build() e.cmd.Env = ctx.TaskEnv.EnvList() - e.cmd.Path = ctx.TaskEnv.ReplaceEnv(command.Cmd) e.cmd.Args = append([]string{e.cmd.Path}, ctx.TaskEnv.ParseAndReplace(command.Args)...) 
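
LaunchCmd wires the child's stdout and stderr through io.Pipe so the read side can be handed to the log rotator. A stripped-down, stdlib-only sketch of that wiring, printing lines instead of rotating them:

```go
package main

import (
	"bufio"
	"fmt"
	"io"
	"os/exec"
)

func main() {
	// Wire the command's stdout through an in-memory pipe so a separate
	// goroutine can consume it (the executor above feeds the read side to a
	// log rotator instead of printing it).
	pr, pw := io.Pipe()

	cmd := exec.Command("/bin/echo", "hello world")
	cmd.Stdout = pw

	done := make(chan struct{})
	go func() {
		defer close(done)
		scanner := bufio.NewScanner(pr)
		for scanner.Scan() {
			fmt.Println("captured:", scanner.Text())
		}
	}()

	if err := cmd.Run(); err != nil {
		fmt.Println("command failed:", err)
	}
	// Close the write side so the reader goroutine sees EOF and exits.
	pw.Close()
	<-done
}
```
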
if filepath.Base(command.Cmd) == command.Cmd { @@ -131,13 +164,13 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext } } + // starting the process if err := e.cmd.Start(); err != nil { return nil, fmt.Errorf("error starting command: %v", err) } - e.applyLimits() go e.wait() - return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil + return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: *e.groups, Time: time.Now()}, nil } // Wait waits until a process has exited and returns it's exitcode and errors @@ -187,24 +220,28 @@ func (e *UniversalExecutor) wait() { // Exit cleans up the alloc directory, destroys cgroups and kills the user // process func (e *UniversalExecutor) Exit() error { - e.logger.Printf("[INFO] Exiting plugin for task %q", e.ctx.TaskName) - if e.cmd.Process == nil { - return fmt.Errorf("executor.exit error: no process found") - } - proc, err := os.FindProcess(e.cmd.Process.Pid) - if err != nil { - return fmt.Errorf("failied to find user process %v: %v", e.cmd.Process.Pid, err) + var merr multierror.Error + if e.cmd.Process != nil { + proc, err := os.FindProcess(e.cmd.Process.Pid) + if err != nil { + e.logger.Printf("[ERROR] can't find process with pid: %v, err: %v", e.cmd.Process.Pid, err) + } + if err := proc.Kill(); err != nil { + e.logger.Printf("[ERROR] can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err) + } } + if e.ctx.FSIsolation { - e.removeChrootMounts() + if err := e.removeChrootMounts(); err != nil { + merr.Errors = append(merr.Errors, err) + } } if e.ctx.ResourceLimits { - e.destroyCgroup() + if err := e.destroyCgroup(); err != nil { + merr.Errors = append(merr.Errors, err) + } } - if err = proc.Kill(); err != nil { - e.logger.Printf("[DEBUG] executor.exit error: %v", err) - } - return nil + return merr.ErrorOrNil() } // Shutdown sends an interrupt signal to the user process diff --git a/client/driver/executor/executor_basic.go b/client/driver/executor/executor_basic.go index 88acf1105..84d080233 100644 --- a/client/driver/executor/executor_basic.go +++ b/client/driver/executor/executor_basic.go @@ -18,7 +18,7 @@ func (e *UniversalExecutor) runAs(userid string) error { return nil } -func (e *UniversalExecutor) applyLimits() error { +func (e *UniversalExecutor) applyLimits(pid int) error { return nil } diff --git a/client/driver/executor/executor_linux.go b/client/driver/executor/executor_linux.go index 0b3406e47..c95db04d0 100644 --- a/client/driver/executor/executor_linux.go +++ b/client/driver/executor/executor_linux.go @@ -45,23 +45,35 @@ func (e *UniversalExecutor) configureIsolation() error { if err := e.configureCgroups(e.ctx.TaskResources); err != nil { return fmt.Errorf("error creating cgroups: %v", err) } + if err := e.applyLimits(os.Getpid()); err != nil { + if er := e.destroyCgroup(); er != nil { + e.logger.Printf("[ERROR] error destroying cgroup: %v", er) + } + if er := e.removeChrootMounts(); er != nil { + e.logger.Printf("[ERROR] error removing chroot: %v", er) + } + return fmt.Errorf("error entering the plugin process in the cgroup: %v:", err) + } } return nil } // applyLimits puts a process in a pre-configured cgroup -func (e *UniversalExecutor) applyLimits() error { +func (e *UniversalExecutor) applyLimits(pid int) error { if !e.ctx.ResourceLimits { return nil } + + // Entering the process in the cgroup manager := e.getCgroupManager(e.groups) - if err := manager.Apply(e.cmd.Process.Pid); err != nil { + if err := manager.Apply(pid); err != nil { 
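
The isolation change above enters the plugin process itself into the task's cgroup (applyLimits(os.Getpid())) before the user command is started. A hedged sketch of that join, assuming the libcontainer revision vendored by this change; the package and function names are illustrative:

```go
package driverutil

import (
	"fmt"
	"os"

	"github.com/opencontainers/runc/libcontainer/cgroups"
	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)

// joinSelfToCgroup places the calling process into the given cgroup so that
// any command it execs afterwards inherits the same resource limits. This
// mirrors the applyLimits(os.Getpid()) call above.
func joinSelfToCgroup(group *cgroupConfig.Cgroup) error {
	if group == nil {
		return nil
	}
	var manager cgroups.Manager
	manager = &cgroupFs.Manager{Cgroups: group}
	if systemd.UseSystemd() {
		manager = &systemd.Manager{Cgroups: group}
	}
	if err := manager.Apply(os.Getpid()); err != nil {
		return fmt.Errorf("failed to join cgroup: %v", err)
	}
	return nil
}
```

Because cgroup membership is inherited across fork/exec, joining the current process first is enough for the task it launches to start inside the same limits.
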
e.logger.Printf("[ERROR] unable to join cgroup: %v", err) if err := e.Exit(); err != nil { e.logger.Printf("[ERROR] unable to kill process: %v", err) } return err } + return nil } @@ -210,8 +222,7 @@ func (e *UniversalExecutor) removeChrootMounts() error { } } - // Unmount - // proc. + // Unmount proc. proc := filepath.Join(e.taskDir, "proc") if e.pathExists(proc) { if err := syscall.Unmount(proc, 0); err != nil { diff --git a/client/driver/executor/executor_test.go b/client/driver/executor/executor_test.go index 2b7e281ac..26876e7e9 100644 --- a/client/driver/executor/executor_test.go +++ b/client/driver/executor/executor_test.go @@ -11,9 +11,10 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/driver/env" + "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" + tu "github.com/hashicorp/nomad/testutil" ) var ( @@ -57,17 +58,18 @@ func TestExecutor_Start_Invalid(t *testing.T) { invalid := "/bin/foobar" execCmd := ExecCommand{Cmd: invalid, Args: []string{"1"}} ctx := testExecutorContext(t) + defer ctx.AllocDir.Destroy() executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags)) _, err := executor.LaunchCmd(&execCmd, ctx) if err == nil { t.Fatalf("Expected error") } - defer ctx.AllocDir.Destroy() } func TestExecutor_Start_Wait_Failure_Code(t *testing.T) { execCmd := ExecCommand{Cmd: "/bin/sleep", Args: []string{"fail"}} ctx := testExecutorContext(t) + defer ctx.AllocDir.Destroy() executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags)) ps, _ := executor.LaunchCmd(&execCmd, ctx) if ps.Pid == 0 { @@ -77,12 +79,12 @@ func TestExecutor_Start_Wait_Failure_Code(t *testing.T) { if ps.ExitCode < 1 { t.Fatalf("expected exit code to be non zero, actual: %v", ps.ExitCode) } - defer ctx.AllocDir.Destroy() } func TestExecutor_Start_Wait(t *testing.T) { execCmd := ExecCommand{Cmd: "/bin/echo", Args: []string{"hello world"}} ctx := testExecutorContext(t) + defer ctx.AllocDir.Destroy() executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags)) ps, err := executor.LaunchCmd(&execCmd, ctx) if err != nil { @@ -95,8 +97,51 @@ func TestExecutor_Start_Wait(t *testing.T) { if err != nil { t.Fatalf("error in waiting for command: %v", err) } + + task := "web" + taskDir, ok := ctx.AllocDir.TaskDirs[task] + if !ok { + log.Panicf("No task directory found for task %v", task) + } + + expected := "hello world" + file := filepath.Join(allocdir.TaskLocal, "web.stdout") + absFilePath := filepath.Join(taskDir, file) + output, err := ioutil.ReadFile(absFilePath) + if err != nil { + t.Fatalf("Couldn't read file %v", absFilePath) + } + + act := strings.TrimSpace(string(output)) + if act != expected { + t.Fatalf("Command output incorrectly: want %v; got %v", expected, act) + } +} + +func TestExecutor_IsolationAndConstraints(t *testing.T) { + testutil.ExecCompatible(t) + + execCmd := ExecCommand{Cmd: "/bin/echo", Args: []string{"hello world"}} + ctx := testExecutorContext(t) defer ctx.AllocDir.Destroy() + ctx.FSIsolation = true + ctx.ResourceLimits = true + ctx.UnprivilegedUser = true + + executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags)) + ps, err := executor.LaunchCmd(&execCmd, ctx) + if err != nil { + t.Fatalf("error in launching command: %v", err) + } + if ps.Pid == 0 { + t.Fatalf("expected process to start and have non zero pid") + } + ps, err = executor.Wait() + if err != nil { + t.Fatalf("error in waiting for command: %v", err) + } + task := 
"web" taskDir, ok := ctx.AllocDir.TaskDirs[task] if !ok { @@ -120,6 +165,7 @@ func TestExecutor_Start_Wait(t *testing.T) { func TestExecutor_Start_Kill(t *testing.T) { execCmd := ExecCommand{Cmd: "/bin/sleep", Args: []string{"10 && hello world"}} ctx := testExecutorContext(t) + defer ctx.AllocDir.Destroy() executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags)) ps, err := executor.LaunchCmd(&execCmd, ctx) if err != nil { @@ -132,7 +178,6 @@ func TestExecutor_Start_Kill(t *testing.T) { if err != nil { t.Fatalf("error in waiting for command: %v", err) } - defer ctx.AllocDir.Destroy() task := "web" taskDir, ok := ctx.AllocDir.TaskDirs[task] @@ -143,7 +188,7 @@ func TestExecutor_Start_Kill(t *testing.T) { file := filepath.Join(allocdir.TaskLocal, "web.stdout") absFilePath := filepath.Join(taskDir, file) - time.Sleep(time.Duration(testutil.TestMultiplier()*2) * time.Second) + time.Sleep(time.Duration(tu.TestMultiplier()*2) * time.Second) output, err := ioutil.ReadFile(absFilePath) if err != nil { diff --git a/client/driver/java.go b/client/driver/java.go index 3b807de0f..b30f87c26 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/go-plugin" "github.com/mitchellh/mapstructure" + cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/executor" @@ -43,6 +44,7 @@ type javaHandle struct { pluginClient *plugin.Client userPid int executor executor.Executor + groups *cgroupConfig.Cgroup killTimeout time.Duration logger *log.Logger @@ -148,7 +150,7 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("unable to find the nomad binary: %v", err) } - pluginLogFile := filepath.Join(ctx.AllocDir.AllocDir, "plugin.out") + pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name)) pluginConfig := &plugin.ClientConfig{ Cmd: exec.Command(bin, "executor", pluginLogFile), } @@ -158,24 +160,26 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, err } executorCtx := &executor.ExecutorContext{ - TaskEnv: d.taskEnv, - AllocDir: ctx.AllocDir, - TaskName: task.Name, - TaskResources: task.Resources, - LogConfig: task.LogConfig, + TaskEnv: d.taskEnv, + AllocDir: ctx.AllocDir, + TaskName: task.Name, + TaskResources: task.Resources, + UnprivilegedUser: true, + LogConfig: task.LogConfig, } ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: "java", Args: args}, executorCtx) if err != nil { pluginClient.Kill() return nil, fmt.Errorf("error starting process via the plugin: %v", err) } - d.logger.Printf("[INFO] started process with pid: %v", ps.Pid) + d.logger.Printf("[DEBUG] driver.java: started process with pid: %v", ps.Pid) // Return a driver handle h := &javaHandle{ pluginClient: pluginClient, executor: exec, userPid: ps.Pid, + groups: &ps.IsolationConfig, killTimeout: d.DriverContext.KillTimeout(task), logger: d.logger, doneCh: make(chan struct{}), @@ -189,6 +193,7 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, type javaId struct { KillTimeout time.Duration PluginConfig *ExecutorReattachConfig + Groups *cgroupConfig.Cgroup UserPid int } @@ -198,16 +203,19 @@ func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) } - reattachConfig := id.PluginConfig.PluginConfig() pluginConfig := &plugin.ClientConfig{ - 
Reattach: reattachConfig, + Reattach: id.PluginConfig.PluginConfig(), } executor, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput) if err != nil { - d.logger.Println("[ERROR] error connecting to plugin so destroying plugin pid and user pid") + d.logger.Println("[ERROR] driver.java: error connecting to plugin so destroying plugin pid and user pid") if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil { - d.logger.Printf("[ERROR] error destroying plugin and userpid: %v", e) + d.logger.Printf("[ERROR] driver.java: error destroying plugin and userpid: %v", e) } + if e := destroyCgroup(id.Groups); e != nil { + d.logger.Printf("[ERROR] driver.exec: %v", e) + } + return nil, fmt.Errorf("error connecting to plugin: %v", err) } @@ -216,6 +224,7 @@ func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro pluginClient: pluginClient, executor: executor, userPid: id.UserPid, + groups: id.Groups, logger: d.logger, killTimeout: id.KillTimeout, doneCh: make(chan struct{}), @@ -231,6 +240,7 @@ func (h *javaHandle) ID() string { KillTimeout: h.killTimeout, PluginConfig: NewExecutorReattachConfig(h.pluginClient.ReattachConfig()), UserPid: h.userPid, + Groups: h.groups, } data, err := json.Marshal(id) diff --git a/client/driver/qemu.go b/client/driver/qemu.go index 06709d71a..44ce6957a 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -192,7 +192,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, return nil, fmt.Errorf("unable to find the nomad binary: %v", err) } - pluginLogFile := filepath.Join(ctx.AllocDir.AllocDir, "plugin.out") + pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name)) pluginConfig := &plugin.ClientConfig{ Cmd: exec.Command(bin, "executor", pluginLogFile), } @@ -213,7 +213,6 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, pluginClient.Kill() return nil, fmt.Errorf("error starting process via the plugin: %v", err) } - d.logger.Printf("[INFO] started process with pid: %v", ps.Pid) d.logger.Printf("[INFO] Started new QemuVM: %s", vmID) // Create and Return Handle @@ -243,16 +242,15 @@ func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) } - reattachConfig := id.PluginConfig.PluginConfig() pluginConfig := &plugin.ClientConfig{ - Reattach: reattachConfig, + Reattach: id.PluginConfig.PluginConfig(), } executor, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput) if err != nil { - d.logger.Println("[ERROR] error connecting to plugin so destroying plugin pid and user pid") + d.logger.Println("[ERROR] driver.qemu: error connecting to plugin so destroying plugin pid and user pid") if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil { - d.logger.Printf("[ERROR] error destroying plugin and userpid: %v", e) + d.logger.Printf("[ERROR] driver.qemu: error destroying plugin and userpid: %v", e) } return nil, fmt.Errorf("error connecting to plugin: %v", err) } diff --git a/client/driver/raw_exec.go b/client/driver/raw_exec.go index c67623367..154a7ed1b 100644 --- a/client/driver/raw_exec.go +++ b/client/driver/raw_exec.go @@ -98,7 +98,7 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl if err != nil { return nil, fmt.Errorf("unable to find the nomad binary: %v", err) } - pluginLogFile := filepath.Join(ctx.AllocDir.AllocDir, "plugin.out") + pluginLogFile := 
filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name)) pluginConfig := &plugin.ClientConfig{ Cmd: exec.Command(bin, "executor", pluginLogFile), } @@ -119,7 +119,7 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl pluginClient.Kill() return nil, fmt.Errorf("error starting process via the plugin: %v", err) } - d.logger.Printf("[INFO] started process with pid: %v", ps.Pid) + d.logger.Printf("[DEBUG] driver.raw_exec: started process with pid: %v", ps.Pid) // Return a driver handle h := &rawExecHandle{ @@ -152,9 +152,9 @@ func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, e } executor, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput) if err != nil { - d.logger.Println("[ERROR] error connecting to plugin so destroying plugin pid and user pid") + d.logger.Println("[ERROR] driver.raw_exec: error connecting to plugin so destroying plugin pid and user pid") if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil { - d.logger.Printf("[ERROR] error destroying plugin and userpid: %v", e) + d.logger.Printf("[ERROR] driver.raw_exec: error destroying plugin and userpid: %v", e) } return nil, fmt.Errorf("error connecting to plugin: %v", err) } diff --git a/client/driver/utils.go b/client/driver/utils.go index b42c4a74a..f2f8fc02a 100644 --- a/client/driver/utils.go +++ b/client/driver/utils.go @@ -10,9 +10,14 @@ import ( "github.com/hashicorp/nomad/client/driver/executor" ) +// createExecutor launches an executor plugin and returns an instance of the +// Executor interface func createExecutor(config *plugin.ClientConfig, w io.Writer) (executor.Executor, *plugin.Client, error) { config.HandshakeConfig = HandshakeConfig config.Plugins = GetPluginMap(w) + if config.Cmd != nil { + isolateCommand(config.Cmd) + } executorClient := plugin.NewClient(config) rpcClient, err := executorClient.Client() if err != nil { @@ -27,6 +32,7 @@ func createExecutor(config *plugin.ClientConfig, w io.Writer) (executor.Executor return executorPlugin, executorClient, nil } +// killProcess kills a process with the given pid func killProcess(pid int) error { proc, err := os.FindProcess(pid) if err != nil { @@ -35,6 +41,8 @@ func killProcess(pid int) error { return proc.Kill() } +// destroyPlugin kills the plugin with the given pid and also kills the user +// process func destroyPlugin(pluginPid int, userPid int) error { var merr error if err := killProcess(pluginPid); err != nil { diff --git a/client/driver/utils_linux.go b/client/driver/utils_linux.go new file mode 100644 index 000000000..eb7022cd0 --- /dev/null +++ b/client/driver/utils_linux.go @@ -0,0 +1,40 @@ +package driver + +import ( + "os/exec" + "syscall" + + "fmt" + "github.com/opencontainers/runc/libcontainer/cgroups" + cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" +) + +// isolateCommand sets the setsid flag in exec.Cmd to true so that the process +// becomes the process leader in a new session and doesn't receive signals that +// are sent to the parent process. 
+func isolateCommand(cmd *exec.Cmd) { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Setsid = true +} + +// destroyCgroup destroys a cgroup and thereby killing all the processes in that +// group +func destroyCgroup(group *cgroupConfig.Cgroup) error { + if group == nil { + return nil + } + var manager cgroups.Manager + manager = &cgroupFs.Manager{Cgroups: group} + if systemd.UseSystemd() { + manager = &systemd.Manager{Cgroups: group} + } + + if err := manager.Destroy(); err != nil { + return fmt.Errorf("failed to destroy cgroup: %v", err) + } + return nil +} diff --git a/client/driver/utils_posix.go b/client/driver/utils_posix.go new file mode 100644 index 000000000..cf90d109d --- /dev/null +++ b/client/driver/utils_posix.go @@ -0,0 +1,24 @@ +// +build !linux + +package driver + +import ( + "os/exec" + "syscall" + + cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" +) + +// isolateCommand sets the setsid flag in exec.Cmd to true so that the process +// becomes the process leader in a new session and doesn't receive signals that +// are sent to the parent process. +func isolateCommand(cmd *exec.Cmd) { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Setsid = true +} + +func destroyCgroup(group *cgroupConfig.Cgroup) error { + return nil +} diff --git a/client/driver/utils_windows.go b/client/driver/utils_windows.go new file mode 100644 index 000000000..84aff1e5f --- /dev/null +++ b/client/driver/utils_windows.go @@ -0,0 +1,15 @@ +package driver + +import ( + "os/exec" + + cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" +) + +// TODO Figure out if this is needed in Wondows +func isolateCommand(cmd *exec.Cmd) { +} + +func destroyCgroup(group *cgroupConfig.Cgroup) error { + return nil +} diff --git a/client/task_runner.go b/client/task_runner.go index 3d2b5c7c2..83a29bb65 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -10,9 +10,11 @@ import ( "sync" "time" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver" "github.com/hashicorp/nomad/nomad/structs" + "github.com/mitchellh/hashstructure" cstructs "github.com/hashicorp/nomad/client/driver/structs" ) @@ -54,6 +56,9 @@ func NewTaskRunner(logger *log.Logger, config *config.Config, alloc *structs.Allocation, task *structs.Task, consulService *ConsulService) *TaskRunner { + // Merge in the task resources + task.Resources = alloc.TaskResources[task.Name] + // Build the restart tracker. tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) if tg == nil { @@ -319,21 +324,24 @@ func (r *TaskRunner) handleUpdate(update *structs.Allocation) error { } // Extract the task. - var task *structs.Task + var updatedTask *structs.Task for _, t := range tg.Tasks { if t.Name == r.task.Name { - task = t + updatedTask = t } } - if task == nil { + if updatedTask == nil { return fmt.Errorf("task group %q doesn't contain task %q", tg.Name, r.task.Name) } - r.task = task + + // Merge in the task resources + updatedTask.Resources = update.TaskResources[updatedTask.Name] // Update will update resources and store the new kill timeout. 
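
The isolateCommand helpers above put the executor in its own session so that signals aimed at the agent's process group don't reach it. A Unix-only sketch of the same Setsid technique in isolation:

```go
package main

import (
	"fmt"
	"os/exec"
	"syscall"
)

func main() {
	cmd := exec.Command("/bin/sleep", "30")

	// Start the child in its own session (and process group), like the
	// isolateCommand helpers above: a Ctrl-C delivered to the agent's
	// process group will no longer reach the executor or its task.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}

	if err := cmd.Start(); err != nil {
		fmt.Println("start failed:", err)
		return
	}
	fmt.Println("started detached child with pid", cmd.Process.Pid)
	cmd.Wait()
}
```
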
+ var mErr multierror.Error if r.handle != nil { - if err := r.handle.Update(task); err != nil { - r.logger.Printf("[ERR] client: failed to update task '%s' for alloc '%s': %v", r.task.Name, r.alloc.ID, err) + if err := r.handle.Update(updatedTask); err != nil { + mErr.Errors = append(mErr.Errors, fmt.Errorf("updating task resources failed: %v", err)) } } @@ -342,14 +350,26 @@ func (r *TaskRunner) handleUpdate(update *structs.Allocation) error { r.restartTracker.SetPolicy(tg.RestartPolicy) } - /* TODO - // Re-register the task to consul and store the updated alloc. - r.consulService.Deregister(r.task, r.alloc) - r.alloc = update - r.consulService.Register(r.task, r.alloc) - */ + // Hash services returns the hash of the task's services + hashServices := func(task *structs.Task) uint64 { + h, err := hashstructure.Hash(task.Services, nil) + if err != nil { + mErr.Errors = append(mErr.Errors, fmt.Errorf("hashing services failed %#v: %v", task.Services, err)) + } + return h + } - return nil + // Re-register the task to consul if any of the services have changed. + if hashServices(updatedTask) != hashServices(r.task) { + if err := r.consulService.Register(updatedTask, update); err != nil { + mErr.Errors = append(mErr.Errors, fmt.Errorf("updating services with consul failed: %v", err)) + } + } + + // Store the updated alloc. + r.alloc = update + r.task = updatedTask + return mErr.ErrorOrNil() } // Helper function for converting a WaitResult into a TaskTerminated event. diff --git a/command/agent/agent.go b/command/agent/agent.go index 0c7f1dd48..28e5e6b60 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -7,6 +7,7 @@ import ( "net" "os" "path/filepath" + "runtime" "sync" "time" @@ -238,6 +239,36 @@ func (a *Agent) setupClient() error { } conf.Node.HTTPAddr = httpAddr + // Reserve some ports for the plugins + if runtime.GOOS == "windows" { + deviceName, err := a.findLoopbackDevice() + if conf.ExecutorMaxPort == 0 { + conf.ExecutorMaxPort = 15000 + } + if conf.ExecutorMinPort == 0 { + conf.ExecutorMinPort = 14000 + } + if err != nil { + return fmt.Errorf("error finding the device name for the ip 127.0.0.1: %v", err) + } + var nr *structs.NetworkResource + for _, n := range conf.Node.Reserved.Networks { + if n.Device == deviceName { + nr = n + } + } + if nr == nil { + nr = &structs.NetworkResource{ + Device: deviceName, + ReservedPorts: make([]structs.Port, 0), + } + } + for i := conf.ExecutorMinPort; i <= conf.ExecutorMaxPort; i++ { + nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: i}) + } + + } + // Create the client client, err := client.NewClient(conf) if err != nil { @@ -247,6 +278,28 @@ func (a *Agent) setupClient() error { return nil } +func (a *Agent) findLoopbackDevice() (string, error) { + var ifcs []net.Interface + var err error + var deviceName string + ifcs, err = net.Interfaces() + if err != nil { + return "", err + } + for _, ifc := range ifcs { + addrs, err := ifc.Addrs() + if err != nil { + return deviceName, err + } + for _, addr := range addrs { + if net.ParseIP(addr.String()).IsLoopback() { + return ifc.Name, nil + } + } + } + return deviceName, err +} + // Leave is used gracefully exit. Clients will inform servers // of their departure so that allocations can be rescheduled. 
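
handleUpdate above re-registers with Consul only when the hash of the task's services changes. A small sketch of that comparison using mitchellh/hashstructure; the service struct here is a pared-down stand-in for structs.Service:

```go
package main

import (
	"fmt"

	"github.com/mitchellh/hashstructure"
)

// service is a pared-down stand-in for structs.Service, just enough to show
// the comparison the task runner above performs.
type service struct {
	Name      string
	PortLabel string
	Tags      []string
}

// servicesChanged hashes both service lists and reports whether they differ,
// which is what gates the Consul re-registration in handleUpdate.
func servicesChanged(old, updated []service) (bool, error) {
	oldHash, err := hashstructure.Hash(old, nil)
	if err != nil {
		return false, err
	}
	newHash, err := hashstructure.Hash(updated, nil)
	if err != nil {
		return false, err
	}
	return oldHash != newHash, nil
}

func main() {
	old := []service{{Name: "web", PortLabel: "http"}}
	updated := []service{{Name: "web", PortLabel: "http", Tags: []string{"v2"}}}

	changed, err := servicesChanged(old, updated)
	if err != nil {
		panic(err)
	}
	fmt.Println("re-register with Consul:", changed)
}
```
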
func (a *Agent) Leave() error { diff --git a/command/fs.go b/command/fs.go new file mode 100644 index 000000000..3925d044b --- /dev/null +++ b/command/fs.go @@ -0,0 +1,19 @@ +package command + +import "github.com/mitchellh/cli" + +type FSCommand struct { + Meta +} + +func (f *FSCommand) Help() string { + return "This command is accessed by using one of the subcommands below." +} + +func (f *FSCommand) Synopsis() string { + return "Inspect the contents of an allocation directory" +} + +func (f *FSCommand) Run(args []string) int { + return cli.RunResultHelp +} diff --git a/command/fs_cat.go b/command/fs_cat.go index 21f1b7641..e28e92b41 100644 --- a/command/fs_cat.go +++ b/command/fs_cat.go @@ -29,7 +29,7 @@ func (f *FSCatCommand) Help() string { } func (f *FSCatCommand) Synopsis() string { - return "displays a file at a given location" + return "Cat a file in an allocation directory" } func (f *FSCatCommand) Run(args []string) int { diff --git a/command/fs_ls.go b/command/fs_ls.go index 7066d30a2..12d0a9298 100644 --- a/command/fs_ls.go +++ b/command/fs_ls.go @@ -28,7 +28,7 @@ Usage: nomad fs ls } func (f *FSListCommand) Synopsis() string { - return "Lists list of files of an allocation directory" + return "List files in an allocation directory" } func (f *FSListCommand) Run(args []string) int { diff --git a/command/fs_stat.go b/command/fs_stat.go index 5ba345408..91c09a591 100644 --- a/command/fs_stat.go +++ b/command/fs_stat.go @@ -27,7 +27,7 @@ Usage: nomad fs stat } func (f *FSStatCommand) Synopsis() string { - return "Stats an entry in an allocation directory" + return "Stat an entry in an allocation directory" } func (f *FSStatCommand) Run(args []string) int { diff --git a/command/monitor.go b/command/monitor.go index af23737da..025f8e22e 100644 --- a/command/monitor.go +++ b/command/monitor.go @@ -299,12 +299,14 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int { // Monitor the next eval in the chain, if present if eval.NextEval != "" { - m.ui.Info(fmt.Sprintf( - "Monitoring next evaluation %q in %s", - eval.NextEval, eval.Wait)) + if eval.Wait.Nanoseconds() != 0 { + m.ui.Info(fmt.Sprintf( + "Monitoring next evaluation %q in %s", + limit(eval.NextEval, m.length), eval.Wait)) - // Skip some unnecessary polling - time.Sleep(eval.Wait) + // Skip some unnecessary polling + time.Sleep(eval.Wait) + } // Reset the state and monitor the new eval m.state = newEvalState() diff --git a/commands.go b/commands.go index 99294858d..6ad1ba3c3 100644 --- a/commands.go +++ b/commands.go @@ -62,6 +62,11 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "fs": func() (cli.Command, error) { + return &command.FSCommand{ + Meta: meta, + }, nil + }, "fs ls": func() (cli.Command, error) { return &command.FSListCommand{ Meta: meta, diff --git a/main.go b/main.go old mode 100755 new mode 100644 index 36115789d..1e3558658 --- a/main.go +++ b/main.go @@ -32,7 +32,7 @@ func RunCustom(args []string, commands map[string]cli.CommandFactory) int { commandsInclude := make([]string, 0, len(commands)) for k, _ := range commands { switch k { - case "spawn-daemon": + case "executor": default: commandsInclude = append(commandsInclude, k) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 95d64e05b..9e3b85076 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1414,6 +1414,13 @@ func (s *Service) InitFields(job string, taskGroup string, task string) { // Validate checks if the Check definition is valid func (s *Service) 
Validate() error { var mErr multierror.Error + + // Ensure the name does not have a period in it. + // RFC-2782: https://tools.ietf.org/html/rfc2782 + if strings.Contains(s.Name, ".") { + mErr.Errors = append(mErr.Errors, fmt.Errorf("service name can't contain periods: %q", s.Name)) + } + for _, c := range s.Checks { if err := c.Validate(); err != nil { mErr.Errors = append(mErr.Errors, err) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 076456c08..dfbca2ad1 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -485,6 +485,14 @@ func TestInvalidServiceCheck(t *testing.T) { if err := s.Validate(); err == nil { t.Fatalf("Service should be invalid") } + + s = Service{ + Name: "service.name", + PortLabel: "bar", + } + if err := s.Validate(); err == nil { + t.Fatalf("Service should be invalid: %v", err) + } } func TestDistinctCheckID(t *testing.T) { diff --git a/testutil/wait.go b/testutil/wait.go index 43ad02aca..23c88c497 100644 --- a/testutil/wait.go +++ b/testutil/wait.go @@ -11,7 +11,7 @@ import ( const ( // TravisRunEnv is an environment variable that is set if being run by // Travis. - TravisRunEnv = "TRAVIS_RUN" + TravisRunEnv = "CI" ) type testFn func() (bool, error) diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index 140522b15..d964a78df 100644 --- a/website/source/docs/drivers/docker.html.md +++ b/website/source/docs/drivers/docker.html.md @@ -247,6 +247,9 @@ The `docker` driver has the following host-level configuration options: to customize this if you use a non-standard socket (http or another location). +* `docker.auth.config` - Allows an operator to specify a json file which is in + the dockercfg format containing authentication information for private registry. + * `docker.tls.cert` - Path to the server's certificate file (`.pem`). Specify this along with `docker.tls.key` and `docker.tls.ca` to use a TLS client to connect to the docker daemon. `docker.endpoint` must also be specified or diff --git a/website/source/docs/http/node.html.md b/website/source/docs/http/node.html.md index 01f06ceef..4ceee7cf6 100644 --- a/website/source/docs/http/node.html.md +++ b/website/source/docs/http/node.html.md @@ -338,7 +338,7 @@ be specified using the `?region=` query parameter. { "EvalIDs": ["d092fdc0-e1fd-2536-67d8-43af8ca798ac"], "EvalCreateIndex": 35, - "NodeModifyIndex": 34, + "NodeModifyIndex": 34 } ``` @@ -378,7 +378,7 @@ be specified using the `?region=` query parameter. { "EvalID": "d092fdc0-e1fd-2536-67d8-43af8ca798ac", "EvalCreateIndex": 35, - "NodeModifyIndex": 34, + "NodeModifyIndex": 34 } ``` diff --git a/website/source/intro/getting-started/cluster.html.md b/website/source/intro/getting-started/cluster.html.md index 0aa63f171..249baa6ae 100644 --- a/website/source/intro/getting-started/cluster.html.md +++ b/website/source/intro/getting-started/cluster.html.md @@ -140,9 +140,9 @@ we should see both nodes in the `ready` state: ``` $ nomad node-status -ID DC Name Class Drain Status -f7780117-2cae-8ee9-4b36-f34dd796ab02 dc1 nomad false ready -ffb5b55a-6059-9ec7-6108-23a2bbba95da dc1 nomad false ready +ID Datacenter Name Class Drain Status +fca62612 dc1 nomad false ready +c887deef dc1 nomad false ready ``` We now have a simple three node cluster running. 
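
The Service.Validate change above rejects names containing periods because services are exposed as RFC 2782 SRV records. A standalone sketch of the same check:

```go
package main

import (
	"fmt"
	"strings"
)

// validateServiceName applies the rule the Service.Validate change above
// introduces: names are registered as DNS SRV records (RFC 2782), so a
// period inside the name is rejected.
func validateServiceName(name string) error {
	if strings.Contains(name, ".") {
		return fmt.Errorf("service name can't contain periods: %q", name)
	}
	return nil
}

func main() {
	for _, name := range []string{"redis-cache", "service.name"} {
		if err := validateServiceName(name); err != nil {
			fmt.Println("invalid:", err)
			continue
		}
		fmt.Println("valid:", name)
	}
}
```
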
The only difference @@ -159,13 +159,13 @@ Then, use the [`run` command](/docs/commands/run.html) to submit the job: ``` $ nomad run example.nomad -==> Monitoring evaluation "77e5075f-2a1b-9cce-d14e-fe98cca9e17f" +==> Monitoring evaluation "8e0a7cf9" Evaluation triggered by job "example" - Allocation "711edd85-f183-99ea-910a-6445b23d79e4" created: node "ffb5b55a-6059-9ec7-6108-23a2bbba95da", group "cache" - Allocation "98218a8a-627c-308f-8941-acdbffe1940c" created: node "f7780117-2cae-8ee9-4b36-f34dd796ab02", group "cache" - Allocation "e8957a7f-6fff-f61f-2878-57715c26725d" created: node "f7780117-2cae-8ee9-4b36-f34dd796ab02", group "cache" + Allocation "501154ac" created: node "c887deef", group "cache" + Allocation "7e2b3900" created: node "fca62612", group "cache" + Allocation "9c66fcaf" created: node "c887deef", group "cache" Evaluation status changed: "pending" -> "complete" -==> Evaluation "77e5075f-2a1b-9cce-d14e-fe98cca9e17f" finished with status "complete" +==> Evaluation "8e0a7cf9" finished with status "complete" ``` We can see in the output that the scheduler assigned two of the @@ -181,17 +181,19 @@ Name = example Type = service Priority = 50 Datacenters = dc1 -Status = +Status = running +Periodic = false ==> Evaluations -ID Priority TriggeredBy Status -77e5075f-2a1b-9cce-d14e-fe98cca9e17f 50 job-register complete +ID Priority Triggered By Status +54dd2ae3 50 job-register complete ==> Allocations -ID EvalID NodeID TaskGroup Desired Status -711edd85-f183-99ea-910a-6445b23d79e4 77e5075f-2a1b-9cce-d14e-fe98cca9e17f ffb5b55a-6059-9ec7-6108-23a2bbba95da cache run running -98218a8a-627c-308f-8941-acdbffe1940c 77e5075f-2a1b-9cce-d14e-fe98cca9e17f f7780117-2cae-8ee9-4b36-f34dd796ab02 cache run running -e8957a7f-6fff-f61f-2878-57715c26725d 77e5075f-2a1b-9cce-d14e-fe98cca9e17f f7780117-2cae-8ee9-4b36-f34dd796ab02 cache run running +ID Eval ID Node ID Task Group Desired Status +102225ab 54dd2ae3 56b590e6 cache run running +f327d2b1 54dd2ae3 e4235508 cache run running +f91137f8 54dd2ae3 56b590e6 cache run running + ``` We can see that all our tasks have been allocated and are running. 
diff --git a/website/source/intro/getting-started/install.html.md b/website/source/intro/getting-started/install.html.md index a328c62e7..8cb223e31 100644 --- a/website/source/intro/getting-started/install.html.md +++ b/website/source/intro/getting-started/install.html.md @@ -50,6 +50,7 @@ Available commands are: alloc-status Display allocation status information and metadata client-config View or modify client configuration details eval-monitor Monitor an evaluation interactively + fs Inspect the contents of an allocation directory init Create an example job file node-drain Toggle drain mode on a given node node-status Display status information about nodes diff --git a/website/source/intro/getting-started/jobs.html.md b/website/source/intro/getting-started/jobs.html.md index 1e893bf6f..beb5e10cd 100644 --- a/website/source/intro/getting-started/jobs.html.md +++ b/website/source/intro/getting-started/jobs.html.md @@ -46,11 +46,11 @@ We can register our example job now: ``` $ nomad run example.nomad -==> Monitoring evaluation "3d823c52-929a-fa8b-c50d-1ac4d00cf6b7" +==> Monitoring evaluation "26cfc69e" Evaluation triggered by job "example" - Allocation "85b839d7-f67a-72a4-5a13-104020ae4807" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" + Allocation "8ba85cef" created: node "171a583b", group "cache" Evaluation status changed: "pending" -> "complete" -==> Evaluation "3d823c52-929a-fa8b-c50d-1ac4d00cf6b7" finished with status "complete" +==> Evaluation "26cfc69e" finished with status "complete" ``` Anytime a job is updated, Nomad creates an evaluation to determine what @@ -67,15 +67,16 @@ Name = example Type = service Priority = 50 Datacenters = dc1 -Status = +Status = running +Periodic = false ==> Evaluations -ID Priority TriggeredBy Status -3d823c52-929a-fa8b-c50d-1ac4d00cf6b7 50 job-register complete +ID Priority Triggered By Status +26cfc69e 50 job-register complete ==> Allocations -ID EvalID NodeID TaskGroup Desired Status -85b839d7-f67a-72a4-5a13-104020ae4807 3d823c52-929a-fa8b-c50d-1ac4d00cf6b7 2512929f-5b7c-a959-dfd9-bf8a8eb022a6 cache run running +ID Eval ID Node ID Task Group Desired Status +8ba85cef 26cfc69e 171a583b cache run running ``` Here we can see that our evaluation that was created has completed, and that @@ -100,13 +101,13 @@ push the updated version of the job: ``` $ nomad run example.nomad -==> Monitoring evaluation "ec199c63-2022-f5c7-328d-1cf85e61bf66" +==> Monitoring evaluation "127a49d0" Evaluation triggered by job "example" - Allocation "21551679-5224-cb6b-80a2-d0b091612d2e" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" - Allocation "b1be1410-a01c-20ad-80ff-96750ec0f1da" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" - Allocation "ed32a35d-8086-3f04-e299-4432e562cbf2" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" + Allocation "8ab24eef" created: node "171a583b", group "cache" + Allocation "f6c29874" created: node "171a583b", group "cache" + Allocation "8ba85cef" modified: node "171a583b", group "cache" Evaluation status changed: "pending" -> "complete" -==> Evaluation "ec199c63-2022-f5c7-328d-1cf85e61bf66" finished with status "complete" +==> Evaluation "127a49d0" finished with status "complete" ``` Because we set the count of the task group to three, Nomad created two @@ -132,13 +133,23 @@ specification now: ``` $ nomad run example.nomad -==> Monitoring evaluation "d34d37f4-19b1-f4c0-b2da-c949e6ade82d" +==> Monitoring evaluation "ebcc3e14" Evaluation triggered by job "example" - 
Allocation "5614feb0-212d-21e5-ccfb-56a394fc41d5" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" - Allocation "bf7e3ad5-b217-14fe-f3f8-2b83af9dbb42" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" - Allocation "e3978af2-f61e-c601-7aa1-90aea9b23cf6" created: node "2512929f-5b7c-a959-dfd9-bf8a8eb022a6", group "cache" + Allocation "9a3743f4" created: node "171a583b", group "cache" Evaluation status changed: "pending" -> "complete" -==> Evaluation "d34d37f4-19b1-f4c0-b2da-c949e6ade82d" finished with status "complete" +==> Evaluation "ebcc3e14" finished with status "complete" +==> Monitoring next evaluation "b508d8f0-7f21-8d66-ec59-7f5b2573435a" in 0 +==> Monitoring evaluation "b508d8f0" + Evaluation triggered by job "example" + Allocation "926e5876" created: node "171a583b", group "cache" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "b508d8f0" finished with status "complete" +==> Monitoring next evaluation "ea78c05a-a15f-92ae-8c3d-59f4a1edd091" in 10s +==> Monitoring evaluation "ea78c05a" + Evaluation triggered by job "example" + Allocation "3c8589d5" created: node "171a583b", group "cache" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "ea78c05a" finished with status "complete" ``` We can see that Nomad handled the update in three phases, only updating a single task @@ -152,10 +163,10 @@ is stopping the job. This is done with the [`stop` command](/docs/commands/stop. ``` $ nomad stop example -==> Monitoring evaluation "bb407de4-02cb-f009-d986-646d6c11366d" +==> Monitoring evaluation "fd03c9f8" Evaluation triggered by job "example" Evaluation status changed: "pending" -> "complete" -==> Evaluation "bb407de4-02cb-f009-d986-646d6c11366d" finished with status "complete" +==> Evaluation "fd03c9f8" finished with status "complete" ``` When we stop a job, it creates an evaluation which is used to stop all @@ -164,7 +175,7 @@ If we try to query the job status, we can see it is no longer registered: ``` $ nomad status example -Error querying job: Unexpected response code: 404 (job not found) +No job(s) with prefix or id "example" found ``` If we wanted to start the job again, we could simply `run` it again. diff --git a/website/source/intro/getting-started/running.html.md b/website/source/intro/getting-started/running.html.md index 60bc9be34..1d33d969c 100644 --- a/website/source/intro/getting-started/running.html.md +++ b/website/source/intro/getting-started/running.html.md @@ -83,8 +83,8 @@ $ vagrant ssh ... $ nomad node-status -ID DC Name Class Drain Status -72d3af97-144f-1e5f-94e5-df1516fe4add dc1 nomad false ready +ID Datacenter Name Class Drain Status +171a583b dc1 nomad false ready ``` The output shows our Node ID, which is a randomly generated UUID, @@ -99,8 +99,8 @@ ring using the [`server-members`](/docs/commands/server-members.html) command: ```text $ nomad server-members -Name Addr Port Status Proto Build DC Region -nomad.global 127.0.0.1 4648 alive 2 0.1.0dev dc1 global +Name Address Port Status Protocol Build Datacenter Region +nomad.global 127.0.0.1 4648 alive 2 0.3.0dev dc1 global ``` The output shows our own agent, the address it is running on, its