Merge branch 'master' into f-port-configs

Committed by Chris Bednarski on 2015-09-23 11:57:12 -07:00
52 changed files with 1069 additions and 278 deletions

.gitignore vendored

@@ -42,3 +42,9 @@ ui/dist/
website/.bundle
website/vendor
example.nomad
nomad_linux_amd64
nomad_darwin_amd64
TODO.md


@@ -12,7 +12,7 @@ func TestCompose(t *testing.T) {
SetMeta("foo", "bar").
Constrain(HardConstraint("kernel.name", "=", "linux")).
Require(&Resources{
CPU: 1.25,
CPU: 1250,
MemoryMB: 1024,
DiskMB: 2048,
IOPS: 1024,
@@ -78,7 +78,7 @@ func TestCompose(t *testing.T) {
Name: "task1",
Driver: "exec",
Resources: &Resources{
CPU: 1.25,
CPU: 1250,
MemoryMB: 1024,
DiskMB: 2048,
IOPS: 1024,


@@ -3,7 +3,7 @@ package api
// Resources encapsulates the required resources of
// a given task or task group.
type Resources struct {
CPU float64
CPU int
MemoryMB int
DiskMB int
IOPS int
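Across this commit the same unit conversion recurs: CPU asks that were fractional core counts (0.5, 1.25, 4.0) become integer MHz values (500, 1250, 4000). A minimal sketch of that mapping, assuming the 1000 MHz-per-core convention the updated test values imply (the helper name is ours, not part of the change):

```go
package main

import "fmt"

// coresToMHz maps the old float64 core counts onto the new int MHz
// representation, assuming 1000 MHz per core as the test values imply.
func coresToMHz(cores float64) int {
	return int(cores * 1000)
}

func main() {
	for _, c := range []float64{0.5, 1.25, 4.0} {
		fmt.Printf("%.2f cores -> %d MHz\n", c, coresToMHz(c))
	}
}
```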


@@ -166,7 +166,7 @@ func TestTask_Require(t *testing.T) {
// Create some require resources
resources := &Resources{
CPU: 1.25,
CPU: 1250,
MemoryMB: 128,
DiskMB: 2048,
IOPS: 1024,


@@ -7,6 +7,8 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
ctestutil "github.com/hashicorp/nomad/client/testutil"
)
type MockAllocStateUpdater struct {
@@ -32,6 +34,7 @@ func testAllocRunner() (*MockAllocStateUpdater, *AllocRunner) {
}
func TestAllocRunner_SimpleRun(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner()
go ar.Run()
defer ar.Destroy()
@@ -48,6 +51,7 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
}
func TestAllocRunner_Destroy(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner()
// Ensure task takes some time
@@ -79,6 +83,7 @@ func TestAllocRunner_Destroy(t *testing.T) {
}
func TestAllocRunner_Update(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner()
// Ensure task takes some time


@@ -142,6 +142,13 @@ func (c *Client) init() error {
return fmt.Errorf("failed creating alloc dir: %s", err)
}
}
// Ensure the state dir exists if we have one
if c.config.StateDir != "" {
if err := os.MkdirAll(c.config.StateDir, 0700); err != nil {
return fmt.Errorf("failed creating state dir: %s", err)
}
}
return nil
}
@@ -244,8 +251,8 @@ func (c *Client) Stats() map[string]map[string]string {
"client": map[string]string{
"known_servers": toString(uint64(len(c.config.Servers))),
"num_allocations": toString(uint64(numAllocs)),
"last_heartbeat": fmt.Sprintf("%#v", time.Since(c.lastHeartbeat)),
"heartbeat_ttl": fmt.Sprintf("%#v", c.heartbeatTTL),
"last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
"heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL),
},
"runtime": nomad.RuntimeStats(),
}
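The verb swap above is a readability fix: `%#v` renders a `time.Duration` as its raw integer nanosecond count, while `%v` uses the human-readable `String()` form. A small illustration:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	ttl := 10 * time.Second
	fmt.Printf("%#v\n", ttl) // 10000000000 (Go syntax: raw nanoseconds)
	fmt.Printf("%v\n", ttl)  // 10s
}
```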
@@ -265,7 +272,9 @@ func (c *Client) restoreState() error {
// Scan the directory
list, err := ioutil.ReadDir(filepath.Join(c.config.StateDir, "alloc"))
if err != nil {
if err != nil && os.IsNotExist(err) {
return nil
} else if err != nil {
return fmt.Errorf("failed to list alloc state: %v", err)
}
@@ -556,6 +565,7 @@ func (c *Client) watchAllocations(allocUpdates chan []*structs.Allocation) {
for {
// Get the allocations, blocking for updates
resp = structs.NodeAllocsResponse{}
err := c.RPC("Node.GetAllocs", &req, &resp)
if err != nil {
c.logger.Printf("[ERR] client: failed to query for node allocations: %v", err)


@@ -15,6 +15,8 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
ctestutil "github.com/hashicorp/nomad/client/testutil"
)
var nextPort uint32 = 16000
@@ -137,6 +139,7 @@ func TestClient_Fingerprint(t *testing.T) {
}
func TestClient_Drivers(t *testing.T) {
ctestutil.ExecCompatible(t)
c := testClient(t, nil)
defer c.Shutdown()
@@ -246,6 +249,7 @@ func TestClient_UpdateAllocStatus(t *testing.T) {
}
func TestClient_WatchAllocs(t *testing.T) {
ctestutil.ExecCompatible(t)
s1, _ := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
@@ -314,6 +318,7 @@ func TestClient_WatchAllocs(t *testing.T) {
}
func TestClient_SaveRestoreState(t *testing.T) {
ctestutil.ExecCompatible(t)
s1, _ := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)


@@ -2,7 +2,9 @@ package driver
import (
"fmt"
"runtime"
"strings"
"syscall"
"time"
"github.com/hashicorp/nomad/client/config"
@@ -30,7 +32,12 @@ func NewExecDriver(ctx *DriverContext) Driver {
}
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// We can always do a fork/exec
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling")
return false, nil
}
node.Attributes["driver.exec"] = "1"
return true, nil
}


@@ -9,7 +9,9 @@ import (
"os/exec"
"path"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/hashicorp/nomad/client/config"
@@ -36,6 +38,12 @@ func NewJavaDriver(ctx *DriverContext) Driver {
}
func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling")
return false, nil
}
// Find java version
var out bytes.Buffer
var erOut bytes.Buffer


@@ -13,7 +13,9 @@ import (
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"syscall"
"time"
"github.com/hashicorp/nomad/client/config"
@@ -52,6 +54,12 @@ func NewQemuDriver(ctx *DriverContext) Driver {
}
func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
d.logger.Printf("[DEBUG] driver.qemu: must run as root user, disabling")
return false, nil
}
outBytes, err := exec.Command("qemu-system-x86_64", "-version").Output()
if err != nil {
return false, nil
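The same root/OS gate now appears verbatim in the exec, java, and qemu drivers. A shared helper could express it once; this is a hypothetical refactor for illustration, not code from the commit:

```go
package driver

import (
	"log"
	"runtime"
	"syscall"
)

// rootRequired is a hypothetical shared helper; the commit instead
// repeats this check inside each driver's Fingerprint method.
func rootRequired(logger *log.Logger, driverName string) bool {
	if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
		logger.Printf("[DEBUG] driver.%s: must run as root user, disabling", driverName)
		return false
	}
	return true
}
```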


@@ -10,7 +10,6 @@ import (
"os/user"
"strconv"
"strings"
"syscall"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/command"
@@ -31,16 +30,8 @@ func NewExecutor() Executor {
// TODO: In a follow-up PR make it so this only happens once per client.
// Fingerprinting shouldn't happen per task.
// Check if the process has root capabilities.
e.root = syscall.Geteuid() == 0
// Check if this process can set uid.
if e.root {
e.setUidEnabled = true
}
// Check that cgroups are available. Must be root to modify it.
if _, err := os.Stat(cgroupMount); err == nil && e.root {
// Check that cgroups are available.
if _, err := os.Stat(cgroupMount); err == nil {
e.cgroupEnabled = true
}
@@ -53,8 +44,6 @@ type LinuxExecutor struct {
user *user.User
// Finger print capabilities.
root bool
setUidEnabled bool
cgroupEnabled bool
// Isolation configurations.
@@ -152,11 +141,9 @@ func (e *LinuxExecutor) Start() error {
// spawned process. Note that we will only do this if we can call SetUID.
// Otherwise we'll just run the other process as our current (non-root)
// user. This means we aren't forced to run nomad as root.
if e.setUidEnabled {
if err := e.runAs("nobody"); err == nil && e.user != nil {
e.cmd.SetUID(e.user.Uid)
e.cmd.SetGID(e.user.Gid)
}
if err := e.runAs("nobody"); err == nil && e.user != nil {
e.cmd.SetUID(e.user.Uid)
e.cmd.SetGID(e.user.Gid)
}
return e.spawnDaemon()


@@ -61,7 +61,7 @@ func (f *CPUFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bo
node.Resources = &structs.Resources{}
}
node.Resources.CPU = tc
node.Resources.CPU = int(tc)
}
if modelName != "" {


@@ -11,6 +11,8 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
ctestutil "github.com/hashicorp/nomad/client/testutil"
)
func testLogger() *log.Logger {
@@ -44,6 +46,7 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) {
}
func TestTaskRunner_SimpleRun(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, tr := testTaskRunner()
go tr.Run()
defer tr.Destroy()
@@ -79,6 +82,7 @@ func TestTaskRunner_SimpleRun(t *testing.T) {
}
func TestTaskRunner_Destroy(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, tr := testTaskRunner()
// Change command to ensure we run for a bit
@@ -113,6 +117,7 @@ func TestTaskRunner_Destroy(t *testing.T) {
}
func TestTaskRunner_Update(t *testing.T) {
ctestutil.ExecCompatible(t)
_, tr := testTaskRunner()
// Change command to ensure we run for a bit


@@ -0,0 +1,13 @@
package testutil
import (
"runtime"
"syscall"
"testing"
)
func ExecCompatible(t *testing.T) {
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
t.Skip("Must be root on non-windows environments to run test")
}
}


@@ -77,11 +77,12 @@ func (a *Agent) serverConfig() (*nomad.Config, error) {
if a.config.NodeName != "" {
conf.NodeName = a.config.NodeName
}
if a.config.Server.Bootstrap {
conf.Bootstrap = a.config.Server.Bootstrap
}
if a.config.Server.BootstrapExpect > 0 {
conf.BootstrapExpect = a.config.Server.BootstrapExpect
if a.config.Server.BootstrapExpect == 1 {
conf.Bootstrap = true
} else {
conf.BootstrapExpect = a.config.Server.BootstrapExpect
}
}
if a.config.DataDir != "" {
conf.DataDir = filepath.Join(a.config.DataDir, "server")


@@ -162,4 +162,29 @@ func TestAgent_ServerConfig(t *testing.T) {
if addr := out.SerfConfig.MemberlistConfig.BindAddr; addr != "127.0.0.3" {
t.Fatalf("expect 127.0.0.3, got: %s", addr)
}
// Properly handles the bootstrap flags
conf.Server.BootstrapExpect = 1
out, err = a.serverConfig()
if err != nil {
t.Fatalf("err: %s", err)
}
if !out.Bootstrap {
t.Fatalf("should have set bootstrap mode")
}
if out.BootstrapExpect != 0 {
t.Fatalf("boostrap expect should be 0")
}
conf.Server.BootstrapExpect = 3
out, err = a.serverConfig()
if err != nil {
t.Fatalf("err: %s", err)
}
if out.Bootstrap {
t.Fatalf("bootstrap mode should be disabled")
}
if out.BootstrapExpect != 3 {
t.Fatalf("should have bootstrap-expect = 3")
}
}


@@ -67,13 +67,16 @@ func (c *Command) readConfig() *Config {
flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")
// Server-only options
flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
// General options
flags.Var((*sliceflag.StringFlag)(&configPath), "config", "config")
flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
flags.StringVar(&cmdConfig.Region, "region", "", "")
flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
flags.StringVar(&cmdConfig.LogLevel, "log-level", "info", "")
flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
flags.StringVar(&cmdConfig.NodeName, "node", "", "")
// Atlas options
@@ -121,17 +124,31 @@ func (c *Command) readConfig() *Config {
// Merge any CLI options over config file options
config = config.Merge(cmdConfig)
// Check that we have a data-dir if we are a server
if !dev && config.DataDir == "" {
c.Ui.Error("Must specify data directory")
return nil
}
// Set the version info
config.Revision = c.Revision
config.Version = c.Version
config.VersionPrerelease = c.VersionPrerelease
if dev {
// Skip validation for dev mode
return config
}
// Check that we have a data-dir
if config.DataDir == "" {
c.Ui.Error("Must specify data directory")
return nil
}
// Check the bootstrap flags
if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
c.Ui.Error("Bootstrap requires server mode to be enabled")
return nil
}
if config.Server.BootstrapExpect == 1 {
c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
}
return config
}
@@ -549,25 +566,32 @@ General Options (clients and servers):
Name of the region the Nomad agent will be a member of. By default
this value is set to "global".
Role-Specific Options:
-client
Enable client mode for the agent. Client mode enables a given node
to be evaluated for allocations. If client mode is not enabled,
no work will be scheduled to the agent.
-dev
Start the agent in development mode. This enables a pre-configured
dual-role agent (client + server) which is useful for developing
or testing Nomad. No other configuration is required to start the
agent in this mode.
Server Options:
-server
Enable server mode for the agent. Agents in server mode are
clustered together and handle the additional responsibility of
leader election, data replication, and scheduling work onto
eligible client nodes.
-bootstrap-expect=<num>
Configures the expected number of server nodes to wait for before
bootstrapping the cluster. Once <num> servers have joined each other,
Nomad initiates the bootstrap process.
Client Options:
-client
Enable client mode for the agent. Client mode enables a given node
to be evaluated for allocations. If client mode is not enabled,
no work will be scheduled to the agent.
Atlas Options:
-atlas=<infrastructure>


@@ -0,0 +1,63 @@
package agent
import (
"io/ioutil"
"os"
"strings"
"testing"
"github.com/mitchellh/cli"
)
func TestCommand_Implements(t *testing.T) {
var _ cli.Command = &Command{}
}
func TestCommand_Args(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "nomad")
if err != nil {
t.Fatalf("err: %s", err)
}
defer os.RemoveAll(tmpDir)
type tcase struct {
args []string
errOut string
}
tcases := []tcase{
{
[]string{},
"Must specify data directory",
},
{
[]string{"-data-dir=" + tmpDir, "-bootstrap-expect=1"},
"Bootstrap requires server mode to be enabled",
},
{
[]string{"-data-dir=" + tmpDir, "-server", "-bootstrap-expect=1"},
"WARNING: Bootstrap mode enabled!",
},
}
for _, tc := range tcases {
// Make a new command. We pre-emptively close the shutdownCh
// so that the command exits immediately instead of blocking.
ui := new(cli.MockUi)
shutdownCh := make(chan struct{})
close(shutdownCh)
cmd := &Command{
Ui: ui,
ShutdownCh: shutdownCh,
}
if code := cmd.Run(tc.args); code != 1 {
t.Fatalf("args: %v\nexit: %d\n", tc.args, code)
}
if expect := tc.errOut; expect != "" {
out := ui.ErrorWriter.String()
if !strings.Contains(out, expect) {
t.Fatalf("expect to find %q\n\n%s", expect, out)
}
}
}
}


@@ -146,10 +146,6 @@ type ServerConfig struct {
// Enabled controls if we are a server
Enabled bool `hcl:"enabled"`
// Bootstrap is used to bring up the first Consul server, and
// permits that node to elect itself leader
Bootstrap bool `hcl:"bootstrap"`
// BootstrapExpect tries to automatically bootstrap the Nomad cluster,
// by withholding peers until enough servers join.
BootstrapExpect int `hcl:"bootstrap_expect"`
@@ -350,9 +346,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
if b.Enabled {
result.Enabled = true
}
if b.Bootstrap {
result.Bootstrap = true
}
if b.BootstrapExpect > 0 {
result.BootstrapExpect = b.BootstrapExpect
}


@@ -39,7 +39,6 @@ func TestConfig_Merge(t *testing.T) {
},
Server: &ServerConfig{
Enabled: false,
Bootstrap: false,
BootstrapExpect: 1,
DataDir: "/tmp/data1",
ProtocolVersion: 1,
@@ -91,7 +90,6 @@ func TestConfig_Merge(t *testing.T) {
},
Server: &ServerConfig{
Enabled: true,
Bootstrap: true,
BootstrapExpect: 2,
DataDir: "/tmp/data2",
ProtocolVersion: 2,
@@ -341,7 +339,6 @@ func TestConfig_LoadConfigString(t *testing.T) {
},
Server: &ServerConfig{
Enabled: true,
Bootstrap: true,
BootstrapExpect: 5,
DataDir: "/tmp/data",
ProtocolVersion: 3,
@@ -409,7 +406,6 @@ client {
}
server {
enabled = true
bootstrap = true
bootstrap_expect = 5
data_dir = "/tmp/data"
protocol_version = 3


@@ -12,7 +12,6 @@ import (
"time"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)
const (
@@ -191,20 +190,9 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque
}
// decodeBody is used to decode a JSON request body
func decodeBody(req *http.Request, out interface{}, cb func(interface{}) error) error {
var raw interface{}
func decodeBody(req *http.Request, out interface{}) error {
dec := json.NewDecoder(req.Body)
if err := dec.Decode(&raw); err != nil {
return err
}
// Invoke the callback prior to decode
if cb != nil {
if err := cb(raw); err != nil {
return err
}
}
return mapstructure.Decode(raw, out)
return dec.Decode(&out)
}
// setIndex is used to set the index response header
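One subtlety in the simplified decodeBody: passing `&out` (a `*interface{}`) still populates the caller's struct, because `encoding/json` follows an interface value that holds a non-nil pointer. A self-contained illustration of the same shape, with the `*http.Request` replaced by a string reader:

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

type payload struct {
	Name string
}

// decodeBody mirrors the simplified helper above, minus the *http.Request.
func decodeBody(body string, out interface{}) error {
	dec := json.NewDecoder(strings.NewReader(body))
	return dec.Decode(&out)
}

func main() {
	var p payload
	if err := decodeBody(`{"Name":"example"}`, &p); err != nil {
		panic(err)
	}
	fmt.Println(p.Name) // example
}
```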


@@ -157,7 +157,7 @@ func (s *HTTPServer) jobQuery(resp http.ResponseWriter, req *http.Request,
func (s *HTTPServer) jobUpdate(resp http.ResponseWriter, req *http.Request,
jobName string) (interface{}, error) {
var args structs.JobRegisterRequest
if err := decodeBody(req, &args, nil); err != nil {
if err := decodeBody(req, &args); err != nil {
return nil, CodedError(400, err.Error())
}
if args.Job == nil {


@@ -2,8 +2,15 @@ package command
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
const (
// DefaultInitName is the default name we use when
// initializing the example file
DefaultInitName = "example.nomad"
)
// InitCommand generates a new job template that you can customize to your
@@ -13,89 +20,106 @@ type InitCommand struct {
}
func (c *InitCommand) Help() string {
return initUsage
}
helpText := `
Usage: nomad init
func (c *InitCommand) Run(args []string) int {
dir, err := os.Getwd()
if err != nil {
c.Ui.Error("Unable to determine pwd; aborting")
return 1
}
Creates an example job file that can be used as a starting
point to customize further.
// Derive the job name from the pwd folder name, which is our best guess at
// the project's name
jobname := filepath.Base(dir)
jobfile := fmt.Sprintf("%s.nomad", jobname)
jobpath := filepath.Join(dir, jobfile)
if _, err := os.Stat(jobpath); err == nil {
c.Ui.Error(fmt.Sprintf("%s file already exists", jobfile))
return 1
}
file, err := os.Create(jobfile)
defer file.Close()
if err != nil {
c.Ui.Error(fmt.Sprintf("Unable to create file %s: %s", jobfile, err))
return 1
}
_, err = file.WriteString(defaultJob)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to write job template to %s", jobfile))
return 1
}
c.Ui.Output(fmt.Sprintf("Initialized nomad job template in %s", jobfile))
return 0
`
return strings.TrimSpace(helpText)
}
func (c *InitCommand) Synopsis() string {
return "Create a new job template"
return "Create an example job file"
}
const initUsage = ``
func (c *InitCommand) Run(args []string) int {
// Check if the file already exists
_, err := os.Stat(DefaultInitName)
if err == nil {
c.Ui.Error(fmt.Sprintf("Job '%s' already exists", DefaultInitName))
return 1
} else if !os.IsNotExist(err) {
c.Ui.Error(fmt.Sprintf("Failed to stat '%s': %v", DefaultInitName, err))
return 1
}
// Write out the example
err = ioutil.WriteFile(DefaultInitName, []byte(defaultJob), 0660)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to write '%s': %v", DefaultInitName, err))
return 1
}
// Success
c.Ui.Output(fmt.Sprintf("Example job file written to %s", DefaultInitName))
return 0
}
const defaultJob = `
job "my-app" {
region = "global"
type = "service"
priority = 50
# There can only be a single job definition per file.
# Create a job with ID and Name 'example'
job "example" {
# Run the job in the global region, which is the default.
# region = "global"
// Each task in the group will be scheduled on the same machine(s).
group "app-group" {
// How many copies of this group should we run?
count = 5
# Specify the datacenters within the region this job can run in.
datacenters = ["dc1"]
task "python-webapp" {
driver = "docker"
config {
image = "org/container"
}
resources {
// For CPU 1024 = 1ghz
cpu = 500
// Memory in megabytes
memory = 128
# Service type jobs optimize for long-lived services. This is
# the default but we can change to batch for short-lived tasks.
# type = "service"
network {
dynamic_ports = [
"http",
"https",
]
}
}
}
# Priority controls our access to resources and scheduling priority.
# This can be 1 to 100, inclusively, and defaults to 50.
# priority = 50
task "logshipper" {
driver = "exec"
}
# Restrict our job to only linux. We can specify multiple
# constraints as needed.
constraint {
attribute = "$attr.kernel.name"
value = "linux"
}
constraint {
attribute = "kernel.os"
value = "linux"
}
}
# Configure the job to do rolling updates
update {
# Stagger updates every 10 seconds
stagger = "10s"
# Update a single task at a time
max_parallel = 1
}
# Create a 'cache' group. Each task in the group will be
# scheduled onto the same machine.
group "cache" {
# Control the number of instances of this group.
# Defaults to 1
# count = 1
# Define a task to run
task "redis" {
# Use Docker to run the task.
driver = "docker"
# Configure Docker driver with the image
config {
image = "redis:latest"
}
# We must specify the resources required for
# this task to ensure it runs on a machine with
# enough capacity.
resources {
cpu = 500 # 500 MHz
memory = 256 # 256MB
network {
mbits = 10
dynamic_ports = ["redis"]
}
}
}
}
}
`


@@ -130,7 +130,6 @@ func (c *NodeStatusCommand) Run(args []string) int {
alloc.ID,
alloc.EvalID,
alloc.JobID,
alloc.NodeID,
alloc.TaskGroup,
alloc.DesiredStatus,
alloc.ClientStatus)


@@ -19,8 +19,9 @@ func (c *RunCommand) Help() string {
helpText := `
Usage: nomad run [options] <file>
Starts running a new job using the definition located at <file>.
This is the main command used to invoke new work in Nomad.
Starts running a new job or updates an existing job using
the specification located at <file>. This is the main command
used to interact with Nomad.
Upon successful job submission, this command will immediately
enter an interactive monitor. This is useful to watch Nomad's
@@ -50,7 +51,7 @@ Run Options:
}
func (c *RunCommand) Synopsis() string {
return "Run a new job"
return "Run a new job or update an existing job"
}
func (c *RunCommand) Run(args []string) int {

demo/vagrant/README.md Normal file

@@ -0,0 +1,24 @@
# Vagrant Nomad Demo
This Vagrantfile and associated Nomad configuration files are meant
to be used along with the
[getting started guide](https://nomadproject.io/intro/getting-started/install.html).
Follow along with the guide, or just start the Vagrant box with:
$ vagrant up
Once it is finished, you should be able to SSH in and interact with Nomad:
$ vagrant ssh
...
$ nomad
usage: nomad [--version] [--help] <command> [<args>]
Available commands are:
agent Runs a Nomad agent
agent-info Display status information about the local agent
...
To learn more about starting Nomad see the [official site](https://nomadproject.io).

demo/vagrant/Vagrantfile vendored Normal file

@@ -0,0 +1,43 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
$script = <<SCRIPT
# Update apt and get dependencies
sudo apt-get update
sudo apt-get install -y unzip curl wget
# Install Docker
sudo curl -sSL https://get.docker.com/ | sh
# Download Nomad
echo Fetching Nomad...
cd /tmp/
wget https://s3.amazonaws.com/hc-public/nomad/0.1.0dev/nomad_linux_amd64 -O nomad
echo Installing Nomad...
#unzip nomad.zip
sudo chmod +x nomad
sudo mv nomad /usr/bin/nomad
sudo mkdir /etc/nomad.d
sudo chmod a+w /etc/nomad.d
SCRIPT
Vagrant.configure(2) do |config|
config.vm.box = "puphpet/ubuntu1404-x64"
config.vm.hostname = "nomad"
config.vm.provision "shell", inline: $script, privileged: false
# Increase memory for Virtualbox
config.vm.provider "virtualbox" do |vb|
vb.memory = "1024"
end
# Increase memory for VMware
["vmware_fusion", "vmware_workstation"].each do |p|
config.vm.provider p do |v|
v.vmx["memsize"] = "1024"
end
end
end

demo/vagrant/client1.hcl Normal file

@@ -0,0 +1,20 @@
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/client1"
# Enable the client
client {
enabled = true
# For demo assume we are talking to server1. For production,
# this should be like "nomad.service.consul:4647" and a system
# like Consul used for service discovery.
servers = ["127.0.0.1:4647"]
}
# Modify our port to avoid a collision with server1
ports {
http = 5656
}

demo/vagrant/client2.hcl Normal file

@@ -0,0 +1,25 @@
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/client2"
# Enable the client
client {
enabled = true
# For demo assume we are talking to server1. For production,
# this should be like "nomad.service.consul:4647" and a system
# like Consul used for service discovery.
servers = ["127.0.0.1:4647"]
# Set ourselves as thing two
meta {
thing = "two"
}
}
# Modify our port to avoid a collision with server1 and client1
ports {
http = 5657
}

demo/vagrant/server.hcl Normal file

@@ -0,0 +1,13 @@
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/server1"
# Enable the server
server {
enabled = true
# Self-elect, should be 3 or 5 for production
bootstrap_expect = 1
}


@@ -14,7 +14,7 @@ func Node() *structs.Node {
"driver.exec": "1",
},
Resources: &structs.Resources{
CPU: 4.0,
CPU: 4000,
MemoryMB: 8192,
DiskMB: 100 * 1024,
IOPS: 150,
@@ -27,7 +27,7 @@ func Node() *structs.Node {
},
},
Reserved: &structs.Resources{
CPU: 0.1,
CPU: 100,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
@@ -81,7 +81,7 @@ func Job() *structs.Job {
"args": "+%s",
},
Resources: &structs.Resources{
CPU: 0.5,
CPU: 500,
MemoryMB: 256,
Networks: []*structs.NetworkResource{
&structs.NetworkResource{
@@ -127,7 +127,7 @@ func Alloc() *structs.Allocation {
NodeID: "foo",
TaskGroup: "web",
Resources: &structs.Resources{
CPU: 0.5,
CPU: 500,
MemoryMB: 256,
Networks: []*structs.NetworkResource{
&structs.NetworkResource{
@@ -141,7 +141,7 @@ func Alloc() *structs.Allocation {
},
TaskResources: map[string]*structs.Resources{
"web": &structs.Resources{
CPU: 0.5,
CPU: 500,
MemoryMB: 256,
Networks: []*structs.NetworkResource{
&structs.NetworkResource{


@@ -91,9 +91,9 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex) (bool, st
// This is equivalent to their BestFit v3
func ScoreFit(node *Node, util *Resources) float64 {
// Determine the node availability
nodeCpu := node.Resources.CPU
nodeCpu := float64(node.Resources.CPU)
if node.Reserved != nil {
nodeCpu -= node.Reserved.CPU
nodeCpu -= float64(node.Reserved.CPU)
}
nodeMem := float64(node.Resources.MemoryMB)
if node.Reserved != nil {
@@ -101,7 +101,7 @@ func ScoreFit(node *Node, util *Resources) float64 {
}
// Compute the free percentage
freePctCpu := 1 - (util.CPU / nodeCpu)
freePctCpu := 1 - (float64(util.CPU) / nodeCpu)
freePctRam := 1 - (float64(util.MemoryMB) / nodeMem)
// Total will be "maximized" the smaller the value is.
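For intuition on the converted arithmetic, here is the free-CPU computation with the mock values from this commit (a 4000 MHz node with 100 MHz reserved and a 500 MHz ask); since `Resources.CPU` is now an int, the conversions to float64 happen at the scoring boundary:

```go
package main

import "fmt"

func main() {
	nodeCpu := float64(4000) - float64(100)    // total minus reserved, in MHz
	freePctCpu := 1 - (float64(500) / nodeCpu) // 500 MHz ask
	fmt.Printf("free CPU: %.3f\n", freePctCpu) // free CPU: 0.872
}
```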


@@ -89,7 +89,7 @@ func TestAllocsFit_PortsOvercommitted(t *testing.T) {
func TestAllocsFit(t *testing.T) {
n := &Node{
Resources: &Resources{
CPU: 2.0,
CPU: 2000,
MemoryMB: 2048,
DiskMB: 10000,
IOPS: 100,
@@ -102,7 +102,7 @@ func TestAllocsFit(t *testing.T) {
},
},
Reserved: &Resources{
CPU: 1.0,
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
IOPS: 50,
@@ -119,7 +119,7 @@ func TestAllocsFit(t *testing.T) {
a1 := &Allocation{
Resources: &Resources{
CPU: 1.0,
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
IOPS: 50,
@@ -144,7 +144,7 @@ func TestAllocsFit(t *testing.T) {
}
// Sanity check the used resources
if used.CPU != 2.0 {
if used.CPU != 2000 {
t.Fatalf("bad: %#v", used)
}
if used.MemoryMB != 2048 {
@@ -161,7 +161,7 @@ func TestAllocsFit(t *testing.T) {
}
// Sanity check the used resources
if used.CPU != 3.0 {
if used.CPU != 3000 {
t.Fatalf("bad: %#v", used)
}
if used.MemoryMB != 3072 {


@@ -136,6 +136,7 @@ func (idx *NetworkIndex) yieldIP(cb func(net *NetworkResource, ip net.IP) bool)
// AssignNetwork is used to assign network resources given an ask.
// If the ask cannot be satisfied, returns nil
func (idx *NetworkIndex) AssignNetwork(ask *NetworkResource) (out *NetworkResource, err error) {
err = fmt.Errorf("no networks available")
idx.yieldIP(func(n *NetworkResource, ip net.IP) (stop bool) {
// Convert the IP to a string
ipStr := ip.String()
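The newly seeded error acts as a sentinel: if `yieldIP` never produces a workable offer, `AssignNetwork` returns "no networks available" rather than a nil result with a nil error. A sketch of the pattern, with the real port-allocation logic reduced to a hypothetical `tryOffer` helper:

```go
// Pattern sketch only: tryOffer stands in for the real allocation logic.
// The named returns start out as "nothing offered"; a success overwrites
// both and stops the iteration.
func (idx *NetworkIndex) AssignNetwork(ask *NetworkResource) (out *NetworkResource, err error) {
	err = fmt.Errorf("no networks available")
	idx.yieldIP(func(n *NetworkResource, ip net.IP) (stop bool) {
		if offer := tryOffer(n, ip, ask); offer != nil {
			out, err = offer, nil
			return true
		}
		return false
	})
	return
}
```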


@@ -534,7 +534,7 @@ type NodeListStub struct {
// Resources is used to define the resources available
// on a client
type Resources struct {
CPU float64
CPU int
MemoryMB int `mapstructure:"memory"`
DiskMB int `mapstructure:"disk"`
IOPS int


@@ -146,13 +146,13 @@ func TestResource_NetIndex(t *testing.T) {
func TestResource_Superset(t *testing.T) {
r1 := &Resources{
CPU: 2.0,
CPU: 2000,
MemoryMB: 2048,
DiskMB: 10000,
IOPS: 100,
}
r2 := &Resources{
CPU: 1.0,
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
IOPS: 50,
@@ -174,7 +174,7 @@ func TestResource_Superset(t *testing.T) {
func TestResource_Add(t *testing.T) {
r1 := &Resources{
CPU: 2.0,
CPU: 2000,
MemoryMB: 2048,
DiskMB: 10000,
IOPS: 100,
@@ -187,7 +187,7 @@ func TestResource_Add(t *testing.T) {
},
}
r2 := &Resources{
CPU: 1.0,
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
IOPS: 50,
@@ -206,7 +206,7 @@ func TestResource_Add(t *testing.T) {
}
expect := &Resources{
CPU: 3.0,
CPU: 3000,
MemoryMB: 3072,
DiskMB: 15000,
IOPS: 150,
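These expectations assume field-wise addition (2000 + 1000 = 3000 MHz, 2048 + 1024 = 3072 MB, and so on). A minimal sketch of that behavior; the real `Resources.Add` also merges networks and returns an error, both elided here:

```go
// Field-wise addition sketch (network merging and error handling elided).
func (r *Resources) Add(delta *Resources) {
	r.CPU += delta.CPU
	r.MemoryMB += delta.MemoryMB
	r.DiskMB += delta.DiskMB
	r.IOPS += delta.IOPS
}
```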


@@ -96,19 +96,19 @@ func (s *GenericScheduler) setStatus(status, desc string) error {
// Process is used to handle a single evaluation
func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
// Store the evaluation
s.eval = eval
// Verify the evaluation trigger reason is understood
switch eval.TriggeredBy {
case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,
structs.EvalTriggerJobDeregister:
structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate:
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return s.setStatus(structs.EvalStatusFailed, desc)
}
// Store the evaluation
s.eval = eval
// Retry up to the maxScheduleAttempts
limit := maxServiceScheduleAttempts
if s.batch {


@@ -289,8 +289,8 @@ func (iter *JobAntiAffinityIterator) Next() *RankedNode {
// Apply a penalty if there are collisions
if collisions > 0 {
scorePenalty := float64(collisions) * iter.penalty
option.Score -= scorePenalty
scorePenalty := -1 * float64(collisions) * iter.penalty
option.Score += scorePenalty
iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty)
}
return option


@@ -88,7 +88,7 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")
// Apply a limit function. This is to avoid scanning *every* possible node.
s.limit = NewLimitIterator(ctx, s.binPack, 2)
s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)
// Select the node with the maximum score for placement
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
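The one-line change above is a wiring fix: the limit iterator previously drew from the bin-packing iterator directly, so the anti-affinity scores were computed but never consumed. After the change each stage wraps the previous one, binPack → jobAntiAff → limit → maxScore, as sketched here with the constructor calls from the hunk (surrounding setup elided):

```go
// Iterator chain after the fix: anti-affinity penalties now flow into
// the limit and max-score stages instead of being bypassed.
s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")
s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2) // was: s.binPack
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
```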


@@ -67,7 +67,7 @@ func TestServiceStack_Select_Size(t *testing.T) {
t.Fatalf("missing size")
}
if size.CPU != 0.5 || size.MemoryMB != 256 {
if size.CPU != 500 || size.MemoryMB != 256 {
t.Fatalf("bad: %#v", size)
}

website/Vagrantfile vendored

@@ -33,8 +33,9 @@ sudo apt-get install -y nodejs
SCRIPT
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.box = "chef/ubuntu-12.04"
config.vm.box = "bento/ubuntu-12.04"
config.vm.network "private_network", ip: "33.33.30.10"
config.vm.network "forwarded_port", guest: 4567, host: 8080
config.vm.provision "shell", inline: $script, privileged: false
config.vm.synced_folder ".", "/vagrant", type: "rsync"
end


@@ -152,17 +152,11 @@ configured on client nodes.
* `enabled`: A boolean indicating if server mode should be enabled for the
local agent. All other server options depend on this value being set.
Defaults to `false`.
* `bootstrap`: A boolean indicating if the server should be started in
bootstrap mode. Bootstrap mode is a special case mode used for easily
starting a single-server Nomad server cluster. This mode of operation does
not provide any fault tolerance and is not recommended for production
environments. Defaults to `false`.
* `bootstrap_expect`: This is an integer representing the number of server
nodes to wait for before bootstrapping. This is a safer alternative to
bootstrap mode, as there will never be a single point-of-failure. It is most
common to use the odd-numbered integers `3` or `5` for this value, depending
on the cluster size. A value of `1` is functionally equivalent to bootstrap
mode and is not recommended.
nodes to wait for before bootstrapping. It is most common to use the
odd-numbered integers `3` or `5` for this value, depending on the cluster
size. A value of `1` does not provide any fault tolerance and is not
recommended for production use cases.
* `data_dir`: This is the data directory used for server-specific data,
including the replicated log. By default, this directory lives inside of the
[data_dir](#data_dir) in the "server" sub-path.
@@ -236,6 +230,8 @@ A subset of the available Nomad agent configuration can optionally be passed in
via CLI arguments. The `agent` command accepts the following arguments:
* `-bind=<address>`: Equivalent to the [bind_addr](#bind_addr) config option.
* `-bootstrap-expect=<num>`: Equivalent to the
[bootstrap_expect](#bootstrap_expect) config option.
* `-config=<path>`: Specifies the path to a configuration file or a directory of
configuration files to load. Can be specified multiple times.
* `-data-dir=<path>`: Equivalent to the [data_dir](#data_dir) config option.


@@ -3,20 +3,23 @@ layout: "docs"
page_title: "Commands: init"
sidebar_current: "docs-commands-init"
description: >
Toggle drain mode for a given node.
Generate a skeleton jobspec template.
---
# Command: init
The `init` command creates a [jobspec](/docs/jobspec/) template in the current
The `init` command creates an example [job specification](/docs/jobspec/) in the current
directory that demonstrates some common configurations for tasks, task groups,
runtime constraints, and resource allocation.
Please refer to the [jobspec](/docs/jobspec/) and [drivers](/docs/drivers/)
pages to learn how to customize the template.
## Usage
## Examples
Generate an example job file:
```
nomad init
$ nomad init
Example job file written to example.nomad
```


@@ -8,10 +8,9 @@ description: >
# Command: run
The `run` command is used to run new jobs in Nomad. Jobs are specified using
[HCL](https://github.com/hashicorp/hcl)-encoded files, and may specify one or
more task groups. More information about jobs and their configuration format
can be found in the [jobs documentation](#).
The `run` command is used to submit new jobs to Nomad or to update existing
jobs. Job files must conform to the [job specification](/docs/jobspec/index.html)
format.
## Usage
@@ -20,8 +19,8 @@ nomad run [options] <file>
```
The run command requires a single argument, specifying the path to a file
containing a valid [job definition](#). This file will be read and the job
will be submitted to the Nomad server for scheduling.
containing a valid [job specification](/docs/jobspec/index.html). This file
will be read and the job will be submitted to Nomad for scheduling.
By default, on successful job submission, the run command will enter an
interactive monitor and display log information detailing the scheduling


@@ -25,15 +25,15 @@ The `exec` driver supports the following configuration in the job spec:
## Client Requirements
The `exec` driver has no special requirements and can run on all
supported operating systems. The resource isolation primitives vary
by OS.
The `exec` driver can run on all supported operating systems, but to provide
proper isolation the client must be run as root on non-Windows operating systems.
Further, to support cgroups, `/sys/fs/cgroup/` must be mounted.
## Client Attributes
The `exec` driver will set the following client attributes:
* `driver.exec` - This will always be set to "1", indicating the
* `driver.exec` - This will be set to "1", indicating the
driver is available.
## Resource Isolation
@@ -41,10 +41,8 @@ The `exec` driver will set the following client attributes:
The resource isolation provided varies by the operating system of
the client and the configuration.
On Linux, Nomad will attempt to use cgroups, namespaces, and chroot
to isolate the resources of a process. If the Nomad agent is not
running as root many of these mechanisms cannot be used.
As a baseline, the task driver will just execute the command
with no additional resource isolation if none are available.
On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the
resources of a process; as such, the Nomad agent must be run as root.
On Windows, the task driver will just execute the command with no additional
resource isolation.


@@ -1,21 +0,0 @@
---
layout: "intro"
page_title: "Using the HTTP APIs with Authentication"
sidebar_current: "getting-started-apis"
description: |-
Using the HTTP APIs for authentication and secret access.
---
# Using the HTTP APIs with Authentication
Many of Nomad's capabilities are accessible via the HTTP API in addition to the
CLI.
TODO: Document Nomad's HTTP API
Congratulations! You now know all the basics to get started with Nomad.
## Next
Next, we have a page dedicated to
[next steps](/intro/getting-started/next-steps.html) depending on
what you would like to achieve.


@@ -0,0 +1,203 @@
---
layout: "intro"
page_title: "Clustering"
sidebar_current: "getting-started-cluster"
description: |-
Join another Nomad client to create your first cluster.
---
# Clustering
We have started our first agent and run a job against it in development mode.
This demonstrated the ease of use and the workflow of Nomad, but did not show how
this could be extended to a scalable, production-grade configuration. In this step,
we will create our first real cluster with multiple nodes.
## Starting the Server
The first step is to create the config file for the server. Either download
the file from the [repository here](#), or paste this into a file called
`server.hcl`:
```
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/server1"
# Enable the server
server {
enabled = true
# Self-elect, should be 3 or 5 for production
bootstrap_expect = 1
}
```
This is a fairly minimal server configuration file, but it
is enough to start an agent in server only mode and have it
elect as a leader. The major change that should be made for
production is to run more than one server, and to change the
corresponding `bootstrap_expect` value.
Once the file is created, start the agent in a new tab:
```
$ sudo nomad agent -config server.hcl
==> WARNING: Bootstrap mode enabled! Potentially unsafe operation.
==> Starting Nomad agent...
==> Nomad agent configuration:
Atlas: <disabled>
Client: false
Log Level: DEBUG
Region: global (DC: dc1)
Server: true
==> Nomad agent started! Log data will stream in below:
[INFO] serf: EventMemberJoin: nomad.global 127.0.0.1
[INFO] nomad: starting 4 scheduling worker(s) for [service batch _core]
[INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state
[WARN] serf: Failed to re-join any previously known node
[INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1)
[WARN] raft: Heartbeat timeout reached, starting election
[INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state
[DEBUG] raft: Votes needed: 1
[DEBUG] raft: Vote granted. Tally: 1
[INFO] raft: Election won. Tally: 1
[INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state
[INFO] nomad: cluster leadership acquired
```
We can see above that client mode is disabled, and that we are
only running as the server. This means that this server will manage
state and make scheduling decisions but will not run any tasks.
Now we need some agents to run tasks!
## Starting the Clients
Similar to the server, we must first configure the clients. Either download
the configuration for client1 and client2 from the [repository here](#), or
paste the following into `client1.hcl`:
```
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/client1"
# Enable the client
client {
enabled = true
# For demo assume we are talking to server1. For production,
# this should be like "nomad.service.consul:4647" and a system
# like Consul used for service discovery.
servers = ["127.0.0.1:4647"]
}
# Modify our port to avoid a collision with server1
ports {
http = 5656
}
```
Copy that file to `client2.hcl` and change the `data_dir` to
be "/tmp/client2" and the `http` port to 5657. Once you've created
both `client1.hcl` and `client2.hcl`, open a tab for each and
start the agents:
```
$ sudo nomad agent -config client1.hcl
==> Starting Nomad agent...
==> Nomad agent configuration:
Atlas: <disabled>
Client: true
Log Level: DEBUG
Region: global (DC: dc1)
Server: false
==> Nomad agent started! Log data will stream in below:
[DEBUG] client: applied fingerprints [host memory storage arch cpu]
[DEBUG] client: available drivers [docker exec]
[DEBUG] client: node registration complete
...
```
In the output we can see the agent is running in client mode only.
This agent will be available to run tasks but will not participate
in managing the cluster or making scheduling decisions.
Using the [`node-status` command](/docs/commands/node-status.html)
we should see both nodes in the `ready` state:
```
$ nomad node-status
ID DC Name Class Drain Status
e5239796-7285-3ed2-efe1-37cdc2d459d4 dc1 nomad <none> false ready
d12e4ab0-4206-bd33-ff75-e1367590eceb dc1 nomad <none> false ready
```
We now have a simple three node cluster running. The only difference
between a demo and a full production cluster is that we are running a
single server instead of three or five.
## Submit a Job
Now that we have a simple cluster, we can use it to schedule a job.
We should still have the `example.nomad` job file from before, but
verify that the `count` is still set to 3.
Then, use the [`run` command](/docs/commands/run.html) to submit the job:
```
$ nomad run example.nomad
==> Monitoring evaluation "2d742049-497f-c602-c56d-ae2a328a5671"
Evaluation triggered by job "example"
Allocation "44d46439-655d-701e-55ce-552ee74fbbd8" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache"
Allocation "624be24f-5992-0c75-742d-7f8dbd3044a2" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache"
Allocation "a133a2c7-cc3c-2f8c-8664-71d2389c7759" created: node "d12e4ab0-4206-bd33-ff75-e1367590eceb", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "2d742049-497f-c602-c56d-ae2a328a5671" finished with status "complete"
```
We can see in the output that the scheduler assigned two of the
tasks to one of the client nodes and the remaining task to the
second client.
We can again use the [`status` command](/docs/commands/status.html) to verify:
```
$ nomad status example
ID = example
Name = example
Type = service
Priority = 50
Datacenters = dc1
Status =
==> Evaluations
ID Priority TriggeredBy Status
2d742049-497f-c602-c56d-ae2a328a5671 50 job-register complete
==> Allocations
ID EvalID NodeID TaskGroup Desired Status
44d46439-655d-701e-55ce-552ee74fbbd8 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running
a133a2c7-cc3c-2f8c-8664-71d2389c7759 2d742049-497f-c602-c56d-ae2a328a5671 d12e4ab0-4206-bd33-ff75-e1367590eceb cache run running
624be24f-5992-0c75-742d-7f8dbd3044a2 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running
```
We can see that all our tasks have been allocated and are running.
Once we are satisfied that our job is happily running, we can tear
it down with `nomad stop`.
## Next Steps
We've now concluded the getting started guide, however there are a number
of [next steps](next-steps.html) to get started with Nomad.


@@ -8,52 +8,63 @@ description: |-
# Install Nomad
Nomad must first be installed on your machine. Nomad is distributed as
a [binary package](/downloads.html) for all supported platforms and
architectures. This page will not cover how to compile Nomad from source,
but compiling from source is covered in the [documentation](/docs/install/index.html)
for those who want to be sure they're compiling source they trust into
the final binary.
The task drivers that are available to Nomad vary by operating system,
for example Docker is only available on Linux machines. To simplify the
getting started experience, we will be working in a Vagrant environment.
Create a new directory, and download [this `Vagrantfile`](#).
## Installing Nomad
## Vagrant Setup
To install Nomad, find the [appropriate package](/downloads.html) for
your system and download it. Nomad is packaged as a zip archive.
Once you have created a new directory and downloaded the `Vagrantfile`,
you must create the virtual machine:
After downloading Nomad, unzip the package. Nomad runs as a single binary
named `nomad`. Any other files in the package can be safely removed and
Nomad will still function.
$ vagrant up
The final step is to make sure that `nomad` is available on the PATH.
See [this page](https://stackoverflow.com/questions/14637979/how-to-permanently-set-path-on-linux)
for instructions on setting the PATH on Linux and Mac.
[This page](https://stackoverflow.com/questions/1618280/where-can-i-set-path-to-make-exe-on-windows)
contains instructions for setting the PATH on Windows.
This will take a few minutes as the base Ubuntu box must be downloaded
and provisioned with both Docker and Nomad. Once this completes, you should
see output similar to:
Bringing machine 'default' up with 'vmware_fusion' provider...
==> default: Checking if box 'puphpet/ubuntu1404-x64' is up to date...
==> default: Machine is already running.
At this point the Vagrant box is running and ready to go.
## Verifying the Installation
After installing Nomad, verify the installation worked by opening a new
terminal session and checking that `nomad` is available. By executing
After starting the Vagrant box, verify the installation worked by connecting
to the box using SSH and checking that `nomad` is available. By executing
`nomad`, you should see help output similar to the following:
```
$ nomad
$ vagrant ssh
...
vagrant@nomad:~$ nomad
usage: nomad [--version] [--help] <command> [<args>]
Available commands are:
agent Runs a Nomad agent
agent-force-leave Force a member into the 'left' state
agent-info Display status information about the local agent
agent-join Join server nodes together
agent-members Display a list of known members and their status
node-drain Toggle drain mode on a given node
node-status Display status information about nodes
status Display status information about jobs
version Prints the Nomad version
agent Runs a Nomad agent
agent-info Display status information about the local agent
eval-monitor Monitor an evaluation interactively
node-drain Toggle drain mode on a given node
node-status Display status information about nodes
run Run a new job
server-force-leave Force a server into the 'left' state
server-join Join server nodes together
server-members Display a list of known servers and their status
status Display status information about jobs
stop Stop a running job
version Prints the Nomad version
```
If you get an error that Nomad could not be found, then your PATH environment
variable was not setup properly. Please go back and ensure that your PATH
variable contains the directory where Nomad was installed.
If you get an error that Nomad could not be found, then your Vagrant box
may not have provisioned correctly. Check any error messages that may have
occurred during `vagrant up`. You can always destroy the box and
re-create it.
## Next Steps
Vagrant is running and Nomad is installed. Let's [start Nomad](/intro/getting-started/running.html)!
Otherwise, Nomad is installed and ready to go!


@@ -0,0 +1,177 @@
---
layout: "intro"
page_title: "Jobs"
sidebar_current: "getting-started-jobs"
description: |-
Learn how to submit, modify and stop jobs in Nomad.
---
# Jobs
Jobs are the primary configuration that users interact with when using
Nomad. A job is a declarative specification of tasks that Nomad should run.
Jobs have a globally unique name and one or many task groups, which are themselves
collections of one or many tasks.
The format of the jobs is [documented here](/docs/jobspec/index.html). They
can either be specified in [HCL](https://github.com/hashicorp/hcl) or JSON,
though we recommend using JSON only when the configuration is generated by a machine.
## Running a Job
To get started, we will use the [`init` command](/docs/commands/init.html) which
generates a skeleton job file:
```
$ nomad init
Example job file written to example.nomad
$ cat example.nomad
# There can only be a single job definition per file.
# Create a job with ID and Name 'example'
job "example" {
# Run the job in the global region, which is the default.
# region = "global"
...
```
In this example job file, we have declared a single task 'redis' that uses
the Docker driver. The primary way you interact with Nomad
is with the [`run` command](/docs/commands/run.html). The `run` command takes
a job file and registers it with Nomad. This is used both to register new
jobs and to update existing jobs.
We can register our example job now:
```
$ nomad run example.nomad
==> Monitoring evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6"
Evaluation triggered by job "example"
Allocation "c1d2f085-7049-6c4a-4479-1b2310fdaba9" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6" finished with status "complete"
```
Anytime a job is updated, Nomad creates an evaluation to determine what
actions need to take place. In this case, because this is a new job, Nomad has
determined that an allocation should be created and has scheduled it on our
local agent.
To inspect the status of our job we use the [`status` command](/docs/commands/status.html):
```
$ nomad status example
ID = example
Name = example
Type = service
Priority = 50
Datacenters = dc1
Status =
==> Evaluations
ID Priority TriggeredBy Status
f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 50 job-register complete
==> Allocations
ID EvalID NodeID TaskGroup Desired Status
c1d2f085-7049-6c4a-4479-1b2310fdaba9 f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 1f43787c-7ab4-8d10-d2d6-1593ed06463a cache run running
```
Here we can see that the evaluation we created has completed, and that
it resulted in the creation of an allocation that is now running on the local node.
## Modifying a Job
The definition of a job is not static, and is meant to be updated over time.
You may update a job to change the Docker image to deploy a new application
version, or to change the count of a task group to scale with load.
For now, edit the `example.nomad` file to uncomment the count and set it to 3:
```
# Control the number of instances of this group.
# Defaults to 1
count = 3
```
Once you have finished modifying the job specification, use `nomad run` to
push the updated version of the job:
```
$ nomad run example.nomad
==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
Evaluation triggered by job "example"
Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
```
Because we set the count of the task group to three, Nomad created two
additional allocations to get to the desired state. Running the same job
specification again is idempotent: no new allocations will be created.
Now, let's try to do an application update. In this case, we will simply change
the version of redis we want to run. Edit the `example.nomad` file and change
the Docker image from "redis:latest" to "redis:2.8":
```
# Configure Docker driver with the image
config {
image = "redis:2.8"
}
```
This time we have not changed the number of task groups we want running,
but we've changed the task itself. This requires stopping the old tasks
and starting new tasks. Our example job is configured to do a rolling update,
doing a single update every 10 seconds. Use `run` to push the updated
specification now:
```
$ nomad run example.nomad
==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
Evaluation triggered by job "example"
Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
```
We can see that Nomad handled the update in three phases, updating only a
single task group at a time. The update strategy can be configured, but
rolling updates make it easy to upgrade an application at large scale.
## Stopping a Job
So far we've created, run and modified a job. The final step in a job lifecycle
is stopping the job. This is done with the [`stop` command](/docs/commands/stop.html):
```
$ nomad stop example
==> Monitoring evaluation "4b236340-d5ed-1838-be15-a896095d3ac9"
Evaluation triggered by job "example"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "4b236340-d5ed-1838-be15-a896095d3ac9" finished with status "complete"
```
When we stop a job, it creates an evaluation which is used to stop all
the existing allocations. This also deletes the job definition out of Nomad.
If we try to query the job status, we can see it is no longer registered:
```
$ nomad status example
Error querying job: Unexpected response code: 404 (job not found)
```
If we wanted to start the job again, we could simply `run` it again.
## Next Steps
Users of Nomad primarily interact with jobs, and we've now seen
how to create and scale our job, perform an application update,
and do a job tear down. Next we will add another Nomad
client to [create our first cluster](cluster.html).


@@ -0,0 +1,144 @@
---
layout: "intro"
page_title: "Running Nomad"
sidebar_current: "getting-started-running"
description: |-
Learn about the Nomad agent, and the lifecycle of running and stopping.
---
# Running Nomad
Nomad relies on a long-running agent on every machine in the cluster.
The agent can run either in server or client mode. Each region must
have at least one server, though a cluster of 3 or 5 servers is recommended.
A single server deployment is _**highly**_ discouraged as data loss is inevitable
in a failure scenario.
All other agents run in client mode. A client is a very lightweight
process that registers the host machine, performs heartbeating, and runs any tasks
that are assigned to it by the servers. The agent must be run on every node that
is part of the cluster so that the servers can assign work to those machines.
## Starting the Agent
For simplicity, we will run a single Nomad agent in development mode. This mode
is used to quickly start an agent that is acting as a client and server to test
job configurations or prototype interactions. It should _**not**_ be used in
production as it does not persist state.
```
$ sudo nomad agent -dev
==> Starting Nomad agent...
==> Nomad agent configuration:
Atlas: <disabled>
Client: true
Log Level: debug
Region: global (DC: dc1)
Server: true
==> Nomad agent started! Log data will stream in below:
[INFO] serf: EventMemberJoin: nomad.global 127.0.0.1
[INFO] nomad: starting 4 scheduling worker(s) for [service batch _core]
[INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state
[INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1)
[DEBUG] client: applied fingerprints [storage arch cpu host memory]
[DEBUG] client: available drivers [exec docker]
[WARN] raft: Heartbeat timeout reached, starting election
[INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state
[DEBUG] raft: Votes needed: 1
[DEBUG] raft: Vote granted. Tally: 1
[INFO] raft: Election won. Tally: 1
[INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state
[INFO] raft: Disabling EnableSingleNode (bootstrap)
[DEBUG] raft: Node 127.0.0.1:4647 updated peer set (2): [127.0.0.1:4647]
[INFO] nomad: cluster leadership acquired
[DEBUG] client: node registration complete
[DEBUG] client: updated allocations at index 1 (0 allocs)
[DEBUG] client: allocs: (added 0) (removed 0) (updated 0) (ignore 0)
[DEBUG] client: state updated to ready
```
As you can see, the Nomad agent has started and has output some log
data. From the log data, you can see that our agent is running in both
client and server mode, and has claimed leadership of the cluster.
Additionally, the local client has been registered and marked as ready.
-> **Note:** Typically any agent running in client mode must be run with root level
privilege. Nomad makes use of operating system primitives for resource isolation
which require elevated permissions. The agent will function as non-root, but
certain task drivers will not be available.
## Cluster Nodes
If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you
can see the registered nodes of the Nomad cluster:
```text
$ vagrant ssh
...
$ nomad node-status
ID DC Name Class Drain Status
72d3af97-144f-1e5f-94e5-df1516fe4add dc1 nomad <none> false ready
```
The output shows our Node ID, which is a randomly generated UUID,
its datacenter, node name, node class, drain mode, and current status.
We can see that our node is in the ready state, and task draining is
currently off.
The agent is also running in server mode, which means it is part of
the [gossip protocol](/docs/internals/gossip.html) used to connect all
the server instances together. We can view the members of the gossip
ring using the [`server-members`](/docs/commands/server-members.html) command:
```text
$ nomad server-members
Name Addr Port Status Proto Build DC Region
nomad.global 127.0.0.1 4648 alive 2 0.1.0dev dc1 global
```
The output shows our own agent, the address it is running on, its
health state, some version information, and the datacenter and region.
Additional metadata can be viewed by providing the `-detailed` flag.
## <a name="stopping"></a>Stopping the Agent
You can use `Ctrl-C` (the interrupt signal) to halt the agent.
By default, all signals will cause the agent to forcefully shutdown.
The agent [can be configured](/docs/agent/config.html) to gracefully
leave on either the interrupt or terminate signals.
After interrupting the agent, you should see it leave the cluster
and shut down:
```
^C==> Caught signal: interrupt
[DEBUG] http: Shutting down http server
[INFO] agent: requesting shutdown
[INFO] client: shutting down
[INFO] nomad: shutting down server
[WARN] serf: Shutdown without a Leave
[INFO] agent: shutdown complete
```
By gracefully leaving, Nomad clients update their status to prevent
further tasks from being scheduled and to start migrating any tasks that are
already assigned. Nomad servers notify their peers that they intend to leave.
When a server leaves, replication to that server stops. If a server fails,
replication continues to be attempted until the node recovers. Nomad will
automatically try to reconnect to _failed_ nodes, allowing it to recover from
certain network conditions, while _left_ nodes are no longer contacted.
If an agent is operating as a server, a graceful leave is important to avoid
causing a potential availability outage affecting the
[consensus protocol](/docs/internals/consensus.html). If a server does
forcefully exit and will not be returning to service, the
[`server-force-leave` command](/docs/commands/server-force-leave.html) should
be used to force the server from a _failed_ to a _left_ state.
## Next Steps
The development Nomad agent is up and running. Let's try to [run a job](jobs.html)!


@@ -1,18 +0,0 @@
---
layout: "intro"
page_title: "Running Nomad"
sidebar_current: "getting-started-running"
description: |-
Learn how to deploy Nomad into production, how to initialize it, configure it, etc.
---
# Running Nomad
This section will detail how to run Nomad on client machines. It should include
a sample upstart script and stuff
## Next
TODO: Fill in text here.
Next, we have a [short tutorial](/intro/getting-started/apis.html) on using
Nomad's HTTP APIs.


@@ -58,8 +58,12 @@
<a href="/intro/getting-started/running.html">Running Nomad</a>
</li>
<li<%= sidebar_current("getting-started-apis") %>>
<a href="/intro/getting-started/apis.html">HTTP API</a>
<li<%= sidebar_current("getting-started-jobs") %>>
<a href="/intro/getting-started/jobs.html">Jobs</a>
</li>
<li<%= sidebar_current("getting-started-cluster") %>>
<a href="/intro/getting-started/cluster.html">Clustering</a>
</li>
<li<%= sidebar_current("getting-started-nextsteps") %>>