From 2db143cba0410259f495ccfe6eeb4c0c35fd0b2c Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 22 Sep 2015 12:40:21 -0700 Subject: [PATCH 01/38] command/agent: add bootstrap flags --- command/agent/command.go | 60 +++++++++++++++++++----- website/source/docs/agent/config.html.md | 3 ++ 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 3ac8aa12b..5ff91619b 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -67,6 +67,9 @@ func (c *Command) readConfig() *Config { flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "") flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "") + // Server-only options + flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "") + // General options flags.Var((*sliceflag.StringFlag)(&configPath), "config", "config") flags.StringVar(&cmdConfig.BindAddr, "bind", "", "") @@ -121,17 +124,41 @@ func (c *Command) readConfig() *Config { // Merge any CLI options over config file options config = config.Merge(cmdConfig) - // Check that we have a data-dir if we are a server - if !dev && config.DataDir == "" { - c.Ui.Error("Must specify data directory") - return nil - } - // Set the version info config.Revision = c.Revision config.Version = c.Version config.VersionPrerelease = c.VersionPrerelease + if dev { + // Skip validation for dev mode + return config + } + + // Check that we have a data-dir + if config.DataDir == "" { + c.Ui.Error("Must specify data directory") + return nil + } + + // Check the bootstrap flags + if config.Server.Bootstrap || config.Server.BootstrapExpect > 0 { + if !config.Server.Enabled { + c.Ui.Error("Bootstrap requires server mode to be enabled") + return nil + } + } + if config.Server.Bootstrap && config.Server.BootstrapExpect > 0 { + c.Ui.Error("Bootstrap mode and BootstrapExpect are mutually exclusive") + return nil + } + if config.Server.BootstrapExpect == 1 { + config.Server.Bootstrap = true + 
config.Server.BootstrapExpect = 0 + } + if config.Server.Bootstrap { + c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.") + } + return config } @@ -549,25 +576,32 @@ General Options (clients and servers): Name of the region the Nomad agent will be a member of. By default this value is set to "global". -Role-Specific Options: - - -client - Enable client mode for the agent. Client mode enables a given node - to be evaluated for allocations. If client mode is not enabled, - no work will be scheduled to the agent. - -dev Start the agent in development mode. This enables a pre-configured dual-role agent (client + server) which is useful for developing or testing Nomad. No other configuration is required to start the agent in this mode. +Server Options: + -server Enable server mode for the agent. Agents in server mode are clustered together and handle the additional responsibility of leader election, data replication, and scheduling work onto eligible client nodes. + -bootstrap-expect= + Configures the expected number of servers nodes to wait for before + bootstrapping the cluster. Once servers have joined eachother, + Nomad initiates the bootstrap process. + +Client Options: + + -client + Enable client mode for the agent. Client mode enables a given node + to be evaluated for allocations. If client mode is not enabled, + no work will be scheduled to the agent. + Atlas Options: -atlas= diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index 44164cd29..cd6e27be8 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -236,6 +236,9 @@ A subset of the available Nomad agent configuration can optionally be passed in via CLI arguments. The `agent` command accepts the following arguments: * `-bind=
`: Equivalent to the [bind_addr](#bind_addr) config option. +* `-bootstrap`: Equivalent to the [bootstrap](#bootstrap) config option. +* `-bootstrap-expect=`: Equivalent to the + [bootstrap_expect](#bootstrap_expect) config option. * `-config=`: Specifies the path to a configuration file or a directory of configuration files to load. Can be specified multiple times. * `-data-dir=`: Equivalent to the [data_dir](#data_dir) config option. From 8c76a1bfc01c1b5e72081e58c094ab6f0f00bffc Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 22 Sep 2015 14:04:40 -0700 Subject: [PATCH 02/38] agent: test agent command args --- command/agent/command_test.go | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 command/agent/command_test.go diff --git a/command/agent/command_test.go b/command/agent/command_test.go new file mode 100644 index 000000000..3dbb13504 --- /dev/null +++ b/command/agent/command_test.go @@ -0,0 +1,63 @@ +package agent + +import ( + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/mitchellh/cli" +) + +func TestCommand_Implements(t *testing.T) { + var _ cli.Command = &Command{} +} + +func TestCommand_Args(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "nomad") + if err != nil { + t.Fatalf("err: %s", err) + } + defer os.RemoveAll(tmpDir) + + type tcase struct { + args []string + errOut string + } + tcases := []tcase{ + { + []string{}, + "Must specify data directory", + }, + { + []string{"-data-dir=" + tmpDir, "-bootstrap-expect=1"}, + "Bootstrap requires server mode to be enabled", + }, + { + []string{"-data-dir=" + tmpDir, "-server", "-bootstrap-expect=1"}, + "WARNING: Bootstrap mode enabled!", + }, + } + for _, tc := range tcases { + // Make a new command. We pre-emptively close the shutdownCh + // so that the command exits immediately instead of blocking. 
+ ui := new(cli.MockUi) + shutdownCh := make(chan struct{}) + close(shutdownCh) + cmd := &Command{ + Ui: ui, + ShutdownCh: shutdownCh, + } + + if code := cmd.Run(tc.args); code != 1 { + t.Fatalf("args: %v\nexit: %d\n", tc.args, code) + } + + if expect := tc.errOut; expect != "" { + out := ui.ErrorWriter.String() + if !strings.Contains(out, expect) { + t.Fatalf("expect to find %q\n\n%s", expect, out) + } + } + } +} From f2744604063868952fee9e41a7db3756b5662d21 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 22 Sep 2015 14:08:56 -0700 Subject: [PATCH 03/38] website: remove bootstrap flag --- website/source/docs/agent/config.html.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index cd6e27be8..bd34a1e50 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -152,17 +152,11 @@ configured on client nodes. * `enabled`: A boolean indicating if server mode should be enabled for the local agent. All other server options depend on this value being set. Defaults to `false`. - * `bootstrap`: A boolean indicating if the server should be started in - bootstrap mode. Bootstrap mode is a special case mode used for easily - starting a single-server Nomad server cluster. This mode of operation does - not provide any fault tolerance and is not recommended for production - environments. Defaults to `false`. * `bootstrap_expect`: This is an integer representing the number of server - nodes to wait for before bootstrapping. This is a safer alternative to - bootstrap mode, as there will never be a single point-of-failure. It is most - common to use the odd-numbered integers `3` or `5` for this value, depending - on the cluster size. A value of `1` is functionally equivalent to bootstrap - mode and is not recommended. + nodes to wait for before bootstrapping. 
It is most common to use the + odd-numbered integers `3` or `5` for this value, depending on the cluster + size. A value of `1` does not provide any fault tolerance and is not + recommended for production use cases. * `data_dir`: This is the data directory used for server-specific data, including the replicated log. By default, this directory lives inside of the [data_dir](#data_dir) in the "server" sub-path. @@ -236,7 +230,6 @@ A subset of the available Nomad agent configuration can optionally be passed in via CLI arguments. The `agent` command accepts the following arguments: * `-bind=
`: Equivalent to the [bind_addr](#bind_addr) config option. -* `-bootstrap`: Equivalent to the [bootstrap](#bootstrap) config option. * `-bootstrap-expect=`: Equivalent to the [bootstrap_expect](#bootstrap_expect) config option. * `-config=`: Specifies the path to a configuration file or a directory of From fbbe4a497329033f57b8f2bb2d105112c9be0355 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 22 Sep 2015 14:25:43 -0700 Subject: [PATCH 04/38] agent: remove explicit Bootstrap option in favor of BootstrapExpect --- command/agent/agent.go | 9 +++++---- command/agent/agent_test.go | 25 +++++++++++++++++++++++++ command/agent/command.go | 14 ++------------ command/agent/config.go | 7 ------- command/agent/config_test.go | 4 ---- 5 files changed, 32 insertions(+), 27 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index d05e3271f..c05bc0ed9 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -77,11 +77,12 @@ func (a *Agent) serverConfig() (*nomad.Config, error) { if a.config.NodeName != "" { conf.NodeName = a.config.NodeName } - if a.config.Server.Bootstrap { - conf.Bootstrap = a.config.Server.Bootstrap - } if a.config.Server.BootstrapExpect > 0 { - conf.BootstrapExpect = a.config.Server.BootstrapExpect + if a.config.Server.BootstrapExpect == 1 { + conf.Bootstrap = true + } else { + conf.BootstrapExpect = a.config.Server.BootstrapExpect + } } if a.config.DataDir != "" { conf.DataDir = filepath.Join(a.config.DataDir, "server") diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index dfcf29204..ab614861d 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -162,4 +162,29 @@ func TestAgent_ServerConfig(t *testing.T) { if addr := out.SerfConfig.MemberlistConfig.BindAddr; addr != "127.0.0.3" { t.Fatalf("expect 127.0.0.3, got: %s", addr) } + + // Properly handles the bootstrap flags + conf.Server.BootstrapExpect = 1 + out, err = a.serverConfig() + if err != nil { + t.Fatalf("err: %s", 
err) + } + if !out.Bootstrap { + t.Fatalf("should have set bootstrap mode") + } + if out.BootstrapExpect != 0 { + t.Fatalf("boostrap expect should be 0") + } + + conf.Server.BootstrapExpect = 3 + out, err = a.serverConfig() + if err != nil { + t.Fatalf("err: %s", err) + } + if out.Bootstrap { + t.Fatalf("bootstrap mode should be disabled") + } + if out.BootstrapExpect != 3 { + t.Fatalf("should have bootstrap-expect = 3") + } } diff --git a/command/agent/command.go b/command/agent/command.go index 5ff91619b..1e2906049 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -141,21 +141,11 @@ func (c *Command) readConfig() *Config { } // Check the bootstrap flags - if config.Server.Bootstrap || config.Server.BootstrapExpect > 0 { - if !config.Server.Enabled { - c.Ui.Error("Bootstrap requires server mode to be enabled") - return nil - } - } - if config.Server.Bootstrap && config.Server.BootstrapExpect > 0 { - c.Ui.Error("Bootstrap mode and BootstrapExpect are mutually exclusive") + if config.Server.BootstrapExpect > 0 && !config.Server.Enabled { + c.Ui.Error("Bootstrap requires server mode to be enabled") return nil } if config.Server.BootstrapExpect == 1 { - config.Server.Bootstrap = true - config.Server.BootstrapExpect = 0 - } - if config.Server.Bootstrap { c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.") } diff --git a/command/agent/config.go b/command/agent/config.go index 775d79fab..c39aa0dcb 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -146,10 +146,6 @@ type ServerConfig struct { // Enabled controls if we are a server Enabled bool `hcl:"enabled"` - // Bootstrap is used to bring up the first Consul server, and - // permits that node to elect itself leader - Bootstrap bool `hcl:"bootstrap"` - // BootstrapExpect tries to automatically bootstrap the Consul cluster, // by witholding peers until enough servers join. 
BootstrapExpect int `hcl:"bootstrap_expect"` @@ -350,9 +346,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.Enabled { result.Enabled = true } - if b.Bootstrap { - result.Bootstrap = true - } if b.BootstrapExpect > 0 { result.BootstrapExpect = b.BootstrapExpect } diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 5269d6e23..3c6f6d38f 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -39,7 +39,6 @@ func TestConfig_Merge(t *testing.T) { }, Server: &ServerConfig{ Enabled: false, - Bootstrap: false, BootstrapExpect: 1, DataDir: "/tmp/data1", ProtocolVersion: 1, @@ -91,7 +90,6 @@ func TestConfig_Merge(t *testing.T) { }, Server: &ServerConfig{ Enabled: true, - Bootstrap: true, BootstrapExpect: 2, DataDir: "/tmp/data2", ProtocolVersion: 2, @@ -341,7 +339,6 @@ func TestConfig_LoadConfigString(t *testing.T) { }, Server: &ServerConfig{ Enabled: true, - Bootstrap: true, BootstrapExpect: 5, DataDir: "/tmp/data", ProtocolVersion: 3, @@ -409,7 +406,6 @@ client { } server { enabled = true - bootstrap = true bootstrap_expect = 5 data_dir = "/tmp/data" protocol_version = 3 From b43e2629238fbc34bde7de33a1f70fd1995e48bf Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 16:32:05 -0700 Subject: [PATCH 05/38] Disable exec/java/qemu when non-root on non-windows OSes --- client/driver/exec.go | 8 +++++++- client/driver/java.go | 7 +++++++ client/driver/qemu.go | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/client/driver/exec.go b/client/driver/exec.go index cda0a4989..cad95dd26 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -2,7 +2,9 @@ package driver import ( "fmt" + "runtime" "strings" + "syscall" "time" "github.com/hashicorp/nomad/client/config" @@ -30,7 +32,11 @@ func NewExecDriver(ctx *DriverContext) Driver { } func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { - // We can always do a fork/exec + // Only 
enable if we are root when running on non-windows systems. + if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + return false, nil + } + node.Attributes["driver.exec"] = "1" return true, nil } diff --git a/client/driver/java.go b/client/driver/java.go index de44dfb33..08d6c09f6 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -9,7 +9,9 @@ import ( "os/exec" "path" "path/filepath" + "runtime" "strings" + "syscall" "time" "github.com/hashicorp/nomad/client/config" @@ -36,6 +38,11 @@ func NewJavaDriver(ctx *DriverContext) Driver { } func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { + // Only enable if we are root when running on non-windows systems. + if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + return false, nil + } + // Find java version var out bytes.Buffer var erOut bytes.Buffer diff --git a/client/driver/qemu.go b/client/driver/qemu.go index 0a57bbda2..38760ab55 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -13,7 +13,9 @@ import ( "os/exec" "path/filepath" "regexp" + "runtime" "strings" + "syscall" "time" "github.com/hashicorp/nomad/client/config" @@ -52,6 +54,11 @@ func NewQemuDriver(ctx *DriverContext) Driver { } func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { + // Only enable if we are root when running on non-windows systems. 
+ if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + return false, nil + } + outBytes, err := exec.Command("qemu-system-x86_64", "-version").Output() if err != nil { return false, nil From 06de892626f1f6ee580f561c44df906bcefdc9b3 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 16:44:11 -0700 Subject: [PATCH 06/38] Update exec_linux to assume it is running as root --- client/executor/exec_linux.go | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/client/executor/exec_linux.go b/client/executor/exec_linux.go index 21d115c3f..dd7c32253 100644 --- a/client/executor/exec_linux.go +++ b/client/executor/exec_linux.go @@ -10,7 +10,6 @@ import ( "os/user" "strconv" "strings" - "syscall" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/command" @@ -31,16 +30,8 @@ func NewExecutor() Executor { // TODO: In a follow-up PR make it so this only happens once per client. // Fingerprinting shouldn't happen per task. - // Check if the process is has root capabilities. - e.root = syscall.Geteuid() == 0 - - // Check if this process can set uid. - if e.root { - e.setUidEnabled = true - } - - // Check that cgroups are available. Must be root to modify it. - if _, err := os.Stat(cgroupMount); err == nil && e.root { + // Check that cgroups are available. + if _, err := os.Stat(cgroupMount); err == nil { e.cgroupEnabled = true } @@ -53,8 +44,6 @@ type LinuxExecutor struct { user *user.User // Finger print capabilities. - root bool - setUidEnabled bool cgroupEnabled bool // Isolation configurations. @@ -152,11 +141,9 @@ func (e *LinuxExecutor) Start() error { // spawned process. Note that we will only do this if we can call SetUID. // Otherwise we'll just run the other process as our current (non-root) // user. This means we aren't forced to run nomad as root. 
- if e.setUidEnabled { - if err := e.runAs("nobody"); err == nil && e.user != nil { - e.cmd.SetUID(e.user.Uid) - e.cmd.SetGID(e.user.Gid) - } + if err := e.runAs("nobody"); err == nil && e.user != nil { + e.cmd.SetUID(e.user.Uid) + e.cmd.SetGID(e.user.Gid) } return e.spawnDaemon() From 891d8b8f47cdd3b8a3a2eaeb7773ea6e0d3b82c6 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 17:10:03 -0700 Subject: [PATCH 07/38] Guard tests --- client/alloc_runner_test.go | 5 +++++ client/client_test.go | 5 +++++ client/task_runner_test.go | 5 +++++ client/testutil/driver_compatible.go | 13 +++++++++++++ 4 files changed, 28 insertions(+) create mode 100644 client/testutil/driver_compatible.go diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index 73b4da19f..2f3462958 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -7,6 +7,8 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + + clientTestUtil "github.com/hashicorp/nomad/client/testutil" ) type MockAllocStateUpdater struct { @@ -32,6 +34,7 @@ func testAllocRunner() (*MockAllocStateUpdater, *AllocRunner) { } func TestAllocRunner_SimpleRun(t *testing.T) { + clientTestUtil.ExecCompatible(t) upd, ar := testAllocRunner() go ar.Run() defer ar.Destroy() @@ -48,6 +51,7 @@ func TestAllocRunner_SimpleRun(t *testing.T) { } func TestAllocRunner_Destroy(t *testing.T) { + clientTestUtil.ExecCompatible(t) upd, ar := testAllocRunner() // Ensure task takes some time @@ -79,6 +83,7 @@ func TestAllocRunner_Destroy(t *testing.T) { } func TestAllocRunner_Update(t *testing.T) { + clientTestUtil.ExecCompatible(t) upd, ar := testAllocRunner() // Ensure task takes some time diff --git a/client/client_test.go b/client/client_test.go index cb14030a1..98a36c7f3 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -15,6 +15,8 @@ import ( "github.com/hashicorp/nomad/nomad/mock" 
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + + clientTestUtil "github.com/hashicorp/nomad/client/testutil" ) var nextPort uint32 = 16000 @@ -137,6 +139,7 @@ func TestClient_Fingerprint(t *testing.T) { } func TestClient_Drivers(t *testing.T) { + clientTestUtil.ExecCompatible(t) c := testClient(t, nil) defer c.Shutdown() @@ -246,6 +249,7 @@ func TestClient_UpdateAllocStatus(t *testing.T) { } func TestClient_WatchAllocs(t *testing.T) { + clientTestUtil.ExecCompatible(t) s1, _ := testServer(t, nil) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) @@ -314,6 +318,7 @@ func TestClient_WatchAllocs(t *testing.T) { } func TestClient_SaveRestoreState(t *testing.T) { + clientTestUtil.ExecCompatible(t) s1, _ := testServer(t, nil) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) diff --git a/client/task_runner_test.go b/client/task_runner_test.go index 32e8b4782..2756ae02d 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -11,6 +11,8 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + + clientTestUtil "github.com/hashicorp/nomad/client/testutil" ) func testLogger() *log.Logger { @@ -44,6 +46,7 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) { } func TestTaskRunner_SimpleRun(t *testing.T) { + clientTestUtil.ExecCompatible(t) upd, tr := testTaskRunner() go tr.Run() defer tr.Destroy() @@ -79,6 +82,7 @@ func TestTaskRunner_SimpleRun(t *testing.T) { } func TestTaskRunner_Destroy(t *testing.T) { + clientTestUtil.ExecCompatible(t) upd, tr := testTaskRunner() // Change command to ensure we run for a bit @@ -113,6 +117,7 @@ func TestTaskRunner_Destroy(t *testing.T) { } func TestTaskRunner_Update(t *testing.T) { + clientTestUtil.ExecCompatible(t) _, tr := testTaskRunner() // Change command to ensure we run for a bit diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go new file mode 
100644 index 000000000..2f34508a7 --- /dev/null +++ b/client/testutil/driver_compatible.go @@ -0,0 +1,13 @@ +package testutil + +import ( + "runtime" + "syscall" + "testing" +) + +func ExecCompatible(t *testing.T) { + if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + t.Skip("Must be root on non-windows environments to run test") + } +} From 33381f0fb8da9eae3a19667b182b3bedc54ccf72 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 17:12:52 -0700 Subject: [PATCH 08/38] Update Qemu Fingerprint check --- client/driver/qemu.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/client/driver/qemu.go b/client/driver/qemu.go index 38760ab55..203d826da 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -54,8 +54,9 @@ func NewQemuDriver(ctx *DriverContext) Driver { } func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { - // Only enable if we are root when running on non-windows systems. - if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + // Only enable if we are root. This check also disables on Windows as + // Geteuid() returns -1. + if syscall.Geteuid() != 0 { return false, nil } From 3c12f4bd20c5d451e174c5c540d619c9bf603e26 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 17:36:44 -0700 Subject: [PATCH 09/38] Fix qemu check and add debug lines when a driver is disabled --- client/driver/exec.go | 1 + client/driver/java.go | 1 + client/driver/qemu.go | 6 +++--- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/client/driver/exec.go b/client/driver/exec.go index cad95dd26..4f41cc847 100644 --- a/client/driver/exec.go +++ b/client/driver/exec.go @@ -34,6 +34,7 @@ func NewExecDriver(ctx *DriverContext) Driver { func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { // Only enable if we are root when running on non-windows systems. 
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling") return false, nil } diff --git a/client/driver/java.go b/client/driver/java.go index 08d6c09f6..31960512e 100644 --- a/client/driver/java.go +++ b/client/driver/java.go @@ -40,6 +40,7 @@ func NewJavaDriver(ctx *DriverContext) Driver { func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { // Only enable if we are root when running on non-windows systems. if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling") return false, nil } diff --git a/client/driver/qemu.go b/client/driver/qemu.go index 203d826da..32589c696 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -54,9 +54,9 @@ func NewQemuDriver(ctx *DriverContext) Driver { } func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { - // Only enable if we are root. This check also disables on Windows as - // Geteuid() returns -1. - if syscall.Geteuid() != 0 { + // Only enable if we are root when running on non-windows systems. 
+ if runtime.GOOS != "windows" && syscall.Geteuid() != 0 { + d.logger.Printf("[DEBUG] driver.qemu: must run as root user, disabling") return false, nil } From 03215a1f90423e0a56a816b817fd41ece3daba3f Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 18:31:13 -0700 Subject: [PATCH 10/38] Update website documentation on the fork/exec driver + Vagrantfile fixes --- website/Vagrantfile | 3 ++- website/source/docs/drivers/exec.html.md | 18 ++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/website/Vagrantfile b/website/Vagrantfile index b735df1f5..1a0f6f62a 100644 --- a/website/Vagrantfile +++ b/website/Vagrantfile @@ -33,8 +33,9 @@ sudo apt-get install -y nodejs SCRIPT Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - config.vm.box = "chef/ubuntu-12.04" + config.vm.box = "bento/ubuntu-12.04" config.vm.network "private_network", ip: "33.33.30.10" + config.vm.network "forwarded_port", guest: 4567, host: 8080 config.vm.provision "shell", inline: $script, privileged: false config.vm.synced_folder ".", "/vagrant", type: "rsync" end diff --git a/website/source/docs/drivers/exec.html.md b/website/source/docs/drivers/exec.html.md index 2a4d4e5c4..e480f38bd 100644 --- a/website/source/docs/drivers/exec.html.md +++ b/website/source/docs/drivers/exec.html.md @@ -25,15 +25,15 @@ The `exec` driver supports the following configuration in the job spec: ## Client Requirements -The `exec` driver has no special requirements and can run on all -supported operating systems. The resource isolation primitives vary -by OS. +The `exec` driver can run on all supported operating systems but to provide +proper isolation the client must be run as root on non-Windows operating systems. +Further, to support cgroups, `/sys/fs/cgroups/` must be mounted. 
## Client Attributes The `exec` driver will set the following client attributes: -* `driver.exec` - This will always be set to "1", indicating the +* `driver.exec` - This will be set to "1", indicating the driver is available. ## Resource Isolation @@ -41,10 +41,8 @@ The `exec` driver will set the following client attributes: The resource isolation provided varies by the operating system of the client and the configuration. -On Linux, Nomad will attempt to use cgroups, namespaces, and chroot -to isolate the resources of a process. If the Nomad agent is not -running as root many of these mechanisms cannot be used. - -As a baseline, the task driver will just execute the command -with no additional resource isolation if none are available. +On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the +resources of a process and as such the Nomad agent must be run as root. +On Windows, the task driver will just execute the command with no additional +resource isolation. From 596da6cb7e237e895fa14173e3fb91fde9e11df7 Mon Sep 17 00:00:00 2001 From: Chris Bednarski Date: Tue, 22 Sep 2015 18:33:29 -0700 Subject: [PATCH 11/38] Initialize error 'no networks avaialable' before yielding to find an IP address (maybe there are no IP addresses) --- nomad/structs/network.go | 1 + 1 file changed, 1 insertion(+) diff --git a/nomad/structs/network.go b/nomad/structs/network.go index f19a87ded..2a1e61ae8 100644 --- a/nomad/structs/network.go +++ b/nomad/structs/network.go @@ -136,6 +136,7 @@ func (idx *NetworkIndex) yieldIP(cb func(net *NetworkResource, ip net.IP) bool) // AssignNetwork is used to assign network resources given an ask. 
// If the ask cannot be satisfied, returns nil func (idx *NetworkIndex) AssignNetwork(ask *NetworkResource) (out *NetworkResource, err error) { + err = fmt.Errorf("no networks available") idx.yieldIP(func(n *NetworkResource, ip net.IP) (stop bool) { // Convert the IP to a string ipStr := ip.String() From 289a9766f3d74abf64adb0da5d289377b09da0c8 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Sep 2015 18:48:42 -0700 Subject: [PATCH 12/38] Update package import name --- client/alloc_runner_test.go | 8 ++++---- client/client_test.go | 8 ++++---- client/task_runner_test.go | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index 2f3462958..cfa0e530a 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -8,7 +8,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" - clientTestUtil "github.com/hashicorp/nomad/client/testutil" + ctestutil "github.com/hashicorp/nomad/client/testutil" ) type MockAllocStateUpdater struct { @@ -34,7 +34,7 @@ func testAllocRunner() (*MockAllocStateUpdater, *AllocRunner) { } func TestAllocRunner_SimpleRun(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) upd, ar := testAllocRunner() go ar.Run() defer ar.Destroy() @@ -51,7 +51,7 @@ func TestAllocRunner_SimpleRun(t *testing.T) { } func TestAllocRunner_Destroy(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) upd, ar := testAllocRunner() // Ensure task takes some time @@ -83,7 +83,7 @@ func TestAllocRunner_Destroy(t *testing.T) { } func TestAllocRunner_Update(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) upd, ar := testAllocRunner() // Ensure task takes some time diff --git a/client/client_test.go b/client/client_test.go index 98a36c7f3..62689f975 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -16,7 +16,7 @@ import ( 
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" - clientTestUtil "github.com/hashicorp/nomad/client/testutil" + ctestutil "github.com/hashicorp/nomad/client/testutil" ) var nextPort uint32 = 16000 @@ -139,7 +139,7 @@ func TestClient_Fingerprint(t *testing.T) { } func TestClient_Drivers(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) c := testClient(t, nil) defer c.Shutdown() @@ -249,7 +249,7 @@ func TestClient_UpdateAllocStatus(t *testing.T) { } func TestClient_WatchAllocs(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) s1, _ := testServer(t, nil) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) @@ -318,7 +318,7 @@ func TestClient_WatchAllocs(t *testing.T) { } func TestClient_SaveRestoreState(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) s1, _ := testServer(t, nil) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) diff --git a/client/task_runner_test.go b/client/task_runner_test.go index 2756ae02d..2bb190d4c 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -12,7 +12,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" - clientTestUtil "github.com/hashicorp/nomad/client/testutil" + ctestutil "github.com/hashicorp/nomad/client/testutil" ) func testLogger() *log.Logger { @@ -46,7 +46,7 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) { } func TestTaskRunner_SimpleRun(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) upd, tr := testTaskRunner() go tr.Run() defer tr.Destroy() @@ -82,7 +82,7 @@ func TestTaskRunner_SimpleRun(t *testing.T) { } func TestTaskRunner_Destroy(t *testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) upd, tr := testTaskRunner() // Change command to ensure we run for a bit @@ -117,7 +117,7 @@ func TestTaskRunner_Destroy(t *testing.T) { } func TestTaskRunner_Update(t 
*testing.T) { - clientTestUtil.ExecCompatible(t) + ctestutil.ExecCompatible(t) _, tr := testTaskRunner() // Change command to ensure we run for a bit From 3f5dae9b91c38318ffa69750a7dfbce37ccffb32 Mon Sep 17 00:00:00 2001 From: Ryan Uber Date: Tue, 22 Sep 2015 20:01:57 -0700 Subject: [PATCH 13/38] agent: json decode directly to structs in api --- command/agent/http.go | 16 ++-------------- command/agent/job_endpoint.go | 2 +- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/command/agent/http.go b/command/agent/http.go index 16ea31f9a..8ecd789f8 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -12,7 +12,6 @@ import ( "time" "github.com/hashicorp/nomad/nomad/structs" - "github.com/mitchellh/mapstructure" ) const ( @@ -191,20 +190,9 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque } // decodeBody is used to decode a JSON request body -func decodeBody(req *http.Request, out interface{}, cb func(interface{}) error) error { - var raw interface{} +func decodeBody(req *http.Request, out interface{}) error { dec := json.NewDecoder(req.Body) - if err := dec.Decode(&raw); err != nil { - return err - } - - // Invoke the callback prior to decode - if cb != nil { - if err := cb(raw); err != nil { - return err - } - } - return mapstructure.Decode(raw, out) + return dec.Decode(&out) } // setIndex is used to set the index response header diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index ceaa8096f..67672d26b 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -157,7 +157,7 @@ func (s *HTTPServer) jobQuery(resp http.ResponseWriter, req *http.Request, func (s *HTTPServer) jobUpdate(resp http.ResponseWriter, req *http.Request, jobName string) (interface{}, error) { var args structs.JobRegisterRequest - if err := decodeBody(req, &args, nil); err != nil { + if err := decodeBody(req, &args); err != nil { return nil, CodedError(400, err.Error()) } if args.Job == 
nil { From 9afa2aeab6e519967266082cc80aef1d62e84573 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 21:45:20 -0700 Subject: [PATCH 14/38] scheduler: Allow rolling update, assign eval first.Fixes #91 --- scheduler/generic_sched.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index f2a22092d..030f10bbc 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -96,19 +96,19 @@ func (s *GenericScheduler) setStatus(status, desc string) error { // Process is used to handle a single evaluation func (s *GenericScheduler) Process(eval *structs.Evaluation) error { + // Store the evaluation + s.eval = eval + // Verify the evaluation trigger reason is understood switch eval.TriggeredBy { case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate, - structs.EvalTriggerJobDeregister: + structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate: default: desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) return s.setStatus(structs.EvalStatusFailed, desc) } - // Store the evaluation - s.eval = eval - // Retry up to the maxScheduleAttempts limit := maxServiceScheduleAttempts if s.batch { From 9bcc5b444b2ddc315fb0a1d7dc2c14dd06df24a5 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 15:23:30 -0700 Subject: [PATCH 15/38] agent: Do not override default log levels --- command/agent/command.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/command.go b/command/agent/command.go index 1e2906049..49093588a 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -76,7 +76,7 @@ func (c *Command) readConfig() *Config { flags.StringVar(&cmdConfig.Region, "region", "", "") flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "") flags.StringVar(&cmdConfig.Datacenter, "dc", "", "") - flags.StringVar(&cmdConfig.LogLevel, "log-level", "info", "") + 
flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "") flags.StringVar(&cmdConfig.NodeName, "node", "", "") // Atlas options From 81064ebbd8955704d61aaf4e2336eca1adddc789 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 15:24:01 -0700 Subject: [PATCH 16/38] website: getting started --- .../intro/getting-started/install.html.md | 72 ++++++++++--------- website/source/layouts/intro.erb | 12 +++- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/website/source/intro/getting-started/install.html.md b/website/source/intro/getting-started/install.html.md index cbc71a344..668394aa0 100644 --- a/website/source/intro/getting-started/install.html.md +++ b/website/source/intro/getting-started/install.html.md @@ -8,52 +8,60 @@ description: |- # Install Nomad -Nomad must first be installed on your machine. Nomad is distributed as -a [binary package](/downloads.html) for all supported platforms and -architectures. This page will not cover how to compile Nomad from source, -but compiling from source is covered in the [documentation](/docs/install/index.html) -for those who want to be sure they're compiling source they trust into -the final binary. +The task drivers that are available to Nomad vary by operating system, +for example Docker is only available on Linux machines. To simplify the +getting started experience, we will be working in a Vagrant environment. +Create a new directory, and download [this `Vagrantfile`](#). -## Installing Nomad +## Vagrant Setup -To install Nomad, find the [appropriate package](/downloads.html) for -your system and download it. Nomad is packaged as a zip archive. +Once you have created a new directory and downloaded the `Vagrantfile` +you must create the virtual the machine: -After downloading Nomad, unzip the package. Nomad runs as a single binary -named `nomad`. Any other files in the package can be safely removed and -Nomad will still function. 
+ $ vagrant up -The final step is to make sure that `nomad` is available on the PATH. -See [this page](https://stackoverflow.com/questions/14637979/how-to-permanently-set-path-on-linux) -for instructions on setting the PATH on Linux and Mac. -[This page](https://stackoverflow.com/questions/1618280/where-can-i-set-path-to-make-exe-on-windows) -contains instructions for setting the PATH on Windows. +This will take a few minutes as the base Ubuntu box must be downloaded +and provisioned with both Docker and Nomad. Once this completes, you should +see output similar to: + + Bringing machine 'default' up with 'vmware_fusion' provider... + ==> default: Checking if box 'puphpet/ubuntu1404-x64' is up to date... + ==> default: Machine is already running. + +At this point the Vagrant box is running and ready to go. ## Verifying the Installation -After installing Nomad, verify the installation worked by opening a new -terminal session and checking that `nomad` is available. By executing +After starting the Vagrant box, verify the installation worked by connecting +to the box using SSH and checking that `nomad` is available. By executing `nomad`, you should see help output similar to the following: ``` -$ nomad +$ vagrant ssh +... 
+ +vagrant@nomad:~$ nomad usage: nomad [--version] [--help] [] Available commands are: - agent Runs a Nomad agent - agent-force-leave Force a member into the 'left' state - agent-info Display status information about the local agent - agent-join Join server nodes together - agent-members Display a list of known members and their status - node-drain Toggle drain mode on a given node - node-status Display status information about nodes - status Display status information about jobs - version Prints the Nomad version + agent Runs a Nomad agent + agent-info Display status information about the local agent + eval-monitor Monitor an evaluation interactively + node-drain Toggle drain mode on a given node + node-status Display status information about nodes + run Run a new job + server-force-leave Force a server into the 'left' state + server-join Join server nodes together + server-members Display a list of known servers and their status + status Display status information about jobs + stop Stop a running job + version Prints the Nomad version ``` -If you get an error that Nomad could not be found, then your PATH environment -variable was not setup properly. Please go back and ensure that your PATH -variable contains the directory where Nomad was installed. +If you get an error that Nomad could not be found, then your Vagrant box +may not have provisioned correctly. Check any error messages that may have +been occurred during `vagrant up`. You can always destroy the box and +re-create it. Otherwise, Nomad is installed and ready to go! 
+ diff --git a/website/source/layouts/intro.erb b/website/source/layouts/intro.erb index 6390f4206..41ae2eb6e 100644 --- a/website/source/layouts/intro.erb +++ b/website/source/layouts/intro.erb @@ -58,8 +58,16 @@ Running Nomad - > - HTTP API + > + Jobs + + + > + HTTP API + + + > + Nomad Cluster > From 4e5722b45075a2ac951f5284789485b0ac00039d Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 15:29:30 -0700 Subject: [PATCH 17/38] client: fixing stats formating --- client/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/client.go b/client/client.go index 1a6992233..00051dbc9 100644 --- a/client/client.go +++ b/client/client.go @@ -244,8 +244,8 @@ func (c *Client) Stats() map[string]map[string]string { "client": map[string]string{ "known_servers": toString(uint64(len(c.config.Servers))), "num_allocations": toString(uint64(numAllocs)), - "last_heartbeat": fmt.Sprintf("%#v", time.Since(c.lastHeartbeat)), - "heartbeat_ttl": fmt.Sprintf("%#v", c.heartbeatTTL), + "last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)), + "heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL), }, "runtime": nomad.RuntimeStats(), } From 1aa8a4dafeeb481ef9d8d981efda2b0b381af265 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 15:57:12 -0700 Subject: [PATCH 18/38] website: getting started --- .../intro/getting-started/install.html.md | 5 +- .../source/intro/getting-started/jobs.html.md | 139 ++++++++++++++++++ .../intro/getting-started/running.html.md | 139 ++++++++++++++++++ .../source/intro/getting-started/running.md | 18 --- 4 files changed, 282 insertions(+), 19 deletions(-) create mode 100644 website/source/intro/getting-started/jobs.html.md create mode 100644 website/source/intro/getting-started/running.html.md delete mode 100644 website/source/intro/getting-started/running.md diff --git a/website/source/intro/getting-started/install.html.md b/website/source/intro/getting-started/install.html.md index 
668394aa0..166332ab5 100644 --- a/website/source/intro/getting-started/install.html.md +++ b/website/source/intro/getting-started/install.html.md @@ -63,5 +63,8 @@ may not have provisioned correctly. Check any error messages that may have been occurred during `vagrant up`. You can always destroy the box and re-create it. -Otherwise, Nomad is installed and ready to go! +## Next Steps + +Vagrant is running and Nomad is installed. Let's [start Nomad](/intro/getting-started/running.html)! + diff --git a/website/source/intro/getting-started/jobs.html.md b/website/source/intro/getting-started/jobs.html.md new file mode 100644 index 000000000..43415e4ea --- /dev/null +++ b/website/source/intro/getting-started/jobs.html.md @@ -0,0 +1,139 @@ +--- +layout: "intro" +page_title: "Jobs" +sidebar_current: "getting-started-jobs" +description: |- + Learn how to deploy Nomad into production, how to initialize it, configure it, etc. +--- + +# Jobs + +Nomad relies on a long running agent on every machine in the cluster. +The agent can run either in server or client mode. Each region must +have at least one server, though a cluster of 3 or 5 servers is recommended. +A single server deployment is _**highly**_ discouraged as data loss is inevitable +in a failure scenario. + +All other agents run in client mode. A client is a very lightweight +process that registers the host machine, performs heartbeating, and runs any tasks +that are assigned to it by the servers. The agent must be run on every node that +is part of the cluster so that the servers can assign work to those machines. + +## Starting the Agent + +For simplicity, we will run a single Nomad agent in development mode. This mode +is used to quickly start an agent that is acting as a client and server to test +job configurations or prototype interactions. It should _**not**_ be used in +production as it does not persist state. + +``` +$ nomad agent -dev +==> Starting Nomad agent... 
+==> Nomad agent configuration: + + Atlas: + Client: true + Log Level: debug + Region: global (DC: dc1) + Server: true + +==> Nomad agent started! Log data will stream in below: + + [INFO] serf: EventMemberJoin: nomad.global 127.0.0.1 + [INFO] nomad: starting 4 scheduling worker(s) for [service batch _core] + [INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state + [INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1) + [DEBUG] client: applied fingerprints [storage arch cpu host memory] + [DEBUG] client: available drivers [exec docker] + [WARN] raft: Heartbeat timeout reached, starting election + [INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state + [DEBUG] raft: Votes needed: 1 + [DEBUG] raft: Vote granted. Tally: 1 + [INFO] raft: Election won. Tally: 1 + [INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state + [INFO] raft: Disabling EnableSingleNode (bootstrap) + [DEBUG] raft: Node 127.0.0.1:4647 updated peer set (2): [127.0.0.1:4647] + [INFO] nomad: cluster leadership acquired + [DEBUG] client: node registration complete + [DEBUG] client: updated allocations at index 1 (0 allocs) + [DEBUG] client: allocs: (added 0) (removed 0) (updated 0) (ignore 0) + [DEBUG] client: state updated to ready +``` + +As you can see, the Nomad agent has started and has output some log +data. From the log data, you can see that our agent is running in both +client and server mode, and has claimed leadership of the cluster. +Additionally, the local client has been registered and marked as ready. + +## Cluster Nodes + +If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you +can see the registered nodes of the Nomad cluster: + +```text +$ vagrant ssh +... 
+ +$ nomad node-status +ID DC Name Class Drain Status +72d3af97-144f-1e5f-94e5-df1516fe4add dc1 nomad false ready +``` + +The output shows our Node ID, which is randomly generated UUID, +it's datacenter, node name, node class, drain mode and current status. +We can see that our node is in the ready state, and task draining is +currently off. + +The agent is also running in server mode, which means it is part of +the [gossip protocol](/docs/internals/gossip.html) used to connect all +the server instances together. We can view the members of the gossip +ring using the [`server-members`](/docs/commands/server-members.html) command: + +```text +$ nomad server-members +Name Addr Port Status Proto Build DC Region +nomad.global 127.0.0.1 4648 alive 2 0.1.0dev dc1 global +``` + +The output shows our own agent, the address it is running on, its +health state, some version information, and the datacenter and region. +Additional metadata can be viewed by providing the `-detailed` flag. + +## Stopping the Agent + +You can use `Ctrl-C` (the interrupt signal) to halt the agent. +By default, all signals will cause the agent to forcefully shutdown. +The agent [can be configured](/docs/agent/config.html) to gracefully +leave on either the interrupt or terminate signals. + +After interrupting the agent, you should see it leave the cluster +and shut down: + +``` +^C==> Caught signal: interrupt + [DEBUG] http: Shutting down http server + [INFO] agent: requesting shutdown + [INFO] client: shutting down + [INFO] nomad: shutting down server + [WARN] serf: Shutdown without a Leave + [INFO] agent: shutdown complete +``` + +By gracefully leaving, Nomad clients update their status to prevent +futher tasks from being scheduled and to start migrating any tasks that are +already assigned. Nomad servers notifies other their peers they intend to leave. +When a server leaves, replication to that server stops. If a server fails, +replication continues to be attempted until the node recovers. 
Nomad will +automatically try to reconnect to _failed_ nodes, allowing it to recover from +certain network conditions, while _left_ nodes are no longer contacted. + +If an agent is operating as a server, a graceful leave is important to avoid +causing a potential availability outage affecting the +[consensus protocol](/docs/internals/consensus.html). If a server does +forcefully exit and will not be returning into service, the +[`server-force-leave` command](/docs/commands/server-force-leave.html) should +be used to force the server from a _failed_ to a _left_ state. + +## Next Steps + +The development Nomad agent is up and running. Let's try to [run a job](jobs.html)! diff --git a/website/source/intro/getting-started/running.html.md b/website/source/intro/getting-started/running.html.md new file mode 100644 index 000000000..eb7582617 --- /dev/null +++ b/website/source/intro/getting-started/running.html.md @@ -0,0 +1,139 @@ +--- +layout: "intro" +page_title: "Running Nomad" +sidebar_current: "getting-started-running" +description: |- + Learn how to deploy Nomad into production, how to initialize it, configure it, etc. +--- + +# Running Nomad + +Nomad relies on a long running agent on every machine in the cluster. +The agent can run either in server or client mode. Each region must +have at least one server, though a cluster of 3 or 5 servers is recommended. +A single server deployment is _**highly**_ discouraged as data loss is inevitable +in a failure scenario. + +All other agents run in client mode. A client is a very lightweight +process that registers the host machine, performs heartbeating, and runs any tasks +that are assigned to it by the servers. The agent must be run on every node that +is part of the cluster so that the servers can assign work to those machines. + +## Starting the Agent + +For simplicity, we will run a single Nomad agent in development mode. 
This mode +is used to quickly start an agent that is acting as a client and server to test +job configurations or prototype interactions. It should _**not**_ be used in +production as it does not persist state. + +``` +$ nomad agent -dev +==> Starting Nomad agent... +==> Nomad agent configuration: + + Atlas: + Client: true + Log Level: debug + Region: global (DC: dc1) + Server: true + +==> Nomad agent started! Log data will stream in below: + + [INFO] serf: EventMemberJoin: nomad.global 127.0.0.1 + [INFO] nomad: starting 4 scheduling worker(s) for [service batch _core] + [INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state + [INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1) + [DEBUG] client: applied fingerprints [storage arch cpu host memory] + [DEBUG] client: available drivers [exec docker] + [WARN] raft: Heartbeat timeout reached, starting election + [INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state + [DEBUG] raft: Votes needed: 1 + [DEBUG] raft: Vote granted. Tally: 1 + [INFO] raft: Election won. Tally: 1 + [INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state + [INFO] raft: Disabling EnableSingleNode (bootstrap) + [DEBUG] raft: Node 127.0.0.1:4647 updated peer set (2): [127.0.0.1:4647] + [INFO] nomad: cluster leadership acquired + [DEBUG] client: node registration complete + [DEBUG] client: updated allocations at index 1 (0 allocs) + [DEBUG] client: allocs: (added 0) (removed 0) (updated 0) (ignore 0) + [DEBUG] client: state updated to ready +``` + +As you can see, the Nomad agent has started and has output some log +data. From the log data, you can see that our agent is running in both +client and server mode, and has claimed leadership of the cluster. +Additionally, the local client has been registered and marked as ready. 
+ +## Cluster Nodes + +If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you +can see the registered nodes of the Nomad cluster: + +```text +$ vagrant ssh +... + +$ nomad node-status +ID DC Name Class Drain Status +72d3af97-144f-1e5f-94e5-df1516fe4add dc1 nomad false ready +``` + +The output shows our Node ID, which is randomly generated UUID, +it's datacenter, node name, node class, drain mode and current status. +We can see that our node is in the ready state, and task draining is +currently off. + +The agent is also running in server mode, which means it is part of +the [gossip protocol](/docs/internals/gossip.html) used to connect all +the server instances together. We can view the members of the gossip +ring using the [`server-members`](/docs/commands/server-members.html) command: + +```text +$ nomad server-members +Name Addr Port Status Proto Build DC Region +nomad.global 127.0.0.1 4648 alive 2 0.1.0dev dc1 global +``` + +The output shows our own agent, the address it is running on, its +health state, some version information, and the datacenter and region. +Additional metadata can be viewed by providing the `-detailed` flag. + +## Stopping the Agent + +You can use `Ctrl-C` (the interrupt signal) to halt the agent. +By default, all signals will cause the agent to forcefully shutdown. +The agent [can be configured](/docs/agent/config.html) to gracefully +leave on either the interrupt or terminate signals. + +After interrupting the agent, you should see it leave the cluster +and shut down: + +``` +^C==> Caught signal: interrupt + [DEBUG] http: Shutting down http server + [INFO] agent: requesting shutdown + [INFO] client: shutting down + [INFO] nomad: shutting down server + [WARN] serf: Shutdown without a Leave + [INFO] agent: shutdown complete +``` + +By gracefully leaving, Nomad clients update their status to prevent +futher tasks from being scheduled and to start migrating any tasks that are +already assigned. 
Nomad servers notify their peers they intend to leave.
+When a server leaves, replication to that server stops. If a server fails,
+replication continues to be attempted until the node recovers. Nomad will
+automatically try to reconnect to _failed_ nodes, allowing it to recover from
+certain network conditions, while _left_ nodes are no longer contacted.
+
+If an agent is operating as a server, a graceful leave is important to avoid
+causing a potential availability outage affecting the
+[consensus protocol](/docs/internals/consensus.html). If a server does
+forcefully exit and will not be returning into service, the
+[`server-force-leave` command](/docs/commands/server-force-leave.html) should
+be used to force the server from a _failed_ to a _left_ state.
+
+## Next Steps
+
+The development Nomad agent is up and running. Let's try to [run a job](jobs.html)!
diff --git a/website/source/intro/getting-started/running.md b/website/source/intro/getting-started/running.md
deleted file mode 100644
index 478355857..000000000
--- a/website/source/intro/getting-started/running.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-layout: "intro"
-page_title: "Running Nomad"
-sidebar_current: "getting-started-running"
-description: |-
-  Learn how to deploy Nomad into production, how to initialize it, configure it, etc.
----
-
-# Running Nomad
-This section will detail how to run Nomad on client machines. It should include
-a sample upstart script and stuff
-
-## Next
-
-TODO: Fill in text here.
-
-Next, we have a [short tutorial](/intro/getting-started/apis.html) on using
-Nomad's HTTP APIs.
From 7faa311fed42881ed8da67d71c6c5e2c03e638e4 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 16:18:26 -0700 Subject: [PATCH 19/38] website: getting started --- .../source/intro/getting-started/apis.html.md | 21 --- .../intro/getting-started/cluster.html.md | 17 +++ .../source/intro/getting-started/http.html.md | 16 ++ .../source/intro/getting-started/jobs.html.md | 140 +++--------------- .../intro/getting-started/running.html.md | 2 +- 5 files changed, 55 insertions(+), 141 deletions(-) delete mode 100644 website/source/intro/getting-started/apis.html.md create mode 100644 website/source/intro/getting-started/cluster.html.md create mode 100644 website/source/intro/getting-started/http.html.md diff --git a/website/source/intro/getting-started/apis.html.md b/website/source/intro/getting-started/apis.html.md deleted file mode 100644 index 6863125f8..000000000 --- a/website/source/intro/getting-started/apis.html.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -layout: "intro" -page_title: "Using the HTTP APIs with Authentication" -sidebar_current: "getting-started-apis" -description: |- - Using the HTTP APIs for authentication and secret access. ---- - -# Using the HTTP APIs with Authentication -Many of Nomad's capabilities are accessible via the HTTP API in addition to the -CLI. - -TODO: Document Nomad's HTTP API - -Congratulations! You now know all the basics to get started with Nomad. - -## Next - -Next, we have a page dedicated to -[next steps](/intro/getting-started/next-steps.html) depending on -what you would like to achieve. diff --git a/website/source/intro/getting-started/cluster.html.md b/website/source/intro/getting-started/cluster.html.md new file mode 100644 index 000000000..3af9cc9ec --- /dev/null +++ b/website/source/intro/getting-started/cluster.html.md @@ -0,0 +1,17 @@ +--- +layout: "intro" +page_title: "Nomad Cluster" +sidebar_current: "getting-started-cluster" +description: |- + Join another Nomad client to create your first cluster. 
+--- + +# Nomad Cluster + +TODO: Document clustering + +## Next Steps + +We've now concluded the getting started guide, however there are a number +of [next steps](next-steps.html) to get started with Nomad. + diff --git a/website/source/intro/getting-started/http.html.md b/website/source/intro/getting-started/http.html.md new file mode 100644 index 000000000..2cf5df21a --- /dev/null +++ b/website/source/intro/getting-started/http.html.md @@ -0,0 +1,16 @@ +--- +layout: "intro" +page_title: "HTTP API" +sidebar_current: "getting-started-http" +description: |- + Learn about the HTTP APIs for interacting with Nomad. +--- + +# HTTP API + +TODO: Document Nomad's HTTP API + +## Next Steps + +Next we will add another Nomad client to [create our first cluster](cluster.html) + diff --git a/website/source/intro/getting-started/jobs.html.md b/website/source/intro/getting-started/jobs.html.md index 43415e4ea..0e8b034f5 100644 --- a/website/source/intro/getting-started/jobs.html.md +++ b/website/source/intro/getting-started/jobs.html.md @@ -3,137 +3,39 @@ layout: "intro" page_title: "Jobs" sidebar_current: "getting-started-jobs" description: |- - Learn how to deploy Nomad into production, how to initialize it, configure it, etc. + Learn how to submit, modify and stop jobs in Nomad. --- # Jobs -Nomad relies on a long running agent on every machine in the cluster. -The agent can run either in server or client mode. Each region must -have at least one server, though a cluster of 3 or 5 servers is recommended. -A single server deployment is _**highly**_ discouraged as data loss is inevitable -in a failure scenario. +Jobs are the primary configuration that users interact with when using +Nomad. A job is a declaritive specification of tasks that Nomad should run. +Jobs have a globally unique name, one or many task groups, which are themselves +collections of one or many tasks. -All other agents run in client mode. 
A client is a very lightweight -process that registers the host machine, performs heartbeating, and runs any tasks -that are assigned to it by the servers. The agent must be run on every node that -is part of the cluster so that the servers can assign work to those machines. +The format of the jobs is [documented here](/docs/jobspec/index.html). They +can either be specified in [HCL](https://github.com/hashicorp/hcl) or JSON, +however we recommend only using JSON when the configuration is generated by a machine. -## Starting the Agent - -For simplicity, we will run a single Nomad agent in development mode. This mode -is used to quickly start an agent that is acting as a client and server to test -job configurations or prototype interactions. It should _**not**_ be used in -production as it does not persist state. +To get started, we will use the [`init` command](/docs/commands/init.html) which +generates an skeleton job file: ``` -$ nomad agent -dev -==> Starting Nomad agent... -==> Nomad agent configuration: +$ nomad init +Created job file 'example.nomad' - Atlas: - Client: true - Log Level: debug - Region: global (DC: dc1) - Server: true +$ cat example.nomad -==> Nomad agent started! Log data will stream in below: - - [INFO] serf: EventMemberJoin: nomad.global 127.0.0.1 - [INFO] nomad: starting 4 scheduling worker(s) for [service batch _core] - [INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state - [INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1) - [DEBUG] client: applied fingerprints [storage arch cpu host memory] - [DEBUG] client: available drivers [exec docker] - [WARN] raft: Heartbeat timeout reached, starting election - [INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state - [DEBUG] raft: Votes needed: 1 - [DEBUG] raft: Vote granted. Tally: 1 - [INFO] raft: Election won. 
Tally: 1 - [INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state - [INFO] raft: Disabling EnableSingleNode (bootstrap) - [DEBUG] raft: Node 127.0.0.1:4647 updated peer set (2): [127.0.0.1:4647] - [INFO] nomad: cluster leadership acquired - [DEBUG] client: node registration complete - [DEBUG] client: updated allocations at index 1 (0 allocs) - [DEBUG] client: allocs: (added 0) (removed 0) (updated 0) (ignore 0) - [DEBUG] client: state updated to ready -``` - -As you can see, the Nomad agent has started and has output some log -data. From the log data, you can see that our agent is running in both -client and server mode, and has claimed leadership of the cluster. -Additionally, the local client has been registered and marked as ready. - -## Cluster Nodes - -If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you -can see the registered nodes of the Nomad cluster: - -```text -$ vagrant ssh -... - -$ nomad node-status -ID DC Name Class Drain Status -72d3af97-144f-1e5f-94e5-df1516fe4add dc1 nomad false ready -``` - -The output shows our Node ID, which is randomly generated UUID, -it's datacenter, node name, node class, drain mode and current status. -We can see that our node is in the ready state, and task draining is -currently off. - -The agent is also running in server mode, which means it is part of -the [gossip protocol](/docs/internals/gossip.html) used to connect all -the server instances together. We can view the members of the gossip -ring using the [`server-members`](/docs/commands/server-members.html) command: - -```text -$ nomad server-members -Name Addr Port Status Proto Build DC Region -nomad.global 127.0.0.1 4648 alive 2 0.1.0dev dc1 global -``` - -The output shows our own agent, the address it is running on, its -health state, some version information, and the datacenter and region. -Additional metadata can be viewed by providing the `-detailed` flag. 
- -## Stopping the Agent - -You can use `Ctrl-C` (the interrupt signal) to halt the agent. -By default, all signals will cause the agent to forcefully shutdown. -The agent [can be configured](/docs/agent/config.html) to gracefully -leave on either the interrupt or terminate signals. - -After interrupting the agent, you should see it leave the cluster -and shut down: +job "example" { + region = "global" + datacenter = ["dc1", "dc2"] + ... +} ``` -^C==> Caught signal: interrupt - [DEBUG] http: Shutting down http server - [INFO] agent: requesting shutdown - [INFO] client: shutting down - [INFO] nomad: shutting down server - [WARN] serf: Shutdown without a Leave - [INFO] agent: shutdown complete -``` - -By gracefully leaving, Nomad clients update their status to prevent -futher tasks from being scheduled and to start migrating any tasks that are -already assigned. Nomad servers notifies other their peers they intend to leave. -When a server leaves, replication to that server stops. If a server fails, -replication continues to be attempted until the node recovers. Nomad will -automatically try to reconnect to _failed_ nodes, allowing it to recover from -certain network conditions, while _left_ nodes are no longer contacted. - -If an agent is operating as a server, a graceful leave is important to avoid -causing a potential availability outage affecting the -[consensus protocol](/docs/internals/consensus.html). If a server does -forcefully exit and will not be returning into service, the -[`server-force-leave` command](/docs/commands/server-force-leave.html) should -be used to force the server from a _failed_ to a _left_ state. ## Next Steps -The development Nomad agent is up and running. Let's try to [run a job](jobs.html)! +We've now covered the basic workflow around jobs with Nomad. Next +we will do a brief [tour of the HTTP API](http.html). 
+ diff --git a/website/source/intro/getting-started/running.html.md b/website/source/intro/getting-started/running.html.md index eb7582617..5991aef05 100644 --- a/website/source/intro/getting-started/running.html.md +++ b/website/source/intro/getting-started/running.html.md @@ -3,7 +3,7 @@ layout: "intro" page_title: "Running Nomad" sidebar_current: "getting-started-running" description: |- - Learn how to deploy Nomad into production, how to initialize it, configure it, etc. + Learn about the Nomad agent, and the lifecycle of running and stopping. --- # Running Nomad From 8eb7db7d691759227ed5ea57b7b97685a3de325e Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 17:06:23 -0700 Subject: [PATCH 20/38] command/init: reworking behavior and default file --- command/init.go | 166 ++++++++++-------- website/source/docs/commands/init.html.md.erb | 11 +- 2 files changed, 102 insertions(+), 75 deletions(-) diff --git a/command/init.go b/command/init.go index cbbb34cfb..978490ff8 100644 --- a/command/init.go +++ b/command/init.go @@ -2,8 +2,15 @@ package command import ( "fmt" + "io/ioutil" "os" - "path/filepath" + "strings" +) + +const ( + // DefaultInitName is the default name we use when + // initializing the example file + DefaultInitName = "example.nomad" ) // InitCommand generates a new job template that you can customize to your @@ -13,89 +20,106 @@ type InitCommand struct { } func (c *InitCommand) Help() string { - return initUsage -} + helpText := ` +Usage: nomad init -func (c *InitCommand) Run(args []string) int { - dir, err := os.Getwd() - if err != nil { - c.Ui.Error("Unable to determine pwd; aborting") - return 1 - } + Creates an example job file that can be used as a starting + point to customize further. 
-
-	// Derive the job name from the pwd folder name, which is our best guess at
-	// the project's name
-	jobname := filepath.Base(dir)
-	jobfile := fmt.Sprintf("%s.nomad", jobname)
-	jobpath := filepath.Join(dir, jobfile)
-	if _, err := os.Stat(jobpath); err == nil {
-		c.Ui.Error(fmt.Sprintf("%s file already exists", jobfile))
-		return 1
-	}
-
-	file, err := os.Create(jobfile)
-	defer file.Close()
-	if err != nil {
-		c.Ui.Error(fmt.Sprintf("Unable to create file %s: %s", jobfile, err))
-		return 1
-	}
-
-	_, err = file.WriteString(defaultJob)
-	if err != nil {
-		c.Ui.Error(fmt.Sprintf("Failed to write job template to %s", jobfile))
-		return 1
-	}
-
-	c.Ui.Output(fmt.Sprintf("Initialized nomad job template in %s", jobfile))
-
-	return 0
+`
+	return strings.TrimSpace(helpText)
 }
 
 func (c *InitCommand) Synopsis() string {
-	return "Create a new job template"
+	return "Create an example job file"
 }
 
-const initUsage = ``
+func (c *InitCommand) Run(args []string) int {
+	// Check if the file already exists
+	_, err := os.Stat(DefaultInitName)
+	if err == nil {
+		c.Ui.Error(fmt.Sprintf("Job '%s' already exists", DefaultInitName))
+		return 1
+	} else if !os.IsNotExist(err) {
+		c.Ui.Error(fmt.Sprintf("Failed to stat '%s': %v", DefaultInitName, err))
+		return 1
+	}
+
+	// Write out the example
+	err = ioutil.WriteFile(DefaultInitName, []byte(defaultJob), 0660)
+	if err != nil {
+		c.Ui.Error(fmt.Sprintf("Failed to write '%s': %v", DefaultInitName, err))
+		return 1
+	}
+
+	// Success
+	c.Ui.Output(fmt.Sprintf("Example job file written to %s", DefaultInitName))
+	return 0
+}
 
 const defaultJob = `
-job "my-app" {
-  region = "global"
-  type = "service"
-  priority = 50
+# There can only be a single job definition per file.
+# Create a job with ID and Name 'example'
+job "example" {
+  # Run the job in the global region, which is the default.
+  # region = "global"
 
-  // Each task in the group will be scheduled on the same machine(s). 
- group "app-group" { - // How many copies of this group should we run? - count = 5 + # Specify the datacenters within the region this job can run in. + datacenters = ["dc1"] - task "python-webapp" { - driver = "docker" - config { - image = "org/container" - } - resources { - // For CPU 1024 = 1ghz - cpu = 500 - // Memory in megabytes - memory = 128 + # Service type jobs optimize for long-lived services. This is + # the default but we can change to batch for short-lived tasks. + # type = "service" - network { - dynamic_ports = [ - "http", - "https", - ] - } - } - } + # Priority controls our access to resources and scheduling priority. + # This can be 1 to 100, inclusively, and defaults to 50. + # priority = 50 - task "logshipper" { - driver = "exec" - } + # Restrict our job to only linux. We can specify multiple + # constraints as needed. + constraint { + attribute = "$attr.os.name" + value = "linux" + } - constraint { - attribute = "kernel.os" - value = "linux" - } - } + # Configure the job to do rolling updates + update { + # Stagger updates every 10 seconds + stagger = "10s" + + # Update a single task at a time + max_parallel = 1 + } + + # Create a 'cache' group. Each task in the group will be + # scheduled onto the same machine. + group "cache" { + # Control the number of instances of this groups. + # Defaults to 1 + # count = 1 + + # Define a task to run + task "redis" { + # Use Docker to run the task. + driver = "docker" + + # Configure Docker driver with the image + config { + image = "redis" + } + + # We must specify the resources required for + # this task to ensure it runs on a machine with + # enough capacity. 
+ resources { + cpu = 500 # 500 Mhz + memory = 256 # 128MB + network { + mbits = 10 + dynamic_ports = ["redis"] + } + } + } + } } ` diff --git a/website/source/docs/commands/init.html.md.erb b/website/source/docs/commands/init.html.md.erb index 7bd8b47f8..e37841705 100644 --- a/website/source/docs/commands/init.html.md.erb +++ b/website/source/docs/commands/init.html.md.erb @@ -3,20 +3,23 @@ layout: "docs" page_title: "Commands: init" sidebar_current: "docs-commands-init" description: > - Toggle drain mode for a given node. + Generate a skeleton jobspec template. --- # Command: init -The `init` command creates a [jobspec](/docs/jobspec/) template in the current +The `init` command creates an example [job specification](/docs/jobspec/) in the current directory that demonstrates some common configurations for tasks, tasks groups, runtime constraints, and resource allocation. Please refer to the [jobspec](/docs/jobspec/) and [drivers](/docs/drivers/) pages to learn how to customize the template. 
-## Usage +## Examples + +Generate an example job file: ``` -nomad init +$ nomad init +Example job file written to example.nomad ``` From 8e11e8c3facaeb1fdb1c590a3ebd57d24c228df9 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 17:24:55 -0700 Subject: [PATCH 21/38] command/node-status: removing extra formating input --- command/node_status.go | 1 - 1 file changed, 1 deletion(-) diff --git a/command/node_status.go b/command/node_status.go index 2a20c7a05..cf58de8d4 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -130,7 +130,6 @@ func (c *NodeStatusCommand) Run(args []string) int { alloc.ID, alloc.EvalID, alloc.JobID, - alloc.NodeID, alloc.TaskGroup, alloc.DesiredStatus, alloc.ClientStatus) From 667c9c0794b03dee57b30fe5342326502e7ca72c Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:10:33 -0700 Subject: [PATCH 22/38] command/init: fixing inconsistencies --- command/init.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/init.go b/command/init.go index 978490ff8..531622967 100644 --- a/command/init.go +++ b/command/init.go @@ -105,7 +105,7 @@ job "example" { # Configure Docker driver with the image config { - image = "redis" + image = "redis:latest" } # We must specify the resources required for @@ -113,7 +113,7 @@ job "example" { # enough capacity. 
resources { cpu = 500 # 500 Mhz - memory = 256 # 128MB + memory = 256 # 256MB network { mbits = 10 dynamic_ports = ["redis"] From ceaa6a3e533913dd5440e1f889b17d5a50329df4 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:10:46 -0700 Subject: [PATCH 23/38] website: getting started --- .../source/intro/getting-started/jobs.html.md | 149 +++++++++++++++++- 1 file changed, 142 insertions(+), 7 deletions(-) diff --git a/website/source/intro/getting-started/jobs.html.md b/website/source/intro/getting-started/jobs.html.md index 0e8b034f5..3d4cd3db7 100644 --- a/website/source/intro/getting-started/jobs.html.md +++ b/website/source/intro/getting-started/jobs.html.md @@ -17,25 +17,160 @@ The format of the jobs is [documented here](/docs/jobspec/index.html). They can either be specified in [HCL](https://github.com/hashicorp/hcl) or JSON, however we recommend only using JSON when the configuration is generated by a machine. +## Running a Job + To get started, we will use the [`init` command](/docs/commands/init.html) which generates an skeleton job file: ``` $ nomad init -Created job file 'example.nomad' +Example job file written to example.nomad $ cat example.nomad +# There can only be a single job definition per file. +# Create a job with ID and Name 'example' job "example" { - region = "global" - datacenter = ["dc1", "dc2"] - ... -} + # Run the job in the global region, which is the default. + # region = "global" +... +``` + +In this example job file, we have declared a single task 'redis' which is using +the Docker driver to run the task. The primary way you interact with Nomad +is with the [`run` command](/docs/commands/run.html). The `run` command takes +a job file and registers it with Nomad. This is used both to register new +jobs and to update existing jobs. 
+ +We can register our example job now: ``` +$ nomad run example.nomad +==> Monitoring evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6" + Evaluation triggered by job "example" + Allocation "c1d2f085-7049-6c4a-4479-1b2310fdaba9" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6" finished with status "complete" +``` + +Anytime a job is updated, Nomad creates an evaluation to determine what +actions need to take place. In this case, because this is a new job, Nomad has +determined that an allocation should be created and has scheduled it on our +local agent. + +To inspect the status of our job we use the [`status` command](/docs/commands/status.html): + +``` +$ nomad status example +ID = example +Name = example +Type = service +Priority = 50 +Datacenters = dc1 +Status = + +==> Evaluations +ID Priority TriggeredBy Status +f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 50 job-register complete + +==> Allocations +ID EvalID NodeID TaskGroup Desired Status +c1d2f085-7049-6c4a-4479-1b2310fdaba9 f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 1f43787c-7ab4-8d10-d2d6-1593ed06463a cache run running +``` + +Here we can see that our evalution that was created has completed, and that +it resulted in the creation of an allocation that is now running on the local node. + +## Modifying a Job + +The definition of a job is not static, and is meant to be updated overtime. +You may update a job to change the docker container to update the application version, +or change the count of a task group to scale with load. + +For now, edit the `example.nomad` file to uncommand the count and set it to 3: + +``` +# Control the number of instances of this groups. 
+# Defaults to 1
+count = 3
+```
+
+Once you have finished modifying the job specification, use `nomad run` to
+push the updated version of the job:
+
+```
+$ nomad run example.nomad
+==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
+    Evaluation triggered by job "example"
+    Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
+    Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
+    Evaluation status changed: "pending" -> "complete"
+==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
+```
+
+Because we set the count of the task group to three, Nomad created two
+additional allocations to get to the desired state. It is idempotent to
+run the same job specification again and no new allocations will be created.
+
+Now, let's try to do an application update. In this case, we will simply change
+the version of redis we want to run. Edit the `example.nomad` file and change
+the Docker image from "redis:latest" to "redis:2.8":
+
+```
+# Configure Docker driver with the image
+config {
+  image = "redis:2.8"
+}
+```
+
+This time we have not changed the number of task groups we want running,
+but we've changed the task itself. This requires stopping the old tasks
+and starting new tasks. Our example job is configured to do a rolling update,
+doing a single update every 10 seconds. 
Use `run` to push the updated +specification now: + +``` +$ nomad run example.nomad +==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" + Evaluation triggered by job "example" + Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache" + Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete" +``` + +We can see that Nomad handled the updated in three phases, each +time only updating a single task group at a time. The update strategy +can be configured, but rolling updates makes it easy to upgrade +an application at large scale. + +## Stopping a Job + +So far we've created, run and modified a job. The final step in a job lifecycle +is stopping the job. This is done with the [`stop` command](/docs/commands/stop.html): + +``` +$ nomad stop example +==> Monitoring evaluation "4b236340-d5ed-1838-be15-a896095d3ac9" + Evaluation triggered by job "example" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "4b236340-d5ed-1838-be15-a896095d3ac9" finished with status "complete" +``` + +When we stop a job, it creates an evaluation which is used to stop all +the existing allocations. This also deletes the job definition out of Nomad. +If we try to query the job status, we can see it is no longer registered: + +``` +$ nomad status example +Error querying job: Unexpected response code: 404 (job not found) +``` + +If we wanted to start the job again, we could simply `run` it again. ## Next Steps -We've now covered the basic workflow around jobs with Nomad. Next -we will do a brief [tour of the HTTP API](http.html). +Users of Nomad primarily interact with jobs, and we've now seen +how to create and scale our job, perform an application update, +and do a job tear down. 
Next we will do a brief [tour of the HTTP API](http.html). From 75427cb1a5da1508d2c7c98f2438e9d0635c9b3c Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:16:37 -0700 Subject: [PATCH 24/38] website: update 'run' docs --- website/source/docs/commands/run.html.md.erb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/website/source/docs/commands/run.html.md.erb b/website/source/docs/commands/run.html.md.erb index 8a0dd6c1f..fb0698870 100644 --- a/website/source/docs/commands/run.html.md.erb +++ b/website/source/docs/commands/run.html.md.erb @@ -8,10 +8,9 @@ description: > # Command: run -The `run` command is used to run new jobs in Nomad. Jobs are specified using -[HCL](https://github.com/hashicorp/hcl)-encoded files, and may specify one or -more task groups. More information about jobs and their configuration format -can be found in the [jobs documentation](#). +The `run` command is used to submit new jobs to Nomad or to update existing +jobs. Job files must conform to the [job specification](/docs/jobspec/index.html) +format. ## Usage @@ -20,8 +19,8 @@ nomad run [options] ``` The run command requires a single argument, specifying the path to a file -containing a valid [job definition](#). This file will be read and the job -will be submitted to the Nomad server for scheduling. +containing a valid [job specification](/docs/jobspec/index.html). This file +will be read and the job will be submitted to Nomad for scheduling. 
By default, on sucessful job submission, the run command will enter an interactive monitor and display log information detailing the scheduling From ea240a5063c80672876718ed2c99569ea00bc6ee Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:17:30 -0700 Subject: [PATCH 25/38] command/run: update help output --- command/run.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/command/run.go b/command/run.go index 83e8a94e1..008361c9a 100644 --- a/command/run.go +++ b/command/run.go @@ -19,8 +19,9 @@ func (c *RunCommand) Help() string { helpText := ` Usage: nomad run [options] - Starts running a new job using the definition located at . - This is the main command used to invoke new work in Nomad. + Starts running a new job or updates an existing job using + the specification located at . This is the main command + used to interact with Nomad. Upon successful job submission, this command will immediately enter an interactive monitor. This is useful to watch Nomad's From 54f5b05b075f53849bc4d4da0477edd0803b0452 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:17:55 -0700 Subject: [PATCH 26/38] command/run: update synopsis --- command/run.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/run.go b/command/run.go index 008361c9a..ab43a3886 100644 --- a/command/run.go +++ b/command/run.go @@ -51,7 +51,7 @@ Run Options: } func (c *RunCommand) Synopsis() string { - return "Run a new job" + return "Run a new job or update an existing job" } func (c *RunCommand) Run(args []string) int { From 90c5f2035aa4930924332081346e79be8a63b396 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:19:00 -0700 Subject: [PATCH 27/38] website: typo fixes --- website/source/intro/getting-started/jobs.html.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/source/intro/getting-started/jobs.html.md b/website/source/intro/getting-started/jobs.html.md index 
3d4cd3db7..d471452f9 100644 --- a/website/source/intro/getting-started/jobs.html.md +++ b/website/source/intro/getting-started/jobs.html.md @@ -9,7 +9,7 @@ description: |- # Jobs Jobs are the primary configuration that users interact with when using -Nomad. A job is a declaritive specification of tasks that Nomad should run. +Nomad. A job is a declarative specification of tasks that Nomad should run. Jobs have a globally unique name, one or many task groups, which are themselves collections of one or many tasks. @@ -78,7 +78,7 @@ ID EvalID Node c1d2f085-7049-6c4a-4479-1b2310fdaba9 f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 1f43787c-7ab4-8d10-d2d6-1593ed06463a cache run running ``` -Here we can see that our evalution that was created has completed, and that +Here we can see that our evaluation that was created has completed, and that it resulted in the creation of an allocation that is now running on the local node. ## Modifying a Job @@ -87,7 +87,7 @@ The definition of a job is not static, and is meant to be updated overtime. You may update a job to change the docker container to update the application version, or change the count of a task group to scale with load. -For now, edit the `example.nomad` file to uncommand the count and set it to 3: +For now, edit the `example.nomad` file to uncomment the count and set it to 3: ``` # Control the number of instances of this groups. From f2a09e9cf6b91912f7fef1b3604322de95ee3494 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:21:46 -0700 Subject: [PATCH 28/38] command/init: tweak constraint --- command/init.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/init.go b/command/init.go index 531622967..ee0bd99ad 100644 --- a/command/init.go +++ b/command/init.go @@ -78,7 +78,7 @@ job "example" { # Restrict our job to only linux. We can specify multiple # constraints as needed. 
constraint { - attribute = "$attr.os.name" + attribute = "$attr.kernel.name" value = "linux" } From 7f561128bdad5c97b81236c32b80b22e8011a3db Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 18:34:30 -0700 Subject: [PATCH 29/38] website: skip HTTP in intro --- .../source/intro/getting-started/cluster.html.md | 4 ++-- .../source/intro/getting-started/http.html.md | 16 ---------------- .../source/intro/getting-started/jobs.html.md | 3 ++- website/source/layouts/intro.erb | 6 +----- 4 files changed, 5 insertions(+), 24 deletions(-) delete mode 100644 website/source/intro/getting-started/http.html.md diff --git a/website/source/intro/getting-started/cluster.html.md b/website/source/intro/getting-started/cluster.html.md index 3af9cc9ec..a9b8b4f2e 100644 --- a/website/source/intro/getting-started/cluster.html.md +++ b/website/source/intro/getting-started/cluster.html.md @@ -1,12 +1,12 @@ --- layout: "intro" -page_title: "Nomad Cluster" +page_title: "Clustering" sidebar_current: "getting-started-cluster" description: |- Join another Nomad client to create your first cluster. --- -# Nomad Cluster +# Clustering TODO: Document clustering diff --git a/website/source/intro/getting-started/http.html.md b/website/source/intro/getting-started/http.html.md deleted file mode 100644 index 2cf5df21a..000000000 --- a/website/source/intro/getting-started/http.html.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -layout: "intro" -page_title: "HTTP API" -sidebar_current: "getting-started-http" -description: |- - Learn about the HTTP APIs for interacting with Nomad. 
---- - -# HTTP API - -TODO: Document Nomad's HTTP API - -## Next Steps - -Next we will add another Nomad client to [create our first cluster](cluster.html) - diff --git a/website/source/intro/getting-started/jobs.html.md b/website/source/intro/getting-started/jobs.html.md index d471452f9..6b9a5efac 100644 --- a/website/source/intro/getting-started/jobs.html.md +++ b/website/source/intro/getting-started/jobs.html.md @@ -172,5 +172,6 @@ If we wanted to start the job again, we could simply `run` it again. Users of Nomad primarily interact with jobs, and we've now seen how to create and scale our job, perform an application update, -and do a job tear down. Next we will do a brief [tour of the HTTP API](http.html). +and do a job tear down. Next we will add another Nomad +client to [create our first cluster](cluster.html) diff --git a/website/source/layouts/intro.erb b/website/source/layouts/intro.erb index 41ae2eb6e..6433d93e0 100644 --- a/website/source/layouts/intro.erb +++ b/website/source/layouts/intro.erb @@ -62,12 +62,8 @@ Jobs - > - HTTP API - - > - Nomad Cluster + Clustering > From 0b1b0730e6102e690b9444241ee16bf5846f9b3c Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 21:44:51 -0700 Subject: [PATCH 30/38] website: starting cluster section --- website/source/intro/getting-started/cluster.html.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/website/source/intro/getting-started/cluster.html.md b/website/source/intro/getting-started/cluster.html.md index a9b8b4f2e..d1c697545 100644 --- a/website/source/intro/getting-started/cluster.html.md +++ b/website/source/intro/getting-started/cluster.html.md @@ -8,7 +8,16 @@ description: |- # Clustering -TODO: Document clustering +We have started our first agent and run a job against it in development mode. +This demonstrates the ease of use and the workflow of Nomad, but did not show how +this could be extended to a scalable, production-grade configuration. 
In this step, +we will create our first real cluster with multiple nodes. + +## Starting the Server + +## Starting the Clients + +## Submit a Job ## Next Steps From f94c007348ce04845f0552f8ca64f12ef0531e0b Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 22:00:24 -0700 Subject: [PATCH 31/38] client: create dir, handle not exist more gracefully --- client/client.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/client/client.go b/client/client.go index 00051dbc9..ea77d36d8 100644 --- a/client/client.go +++ b/client/client.go @@ -142,6 +142,13 @@ func (c *Client) init() error { return fmt.Errorf("failed creating alloc dir: %s", err) } } + + // Ensure the state dir exists if we have one + if c.config.StateDir != "" { + if err := os.MkdirAll(c.config.StateDir, 0700); err != nil { + return fmt.Errorf("failed creating state dir: %s", err) + } + } return nil } @@ -265,7 +272,9 @@ func (c *Client) restoreState() error { // Scan the directory list, err := ioutil.ReadDir(filepath.Join(c.config.StateDir, "alloc")) - if err != nil { + if err != nil && os.IsNotExist(err) { + return nil + } else if err != nil { return fmt.Errorf("failed to list alloc state: %v", err) } From 7c4e647cbb6834b75731c7c2c59544e63b1ab5cc Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 22:10:28 -0700 Subject: [PATCH 32/38] client: reset response struct to avoid decode errors --- client/client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/client/client.go b/client/client.go index ea77d36d8..49564bddc 100644 --- a/client/client.go +++ b/client/client.go @@ -565,6 +565,7 @@ func (c *Client) watchAllocations(allocUpdates chan []*structs.Allocation) { for { // Get the allocations, blocking for updates + resp = structs.NodeAllocsResponse{} err := c.RPC("Node.GetAllocs", &req, &resp) if err != nil { c.logger.Printf("[ERR] client: failed to query for node allocations: %v", err) From 5fb980bc53ca640797f6f565b8290a9bb46800f0 Mon Sep 17 
00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 22:20:07 -0700 Subject: [PATCH 33/38] scheduler: do not skip job anti-affinity --- scheduler/stack.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/stack.go b/scheduler/stack.go index 156c231c3..c1468602f 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -88,7 +88,7 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "") // Apply a limit function. This is to avoid scanning *every* possible node. - s.limit = NewLimitIterator(ctx, s.binPack, 2) + s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2) // Select the node with the maximum score for placement s.maxScore = NewMaxScoreIterator(ctx, s.limit) From 415444466ea289e46f0495caa60d8e19df79645c Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 22:24:07 -0700 Subject: [PATCH 34/38] scheduler: job anti-affinity score should record as negative --- scheduler/rank.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scheduler/rank.go b/scheduler/rank.go index 7a64186a6..1677187ad 100644 --- a/scheduler/rank.go +++ b/scheduler/rank.go @@ -289,8 +289,8 @@ func (iter *JobAntiAffinityIterator) Next() *RankedNode { // Apply a penalty if there are collisions if collisions > 0 { - scorePenalty := float64(collisions) * iter.penalty - option.Score -= scorePenalty + scorePenalty := -1 * float64(collisions) * iter.penalty + option.Score += scorePenalty iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty) } return option From d14267f12ed0a84e6cada40a8944e60126854773 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 22:45:23 -0700 Subject: [PATCH 35/38] website: adding clustering to getting started --- .../intro/getting-started/cluster.html.md | 177 ++++++++++++++++++ .../intro/getting-started/running.html.md | 7 +- 2 files changed, 183 insertions(+), 1 deletion(-) diff 
--git a/website/source/intro/getting-started/cluster.html.md b/website/source/intro/getting-started/cluster.html.md index d1c697545..e484c70e2 100644 --- a/website/source/intro/getting-started/cluster.html.md +++ b/website/source/intro/getting-started/cluster.html.md @@ -15,10 +15,187 @@ we will create our first real cluster with multiple nodes. ## Starting the Server +The first step is to create the config file for the server. Either download +the file from the [repository here](#), or paste this into a file called +`server.hcl`: + +``` +# Increase log verbosity +log_level = "DEBUG" + +# Setup data dir +data_dir = "/tmp/server1" + +# Enable the server +server { + enabled = true + + # Self-elect, should be 3 or 5 for production + bootstrap_expect = 1 +} +``` + +This is a fairly minimal server configuration file, but it +is enough to start an agent in server only mode and have it +elect as a leader. The major change that should be made for +production is to run more than one server, and to change the +corresponding `bootstrap_expect` value. + +Once the file is created, start the agent in a new tab: + +``` +$ sudo nomad agent -config server.hcl +==> WARNING: Bootstrap mode enabled! Potentially unsafe operation. +==> Starting Nomad agent... +==> Nomad agent configuration: + + Atlas: + Client: false + Log Level: DEBUG + Region: global (DC: dc1) + Server: true + +==> Nomad agent started! Log data will stream in below: + + [INFO] serf: EventMemberJoin: nomad.global 127.0.0.1 + [INFO] nomad: starting 4 scheduling worker(s) for [service batch _core] + [INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state + [WARN] serf: Failed to re-join any previously known node + [INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1) + [WARN] raft: Heartbeat timeout reached, starting election + [INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state + [DEBUG] raft: Votes needed: 1 + [DEBUG] raft: Vote granted. 
Tally: 1 + [INFO] raft: Election won. Tally: 1 + [INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state + [INFO] nomad: cluster leadership acquired +``` + +We can see above that client mode is disabled, and that we are +only running as the server. This means that this server will manage +state and make scheduling decisions but will not run any tasks. +Now we need some agents to run tasks! + ## Starting the Clients +Similar to the server, we must first configure the clients. Either download +the configuration for client1 and client2 from the [repository here](#), or +paste the following into `client1.hcl`: + +``` +# Increase log verbosity +log_level = "DEBUG" + +# Setup data dir +data_dir = "/tmp/client1" + +# Enable the client +client { + enabled = true + + # For demo assume we are talking to server1. For production, + # this should be like "nomad.service.consul:4647" and a system + # like Consul used for service discovery. + servers = ["127.0.0.1:4647"] +} + +# Modify our port to avoid a collision with server1 +ports { + http = 5656 +} +``` + +Copy that file to `client2.hcl` and change the `data_dir` to +be "/tmp/client2" and the `http` port to 5657. Once you've created +both `client1.hcl` and `client2.hcl`, open a tab for each and +start the agents: + +``` +$ sudo nomad agent -config client1.hcl +==> Starting Nomad agent... +==> Nomad agent configuration: + + Atlas: + Client: true + Log Level: DEBUG + Region: global (DC: dc1) + Server: false + +==> Nomad agent started! Log data will stream in below: + + [DEBUG] client: applied fingerprints [host memory storage arch cpu] + [DEBUG] client: available drivers [docker exec] + [DEBUG] client: node registration complete + ... +``` + +In the output we can see the agent is running in client mode only. +This agent will be available to run tasks but will not participate +in managing the cluster or making scheduling decisions. 
+ +Using the [`node-status` command](/docs/commands/node-status.html) +we should see both nodes in the `ready` state: + +``` +$ nomad node-status +ID DC Name Class Drain Status +e5239796-7285-3ed2-efe1-37cdc2d459d4 dc1 nomad false ready +d12e4ab0-4206-bd33-ff75-e1367590eceb dc1 nomad false ready +``` + +We now have a simple three-node cluster running. The only difference +between a demo and full production cluster is that we are running a +single server instead of three or five. + ## Submit a Job +Now that we have a simple cluster, we can use it to schedule a job. +We should still have the `example.nomad` job file from before, but +verify that the `count` is still set to 3. + +Then, use the [`run` command](/docs/commands/run.html) to submit the job: + +``` +$ nomad run example.nomad +==> Monitoring evaluation "2d742049-497f-c602-c56d-ae2a328a5671" + Evaluation triggered by job "example" + Allocation "44d46439-655d-701e-55ce-552ee74fbbd8" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache" + Allocation "624be24f-5992-0c75-742d-7f8dbd3044a2" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache" + Allocation "a133a2c7-cc3c-2f8c-8664-71d2389c7759" created: node "d12e4ab0-4206-bd33-ff75-e1367590eceb", group "cache" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "2d742049-497f-c602-c56d-ae2a328a5671" finished with status "complete" +``` + +We can see in the output that the scheduler assigned two of the +tasks to one of the client nodes and the remaining task to the +second client. 
+ +We can again use the [`status` command](/docs/commands/status.html) to verify: + +``` +$ nomad status example +ID = example +Name = example +Type = service +Priority = 50 +Datacenters = dc1 +Status = + +==> Evaluations +ID Priority TriggeredBy Status +2d742049-497f-c602-c56d-ae2a328a5671 50 job-register complete + +==> Allocations +ID EvalID NodeID TaskGroup Desired Status +44d46439-655d-701e-55ce-552ee74fbbd8 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running +a133a2c7-cc3c-2f8c-8664-71d2389c7759 2d742049-497f-c602-c56d-ae2a328a5671 d12e4ab0-4206-bd33-ff75-e1367590eceb cache run running +624be24f-5992-0c75-742d-7f8dbd3044a2 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running +``` + +We can see that all our tasks have been allocated and are running. +Once we are satisfied that our job is happily running, we can tear +it down with `nomad stop`. + ## Next Steps We've now concluded the getting started guide, however there are a number diff --git a/website/source/intro/getting-started/running.html.md b/website/source/intro/getting-started/running.html.md index 5991aef05..e450c34cb 100644 --- a/website/source/intro/getting-started/running.html.md +++ b/website/source/intro/getting-started/running.html.md @@ -27,7 +27,7 @@ job configurations or prototype interactions. It should _**not**_ be used in production as it does not persist state. ``` -$ nomad agent -dev +$ sudo nomad agent -dev ==> Starting Nomad agent... ==> Nomad agent configuration: @@ -65,6 +65,11 @@ data. From the log data, you can see that our agent is running in both client and server mode, and has claimed leadership of the cluster. Additionally, the local client has been registered and marked as ready. +-> **Note:** Typically any agent running in client mode must be run with root level +privilege. Nomad makes use of operating system primitives for resource isolation +which require elevated permissions. 
The agent will function as non-root, but +certain task drivers will not be available. + ## Cluster Nodes If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you From 8430387f8a2ec7679d8372aa660c66f988ed79c8 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Sep 2015 22:48:39 -0700 Subject: [PATCH 36/38] demo/vagrant: Adding files for getting started --- demo/vagrant/README.md | 24 ++++++++++++++++++++++ demo/vagrant/Vagrantfile | 43 ++++++++++++++++++++++++++++++++++++++++ demo/vagrant/client1.hcl | 20 +++++++++++++++++++ demo/vagrant/client2.hcl | 25 +++++++++++++++++++++++ demo/vagrant/server.hcl | 13 ++++++++++++ 5 files changed, 125 insertions(+) create mode 100644 demo/vagrant/README.md create mode 100644 demo/vagrant/Vagrantfile create mode 100644 demo/vagrant/client1.hcl create mode 100644 demo/vagrant/client2.hcl create mode 100644 demo/vagrant/server.hcl diff --git a/demo/vagrant/README.md b/demo/vagrant/README.md new file mode 100644 index 000000000..2150799a4 --- /dev/null +++ b/demo/vagrant/README.md @@ -0,0 +1,24 @@ +# Vagrant Nomad Demo + +This Vagrantfile and associated Nomad configuration files are meant +to be used along with the +[getting started guide](https://nomadproject.io/intro/getting-started/install.html). + +Follow along with the guide, or just start the Vagrant box with: + + $ vagrant up + +Once it is finished, you should be able to SSH in and interact with Nomad: + + $ vagrant ssh + ... + $ nomad + usage: nomad [--version] [--help] [] + + Available commands are: + agent Runs a Nomad agent + agent-info Display status information about the local agent + ... + +To learn more about starting Nomad see the [official site](https://nomadproject.io). + diff --git a/demo/vagrant/Vagrantfile b/demo/vagrant/Vagrantfile new file mode 100644 index 000000000..960fdef57 --- /dev/null +++ b/demo/vagrant/Vagrantfile @@ -0,0 +1,43 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +$script = <