Merge branch 'master' into f-port-configs
.gitignore (vendored) | 6
@@ -42,3 +42,9 @@ ui/dist/
website/.bundle
website/vendor

example.nomad
nomad_linux_amd64
nomad_darwin_amd64
TODO.md
@@ -12,7 +12,7 @@ func TestCompose(t *testing.T) {
SetMeta("foo", "bar").
Constrain(HardConstraint("kernel.name", "=", "linux")).
Require(&Resources{
CPU: 1.25,
CPU: 1250,
MemoryMB: 1024,
DiskMB: 2048,
IOPS: 1024,

@@ -78,7 +78,7 @@ func TestCompose(t *testing.T) {
Name: "task1",
Driver: "exec",
Resources: &Resources{
CPU: 1.25,
CPU: 1250,
MemoryMB: 1024,
DiskMB: 2048,
IOPS: 1024,
@@ -3,7 +3,7 @@ package api
// Resources encapsulates the required resources of
// a given task or task group.
type Resources struct {
CPU float64
CPU int
MemoryMB int
DiskMB int
IOPS int
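A quick illustrative sketch (not from the commit itself) of how the updated `Resources` struct reads after this change, assuming CPU is now an integer amount of MHz rather than a float fraction of a core; the struct and values below are stand-ins based on the test updates above.

```go
package main

import "fmt"

// Resources mirrors the shape introduced by this diff: CPU is an int (MHz)
// instead of a float64 fraction of a core.
type Resources struct {
	CPU      int
	MemoryMB int
	DiskMB   int
	IOPS     int
}

func main() {
	// 1250 MHz corresponds to the old 1.25 value used throughout the tests.
	r := Resources{CPU: 1250, MemoryMB: 1024, DiskMB: 2048, IOPS: 1024}
	fmt.Printf("ask: %d MHz, %d MB RAM, %d MB disk\n", r.CPU, r.MemoryMB, r.DiskMB)
}
```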
@@ -166,7 +166,7 @@ func TestTask_Require(t *testing.T) {

// Create some require resources
resources := &Resources{
CPU: 1.25,
CPU: 1250,
MemoryMB: 128,
DiskMB: 2048,
IOPS: 1024,
@@ -7,6 +7,8 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
|
||||
ctestutil "github.com/hashicorp/nomad/client/testutil"
|
||||
)
|
||||
|
||||
type MockAllocStateUpdater struct {
|
||||
@@ -32,6 +34,7 @@ func testAllocRunner() (*MockAllocStateUpdater, *AllocRunner) {
|
||||
}
|
||||
|
||||
func TestAllocRunner_SimpleRun(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, ar := testAllocRunner()
|
||||
go ar.Run()
|
||||
defer ar.Destroy()
|
||||
@@ -48,6 +51,7 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAllocRunner_Destroy(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, ar := testAllocRunner()
|
||||
|
||||
// Ensure task takes some time
|
||||
@@ -79,6 +83,7 @@ func TestAllocRunner_Destroy(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAllocRunner_Update(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, ar := testAllocRunner()
|
||||
|
||||
// Ensure task takes some time
|
||||
|
||||
@@ -142,6 +142,13 @@ func (c *Client) init() error {
return fmt.Errorf("failed creating alloc dir: %s", err)
}
}

// Ensure the state dir exists if we have one
if c.config.StateDir != "" {
if err := os.MkdirAll(c.config.StateDir, 0700); err != nil {
return fmt.Errorf("failed creating state dir: %s", err)
}
}
return nil
}

@@ -244,8 +251,8 @@ func (c *Client) Stats() map[string]map[string]string {
"client": map[string]string{
"known_servers": toString(uint64(len(c.config.Servers))),
"num_allocations": toString(uint64(numAllocs)),
"last_heartbeat": fmt.Sprintf("%#v", time.Since(c.lastHeartbeat)),
"heartbeat_ttl": fmt.Sprintf("%#v", c.heartbeatTTL),
"last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
"heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL),
},
"runtime": nomad.RuntimeStats(),
}

@@ -265,7 +272,9 @@ func (c *Client) restoreState() error {

// Scan the directory
list, err := ioutil.ReadDir(filepath.Join(c.config.StateDir, "alloc"))
if err != nil {
if err != nil && os.IsNotExist(err) {
return nil
} else if err != nil {
return fmt.Errorf("failed to list alloc state: %v", err)
}

@@ -556,6 +565,7 @@ func (c *Client) watchAllocations(allocUpdates chan []*structs.Allocation) {

for {
// Get the allocations, blocking for updates
resp = structs.NodeAllocsResponse{}
err := c.RPC("Node.GetAllocs", &req, &resp)
if err != nil {
c.logger.Printf("[ERR] client: failed to query for node allocations: %v", err)
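A quick illustrative sketch (not from the commit itself) of the directory-scan pattern the `restoreState` change adopts: a missing state directory means there is simply nothing to restore, while any other failure is surfaced. Paths and the helper name are made up for the example.

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
)

// listAllocState mirrors the error handling introduced above: ignore a
// missing directory, report everything else.
func listAllocState(stateDir string) ([]os.FileInfo, error) {
	list, err := ioutil.ReadDir(filepath.Join(stateDir, "alloc"))
	if err != nil && os.IsNotExist(err) {
		return nil, nil
	} else if err != nil {
		return nil, fmt.Errorf("failed to list alloc state: %v", err)
	}
	return list, nil
}

func main() {
	entries, err := listAllocState("/tmp/does-not-exist")
	fmt.Println(len(entries), err) // 0 <nil>
}
```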
@@ -15,6 +15,8 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
|
||||
ctestutil "github.com/hashicorp/nomad/client/testutil"
|
||||
)
|
||||
|
||||
var nextPort uint32 = 16000
|
||||
@@ -137,6 +139,7 @@ func TestClient_Fingerprint(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestClient_Drivers(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
c := testClient(t, nil)
|
||||
defer c.Shutdown()
|
||||
|
||||
@@ -246,6 +249,7 @@ func TestClient_UpdateAllocStatus(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestClient_WatchAllocs(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
s1, _ := testServer(t, nil)
|
||||
defer s1.Shutdown()
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
@@ -314,6 +318,7 @@ func TestClient_WatchAllocs(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestClient_SaveRestoreState(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
s1, _ := testServer(t, nil)
|
||||
defer s1.Shutdown()
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
@@ -2,7 +2,9 @@ package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
@@ -30,7 +32,12 @@ func NewExecDriver(ctx *DriverContext) Driver {
|
||||
}
|
||||
|
||||
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// We can always do a fork/exec
|
||||
// Only enable if we are root when running on non-windows systems.
|
||||
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
|
||||
d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
node.Attributes["driver.exec"] = "1"
|
||||
return true, nil
|
||||
}
|
||||
|
||||
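A quick illustrative sketch (not from the commit itself) of the fingerprint gate added to the exec, java, and qemu drivers above: on non-Windows hosts the driver only advertises itself when the agent runs as root. The logger call is replaced with a plain print here, and the function name is invented for the example.

```go
package main

import (
	"fmt"
	"runtime"
	"syscall"
)

// execCapable applies the same check used by the driver Fingerprint methods.
func execCapable() bool {
	if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
		fmt.Println("[DEBUG] driver.exec: must run as root user, disabling")
		return false
	}
	return true
}

func main() {
	fmt.Println("exec driver enabled:", execCapable())
}
```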
@@ -9,7 +9,9 @@ import (
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
@@ -36,6 +38,12 @@ func NewJavaDriver(ctx *DriverContext) Driver {
|
||||
}
|
||||
|
||||
func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Only enable if we are root when running on non-windows systems.
|
||||
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
|
||||
d.logger.Printf("[DEBUG] driver.java: must run as root user, disabling")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Find java version
|
||||
var out bytes.Buffer
|
||||
var erOut bytes.Buffer
|
||||
|
||||
@@ -13,7 +13,9 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
@@ -52,6 +54,12 @@ func NewQemuDriver(ctx *DriverContext) Driver {
|
||||
}
|
||||
|
||||
func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Only enable if we are root when running on non-windows systems.
|
||||
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
|
||||
d.logger.Printf("[DEBUG] driver.qemu: must run as root user, disabling")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
outBytes, err := exec.Command("qemu-system-x86_64", "-version").Output()
|
||||
if err != nil {
|
||||
return false, nil
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"os/user"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/nomad/command"
|
||||
@@ -31,16 +30,8 @@ func NewExecutor() Executor {
|
||||
// TODO: In a follow-up PR make it so this only happens once per client.
|
||||
// Fingerprinting shouldn't happen per task.
|
||||
|
||||
// Check if the process is has root capabilities.
|
||||
e.root = syscall.Geteuid() == 0
|
||||
|
||||
// Check if this process can set uid.
|
||||
if e.root {
|
||||
e.setUidEnabled = true
|
||||
}
|
||||
|
||||
// Check that cgroups are available. Must be root to modify it.
|
||||
if _, err := os.Stat(cgroupMount); err == nil && e.root {
|
||||
// Check that cgroups are available.
|
||||
if _, err := os.Stat(cgroupMount); err == nil {
|
||||
e.cgroupEnabled = true
|
||||
}
|
||||
|
||||
@@ -53,8 +44,6 @@ type LinuxExecutor struct {
|
||||
user *user.User
|
||||
|
||||
// Finger print capabilities.
|
||||
root bool
|
||||
setUidEnabled bool
|
||||
cgroupEnabled bool
|
||||
|
||||
// Isolation configurations.
|
||||
@@ -152,11 +141,9 @@ func (e *LinuxExecutor) Start() error {
|
||||
// spawned process. Note that we will only do this if we can call SetUID.
|
||||
// Otherwise we'll just run the other process as our current (non-root)
|
||||
// user. This means we aren't forced to run nomad as root.
|
||||
if e.setUidEnabled {
|
||||
if err := e.runAs("nobody"); err == nil && e.user != nil {
|
||||
e.cmd.SetUID(e.user.Uid)
|
||||
e.cmd.SetGID(e.user.Gid)
|
||||
}
|
||||
if err := e.runAs("nobody"); err == nil && e.user != nil {
|
||||
e.cmd.SetUID(e.user.Uid)
|
||||
e.cmd.SetGID(e.user.Gid)
|
||||
}
|
||||
|
||||
return e.spawnDaemon()
|
||||
|
||||
@@ -61,7 +61,7 @@ func (f *CPUFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bo
|
||||
node.Resources = &structs.Resources{}
|
||||
}
|
||||
|
||||
node.Resources.CPU = tc
|
||||
node.Resources.CPU = int(tc)
|
||||
}
|
||||
|
||||
if modelName != "" {
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
|
||||
ctestutil "github.com/hashicorp/nomad/client/testutil"
|
||||
)
|
||||
|
||||
func testLogger() *log.Logger {
|
||||
@@ -44,6 +46,7 @@ func testTaskRunner() (*MockTaskStateUpdater, *TaskRunner) {
|
||||
}
|
||||
|
||||
func TestTaskRunner_SimpleRun(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, tr := testTaskRunner()
|
||||
go tr.Run()
|
||||
defer tr.Destroy()
|
||||
@@ -79,6 +82,7 @@ func TestTaskRunner_SimpleRun(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTaskRunner_Destroy(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, tr := testTaskRunner()
|
||||
|
||||
// Change command to ensure we run for a bit
|
||||
@@ -113,6 +117,7 @@ func TestTaskRunner_Destroy(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTaskRunner_Update(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
_, tr := testTaskRunner()
|
||||
|
||||
// Change command to ensure we run for a bit
|
||||
|
||||
client/testutil/driver_compatible.go (new file) | 13
@@ -0,0 +1,13 @@
package testutil

import (
"runtime"
"syscall"
"testing"
)

func ExecCompatible(t *testing.T) {
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
t.Skip("Must be root on non-windows environments to run test")
}
}
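A quick illustrative sketch (not from the commit itself) of how a driver test calls the new helper, which is exactly the pattern the client and task runner tests in this diff adopt; the test name below is invented.

```go
package driver

import (
	"testing"

	ctestutil "github.com/hashicorp/nomad/client/testutil"
)

// TestExecDriver_Smoke calls ExecCompatible first so the test is skipped,
// rather than failing, when it is not running as root on a non-Windows host.
func TestExecDriver_Smoke(t *testing.T) {
	ctestutil.ExecCompatible(t)

	// ... exercise the exec driver here, knowing the environment can run it.
}
```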
@@ -77,11 +77,12 @@ func (a *Agent) serverConfig() (*nomad.Config, error) {
if a.config.NodeName != "" {
conf.NodeName = a.config.NodeName
}
if a.config.Server.Bootstrap {
conf.Bootstrap = a.config.Server.Bootstrap
}
if a.config.Server.BootstrapExpect > 0 {
conf.BootstrapExpect = a.config.Server.BootstrapExpect
if a.config.Server.BootstrapExpect == 1 {
conf.Bootstrap = true
} else {
conf.BootstrapExpect = a.config.Server.BootstrapExpect
}
}
if a.config.DataDir != "" {
conf.DataDir = filepath.Join(a.config.DataDir, "server")
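A quick illustrative sketch (not from the commit itself) of the rule the hunk above encodes: a bootstrap expectation of exactly one server degenerates to classic single-node bootstrap, while larger values are passed through so servers wait for quorum. The helper name is invented.

```go
package main

import "fmt"

// deriveBootstrap captures the behaviour introduced above: expect == 1 means
// "self-elect immediately" (Bootstrap = true), anything larger is kept as
// BootstrapExpect.
func deriveBootstrap(expect int) (bootstrap bool, bootstrapExpect int) {
	if expect <= 0 {
		return false, 0
	}
	if expect == 1 {
		return true, 0
	}
	return false, expect
}

func main() {
	fmt.Println(deriveBootstrap(1)) // true 0
	fmt.Println(deriveBootstrap(3)) // false 3
}
```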
@@ -162,4 +162,29 @@ func TestAgent_ServerConfig(t *testing.T) {
|
||||
if addr := out.SerfConfig.MemberlistConfig.BindAddr; addr != "127.0.0.3" {
|
||||
t.Fatalf("expect 127.0.0.3, got: %s", addr)
|
||||
}
|
||||
|
||||
// Properly handles the bootstrap flags
|
||||
conf.Server.BootstrapExpect = 1
|
||||
out, err = a.serverConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if !out.Bootstrap {
|
||||
t.Fatalf("should have set bootstrap mode")
|
||||
}
|
||||
if out.BootstrapExpect != 0 {
|
||||
t.Fatalf("boostrap expect should be 0")
|
||||
}
|
||||
|
||||
conf.Server.BootstrapExpect = 3
|
||||
out, err = a.serverConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if out.Bootstrap {
|
||||
t.Fatalf("bootstrap mode should be disabled")
|
||||
}
|
||||
if out.BootstrapExpect != 3 {
|
||||
t.Fatalf("should have bootstrap-expect = 3")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,13 +67,16 @@ func (c *Command) readConfig() *Config {
|
||||
flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
|
||||
flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")
|
||||
|
||||
// Server-only options
|
||||
flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
|
||||
|
||||
// General options
|
||||
flags.Var((*sliceflag.StringFlag)(&configPath), "config", "config")
|
||||
flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
|
||||
flags.StringVar(&cmdConfig.Region, "region", "", "")
|
||||
flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
|
||||
flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
|
||||
flags.StringVar(&cmdConfig.LogLevel, "log-level", "info", "")
|
||||
flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
|
||||
flags.StringVar(&cmdConfig.NodeName, "node", "", "")
|
||||
|
||||
// Atlas options
|
||||
@@ -121,17 +124,31 @@ func (c *Command) readConfig() *Config {
|
||||
// Merge any CLI options over config file options
|
||||
config = config.Merge(cmdConfig)
|
||||
|
||||
// Check that we have a data-dir if we are a server
|
||||
if !dev && config.DataDir == "" {
|
||||
c.Ui.Error("Must specify data directory")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set the version info
|
||||
config.Revision = c.Revision
|
||||
config.Version = c.Version
|
||||
config.VersionPrerelease = c.VersionPrerelease
|
||||
|
||||
if dev {
|
||||
// Skip validation for dev mode
|
||||
return config
|
||||
}
|
||||
|
||||
// Check that we have a data-dir
|
||||
if config.DataDir == "" {
|
||||
c.Ui.Error("Must specify data directory")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check the bootstrap flags
|
||||
if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
|
||||
c.Ui.Error("Bootstrap requires server mode to be enabled")
|
||||
return nil
|
||||
}
|
||||
if config.Server.BootstrapExpect == 1 {
|
||||
c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
|
||||
}
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
@@ -549,25 +566,32 @@ General Options (clients and servers):
Name of the region the Nomad agent will be a member of. By default
this value is set to "global".

Role-Specific Options:

-client
Enable client mode for the agent. Client mode enables a given node
to be evaluated for allocations. If client mode is not enabled,
no work will be scheduled to the agent.

-dev
Start the agent in development mode. This enables a pre-configured
dual-role agent (client + server) which is useful for developing
or testing Nomad. No other configuration is required to start the
agent in this mode.

Server Options:

-server
Enable server mode for the agent. Agents in server mode are
clustered together and handle the additional responsibility of
leader election, data replication, and scheduling work onto
eligible client nodes.

-bootstrap-expect=<num>
Configures the expected number of server nodes to wait for before
bootstrapping the cluster. Once <num> servers have joined each other,
Nomad initiates the bootstrap process.

Client Options:

-client
Enable client mode for the agent. Client mode enables a given node
to be evaluated for allocations. If client mode is not enabled,
no work will be scheduled to the agent.

Atlas Options:

-atlas=<infrastructure>
command/agent/command_test.go (new file) | 63
@@ -0,0 +1,63 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mitchellh/cli"
|
||||
)
|
||||
|
||||
func TestCommand_Implements(t *testing.T) {
|
||||
var _ cli.Command = &Command{}
|
||||
}
|
||||
|
||||
func TestCommand_Args(t *testing.T) {
|
||||
tmpDir, err := ioutil.TempDir("", "nomad")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
type tcase struct {
|
||||
args []string
|
||||
errOut string
|
||||
}
|
||||
tcases := []tcase{
|
||||
{
|
||||
[]string{},
|
||||
"Must specify data directory",
|
||||
},
|
||||
{
|
||||
[]string{"-data-dir=" + tmpDir, "-bootstrap-expect=1"},
|
||||
"Bootstrap requires server mode to be enabled",
|
||||
},
|
||||
{
|
||||
[]string{"-data-dir=" + tmpDir, "-server", "-bootstrap-expect=1"},
|
||||
"WARNING: Bootstrap mode enabled!",
|
||||
},
|
||||
}
|
||||
for _, tc := range tcases {
|
||||
// Make a new command. We pre-emptively close the shutdownCh
|
||||
// so that the command exits immediately instead of blocking.
|
||||
ui := new(cli.MockUi)
|
||||
shutdownCh := make(chan struct{})
|
||||
close(shutdownCh)
|
||||
cmd := &Command{
|
||||
Ui: ui,
|
||||
ShutdownCh: shutdownCh,
|
||||
}
|
||||
|
||||
if code := cmd.Run(tc.args); code != 1 {
|
||||
t.Fatalf("args: %v\nexit: %d\n", tc.args, code)
|
||||
}
|
||||
|
||||
if expect := tc.errOut; expect != "" {
|
||||
out := ui.ErrorWriter.String()
|
||||
if !strings.Contains(out, expect) {
|
||||
t.Fatalf("expect to find %q\n\n%s", expect, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -146,10 +146,6 @@ type ServerConfig struct {
|
||||
// Enabled controls if we are a server
|
||||
Enabled bool `hcl:"enabled"`
|
||||
|
||||
// Bootstrap is used to bring up the first Consul server, and
|
||||
// permits that node to elect itself leader
|
||||
Bootstrap bool `hcl:"bootstrap"`
|
||||
|
||||
// BootstrapExpect tries to automatically bootstrap the Consul cluster,
|
||||
// by withholding peers until enough servers join.
|
||||
BootstrapExpect int `hcl:"bootstrap_expect"`
|
||||
@@ -350,9 +346,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
|
||||
if b.Enabled {
|
||||
result.Enabled = true
|
||||
}
|
||||
if b.Bootstrap {
|
||||
result.Bootstrap = true
|
||||
}
|
||||
if b.BootstrapExpect > 0 {
|
||||
result.BootstrapExpect = b.BootstrapExpect
|
||||
}
|
||||
|
||||
@@ -39,7 +39,6 @@ func TestConfig_Merge(t *testing.T) {
|
||||
},
|
||||
Server: &ServerConfig{
|
||||
Enabled: false,
|
||||
Bootstrap: false,
|
||||
BootstrapExpect: 1,
|
||||
DataDir: "/tmp/data1",
|
||||
ProtocolVersion: 1,
|
||||
@@ -91,7 +90,6 @@ func TestConfig_Merge(t *testing.T) {
|
||||
},
|
||||
Server: &ServerConfig{
|
||||
Enabled: true,
|
||||
Bootstrap: true,
|
||||
BootstrapExpect: 2,
|
||||
DataDir: "/tmp/data2",
|
||||
ProtocolVersion: 2,
|
||||
@@ -341,7 +339,6 @@ func TestConfig_LoadConfigString(t *testing.T) {
|
||||
},
|
||||
Server: &ServerConfig{
|
||||
Enabled: true,
|
||||
Bootstrap: true,
|
||||
BootstrapExpect: 5,
|
||||
DataDir: "/tmp/data",
|
||||
ProtocolVersion: 3,
|
||||
@@ -409,7 +406,6 @@ client {
|
||||
}
|
||||
server {
|
||||
enabled = true
|
||||
bootstrap = true
|
||||
bootstrap_expect = 5
|
||||
data_dir = "/tmp/data"
|
||||
protocol_version = 3
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
"time"

"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)

const (

@@ -191,20 +190,9 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque
}

// decodeBody is used to decode a JSON request body
func decodeBody(req *http.Request, out interface{}, cb func(interface{}) error) error {
var raw interface{}
func decodeBody(req *http.Request, out interface{}) error {
dec := json.NewDecoder(req.Body)
if err := dec.Decode(&raw); err != nil {
return err
}

// Invoke the callback prior to decode
if cb != nil {
if err := cb(raw); err != nil {
return err
}
}
return mapstructure.Decode(raw, out)
return dec.Decode(&out)
}

// setIndex is used to set the index response header
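A quick illustrative sketch (not from the commit itself) of the simplified decoder the hunk above settles on: decode the request body straight into the target struct with encoding/json, with no mapstructure round-trip and no pre-decode callback. The `RegisterRequest` type is a made-up stand-in for the real request structs.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
)

// decodeBody decodes the JSON request body directly into out.
func decodeBody(req *http.Request, out interface{}) error {
	return json.NewDecoder(req.Body).Decode(out)
}

// RegisterRequest is an illustrative stand-in for structs.JobRegisterRequest.
type RegisterRequest struct {
	Name     string
	Priority int
}

func main() {
	body := strings.NewReader(`{"Name":"example","Priority":50}`)
	req := httptest.NewRequest("PUT", "/v1/job/example", body)

	var args RegisterRequest
	if err := decodeBody(req, &args); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", args) // {Name:example Priority:50}
}
```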
@@ -157,7 +157,7 @@ func (s *HTTPServer) jobQuery(resp http.ResponseWriter, req *http.Request,
|
||||
func (s *HTTPServer) jobUpdate(resp http.ResponseWriter, req *http.Request,
|
||||
jobName string) (interface{}, error) {
|
||||
var args structs.JobRegisterRequest
|
||||
if err := decodeBody(req, &args, nil); err != nil {
|
||||
if err := decodeBody(req, &args); err != nil {
|
||||
return nil, CodedError(400, err.Error())
|
||||
}
|
||||
if args.Job == nil {
|
||||
|
||||
command/init.go | 166
@@ -2,8 +2,15 @@ package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultInitName is the default name we use when
|
||||
// initializing the example file
|
||||
DefaultInitName = "example.nomad"
|
||||
)
|
||||
|
||||
// InitCommand generates a new job template that you can customize to your
|
||||
@@ -13,89 +20,106 @@ type InitCommand struct {
|
||||
}
|
||||
|
||||
func (c *InitCommand) Help() string {
|
||||
return initUsage
|
||||
}
|
||||
helpText := `
|
||||
Usage: nomad init
|
||||
|
||||
func (c *InitCommand) Run(args []string) int {
|
||||
dir, err := os.Getwd()
|
||||
if err != nil {
|
||||
c.Ui.Error("Unable to determine pwd; aborting")
|
||||
return 1
|
||||
}
|
||||
Creates an example job file that can be used as a starting
|
||||
point to customize further.
|
||||
|
||||
// Derive the job name from the pwd folder name, which is our best guess at
|
||||
// the project's name
|
||||
jobname := filepath.Base(dir)
|
||||
jobfile := fmt.Sprintf("%s.nomad", jobname)
|
||||
jobpath := filepath.Join(dir, jobfile)
|
||||
if _, err := os.Stat(jobpath); err == nil {
|
||||
c.Ui.Error(fmt.Sprintf("%s file already exists", jobfile))
|
||||
return 1
|
||||
}
|
||||
|
||||
file, err := os.Create(jobfile)
|
||||
defer file.Close()
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Unable to create file %s: %s", jobfile, err))
|
||||
return 1
|
||||
}
|
||||
|
||||
_, err = file.WriteString(defaultJob)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Failed to write job template to %s", jobfile))
|
||||
return 1
|
||||
}
|
||||
|
||||
c.Ui.Output(fmt.Sprintf("Initialized nomad job template in %s", jobfile))
|
||||
|
||||
return 0
|
||||
`
|
||||
return strings.TrimSpace(helpText)
|
||||
}
|
||||
|
||||
func (c *InitCommand) Synopsis() string {
|
||||
return "Create a new job template"
|
||||
return "Create an example job file"
|
||||
}
|
||||
|
||||
const initUsage = ``
|
||||
func (c *InitCommand) Run(args []string) int {
|
||||
// Check if the file already exists
|
||||
_, err := os.Stat(DefaultInitName)
|
||||
if err == nil || !os.IsNotExist(err) {
|
||||
c.Ui.Error(fmt.Sprintf("Job '%s' already exists", DefaultInitName))
|
||||
return 1
|
||||
} else if !os.IsNotExist(err) {
|
||||
c.Ui.Error(fmt.Sprintf("Failed to stat '%s': %v", DefaultInitName, err))
|
||||
return 1
|
||||
}
|
||||
|
||||
// Write out the example
|
||||
err = ioutil.WriteFile(DefaultInitName, []byte(defaultJob), 0660)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Failed to write '%s': %v", DefaultInitName, err))
|
||||
return 1
|
||||
}
|
||||
|
||||
// Success
|
||||
c.Ui.Output(fmt.Sprintf("Example job file written to %s", DefaultInitName))
|
||||
return 0
|
||||
}
|
||||
|
||||
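A quick illustrative sketch (not from the commit itself) of the "write the example file only if it does not already exist" pattern the new Run method implements. Note that in the committed code the `else if !os.IsNotExist(err)` branch can never be reached, because the first condition already covers it; the sketch separates the two cases explicitly. File name and contents are illustrative.

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

// writeExample refuses to clobber an existing file, then writes the template.
func writeExample(name, contents string) error {
	if _, err := os.Stat(name); err == nil {
		return fmt.Errorf("%q already exists", name)
	} else if !os.IsNotExist(err) {
		return fmt.Errorf("failed to stat %q: %v", name, err)
	}
	return ioutil.WriteFile(name, []byte(contents), 0660)
}

func main() {
	if err := writeExample("example.nomad", "job \"example\" {}\n"); err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("Example job file written to example.nomad")
}
```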
const defaultJob = `
|
||||
job "my-app" {
|
||||
region = "global"
|
||||
type = "service"
|
||||
priority = 50
|
||||
# There can only be a single job definition per file.
|
||||
# Create a job with ID and Name 'example'
|
||||
job "example" {
|
||||
# Run the job in the global region, which is the default.
|
||||
# region = "global"
|
||||
|
||||
// Each task in the group will be scheduled on the same machine(s).
|
||||
group "app-group" {
|
||||
// How many copies of this group should we run?
|
||||
count = 5
|
||||
# Specify the datacenters within the region this job can run in.
|
||||
datacenters = ["dc1"]
|
||||
|
||||
task "python-webapp" {
|
||||
driver = "docker"
|
||||
config {
|
||||
image = "org/container"
|
||||
}
|
||||
resources {
|
||||
// For CPU 1024 = 1ghz
|
||||
cpu = 500
|
||||
// Memory in megabytes
|
||||
memory = 128
|
||||
# Service type jobs optimize for long-lived services. This is
|
||||
# the default but we can change to batch for short-lived tasks.
|
||||
# type = "service"
|
||||
|
||||
network {
|
||||
dynamic_ports = [
|
||||
"http",
|
||||
"https",
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
# Priority controls our access to resources and scheduling priority.
|
||||
# This can be 1 to 100, inclusively, and defaults to 50.
|
||||
# priority = 50
|
||||
|
||||
task "logshipper" {
|
||||
driver = "exec"
|
||||
}
|
||||
# Restrict our job to only linux. We can specify multiple
|
||||
# constraints as needed.
|
||||
constraint {
|
||||
attribute = "$attr.kernel.name"
|
||||
value = "linux"
|
||||
}
|
||||
|
||||
constraint {
|
||||
attribute = "kernel.os"
|
||||
value = "linux"
|
||||
}
|
||||
}
|
||||
# Configure the job to do rolling updates
|
||||
update {
|
||||
# Stagger updates every 10 seconds
|
||||
stagger = "10s"
|
||||
|
||||
# Update a single task at a time
|
||||
max_parallel = 1
|
||||
}
|
||||
|
||||
# Create a 'cache' group. Each task in the group will be
|
||||
# scheduled onto the same machine.
|
||||
group "cache" {
|
||||
# Control the number of instances of this group.
|
||||
# Defaults to 1
|
||||
# count = 1
|
||||
|
||||
# Define a task to run
|
||||
task "redis" {
|
||||
# Use Docker to run the task.
|
||||
driver = "docker"
|
||||
|
||||
# Configure Docker driver with the image
|
||||
config {
|
||||
image = "redis:latest"
|
||||
}
|
||||
|
||||
# We must specify the resources required for
|
||||
# this task to ensure it runs on a machine with
|
||||
# enough capacity.
|
||||
resources {
|
||||
cpu = 500 # 500 Mhz
|
||||
memory = 256 # 256MB
|
||||
network {
|
||||
mbits = 10
|
||||
dynamic_ports = ["redis"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
`
|
||||
|
||||
@@ -130,7 +130,6 @@ func (c *NodeStatusCommand) Run(args []string) int {
|
||||
alloc.ID,
|
||||
alloc.EvalID,
|
||||
alloc.JobID,
|
||||
alloc.NodeID,
|
||||
alloc.TaskGroup,
|
||||
alloc.DesiredStatus,
|
||||
alloc.ClientStatus)
|
||||
|
||||
@@ -19,8 +19,9 @@ func (c *RunCommand) Help() string {
helpText := `
Usage: nomad run [options] <file>

Starts running a new job using the definition located at <file>.
This is the main command used to invoke new work in Nomad.
Starts running a new job or updates an existing job using
the specification located at <file>. This is the main command
used to interact with Nomad.

Upon successful job submission, this command will immediately
enter an interactive monitor. This is useful to watch Nomad's

@@ -50,7 +51,7 @@ Run Options:
}

func (c *RunCommand) Synopsis() string {
return "Run a new job"
return "Run a new job or update an existing job"
}

func (c *RunCommand) Run(args []string) int {
demo/vagrant/README.md (new file) | 24
@@ -0,0 +1,24 @@
# Vagrant Nomad Demo

This Vagrantfile and associated Nomad configuration files are meant
to be used along with the
[getting started guide](https://nomadproject.io/intro/getting-started/install.html).

Follow along with the guide, or just start the Vagrant box with:

    $ vagrant up

Once it is finished, you should be able to SSH in and interact with Nomad:

    $ vagrant ssh
    ...
    $ nomad
    usage: nomad [--version] [--help] <command> [<args>]

    Available commands are:
        agent         Runs a Nomad agent
        agent-info    Display status information about the local agent
    ...

To learn more about starting Nomad see the [official site](https://nomadproject.io).
demo/vagrant/Vagrantfile (vendored, new file) | 43
@@ -0,0 +1,43 @@
|
||||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
|
||||
$script = <<SCRIPT
|
||||
# Update apt and get dependencies
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y unzip curl wget
|
||||
|
||||
# Install Docker
|
||||
sudo curl -sSL https://get.docker.com/ | sh
|
||||
|
||||
# Download Nomad
|
||||
echo Fetching Nomad...
|
||||
cd /tmp/
|
||||
wget https://s3.amazonaws.com/hc-public/nomad/0.1.0dev/nomad_linux_amd64 -O nomad
|
||||
|
||||
echo Installing Nomad...
|
||||
#unzip nomad.zip
|
||||
sudo chmod +x nomad
|
||||
sudo mv nomad /usr/bin/nomad
|
||||
|
||||
sudo mkdir /etc/nomad.d
|
||||
sudo chmod a+w /etc/nomad.d
|
||||
|
||||
SCRIPT
|
||||
|
||||
Vagrant.configure(2) do |config|
|
||||
config.vm.box = "puphpet/ubuntu1404-x64"
|
||||
config.vm.hostname = "nomad"
|
||||
config.vm.provision "shell", inline: $script, privileged: false
|
||||
|
||||
# Increase memory for Virtualbox
|
||||
config.vm.provider "virtualbox" do |vb|
|
||||
vb.memory = "1024"
|
||||
end
|
||||
|
||||
# Increase memory for VMware
|
||||
["vmware_fusion", "vmware_workstation"].each do |p|
|
||||
config.vm.provider p do |v|
|
||||
v.vmx["memsize"] = "1024"
|
||||
end
|
||||
end
|
||||
end
|
||||
demo/vagrant/client1.hcl (new file) | 20
@@ -0,0 +1,20 @@
|
||||
# Increase log verbosity
|
||||
log_level = "DEBUG"
|
||||
|
||||
# Setup data dir
|
||||
data_dir = "/tmp/client1"
|
||||
|
||||
# Enable the client
|
||||
client {
|
||||
enabled = true
|
||||
|
||||
# For demo assume we are talking to server1. For production,
|
||||
# this should be like "nomad.service.consul:4647" and a system
|
||||
# like Consul used for service discovery.
|
||||
servers = ["127.0.0.1:4647"]
|
||||
}
|
||||
|
||||
# Modify our port to avoid a collision with server1
|
||||
ports {
|
||||
http = 5656
|
||||
}
|
||||
demo/vagrant/client2.hcl (new file) | 25
@@ -0,0 +1,25 @@
|
||||
# Increase log verbosity
|
||||
log_level = "DEBUG"
|
||||
|
||||
# Setup data dir
|
||||
data_dir = "/tmp/client2"
|
||||
|
||||
# Enable the client
|
||||
client {
|
||||
enabled = true
|
||||
|
||||
# For demo assume we are talking to server1. For production,
|
||||
# this should be like "nomad.service.consul:4647" and a system
|
||||
# like Consul used for service discovery.
|
||||
servers = ["127.0.0.1:4647"]
|
||||
|
||||
# Set ourselves as thing two
|
||||
meta {
|
||||
thing = "two"
|
||||
}
|
||||
}
|
||||
|
||||
# Modify our port to avoid a collision with server1 and client1
|
||||
ports {
|
||||
http = 5657
|
||||
}
|
||||
demo/vagrant/server.hcl (new file) | 13
@@ -0,0 +1,13 @@
|
||||
# Increase log verbosity
|
||||
log_level = "DEBUG"
|
||||
|
||||
# Setup data dir
|
||||
data_dir = "/tmp/server1"
|
||||
|
||||
# Enable the server
|
||||
server {
|
||||
enabled = true
|
||||
|
||||
# Self-elect, should be 3 or 5 for production
|
||||
bootstrap_expect = 1
|
||||
}
|
||||
@@ -14,7 +14,7 @@ func Node() *structs.Node {
|
||||
"driver.exec": "1",
|
||||
},
|
||||
Resources: &structs.Resources{
|
||||
CPU: 4.0,
|
||||
CPU: 4000,
|
||||
MemoryMB: 8192,
|
||||
DiskMB: 100 * 1024,
|
||||
IOPS: 150,
|
||||
@@ -27,7 +27,7 @@ func Node() *structs.Node {
|
||||
},
|
||||
},
|
||||
Reserved: &structs.Resources{
|
||||
CPU: 0.1,
|
||||
CPU: 100,
|
||||
MemoryMB: 256,
|
||||
DiskMB: 4 * 1024,
|
||||
Networks: []*structs.NetworkResource{
|
||||
@@ -81,7 +81,7 @@ func Job() *structs.Job {
|
||||
"args": "+%s",
|
||||
},
|
||||
Resources: &structs.Resources{
|
||||
CPU: 0.5,
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
Networks: []*structs.NetworkResource{
|
||||
&structs.NetworkResource{
|
||||
@@ -127,7 +127,7 @@ func Alloc() *structs.Allocation {
|
||||
NodeID: "foo",
|
||||
TaskGroup: "web",
|
||||
Resources: &structs.Resources{
|
||||
CPU: 0.5,
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
Networks: []*structs.NetworkResource{
|
||||
&structs.NetworkResource{
|
||||
@@ -141,7 +141,7 @@ func Alloc() *structs.Allocation {
|
||||
},
|
||||
TaskResources: map[string]*structs.Resources{
|
||||
"web": &structs.Resources{
|
||||
CPU: 0.5,
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
Networks: []*structs.NetworkResource{
|
||||
&structs.NetworkResource{
|
||||
|
||||
@@ -91,9 +91,9 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex) (bool, st
// This is equivalent to their BestFit v3
func ScoreFit(node *Node, util *Resources) float64 {
// Determine the node availability
nodeCpu := node.Resources.CPU
nodeCpu := float64(node.Resources.CPU)
if node.Reserved != nil {
nodeCpu -= node.Reserved.CPU
nodeCpu -= float64(node.Reserved.CPU)
}
nodeMem := float64(node.Resources.MemoryMB)
if node.Reserved != nil {

@@ -101,7 +101,7 @@ func ScoreFit(node *Node, util *Resources) float64 {
}

// Compute the free percentage
freePctCpu := 1 - (util.CPU / nodeCpu)
freePctCpu := 1 - (float64(util.CPU) / nodeCpu)
freePctRam := 1 - (float64(util.MemoryMB) / nodeMem)

// Total will be "maximized" the smaller the value is.
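A quick illustrative sketch (not from the commit itself) of the free-percentage computation that ScoreFit feeds into its bin-packing score, showing why integer CPU values now need an explicit float64 conversion before the division. The node and usage numbers are made up, and the real score weighting in Nomad differs.

```go
package main

import "fmt"

// freePercentages shows the computation the hunk above adjusts: CPU is now an
// int (MHz), so it is converted to float64 before dividing by the node total.
func freePercentages(nodeCPU, nodeMemMB, usedCPU, usedMemMB int) (cpu, ram float64) {
	cpu = 1 - (float64(usedCPU) / float64(nodeCPU))
	ram = 1 - (float64(usedMemMB) / float64(nodeMemMB))
	return
}

func main() {
	cpu, ram := freePercentages(4000, 8192, 1000, 2048)
	fmt.Printf("free cpu: %.2f, free ram: %.2f\n", cpu, ram) // 0.75, 0.75
}
```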
@@ -89,7 +89,7 @@ func TestAllocsFit_PortsOvercommitted(t *testing.T) {
|
||||
func TestAllocsFit(t *testing.T) {
|
||||
n := &Node{
|
||||
Resources: &Resources{
|
||||
CPU: 2.0,
|
||||
CPU: 2000,
|
||||
MemoryMB: 2048,
|
||||
DiskMB: 10000,
|
||||
IOPS: 100,
|
||||
@@ -102,7 +102,7 @@ func TestAllocsFit(t *testing.T) {
|
||||
},
|
||||
},
|
||||
Reserved: &Resources{
|
||||
CPU: 1.0,
|
||||
CPU: 1000,
|
||||
MemoryMB: 1024,
|
||||
DiskMB: 5000,
|
||||
IOPS: 50,
|
||||
@@ -119,7 +119,7 @@ func TestAllocsFit(t *testing.T) {
|
||||
|
||||
a1 := &Allocation{
|
||||
Resources: &Resources{
|
||||
CPU: 1.0,
|
||||
CPU: 1000,
|
||||
MemoryMB: 1024,
|
||||
DiskMB: 5000,
|
||||
IOPS: 50,
|
||||
@@ -144,7 +144,7 @@ func TestAllocsFit(t *testing.T) {
|
||||
}
|
||||
|
||||
// Sanity check the used resources
|
||||
if used.CPU != 2.0 {
|
||||
if used.CPU != 2000 {
|
||||
t.Fatalf("bad: %#v", used)
|
||||
}
|
||||
if used.MemoryMB != 2048 {
|
||||
@@ -161,7 +161,7 @@ func TestAllocsFit(t *testing.T) {
|
||||
}
|
||||
|
||||
// Sanity check the used resources
|
||||
if used.CPU != 3.0 {
|
||||
if used.CPU != 3000 {
|
||||
t.Fatalf("bad: %#v", used)
|
||||
}
|
||||
if used.MemoryMB != 3072 {
|
||||
|
||||
@@ -136,6 +136,7 @@ func (idx *NetworkIndex) yieldIP(cb func(net *NetworkResource, ip net.IP) bool)
|
||||
// AssignNetwork is used to assign network resources given an ask.
|
||||
// If the ask cannot be satisfied, returns nil
|
||||
func (idx *NetworkIndex) AssignNetwork(ask *NetworkResource) (out *NetworkResource, err error) {
|
||||
err = fmt.Errorf("no networks available")
|
||||
idx.yieldIP(func(n *NetworkResource, ip net.IP) (stop bool) {
|
||||
// Convert the IP to a string
|
||||
ipStr := ip.String()
|
||||
|
||||
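A quick illustrative sketch (not from the commit itself) of the pattern the AssignNetwork change uses: pre-seed the error with the "nothing available" case, then let the search callback clear it when it finds a match, so the failure path needs no extra bookkeeping. The types and helper below are simplified stand-ins.

```go
package main

import (
	"errors"
	"fmt"
)

// pickPort pre-seeds err with the failure case; the loop clears it only when
// an assignment succeeds, mirroring the yieldIP callback above.
func pickPort(free []int, used map[int]bool) (out int, err error) {
	err = errors.New("no ports available")
	for _, p := range free {
		if !used[p] {
			out = p
			err = nil
			break
		}
	}
	return
}

func main() {
	fmt.Println(pickPort([]int{20000, 20001}, map[int]bool{20000: true})) // 20001 <nil>
}
```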
@@ -534,7 +534,7 @@ type NodeListStub struct {
// Resources is used to define the resources available
// on a client
type Resources struct {
CPU float64
CPU int
MemoryMB int `mapstructure:"memory"`
DiskMB int `mapstructure:"disk"`
IOPS int
@@ -146,13 +146,13 @@ func TestResource_NetIndex(t *testing.T) {
|
||||
|
||||
func TestResource_Superset(t *testing.T) {
|
||||
r1 := &Resources{
|
||||
CPU: 2.0,
|
||||
CPU: 2000,
|
||||
MemoryMB: 2048,
|
||||
DiskMB: 10000,
|
||||
IOPS: 100,
|
||||
}
|
||||
r2 := &Resources{
|
||||
CPU: 1.0,
|
||||
CPU: 2000,
|
||||
MemoryMB: 1024,
|
||||
DiskMB: 5000,
|
||||
IOPS: 50,
|
||||
@@ -174,7 +174,7 @@ func TestResource_Superset(t *testing.T) {
|
||||
|
||||
func TestResource_Add(t *testing.T) {
|
||||
r1 := &Resources{
|
||||
CPU: 2.0,
|
||||
CPU: 2000,
|
||||
MemoryMB: 2048,
|
||||
DiskMB: 10000,
|
||||
IOPS: 100,
|
||||
@@ -187,7 +187,7 @@ func TestResource_Add(t *testing.T) {
|
||||
},
|
||||
}
|
||||
r2 := &Resources{
|
||||
CPU: 1.0,
|
||||
CPU: 2000,
|
||||
MemoryMB: 1024,
|
||||
DiskMB: 5000,
|
||||
IOPS: 50,
|
||||
@@ -206,7 +206,7 @@ func TestResource_Add(t *testing.T) {
|
||||
}
|
||||
|
||||
expect := &Resources{
|
||||
CPU: 3.0,
|
||||
CPU: 3000,
|
||||
MemoryMB: 3072,
|
||||
DiskMB: 15000,
|
||||
IOPS: 150,
|
||||
|
||||
@@ -96,19 +96,19 @@ func (s *GenericScheduler) setStatus(status, desc string) error {

// Process is used to handle a single evaluation
func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
// Store the evaluation
s.eval = eval

// Verify the evaluation trigger reason is understood
switch eval.TriggeredBy {
case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,
structs.EvalTriggerJobDeregister:
structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate:
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return s.setStatus(structs.EvalStatusFailed, desc)
}

// Store the evaluation
s.eval = eval

// Retry up to the maxScheduleAttempts
limit := maxServiceScheduleAttempts
if s.batch {
@@ -289,8 +289,8 @@ func (iter *JobAntiAffinityIterator) Next() *RankedNode {

// Apply a penalty if there are collisions
if collisions > 0 {
scorePenalty := float64(collisions) * iter.penalty
option.Score -= scorePenalty
scorePenalty := -1 * float64(collisions) * iter.penalty
option.Score += scorePenalty
iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty)
}
return option

@@ -88,7 +88,7 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")

// Apply a limit function. This is to avoid scanning *every* possible node.
s.limit = NewLimitIterator(ctx, s.binPack, 2)
s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)

// Select the node with the maximum score for placement
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
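A quick illustrative sketch (not from the commit itself) of why the anti-affinity penalty is negated before being added: the same signed value can then be applied to the score and recorded in the metrics, so the metric reports -N instead of a positive number that was silently subtracted. The numbers are made up.

```go
package main

import "fmt"

func main() {
	const penalty = 20.0
	collisions := 2
	score := 100.0

	// Negate once, then add: the value applied and the value reported match.
	scorePenalty := -1 * float64(collisions) * penalty
	score += scorePenalty

	fmt.Printf("score=%v, recorded penalty=%v\n", score, scorePenalty) // score=60, recorded penalty=-40
}
```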
@@ -67,7 +67,7 @@ func TestServiceStack_Select_Size(t *testing.T) {
|
||||
t.Fatalf("missing size")
|
||||
}
|
||||
|
||||
if size.CPU != 0.5 || size.MemoryMB != 256 {
|
||||
if size.CPU != 500 || size.MemoryMB != 256 {
|
||||
t.Fatalf("bad: %#v", size)
|
||||
}
|
||||
|
||||
|
||||
website/Vagrantfile (vendored) | 3
@@ -33,8 +33,9 @@ sudo apt-get install -y nodejs
|
||||
SCRIPT
|
||||
|
||||
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
|
||||
config.vm.box = "chef/ubuntu-12.04"
|
||||
config.vm.box = "bento/ubuntu-12.04"
|
||||
config.vm.network "private_network", ip: "33.33.30.10"
|
||||
config.vm.network "forwarded_port", guest: 4567, host: 8080
|
||||
config.vm.provision "shell", inline: $script, privileged: false
|
||||
config.vm.synced_folder ".", "/vagrant", type: "rsync"
|
||||
end
|
||||
|
||||
@@ -152,17 +152,11 @@ configured on client nodes.
|
||||
* `enabled`: A boolean indicating if server mode should be enabled for the
|
||||
local agent. All other server options depend on this value being set.
|
||||
Defaults to `false`.
|
||||
* `bootstrap`: A boolean indicating if the server should be started in
|
||||
bootstrap mode. Bootstrap mode is a special case mode used for easily
|
||||
starting a single-server Nomad server cluster. This mode of operation does
|
||||
not provide any fault tolerance and is not recommended for production
|
||||
environments. Defaults to `false`.
|
||||
* `bootstrap_expect`: This is an integer representing the number of server
|
||||
nodes to wait for before bootstrapping. This is a safer alternative to
|
||||
bootstrap mode, as there will never be a single point-of-failure. It is most
|
||||
common to use the odd-numbered integers `3` or `5` for this value, depending
|
||||
on the cluster size. A value of `1` is functionally equivalent to bootstrap
|
||||
mode and is not recommended.
|
||||
nodes to wait for before bootstrapping. It is most common to use the
|
||||
odd-numbered integers `3` or `5` for this value, depending on the cluster
|
||||
size. A value of `1` does not provide any fault tolerance and is not
|
||||
recommended for production use cases.
|
||||
* `data_dir`: This is the data directory used for server-specific data,
|
||||
including the replicated log. By default, this directory lives inside of the
|
||||
[data_dir](#data_dir) in the "server" sub-path.
|
||||
@@ -236,6 +230,8 @@ A subset of the available Nomad agent configuration can optionally be passed in
|
||||
via CLI arguments. The `agent` command accepts the following arguments:
|
||||
|
||||
* `-bind=<address>`: Equivalent to the [bind_addr](#bind_addr) config option.
|
||||
* `-bootstrap-expect=<num>`: Equivalent to the
|
||||
[bootstrap_expect](#bootstrap_expect) config option.
|
||||
* `-config=<path>`: Specifies the path to a configuration file or a directory of
|
||||
configuration files to load. Can be specified multiple times.
|
||||
* `-data-dir=<path>`: Equivalent to the [data_dir](#data_dir) config option.
|
||||
|
||||
@@ -3,20 +3,23 @@ layout: "docs"
|
||||
page_title: "Commands: init"
|
||||
sidebar_current: "docs-commands-init"
|
||||
description: >
|
||||
Toggle drain mode for a given node.
|
||||
Generate a skeleton jobspec template.
|
||||
---
|
||||
|
||||
# Command: init
|
||||
|
||||
The `init` command creates a [jobspec](/docs/jobspec/) template in the current
|
||||
The `init` command creates an example [job specification](/docs/jobspec/) in the current
|
||||
directory that demonstrates some common configurations for tasks, task groups,
|
||||
runtime constraints, and resource allocation.
|
||||
|
||||
Please refer to the [jobspec](/docs/jobspec/) and [drivers](/docs/drivers/)
|
||||
pages to learn how to customize the template.
|
||||
|
||||
## Usage
|
||||
## Examples
|
||||
|
||||
Generate an example job file:
|
||||
|
||||
```
|
||||
nomad init
|
||||
$ nomad init
|
||||
Example job file written to example.nomad
|
||||
```
|
||||
|
||||
@@ -8,10 +8,9 @@ description: >
|
||||
|
||||
# Command: run
|
||||
|
||||
The `run` command is used to run new jobs in Nomad. Jobs are specified using
|
||||
[HCL](https://github.com/hashicorp/hcl)-encoded files, and may specify one or
|
||||
more task groups. More information about jobs and their configuration format
|
||||
can be found in the [jobs documentation](#).
|
||||
The `run` command is used to submit new jobs to Nomad or to update existing
|
||||
jobs. Job files must conform to the [job specification](/docs/jobspec/index.html)
|
||||
format.
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -20,8 +19,8 @@ nomad run [options] <file>
|
||||
```
|
||||
|
||||
The run command requires a single argument, specifying the path to a file
|
||||
containing a valid [job definition](#). This file will be read and the job
|
||||
will be submitted to the Nomad server for scheduling.
|
||||
containing a valid [job specification](/docs/jobspec/index.html). This file
|
||||
will be read and the job will be submitted to Nomad for scheduling.
|
||||
|
||||
By default, on successful job submission, the run command will enter an
|
||||
interactive monitor and display log information detailing the scheduling
|
||||
|
||||
@@ -25,15 +25,15 @@ The `exec` driver supports the following configuration in the job spec:
|
||||
|
||||
## Client Requirements
|
||||
|
||||
The `exec` driver has no special requirements and can run on all
|
||||
supported operating systems. The resource isolation primitives vary
|
||||
by OS.
|
||||
The `exec` driver can run on all supported operating systems but to provide
|
||||
proper isolation the client must be run as root on non-Windows operating systems.
|
||||
Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
|
||||
|
||||
## Client Attributes
|
||||
|
||||
The `exec` driver will set the following client attributes:
|
||||
|
||||
* `driver.exec` - This will always be set to "1", indicating the
|
||||
* `driver.exec` - This will be set to "1", indicating the
|
||||
driver is available.
|
||||
|
||||
## Resource Isolation
|
||||
@@ -41,10 +41,8 @@ The `exec` driver will set the following client attributes:
|
||||
The resource isolation provided varies by the operating system of
|
||||
the client and the configuration.
|
||||
|
||||
On Linux, Nomad will attempt to use cgroups, namespaces, and chroot
|
||||
to isolate the resources of a process. If the Nomad agent is not
|
||||
running as root many of these mechanisms cannot be used.
|
||||
|
||||
As a baseline, the task driver will just execute the command
|
||||
with no additional resource isolation if none are available.
|
||||
On Linux, Nomad will use cgroups, namespaces, and chroot to isolate the
|
||||
resources of a process and as such the Nomad agent must be run as root.
|
||||
|
||||
On Windows, the task driver will just execute the command with no additional
|
||||
resource isolation.
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
---
|
||||
layout: "intro"
|
||||
page_title: "Using the HTTP APIs with Authentication"
|
||||
sidebar_current: "getting-started-apis"
|
||||
description: |-
|
||||
Using the HTTP APIs for authentication and secret access.
|
||||
---
|
||||
|
||||
# Using the HTTP APIs with Authentication
|
||||
Many of Nomad's capabilities are accessible via the HTTP API in addition to the
|
||||
CLI.
|
||||
|
||||
TODO: Document Nomad's HTTP API
|
||||
|
||||
Congratulations! You now know all the basics to get started with Nomad.
|
||||
|
||||
## Next
|
||||
|
||||
Next, we have a page dedicated to
|
||||
[next steps](/intro/getting-started/next-steps.html) depending on
|
||||
what you would like to achieve.
|
||||
website/source/intro/getting-started/cluster.html.md (new file) | 203
@@ -0,0 +1,203 @@
|
||||
---
|
||||
layout: "intro"
|
||||
page_title: "Clustering"
|
||||
sidebar_current: "getting-started-cluster"
|
||||
description: |-
|
||||
Join another Nomad client to create your first cluster.
|
||||
---
|
||||
|
||||
# Clustering
|
||||
|
||||
We have started our first agent and run a job against it in development mode.
|
||||
This demonstrates the ease of use and the workflow of Nomad, but did not show how
|
||||
this could be extended to a scalable, production-grade configuration. In this step,
|
||||
we will create our first real cluster with multiple nodes.
|
||||
|
||||
## Starting the Server
|
||||
|
||||
The first step is to create the config file for the server. Either download
|
||||
the file from the [repository here](#), or paste this into a file called
|
||||
`server.hcl`:
|
||||
|
||||
```
|
||||
# Increase log verbosity
|
||||
log_level = "DEBUG"
|
||||
|
||||
# Setup data dir
|
||||
data_dir = "/tmp/server1"
|
||||
|
||||
# Enable the server
|
||||
server {
|
||||
enabled = true
|
||||
|
||||
# Self-elect, should be 3 or 5 for production
|
||||
bootstrap_expect = 1
|
||||
}
|
||||
```
|
||||
|
||||
This is a fairly minimal server configuration file, but it
|
||||
is enough to start an agent in server only mode and have it
|
||||
elect as a leader. The major change that should be made for
|
||||
production is to run more than one server, and to change the
|
||||
corresponding `bootstrap_expect` value.
|
||||
|
||||
Once the file is created, start the agent in a new tab:
|
||||
|
||||
```
|
||||
$ sudo nomad agent -config server.hcl
|
||||
==> WARNING: Bootstrap mode enabled! Potentially unsafe operation.
|
||||
==> Starting Nomad agent...
|
||||
==> Nomad agent configuration:
|
||||
|
||||
Atlas: <disabled>
|
||||
Client: false
|
||||
Log Level: DEBUG
|
||||
Region: global (DC: dc1)
|
||||
Server: true
|
||||
|
||||
==> Nomad agent started! Log data will stream in below:
|
||||
|
||||
[INFO] serf: EventMemberJoin: nomad.global 127.0.0.1
|
||||
[INFO] nomad: starting 4 scheduling worker(s) for [service batch _core]
|
||||
[INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state
|
||||
[WARN] serf: Failed to re-join any previously known node
|
||||
[INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1)
|
||||
[WARN] raft: Heartbeat timeout reached, starting election
|
||||
[INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state
|
||||
[DEBUG] raft: Votes needed: 1
|
||||
[DEBUG] raft: Vote granted. Tally: 1
|
||||
[INFO] raft: Election won. Tally: 1
|
||||
[INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state
|
||||
[INFO] nomad: cluster leadership acquired
|
||||
```
|
||||
|
||||
We can see above that client mode is disabled, and that we are
|
||||
only running as the server. This means that this server will manage
|
||||
state and make scheduling decisions but will not run any tasks.
|
||||
Now we need some agents to run tasks!
|
||||
|
||||
## Starting the Clients
|
||||
|
||||
Similar to the server, we must first configure the clients. Either download
|
||||
the configuration for client1 and client2 from the [repository here](#), or
|
||||
paste the following into `client1.hcl`:
|
||||
|
||||
```
|
||||
# Increase log verbosity
|
||||
log_level = "DEBUG"
|
||||
|
||||
# Setup data dir
|
||||
data_dir = "/tmp/client1"
|
||||
|
||||
# Enable the client
|
||||
client {
|
||||
enabled = true
|
||||
|
||||
# For demo assume we are talking to server1. For production,
|
||||
# this should be like "nomad.service.consul:4647" and a system
|
||||
# like Consul used for service discovery.
|
||||
servers = ["127.0.0.1:4647"]
|
||||
}
|
||||
|
||||
# Modify our port to avoid a collision with server1
|
||||
ports {
|
||||
http = 5656
|
||||
}
|
||||
```
|
||||
|
||||
Copy that file to `client2.hcl` and change the `data_dir` to
|
||||
be "/tmp/client2" and the `http` port to 5657. Once you've created
|
||||
both `client1.hcl` and `client2.hcl`, open a tab for each and
|
||||
start the agents:
|
||||
|
||||
```
|
||||
$ sudo nomad agent -config client1.hcl
|
||||
==> Starting Nomad agent...
|
||||
==> Nomad agent configuration:
|
||||
|
||||
Atlas: <disabled>
|
||||
Client: true
|
||||
Log Level: DEBUG
|
||||
Region: global (DC: dc1)
|
||||
Server: false
|
||||
|
||||
==> Nomad agent started! Log data will stream in below:
|
||||
|
||||
[DEBUG] client: applied fingerprints [host memory storage arch cpu]
|
||||
[DEBUG] client: available drivers [docker exec]
|
||||
[DEBUG] client: node registration complete
|
||||
...
|
||||
```
|
||||
|
||||
In the output we can see the agent is running in client mode only.
|
||||
This agent will be available to run tasks but will not participate
|
||||
in managing the cluster or making scheduling decisions.
|
||||
|
||||
Using the [`node-status` command](/docs/commands/node-status.html)
|
||||
we should see both nodes in the `ready` state:
|
||||
|
||||
```
|
||||
$ nomad node-status
|
||||
ID DC Name Class Drain Status
|
||||
e5239796-7285-3ed2-efe1-37cdc2d459d4 dc1 nomad <none> false ready
|
||||
d12e4ab0-4206-bd33-ff75-e1367590eceb dc1 nomad <none> false ready
|
||||
```
|
||||
|
||||
We now have a simple three node cluster running. The only difference
|
||||
between a demo and full production cluster is that we are running a
|
||||
single server instead of three or five.
|
||||
|
||||
## Submit a Job
|
||||
|
||||
Now that we have a simple cluster, we can use it to schedule a job.
|
||||
We should still have the `example.nomad` job file from before, but
|
||||
verify that the `count` is still set to 3.
|
||||
|
||||
Then, use the [`run` command](/docs/commands/run.html) to submit the job:
|
||||
|
||||
```
|
||||
$ nomad run example.nomad
|
||||
==> Monitoring evaluation "2d742049-497f-c602-c56d-ae2a328a5671"
|
||||
Evaluation triggered by job "example"
|
||||
Allocation "44d46439-655d-701e-55ce-552ee74fbbd8" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache"
|
||||
Allocation "624be24f-5992-0c75-742d-7f8dbd3044a2" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache"
|
||||
Allocation "a133a2c7-cc3c-2f8c-8664-71d2389c7759" created: node "d12e4ab0-4206-bd33-ff75-e1367590eceb", group "cache"
|
||||
Evaluation status changed: "pending" -> "complete"
|
||||
==> Evaluation "2d742049-497f-c602-c56d-ae2a328a5671" finished with status "complete"
|
||||
```
|
||||
|
||||
We can see in the output that the scheduler assigned two of the
|
||||
tasks for one of the client nodes and the remaining task to the
|
||||
second client.
|
||||
|
||||
We can again use the [`status` command](/docs/commands/status.html) to verify:
|
||||
|
||||
```
|
||||
$ nomad status example
|
||||
ID = example
|
||||
Name = example
|
||||
Type = service
|
||||
Priority = 50
|
||||
Datacenters = dc1
|
||||
Status =
|
||||
|
||||
==> Evaluations
|
||||
ID Priority TriggeredBy Status
|
||||
2d742049-497f-c602-c56d-ae2a328a5671 50 job-register complete
|
||||
|
||||
==> Allocations
|
||||
ID EvalID NodeID TaskGroup Desired Status
|
||||
44d46439-655d-701e-55ce-552ee74fbbd8 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running
|
||||
a133a2c7-cc3c-2f8c-8664-71d2389c7759 2d742049-497f-c602-c56d-ae2a328a5671 d12e4ab0-4206-bd33-ff75-e1367590eceb cache run running
|
||||
624be24f-5992-0c75-742d-7f8dbd3044a2 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running
|
||||
```
|
||||
|
||||
We can see that all our tasks have been allocated and are running.
|
||||
Once we are satisfied that our job is happily running, we can tear
|
||||
it down with `nomad stop`.
|
||||
|
||||
## Next Steps
|
||||
|
||||
We've now concluded the getting started guide, however there are a number
|
||||
of [next steps](next-steps.html) to get started with Nomad.
|
||||
|
||||
@@ -8,52 +8,63 @@ description: |-
|
||||
|
||||
# Install Nomad
|
||||
|
||||
Nomad must first be installed on your machine. Nomad is distributed as
|
||||
a [binary package](/downloads.html) for all supported platforms and
|
||||
architectures. This page will not cover how to compile Nomad from source,
|
||||
but compiling from source is covered in the [documentation](/docs/install/index.html)
|
||||
for those who want to be sure they're compiling source they trust into
|
||||
the final binary.
|
||||
The task drivers that are available to Nomad vary by operating system,
|
||||
for example Docker is only available on Linux machines. To simplify the
|
||||
getting started experience, we will be working in a Vagrant environment.
|
||||
Create a new directory, and download [this `Vagrantfile`](#).
|
||||
|
||||
## Installing Nomad
|
||||
## Vagrant Setup
|
||||
|
||||
To install Nomad, find the [appropriate package](/downloads.html) for
|
||||
your system and download it. Nomad is packaged as a zip archive.
|
||||
Once you have created a new directory and downloaded the `Vagrantfile`
|
||||
you must create the virtual machine:
|
||||
|
||||
After downloading Nomad, unzip the package. Nomad runs as a single binary
|
||||
named `nomad`. Any other files in the package can be safely removed and
|
||||
Nomad will still function.
|
||||
$ vagrant up
|
||||
|
||||
The final step is to make sure that `nomad` is available on the PATH.
|
||||
See [this page](https://stackoverflow.com/questions/14637979/how-to-permanently-set-path-on-linux)
|
||||
for instructions on setting the PATH on Linux and Mac.
|
||||
[This page](https://stackoverflow.com/questions/1618280/where-can-i-set-path-to-make-exe-on-windows)
|
||||
contains instructions for setting the PATH on Windows.
|
||||
This will take a few minutes as the base Ubuntu box must be downloaded
|
||||
and provisioned with both Docker and Nomad. Once this completes, you should
|
||||
see output similar to:
|
||||
|
||||
Bringing machine 'default' up with 'vmware_fusion' provider...
|
||||
==> default: Checking if box 'puphpet/ubuntu1404-x64' is up to date...
|
||||
==> default: Machine is already running.
|
||||
|
||||
At this point the Vagrant box is running and ready to go.
|
||||
|
||||
## Verifying the Installation
|
||||
|
||||
After installing Nomad, verify the installation worked by opening a new
|
||||
terminal session and checking that `nomad` is available. By executing
|
||||
After starting the Vagrant box, verify the installation worked by connecting
|
||||
to the box using SSH and checking that `nomad` is available. By executing
|
||||
`nomad`, you should see help output similar to the following:
|
||||
|
||||
```
|
||||
$ nomad
|
||||
$ vagrant ssh
|
||||
...
|
||||
|
||||
vagrant@nomad:~$ nomad
|
||||
usage: nomad [--version] [--help] <command> [<args>]
|
||||
|
||||
Available commands are:
|
||||
agent Runs a Nomad agent
|
||||
agent-force-leave Force a member into the 'left' state
|
||||
agent-info Display status information about the local agent
|
||||
agent-join Join server nodes together
|
||||
agent-members Display a list of known members and their status
|
||||
node-drain Toggle drain mode on a given node
|
||||
node-status Display status information about nodes
|
||||
status Display status information about jobs
|
||||
version Prints the Nomad version
|
||||
agent Runs a Nomad agent
|
||||
agent-info Display status information about the local agent
|
||||
eval-monitor Monitor an evaluation interactively
|
||||
node-drain Toggle drain mode on a given node
|
||||
node-status Display status information about nodes
|
||||
run Run a new job
|
||||
server-force-leave Force a server into the 'left' state
|
||||
server-join Join server nodes together
|
||||
server-members Display a list of known servers and their status
|
||||
status Display status information about jobs
|
||||
stop Stop a running job
|
||||
version Prints the Nomad version
|
||||
```
|
||||
|
||||
If you get an error that Nomad could not be found, then your PATH environment
|
||||
variable was not setup properly. Please go back and ensure that your PATH
|
||||
variable contains the directory where Nomad was installed.
|
||||
If you get an error that Nomad could not be found, then your Vagrant box
|
||||
may not have provisioned correctly. Check any error messages that may have
|
||||
been occurred during `vagrant up`. You can always destroy the box and
|
||||
re-create it.
|
||||
|
||||
## Next Steps
|
||||
|
||||
Vagrant is running and Nomad is installed. Let's [start Nomad](/intro/getting-started/running.html)!
|
||||
|
||||
|
||||
Otherwise, Nomad is installed and ready to go!
|
||||
|
||||
177
website/source/intro/getting-started/jobs.html.md
Normal file
@@ -0,0 +1,177 @@
---
layout: "intro"
page_title: "Jobs"
sidebar_current: "getting-started-jobs"
description: |-
  Learn how to submit, modify and stop jobs in Nomad.
---

# Jobs

Jobs are the primary configuration that users interact with when using
Nomad. A job is a declarative specification of tasks that Nomad should run.
Jobs have a globally unique name and contain one or many task groups, which
are themselves collections of one or many tasks.

The format of jobs is [documented here](/docs/jobspec/index.html). They
can either be specified in [HCL](https://github.com/hashicorp/hcl) or JSON;
however, we recommend only using JSON when the configuration is generated by a machine.

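To make that structure concrete, here is a heavily abridged job written in HCL.
This is only an orientation sketch: the names and values are placeholders, and
the file generated in the next section contains many more options and comments.

```
# A job contains one or more task groups; each group contains one or more tasks.
job "example" {
  # The datacenters this job is allowed to run in.
  datacenters = ["dc1"]

  group "cache" {
    # How many copies of this group to run.
    count = 1

    task "redis" {
      # The driver that executes the task.
      driver = "docker"

      # Driver-specific configuration.
      config {
        image = "redis:latest"
      }

      # Resources reserved for the task.
      resources {
        cpu    = 500 # MHz
        memory = 256 # MB
      }
    }
  }
}
```
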
## Running a Job

To get started, we will use the [`init` command](/docs/commands/init.html), which
generates a skeleton job file:

```
$ nomad init
Example job file written to example.nomad

$ cat example.nomad

# There can only be a single job definition per file.
# Create a job with ID and Name 'example'
job "example" {
	# Run the job in the global region, which is the default.
	# region = "global"
...
```

In this example job file, we have declared a single task 'redis' which uses
the Docker driver to run the task. The primary way you interact with Nomad
is with the [`run` command](/docs/commands/run.html). The `run` command takes
a job file and registers it with Nomad. This is used both to register new
jobs and to update existing jobs.

We can register our example job now:

```
$ nomad run example.nomad
==> Monitoring evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6"
    Evaluation triggered by job "example"
    Allocation "c1d2f085-7049-6c4a-4479-1b2310fdaba9" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
    Evaluation status changed: "pending" -> "complete"
==> Evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6" finished with status "complete"
```

Anytime a job is updated, Nomad creates an evaluation to determine what
actions need to take place. In this case, because this is a new job, Nomad has
determined that an allocation should be created and has scheduled it on our
local agent.

To inspect the status of our job, we use the [`status` command](/docs/commands/status.html):

```
$ nomad status example
ID          = example
Name        = example
Type        = service
Priority    = 50
Datacenters = dc1
Status      =

==> Evaluations
ID                                    Priority  TriggeredBy   Status
f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6  50        job-register  complete

==> Allocations
ID                                    EvalID                                NodeID                                TaskGroup  Desired  Status
c1d2f085-7049-6c4a-4479-1b2310fdaba9  f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6  1f43787c-7ab4-8d10-d2d6-1593ed06463a  cache      run      running
```

Here we can see that the evaluation has completed, and that it resulted in the
creation of an allocation that is now running on the local node.

## Modifying a Job

The definition of a job is not static, and is meant to be updated over time.
You may update a job to change the Docker container to update the application version,
or change the count of a task group to scale with load.

For now, edit the `example.nomad` file to uncomment the count and set it to 3:

```
# Control the number of instances of this group.
# Defaults to 1
count = 3
```

Once you have finished modifying the job specification, use `nomad run` to
push the updated version of the job:

```
$ nomad run example.nomad
==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
    Evaluation triggered by job "example"
    Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
    Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
    Evaluation status changed: "pending" -> "complete"
==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
```

Because we set the count of the task group to three, Nomad created two
additional allocations to get to the desired state. It is idempotent to
run the same job specification again, and no new allocations will be created.

Now, let's try to do an application update. In this case, we will simply change
the version of redis we want to run. Edit the `example.nomad` file and change
the Docker image from "redis:latest" to "redis:2.8":

```
# Configure Docker driver with the image
config {
	image = "redis:2.8"
}
```

This time we have not changed the number of task groups we want running,
but we've changed the task itself. This requires stopping the old tasks
and starting new tasks. Our example job is configured to do a rolling update,
doing a single update every 10 seconds. Use `run` to push the updated
specification now:

```
$ nomad run example.nomad
==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
    Evaluation triggered by job "example"
    Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
    Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
    Evaluation status changed: "pending" -> "complete"
==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
```

We can see that Nomad handled the update in three phases, updating only a
single task group at a time. The update strategy can be configured, but
rolling updates make it easy to upgrade an application at large scale.

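The rolling behavior comes from the job's `update` stanza. The following is a
sketch of what such a stanza looks like; the exact comments and values in your
generated `example.nomad` may differ, so treat it as illustrative rather than
authoritative:

```
# Roll the update out one allocation at a time, waiting 10 seconds
# between each round.
update {
  stagger      = "10s"
  max_parallel = 1
}
```

Raising `max_parallel` speeds up a rollout at the cost of replacing more of the
running allocations at once.
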
## Stopping a Job

So far we've created, run, and modified a job. The final step in a job lifecycle
is stopping the job. This is done with the [`stop` command](/docs/commands/stop.html):

```
$ nomad stop example
==> Monitoring evaluation "4b236340-d5ed-1838-be15-a896095d3ac9"
    Evaluation triggered by job "example"
    Evaluation status changed: "pending" -> "complete"
==> Evaluation "4b236340-d5ed-1838-be15-a896095d3ac9" finished with status "complete"
```

When we stop a job, it creates an evaluation which is used to stop all
the existing allocations. This also deletes the job definition from Nomad.
If we try to query the job status, we can see it is no longer registered:

```
$ nomad status example
Error querying job: Unexpected response code: 404 (job not found)
```

If we wanted to start the job again, we could simply `run` it again.

## Next Steps

Users of Nomad primarily interact with jobs, and we've now seen
how to create and scale our job, perform an application update,
and do a job tear down. Next we will add another Nomad
client to [create our first cluster](cluster.html).

144
website/source/intro/getting-started/running.html.md
Normal file
@@ -0,0 +1,144 @@
---
layout: "intro"
page_title: "Running Nomad"
sidebar_current: "getting-started-running"
description: |-
  Learn about the Nomad agent, and the lifecycle of running and stopping.
---

# Running Nomad

Nomad relies on a long-running agent on every machine in the cluster.
The agent can run either in server or client mode. Each region must
have at least one server, though a cluster of 3 or 5 servers is recommended.
A single server deployment is _**highly**_ discouraged as data loss is inevitable
in a failure scenario.

All other agents run in client mode. A client is a very lightweight
process that registers the host machine, performs heartbeating, and runs any tasks
that are assigned to it by the servers. The agent must be run on every node that
is part of the cluster so that the servers can assign work to those machines.

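Server and client mode are selected through the agent's configuration file. As
a rough sketch, assuming the `server` and `client` blocks described in the
agent configuration documentation (the address and expected server count below
are placeholders):

```
# server.hcl -- a server agent that waits for three servers before bootstrapping
server {
  enabled          = true
  bootstrap_expect = 3
}

# client.hcl -- a client agent pointed at a known server address
client {
  enabled = true
  servers = ["10.0.0.10:4647"]
}
```

For this guide we will not need separate configuration files, since development
mode runs one agent acting as both server and client.
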
## Starting the Agent

For simplicity, we will run a single Nomad agent in development mode. This mode
is used to quickly start an agent that is acting as a client and server to test
job configurations or prototype interactions. It should _**not**_ be used in
production as it does not persist state.

```
$ sudo nomad agent -dev
==> Starting Nomad agent...
==> Nomad agent configuration:

    Atlas: <disabled>
    Client: true
    Log Level: debug
    Region: global (DC: dc1)
    Server: true

==> Nomad agent started! Log data will stream in below:

[INFO] serf: EventMemberJoin: nomad.global 127.0.0.1
[INFO] nomad: starting 4 scheduling worker(s) for [service batch _core]
[INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state
[INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1)
[DEBUG] client: applied fingerprints [storage arch cpu host memory]
[DEBUG] client: available drivers [exec docker]
[WARN] raft: Heartbeat timeout reached, starting election
[INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state
[DEBUG] raft: Votes needed: 1
[DEBUG] raft: Vote granted. Tally: 1
[INFO] raft: Election won. Tally: 1
[INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state
[INFO] raft: Disabling EnableSingleNode (bootstrap)
[DEBUG] raft: Node 127.0.0.1:4647 updated peer set (2): [127.0.0.1:4647]
[INFO] nomad: cluster leadership acquired
[DEBUG] client: node registration complete
[DEBUG] client: updated allocations at index 1 (0 allocs)
[DEBUG] client: allocs: (added 0) (removed 0) (updated 0) (ignore 0)
[DEBUG] client: state updated to ready
```

As you can see, the Nomad agent has started and has output some log
data. From the log data, you can see that our agent is running in both
client and server mode, and has claimed leadership of the cluster.
Additionally, the local client has been registered and marked as ready.

-> **Note:** Typically any agent running in client mode must be run with root-level
privilege. Nomad makes use of operating system primitives for resource isolation
that require elevated permissions. The agent will function as non-root, but
certain task drivers will not be available.

## Cluster Nodes

If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you
can see the registered nodes of the Nomad cluster:

```text
$ vagrant ssh
...

$ nomad node-status
ID                                    DC   Name   Class   Drain  Status
72d3af97-144f-1e5f-94e5-df1516fe4add  dc1  nomad  <none>  false  ready
```

The output shows our node ID, which is a randomly generated UUID,
its datacenter, node name, node class, drain mode, and current status.
We can see that our node is in the ready state, and task draining is
currently off.

The agent is also running in server mode, which means it is part of
the [gossip protocol](/docs/internals/gossip.html) used to connect all
the server instances together. We can view the members of the gossip
ring using the [`server-members`](/docs/commands/server-members.html) command:

```text
$ nomad server-members
Name          Addr       Port  Status  Proto  Build     DC   Region
nomad.global  127.0.0.1  4648  alive   2      0.1.0dev  dc1  global
```

The output shows our own agent, the address it is running on, its
health state, some version information, and the datacenter and region.
Additional metadata can be viewed by providing the `-detailed` flag.

## <a name="stopping"></a>Stopping the Agent

You can use `Ctrl-C` (the interrupt signal) to halt the agent.
By default, all signals will cause the agent to forcefully shut down.
The agent [can be configured](/docs/agent/config.html) to gracefully
leave on either the interrupt or terminate signals.

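A sketch of such a configuration, assuming the `leave_on_interrupt` and
`leave_on_terminate` options from the agent configuration documentation:

```
# Leave the cluster gracefully on SIGINT and SIGTERM instead of
# shutting down immediately.
leave_on_interrupt = true
leave_on_terminate = true
```

A file like this would typically be passed to the agent with its `-config` flag.
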
After interrupting the agent, you should see it leave the cluster
and shut down:

```
^C==> Caught signal: interrupt
[DEBUG] http: Shutting down http server
[INFO] agent: requesting shutdown
[INFO] client: shutting down
[INFO] nomad: shutting down server
[WARN] serf: Shutdown without a Leave
[INFO] agent: shutdown complete
```

By gracefully leaving, Nomad clients update their status to prevent
further tasks from being scheduled and to start migrating any tasks that are
already assigned. Nomad servers notify their peers that they intend to leave.
When a server leaves, replication to that server stops. If a server fails,
replication continues to be attempted until the node recovers. Nomad will
automatically try to reconnect to _failed_ nodes, allowing it to recover from
certain network conditions, while _left_ nodes are no longer contacted.

If an agent is operating as a server, a graceful leave is important to avoid
causing a potential availability outage affecting the
[consensus protocol](/docs/internals/consensus.html). If a server does
forcefully exit and will not be returning to service, the
[`server-force-leave` command](/docs/commands/server-force-leave.html) should
be used to force the server from a _failed_ to a _left_ state.

## Next Steps

The development Nomad agent is up and running. Let's try to [run a job](jobs.html)!

@@ -1,18 +0,0 @@
---
layout: "intro"
page_title: "Running Nomad"
sidebar_current: "getting-started-running"
description: |-
  Learn how to deploy Nomad into production, how to initialize it, configure it, etc.
---

# Running Nomad
This section will detail how to run Nomad on client machines. It should include
a sample upstart script and stuff

## Next

TODO: Fill in text here.

Next, we have a [short tutorial](/intro/getting-started/apis.html) on using
Nomad's HTTP APIs.

@@ -58,8 +58,12 @@
  <a href="/intro/getting-started/running.html">Running Nomad</a>
</li>

<li<%= sidebar_current("getting-started-apis") %>>
  <a href="/intro/getting-started/apis.html">HTTP API</a>
<li<%= sidebar_current("getting-started-jobs") %>>
  <a href="/intro/getting-started/jobs.html">Jobs</a>
</li>

<li<%= sidebar_current("getting-started-cluster") %>>
  <a href="/intro/getting-started/cluster.html">Clustering</a>
</li>

<li<%= sidebar_current("getting-started-nextsteps") %>>