Mirror of https://github.com/kemko/nomad.git, synced 2026-01-09 03:45:41 +03:00
Merge branch 'master' into b-vagrantfile
.travis.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
sudo: false

language: go

go:
  - 1.5.1
  - tip

matrix:
  allow_failures:
    - go: tip

branches:
  only:
    - master

install:
  - make bootstrap

script:
  - make test
CHANGELOG.md
@@ -3,6 +3,7 @@
IMPROVEMENTS:

 * Nomad client cleans allocations on exit when in dev mode
 * drivers: Use go-getter for artifact retrieval, add artifact support to Exec, Raw Exec drivers [GH-288]

## 0.1.1 (October 5, 2015)
api/agent.go (19 lines changed)
@@ -178,3 +178,22 @@ type AgentMember struct {
    DelegateMax uint8
    DelegateCur uint8
}

// AgentMembersNameSort implements sort.Interface for []*AgentMembersNameSort
// based on the Name, DC and Region
type AgentMembersNameSort []*AgentMember

func (a AgentMembersNameSort) Len() int      { return len(a) }
func (a AgentMembersNameSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a AgentMembersNameSort) Less(i, j int) bool {
    if a[i].Tags["region"] != a[j].Tags["region"] {
        return a[i].Tags["region"] < a[j].Tags["region"]
    }

    if a[i].Tags["dc"] != a[j].Tags["dc"] {
        return a[i].Tags["dc"] < a[j].Tags["dc"]
    }

    return a[i].Name < a[j].Name

}
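The new type lets callers order member lists with the standard library's sort package. A minimal usage sketch; the member values are hypothetical, not part of this commit:

    package main

    import (
        "fmt"
        "sort"

        "github.com/hashicorp/nomad/api"
    )

    func main() {
        members := []*api.AgentMember{
            {Name: "nomad-2.global", Tags: map[string]string{"region": "global", "dc": "dc1"}},
            {Name: "nomad-1.global", Tags: map[string]string{"region": "global", "dc": "dc1"}},
        }
        // Orders by region, then datacenter, then name, as Less above defines.
        sort.Sort(api.AgentMembersNameSort(members))
        fmt.Println(members[0].Name) // nomad-1.global
    }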
api/agent_test.go
@@ -1,6 +1,8 @@
package api

import (
    "reflect"
    "sort"
    "testing"

    "github.com/hashicorp/nomad/testutil"
@@ -154,3 +156,117 @@ func TestAgent_SetServers(t *testing.T) {
        t.Fatalf("bad server list: %v", out)
    }
}

func (a *AgentMember) String() string {
    return "{Name: " + a.Name + " Region: " + a.Tags["region"] + " DC: " + a.Tags["dc"] + "}"
}
func TestAgents_Sort(t *testing.T) {
|
||||
var sortTests = []struct {
|
||||
in []*AgentMember
|
||||
out []*AgentMember
|
||||
}{
|
||||
{
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-2.vac.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
|
||||
&AgentMember{Name: "nomad-1.global",
|
||||
Tags: map[string]string{"region": "global", "dc": "dc1"}},
|
||||
&AgentMember{Name: "nomad-1.vac.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
|
||||
},
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-1.global",
|
||||
Tags: map[string]string{"region": "global", "dc": "dc1"}},
|
||||
&AgentMember{Name: "nomad-1.vac.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
|
||||
&AgentMember{Name: "nomad-2.vac.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-02.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
&AgentMember{Name: "nomad-02.pal.us-west",
|
||||
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
|
||||
&AgentMember{Name: "nomad-01.pal.us-west",
|
||||
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
|
||||
&AgentMember{Name: "nomad-01.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
},
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-01.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
&AgentMember{Name: "nomad-02.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
&AgentMember{Name: "nomad-01.pal.us-west",
|
||||
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
|
||||
&AgentMember{Name: "nomad-02.pal.us-west",
|
||||
Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-02.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
&AgentMember{Name: "nomad-02.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-01.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
&AgentMember{Name: "nomad-01.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
},
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-01.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-02.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-01.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
&AgentMember{Name: "nomad-02.tam.us-east",
|
||||
Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-02.ber.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
|
||||
&AgentMember{Name: "nomad-02.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-01.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-01.ber.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
|
||||
},
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-01.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-02.ams.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
|
||||
&AgentMember{Name: "nomad-01.ber.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
|
||||
&AgentMember{Name: "nomad-02.ber.europe",
|
||||
Tags: map[string]string{"region": "europe", "dc": "berlin"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-1.global"},
|
||||
&AgentMember{Name: "nomad-3.global"},
|
||||
&AgentMember{Name: "nomad-2.global"},
|
||||
},
|
||||
[]*AgentMember{
|
||||
&AgentMember{Name: "nomad-1.global"},
|
||||
&AgentMember{Name: "nomad-2.global"},
|
||||
&AgentMember{Name: "nomad-3.global"},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range sortTests {
|
||||
sort.Sort(AgentMembersNameSort(tt.in))
|
||||
if !reflect.DeepEqual(tt.in, tt.out) {
|
||||
t.Errorf("\necpected: %s\nget : %s", tt.in, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
api/api.go
@@ -10,6 +10,8 @@ import (
    "os"
    "strconv"
    "time"

    "github.com/hashicorp/go-cleanhttp"
)

// QueryOptions are used to parameterize a query
@@ -86,7 +88,7 @@ type Config struct {
func DefaultConfig() *Config {
    config := &Config{
        Address:    "http://127.0.0.1:4646",
        HttpClient: http.DefaultClient,
        HttpClient: cleanhttp.DefaultClient(),
    }
    if addr := os.Getenv("NOMAD_ADDR"); addr != "" {
        config.Address = addr
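The swap to go-cleanhttp stops the API client from sharing http.DefaultClient (and its transport) with everything else in the process. A minimal sketch of the difference; the timeout value is illustrative:

    package main

    import (
        "fmt"
        "time"

        cleanhttp "github.com/hashicorp/go-cleanhttp"
    )

    func main() {
        // Each call returns an *http.Client with its own transport, so local
        // tweaks cannot leak into other users of http.DefaultClient.
        client := cleanhttp.DefaultClient()
        client.Timeout = 30 * time.Second
        fmt.Println(client.Timeout)
    }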
api/jobs.go
@@ -2,6 +2,7 @@ package api

import (
    "sort"
    "time"
)

const (
@@ -100,6 +101,12 @@ func (j *Jobs) ForceEvaluate(jobID string, q *WriteOptions) (string, *WriteMeta,
    return resp.EvalID, wm, nil
}

//UpdateStrategy is for serializing update strategy for a job.
type UpdateStrategy struct {
    Stagger     time.Duration
    MaxParallel int
}

// Job is used to serialize a job.
type Job struct {
    Region string
@@ -111,6 +118,7 @@ type Job struct {
    Datacenters       []string
    Constraints       []*Constraint
    TaskGroups        []*TaskGroup
    Update            *UpdateStrategy
    Meta              map[string]string
    Status            string
    StatusDescription string
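The new Update field ties a job to a rolling-update policy. A short sketch of how a client might populate it; the field values are illustrative, not taken from this commit:

    package main

    import (
        "fmt"
        "time"

        "github.com/hashicorp/nomad/api"
    )

    func main() {
        job := &api.Job{
            Region:      "global",
            Datacenters: []string{"dc1"},
            Update: &api.UpdateStrategy{
                Stagger:     30 * time.Second, // pause between update batches
                MaxParallel: 1,                // update one allocation at a time
            },
        }
        fmt.Printf("%+v\n", job.Update)
    }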
client/alloc_runner.go
@@ -282,7 +282,11 @@ func (r *AllocRunner) Run() {
    // Create the execution context
    if r.ctx == nil {
        allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID))
        allocDir.Build(tg.Tasks)
        if err := allocDir.Build(tg.Tasks); err != nil {
            r.logger.Printf("[WARN] client: failed to build task directories: %v", err)
            r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
            return
        }
        r.ctx = driver.NewExecContext(allocDir)
    }

client/driver/docker.go
@@ -4,12 +4,15 @@ import (
    "encoding/json"
    "fmt"
    "log"
    "path/filepath"
    "strconv"
    "strings"

    docker "github.com/fsouza/go-dockerclient"

    "github.com/hashicorp/nomad/client/allocdir"
    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/client/driver/args"
    "github.com/hashicorp/nomad/nomad/structs"
)

@@ -37,11 +40,36 @@ func NewDockerDriver(ctx *DriverContext) Driver {
    return &DockerDriver{*ctx}
}

// dockerClient creates *docker.Client. In test / dev mode we can use ENV vars
// to connect to the docker daemon. In production mode we will read
// docker.endpoint from the config file.
func (d *DockerDriver) dockerClient() (*docker.Client, error) {
    // In dev mode, read DOCKER_* environment variables DOCKER_HOST,
    // DOCKER_TLS_VERIFY, and DOCKER_CERT_PATH. This allows you to run tests and
    // demo against boot2docker or a VM on OSX and Windows. This falls back on
    // the default unix socket on linux if tests are run on linux.
    //
    // Also note that we need to turn on DevMode in the test configs.
    if d.config.DevMode {
        return docker.NewClientFromEnv()
    }

    // In prod mode we'll read the docker.endpoint configuration and fall back
    // on the host-specific default. We do not read from the environment.
    defaultEndpoint, err := docker.DefaultDockerHost()
    if err != nil {
        return nil, fmt.Errorf("Unable to determine default docker endpoint: %s", err)
    }
    dockerEndpoint := d.config.ReadDefault("docker.endpoint", defaultEndpoint)

    return docker.NewClient(dockerEndpoint)
}

func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    // Initialize docker API client
    dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
    client, err := docker.NewClient(dockerEndpoint)
    client, err := d.dockerClient()
    if err != nil {
        d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon: %v", err)
        return false, nil
    }
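The dev/prod split above boils down to a single branch. A minimal sketch of the same fallback outside the driver, assuming only the fsouza/go-dockerclient package; the helper name is hypothetical:

    package example

    import (
        docker "github.com/fsouza/go-dockerclient"
    )

    // newDockerClient mirrors dockerClient above: environment-driven in dev mode
    // (DOCKER_HOST, DOCKER_TLS_VERIFY, DOCKER_CERT_PATH), explicit endpoint otherwise.
    func newDockerClient(devMode bool, endpoint string) (*docker.Client, error) {
        if devMode {
            return docker.NewClientFromEnv()
        }
        return docker.NewClient(endpoint)
    }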
@@ -56,6 +84,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
|
||||
|
||||
env, err := client.Version()
|
||||
if err != nil {
|
||||
d.logger.Printf("[DEBUG] driver.docker: could not read version from daemon: %v", err)
|
||||
// Check the "no such file" error if the unix file is missing
|
||||
if strings.Contains(err.Error(), "no such file") {
|
||||
return false, nil
|
||||
@@ -65,18 +94,39 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
|
||||
// is broken.
|
||||
return false, err
|
||||
}
|
||||
node.Attributes["driver.docker"] = "true"
|
||||
node.Attributes["driver.docker"] = "1"
|
||||
node.Attributes["driver.docker.version"] = env.Get("Version")
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// We have to call this when we create the container AND when we start it so
|
||||
// we'll make a function.
|
||||
func createHostConfig(task *structs.Task) *docker.HostConfig {
|
||||
// hostConfig holds options for the docker container that are unique to this
|
||||
// machine, such as resource limits and port mappings
|
||||
return &docker.HostConfig{
|
||||
func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) {
|
||||
shared := alloc.SharedDir
|
||||
local, ok := alloc.TaskDirs[task.Name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name)
|
||||
}
|
||||
|
||||
return []string{
|
||||
fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocName),
|
||||
fmt.Sprintf("%s:%s", local, allocdir.TaskLocal),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// createContainer initializes a struct needed to call docker.client.CreateContainer()
|
||||
func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (docker.CreateContainerOptions, error) {
|
||||
var c docker.CreateContainerOptions
|
||||
if task.Resources == nil {
|
||||
d.logger.Printf("[ERR] driver.docker: task.Resources is empty")
|
||||
return c, fmt.Errorf("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.")
|
||||
}
|
||||
|
||||
binds, err := d.containerBinds(ctx.AllocDir, task)
|
||||
if err != nil {
|
||||
return c, err
|
||||
}
|
||||
|
||||
hostConfig := &docker.HostConfig{
|
||||
// Convert MB to bytes. This is an absolute value.
|
||||
//
|
||||
// This value represents the total amount of memory a process can use.
|
||||
@@ -105,40 +155,38 @@ func createHostConfig(task *structs.Task) *docker.HostConfig {
|
||||
// - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
|
||||
// - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt
|
||||
CPUShares: int64(task.Resources.CPU),
|
||||
}
|
||||
}
|
||||
|
||||
// createContainer initializes a struct needed to call docker.client.CreateContainer()
|
||||
func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) docker.CreateContainerOptions {
|
||||
if task.Resources == nil {
|
||||
panic("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.")
|
||||
// Binds are used to mount a host volume into the container. We mount a
|
||||
// local directory for storage and a shared alloc directory that can be
|
||||
// used to share data between different tasks in the same task group.
|
||||
Binds: binds,
|
||||
}
|
||||
|
||||
hostConfig := createHostConfig(task)
|
||||
logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
|
||||
logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
|
||||
d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
|
||||
d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
|
||||
d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"])
|
||||
|
||||
mode, ok := task.Config["network_mode"]
|
||||
if !ok || mode == "" {
|
||||
// docker default
|
||||
logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge")
|
||||
d.logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge")
|
||||
mode = "bridge"
|
||||
}
|
||||
|
||||
// Ignore the container mode for now
|
||||
switch mode {
|
||||
case "default", "bridge", "none", "host":
|
||||
logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode)
|
||||
d.logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode)
|
||||
default:
|
||||
logger.Printf("[WARN] invalid setting for network mode %s, defaulting to bridge mode on docker0", mode)
|
||||
mode = "bridge"
|
||||
d.logger.Printf("[ERR] driver.docker: invalid setting for network mode: %s", mode)
|
||||
return c, fmt.Errorf("Invalid setting for network mode: %s", mode)
|
||||
}
|
||||
hostConfig.NetworkMode = mode
|
||||
|
||||
// Setup port mapping (equivalent to -p on docker CLI). Ports must already be
|
||||
// exposed in the container.
|
||||
if len(task.Resources.Networks) == 0 {
|
||||
logger.Print("[WARN] driver.docker: No networks are available for port mapping")
|
||||
d.logger.Print("[WARN] driver.docker: No networks are available for port mapping")
|
||||
} else {
|
||||
network := task.Resources.Networks[0]
|
||||
dockerPorts := map[docker.Port][]docker.PortBinding{}
|
||||
@@ -146,7 +194,7 @@ func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) d
|
||||
for _, port := range network.ListStaticPorts() {
|
||||
dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
|
||||
dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
|
||||
logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port)
|
||||
d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port)
|
||||
}
|
||||
|
||||
for label, port := range network.MapDynamicPorts() {
|
||||
@@ -160,30 +208,47 @@ func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) d
|
||||
if _, err := strconv.Atoi(label); err == nil {
|
||||
dockerPorts[docker.Port(label+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
|
||||
dockerPorts[docker.Port(label+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
|
||||
logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label)
|
||||
d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label)
|
||||
} else {
|
||||
dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
|
||||
dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
|
||||
logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label)
|
||||
d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label)
|
||||
}
|
||||
}
|
||||
hostConfig.PortBindings = dockerPorts
|
||||
}
|
||||
|
||||
// Create environment variables.
|
||||
env := TaskEnvironmentVariables(ctx, task)
|
||||
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
|
||||
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
|
||||
|
||||
config := &docker.Config{
|
||||
Env: TaskEnvironmentVariables(ctx, task).List(),
|
||||
Env: env.List(),
|
||||
Image: task.Config["image"],
|
||||
}
|
||||
|
||||
rawArgs, hasArgs := task.Config["args"]
|
||||
parsedArgs, err := args.ParseAndReplace(rawArgs, env.Map())
|
||||
if err != nil {
|
||||
return c, err
|
||||
}
|
||||
|
||||
// If the user specified a custom command to run, we'll inject it here.
|
||||
if command, ok := task.Config["command"]; ok {
|
||||
config.Cmd = strings.Split(command, " ")
|
||||
cmd := []string{command}
|
||||
if hasArgs {
|
||||
cmd = append(cmd, parsedArgs...)
|
||||
}
|
||||
config.Cmd = cmd
|
||||
} else if hasArgs {
|
||||
d.logger.Println("[DEBUG] driver.docker: ignoring args because command not specified")
|
||||
}
|
||||
|
||||
return docker.CreateContainerOptions{
|
||||
Config: config,
|
||||
HostConfig: hostConfig,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
@@ -212,10 +277,9 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
|
||||
}
|
||||
|
||||
// Initialize docker API client
|
||||
dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
|
||||
client, err := docker.NewClient(dockerEndpoint)
|
||||
client, err := d.dockerClient()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err)
|
||||
return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
|
||||
}
|
||||
|
||||
repo, tag := docker.ParseRepositoryTag(image)
|
||||
@@ -258,8 +322,13 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
|
||||
d.logger.Printf("[DEBUG] driver.docker: using image %s", dockerImage.ID)
|
||||
d.logger.Printf("[INFO] driver.docker: identified image %s as %s", image, dockerImage.ID)
|
||||
|
||||
config, err := d.createContainer(ctx, task)
|
||||
if err != nil {
|
||||
d.logger.Printf("[ERR] driver.docker: %s", err)
|
||||
return nil, fmt.Errorf("Failed to create container config for image %s", image)
|
||||
}
|
||||
// Create a container
|
||||
container, err := client.CreateContainer(createContainer(ctx, task, d.logger))
|
||||
container, err := client.CreateContainer(config)
|
||||
if err != nil {
|
||||
d.logger.Printf("[ERR] driver.docker: %s", err)
|
||||
return nil, fmt.Errorf("Failed to create container from image %s", image)
|
||||
@@ -309,10 +378,9 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
|
||||
d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID)
|
||||
|
||||
// Initialize docker API client
|
||||
dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
|
||||
client, err := docker.NewClient(dockerEndpoint)
|
||||
client, err := d.dockerClient()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err)
|
||||
return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
|
||||
}
|
||||
|
||||
// Look for a running container with this ID
|
||||
@@ -401,6 +469,7 @@ func (h *dockerHandle) Kill() error {
|
||||
err = h.client.RemoveImage(h.imageID)
|
||||
if err != nil {
|
||||
containers, err := h.client.ListContainers(docker.ListContainersOptions{
|
||||
// The image might be in use by a stopped container, so check everything
|
||||
All: true,
|
||||
Filters: map[string][]string{
|
||||
"image": []string{h.imageID},
|
||||
|
||||
@@ -1,14 +1,25 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/environment"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
func testDockerDriverContext(task string) *DriverContext {
|
||||
cfg := testConfig()
|
||||
cfg.DevMode = true
|
||||
return NewDriverContext(task, cfg, cfg.Node, testLogger())
|
||||
}
|
||||
|
||||
// dockerLocated looks to see whether docker is available on this system before
|
||||
// we try to run tests. We'll keep it simple and just check for the CLI.
|
||||
func dockerLocated() bool {
|
||||
@@ -33,7 +44,7 @@ func TestDockerDriver_Handle(t *testing.T) {
|
||||
|
||||
// The fingerprinter test should always pass, even if Docker is not installed.
|
||||
func TestDockerDriver_Fingerprint(t *testing.T) {
|
||||
d := NewDockerDriver(testDriverContext(""))
|
||||
d := NewDockerDriver(testDockerDriverContext(""))
|
||||
node := &structs.Node{
|
||||
Attributes: make(map[string]string),
|
||||
}
|
||||
@@ -44,7 +55,7 @@ func TestDockerDriver_Fingerprint(t *testing.T) {
|
||||
if apply != dockerLocated() {
|
||||
t.Fatalf("Fingerprinter should detect Docker when it is installed")
|
||||
}
|
||||
if node.Attributes["driver.docker"] == "" {
|
||||
if node.Attributes["driver.docker"] != "1" {
|
||||
t.Log("Docker not found. The remainder of the docker tests will be skipped.")
|
||||
}
|
||||
t.Logf("Found docker version %s", node.Attributes["driver.docker.version"])
|
||||
@@ -56,14 +67,14 @@ func TestDockerDriver_StartOpen_Wait(t *testing.T) {
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "python-demo",
|
||||
Name: "redis-demo",
|
||||
Config: map[string]string{
|
||||
"image": "redis",
|
||||
},
|
||||
Resources: basicResources,
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
@@ -93,10 +104,11 @@ func TestDockerDriver_Start_Wait(t *testing.T) {
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "python-demo",
|
||||
Name: "redis-demo",
|
||||
Config: map[string]string{
|
||||
"image": "redis",
|
||||
"command": "redis-server -v",
|
||||
"command": "redis-server",
|
||||
"args": "-v",
|
||||
},
|
||||
Resources: &structs.Resources{
|
||||
MemoryMB: 256,
|
||||
@@ -104,7 +116,7 @@ func TestDockerDriver_Start_Wait(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
@@ -134,21 +146,77 @@ func TestDockerDriver_Start_Wait(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDockerDriver_Start_Wait_AllocDir(t *testing.T) {
|
||||
if !dockerLocated() {
|
||||
t.SkipNow()
|
||||
}
|
||||
|
||||
exp := []byte{'w', 'i', 'n'}
|
||||
file := "output.txt"
|
||||
task := &structs.Task{
|
||||
Name: "redis-demo",
|
||||
Config: map[string]string{
|
||||
"image": "redis",
|
||||
"command": "/bin/bash",
|
||||
"args": fmt.Sprintf(`-c "sleep 1; echo -n %s > $%s/%s"`, string(exp), environment.AllocDir, file),
|
||||
},
|
||||
Resources: &structs.Resources{
|
||||
MemoryMB: 256,
|
||||
CPU: 512,
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
defer handle.Kill()
|
||||
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
|
||||
// Check that data was written to the shared alloc directory.
|
||||
outputFile := filepath.Join(ctx.AllocDir.SharedDir, file)
|
||||
act, err := ioutil.ReadFile(outputFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Couldn't read expected output: %v", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(act, exp) {
|
||||
t.Fatalf("Command outputted %v; want %v", act, exp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDockerDriver_Start_Kill_Wait(t *testing.T) {
|
||||
if !dockerLocated() {
|
||||
t.SkipNow()
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "python-demo",
|
||||
Name: "redis-demo",
|
||||
Config: map[string]string{
|
||||
"image": "redis",
|
||||
"command": "sleep 10",
|
||||
"command": "/bin/sleep",
|
||||
"args": "10",
|
||||
},
|
||||
Resources: basicResources,
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
@@ -182,6 +250,7 @@ func TestDockerDriver_Start_Kill_Wait(t *testing.T) {
|
||||
|
||||
func taskTemplate() *structs.Task {
|
||||
return &structs.Task{
|
||||
Name: "redis-demo",
|
||||
Config: map[string]string{
|
||||
"image": "redis",
|
||||
},
|
||||
@@ -222,7 +291,7 @@ func TestDocker_StartN(t *testing.T) {
|
||||
// Let's spin up a bunch of things
|
||||
var err error
|
||||
for idx, task := range taskList {
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
@@ -236,6 +305,11 @@ func TestDocker_StartN(t *testing.T) {
|
||||
t.Log("==> All tasks are started. Terminating...")
|
||||
|
||||
for idx, handle := range handles {
|
||||
if handle == nil {
|
||||
t.Errorf("Bad handle for task #%d", idx+1)
|
||||
continue
|
||||
}
|
||||
|
||||
err := handle.Kill()
|
||||
if err != nil {
|
||||
t.Errorf("Failed stopping task #%d: %s", idx+1, err)
|
||||
@@ -271,7 +345,7 @@ func TestDocker_StartNVersions(t *testing.T) {
|
||||
// Let's spin up a bunch of things
|
||||
var err error
|
||||
for idx, task := range taskList {
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
@@ -285,6 +359,11 @@ func TestDocker_StartNVersions(t *testing.T) {
|
||||
t.Log("==> All tasks are started. Terminating...")
|
||||
|
||||
for idx, handle := range handles {
|
||||
if handle == nil {
|
||||
t.Errorf("Bad handle for task #%d", idx+1)
|
||||
continue
|
||||
}
|
||||
|
||||
err := handle.Kill()
|
||||
if err != nil {
|
||||
t.Errorf("Failed stopping task #%d: %s", idx+1, err)
|
||||
@@ -300,6 +379,7 @@ func TestDockerHostNet(t *testing.T) {
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "redis-demo",
|
||||
Config: map[string]string{
|
||||
"image": "redis",
|
||||
"network_mode": "host",
|
||||
@@ -309,7 +389,7 @@ func TestDockerHostNet(t *testing.T) {
|
||||
CPU: 512,
|
||||
},
|
||||
}
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
driverCtx := testDockerDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewDockerDriver(driverCtx)
|
||||
|
||||
client/driver/driver.go
@@ -3,6 +3,7 @@ package driver
import (
    "fmt"
    "log"
    "path/filepath"
    "sync"

    "github.com/hashicorp/nomad/client/allocdir"
@@ -15,11 +16,12 @@ import (
// BuiltinDrivers contains the built in registered drivers
// which are available for allocation handling
var BuiltinDrivers = map[string]Factory{
    "docker": NewDockerDriver,
    "exec":   NewExecDriver,
    "java":   NewJavaDriver,
    "qemu":   NewQemuDriver,
    "rkt":    NewRktDriver,
    "docker":   NewDockerDriver,
    "exec":     NewExecDriver,
    "raw_exec": NewRawExecDriver,
    "java":     NewJavaDriver,
    "qemu":     NewQemuDriver,
    "rkt":      NewRktDriver,
}

// NewDriver is used to instantiate and return a new driver
@@ -112,7 +114,13 @@ func TaskEnvironmentVariables(ctx *ExecContext, task *structs.Task) environment.
    env.SetMeta(task.Meta)

    if ctx.AllocDir != nil {
        env.SetAllocDir(ctx.AllocDir.AllocDir)
        env.SetAllocDir(ctx.AllocDir.SharedDir)
        taskdir, ok := ctx.AllocDir.TaskDirs[task.Name]
        if !ok {
            // TODO: Update this to return an error
        }

        env.SetTaskLocalDir(filepath.Join(taskdir, allocdir.TaskLocal))
    }

    if task.Resources != nil {
client/driver/environment package
@@ -12,6 +12,10 @@ const (
    // group.
    AllocDir = "NOMAD_ALLOC_DIR"

    // The path to the tasks local directory where it can store data that is
    // persisted to the alloc is removed.
    TaskLocalDir = "NOMAD_TASK_DIR"

    // The tasks memory limit in MBs.
    MemLimit = "NOMAD_MEMORY_LIMIT"

@@ -30,6 +34,10 @@
    MetaPrefix = "NOMAD_META_"
)

var (
    nomadVars = []string{AllocDir, TaskLocalDir, MemLimit, CpuLimit, TaskIP, PortPrefix, MetaPrefix}
)

type TaskEnvironment map[string]string

func NewTaskEnivornment() TaskEnvironment {
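A minimal usage sketch of the TaskEnvironment helpers; the setter and clear methods appear in the hunks below, the values are illustrative, and the constructor keeps the NewTaskEnivornment spelling used by the source (run inside a test or main that imports fmt and this environment package):

    env := environment.NewTaskEnivornment()
    env.SetTaskIp("10.0.0.5")
    env.SetMemLimit(256)
    env.SetEnvvars(map[string]string{"foo": "bar"})
    // List renders KEY=value pairs, e.g. NOMAD_IP=10.0.0.5, NOMAD_MEMORY_LIMIT=256, foo=bar.
    fmt.Println(env.List())
    // ClearEnvvars drops user variables (foo) but keeps the NOMAD_* ones.
    env.ClearEnvvars()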
@@ -70,18 +78,42 @@ func (t TaskEnvironment) SetAllocDir(dir string) {
|
||||
t[AllocDir] = dir
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearAllocDir() {
|
||||
delete(t, AllocDir)
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) SetTaskLocalDir(dir string) {
|
||||
t[TaskLocalDir] = dir
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearTaskLocalDir() {
|
||||
delete(t, TaskLocalDir)
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) SetMemLimit(limit int) {
|
||||
t[MemLimit] = strconv.Itoa(limit)
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearMemLimit() {
|
||||
delete(t, MemLimit)
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) SetCpuLimit(limit int) {
|
||||
t[CpuLimit] = strconv.Itoa(limit)
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearCpuLimit() {
|
||||
delete(t, CpuLimit)
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) SetTaskIp(ip string) {
|
||||
t[TaskIP] = ip
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearTaskIp() {
|
||||
delete(t, TaskIP)
|
||||
}
|
||||
|
||||
// Takes a map of port labels to their port value.
|
||||
func (t TaskEnvironment) SetPorts(ports map[string]int) {
|
||||
for label, port := range ports {
|
||||
@@ -89,6 +121,14 @@ func (t TaskEnvironment) SetPorts(ports map[string]int) {
|
||||
}
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearPorts() {
|
||||
for k, _ := range t {
|
||||
if strings.HasPrefix(k, PortPrefix) {
|
||||
delete(t, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Takes a map of meta values to be passed to the task. The keys are capatilized
|
||||
// when the environent variable is set.
|
||||
func (t TaskEnvironment) SetMeta(m map[string]string) {
|
||||
@@ -97,8 +137,28 @@ func (t TaskEnvironment) SetMeta(m map[string]string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearMeta() {
|
||||
for k, _ := range t {
|
||||
if strings.HasPrefix(k, MetaPrefix) {
|
||||
delete(t, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) SetEnvvars(m map[string]string) {
|
||||
for k, v := range m {
|
||||
t[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
func (t TaskEnvironment) ClearEnvvars() {
|
||||
OUTER:
|
||||
for k, _ := range t {
|
||||
for _, nomadPrefix := range nomadVars {
|
||||
if strings.HasPrefix(k, nomadPrefix) {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
delete(t, k)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ func TestEnvironment_AsList(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TastEnvironment_ParseFromList(t *testing.T) {
|
||||
func TestEnvironment_ParseFromList(t *testing.T) {
|
||||
input := []string{"foo=bar", "BAZ=baM"}
|
||||
env, err := ParseFromList(input)
|
||||
if err != nil {
|
||||
@@ -29,10 +29,44 @@ func TastEnvironment_ParseFromList(t *testing.T) {
|
||||
}
|
||||
|
||||
exp := map[string]string{
|
||||
"foo": "baz",
|
||||
"foo": "bar",
|
||||
"BAZ": "baM",
|
||||
}
|
||||
if !reflect.DeepEqual(env, exp) {
|
||||
t.Fatalf("ParseFromList(%#v) returned %v; want %v", input, env, exp)
|
||||
|
||||
if len(env) != len(exp) {
|
||||
t.Fatalf("ParseFromList(%#v) has length %v; want %v", input, len(env), len(exp))
|
||||
}
|
||||
|
||||
for k, v := range exp {
|
||||
if actV, ok := env[k]; !ok {
|
||||
t.Fatalf("ParseFromList(%#v) doesn't contain expected %v", input, k)
|
||||
} else if actV != v {
|
||||
t.Fatalf("ParseFromList(%#v) has incorrect value for %v; got %v; want %v", input, k, actV, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnvironment_ClearEnvvars(t *testing.T) {
|
||||
env := NewTaskEnivornment()
|
||||
env.SetTaskIp("127.0.0.1")
|
||||
env.SetEnvvars(map[string]string{"foo": "baz", "bar": "bang"})
|
||||
|
||||
act := env.List()
|
||||
exp := []string{"NOMAD_IP=127.0.0.1", "bar=bang", "foo=baz"}
|
||||
sort.Strings(act)
|
||||
sort.Strings(exp)
|
||||
if !reflect.DeepEqual(act, exp) {
|
||||
t.Fatalf("env.List() returned %v; want %v", act, exp)
|
||||
}
|
||||
|
||||
// Clear the environent variables.
|
||||
env.ClearEnvvars()
|
||||
|
||||
act = env.List()
|
||||
exp = []string{"NOMAD_IP=127.0.0.1"}
|
||||
sort.Strings(act)
|
||||
sort.Strings(exp)
|
||||
if !reflect.DeepEqual(act, exp) {
|
||||
t.Fatalf("env.List() returned %v; want %v", act, exp)
|
||||
}
|
||||
}
|
||||
|
||||
client/driver/exec.go
@@ -2,10 +2,15 @@ package driver

import (
    "fmt"
    "log"
    "path"
    "path/filepath"
    "runtime"
    "syscall"
    "time"

    "github.com/hashicorp/go-getter"
    "github.com/hashicorp/nomad/client/allocdir"
    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/client/executor"
    "github.com/hashicorp/nomad/nomad/structs"
@@ -41,12 +46,40 @@ func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool,
}

func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
    // Get the command
    // Get the command to be ran
    command, ok := task.Config["command"]
    if !ok || command == "" {
        return nil, fmt.Errorf("missing command for exec driver")
    }

    // Check if an artificat is specified and attempt to download it
    source, ok := task.Config["artifact_source"]
    if ok && source != "" {
        // Proceed to download an artifact to be executed.
        // We use go-getter to support a variety of protocols, but need to change
        // file permissions of the resulted download to be executable

        // Create a location to download the artifact.
        taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
        if !ok {
            return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
        }
        destDir := filepath.Join(taskDir, allocdir.TaskLocal)

        artifactName := path.Base(source)
        artifactFile := filepath.Join(destDir, artifactName)
        if err := getter.GetFile(artifactFile, source); err != nil {
            return nil, fmt.Errorf("Error downloading artifact for Exec driver: %s", err)
        }

        // Add execution permissions to the newly downloaded artifact
        if runtime.GOOS != "windows" {
            if err := syscall.Chmod(artifactFile, 0755); err != nil {
                log.Printf("[ERR] driver.Exec: Error making artifact executable: %s", err)
            }
        }
    }

    // Get the environment variables.
    envVars := TaskEnvironmentVariables(ctx, task)
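Outside the driver, the same go-getter fetch is a one-liner. A standalone sketch of the fetch-then-chmod flow; the source URL and destination path are hypothetical:

    package main

    import (
        "fmt"
        "os"
        "path/filepath"

        "github.com/hashicorp/go-getter"
    )

    func main() {
        dest := filepath.Join(os.TempDir(), "hi_linux_amd64")
        // Hypothetical source; go-getter also understands git, hg, S3 and file URLs.
        src := "https://example.com/artifacts/hi_linux_amd64"
        if err := getter.GetFile(dest, src); err != nil {
            fmt.Fprintf(os.Stderr, "download failed: %v\n", err)
            os.Exit(1)
        }
        // Mark the artifact executable, as the driver does on non-Windows hosts.
        if err := os.Chmod(dest, 0755); err != nil {
            fmt.Fprintf(os.Stderr, "chmod failed: %v\n", err)
            os.Exit(1)
        }
    }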
@@ -5,10 +5,10 @@ import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/environment"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
@@ -86,7 +86,7 @@ func TestExecDriver_Start_Wait(t *testing.T) {
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"command": "/bin/sleep",
|
||||
"args": "1",
|
||||
"args": "2",
|
||||
},
|
||||
Resources: basicResources,
|
||||
}
|
||||
@@ -116,11 +116,109 @@ func TestExecDriver_Start_Wait(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
case <-time.After(4 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecDriver_Start_Artifact_basic(t *testing.T) {
|
||||
ctestutils.ExecCompatible(t)
|
||||
var file string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
file = "hi_darwin_amd64"
|
||||
default:
|
||||
file = "hi_linux_amd64"
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
|
||||
"command": filepath.Join("$NOMAD_TASK_DIR", file),
|
||||
},
|
||||
Resources: basicResources,
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewExecDriver(driverCtx)
|
||||
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Update should be a no-op
|
||||
err = handle.Update(task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecDriver_Start_Artifact_expanded(t *testing.T) {
|
||||
ctestutils.ExecCompatible(t)
|
||||
var file string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
file = "hi_darwin_amd64"
|
||||
default:
|
||||
file = "hi_linux_amd64"
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
|
||||
"command": "/bin/bash",
|
||||
"args": fmt.Sprintf("-c '/bin/sleep 1 && %s'", filepath.Join("$NOMAD_TASK_DIR", file)),
|
||||
},
|
||||
Resources: basicResources,
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
d := NewExecDriver(driverCtx)
|
||||
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Update should be a no-op
|
||||
err = handle.Update(task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
func TestExecDriver_Start_Wait_AllocDir(t *testing.T) {
|
||||
ctestutils.ExecCompatible(t)
|
||||
|
||||
@@ -159,7 +257,7 @@ func TestExecDriver_Start_Wait_AllocDir(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check that data was written to the shared alloc directory.
|
||||
outputFile := filepath.Join(ctx.AllocDir.AllocDir, allocdir.SharedAllocName, file)
|
||||
outputFile := filepath.Join(ctx.AllocDir.SharedDir, file)
|
||||
act, err := ioutil.ReadFile(outputFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Couldn't read expected output: %v", err)
|
||||
|
||||
@@ -3,9 +3,6 @@ package driver
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
@@ -14,6 +11,7 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-getter"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/executor"
|
||||
@@ -69,7 +67,7 @@ func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool,
|
||||
}
|
||||
|
||||
if infoString == "" {
|
||||
d.logger.Println("[WARN] Error parsing Java version information, aborting")
|
||||
d.logger.Println("[WARN] driver.java: error parsing Java version information, aborting")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@@ -97,44 +95,33 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
|
||||
return nil, fmt.Errorf("missing jar source for Java Jar driver")
|
||||
}
|
||||
|
||||
// Attempt to download the thing
|
||||
// Should be extracted to some kind of Http Fetcher
|
||||
// Right now, assume publicly accessible HTTP url
|
||||
resp, err := http.Get(source)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error downloading source for Java driver: %s", err)
|
||||
}
|
||||
|
||||
// Get the tasks local directory.
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
|
||||
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
|
||||
// Create a location to download the binary.
|
||||
fName := path.Base(source)
|
||||
fPath := filepath.Join(taskLocal, fName)
|
||||
f, err := os.OpenFile(fPath, os.O_CREATE|os.O_WRONLY, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to download to: %s", err)
|
||||
}
|
||||
|
||||
defer f.Close()
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Copy remote file to local directory for execution
|
||||
// TODO: a retry of sort if io.Copy fails, for large binaries
|
||||
_, ioErr := io.Copy(f, resp.Body)
|
||||
if ioErr != nil {
|
||||
return nil, fmt.Errorf("Error copying jar from source: %s", ioErr)
|
||||
jarName := path.Base(source)
|
||||
jarPath := filepath.Join(destDir, jarName)
|
||||
if err := getter.GetFile(jarPath, source); err != nil {
|
||||
return nil, fmt.Errorf("Error downloading source for Java driver: %s", err)
|
||||
}
|
||||
|
||||
// Get the environment variables.
|
||||
envVars := TaskEnvironmentVariables(ctx, task)
|
||||
|
||||
args := []string{}
|
||||
// Look for jvm options
|
||||
jvm_options, ok := task.Config["jvm_options"]
|
||||
if ok && jvm_options != "" {
|
||||
d.logger.Printf("[DEBUG] driver.java: found JVM options: %s", jvm_options)
|
||||
args = append(args, jvm_options)
|
||||
}
|
||||
|
||||
// Build the argument list.
|
||||
args := []string{"-jar", filepath.Join(allocdir.TaskLocal, fName)}
|
||||
args = append(args, "-jar", filepath.Join(allocdir.TaskLocal, jarName))
|
||||
if argRaw, ok := task.Config["args"]; ok {
|
||||
args = append(args, argRaw)
|
||||
}
|
||||
|
||||
@@ -100,6 +100,7 @@ func TestJavaDriver_Start_Wait(t *testing.T) {
|
||||
"jar_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/demoapp.jar",
|
||||
// "jar_source": "https://s3-us-west-2.amazonaws.com/java-jar-thing/demoapp.jar",
|
||||
// "args": "-d64",
|
||||
"jvm_options": "-Xmx2048m -Xms256m",
|
||||
},
|
||||
Resources: basicResources,
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@@ -19,6 +18,7 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-getter"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
@@ -94,45 +94,25 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
|
||||
return nil, fmt.Errorf("Missing required Task Resource: Memory")
|
||||
}
|
||||
|
||||
// Attempt to download the thing
|
||||
// Should be extracted to some kind of Http Fetcher
|
||||
// Right now, assume publicly accessible HTTP url
|
||||
resp, err := http.Get(source)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error downloading source for Qemu driver: %s", err)
|
||||
}
|
||||
|
||||
// Get the tasks local directory.
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
|
||||
// Create a location in the local directory to download and store the image.
|
||||
// TODO: Caching
|
||||
// Create a location to download the binary.
|
||||
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
vmID := fmt.Sprintf("qemu-vm-%s-%s", structs.GenerateUUID(), filepath.Base(source))
|
||||
fPath := filepath.Join(taskLocal, vmID)
|
||||
vmPath, err := os.OpenFile(fPath, os.O_CREATE|os.O_WRONLY, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to download to: %s", err)
|
||||
}
|
||||
|
||||
defer vmPath.Close()
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Copy remote file to local AllocDir for execution
|
||||
// TODO: a retry of sort if io.Copy fails, for large binaries
|
||||
_, ioErr := io.Copy(vmPath, resp.Body)
|
||||
if ioErr != nil {
|
||||
return nil, fmt.Errorf("Error copying Qemu image from source: %s", ioErr)
|
||||
vmPath := filepath.Join(destDir, vmID)
|
||||
if err := getter.GetFile(vmPath, source); err != nil {
|
||||
return nil, fmt.Errorf("Error downloading artifact for Qemu driver: %s", err)
|
||||
}
|
||||
|
||||
// compute and check checksum
|
||||
if check, ok := task.Config["checksum"]; ok {
|
||||
d.logger.Printf("[DEBUG] Running checksum on (%s)", vmID)
|
||||
hasher := sha256.New()
|
||||
file, err := os.Open(vmPath.Name())
|
||||
file, err := os.Open(vmPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to open file for checksum")
|
||||
}
|
||||
@@ -163,7 +143,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
|
||||
"-machine", "type=pc,accel=" + accelerator,
|
||||
"-name", vmID,
|
||||
"-m", mem,
|
||||
"-drive", "file=" + vmPath.Name(),
|
||||
"-drive", "file=" + vmPath,
|
||||
"-nodefconfig",
|
||||
"-nodefaults",
|
||||
"-nographic",
|
||||
@@ -240,7 +220,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
|
||||
// Create and Return Handle
|
||||
h := &qemuHandle{
|
||||
proc: cmd.Process,
|
||||
vmID: vmPath.Name(),
|
||||
vmID: vmPath,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan error, 1),
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package driver
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
@@ -12,14 +11,6 @@ import (
|
||||
ctestutils "github.com/hashicorp/nomad/client/testutil"
|
||||
)
|
||||
|
||||
// qemuLocated looks to see whether qemu binaries are available on this system
|
||||
// before we try to run tests. We may need to tweak this for cross-OS support
|
||||
// but I think this should work on *nix at least.
|
||||
func qemuLocated() bool {
|
||||
_, err := exec.Command("qemu-x86_64", "-version").CombinedOutput()
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func TestQemuDriver_Handle(t *testing.T) {
|
||||
h := &qemuHandle{
|
||||
proc: &os.Process{Pid: 123},
|
||||
@@ -58,10 +49,7 @@ func TestQemuDriver_Fingerprint(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQemuDriver_Start(t *testing.T) {
|
||||
if !qemuLocated() {
|
||||
t.Skip("QEMU not found; skipping")
|
||||
}
|
||||
|
||||
ctestutils.QemuCompatible(t)
|
||||
// TODO: use test server to load from a fixture
|
||||
task := &structs.Task{
|
||||
Name: "linux",
|
||||
@@ -110,10 +98,7 @@ func TestQemuDriver_Start(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQemuDriver_RequiresMemory(t *testing.T) {
|
||||
if !qemuLocated() {
|
||||
t.Skip("QEMU not found; skipping")
|
||||
}
|
||||
|
||||
ctestutils.QemuCompatible(t)
|
||||
// TODO: use test server to load from a fixture
|
||||
task := &structs.Task{
|
||||
Name: "linux",
|
||||
@@ -136,5 +121,4 @@ func TestQemuDriver_RequiresMemory(t *testing.T) {
|
||||
if err == nil {
|
||||
t.Fatalf("Expected error when not specifying memory")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
client/driver/raw_exec.go (new file, 239 lines)
@@ -0,0 +1,239 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-getter"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/args"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// The option that enables this driver in the Config.Options map.
|
||||
rawExecConfigOption = "driver.raw_exec.enable"
|
||||
|
||||
// Null files to use as stdin.
|
||||
unixNull = "/dev/null"
|
||||
windowsNull = "nul"
|
||||
)
|
||||
|
||||
// The RawExecDriver is a privileged version of the exec driver. It provides no
|
||||
// resource isolation and just fork/execs. The Exec driver should be preferred
|
||||
// and this should only be used when explicitly needed.
|
||||
type RawExecDriver struct {
|
||||
DriverContext
|
||||
}
|
||||
|
||||
// rawExecHandle is returned from Start/Open as a handle to the PID
|
||||
type rawExecHandle struct {
|
||||
proc *os.Process
|
||||
waitCh chan error
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// NewRawExecDriver is used to create a new raw exec driver
|
||||
func NewRawExecDriver(ctx *DriverContext) Driver {
|
||||
return &RawExecDriver{*ctx}
|
||||
}
|
||||
|
||||
func (d *RawExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Check that the user has explicitly enabled this executor.
|
||||
enabled, err := strconv.ParseBool(cfg.ReadDefault(rawExecConfigOption, "false"))
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("Failed to parse %v option: %v", rawExecConfigOption, err)
|
||||
}
|
||||
|
||||
if enabled {
|
||||
d.logger.Printf("[WARN] driver.raw_exec: raw exec is enabled. Only enable if needed")
|
||||
node.Attributes["driver.raw_exec"] = "1"
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
// Get the tasks local directory.
|
||||
taskName := d.DriverContext.taskName
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
|
||||
// Get the command to be ran
|
||||
command, ok := task.Config["command"]
|
||||
if !ok || command == "" {
|
||||
return nil, fmt.Errorf("missing command for Raw Exec driver")
|
||||
}
|
||||
|
||||
// Check if an artificat is specified and attempt to download it
|
||||
source, ok := task.Config["artifact_source"]
|
||||
if ok && source != "" {
|
||||
// Proceed to download an artifact to be executed.
|
||||
// We use go-getter to support a variety of protocols, but need to change
|
||||
// file permissions of the resulted download to be executable
|
||||
|
||||
// Create a location to download the artifact.
|
||||
destDir := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
|
||||
artifactName := path.Base(source)
|
||||
artifactFile := filepath.Join(destDir, artifactName)
|
||||
if err := getter.GetFile(artifactFile, source); err != nil {
|
||||
return nil, fmt.Errorf("Error downloading artifact for Raw Exec driver: %s", err)
|
||||
}
|
||||
|
||||
// Add execution permissions to the newly downloaded artifact
|
||||
if runtime.GOOS != "windows" {
|
||||
if err := syscall.Chmod(artifactFile, 0755); err != nil {
|
||||
log.Printf("[ERR] driver.raw_exec: Error making artifact executable: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the environment variables.
|
||||
envVars := TaskEnvironmentVariables(ctx, task)
|
||||
|
||||
// expand NOMAD_TASK_DIR
|
||||
parsedPath, err := args.ParseAndReplace(command, envVars.Map())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failure to parse arguments in command path: %v", command)
|
||||
} else if len(parsedPath) != 1 {
|
||||
return nil, fmt.Errorf("couldn't properly parse command path: %v", command)
|
||||
}
|
||||
|
||||
cm := parsedPath[0]
|
||||
|
||||
// Look for arguments
|
||||
var cmdArgs []string
|
||||
if argRaw, ok := task.Config["args"]; ok {
|
||||
parsed, err := args.ParseAndReplace(argRaw, envVars.Map())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmdArgs = append(cmdArgs, parsed...)
|
||||
}
|
||||
|
||||
// Setup the command
|
||||
cmd := exec.Command(cm, cmdArgs...)
|
||||
cmd.Dir = taskDir
|
||||
cmd.Env = envVars.List()
|
||||
|
||||
// Capture the stdout/stderr and redirect stdin to /dev/null
|
||||
stdoutFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stdout", taskName))
|
||||
stderrFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stderr", taskName))
|
||||
stdinFilename := unixNull
|
||||
if runtime.GOOS == "windows" {
|
||||
stdinFilename = windowsNull
|
||||
}
|
||||
|
||||
stdo, err := os.OpenFile(stdoutFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to redirect stdout: %v", err)
|
||||
}
|
||||
|
||||
stde, err := os.OpenFile(stderrFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err)
|
||||
}
|
||||
|
||||
stdi, err := os.OpenFile(stdinFilename, os.O_CREATE|os.O_RDONLY, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to redirect stdin: %v", err)
|
||||
}
|
||||
|
||||
cmd.Stdout = stdo
|
||||
cmd.Stderr = stde
|
||||
cmd.Stdin = stdi
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("failed to start command: %v", err)
|
||||
}
|
||||
|
||||
// Return a driver handle
|
||||
h := &rawExecHandle{
|
||||
proc: cmd.Process,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan error, 1),
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
|
||||
// Parse the PID from the handle, which has the form "PID:<pid>" (see ID below)
|
||||
pidStr := strings.TrimPrefix(handleID, "PID:")
|
||||
pid, err := strconv.Atoi(pidStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse handle '%s': %v", handleID, err)
|
||||
}
|
||||
|
||||
// Find the process
|
||||
proc, err := os.FindProcess(pid)
|
||||
if proc == nil || err != nil {
|
||||
return nil, fmt.Errorf("failed to find PID %d: %v", pid, err)
|
||||
}
|
||||
|
||||
// Return a driver handle
|
||||
h := &rawExecHandle{
|
||||
proc: proc,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan error, 1),
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) ID() string {
|
||||
// Return a handle to the PID
|
||||
return fmt.Sprintf("PID:%d", h.proc.Pid)
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) WaitCh() chan error {
|
||||
return h.waitCh
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) Update(task *structs.Task) error {
|
||||
// Update is not possible
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kill is used to terminate the task. On supported OSes we send an Interrupt,
// wait a 5 second grace period, and then Kill; on Windows we kill immediately.
|
||||
func (h *rawExecHandle) Kill() error {
|
||||
if runtime.GOOS == "windows" {
|
||||
return h.proc.Kill()
|
||||
}
|
||||
|
||||
h.proc.Signal(os.Interrupt)
|
||||
select {
|
||||
case <-h.doneCh:
|
||||
return nil
|
||||
case <-time.After(5 * time.Second):
|
||||
return h.proc.Kill()
|
||||
}
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) run() {
|
||||
ps, err := h.proc.Wait()
|
||||
close(h.doneCh)
|
||||
if err != nil {
|
||||
h.waitCh <- err
|
||||
} else if !ps.Success() {
|
||||
h.waitCh <- fmt.Errorf("task exited with error")
|
||||
}
|
||||
close(h.waitCh)
|
||||
}
|
||||
327
client/driver/raw_exec_test.go
Normal file
@@ -0,0 +1,327 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/environment"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
func TestRawExecDriver_Fingerprint(t *testing.T) {
|
||||
d := NewRawExecDriver(testDriverContext(""))
|
||||
node := &structs.Node{
|
||||
Attributes: make(map[string]string),
|
||||
}
|
||||
|
||||
// Disable raw exec.
|
||||
cfg := &config.Config{Options: map[string]string{rawExecConfigOption: "false"}}
|
||||
|
||||
apply, err := d.Fingerprint(cfg, node)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if apply {
|
||||
t.Fatalf("should not apply")
|
||||
}
|
||||
if node.Attributes["driver.raw_exec"] != "" {
|
||||
t.Fatalf("driver incorrectly enabled")
|
||||
}
|
||||
|
||||
// Enable raw exec.
|
||||
cfg.Options[rawExecConfigOption] = "true"
|
||||
apply, err = d.Fingerprint(cfg, node)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !apply {
|
||||
t.Fatalf("should apply")
|
||||
}
|
||||
if node.Attributes["driver.raw_exec"] != "1" {
|
||||
t.Fatalf("driver not enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRawExecDriver_StartOpen_Wait(t *testing.T) {
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"command": "/bin/sleep",
|
||||
"args": "1",
|
||||
},
|
||||
}
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
d := NewRawExecDriver(driverCtx)
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Attempt to open
|
||||
handle2, err := d.Open(ctx, handle.ID())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle2 == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case <-handle2.WaitCh():
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
|
||||
// Check they are both tracking the same PID.
|
||||
pid1 := handle.(*rawExecHandle).proc.Pid
|
||||
pid2 := handle2.(*rawExecHandle).proc.Pid
|
||||
if pid1 != pid2 {
|
||||
t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRawExecDriver_Start_Artifact_basic(t *testing.T) {
|
||||
var file string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
file = "hi_darwin_amd64"
|
||||
default:
|
||||
file = "hi_linux_amd64"
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
|
||||
"command": filepath.Join("$NOMAD_TASK_DIR", file),
|
||||
},
|
||||
}
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
d := NewRawExecDriver(driverCtx)
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Attempt to open
|
||||
handle2, err := d.Open(ctx, handle.ID())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle2 == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case <-handle2.WaitCh():
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
|
||||
// Check they are both tracking the same PID.
|
||||
pid1 := handle.(*rawExecHandle).proc.Pid
|
||||
pid2 := handle2.(*rawExecHandle).proc.Pid
|
||||
if pid1 != pid2 {
|
||||
t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRawExecDriver_Start_Artifact_expanded(t *testing.T) {
|
||||
var file string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
file = "hi_darwin_amd64"
|
||||
default:
|
||||
file = "hi_linux_amd64"
|
||||
}
|
||||
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"artifact_source": fmt.Sprintf("https://dl.dropboxusercontent.com/u/47675/jar_thing/%s", file),
|
||||
"command": "/bin/bash",
|
||||
"args": fmt.Sprintf("-c '/bin/sleep 1 && %s'", filepath.Join("$NOMAD_TASK_DIR", file)),
|
||||
},
|
||||
}
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
d := NewRawExecDriver(driverCtx)
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Attempt to open
|
||||
handle2, err := d.Open(ctx, handle.ID())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle2 == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case <-handle2.WaitCh():
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
|
||||
// Check they are both tracking the same PID.
|
||||
pid1 := handle.(*rawExecHandle).proc.Pid
|
||||
pid2 := handle2.(*rawExecHandle).proc.Pid
|
||||
if pid1 != pid2 {
|
||||
t.Fatalf("tracking incorrect Pid; %v != %v", pid1, pid2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRawExecDriver_Start_Wait(t *testing.T) {
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"command": "/bin/sleep",
|
||||
"args": "1",
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
d := NewRawExecDriver(driverCtx)
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Update should be a no-op
|
||||
err = handle.Update(task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRawExecDriver_Start_Wait_AllocDir(t *testing.T) {
|
||||
exp := []byte{'w', 'i', 'n'}
|
||||
file := "output.txt"
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"command": "/bin/bash",
|
||||
"args": fmt.Sprintf(`-c "sleep 1; echo -n %s > $%s/%s"`, string(exp), environment.AllocDir, file),
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
d := NewRawExecDriver(driverCtx)
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
|
||||
// Check that data was written to the shared alloc directory.
|
||||
outputFile := filepath.Join(ctx.AllocDir.SharedDir, file)
|
||||
act, err := ioutil.ReadFile(outputFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Couldn't read expected output: %v", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(act, exp) {
|
||||
t.Fatalf("Command outputted %v; want %v", act, exp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRawExecDriver_Start_Kill_Wait(t *testing.T) {
|
||||
task := &structs.Task{
|
||||
Name: "sleep",
|
||||
Config: map[string]string{
|
||||
"command": "/bin/sleep",
|
||||
"args": "1",
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
d := NewRawExecDriver(driverCtx)
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
|
||||
go func() {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
err := handle.Kill()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Task should terminate quickly
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err == nil {
|
||||
t.Fatal("should err")
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
@@ -7,19 +7,22 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/args"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
var (
|
||||
reRktVersion = regexp.MustCompile("rkt version ([\\d\\.]+).+")
|
||||
reAppcVersion = regexp.MustCompile("appc version ([\\d\\.]+).+")
|
||||
reRktVersion = regexp.MustCompile(`rkt version (\d[.\d]+)`)
|
||||
reAppcVersion = regexp.MustCompile(`appc version (\d[.\d]+)`)
|
||||
)
|
||||
|
||||
// RktDriver is a driver for running images via Rkt
|
||||
@@ -32,7 +35,7 @@ type RktDriver struct {
|
||||
// rktHandle is returned from Start/Open as a handle to the PID
|
||||
type rktHandle struct {
|
||||
proc *os.Process
|
||||
name string
|
||||
image string
|
||||
logger *log.Logger
|
||||
waitCh chan error
|
||||
doneCh chan struct{}
|
||||
@@ -41,8 +44,8 @@ type rktHandle struct {
|
||||
// rktPID is a struct to map the pid running the process to the vm image on
|
||||
// disk
|
||||
type rktPID struct {
|
||||
Pid int
|
||||
Name string
|
||||
Pid int
|
||||
Image string
|
||||
}
|
||||
|
||||
// NewRktDriver is used to create a new exec driver
|
||||
@@ -64,13 +67,13 @@ func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, e
|
||||
out := strings.TrimSpace(string(outBytes))
|
||||
|
||||
rktMatches := reRktVersion.FindStringSubmatch(out)
|
||||
appcMatches := reRktVersion.FindStringSubmatch(out)
|
||||
appcMatches := reAppcVersion.FindStringSubmatch(out)
|
||||
if len(rktMatches) != 2 || len(appcMatches) != 2 {
|
||||
return false, fmt.Errorf("Unable to parse Rkt version string: %#v", rktMatches)
|
||||
}
|
||||
|
||||
node.Attributes["driver.rkt"] = "true"
|
||||
node.Attributes["driver.rkt.version"] = rktMatches[0]
|
||||
node.Attributes["driver.rkt"] = "1"
|
||||
node.Attributes["driver.rkt.version"] = rktMatches[1]
|
||||
node.Attributes["driver.rkt.appc.version"] = appcMatches[1]
|
||||
|
||||
return true, nil
|
||||
@@ -78,61 +81,104 @@ func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, e
|
||||
|
||||
// Run an existing Rkt image.
|
||||
func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
trust_prefix, ok := task.Config["trust_prefix"]
|
||||
if !ok || trust_prefix == "" {
|
||||
return nil, fmt.Errorf("Missing trust prefix for rkt")
|
||||
// Validate the task configuration.
|
||||
img, ok := task.Config["image"]
|
||||
if !ok || img == "" {
|
||||
return nil, fmt.Errorf("Missing ACI image for rkt")
|
||||
}
|
||||
|
||||
// Get the tasks local directory.
|
||||
taskName := d.DriverContext.taskName
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
taskLocal := filepath.Join(taskDir, allocdir.TaskLocal)
|
||||
|
||||
// Add the given trust prefix
|
||||
var outBuf, errBuf bytes.Buffer
|
||||
cmd := exec.Command("rkt", "trust", fmt.Sprintf("--prefix=%s", trust_prefix))
|
||||
cmd.Stdout = &outBuf
|
||||
cmd.Stderr = &errBuf
|
||||
d.logger.Printf("[DEBUG] driver.rkt: starting rkt command: %q", cmd.Args)
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"Error running rkt: %s\n\nOutput: %s\n\nError: %s",
|
||||
err, outBuf.String(), errBuf.String())
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trust_prefix)
|
||||
|
||||
name, ok := task.Config["name"]
|
||||
if !ok || name == "" {
|
||||
return nil, fmt.Errorf("Missing ACI name for rkt")
|
||||
trust_prefix, trust_cmd := task.Config["trust_prefix"]
|
||||
if trust_cmd {
|
||||
var outBuf, errBuf bytes.Buffer
|
||||
cmd := exec.Command("rkt", "trust", fmt.Sprintf("--prefix=%s", trust_prefix))
|
||||
cmd.Stdout = &outBuf
|
||||
cmd.Stderr = &errBuf
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf("Error running rkt trust: %s\n\nOutput: %s\n\nError: %s",
|
||||
err, outBuf.String(), errBuf.String())
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trust_prefix)
|
||||
}
|
||||
|
||||
exec_cmd, ok := task.Config["exec"]
|
||||
if !ok || exec_cmd == "" {
|
||||
d.logger.Printf("[WARN] driver.rkt: could not find a command to execute in the ACI, the default command will be executed")
|
||||
// Build the command.
|
||||
var cmd_args []string
|
||||
|
||||
// Inject the environment variables.
|
||||
envVars := TaskEnvironmentVariables(ctx, task)
|
||||
|
||||
// Clear the task directories as they are not currently supported.
|
||||
envVars.ClearTaskLocalDir()
|
||||
envVars.ClearAllocDir()
|
||||
|
||||
for k, v := range envVars.Map() {
|
||||
cmd_args = append(cmd_args, fmt.Sprintf("--set-env=%v=%v", k, v))
|
||||
}
|
||||
|
||||
// Run the ACI
|
||||
var aoutBuf, aerrBuf bytes.Buffer
|
||||
run_cmd := []string{
|
||||
"rkt",
|
||||
"run",
|
||||
"--mds-register=false",
|
||||
name,
|
||||
// Disable signature verification if the trust command was not run.
|
||||
if !trust_cmd {
|
||||
cmd_args = append(cmd_args, "--insecure-skip-verify")
|
||||
}
|
||||
if exec_cmd != "" {
|
||||
splitted := strings.Fields(exec_cmd)
|
||||
run_cmd = append(run_cmd, "--exec=", splitted[0], "--")
|
||||
run_cmd = append(run_cmd, splitted[1:]...)
|
||||
run_cmd = append(run_cmd, "---")
|
||||
|
||||
// Append the run command.
|
||||
cmd_args = append(cmd_args, "run", "--mds-register=false", img)
|
||||
|
||||
// Check if the user has overridden the exec command.
|
||||
if exec_cmd, ok := task.Config["command"]; ok {
|
||||
cmd_args = append(cmd_args, fmt.Sprintf("--exec=%v", exec_cmd))
|
||||
}
|
||||
acmd := exec.Command(run_cmd[0], run_cmd[1:]...)
|
||||
acmd.Stdout = &aoutBuf
|
||||
acmd.Stderr = &aerrBuf
|
||||
d.logger.Printf("[DEBUG] driver:rkt: starting rkt command: %q", acmd.Args)
|
||||
if err := acmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"Error running rkt: %s\n\nOutput: %s\n\nError: %s",
|
||||
err, aoutBuf.String(), aerrBuf.String())
|
||||
|
||||
// Add user passed arguments.
|
||||
if userArgs, ok := task.Config["args"]; ok {
|
||||
parsed, err := args.ParseAndReplace(userArgs, envVars.Map())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Need to start arguments with "--"
|
||||
if len(parsed) > 0 {
|
||||
cmd_args = append(cmd_args, "--")
|
||||
}
|
||||
|
||||
for _, arg := range parsed {
|
||||
cmd_args = append(cmd_args, fmt.Sprintf("%v", arg))
|
||||
}
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.rkt: started ACI: %q", name)
|
||||
|
||||
// Create files to capture stdout and stderr.
|
||||
stdoutFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stdout", taskName))
|
||||
stderrFilename := filepath.Join(taskLocal, fmt.Sprintf("%s.stderr", taskName))
|
||||
|
||||
stdo, err := os.OpenFile(stdoutFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to redirect stdout: %v", err)
|
||||
}
|
||||
|
||||
stde, err := os.OpenFile(stderrFilename, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command("rkt", cmd_args...)
|
||||
cmd.Stdout = stdo
|
||||
cmd.Stderr = stde
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("Error running rkt: %v", err)
|
||||
}
|
||||
|
||||
d.logger.Printf("[DEBUG] driver.rkt: started ACI %q with: %v", img, cmd.Args)
|
||||
h := &rktHandle{
|
||||
proc: acmd.Process,
|
||||
name: name,
|
||||
proc: cmd.Process,
|
||||
image: img,
|
||||
logger: d.logger,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan error, 1),
|
||||
@@ -158,7 +204,7 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
|
||||
// Return a driver handle
|
||||
h := &rktHandle{
|
||||
proc: proc,
|
||||
name: qpid.Name,
|
||||
image: qpid.Image,
|
||||
logger: d.logger,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan error, 1),
|
||||
@@ -171,8 +217,8 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
|
||||
func (h *rktHandle) ID() string {
|
||||
// Return a handle to the PID
|
||||
pid := &rktPID{
|
||||
Pid: h.proc.Pid,
|
||||
Name: h.name,
|
||||
Pid: h.proc.Pid,
|
||||
Image: h.image,
|
||||
}
|
||||
data, err := json.Marshal(pid)
|
||||
if err != nil {
|
||||
|
||||
@@ -2,26 +2,44 @@ package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
|
||||
ctestutils "github.com/hashicorp/nomad/client/testutil"
|
||||
)
|
||||
|
||||
func TestRktVersionRegex(t *testing.T) {
|
||||
input_rkt := "rkt version 0.8.1"
|
||||
input_appc := "appc version 1.2.0"
|
||||
expected_rkt := "0.8.1"
|
||||
expected_appc := "1.2.0"
|
||||
rktMatches := reRktVersion.FindStringSubmatch(input_rkt)
|
||||
appcMatches := reAppcVersion.FindStringSubmatch(input_appc)
|
||||
if rktMatches[1] != expected_rkt {
|
||||
fmt.Printf("Test failed; got %q; want %q\n", rktMatches[1], expected_rkt)
|
||||
}
|
||||
if appcMatches[1] != expected_appc {
|
||||
fmt.Printf("Test failed; got %q; want %q\n", appcMatches[1], expected_appc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRktDriver_Handle(t *testing.T) {
|
||||
h := &rktHandle{
|
||||
proc: &os.Process{Pid: 123},
|
||||
name: "foo",
|
||||
image: "foo",
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan error, 1),
|
||||
}
|
||||
|
||||
actual := h.ID()
|
||||
expected := `Rkt:{"Pid":123,"Name":"foo"}`
|
||||
expected := `Rkt:{"Pid":123,"Image":"foo"}`
|
||||
if actual != expected {
|
||||
t.Errorf("Expected `%s`, found `%s`", expected, actual)
|
||||
}
|
||||
@@ -41,7 +59,7 @@ func TestRktDriver_Fingerprint(t *testing.T) {
|
||||
if !apply {
|
||||
t.Fatalf("should apply")
|
||||
}
|
||||
if node.Attributes["driver.rkt"] == "" {
|
||||
if node.Attributes["driver.rkt"] != "1" {
|
||||
t.Fatalf("Missing Rkt driver")
|
||||
}
|
||||
if node.Attributes["driver.rkt.version"] == "" {
|
||||
@@ -59,8 +77,8 @@ func TestRktDriver_Start(t *testing.T) {
|
||||
Name: "etcd",
|
||||
Config: map[string]string{
|
||||
"trust_prefix": "coreos.com/etcd",
|
||||
"name": "coreos.com/etcd:v2.0.4",
|
||||
"exec": "/etcd --version",
|
||||
"image": "coreos.com/etcd:v2.0.4",
|
||||
"command": "/etcd",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -98,8 +116,9 @@ func TestRktDriver_Start_Wait(t *testing.T) {
|
||||
Name: "etcd",
|
||||
Config: map[string]string{
|
||||
"trust_prefix": "coreos.com/etcd",
|
||||
"name": "coreos.com/etcd:v2.0.4",
|
||||
"exec": "/etcd --version",
|
||||
"image": "coreos.com/etcd:v2.0.4",
|
||||
"command": "/etcd",
|
||||
"args": "--version",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -132,3 +151,94 @@ func TestRktDriver_Start_Wait(t *testing.T) {
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
|
||||
ctestutils.RktCompatible(t)
|
||||
task := &structs.Task{
|
||||
Name: "etcd",
|
||||
Config: map[string]string{
|
||||
"image": "coreos.com/etcd:v2.0.4",
|
||||
"command": "/etcd",
|
||||
"args": "--version",
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
d := NewRktDriver(driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
defer handle.Kill()
|
||||
|
||||
// Update should be a no-op
|
||||
err = handle.Update(task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRktDriver_Start_Wait_Logs(t *testing.T) {
|
||||
ctestutils.RktCompatible(t)
|
||||
task := &structs.Task{
|
||||
Name: "etcd",
|
||||
Config: map[string]string{
|
||||
"trust_prefix": "coreos.com/etcd",
|
||||
"image": "coreos.com/etcd:v2.0.4",
|
||||
"command": "/etcd",
|
||||
"args": "--version",
|
||||
},
|
||||
}
|
||||
|
||||
driverCtx := testDriverContext(task.Name)
|
||||
ctx := testDriverExecContext(task, driverCtx)
|
||||
d := NewRktDriver(driverCtx)
|
||||
defer ctx.AllocDir.Destroy()
|
||||
|
||||
handle, err := d.Start(ctx, task)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if handle == nil {
|
||||
t.Fatalf("missing handle")
|
||||
}
|
||||
defer handle.Kill()
|
||||
|
||||
select {
|
||||
case err := <-handle.WaitCh():
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[task.Name]
|
||||
if !ok {
|
||||
t.Fatalf("Could not find task directory for task: %v", task)
|
||||
}
|
||||
stdout := filepath.Join(taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", task.Name))
|
||||
data, err := ioutil.ReadFile(stdout)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read tasks stdout: %v", err)
|
||||
}
|
||||
|
||||
if len(data) == 0 {
|
||||
t.Fatal("Task's stdout is empty")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
|
||||
// Mount dev
|
||||
dev := filepath.Join(taskDir, "dev")
|
||||
if err := os.Mkdir(dev, 0777); err != nil {
|
||||
return fmt.Errorf("Mkdir(%v) failed: %v", dev)
|
||||
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
|
||||
}
|
||||
|
||||
if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
|
||||
@@ -122,7 +122,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
|
||||
// Mount proc
|
||||
proc := filepath.Join(taskDir, "proc")
|
||||
if err := os.Mkdir(proc, 0777); err != nil {
|
||||
return fmt.Errorf("Mkdir(%v) failed: %v", proc)
|
||||
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
|
||||
}
|
||||
|
||||
if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
|
||||
@@ -135,6 +135,7 @@ func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocD
|
||||
return err
|
||||
}
|
||||
env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
|
||||
env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
|
||||
e.Cmd.Env = env.List()
|
||||
|
||||
e.alloc = alloc
|
||||
@@ -195,7 +196,11 @@ func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error {
|
||||
e.groups.MemorySwap = int64(-1)
|
||||
}
|
||||
|
||||
if resources.CPU > 0.0 {
|
||||
if resources.CPU != 0 {
|
||||
if resources.CPU < 2 {
|
||||
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
|
||||
}
|
||||
|
||||
// Set the relative CPU shares for this cgroup.
|
||||
// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
|
||||
// given process will have at least that amount of resources, but likely
|
||||
@@ -261,6 +266,14 @@ func (e *LinuxExecutor) Start() error {
|
||||
return err
|
||||
}
|
||||
|
||||
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
|
||||
if err != nil {
|
||||
return err
|
||||
} else if len(parsedPath) != 1 {
|
||||
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
|
||||
}
|
||||
e.cmd.Path = parsedPath[0]
|
||||
|
||||
combined := strings.Join(e.Cmd.Args, " ")
|
||||
parsed, err := args.ParseAndReplace(combined, envVars.Map())
|
||||
if err != nil {
|
||||
@@ -542,6 +555,11 @@ func (e *LinuxExecutor) destroyCgroup() error {
|
||||
multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err))
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err := process.Wait(); err != nil {
|
||||
multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the cgroup.
|
||||
|
||||
@@ -6,8 +6,11 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/driver/args"
|
||||
"github.com/hashicorp/nomad/client/driver/environment"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
@@ -29,11 +32,37 @@ func (e *UniversalExecutor) Limit(resources *structs.Resources) error {
|
||||
}
|
||||
|
||||
func (e *UniversalExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error {
|
||||
// No-op
|
||||
taskDir, ok := alloc.TaskDirs[taskName]
|
||||
if !ok {
|
||||
return fmt.Errorf("Error finding task dir for (%s)", taskName)
|
||||
}
|
||||
e.Dir = taskDir
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *UniversalExecutor) Start() error {
|
||||
// Parse the commands arguments and replace instances of Nomad environment
|
||||
// variables.
|
||||
envVars, err := environment.ParseFromList(e.cmd.Env)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
parsedPath, err := args.ParseAndReplace(e.cmd.Path, envVars.Map())
|
||||
if err != nil {
|
||||
return err
|
||||
} else if len(parsedPath) != 1 {
|
||||
return fmt.Errorf("couldn't properly parse command path: %v", e.cmd.Path)
|
||||
}
|
||||
|
||||
e.cmd.Path = parsedPath[0]
|
||||
combined := strings.Join(e.cmd.Args, " ")
|
||||
parsed, err := args.ParseAndReplace(combined, envVars.Map())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.Cmd.Args = parsed
|
||||
|
||||
// We don't want to call ourself. We want to call Start on our embedded Cmd
|
||||
return e.cmd.Start()
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
@@ -62,12 +63,12 @@ var ec2InstanceSpeedMap = map[string]int{
|
||||
"d2.8xlarge": 10000,
|
||||
}
|
||||
|
||||
// EnvAWSFingerprint is used to fingerprint the CPU
|
||||
// EnvAWSFingerprint is used to fingerprint AWS metadata
|
||||
type EnvAWSFingerprint struct {
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewEnvAWSFingerprint is used to create a CPU fingerprint
|
||||
// NewEnvAWSFingerprint is used to create a fingerprint from AWS metadata
|
||||
func NewEnvAWSFingerprint(logger *log.Logger) Fingerprint {
|
||||
f := &EnvAWSFingerprint{logger: logger}
|
||||
return f
|
||||
@@ -93,7 +94,8 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node)
|
||||
|
||||
// assume 2 seconds is enough time for inside AWS network
|
||||
client := &http.Client{
|
||||
Timeout: 2 * time.Second,
|
||||
Timeout: 2 * time.Second,
|
||||
Transport: cleanhttp.DefaultTransport(),
|
||||
}
|
||||
|
||||
keys := []string{
|
||||
@@ -164,7 +166,8 @@ func isAWS() bool {
|
||||
|
||||
// assume 2 seconds is enough time for inside AWS network
|
||||
client := &http.Client{
|
||||
Timeout: 2 * time.Second,
|
||||
Timeout: 2 * time.Second,
|
||||
Transport: cleanhttp.DefaultTransport(),
|
||||
}
|
||||
|
||||
// Query the metadata url for the ami-id, to verify we're on AWS
|
||||
@@ -207,7 +210,8 @@ func (f *EnvAWSFingerprint) linkSpeed() int {
|
||||
|
||||
// assume 2 seconds is enough time for inside AWS network
|
||||
client := &http.Client{
|
||||
Timeout: 2 * time.Second,
|
||||
Timeout: 2 * time.Second,
|
||||
Transport: cleanhttp.DefaultTransport(),
|
||||
}
|
||||
|
||||
res, err := client.Get(metadataURL + "instance-type")
|
||||
|
||||
231
client/fingerprint/env_gce.go
Normal file
@@ -0,0 +1,231 @@
|
||||
package fingerprint
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// This is where the GCE metadata server normally resides. We hardcode the
|
||||
// "instance" path as well since it's the only one we access here.
|
||||
const DEFAULT_GCE_URL = "http://169.254.169.254/computeMetadata/v1/instance/"
|
||||
|
||||
type GCEMetadataNetworkInterface struct {
|
||||
AccessConfigs []struct {
|
||||
ExternalIp string
|
||||
Type string
|
||||
}
|
||||
ForwardedIps []string
|
||||
Ip string
|
||||
Network string
|
||||
}
|
||||
|
||||
type ReqError struct {
|
||||
StatusCode int
|
||||
}
|
||||
|
||||
func (e ReqError) Error() string {
|
||||
return http.StatusText(e.StatusCode)
|
||||
}
|
||||
|
||||
func lastToken(s string) string {
|
||||
index := strings.LastIndex(s, "/")
|
||||
return s[index+1:]
|
||||
}
|
||||
|
||||
// EnvGCEFingerprint is used to fingerprint GCE metadata
|
||||
type EnvGCEFingerprint struct {
|
||||
client *http.Client
|
||||
logger *log.Logger
|
||||
metadataURL string
|
||||
}
|
||||
|
||||
// NewEnvGCEFingerprint is used to create a fingerprint from GCE metadata
|
||||
func NewEnvGCEFingerprint(logger *log.Logger) Fingerprint {
|
||||
// Read the internal metadata URL from the environment, allowing test files to
|
||||
// provide their own
|
||||
metadataURL := os.Getenv("GCE_ENV_URL")
|
||||
if metadataURL == "" {
|
||||
metadataURL = DEFAULT_GCE_URL
|
||||
}
|
||||
|
||||
// assume 2 seconds is enough time for inside GCE network
|
||||
client := &http.Client{
|
||||
Timeout: 2 * time.Second,
|
||||
Transport: cleanhttp.DefaultTransport(),
|
||||
}
|
||||
|
||||
return &EnvGCEFingerprint{
|
||||
client: client,
|
||||
logger: logger,
|
||||
metadataURL: metadataURL,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error) {
|
||||
reqUrl := f.metadataURL + attribute
|
||||
if recursive {
|
||||
reqUrl = reqUrl + "?recursive=true"
|
||||
}
|
||||
|
||||
parsedUrl, err := url.Parse(reqUrl)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req := &http.Request{
|
||||
Method: "GET",
|
||||
URL: parsedUrl,
|
||||
Header: http.Header{
|
||||
"Metadata-Flavor": []string{"Google"},
|
||||
},
|
||||
}
|
||||
|
||||
res, err := f.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
resp, err := ioutil.ReadAll(res.Body)
|
||||
res.Body.Close()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR]: fingerprint.env_gce: Error reading response body for GCE %s", attribute)
|
||||
return "", err
|
||||
}
|
||||
|
||||
if res.StatusCode >= 400 {
|
||||
return "", ReqError{res.StatusCode}
|
||||
}
|
||||
|
||||
return string(resp), nil
|
||||
}
|
||||
|
||||
func checkError(err error, logger *log.Logger, desc string) error {
|
||||
// If it's a URL error, assume we're not actually in an GCE environment.
|
||||
// To the outer layers, this isn't an error so return nil.
|
||||
if _, ok := err.(*url.Error); ok {
|
||||
logger.Printf("[ERR] fingerprint.env_gce: Error querying GCE " + desc + ", skipping")
|
||||
return nil
|
||||
}
|
||||
// Otherwise pass the error through.
|
||||
return err
|
||||
}
|
||||
|
||||
func (f *EnvGCEFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
if !f.isGCE() {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if node.Links == nil {
|
||||
node.Links = make(map[string]string)
|
||||
}
|
||||
|
||||
keys := []string{
|
||||
"hostname",
|
||||
"id",
|
||||
"cpu-platform",
|
||||
"scheduling/automatic-restart",
|
||||
"scheduling/on-host-maintenance",
|
||||
}
|
||||
for _, k := range keys {
|
||||
value, err := f.Get(k, false)
|
||||
if err != nil {
|
||||
return false, checkError(err, f.logger, k)
|
||||
}
|
||||
|
||||
// assume we want blank entries
|
||||
key := strings.Replace(k, "/", ".", -1)
|
||||
node.Attributes["platform.gce."+key] = strings.Trim(string(value), "\n")
|
||||
}
|
||||
|
||||
// These keys need everything before the final slash removed to be usable.
|
||||
keys = []string{
|
||||
"machine-type",
|
||||
"zone",
|
||||
}
|
||||
for _, k := range keys {
|
||||
value, err := f.Get(k, false)
|
||||
if err != nil {
|
||||
return false, checkError(err, f.logger, k)
|
||||
}
|
||||
|
||||
node.Attributes["platform.gce."+k] = strings.Trim(lastToken(value), "\n")
|
||||
}
|
||||
|
||||
// Get internal and external IPs (if they exist)
|
||||
value, err := f.Get("network-interfaces/", true)
|
||||
var interfaces []GCEMetadataNetworkInterface
|
||||
if err := json.Unmarshal([]byte(value), &interfaces); err != nil {
|
||||
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding network interface information: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, intf := range interfaces {
|
||||
prefix := "platform.gce.network." + lastToken(intf.Network)
|
||||
node.Attributes[prefix] = "true"
|
||||
node.Attributes[prefix+".ip"] = strings.Trim(intf.Ip, "\n")
|
||||
for index, accessConfig := range intf.AccessConfigs {
|
||||
node.Attributes[prefix+".external-ip."+strconv.Itoa(index)] = accessConfig.ExternalIp
|
||||
}
|
||||
}
|
||||
|
||||
var tagList []string
|
||||
value, err = f.Get("tags", false)
|
||||
if err != nil {
|
||||
return false, checkError(err, f.logger, "tags")
|
||||
}
|
||||
if err := json.Unmarshal([]byte(value), &tagList); err != nil {
|
||||
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance tags: %s", err.Error())
|
||||
}
|
||||
for _, tag := range tagList {
|
||||
node.Attributes["platform.gce.tag."+tag] = "true"
|
||||
}
|
||||
|
||||
var attrDict map[string]string
|
||||
value, err = f.Get("attributes/", true)
|
||||
if err != nil {
|
||||
return false, checkError(err, f.logger, "attributes/")
|
||||
}
|
||||
if err := json.Unmarshal([]byte(value), &attrDict); err != nil {
|
||||
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance attributes: %s", err.Error())
|
||||
}
|
||||
for k, v := range attrDict {
|
||||
node.Attributes["platform.gce.attr."+k] = strings.Trim(v, "\n")
|
||||
}
|
||||
|
||||
// populate Links
|
||||
node.Links["gce"] = node.Attributes["platform.gce.id"]
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (f *EnvGCEFingerprint) isGCE() bool {
|
||||
// TODO: better way to detect GCE?
|
||||
|
||||
// Query the metadata url for the machine type, to verify we're on GCE
|
||||
machineType, err := f.Get("machine-type", false)
|
||||
if err != nil {
|
||||
if re, ok := err.(ReqError); !ok || re.StatusCode != 404 {
|
||||
// If it wasn't a 404 error, print an error message.
|
||||
f.logger.Printf("[ERR] fingerprint.env_gce: Error querying GCE Metadata URL, skipping")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
match, err := regexp.MatchString("projects/.+/machineTypes/.+", machineType)
|
||||
if !match {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
193
client/fingerprint/env_gce_test.go
Normal file
@@ -0,0 +1,193 @@
|
||||
package fingerprint
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
func TestGCEFingerprint_nonGCE(t *testing.T) {
|
||||
os.Setenv("GCE_ENV_URL", "http://127.0.0.1/computeMetadata/v1/instance/")
|
||||
f := NewEnvGCEFingerprint(testLogger())
|
||||
node := &structs.Node{
|
||||
Attributes: make(map[string]string),
|
||||
}
|
||||
|
||||
ok, err := f.Fingerprint(&config.Config{}, node)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
if ok {
|
||||
t.Fatalf("Should be false without test server")
|
||||
}
|
||||
}
|
||||
|
||||
func testFingerprint_GCE(t *testing.T, withExternalIp bool) {
|
||||
node := &structs.Node{
|
||||
Attributes: make(map[string]string),
|
||||
}
|
||||
|
||||
// configure mock server with fixture routes, data
|
||||
routes := routes{}
|
||||
if err := json.Unmarshal([]byte(GCE_routes), &routes); err != nil {
|
||||
t.Fatalf("Failed to unmarshal JSON in GCE ENV test: %s", err)
|
||||
}
|
||||
networkEndpoint := &endpoint{
|
||||
Uri: "/computeMetadata/v1/instance/network-interfaces/?recursive=true",
|
||||
ContentType: "application/json",
|
||||
}
|
||||
if withExternalIp {
|
||||
networkEndpoint.Body = `[{"accessConfigs":[{"externalIp":"104.44.55.66","type":"ONE_TO_ONE_NAT"},{"externalIp":"104.44.55.67","type":"ONE_TO_ONE_NAT"}],"forwardedIps":[],"ip":"10.240.0.5","network":"projects/555555/networks/default"}]`
|
||||
} else {
|
||||
networkEndpoint.Body = `[{"accessConfigs":[],"forwardedIps":[],"ip":"10.240.0.5","network":"projects/555555/networks/default"}]`
|
||||
}
|
||||
routes.Endpoints = append(routes.Endpoints, networkEndpoint)
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
value, ok := r.Header["Metadata-Flavor"]
|
||||
if !ok {
|
||||
t.Fatal("Metadata-Flavor not present in HTTP request header")
|
||||
}
|
||||
if value[0] != "Google" {
|
||||
t.Fatalf("Expected Metadata-Flavor Google, saw %s", value[0])
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, e := range routes.Endpoints {
|
||||
if r.RequestURI == e.Uri {
|
||||
w.Header().Set("Content-Type", e.ContentType)
|
||||
fmt.Fprintln(w, e.Body)
|
||||
found = true
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
w.WriteHeader(404)
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
os.Setenv("GCE_ENV_URL", ts.URL+"/computeMetadata/v1/instance/")
|
||||
f := NewEnvGCEFingerprint(testLogger())
|
||||
|
||||
ok, err := f.Fingerprint(&config.Config{}, node)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("should apply")
|
||||
}
|
||||
|
||||
keys := []string{
|
||||
"platform.gce.id",
|
||||
"platform.gce.hostname",
|
||||
"platform.gce.zone",
|
||||
"platform.gce.machine-type",
|
||||
"platform.gce.zone",
|
||||
"platform.gce.tag.abc",
|
||||
"platform.gce.tag.def",
|
||||
"platform.gce.attr.ghi",
|
||||
"platform.gce.attr.jkl",
|
||||
}
|
||||
|
||||
for _, k := range keys {
|
||||
assertNodeAttributeContains(t, node, k)
|
||||
}
|
||||
|
||||
if len(node.Links) == 0 {
|
||||
t.Fatalf("Empty links for Node in GCE Fingerprint test")
|
||||
}
|
||||
|
||||
// Make sure Links contains the GCE ID.
|
||||
for _, k := range []string{"gce"} {
|
||||
assertNodeLinksContains(t, node, k)
|
||||
}
|
||||
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.id", "12345")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.hostname", "instance-1.c.project.internal")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.zone", "us-central1-f")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.machine-type", "n1-standard-1")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.network.default", "true")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.network.default.ip", "10.240.0.5")
|
||||
if withExternalIp {
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.network.default.external-ip.0", "104.44.55.66")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.network.default.external-ip.1", "104.44.55.67")
|
||||
} else if _, ok := node.Attributes["platform.gce.network.default.external-ip.0"]; ok {
|
||||
t.Fatal("platform.gce.network.default.external-ip is set without an external IP")
|
||||
}
|
||||
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.scheduling.automatic-restart", "TRUE")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.scheduling.on-host-maintenance", "MIGRATE")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.cpu-platform", "Intel Ivy Bridge")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.tag.abc", "true")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.tag.def", "true")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.attr.ghi", "111")
|
||||
assertNodeAttributeEquals(t, node, "platform.gce.attr.jkl", "222")
|
||||
}
|
||||
|
||||
const GCE_routes = `
|
||||
{
|
||||
"endpoints": [
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/id",
|
||||
"content-type": "text/plain",
|
||||
"body": "12345"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/hostname",
|
||||
"content-type": "text/plain",
|
||||
"body": "instance-1.c.project.internal"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/zone",
|
||||
"content-type": "text/plain",
|
||||
"body": "projects/555555/zones/us-central1-f"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/machine-type",
|
||||
"content-type": "text/plain",
|
||||
"body": "projects/555555/machineTypes/n1-standard-1"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/tags",
|
||||
"content-type": "application/json",
|
||||
"body": "[\"abc\", \"def\"]"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/attributes/?recursive=true",
|
||||
"content-type": "application/json",
|
||||
"body": "{\"ghi\":\"111\",\"jkl\":\"222\"}"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/scheduling/automatic-restart",
|
||||
"content-type": "text/plain",
|
||||
"body": "TRUE"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/scheduling/on-host-maintenance",
|
||||
"content-type": "text/plain",
|
||||
"body": "MIGRATE"
|
||||
},
|
||||
{
|
||||
"uri": "/computeMetadata/v1/instance/cpu-platform",
|
||||
"content-type": "text/plain",
|
||||
"body": "Intel Ivy Bridge"
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
func TestFingerprint_GCEWithExternalIp(t *testing.T) {
|
||||
testFingerprint_GCE(t, true)
|
||||
}
|
||||
|
||||
func TestFingerprint_GCEWithoutExternalIp(t *testing.T) {
|
||||
testFingerprint_GCE(t, false)
|
||||
}
|
||||
@@ -18,6 +18,7 @@ var BuiltinFingerprints = []string{
|
||||
"storage",
|
||||
"network",
|
||||
"env_aws",
|
||||
"env_gce",
|
||||
}
|
||||
|
||||
// builtinFingerprintMap contains the built in registered fingerprints
|
||||
@@ -30,6 +31,7 @@ var builtinFingerprintMap = map[string]Factory{
|
||||
"storage": NewStorageFingerprint,
|
||||
"network": NewNetworkFingerprinter,
|
||||
"env_aws": NewEnvAWSFingerprint,
|
||||
"env_gce": NewEnvGCEFingerprint,
|
||||
}
|
||||
|
||||
// NewFingerprint is used to instantiate and return a new fingerprint
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"log"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
@@ -40,7 +41,7 @@ func (f *HostFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (b
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("Failed to run uname: %s", err)
|
||||
}
|
||||
node.Attributes["kernel.version"] = string(out)
|
||||
node.Attributes["kernel.version"] = strings.Trim(string(out), "\n")
|
||||
}
|
||||
|
||||
node.Attributes["hostname"] = hostInfo.Hostname
|
||||
|
||||
@@ -33,25 +33,36 @@ func NewNetworkFingerprinter(logger *log.Logger) Fingerprint {
|
||||
func (f *NetworkFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// newNetwork is populated and added to the Node's resources
|
||||
newNetwork := &structs.NetworkResource{}
|
||||
defaultDevice := ""
|
||||
|
||||
// eth0 is the default device for Linux, and en0 is default for OS X
|
||||
defaultDevice := "eth0"
|
||||
if "darwin" == runtime.GOOS {
|
||||
defaultDevice = "en0"
|
||||
}
|
||||
// User-defined override for the default interface
|
||||
// 1. Use user-defined network device
|
||||
// 2. Use first interface found in the system for non-dev mode. (dev mode uses lo by default.)
|
||||
if cfg.NetworkInterface != "" {
|
||||
defaultDevice = cfg.NetworkInterface
|
||||
} else {
|
||||
|
||||
intfs, err := net.Interfaces()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, i := range intfs {
|
||||
if (i.Flags&net.FlagUp != 0) && (i.Flags&(net.FlagLoopback|net.FlagPointToPoint) == 0) {
|
||||
if ip := f.ipAddress(i.Name); ip != "" {
|
||||
defaultDevice = i.Name
|
||||
node.Attributes["network.ip-address"] = ip
|
||||
newNetwork.IP = ip
|
||||
newNetwork.CIDR = newNetwork.IP + "/32"
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
newNetwork.Device = defaultDevice
|
||||
|
||||
if ip := f.ipAddress(defaultDevice); ip != "" {
|
||||
node.Attributes["network.ip-address"] = ip
|
||||
newNetwork.IP = ip
|
||||
newNetwork.CIDR = newNetwork.IP + "/32"
|
||||
if defaultDevice != "" {
|
||||
newNetwork.Device = defaultDevice
|
||||
} else {
|
||||
return false, fmt.Errorf("Unable to determine IP on network interface %v", defaultDevice)
|
||||
return false, fmt.Errorf("Unable to find any network interface which has IP address")
|
||||
}
|
||||
|
||||
if throughput := f.linkSpeed(defaultDevice); throughput > 0 {
|
||||
|
||||
@@ -14,8 +14,13 @@ func ExecCompatible(t *testing.T) {
|
||||
}
|
||||
|
||||
func QemuCompatible(t *testing.T) {
|
||||
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
|
||||
t.Skip("Must be root on non-windows environments to run test")
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("Must be on non-windows environments to run test")
|
||||
}
|
||||
// else see if qemu exists
|
||||
_, err := exec.Command("qemu-system-x86_64", "-version").CombinedOutput()
|
||||
if err != nil {
|
||||
t.Skip("Must have Qemu installed for Qemu specific tests to run")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -220,6 +220,7 @@ func DevConfig() *Config {
|
||||
conf.DevMode = true
|
||||
conf.EnableDebug = true
|
||||
conf.DisableAnonymousSignature = true
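// Dev mode binds the network fingerprint to the loopback interface (see the network fingerprint change above, which uses lo in dev mode).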
|
||||
conf.Client.NetworkInterface = "lo"
|
||||
return conf
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -103,6 +104,20 @@ func (c *NodeStatusCommand) Run(args []string) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
m := node.Attributes
|
||||
keys := make([]string, len(m))
|
||||
for k := range m {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
var attributes []string
|
||||
for _, k := range keys {
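// Skip the empty slots pre-allocated by make([]string, len(m)) above.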
|
||||
if k != "" {
|
||||
attributes = append(attributes, fmt.Sprintf("%s:%s", k, m[k]))
|
||||
}
|
||||
}
|
||||
|
||||
// Format the output
|
||||
basic := []string{
|
||||
fmt.Sprintf("ID|%s", node.ID),
|
||||
@@ -111,6 +126,7 @@ func (c *NodeStatusCommand) Run(args []string) int {
|
||||
fmt.Sprintf("Datacenter|%s", node.Datacenter),
|
||||
fmt.Sprintf("Drain|%v", node.Drain),
|
||||
fmt.Sprintf("Status|%s", node.Status),
|
||||
fmt.Sprintf("Attributes|%s", strings.Join(attributes, ", ")),
|
||||
}
|
||||
|
||||
var allocs []string
|
||||
|
||||
@@ -2,6 +2,7 @@ package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/nomad/api"
|
||||
@@ -68,6 +69,9 @@ func (c *ServerMembersCommand) Run(args []string) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// Sort the members
|
||||
sort.Sort(api.AgentMembersNameSort(mem))
|
||||
|
||||
// Format the list
|
||||
var out []string
|
||||
if detailed {
|
||||
|
||||
2
demo/vagrant/Vagrantfile
vendored
@@ -4,7 +4,7 @@
|
||||
$script = <<SCRIPT
|
||||
# Update apt and get dependencies
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y unzip curl wget
|
||||
sudo apt-get install -y unzip curl wget vim
|
||||
|
||||
# Download Nomad
|
||||
echo Fetching Nomad...
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -242,6 +243,34 @@ func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error {
|
||||
m["hard"] = true
|
||||
}
|
||||
|
||||
// If "version" is provided, set the operand
|
||||
// to "version" and the value to the "RTarget"
|
||||
if constraint, ok := m[structs.ConstraintVersion]; ok {
|
||||
m["Operand"] = structs.ConstraintVersion
|
||||
m["RTarget"] = constraint
|
||||
}
|
||||
|
||||
// If "regexp" is provided, set the operand
|
||||
// to "regexp" and the value to the "RTarget"
|
||||
if constraint, ok := m[structs.ConstraintRegex]; ok {
|
||||
m["Operand"] = structs.ConstraintRegex
|
||||
m["RTarget"] = constraint
|
||||
}
|
||||
|
||||
if value, ok := m[structs.ConstraintDistinctHosts]; ok {
|
||||
enabled, err := strconv.ParseBool(value.(string))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If it is not enabled, skip the constraint.
|
||||
if !enabled {
|
||||
continue
|
||||
}
|
||||
|
||||
m["Operand"] = structs.ConstraintDistinctHosts
|
||||
}
|
||||
|
||||
// Build the constraint
|
||||
var c structs.Constraint
|
||||
if err := mapstructure.WeakDecode(m, &c); err != nil {
|
||||
|
||||
@@ -152,6 +152,64 @@ func TestParse(t *testing.T) {
|
||||
false,
|
||||
},
|
||||
|
||||
{
|
||||
"version-constraint.hcl",
|
||||
&structs.Job{
|
||||
ID: "foo",
|
||||
Name: "foo",
|
||||
Priority: 50,
|
||||
Region: "global",
|
||||
Type: "service",
|
||||
Constraints: []*structs.Constraint{
|
||||
&structs.Constraint{
|
||||
Hard: true,
|
||||
LTarget: "$attr.kernel.version",
|
||||
RTarget: "~> 3.2",
|
||||
Operand: structs.ConstraintVersion,
|
||||
},
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
|
||||
{
|
||||
"regexp-constraint.hcl",
|
||||
&structs.Job{
|
||||
ID: "foo",
|
||||
Name: "foo",
|
||||
Priority: 50,
|
||||
Region: "global",
|
||||
Type: "service",
|
||||
Constraints: []*structs.Constraint{
|
||||
&structs.Constraint{
|
||||
Hard: true,
|
||||
LTarget: "$attr.kernel.version",
|
||||
RTarget: "[0-9.]+",
|
||||
Operand: structs.ConstraintRegex,
|
||||
},
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
|
||||
{
|
||||
"distinctHosts-constraint.hcl",
|
||||
&structs.Job{
|
||||
ID: "foo",
|
||||
Name: "foo",
|
||||
Priority: 50,
|
||||
Region: "global",
|
||||
Type: "service",
|
||||
Constraints: []*structs.Constraint{
|
||||
&structs.Constraint{
|
||||
Hard: true,
|
||||
Operand: structs.ConstraintDistinctHosts,
|
||||
},
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
|
||||
{
|
||||
"specify-job.hcl",
|
||||
&structs.Job{
|
||||
|
||||
5
jobspec/test-fixtures/distinctHosts-constraint.hcl
Normal file
@@ -0,0 +1,5 @@
|
||||
job "foo" {
|
||||
constraint {
|
||||
distinct_hosts = "true"
|
||||
}
|
||||
}
|
||||
6
jobspec/test-fixtures/regexp-constraint.hcl
Normal file
@@ -0,0 +1,6 @@
|
||||
job "foo" {
|
||||
constraint {
|
||||
attribute = "$attr.kernel.version"
|
||||
regexp = "[0-9.]+"
|
||||
}
|
||||
}
|
||||
6
jobspec/test-fixtures/version-constraint.hcl
Normal file
@@ -0,0 +1,6 @@
|
||||
job "foo" {
|
||||
constraint {
|
||||
attribute = "$attr.kernel.version"
|
||||
version = "~> 3.2"
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package nomad
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
@@ -18,6 +19,17 @@ const (
|
||||
failedQueue = "_failed"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrNotOutstanding is returned if an evaluation is not outstanding
|
||||
ErrNotOutstanding = errors.New("evaluation is not outstanding")
|
||||
|
||||
// ErrTokenMismatch is returned if the outstanding eval has a different token
|
||||
ErrTokenMismatch = errors.New("evaluation token does not match")
|
||||
|
||||
// ErrNackTimeoutReached is returned if an expired evaluation is reset
|
||||
ErrNackTimeoutReached = errors.New("evaluation nack timeout reached")
|
||||
)
|
||||
|
||||
// EvalBroker is used to manage brokering of evaluations. When an evaluation is
|
||||
// created, due to a change in a job specification or a node, we put it into the
|
||||
// broker. The broker sorts evaluations by priority and scheduler type. This
|
||||
@@ -381,6 +393,24 @@ func (b *EvalBroker) Outstanding(evalID string) (string, bool) {
|
||||
return unack.Token, true
|
||||
}
|
||||
|
||||
// OutstandingReset resets the Nack timer for the EvalID if the
|
||||
// token matches and the eval is outstanding
|
||||
func (b *EvalBroker) OutstandingReset(evalID, token string) error {
|
||||
b.l.RLock()
|
||||
defer b.l.RUnlock()
|
||||
unack, ok := b.unack[evalID]
|
||||
if !ok {
|
||||
return ErrNotOutstanding
|
||||
}
|
||||
if unack.Token != token {
|
||||
return ErrTokenMismatch
|
||||
}
|
||||
if !unack.NackTimer.Reset(b.nackTimeout) {
|
||||
return ErrNackTimeoutReached
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ack is used to positively acknowledge handling an evaluation
|
||||
func (b *EvalBroker) Ack(evalID, token string) error {
|
||||
b.l.Lock()
|
||||
|
||||
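OutstandingReset gives a long-running holder of an evaluation a way to extend its Nack deadline without acking or nacking. A minimal, self-contained sketch of that keep-alive pattern follows; the resetter interface, fakeBroker, keepAlive helper, and the tick interval are illustrative assumptions, only the OutstandingReset signature comes from the change above.

    package main

    import (
        "fmt"
        "time"
    )

    // resetter is the one broker method this sketch relies on; it mirrors
    // the OutstandingReset signature added above.
    type resetter interface {
        OutstandingReset(evalID, token string) error
    }

    // fakeBroker stands in for the real EvalBroker so the sketch runs on its own.
    type fakeBroker struct{}

    func (fakeBroker) OutstandingReset(evalID, token string) error { return nil }

    // keepAlive resets the Nack timer on every tick until done is closed, so a
    // slow worker does not get nacked while it is still processing the eval.
    func keepAlive(b resetter, evalID, token string, every time.Duration, done <-chan struct{}) {
        t := time.NewTicker(every)
        defer t.Stop()
        for {
            select {
            case <-done:
                return
            case <-t.C:
                if err := b.OutstandingReset(evalID, token); err != nil {
                    fmt.Println("lost the eval lease:", err)
                    return
                }
            }
        }
    }

    func main() {
        done := make(chan struct{})
        go keepAlive(fakeBroker{}, "eval-id", "token", 10*time.Millisecond, done)
        time.Sleep(25 * time.Millisecond) // pretend to do scheduling work here
        close(done)
    }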
@@ -89,6 +89,20 @@ func TestEvalBroker_Enqueue_Dequeue_Nack_Ack(t *testing.T) {
|
||||
t.Fatalf("Bad: %#v %#v", token, tokenOut)
|
||||
}
|
||||
|
||||
// OutstandingReset should verify the token
|
||||
err = b.OutstandingReset("nope", "foo")
|
||||
if err != ErrNotOutstanding {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = b.OutstandingReset(out.ID, "foo")
|
||||
if err != ErrTokenMismatch {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
err = b.OutstandingReset(out.ID, tokenOut)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Check the stats
|
||||
stats = b.Stats()
|
||||
if stats.TotalReady != 0 {
|
||||
@@ -560,6 +574,50 @@ func TestEvalBroker_Nack_Timeout(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure we still nack in a timely manner after a reset
|
||||
func TestEvalBroker_Nack_TimeoutReset(t *testing.T) {
|
||||
b := testBroker(t, 5*time.Millisecond)
|
||||
b.SetEnabled(true)
|
||||
|
||||
// Enqueue
|
||||
eval := mock.Eval()
|
||||
err := b.Enqueue(eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Dequeue
|
||||
out, token, err := b.Dequeue(defaultSched, time.Second)
|
||||
start := time.Now()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if out != eval {
|
||||
t.Fatalf("bad: %v", out)
|
||||
}
|
||||
|
||||
// Reset in 2 milliseconds
|
||||
time.Sleep(2 * time.Millisecond)
|
||||
if err := b.OutstandingReset(out.ID, token); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Dequeue, should block on Nack timer
|
||||
out, _, err = b.Dequeue(defaultSched, time.Second)
|
||||
end := time.Now()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if out != eval {
|
||||
t.Fatalf("bad: %v", out)
|
||||
}
|
||||
|
||||
// Check the nack timer
|
||||
if diff := end.Sub(start); diff < 7*time.Millisecond {
|
||||
t.Fatalf("bad: %#v", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvalBroker_DeliveryLimit(t *testing.T) {
|
||||
b := testBroker(t, 0)
|
||||
b.SetEnabled(true)
|
||||
|
||||
@@ -134,12 +134,8 @@ func (e *Eval) Update(args *structs.EvalUpdateRequest,
    eval := args.Evals[0]

    // Verify the evaluation is outstanding, and that the tokens match.
    token, ok := e.srv.evalBroker.Outstanding(eval.ID)
    if !ok {
        return fmt.Errorf("evaluation is not outstanding")
    }
    if args.EvalToken != token {
        return fmt.Errorf("evaluation token does not match")
    if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
        return err
    }

    // Update via Raft
@@ -168,12 +164,8 @@ func (e *Eval) Create(args *structs.EvalUpdateRequest,
    eval := args.Evals[0]

    // Verify the parent evaluation is outstanding, and that the tokens match.
    token, ok := e.srv.evalBroker.Outstanding(eval.PreviousEval)
    if !ok {
        return fmt.Errorf("previous evaluation is not outstanding")
    }
    if args.EvalToken != token {
        return fmt.Errorf("previous evaluation token does not match")
    if err := e.srv.evalBroker.OutstandingReset(eval.PreviousEval, args.EvalToken); err != nil {
        return err
    }

    // Look for the eval

@@ -224,7 +224,9 @@ func TestEvalEndpoint_Update(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestEvalEndpoint_Create(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.NumSchedulers = 0 // Prevent automatic dequeue
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
|
||||
|
||||
@@ -11,7 +11,9 @@ import (
|
||||
)
|
||||
|
||||
func TestJobEndpoint_Register(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.NumSchedulers = 0 // Prevent automatic dequeue
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
@@ -78,7 +80,9 @@ func TestJobEndpoint_Register(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestJobEndpoint_Register_Existing(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.NumSchedulers = 0 // Prevent automatic dequeue
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
@@ -162,7 +166,9 @@ func TestJobEndpoint_Register_Existing(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestJobEndpoint_Evaluate(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.NumSchedulers = 0 // Prevent automatic dequeue
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
@@ -231,7 +237,9 @@ func TestJobEndpoint_Evaluate(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestJobEndpoint_Deregister(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.NumSchedulers = 0 // Prevent automatic dequeue
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
@@ -228,6 +228,7 @@ func TestLeader_EvalBroker_Reset(t *testing.T) {
|
||||
defer s3.Shutdown()
|
||||
servers := []*Server{s1, s2, s3}
|
||||
testJoin(t, s1, s2, s3)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
for _, s := range servers {
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
|
||||
@@ -80,6 +80,9 @@ func Job() *structs.Job {
|
||||
"command": "/bin/date",
|
||||
"args": "+%s",
|
||||
},
|
||||
Env: map[string]string{
|
||||
"FOO": "bar",
|
||||
},
|
||||
Resources: &structs.Resources{
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
@@ -109,6 +112,59 @@ func Job() *structs.Job {
|
||||
return job
|
||||
}
|
||||
|
||||
func SystemJob() *structs.Job {
|
||||
job := &structs.Job{
|
||||
Region: "global",
|
||||
ID: structs.GenerateUUID(),
|
||||
Name: "my-job",
|
||||
Type: structs.JobTypeSystem,
|
||||
Priority: 100,
|
||||
AllAtOnce: false,
|
||||
Datacenters: []string{"dc1"},
|
||||
Constraints: []*structs.Constraint{
|
||||
&structs.Constraint{
|
||||
Hard: true,
|
||||
LTarget: "$attr.kernel.name",
|
||||
RTarget: "linux",
|
||||
Operand: "=",
|
||||
},
|
||||
},
|
||||
TaskGroups: []*structs.TaskGroup{
|
||||
&structs.TaskGroup{
|
||||
Name: "web",
|
||||
Count: 1,
|
||||
Tasks: []*structs.Task{
|
||||
&structs.Task{
|
||||
Name: "web",
|
||||
Driver: "exec",
|
||||
Config: map[string]string{
|
||||
"command": "/bin/date",
|
||||
"args": "+%s",
|
||||
},
|
||||
Resources: &structs.Resources{
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
Networks: []*structs.NetworkResource{
|
||||
&structs.NetworkResource{
|
||||
MBits: 50,
|
||||
DynamicPorts: []string{"http"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Meta: map[string]string{
|
||||
"owner": "armon",
|
||||
},
|
||||
Status: structs.JobStatusPending,
|
||||
CreateIndex: 42,
|
||||
ModifyIndex: 99,
|
||||
}
|
||||
return job
|
||||
}
|
||||
|
||||
func Eval() *structs.Evaluation {
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
|
||||
@@ -154,7 +154,10 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct
|
||||
}
|
||||
|
||||
// Check if we should trigger evaluations
|
||||
if structs.ShouldDrainNode(args.Status) {
|
||||
initToReady := node.Status == structs.NodeStatusInit && args.Status == structs.NodeStatusReady
|
||||
terminalToReady := node.Status == structs.NodeStatusDown && args.Status == structs.NodeStatusReady
|
||||
transitionToReady := initToReady || terminalToReady
|
||||
if structs.ShouldDrainNode(args.Status) || transitionToReady {
|
||||
evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
|
||||
if err != nil {
|
||||
n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
|
||||
@@ -271,7 +274,7 @@ func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUp
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetNode is used to request information about a specific ndoe
|
||||
// GetNode is used to request information about a specific node
|
||||
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
|
||||
reply *structs.SingleNodeResponse) error {
|
||||
if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
|
||||
@@ -312,7 +315,7 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest,
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllocs is used to request allocations for a specific ndoe
|
||||
// GetAllocs is used to request allocations for a specific node
|
||||
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
|
||||
reply *structs.NodeAllocsResponse) error {
|
||||
if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
|
||||
@@ -447,8 +450,18 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6
|
||||
return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
|
||||
}
|
||||
|
||||
sysJobsIter, err := snap.JobsByScheduler("system")
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
|
||||
}
|
||||
|
||||
var sysJobs []*structs.Job
|
||||
for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
|
||||
sysJobs = append(sysJobs, job.(*structs.Job))
|
||||
}
|
||||
|
||||
// Fast-path if nothing to do
|
||||
if len(allocs) == 0 {
|
||||
if len(allocs) == 0 && len(sysJobs) == 0 {
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
@@ -479,6 +492,29 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6
|
||||
evalIDs = append(evalIDs, eval.ID)
|
||||
}
|
||||
|
||||
// Create an evaluation for each system job.
|
||||
for _, job := range sysJobs {
|
||||
// Still dedup on JobID as the node may already have the system job.
|
||||
if _, ok := jobIDs[job.ID]; ok {
|
||||
continue
|
||||
}
|
||||
jobIDs[job.ID] = struct{}{}
|
||||
|
||||
// Create a new eval
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
Type: job.Type,
|
||||
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
||||
JobID: job.ID,
|
||||
NodeID: nodeID,
|
||||
NodeModifyIndex: nodeIndex,
|
||||
Status: structs.EvalStatusPending,
|
||||
}
|
||||
evals = append(evals, eval)
|
||||
evalIDs = append(evalIDs, eval.ID)
|
||||
}
|
||||
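The system-job loop above dedupes on job ID with a map[string]struct{} set so a node that already carries an alloc for a system job does not get two evals for it. The same set idiom in isolation, with hypothetical job names, is shown below.

    package main

    import "fmt"

    func main() {
        seen := make(map[string]struct{})
        jobIDs := []string{"web", "metrics", "web"} // "web" already handled once

        var evalsFor []string
        for _, id := range jobIDs {
            if _, ok := seen[id]; ok {
                continue // already have an eval for this job
            }
            seen[id] = struct{}{}
            evalsFor = append(evalsFor, id)
        }
        fmt.Println(evalsFor) // [web metrics]
    }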
|
||||
// Create the Raft transaction
|
||||
update := &structs.EvalUpdateRequest{
|
||||
Evals: evals,
|
||||
|
||||
@@ -149,6 +149,87 @@ func TestClientEndpoint_UpdateStatus(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientEndpoint_UpdateStatus_GetEvals(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
// Register a system job.
|
||||
job := mock.SystemJob()
|
||||
state := s1.fsm.State()
|
||||
if err := state.UpsertJob(1, job); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Create the register request
|
||||
node := mock.Node()
|
||||
node.Status = structs.NodeStatusInit
|
||||
reg := &structs.NodeRegisterRequest{
|
||||
Node: node,
|
||||
WriteRequest: structs.WriteRequest{Region: "global"},
|
||||
}
|
||||
|
||||
// Fetch the response
|
||||
var resp structs.NodeUpdateResponse
|
||||
if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Check for heartbeat interval
|
||||
ttl := resp.HeartbeatTTL
|
||||
if ttl < s1.config.MinHeartbeatTTL || ttl > 2*s1.config.MinHeartbeatTTL {
|
||||
t.Fatalf("bad: %#v", ttl)
|
||||
}
|
||||
|
||||
// Update the status
|
||||
update := &structs.NodeUpdateStatusRequest{
|
||||
NodeID: node.ID,
|
||||
Status: structs.NodeStatusReady,
|
||||
WriteRequest: structs.WriteRequest{Region: "global"},
|
||||
}
|
||||
var resp2 structs.NodeUpdateResponse
|
||||
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", update, &resp2); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if resp2.Index == 0 {
|
||||
t.Fatalf("bad index: %d", resp2.Index)
|
||||
}
|
||||
|
||||
// Check for an eval caused by the system job.
|
||||
if len(resp2.EvalIDs) != 1 {
|
||||
t.Fatalf("expected one eval; got %#v", resp2.EvalIDs)
|
||||
}
|
||||
|
||||
evalID := resp2.EvalIDs[0]
|
||||
eval, err := state.EvalByID(evalID)
|
||||
if err != nil {
|
||||
t.Fatalf("could not get eval %v", evalID)
|
||||
}
|
||||
|
||||
if eval.Type != "system" {
|
||||
t.Fatalf("unexpected eval type; got %v; want %q", eval.Type, "system")
|
||||
}
|
||||
|
||||
// Check for heartbeat interval
|
||||
ttl = resp2.HeartbeatTTL
|
||||
if ttl < s1.config.MinHeartbeatTTL || ttl > 2*s1.config.MinHeartbeatTTL {
|
||||
t.Fatalf("bad: %#v", ttl)
|
||||
}
|
||||
|
||||
// Check for the node in the FSM
|
||||
out, err := state.NodeByID(node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if out == nil {
|
||||
t.Fatalf("expected node")
|
||||
}
|
||||
if out.ModifyIndex != resp2.Index {
|
||||
t.Fatalf("index mis-match")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientEndpoint_UpdateStatus_HeartbeatOnly(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
defer s1.Shutdown()
|
||||
@@ -476,8 +557,13 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) {
|
||||
// Inject fake evaluations
|
||||
alloc := mock.Alloc()
|
||||
state := s1.fsm.State()
|
||||
err := state.UpsertAllocs(1, []*structs.Allocation{alloc})
|
||||
if err != nil {
|
||||
if err := state.UpsertAllocs(1, []*structs.Allocation{alloc}); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Inject a fake system job.
|
||||
job := mock.SystemJob()
|
||||
if err := state.UpsertJob(1, job); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
@@ -489,47 +575,69 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) {
|
||||
if index == 0 {
|
||||
t.Fatalf("bad: %d", index)
|
||||
}
|
||||
if len(ids) != 1 {
|
||||
if len(ids) != 2 {
|
||||
t.Fatalf("bad: %s", ids)
|
||||
}
|
||||
|
||||
// Lookup the evaluation
|
||||
eval, err := state.EvalByID(ids[0])
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if eval == nil {
|
||||
t.Fatalf("expected eval")
|
||||
}
|
||||
if eval.CreateIndex != index {
|
||||
t.Fatalf("index mis-match")
|
||||
// Lookup the evaluations
|
||||
evalByType := make(map[string]*structs.Evaluation, 2)
|
||||
for _, id := range ids {
|
||||
eval, err := state.EvalByID(id)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if eval == nil {
|
||||
t.Fatalf("expected eval")
|
||||
}
|
||||
|
||||
if old, ok := evalByType[eval.Type]; ok {
|
||||
t.Fatalf("multiple evals of the same type: %v and %v", old, eval)
|
||||
}
|
||||
|
||||
evalByType[eval.Type] = eval
|
||||
}
|
||||
|
||||
if eval.Priority != alloc.Job.Priority {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
if len(evalByType) != 2 {
|
||||
t.Fatalf("Expected a service and system job; got %#v", evalByType)
|
||||
}
|
||||
if eval.Type != alloc.Job.Type {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
}
|
||||
if eval.TriggeredBy != structs.EvalTriggerNodeUpdate {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
}
|
||||
if eval.JobID != alloc.JobID {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
}
|
||||
if eval.NodeID != alloc.NodeID {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
}
|
||||
if eval.NodeModifyIndex != 1 {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
}
|
||||
if eval.Status != structs.EvalStatusPending {
|
||||
t.Fatalf("bad: %#v", eval)
|
||||
|
||||
// Ensure the evals are correct.
|
||||
for schedType, eval := range evalByType {
|
||||
expPriority := alloc.Job.Priority
|
||||
expJobID := alloc.JobID
|
||||
if schedType == "system" {
|
||||
expPriority = job.Priority
|
||||
expJobID = job.ID
|
||||
}
|
||||
|
||||
if eval.CreateIndex != index {
|
||||
t.Fatalf("CreateIndex mis-match on type %v: %#v", schedType, eval)
|
||||
}
|
||||
if eval.TriggeredBy != structs.EvalTriggerNodeUpdate {
|
||||
t.Fatalf("TriggeredBy incorrect on type %v: %#v", schedType, eval)
|
||||
}
|
||||
if eval.NodeID != alloc.NodeID {
|
||||
t.Fatalf("NodeID incorrect on type %v: %#v", schedType, eval)
|
||||
}
|
||||
if eval.NodeModifyIndex != 1 {
|
||||
t.Fatalf("NodeModifyIndex incorrect on type %v: %#v", schedType, eval)
|
||||
}
|
||||
if eval.Status != structs.EvalStatusPending {
|
||||
t.Fatalf("Status incorrect on type %v: %#v", schedType, eval)
|
||||
}
|
||||
if eval.Priority != expPriority {
|
||||
t.Fatalf("Priority incorrect on type %v: %#v", schedType, eval)
|
||||
}
|
||||
if eval.JobID != expJobID {
|
||||
t.Fatalf("JobID incorrect on type %v: %#v", schedType, eval)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientEndpoint_Evaluate(t *testing.T) {
|
||||
s1 := testServer(t, nil)
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.NumSchedulers = 0 // Prevent automatic dequeue
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
@@ -7,12 +7,41 @@ import (
    "github.com/armon/go-metrics"
    "github.com/hashicorp/nomad/nomad/state"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/hashicorp/raft"
)

// planApply is a long lived goroutine that reads plan allocations from
// the plan queue, determines if they can be applied safely and applies
// them via Raft.
//
// Naively, we could simply dequeue a plan, verify, apply and then respond.
// However, the plan application is bounded by the Raft apply time and
// subject to some latency. This creates a stall condition, where we are
// not evaluating, but simply waiting for a transaction to apply.
//
// To avoid this, we overlap verification with apply. This means once
// we've verified plan N we attempt to apply it. However, while waiting
// for apply, we begin to verify plan N+1 under the assumption that plan
// N has succeeded.
//
// In this sense, we track two parallel versions of the world. One is
// the pessimistic one driven by the Raft log which is replicated. The
// other is optimistic and assumes our transactions will succeed. In the
// happy path, this lets us do productive work during the latency of
// apply.
//
// In the unhappy path (Raft transaction fails), effectively we only
// wasted work during a time we would have been waiting anyways. However,
// in anticipation of this case we cannot respond to the plan until
// the Raft log is updated. This means our schedulers will stall,
// but there are many of those and only a single plan verifier.
//
func (s *Server) planApply() {
    // waitCh is used to track an outstanding application while snap
    // holds an optimistic state which includes that plan application.
    var waitCh chan struct{}
    var snap *state.StateSnapshot

    for {
        // Pull the next pending plan, exit if we are no longer leader
        pending, err := s.planQueue.Dequeue(0)
@@ -21,26 +50,30 @@ func (s *Server) planApply() {
|
||||
}
|
||||
|
||||
// Verify the evaluation is outstanding, and that the tokens match.
|
||||
token, ok := s.evalBroker.Outstanding(pending.plan.EvalID)
|
||||
if !ok {
|
||||
s.logger.Printf("[ERR] nomad: plan received for non-outstanding evaluation %s",
|
||||
pending.plan.EvalID)
|
||||
pending.respond(nil, fmt.Errorf("evaluation is not outstanding"))
|
||||
continue
|
||||
}
|
||||
if pending.plan.EvalToken != token {
|
||||
s.logger.Printf("[ERR] nomad: plan received for evaluation %s with wrong token",
|
||||
pending.plan.EvalID)
|
||||
pending.respond(nil, fmt.Errorf("evaluation token does not match"))
|
||||
if err := s.evalBroker.OutstandingReset(pending.plan.EvalID, pending.plan.EvalToken); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: plan rejected for evaluation %s: %v",
|
||||
pending.plan.EvalID, err)
|
||||
pending.respond(nil, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if our last plan has completed
|
||||
select {
|
||||
case <-waitCh:
|
||||
waitCh = nil
|
||||
snap = nil
|
||||
default:
|
||||
}
|
||||
|
||||
// Snapshot the state so that we have a consistent view of the world
|
||||
snap, err := s.fsm.State().Snapshot()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to snapshot state: %v", err)
|
||||
pending.respond(nil, err)
|
||||
continue
|
||||
// if no snapshot is available
|
||||
if waitCh == nil || snap == nil {
|
||||
snap, err = s.fsm.State().Snapshot()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to snapshot state: %v", err)
|
||||
pending.respond(nil, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Evaluate the plan
|
||||
@@ -51,25 +84,40 @@ func (s *Server) planApply() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Apply the plan if there is anything to do
|
||||
if !result.IsNoOp() {
|
||||
allocIndex, err := s.applyPlan(result)
|
||||
// Fast-path the response if there is nothing to do
|
||||
if result.IsNoOp() {
|
||||
pending.respond(result, nil)
|
||||
continue
|
||||
}
|
||||
|
||||
// Ensure any parallel apply is complete before starting the next one.
|
||||
// This also limits how out of date our snapshot can be.
|
||||
if waitCh != nil {
|
||||
<-waitCh
|
||||
snap, err = s.fsm.State().Snapshot()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to apply plan: %v", err)
|
||||
s.logger.Printf("[ERR] nomad: failed to snapshot state: %v", err)
|
||||
pending.respond(nil, err)
|
||||
continue
|
||||
}
|
||||
result.AllocIndex = allocIndex
|
||||
}
|
||||
|
||||
// Respond to the plan
|
||||
pending.respond(result, nil)
|
||||
// Dispatch the Raft transaction for the plan
|
||||
future, err := s.applyPlan(result, snap)
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to submit plan: %v", err)
|
||||
pending.respond(nil, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Respond to the plan in async
|
||||
waitCh = make(chan struct{})
|
||||
go s.asyncPlanWait(waitCh, future, result, pending)
|
||||
}
|
||||
}
|
||||
|
||||
// applyPlan is used to apply the plan result and to return the alloc index
|
||||
func (s *Server) applyPlan(result *structs.PlanResult) (uint64, error) {
|
||||
defer metrics.MeasureSince([]string{"nomad", "plan", "apply"}, time.Now())
|
||||
func (s *Server) applyPlan(result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) {
|
||||
req := structs.AllocUpdateRequest{}
|
||||
for _, updateList := range result.NodeUpdate {
|
||||
req.Alloc = append(req.Alloc, updateList...)
|
||||
@@ -79,8 +127,38 @@ func (s *Server) applyPlan(result *structs.PlanResult) (uint64, error) {
|
||||
}
|
||||
req.Alloc = append(req.Alloc, result.FailedAllocs...)
|
||||
|
||||
_, index, err := s.raftApply(structs.AllocUpdateRequestType, &req)
|
||||
return index, err
|
||||
// Dispatch the Raft transaction
|
||||
future, err := s.raftApplyFuture(structs.AllocUpdateRequestType, &req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Optimistically apply to our state view
|
||||
if snap != nil {
|
||||
nextIdx := s.raft.AppliedIndex() + 1
|
||||
if err := snap.UpsertAllocs(nextIdx, req.Alloc); err != nil {
|
||||
return future, err
|
||||
}
|
||||
}
|
||||
return future, nil
|
||||
}
|
||||
|
||||
// asyncPlanWait is used to apply and respond to a plan async
|
||||
func (s *Server) asyncPlanWait(waitCh chan struct{}, future raft.ApplyFuture,
|
||||
result *structs.PlanResult, pending *pendingPlan) {
|
||||
defer metrics.MeasureSince([]string{"nomad", "plan", "apply"}, time.Now())
|
||||
defer close(waitCh)
|
||||
|
||||
// Wait for the plan to apply
|
||||
if err := future.Error(); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to apply plan: %v", err)
|
||||
pending.respond(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Respond to the plan
|
||||
result.AllocIndex = future.Index()
|
||||
pending.respond(result, nil)
|
||||
}
|
||||
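The verify/apply overlap described in the planApply comment, and finished off by asyncPlanWait above, can be reduced to a small channel pattern: verify plan N while the apply of plan N-1 is still in flight, and only block before dispatching the next apply. A minimal, generic sketch follows; the plan type and apply function are stand-ins, not Nomad's implementation.

    package main

    import "fmt"

    type plan struct{ id int }

    // apply pretends to commit a plan and signals completion on the returned
    // channel, much like waiting on a Raft future in a separate goroutine.
    func apply(p plan) <-chan struct{} {
        done := make(chan struct{})
        go func() {
            // a real implementation would wait on replication here
            close(done)
        }()
        return done
    }

    func main() {
        plans := []plan{{1}, {2}, {3}}
        var inflight <-chan struct{}

        for _, p := range plans {
            // Verification of plan N overlaps with the apply of plan N-1.
            fmt.Println("verified plan", p.id)

            // Ensure only a single apply is outstanding before dispatching
            // the next one, which also bounds how stale our view can get.
            if inflight != nil {
                <-inflight
            }
            inflight = apply(p)
        }
        if inflight != nil {
            <-inflight
        }
    }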
|
||||
// evaluatePlan is used to determine what portions of a plan
|
||||
|
||||
@@ -7,8 +7,17 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/hashicorp/raft"
|
||||
)
|
||||
|
||||
// planWaitFuture is used to wait for the Raft future to complete
|
||||
func planWaitFuture(future raft.ApplyFuture) (uint64, error) {
|
||||
if err := future.Error(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return future.Index(), nil
|
||||
}
|
||||
|
||||
func testRegisterNode(t *testing.T, s *Server, n *structs.Node) {
|
||||
// Create the register request
|
||||
req := &structs.NodeRegisterRequest{
|
||||
@@ -45,8 +54,25 @@ func TestPlanApply_applyPlan(t *testing.T) {
|
||||
FailedAllocs: []*structs.Allocation{allocFail},
|
||||
}
|
||||
|
||||
// Snapshot the state
|
||||
snap, err := s1.State().Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Apply the plan
|
||||
index, err := s1.applyPlan(plan)
|
||||
future, err := s1.applyPlan(plan, snap)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Verify our optimistic snapshot is updated
|
||||
if out, err := snap.AllocByID(alloc.ID); err != nil || out == nil {
|
||||
t.Fatalf("bad: %v %v", out, err)
|
||||
}
|
||||
|
||||
// Check plan does apply cleanly
|
||||
index, err := planWaitFuture(future)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
@@ -86,8 +112,25 @@ func TestPlanApply_applyPlan(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// Snapshot the state
|
||||
snap, err = s1.State().Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Apply the plan
|
||||
index, err = s1.applyPlan(plan)
|
||||
future, err = s1.applyPlan(plan, snap)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Check that our optimistic view is updated
|
||||
if out, _ := snap.AllocByID(allocEvict.ID); out.DesiredStatus != structs.AllocDesiredStatusEvict {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
// Verify plan applies cleanly
|
||||
index, err = planWaitFuture(future)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
19
nomad/rpc.go
@@ -13,6 +13,7 @@ import (
    "github.com/hashicorp/net-rpc-msgpackrpc"
    "github.com/hashicorp/nomad/nomad/state"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/hashicorp/raft"
    "github.com/hashicorp/yamux"
)

@@ -225,12 +226,11 @@ func (s *Server) forwardRegion(region, method string, args interface{}, reply in
    return s.connPool.RPC(region, server.Addr, server.Version, method, args, reply)
}

// raftApply is used to encode a message, run it through raft, and return
// the FSM response along with any errors
func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
// raftApplyFuture is used to encode a message, run it through raft, and return the Raft future.
func (s *Server) raftApplyFuture(t structs.MessageType, msg interface{}) (raft.ApplyFuture, error) {
    buf, err := structs.Encode(t, msg)
    if err != nil {
        return nil, 0, fmt.Errorf("Failed to encode request: %v", err)
        return nil, fmt.Errorf("Failed to encode request: %v", err)
    }

    // Warn if the command is very large
@@ -239,10 +239,19 @@ func (s *Server) raftApply(t structs.MessageType, msg interface{},
    }

    future := s.raft.Apply(buf, enqueueLimit)
    return future, nil
}

// raftApply is used to encode a message, run it through raft, and return
// the FSM response along with any errors
func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
    future, err := s.raftApplyFuture(t, msg)
    if err != nil {
        return nil, 0, err
    }
    if err := future.Error(); err != nil {
        return nil, 0, err
    }

    return future.Response(), future.Index(), nil
}

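Splitting raftApply into raftApplyFuture plus a blocking wrapper is what lets the plan applier submit a log entry and come back to it later. A rough sketch of that fire-then-wait usage follows; the applyFuture interface only mirrors the Error and Index methods used in this diff, and fakeFuture and submit are stand-ins rather than the hashicorp/raft API.

    package main

    import "fmt"

    type applyFuture interface {
        Error() error
        Index() uint64
    }

    type fakeFuture struct{ idx uint64 }

    func (f fakeFuture) Error() error  { return nil }
    func (f fakeFuture) Index() uint64 { return f.idx }

    // submit stands in for raftApplyFuture: it returns immediately with a future.
    func submit() applyFuture { return fakeFuture{idx: 42} }

    func main() {
        future := submit() // analogous to s.raftApplyFuture(...)

        // ... overlap other work here while the entry replicates ...

        if err := future.Error(); err != nil { // analogous to raftApply's wait
            fmt.Println("apply failed:", err)
            return
        }
        fmt.Println("applied at index", future.Index())
    }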
@@ -37,6 +37,8 @@ func TestRPC_forwardRegion(t *testing.T) {
|
||||
})
|
||||
defer s2.Shutdown()
|
||||
testJoin(t, s1, s2)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
testutil.WaitForLeader(t, s2.RPC)
|
||||
|
||||
var out struct{}
|
||||
err := s1.forwardRegion("region2", "Status.Ping", struct{}{}, &out)
|
||||
|
||||
@@ -44,9 +44,9 @@ func testServer(t *testing.T, cb func(*Config)) *Server {
|
||||
config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
|
||||
|
||||
// Tighten the Raft timing
|
||||
config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond
|
||||
config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond
|
||||
config.RaftConfig.ElectionTimeout = 40 * time.Millisecond
|
||||
config.RaftConfig.LeaderLeaseTimeout = 50 * time.Millisecond
|
||||
config.RaftConfig.HeartbeatTimeout = 50 * time.Millisecond
|
||||
config.RaftConfig.ElectionTimeout = 50 * time.Millisecond
|
||||
config.RaftTimeout = 500 * time.Millisecond
|
||||
|
||||
// Invoke the callback if any
|
||||
|
||||
@@ -91,6 +91,15 @@ func jobTableSchema() *memdb.TableSchema {
|
||||
Lowercase: true,
|
||||
},
|
||||
},
|
||||
"type": &memdb.IndexSchema{
|
||||
Name: "type",
|
||||
AllowMissing: false,
|
||||
Unique: false,
|
||||
Indexer: &memdb.StringFieldIndex{
|
||||
Field: "Type",
|
||||
Lowercase: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -399,6 +399,19 @@ func (s *StateStore) Jobs() (memdb.ResultIterator, error) {
    return iter, nil
}

// JobsByScheduler returns an iterator over all the jobs with the specific
// scheduler type.
func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Return an iterator for jobs with the specific type.
    iter, err := txn.Get("jobs", "type", schedulerType)
    if err != nil {
        return nil, err
    }
    return iter, nil
}

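Callers of JobsByScheduler drain the iterator with the usual Next()-until-nil loop, as the node endpoint does for system jobs earlier in this change. A self-contained sketch of that draining pattern follows; the resultIterator, sliceIter, and job types are stand-ins for memdb.ResultIterator and structs.Job.

    package main

    import "fmt"

    type job struct{ ID, Type string }

    // resultIterator mirrors the Next() shape of memdb.ResultIterator.
    type resultIterator interface {
        Next() interface{}
    }

    // sliceIter is a trivial in-memory iterator so the example runs on its own.
    type sliceIter struct {
        items []interface{}
        pos   int
    }

    func (s *sliceIter) Next() interface{} {
        if s.pos >= len(s.items) {
            return nil
        }
        v := s.items[s.pos]
        s.pos++
        return v
    }

    // collectJobs drains the iterator into a typed slice.
    func collectJobs(iter resultIterator) []*job {
        var out []*job
        for raw := iter.Next(); raw != nil; raw = iter.Next() {
            out = append(out, raw.(*job))
        }
        return out
    }

    func main() {
        iter := &sliceIter{items: []interface{}{&job{"a", "system"}, &job{"b", "system"}}}
        for _, j := range collectJobs(iter) {
            fmt.Println(j.ID, j.Type)
        }
    }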
// UpsertEvaluation is used to upsert an evaluation
|
||||
func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
|
||||
txn := s.db.Txn(true)
|
||||
|
||||
@@ -348,6 +348,73 @@ func TestStateStore_Jobs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStateStore_JobsByScheduler(t *testing.T) {
|
||||
state := testStateStore(t)
|
||||
var serviceJobs []*structs.Job
|
||||
var sysJobs []*structs.Job
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
job := mock.Job()
|
||||
serviceJobs = append(serviceJobs, job)
|
||||
|
||||
err := state.UpsertJob(1000+uint64(i), job)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
job := mock.SystemJob()
|
||||
sysJobs = append(sysJobs, job)
|
||||
|
||||
err := state.UpsertJob(2000+uint64(i), job)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
iter, err := state.JobsByScheduler("service")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
var outService []*structs.Job
|
||||
for {
|
||||
raw := iter.Next()
|
||||
if raw == nil {
|
||||
break
|
||||
}
|
||||
outService = append(outService, raw.(*structs.Job))
|
||||
}
|
||||
|
||||
iter, err = state.JobsByScheduler("system")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
var outSystem []*structs.Job
|
||||
for {
|
||||
raw := iter.Next()
|
||||
if raw == nil {
|
||||
break
|
||||
}
|
||||
outSystem = append(outSystem, raw.(*structs.Job))
|
||||
}
|
||||
|
||||
sort.Sort(JobIDSort(serviceJobs))
|
||||
sort.Sort(JobIDSort(sysJobs))
|
||||
sort.Sort(JobIDSort(outService))
|
||||
sort.Sort(JobIDSort(outSystem))
|
||||
|
||||
if !reflect.DeepEqual(serviceJobs, outService) {
|
||||
t.Fatalf("bad: %#v %#v", serviceJobs, outService)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(sysJobs, outSystem) {
|
||||
t.Fatalf("bad: %#v %#v", sysJobs, outSystem)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStateStore_RestoreJob(t *testing.T) {
|
||||
state := testStateStore(t)
|
||||
|
||||
|
||||
@@ -4,11 +4,13 @@ import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-msgpack/codec"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/go-version"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -697,6 +699,7 @@ const (
|
||||
JobTypeCore = "_core"
|
||||
JobTypeService = "service"
|
||||
JobTypeBatch = "batch"
|
||||
JobTypeSystem = "system"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -809,6 +812,12 @@ func (j *Job) Validate() error {
|
||||
if len(j.TaskGroups) == 0 {
|
||||
mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
|
||||
}
|
||||
for idx, constr := range j.Constraints {
|
||||
if err := constr.Validate(); err != nil {
|
||||
outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
|
||||
mErr.Errors = append(mErr.Errors, outer)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for duplicate task groups
|
||||
taskGroups := make(map[string]int)
|
||||
@@ -820,6 +829,12 @@ func (j *Job) Validate() error {
|
||||
} else {
|
||||
taskGroups[tg.Name] = idx
|
||||
}
|
||||
|
||||
if j.Type == "system" && tg.Count != 1 {
|
||||
mErr.Errors = append(mErr.Errors,
|
||||
fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler",
|
||||
idx+1, tg.Count))
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the task group
|
||||
@@ -918,6 +933,12 @@ func (tg *TaskGroup) Validate() error {
|
||||
if len(tg.Tasks) == 0 {
|
||||
mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
|
||||
}
|
||||
for idx, constr := range tg.Constraints {
|
||||
if err := constr.Validate(); err != nil {
|
||||
outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
|
||||
mErr.Errors = append(mErr.Errors, outer)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for duplicate tasks
|
||||
tasks := make(map[string]int)
|
||||
@@ -997,9 +1018,21 @@ func (t *Task) Validate() error {
    if t.Resources == nil {
        mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
    }
    for idx, constr := range t.Constraints {
        if err := constr.Validate(); err != nil {
            outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
            mErr.Errors = append(mErr.Errors, outer)
        }
    }
    return mErr.ErrorOrNil()
}

const (
    ConstraintDistinctHosts = "distinct_hosts"
    ConstraintRegex         = "regexp"
    ConstraintVersion       = "version"
)

// Constraints are used to restrict placement options in the case of
// a hard constraint, and used to prefer a placement in the case of
// a soft constraint.
@@ -1015,6 +1048,26 @@ func (c *Constraint) String() string {
    return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
}

func (c *Constraint) Validate() error {
    var mErr multierror.Error
    if c.Operand == "" {
        mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
    }

    // Perform additional validation based on operand
    switch c.Operand {
    case ConstraintRegex:
        if _, err := regexp.Compile(c.RTarget); err != nil {
            mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
        }
    case ConstraintVersion:
        if _, err := version.NewConstraint(c.RTarget); err != nil {
            mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
        }
    }
    return mErr.ErrorOrNil()
}

const (
    AllocDesiredStatusRun  = "run"  // Allocation should run
    AllocDesiredStatusStop = "stop" // Allocation should stop

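The two operand-specific checks added to Constraint.Validate reduce to "the regexp must compile" and "the version expression must parse as a go-version constraint". A small standalone check of both, using the same stdlib regexp and github.com/hashicorp/go-version calls as the code above rather than the Nomad types themselves:

    package main

    import (
        "fmt"
        "regexp"

        "github.com/hashicorp/go-version"
    )

    func main() {
        // regexp operand: an unbalanced pattern fails to compile
        if _, err := regexp.Compile("(foo"); err != nil {
            fmt.Println("bad regexp rejected:", err)
        }

        // version operand: a non-version expression fails to parse
        if _, err := version.NewConstraint("~> foo"); err != nil {
            fmt.Println("bad version constraint rejected:", err)
        }

        // a well-formed constraint can then be checked against a version
        if c, err := version.NewConstraint("~> 3.2"); err == nil {
            v, _ := version.NewVersion("3.4.1")
            fmt.Println("3.4.1 satisfies ~> 3.2:", c.Check(v)) // true
        }
    }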
@@ -125,6 +125,43 @@ func TestTask_Validate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestConstraint_Validate(t *testing.T) {
|
||||
c := &Constraint{}
|
||||
err := c.Validate()
|
||||
mErr := err.(*multierror.Error)
|
||||
if !strings.Contains(mErr.Errors[0].Error(), "Missing constraint operand") {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
c = &Constraint{
|
||||
LTarget: "$attr.kernel.name",
|
||||
RTarget: "linux",
|
||||
Operand: "=",
|
||||
}
|
||||
err = c.Validate()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Perform additional regexp validation
|
||||
c.Operand = ConstraintRegex
|
||||
c.RTarget = "(foo"
|
||||
err = c.Validate()
|
||||
mErr = err.(*multierror.Error)
|
||||
if !strings.Contains(mErr.Errors[0].Error(), "missing closing") {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Perform version validation
|
||||
c.Operand = ConstraintVersion
|
||||
c.RTarget = "~> foo"
|
||||
err = c.Validate()
|
||||
mErr = err.(*multierror.Error)
|
||||
if !strings.Contains(mErr.Errors[0].Error(), "Malformed constraint") {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResource_NetIndex(t *testing.T) {
|
||||
r := &Resources{
|
||||
Networks: []*NetworkResource{
|
||||
|
||||
@@ -52,7 +52,12 @@ func TestWorker_dequeueEvaluation(t *testing.T) {
|
||||
|
||||
// Create the evaluation
|
||||
eval1 := mock.Eval()
|
||||
s1.evalBroker.Enqueue(eval1)
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
err := s1.evalBroker.Enqueue(eval1)
|
||||
return err == nil, err
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
|
||||
// Create a worker
|
||||
w := &Worker{srv: s1, logger: s1.logger}
|
||||
@@ -82,7 +87,12 @@ func TestWorker_dequeueEvaluation_paused(t *testing.T) {
|
||||
|
||||
// Create the evaluation
|
||||
eval1 := mock.Eval()
|
||||
s1.evalBroker.Enqueue(eval1)
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
err := s1.evalBroker.Enqueue(eval1)
|
||||
return err == nil, err
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
|
||||
// Create a worker
|
||||
w := &Worker{srv: s1, logger: s1.logger}
|
||||
@@ -153,7 +163,12 @@ func TestWorker_sendAck(t *testing.T) {
|
||||
|
||||
// Create the evaluation
|
||||
eval1 := mock.Eval()
|
||||
s1.evalBroker.Enqueue(eval1)
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
err := s1.evalBroker.Enqueue(eval1)
|
||||
return err == nil, err
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
|
||||
// Create a worker
|
||||
w := &Worker{srv: s1, logger: s1.logger}
|
||||
|
||||
@@ -2,7 +2,9 @@ package scheduler
|
||||
|
||||
import (
|
||||
"log"
|
||||
"regexp"
|
||||
|
||||
"github.com/hashicorp/go-version"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
@@ -27,10 +29,36 @@ type Context interface {
|
||||
// which is the existing allocations, removing evictions, and
|
||||
// adding any planned placements.
|
||||
ProposedAllocs(nodeID string) ([]*structs.Allocation, error)
|
||||
|
||||
// RegexpCache is a cache of regular expressions
|
||||
RegexpCache() map[string]*regexp.Regexp
|
||||
|
||||
// ConstraintCache is a cache of version constraints
|
||||
ConstraintCache() map[string]version.Constraints
|
||||
}
|
||||
|
||||
// EvalCache is used to cache certain things during an evaluation
|
||||
type EvalCache struct {
|
||||
reCache map[string]*regexp.Regexp
|
||||
constraintCache map[string]version.Constraints
|
||||
}
|
||||
|
||||
func (e *EvalCache) RegexpCache() map[string]*regexp.Regexp {
|
||||
if e.reCache == nil {
|
||||
e.reCache = make(map[string]*regexp.Regexp)
|
||||
}
|
||||
return e.reCache
|
||||
}
|
||||
func (e *EvalCache) ConstraintCache() map[string]version.Constraints {
|
||||
if e.constraintCache == nil {
|
||||
e.constraintCache = make(map[string]version.Constraints)
|
||||
}
|
||||
return e.constraintCache
|
||||
}
|
||||
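EvalCache above lazily allocates its maps on first use, so a zero value embedded in EvalContext is immediately usable and repeated constraint checks within one evaluation reuse compiled objects. A minimal sketch of the same compile-once pattern for regular expressions, independent of the scheduler types (and, like EvalCache, intended for single-goroutine use within one evaluation):

    package main

    import (
        "fmt"
        "regexp"
    )

    // cache lazily holds compiled regexps keyed by their pattern string.
    type cache struct{ re map[string]*regexp.Regexp }

    func (c *cache) get(pattern string) (*regexp.Regexp, error) {
        if c.re == nil {
            c.re = make(map[string]*regexp.Regexp) // lazy init, zero value works
        }
        if r, ok := c.re[pattern]; ok {
            return r, nil // compiled once, reused afterwards
        }
        r, err := regexp.Compile(pattern)
        if err != nil {
            return nil, err
        }
        c.re[pattern] = r
        return r, nil
    }

    func main() {
        var c cache
        r, _ := c.get(`[0-9.]+`)
        fmt.Println(r.MatchString("4.1.17-generic")) // true
    }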
|
||||
// EvalContext is a Context used during an Evaluation
|
||||
type EvalContext struct {
|
||||
EvalCache
|
||||
state State
|
||||
plan *structs.Plan
|
||||
logger *log.Logger
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
func testContext(t *testing.T) (*state.StateStore, *EvalContext) {
|
||||
func testContext(t testing.TB) (*state.StateStore, *EvalContext) {
|
||||
state, err := state.NewStateStore(os.Stderr)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
|
||||
@@ -3,8 +3,11 @@ package scheduler
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/go-version"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
@@ -127,14 +130,126 @@ func (iter *DriverIterator) Reset() {
|
||||
func (iter *DriverIterator) hasDrivers(option *structs.Node) bool {
|
||||
for driver := range iter.drivers {
|
||||
driverStr := fmt.Sprintf("driver.%s", driver)
|
||||
_, ok := option.Attributes[driverStr]
|
||||
value, ok := option.Attributes[driverStr]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
enabled, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
iter.ctx.Logger().
|
||||
Printf("[WARN] scheduler.DriverIterator: node %v has invalid driver setting %v: %v",
|
||||
option.ID, driverStr, value)
|
||||
return false
|
||||
}
|
||||
|
||||
if !enabled {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
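The hasDrivers change above stops treating the mere presence of a driver attribute as "driver enabled" and instead parses the value with strconv.ParseBool, which is why the updated test sets values like "1", "0", "true", and "False". A quick look at which values count as enabled, disabled, or invalid under that parser:

    package main

    import (
        "fmt"
        "strconv"
    )

    func main() {
        for _, v := range []string{"1", "true", "True", "0", "false", "False", "2"} {
            enabled, err := strconv.ParseBool(v)
            if err != nil {
                fmt.Printf("%q: invalid driver setting (%v)\n", v, err)
                continue
            }
            fmt.Printf("%q: enabled=%v\n", v, enabled)
        }
    }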
|
||||
// ProposedAllocConstraintIterator is a FeasibleIterator which returns nodes that
|
||||
// match constraints that are not static such as Node attributes but are
|
||||
// affected by proposed alloc placements. Examples are distinct_hosts and
|
||||
// tenancy constraints. This is used to filter on job and task group
|
||||
// constraints.
|
||||
type ProposedAllocConstraintIterator struct {
|
||||
ctx Context
|
||||
source FeasibleIterator
|
||||
tg *structs.TaskGroup
|
||||
job *structs.Job
|
||||
|
||||
// Store whether the Job or TaskGroup has a distinct_hosts constraints so
|
||||
// they don't have to be calculated every time Next() is called.
|
||||
tgDistinctHosts bool
|
||||
jobDistinctHosts bool
|
||||
}
|
||||
|
||||
// NewProposedAllocConstraintIterator creates a ProposedAllocConstraintIterator
|
||||
// from a source.
|
||||
func NewProposedAllocConstraintIterator(ctx Context, source FeasibleIterator) *ProposedAllocConstraintIterator {
|
||||
iter := &ProposedAllocConstraintIterator{
|
||||
ctx: ctx,
|
||||
source: source,
|
||||
}
|
||||
return iter
|
||||
}
|
||||
|
||||
func (iter *ProposedAllocConstraintIterator) SetTaskGroup(tg *structs.TaskGroup) {
|
||||
iter.tg = tg
|
||||
iter.tgDistinctHosts = iter.hasDistinctHostsConstraint(tg.Constraints)
|
||||
}
|
||||
|
||||
func (iter *ProposedAllocConstraintIterator) SetJob(job *structs.Job) {
|
||||
iter.job = job
|
||||
iter.jobDistinctHosts = iter.hasDistinctHostsConstraint(job.Constraints)
|
||||
}
|
||||
|
||||
func (iter *ProposedAllocConstraintIterator) hasDistinctHostsConstraint(constraints []*structs.Constraint) bool {
|
||||
for _, con := range constraints {
|
||||
if con.Operand == structs.ConstraintDistinctHosts {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (iter *ProposedAllocConstraintIterator) Next() *structs.Node {
|
||||
for {
|
||||
// Get the next option from the source
|
||||
option := iter.source.Next()
|
||||
|
||||
// Hot-path if the option is nil or there are no distinct_hosts constraints.
|
||||
if option == nil || !(iter.jobDistinctHosts || iter.tgDistinctHosts) {
|
||||
return option
|
||||
}
|
||||
|
||||
if !iter.satisfiesDistinctHosts(option) {
|
||||
iter.ctx.Metrics().FilterNode(option, structs.ConstraintDistinctHosts)
|
||||
continue
|
||||
}
|
||||
|
||||
return option
|
||||
}
|
||||
}
|
||||
|
||||
// satisfiesDistinctHosts checks if the node satisfies a distinct_hosts
|
||||
// constraint either specified at the job level or the TaskGroup level.
|
||||
func (iter *ProposedAllocConstraintIterator) satisfiesDistinctHosts(option *structs.Node) bool {
|
||||
// Check if there is no constraint set.
|
||||
if !(iter.jobDistinctHosts || iter.tgDistinctHosts) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Get the proposed allocations
|
||||
proposed, err := iter.ctx.ProposedAllocs(option.ID)
|
||||
if err != nil {
|
||||
iter.ctx.Logger().Printf(
|
||||
"[ERR] scheduler.dynamic-constraint: failed to get proposed allocations: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip the node if the task group has already been allocated on it.
|
||||
for _, alloc := range proposed {
|
||||
// If the job has a distinct_hosts constraint we only need an alloc
|
||||
// collision on the JobID but if the constraint is on the TaskGroup then
|
||||
// we need both a job and TaskGroup collision.
|
||||
jobCollision := alloc.JobID == iter.job.ID
|
||||
taskCollision := alloc.TaskGroup == iter.tg.Name
|
||||
if iter.jobDistinctHosts && jobCollision || jobCollision && taskCollision {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (iter *ProposedAllocConstraintIterator) Reset() {
|
||||
iter.source.Reset()
|
||||
}
|
||||
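The core of satisfiesDistinctHosts above is a small collision rule: with a job-level distinct_hosts constraint, any proposed alloc of the same job blocks the node; with only a group-level constraint, both the job and the task group must match. That rule pulled out as a tiny predicate over simplified alloc data, where the alloc type is a stand-in for structs.Allocation:

    package main

    import "fmt"

    type alloc struct{ JobID, TaskGroup string }

    // collides reports whether a proposed alloc on the node blocks placement.
    func collides(a alloc, jobID, tg string, jobDistinct bool) bool {
        jobCollision := a.JobID == jobID
        taskCollision := a.TaskGroup == tg
        return (jobDistinct && jobCollision) || (jobCollision && taskCollision)
    }

    func main() {
        existing := alloc{JobID: "foo", TaskGroup: "bar"}

        // Job-level distinct_hosts: any alloc of the same job blocks the node.
        fmt.Println(collides(existing, "foo", "baz", true)) // true

        // Group-level only: a different task group of the same job is fine.
        fmt.Println(collides(existing, "foo", "baz", false)) // false
    }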
|
||||
// ConstraintIterator is a FeasibleIterator which returns nodes
|
||||
// that match a given set of constraints. This is used to filter
|
||||
// on job, task group, and task constraints.
|
||||
@@ -204,7 +319,7 @@ func (iter *ConstraintIterator) meetsConstraint(constraint *structs.Constraint,
|
||||
}
|
||||
|
||||
// Check if satisfied
|
||||
return checkConstraint(constraint.Operand, lVal, rVal)
|
||||
return checkConstraint(iter.ctx, constraint.Operand, lVal, rVal)
|
||||
}
|
||||
|
||||
// resolveConstraintTarget is used to resolve the LTarget and RTarget of a Constraint
|
||||
@@ -241,19 +356,129 @@ func resolveConstraintTarget(target string, node *structs.Node) (interface{}, bo
|
||||
}
|
||||
|
||||
// checkConstraint checks if a constraint is satisfied
|
||||
func checkConstraint(operand string, lVal, rVal interface{}) bool {
|
||||
func checkConstraint(ctx Context, operand string, lVal, rVal interface{}) bool {
|
||||
// Check for constraints not handled by this iterator.
|
||||
switch operand {
|
||||
case structs.ConstraintDistinctHosts:
|
||||
return true
|
||||
default:
|
||||
break
|
||||
}
|
||||
|
||||
switch operand {
|
||||
case "=", "==", "is":
|
||||
return reflect.DeepEqual(lVal, rVal)
|
||||
case "!=", "not":
|
||||
return !reflect.DeepEqual(lVal, rVal)
|
||||
case "<", "<=", ">", ">=":
|
||||
// TODO: Implement
|
||||
return false
|
||||
case "contains":
|
||||
// TODO: Implement
|
||||
return false
|
||||
return checkLexicalOrder(operand, lVal, rVal)
|
||||
case structs.ConstraintVersion:
|
||||
return checkVersionConstraint(ctx, lVal, rVal)
|
||||
case structs.ConstraintRegex:
|
||||
return checkRegexpConstraint(ctx, lVal, rVal)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// checkLexicalOrder is used to check for lexical ordering
|
||||
func checkLexicalOrder(op string, lVal, rVal interface{}) bool {
|
||||
// Ensure the values are strings
|
||||
lStr, ok := lVal.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
rStr, ok := rVal.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
switch op {
|
||||
case "<":
|
||||
return lStr < rStr
|
||||
case "<=":
|
||||
return lStr <= rStr
|
||||
case ">":
|
||||
return lStr > rStr
|
||||
case ">=":
|
||||
return lStr >= rStr
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// checkVersionConstraint is used to compare a version on the
|
||||
// left hand side with a set of constraints on the right hand side
|
||||
func checkVersionConstraint(ctx Context, lVal, rVal interface{}) bool {
|
||||
// Parse the version
|
||||
var versionStr string
|
||||
switch v := lVal.(type) {
|
||||
case string:
|
||||
versionStr = v
|
||||
case int:
|
||||
versionStr = fmt.Sprintf("%d", v)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
||||
// Parse the version
|
||||
vers, err := version.NewVersion(versionStr)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Constraint must be a string
|
||||
constraintStr, ok := rVal.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check the cache for a match
|
||||
cache := ctx.ConstraintCache()
|
||||
constraints := cache[constraintStr]
|
||||
|
||||
// Parse the constraints
|
||||
if constraints == nil {
|
||||
constraints, err = version.NewConstraint(constraintStr)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
cache[constraintStr] = constraints
|
||||
}
|
||||
|
||||
// Check the constraints against the version
|
||||
return constraints.Check(vers)
|
||||
}
|
||||
|
||||
// checkRegexpConstraint is used to compare a value on the
|
||||
// left hand side with a regexp on the right hand side
|
||||
func checkRegexpConstraint(ctx Context, lVal, rVal interface{}) bool {
|
||||
// Ensure left-hand is string
|
||||
lStr, ok := lVal.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
// Regexp must be a string
|
||||
regexpStr, ok := rVal.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check the cache
|
||||
cache := ctx.RegexpCache()
|
||||
re := cache[regexpStr]
|
||||
|
||||
// Parse the regexp
|
||||
if re == nil {
|
||||
var err error
|
||||
re, err = regexp.Compile(regexpStr)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
cache[regexpStr] = re
|
||||
}
|
||||
|
||||
// Look for a match
|
||||
return re.MatchString(lStr)
|
||||
}
|
||||
|
||||
@@ -82,11 +82,14 @@ func TestDriverIterator(t *testing.T) {
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
static := NewStaticIterator(ctx, nodes)
|
||||
|
||||
nodes[0].Attributes["driver.foo"] = "2"
|
||||
nodes[2].Attributes["driver.foo"] = "2"
|
||||
nodes[0].Attributes["driver.foo"] = "1"
|
||||
nodes[1].Attributes["driver.foo"] = "0"
|
||||
nodes[2].Attributes["driver.foo"] = "true"
|
||||
nodes[3].Attributes["driver.foo"] = "False"
|
||||
|
||||
drivers := map[string]struct{}{
|
||||
"exec": struct{}{},
|
||||
@@ -244,15 +247,315 @@ func TestCheckConstraint(t *testing.T) {
|
||||
lVal: "foo", rVal: "bar",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
op: structs.ConstraintVersion,
|
||||
lVal: "1.2.3", rVal: "~> 1.0",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
op: structs.ConstraintRegex,
|
||||
lVal: "foobarbaz", rVal: "[\\w]+",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
op: "<",
|
||||
lVal: "foo", rVal: "bar",
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if res := checkConstraint(tc.op, tc.lVal, tc.rVal); res != tc.result {
|
||||
_, ctx := testContext(t)
|
||||
if res := checkConstraint(ctx, tc.op, tc.lVal, tc.rVal); res != tc.result {
|
||||
t.Fatalf("TC: %#v, Result: %v", tc, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckLexicalOrder(t *testing.T) {
|
||||
type tcase struct {
|
||||
op string
|
||||
lVal, rVal interface{}
|
||||
result bool
|
||||
}
|
||||
cases := []tcase{
|
||||
{
|
||||
op: "<",
|
||||
lVal: "bar", rVal: "foo",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
op: "<=",
|
||||
lVal: "foo", rVal: "foo",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
op: ">",
|
||||
lVal: "bar", rVal: "foo",
|
||||
result: false,
|
||||
},
|
||||
{
|
||||
op: ">=",
|
||||
lVal: "bar", rVal: "bar",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
op: ">",
|
||||
lVal: 1, rVal: "foo",
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if res := checkLexicalOrder(tc.op, tc.lVal, tc.rVal); res != tc.result {
|
||||
t.Fatalf("TC: %#v, Result: %v", tc, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckVersionConstraint(t *testing.T) {
|
||||
type tcase struct {
|
||||
lVal, rVal interface{}
|
||||
result bool
|
||||
}
|
||||
cases := []tcase{
|
||||
{
|
||||
lVal: "1.2.3", rVal: "~> 1.0",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
lVal: "1.2.3", rVal: ">= 1.0, < 1.4",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
lVal: "2.0.1", rVal: "~> 1.0",
|
||||
result: false,
|
||||
},
|
||||
{
|
||||
lVal: "1.4", rVal: ">= 1.0, < 1.4",
|
||||
result: false,
|
||||
},
|
||||
{
|
||||
lVal: 1, rVal: "~> 1.0",
|
||||
result: true,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
_, ctx := testContext(t)
|
||||
if res := checkVersionConstraint(ctx, tc.lVal, tc.rVal); res != tc.result {
|
||||
t.Fatalf("TC: %#v, Result: %v", tc, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckRegexpConstraint(t *testing.T) {
|
||||
type tcase struct {
|
||||
lVal, rVal interface{}
|
||||
result bool
|
||||
}
|
||||
cases := []tcase{
|
||||
{
|
||||
lVal: "foobar", rVal: "bar",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
lVal: "foobar", rVal: "^foo",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
lVal: "foobar", rVal: "^bar",
|
||||
result: false,
|
||||
},
|
||||
{
|
||||
lVal: "zipzap", rVal: "foo",
|
||||
result: false,
|
||||
},
|
||||
{
|
||||
lVal: 1, rVal: "foo",
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
_, ctx := testContext(t)
|
||||
if res := checkRegexpConstraint(ctx, tc.lVal, tc.rVal); res != tc.result {
|
||||
t.Fatalf("TC: %#v, Result: %v", tc, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestProposedAllocConstraint_JobDistinctHosts(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
static := NewStaticIterator(ctx, nodes)
|
||||
|
||||
// Create a job with a distinct_hosts constraint and two task groups.
|
||||
tg1 := &structs.TaskGroup{Name: "bar"}
|
||||
tg2 := &structs.TaskGroup{Name: "baz"}
|
||||
|
||||
job := &structs.Job{
|
||||
ID: "foo",
|
||||
Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
|
||||
TaskGroups: []*structs.TaskGroup{tg1, tg2},
|
||||
}
|
||||
|
||||
propsed := NewProposedAllocConstraintIterator(ctx, static)
|
||||
propsed.SetTaskGroup(tg1)
|
||||
propsed.SetJob(job)
|
||||
|
||||
out := collectFeasible(propsed)
|
||||
if len(out) != 4 {
|
||||
t.Fatalf("Bad: %#v", out)
|
||||
}
|
||||
|
||||
selected := make(map[string]struct{}, 4)
|
||||
for _, option := range out {
|
||||
if _, ok := selected[option.ID]; ok {
|
||||
t.Fatalf("selected node %v for more than one alloc", option)
|
||||
}
|
||||
selected[option.ID] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func TestProposedAllocConstraint_JobDistinctHosts_Infeasible(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
static := NewStaticIterator(ctx, nodes)
|
||||
|
||||
// Create a job with a distinct_hosts constraint and two task groups.
|
||||
tg1 := &structs.TaskGroup{Name: "bar"}
|
||||
tg2 := &structs.TaskGroup{Name: "baz"}
|
||||
|
||||
job := &structs.Job{
|
||||
ID: "foo",
|
||||
Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
|
||||
TaskGroups: []*structs.TaskGroup{tg1, tg2},
|
||||
}
|
||||
|
||||
// Add allocs placing tg1 on node1 and tg2 on node2. This should make the
|
||||
// job unsatisfiable.
|
||||
plan := ctx.Plan()
|
||||
plan.NodeAllocation[nodes[0].ID] = []*structs.Allocation{
|
||||
&structs.Allocation{
|
||||
TaskGroup: tg1.Name,
|
||||
JobID: job.ID,
|
||||
},
|
||||
|
||||
// Should be ignored as it is a different job.
|
||||
&structs.Allocation{
|
||||
TaskGroup: tg2.Name,
|
||||
JobID: "ignore 2",
|
||||
},
|
||||
}
|
||||
plan.NodeAllocation[nodes[1].ID] = []*structs.Allocation{
|
||||
&structs.Allocation{
|
||||
TaskGroup: tg2.Name,
|
||||
JobID: job.ID,
|
||||
},
|
||||
|
||||
// Should be ignored as it is a different job.
|
||||
&structs.Allocation{
|
||||
TaskGroup: tg1.Name,
|
||||
JobID: "ignore 2",
|
||||
},
|
||||
}
|
||||
|
||||
propsed := NewProposedAllocConstraintIterator(ctx, static)
|
||||
propsed.SetTaskGroup(tg1)
|
||||
propsed.SetJob(job)
|
||||
|
||||
out := collectFeasible(propsed)
|
||||
if len(out) != 0 {
|
||||
t.Fatalf("Bad: %#v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProposedAllocConstraint_JobDistinctHosts_InfeasibleCount(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
static := NewStaticIterator(ctx, nodes)
|
||||
|
||||
// Create a job with a distinct_hosts constraint and three task groups.
|
||||
tg1 := &structs.TaskGroup{Name: "bar"}
|
||||
tg2 := &structs.TaskGroup{Name: "baz"}
|
||||
tg3 := &structs.TaskGroup{Name: "bam"}
|
||||
|
||||
job := &structs.Job{
|
||||
ID: "foo",
|
||||
Constraints: []*structs.Constraint{{Operand: structs.ConstraintDistinctHosts}},
|
||||
TaskGroups: []*structs.TaskGroup{tg1, tg2, tg3},
|
||||
}
|
||||
|
||||
proposed := NewProposedAllocConstraintIterator(ctx, static)
proposed.SetTaskGroup(tg1)
proposed.SetJob(job)

// It should not be able to place three task groups with only two nodes.
out := collectFeasible(proposed)
|
||||
if len(out) != 2 {
|
||||
t.Fatalf("Bad: %#v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProposedAllocConstraint_TaskGroupDistinctHosts(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
static := NewStaticIterator(ctx, nodes)
|
||||
|
||||
// Create a task group with a distinct_hosts constraint.
|
||||
taskGroup := &structs.TaskGroup{
|
||||
Name: "example",
|
||||
Constraints: []*structs.Constraint{
|
||||
{Operand: structs.ConstraintDistinctHosts},
|
||||
},
|
||||
}
|
||||
|
||||
// Add a planned alloc to node1.
|
||||
plan := ctx.Plan()
|
||||
plan.NodeAllocation[nodes[0].ID] = []*structs.Allocation{
|
||||
&structs.Allocation{
|
||||
TaskGroup: taskGroup.Name,
|
||||
JobID: "foo",
|
||||
},
|
||||
}
|
||||
|
||||
// Add a planned alloc to node2 with the same task group name but a
|
||||
// different job.
|
||||
plan.NodeAllocation[nodes[1].ID] = []*structs.Allocation{
|
||||
&structs.Allocation{
|
||||
TaskGroup: taskGroup.Name,
|
||||
JobID: "bar",
|
||||
},
|
||||
}
|
||||
|
||||
proposed := NewProposedAllocConstraintIterator(ctx, static)
proposed.SetTaskGroup(taskGroup)
proposed.SetJob(&structs.Job{ID: "foo"})

out := collectFeasible(proposed)
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("Bad: %#v", out)
|
||||
}
|
||||
|
||||
// Expect it to skip the first node as there is a previous alloc on it for
|
||||
// the same task group.
|
||||
if out[0] != nodes[1] {
|
||||
t.Fatalf("Bad: %v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func collectFeasible(iter FeasibleIterator) (out []*structs.Node) {
|
||||
for {
|
||||
next := iter.Next()
|
||||
|
||||
@@ -82,18 +82,6 @@ func NewBatchScheduler(logger *log.Logger, state State, planner Planner) Schedul
|
||||
return s
|
||||
}
|
||||
|
||||
// setStatus is used to update the status of the evaluation
|
||||
func (s *GenericScheduler) setStatus(status, desc string) error {
|
||||
s.logger.Printf("[DEBUG] sched: %#v: setting status to %s", s.eval, status)
|
||||
newEval := s.eval.Copy()
|
||||
newEval.Status = status
|
||||
newEval.StatusDescription = desc
|
||||
if s.nextEval != nil {
|
||||
newEval.NextEval = s.nextEval.ID
|
||||
}
|
||||
return s.planner.UpdateEval(newEval)
|
||||
}
|
||||
|
||||
// Process is used to handle a single evaluation
|
||||
func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
|
||||
// Store the evaluation
|
||||
@@ -106,7 +94,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
|
||||
default:
|
||||
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
|
||||
eval.TriggeredBy)
|
||||
return s.setStatus(structs.EvalStatusFailed, desc)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
|
||||
}
|
||||
|
||||
// Retry up to the maxScheduleAttempts
|
||||
@@ -116,13 +104,13 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
|
||||
}
|
||||
if err := retryMax(limit, s.process); err != nil {
|
||||
if statusErr, ok := err.(*SetStatusError); ok {
|
||||
return s.setStatus(statusErr.EvalStatus, err.Error())
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Update the status to complete
|
||||
return s.setStatus(structs.EvalStatusComplete, "")
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
|
||||
}
|
||||
|
||||
// process is wrapped in retryMax to iteratively run the handler until we have no
|
||||
@@ -143,7 +131,7 @@ func (s *GenericScheduler) process() (bool, error) {
|
||||
s.ctx = NewEvalContext(s.state, s.plan, s.logger)
|
||||
|
||||
// Construct the placement stack
|
||||
s.stack = NewGenericStack(s.batch, s.ctx, nil)
|
||||
s.stack = NewGenericStack(s.batch, s.ctx)
|
||||
if s.job != nil {
|
||||
s.stack.SetJob(s.job)
|
||||
}
|
||||
@@ -231,7 +219,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
|
||||
}
|
||||
|
||||
// Attempt to do the upgrades in place
|
||||
diff.update = s.inplaceUpdate(diff.update)
|
||||
diff.update = inplaceUpdate(s.ctx, s.eval, s.job, s.stack, diff.update)
|
||||
|
||||
// Check if a rolling upgrade strategy is being used
|
||||
limit := len(diff.update) + len(diff.migrate)
|
||||
@@ -240,10 +228,10 @@ func (s *GenericScheduler) computeJobAllocs() error {
|
||||
}
|
||||
|
||||
// Treat migrations as an eviction and a new placement.
|
||||
s.evictAndPlace(diff, diff.migrate, allocMigrating, &limit)
|
||||
s.limitReached = evictAndPlace(s.ctx, diff, diff.migrate, allocMigrating, &limit)
|
||||
|
||||
// Treat non in-place updates as an eviction and new placement.
|
||||
s.evictAndPlace(diff, diff.update, allocUpdating, &limit)
|
||||
s.limitReached = evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit)
|
||||
|
||||
// Nothing remaining to do if placement is not required
|
||||
if len(diff.place) == 0 {
|
||||
@@ -254,101 +242,6 @@ func (s *GenericScheduler) computeJobAllocs() error {
|
||||
return s.computePlacements(diff.place)
|
||||
}
|
||||
|
||||
// evictAndPlace is used to mark allocations for evicts and add them to the placement queue
|
||||
func (s *GenericScheduler) evictAndPlace(diff *diffResult, allocs []allocTuple, desc string, limit *int) {
|
||||
n := len(allocs)
|
||||
for i := 0; i < n && i < *limit; i++ {
|
||||
a := allocs[i]
|
||||
s.plan.AppendUpdate(a.Alloc, structs.AllocDesiredStatusStop, desc)
|
||||
diff.place = append(diff.place, a)
|
||||
}
|
||||
if n <= *limit {
|
||||
*limit -= n
|
||||
} else {
|
||||
*limit = 0
|
||||
s.limitReached = true
|
||||
}
|
||||
}
|
||||
|
||||
// inplaceUpdate attempts to update allocations in-place where possible.
|
||||
func (s *GenericScheduler) inplaceUpdate(updates []allocTuple) []allocTuple {
|
||||
n := len(updates)
|
||||
inplace := 0
|
||||
for i := 0; i < n; i++ {
|
||||
// Get the udpate
|
||||
update := updates[i]
|
||||
|
||||
// Check if the task drivers or config has changed, requires
|
||||
// a rolling upgrade since that cannot be done in-place.
|
||||
existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)
|
||||
if tasksUpdated(update.TaskGroup, existing) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get the existing node
|
||||
node, err := s.state.NodeByID(update.Alloc.NodeID)
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] sched: %#v failed to get node '%s': %v",
|
||||
s.eval, update.Alloc.NodeID, err)
|
||||
continue
|
||||
}
|
||||
if node == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Set the existing node as the base set
|
||||
s.stack.SetNodes([]*structs.Node{node})
|
||||
|
||||
// Stage an eviction of the current allocation
|
||||
s.plan.AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop,
|
||||
allocInPlace)
|
||||
|
||||
// Attempt to match the task group
|
||||
option, size := s.stack.Select(update.TaskGroup)
|
||||
|
||||
// Pop the allocation
|
||||
s.plan.PopUpdate(update.Alloc)
|
||||
|
||||
// Skip if we could not do an in-place update
|
||||
if option == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Restore the network offers from the existing allocation.
|
||||
// We do not allow network resources (reserved/dynamic ports)
|
||||
// to be updated. This is guarded in taskUpdated, so we can
|
||||
// safely restore those here.
|
||||
for task, resources := range option.TaskResources {
|
||||
existing := update.Alloc.TaskResources[task]
|
||||
resources.Networks = existing.Networks
|
||||
}
|
||||
|
||||
// Create a shallow copy
|
||||
newAlloc := new(structs.Allocation)
|
||||
*newAlloc = *update.Alloc
|
||||
|
||||
// Update the allocation
|
||||
newAlloc.EvalID = s.eval.ID
|
||||
newAlloc.Job = s.job
|
||||
newAlloc.Resources = size
|
||||
newAlloc.TaskResources = option.TaskResources
|
||||
newAlloc.Metrics = s.ctx.Metrics()
|
||||
newAlloc.DesiredStatus = structs.AllocDesiredStatusRun
|
||||
newAlloc.ClientStatus = structs.AllocClientStatusPending
|
||||
s.plan.AppendAlloc(newAlloc)
|
||||
|
||||
// Remove this allocation from the slice
|
||||
updates[i] = updates[n-1]
|
||||
i--
|
||||
n--
|
||||
inplace++
|
||||
}
|
||||
if len(updates) > 0 {
|
||||
s.logger.Printf("[DEBUG] sched: %#v: %d in-place updates of %d", s.eval, inplace, len(updates))
|
||||
}
|
||||
return updates[:n]
|
||||
}
|
||||
|
||||
// computePlacements computes placements for allocations
|
||||
func (s *GenericScheduler) computePlacements(place []allocTuple) error {
|
||||
// Get the base nodes
|
||||
|
||||
@@ -22,7 +22,7 @@ func TestServiceSched_JobRegister(t *testing.T) {
|
||||
job := mock.Job()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
// Create a mock evaluation to deregister the job
|
||||
// Create a mock evaluation to register the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
@@ -71,7 +71,7 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
|
||||
job := mock.Job()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
// Create a mock evaluation to deregister the job
|
||||
// Create a mock evaluation to register the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
@@ -550,7 +550,7 @@ func TestServiceSched_RetryLimit(t *testing.T) {
|
||||
job := mock.Job()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
// Create a mock evaluation to deregister the job
|
||||
// Create a mock evaluation to register the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
var BuiltinSchedulers = map[string]Factory{
|
||||
"service": NewServiceScheduler,
|
||||
"batch": NewBatchScheduler,
|
||||
"system": NewSystemScheduler,
|
||||
}
|
||||
|
||||
// NewScheduler is used to instantiate and return a new scheduler
|
||||
|
||||
@@ -35,20 +35,21 @@ type Stack interface {
|
||||
// GenericStack is the Stack used for the Generic scheduler. It is
|
||||
// designed to make better placement decisions at the cost of performance.
|
||||
type GenericStack struct {
|
||||
batch bool
|
||||
ctx Context
|
||||
source *StaticIterator
|
||||
jobConstraint *ConstraintIterator
|
||||
taskGroupDrivers *DriverIterator
|
||||
taskGroupConstraint *ConstraintIterator
|
||||
binPack *BinPackIterator
|
||||
jobAntiAff *JobAntiAffinityIterator
|
||||
limit *LimitIterator
|
||||
maxScore *MaxScoreIterator
|
||||
batch bool
|
||||
ctx Context
|
||||
source *StaticIterator
|
||||
jobConstraint *ConstraintIterator
|
||||
taskGroupDrivers *DriverIterator
|
||||
taskGroupConstraint *ConstraintIterator
|
||||
proposedAllocConstraint *ProposedAllocConstraintIterator
|
||||
binPack *BinPackIterator
|
||||
jobAntiAff *JobAntiAffinityIterator
|
||||
limit *LimitIterator
|
||||
maxScore *MaxScoreIterator
|
||||
}
|
||||
|
||||
// NewGenericStack constructs a stack used for selecting service placements
|
||||
func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *GenericStack {
|
||||
func NewGenericStack(batch bool, ctx Context) *GenericStack {
|
||||
// Create a new stack
|
||||
s := &GenericStack{
|
||||
batch: batch,
|
||||
@@ -58,7 +59,7 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
|
||||
// Create the source iterator. We randomize the order we visit nodes
|
||||
// to reduce collisions between schedulers and to do a basic load
|
||||
// balancing across eligible nodes.
|
||||
s.source = NewRandomIterator(ctx, baseNodes)
|
||||
s.source = NewRandomIterator(ctx, nil)
|
||||
|
||||
// Attach the job constraints. The job is filled in later.
|
||||
s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)
|
||||
@@ -69,8 +70,11 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
|
||||
// Filter on task group constraints second
|
||||
s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)
|
||||
|
||||
// Filter on constraints that are affected by proposed allocations.
|
||||
s.proposedAllocConstraint = NewProposedAllocConstraintIterator(ctx, s.taskGroupConstraint)
|
||||
|
||||
// Upgrade from feasible to rank iterator
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.proposedAllocConstraint)
|
||||
|
||||
// Apply the bin packing, this depends on the resources needed
|
||||
// by a particular task group. Only enable eviction for the service
|
||||
@@ -92,11 +96,6 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
|
||||
|
||||
// Select the node with the maximum score for placement
|
||||
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
|
||||
|
||||
// Set the nodes if given
|
||||
if len(baseNodes) != 0 {
|
||||
s.SetNodes(baseNodes)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
@@ -109,7 +108,7 @@ func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
|
||||
|
||||
// Apply a limit function. This is to avoid scanning *every* possible node.
|
||||
// For batch jobs we only need to evaluate 2 options and depend on the
|
||||
// powwer of two choices. For services jobs we need to visit "enough".
|
||||
// power of two choices. For services jobs we need to visit "enough".
|
||||
// Using a log of the total number of nodes is a good restriction, with
|
||||
// at least 2 as the floor
|
||||
limit := 2
|
||||
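The remainder of SetNodes is elided by this hunk; purely to illustrate the comment above, a log-based scan limit with a floor of 2 could be computed as in the hypothetical helper below (an illustrative sketch, not the actual elided code):

import "math"

// scanLimit is an illustrative helper: visit at least 2 nodes (the
// power-of-two-choices floor used for batch jobs), and for larger pools
// roughly log2 of the eligible node count, so the scheduler avoids
// scanning every possible node for service jobs.
func scanLimit(numNodes int) int {
	limit := 2
	if numNodes > 2 {
		if l := int(math.Ceil(math.Log2(float64(numNodes)))); l > limit {
			limit = l
		}
	}
	return limit
}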
@@ -124,6 +123,7 @@ func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
|
||||
|
||||
func (s *GenericStack) SetJob(job *structs.Job) {
|
||||
s.jobConstraint.SetConstraints(job.Constraints)
|
||||
s.proposedAllocConstraint.SetJob(job)
|
||||
s.binPack.SetPriority(job.Priority)
|
||||
s.jobAntiAff.SetJob(job.ID)
|
||||
}
|
||||
@@ -134,21 +134,13 @@ func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Reso
|
||||
s.ctx.Reset()
|
||||
start := time.Now()
|
||||
|
||||
// Collect the constraints, drivers and resources required by each
|
||||
// sub-task to aggregate the TaskGroup totals
|
||||
constr := make([]*structs.Constraint, 0, len(tg.Constraints))
|
||||
drivers := make(map[string]struct{})
|
||||
size := new(structs.Resources)
|
||||
constr = append(constr, tg.Constraints...)
|
||||
for _, task := range tg.Tasks {
|
||||
drivers[task.Driver] = struct{}{}
|
||||
constr = append(constr, task.Constraints...)
|
||||
size.Add(task.Resources)
|
||||
}
|
||||
// Get the task groups constraints.
|
||||
tgConstr := taskGroupConstraints(tg)
|
||||
|
||||
// Update the parameters of iterators
|
||||
s.taskGroupDrivers.SetDrivers(drivers)
|
||||
s.taskGroupConstraint.SetConstraints(constr)
|
||||
s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
|
||||
s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
|
||||
s.proposedAllocConstraint.SetTaskGroup(tg)
|
||||
s.binPack.SetTasks(tg.Tasks)
|
||||
|
||||
// Find the node with the max score
|
||||
@@ -163,5 +155,83 @@ func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Reso
|
||||
|
||||
// Store the compute time
|
||||
s.ctx.Metrics().AllocationTime = time.Since(start)
|
||||
return option, size
|
||||
return option, tgConstr.size
|
||||
}
|
||||
|
||||
// SystemStack is the Stack used for the System scheduler. It is designed to
|
||||
// attempt to make placements on all nodes.
|
||||
type SystemStack struct {
|
||||
ctx Context
|
||||
source *StaticIterator
|
||||
jobConstraint *ConstraintIterator
|
||||
taskGroupDrivers *DriverIterator
|
||||
taskGroupConstraint *ConstraintIterator
|
||||
binPack *BinPackIterator
|
||||
}
|
||||
|
||||
// NewSystemStack constructs a stack used for selecting service placements
|
||||
func NewSystemStack(ctx Context) *SystemStack {
|
||||
// Create a new stack
|
||||
s := &SystemStack{ctx: ctx}
|
||||
|
||||
// Create the source iterator. We visit nodes in a linear order because we
|
||||
// have to evaluate on all nodes.
|
||||
s.source = NewStaticIterator(ctx, nil)
|
||||
|
||||
// Attach the job constraints. The job is filled in later.
|
||||
s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)
|
||||
|
||||
// Filter on task group drivers first as they are faster
|
||||
s.taskGroupDrivers = NewDriverIterator(ctx, s.jobConstraint, nil)
|
||||
|
||||
// Filter on task group constraints second
|
||||
s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)
|
||||
|
||||
// Upgrade from feasible to rank iterator
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)
|
||||
|
||||
// Apply the bin packing, this depends on the resources needed
|
||||
// by a particular task group. Enable eviction as system jobs are high
|
||||
// priority.
|
||||
s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
|
||||
// Update the set of base nodes
|
||||
s.source.SetNodes(baseNodes)
|
||||
}
|
||||
|
||||
func (s *SystemStack) SetJob(job *structs.Job) {
|
||||
s.jobConstraint.SetConstraints(job.Constraints)
|
||||
s.binPack.SetPriority(job.Priority)
|
||||
}
|
||||
|
||||
func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
|
||||
// Reset the binpack selector and context
|
||||
s.binPack.Reset()
|
||||
s.ctx.Reset()
|
||||
start := time.Now()
|
||||
|
||||
// Get the task groups constraints.
|
||||
tgConstr := taskGroupConstraints(tg)
|
||||
|
||||
// Update the parameters of iterators
|
||||
s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
|
||||
s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
|
||||
s.binPack.SetTasks(tg.Tasks)
|
||||
|
||||
// Get the next option that satisfies the constraints.
|
||||
option := s.binPack.Next()
|
||||
|
||||
// Ensure that the task resources were specified
|
||||
if option != nil && len(option.TaskResources) != len(tg.Tasks) {
|
||||
for _, task := range tg.Tasks {
|
||||
option.SetTaskResources(task, task.Resources)
|
||||
}
|
||||
}
|
||||
|
||||
// Store the compute time
|
||||
s.ctx.Metrics().AllocationTime = time.Since(start)
|
||||
return option, tgConstr.size
|
||||
}
|
||||
|
||||
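Taken together, the intended use of SystemStack (as the system scheduler added later in this diff does) is roughly the sketch below; the ctx, job and node values are assumed to come from the surrounding scheduler or test helpers:

// placeSystemJobSketch pins the stack to one node at a time and asks it
// to place a task group, mirroring how SystemScheduler.computePlacements
// walks the node list.
func placeSystemJobSketch(ctx Context, job *structs.Job, nodes []*structs.Node) {
	stack := NewSystemStack(ctx)
	stack.SetJob(job)
	for _, node := range nodes {
		stack.SetNodes([]*structs.Node{node})
		option, size := stack.Select(job.TaskGroups[0])
		if option == nil {
			continue // filtered out by drivers, constraints or bin packing
		}
		_ = option.Node // the placement target
		_ = size        // aggregate resources for the task group
	}
}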
@@ -10,7 +10,7 @@ import (
|
||||
|
||||
func TestServiceStack_SetNodes(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
stack := NewGenericStack(false, ctx, nil)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
@@ -37,7 +37,7 @@ func TestServiceStack_SetNodes(t *testing.T) {
|
||||
|
||||
func TestServiceStack_SetJob(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
stack := NewGenericStack(false, ctx, nil)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
@@ -55,7 +55,8 @@ func TestServiceStack_Select_Size(t *testing.T) {
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
}
|
||||
stack := NewGenericStack(false, ctx, nodes)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
@@ -85,7 +86,8 @@ func TestServiceStack_Select_MetricsReset(t *testing.T) {
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
stack := NewGenericStack(false, ctx, nodes)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
@@ -120,7 +122,8 @@ func TestServiceStack_Select_DriverFilter(t *testing.T) {
|
||||
zero := nodes[0]
|
||||
zero.Attributes["driver.foo"] = "1"
|
||||
|
||||
stack := NewGenericStack(false, ctx, nodes)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
job.TaskGroups[0].Tasks[0].Driver = "foo"
|
||||
@@ -145,7 +148,8 @@ func TestServiceStack_Select_ConstraintFilter(t *testing.T) {
|
||||
zero := nodes[0]
|
||||
zero.Attributes["kernel.name"] = "freebsd"
|
||||
|
||||
stack := NewGenericStack(false, ctx, nodes)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
job.Constraints[0].RTarget = "freebsd"
|
||||
@@ -182,7 +186,8 @@ func TestServiceStack_Select_BinPack_Overflow(t *testing.T) {
|
||||
one := nodes[1]
|
||||
one.Reserved = one.Resources
|
||||
|
||||
stack := NewGenericStack(false, ctx, nodes)
|
||||
stack := NewGenericStack(false, ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
@@ -207,3 +212,209 @@ func TestServiceStack_Select_BinPack_Overflow(t *testing.T) {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_SetNodes(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
stack := NewSystemStack(ctx)
|
||||
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
out := collectFeasible(stack.source)
|
||||
if !reflect.DeepEqual(out, nodes) {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_SetJob(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
stack := NewSystemStack(ctx)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
|
||||
if stack.binPack.priority != job.Priority {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if !reflect.DeepEqual(stack.jobConstraint.constraints, job.Constraints) {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_Select_Size(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{mock.Node()}
|
||||
stack := NewSystemStack(ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
node, size := stack.Select(job.TaskGroups[0])
|
||||
if node == nil {
|
||||
t.Fatalf("missing node %#v", ctx.Metrics())
|
||||
}
|
||||
if size == nil {
|
||||
t.Fatalf("missing size")
|
||||
}
|
||||
|
||||
if size.CPU != 500 || size.MemoryMB != 256 {
|
||||
t.Fatalf("bad: %#v", size)
|
||||
}
|
||||
|
||||
met := ctx.Metrics()
|
||||
if met.AllocationTime == 0 {
|
||||
t.Fatalf("missing time")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_Select_MetricsReset(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
stack := NewSystemStack(ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
n1, _ := stack.Select(job.TaskGroups[0])
|
||||
m1 := ctx.Metrics()
|
||||
if n1 == nil {
|
||||
t.Fatalf("missing node %#v", m1)
|
||||
}
|
||||
|
||||
if m1.NodesEvaluated != 1 {
|
||||
t.Fatalf("should only be 1")
|
||||
}
|
||||
|
||||
n2, _ := stack.Select(job.TaskGroups[0])
|
||||
m2 := ctx.Metrics()
|
||||
if n2 == nil {
|
||||
t.Fatalf("missing node %#v", m2)
|
||||
}
|
||||
|
||||
// If we don't reset, this would be 2
|
||||
if m2.NodesEvaluated != 1 {
|
||||
t.Fatalf("should only be 2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_Select_DriverFilter(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
}
|
||||
zero := nodes[0]
|
||||
zero.Attributes["driver.foo"] = "1"
|
||||
|
||||
stack := NewSystemStack(ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
job.TaskGroups[0].Tasks[0].Driver = "foo"
|
||||
stack.SetJob(job)
|
||||
|
||||
node, _ := stack.Select(job.TaskGroups[0])
|
||||
if node == nil {
|
||||
t.Fatalf("missing node %#v", ctx.Metrics())
|
||||
}
|
||||
|
||||
if node.Node != zero {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
|
||||
zero.Attributes["driver.foo"] = "0"
|
||||
stack = NewSystemStack(ctx)
|
||||
stack.SetNodes(nodes)
|
||||
stack.SetJob(job)
|
||||
node, _ = stack.Select(job.TaskGroups[0])
|
||||
if node != nil {
|
||||
t.Fatalf("node not filtered %#v", node)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_Select_ConstraintFilter(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
zero := nodes[1]
|
||||
zero.Attributes["kernel.name"] = "freebsd"
|
||||
|
||||
stack := NewSystemStack(ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
job.Constraints[0].RTarget = "freebsd"
|
||||
stack.SetJob(job)
|
||||
|
||||
node, _ := stack.Select(job.TaskGroups[0])
|
||||
if node == nil {
|
||||
t.Fatalf("missing node %#v", ctx.Metrics())
|
||||
}
|
||||
|
||||
if node.Node != zero {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
|
||||
met := ctx.Metrics()
|
||||
if met.NodesFiltered != 1 {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
if met.ClassFiltered["linux-medium-pci"] != 1 {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
if met.ConstraintFiltered["$attr.kernel.name = freebsd"] != 1 {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemStack_Select_BinPack_Overflow(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
zero := nodes[0]
|
||||
zero.Reserved = zero.Resources
|
||||
one := nodes[1]
|
||||
|
||||
stack := NewSystemStack(ctx)
|
||||
stack.SetNodes(nodes)
|
||||
|
||||
job := mock.Job()
|
||||
stack.SetJob(job)
|
||||
|
||||
node, _ := stack.Select(job.TaskGroups[0])
|
||||
if node == nil {
|
||||
t.Fatalf("missing node %#v", ctx.Metrics())
|
||||
}
|
||||
|
||||
if node.Node != one {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
|
||||
met := ctx.Metrics()
|
||||
if met.NodesExhausted != 1 {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
if met.ClassExhausted["linux-medium-pci"] != 1 {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
if len(met.Scores) != 1 {
|
||||
t.Fatalf("bad: %#v", met)
|
||||
}
|
||||
}
|
||||
|
||||
265
scheduler/system_sched.go
Normal file
@@ -0,0 +1,265 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// maxSystemScheduleAttempts is used to limit the number of times
|
||||
// we will attempt to schedule if we continue to hit conflicts for system
|
||||
// jobs.
|
||||
maxSystemScheduleAttempts = 5
|
||||
|
||||
// allocNodeTainted is the status used when stopping an alloc because its
// node is tainted.
|
||||
allocNodeTainted = "system alloc not needed as node is tainted"
|
||||
)
|
||||
|
||||
// SystemScheduler is used for 'system' jobs. This scheduler is
|
||||
// designed for services that should be run on every client.
|
||||
type SystemScheduler struct {
|
||||
logger *log.Logger
|
||||
state State
|
||||
planner Planner
|
||||
|
||||
eval *structs.Evaluation
|
||||
job *structs.Job
|
||||
plan *structs.Plan
|
||||
ctx *EvalContext
|
||||
stack *SystemStack
|
||||
nodes []*structs.Node
|
||||
|
||||
limitReached bool
|
||||
nextEval *structs.Evaluation
|
||||
}
|
||||
|
||||
// NewSystemScheduler is a factory function to instantiate a new system
|
||||
// scheduler.
|
||||
func NewSystemScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
|
||||
return &SystemScheduler{
|
||||
logger: logger,
|
||||
state: state,
|
||||
planner: planner,
|
||||
}
|
||||
}
|
||||
|
||||
// Process is used to handle a single evaluation.
|
||||
func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
|
||||
// Store the evaluation
|
||||
s.eval = eval
|
||||
|
||||
// Verify the evaluation trigger reason is understood
|
||||
switch eval.TriggeredBy {
|
||||
case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,
|
||||
structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate:
|
||||
default:
|
||||
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
|
||||
eval.TriggeredBy)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
|
||||
}
|
||||
|
||||
// Retry up to the maxSystemScheduleAttempts
|
||||
if err := retryMax(maxSystemScheduleAttempts, s.process); err != nil {
|
||||
if statusErr, ok := err.(*SetStatusError); ok {
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Update the status to complete
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
|
||||
}
|
||||
|
||||
// process is wrapped in retryMax to iteratively run the handler until we have no
|
||||
// further work or we've made the maximum number of attempts.
|
||||
func (s *SystemScheduler) process() (bool, error) {
|
||||
// Lookup the Job by ID
|
||||
var err error
|
||||
s.job, err = s.state.JobByID(s.eval.JobID)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to get job '%s': %v",
|
||||
s.eval.JobID, err)
|
||||
}
|
||||
|
||||
// Get the ready nodes in the required datacenters
|
||||
if s.job != nil {
|
||||
s.nodes, err = readyNodesInDCs(s.state, s.job.Datacenters)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to get ready nodes: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create a plan
|
||||
s.plan = s.eval.MakePlan(s.job)
|
||||
|
||||
// Create an evaluation context
|
||||
s.ctx = NewEvalContext(s.state, s.plan, s.logger)
|
||||
|
||||
// Construct the placement stack
|
||||
s.stack = NewSystemStack(s.ctx)
|
||||
if s.job != nil {
|
||||
s.stack.SetJob(s.job)
|
||||
}
|
||||
|
||||
// Compute the target job allocations
|
||||
if err := s.computeJobAllocs(); err != nil {
|
||||
s.logger.Printf("[ERR] sched: %#v: %v", s.eval, err)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// If the plan is a no-op, we can bail
|
||||
if s.plan.IsNoOp() {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// If the limit of placements was reached we need to create an evaluation
|
||||
// to pickup from here after the stagger period.
|
||||
if s.limitReached && s.nextEval == nil {
|
||||
s.nextEval = s.eval.NextRollingEval(s.job.Update.Stagger)
|
||||
if err := s.planner.CreateEval(s.nextEval); err != nil {
|
||||
s.logger.Printf("[ERR] sched: %#v failed to make next eval for rolling update: %v", s.eval, err)
|
||||
return false, err
|
||||
}
|
||||
s.logger.Printf("[DEBUG] sched: %#v: rolling update limit reached, next eval '%s' created", s.eval, s.nextEval.ID)
|
||||
}
|
||||
|
||||
// Submit the plan
|
||||
result, newState, err := s.planner.SubmitPlan(s.plan)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// If we got a state refresh, try again since we have stale data
|
||||
if newState != nil {
|
||||
s.logger.Printf("[DEBUG] sched: %#v: refresh forced", s.eval)
|
||||
s.state = newState
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Try again if the plan was not fully committed, potential conflict
|
||||
fullCommit, expected, actual := result.FullCommit(s.plan)
|
||||
if !fullCommit {
|
||||
s.logger.Printf("[DEBUG] sched: %#v: attempted %d placements, %d placed",
|
||||
s.eval, expected, actual)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Success!
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// computeJobAllocs is used to reconcile differences between the job,
|
||||
// existing allocations and node status to update the allocations.
|
||||
func (s *SystemScheduler) computeJobAllocs() error {
|
||||
// Lookup the allocations by JobID
|
||||
allocs, err := s.state.AllocsByJob(s.eval.JobID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get allocs for job '%s': %v",
|
||||
s.eval.JobID, err)
|
||||
}
|
||||
|
||||
// Filter out the allocations in a terminal state
|
||||
allocs = structs.FilterTerminalAllocs(allocs)
|
||||
|
||||
// Determine the tainted nodes containing job allocs
|
||||
tainted, err := taintedNodes(s.state, allocs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get tainted nodes for job '%s': %v",
|
||||
s.eval.JobID, err)
|
||||
}
|
||||
|
||||
// Diff the required and existing allocations
|
||||
diff := diffSystemAllocs(s.job, s.nodes, tainted, allocs)
|
||||
s.logger.Printf("[DEBUG] sched: %#v: %#v", s.eval, diff)
|
||||
|
||||
// Add all the allocs to stop
|
||||
for _, e := range diff.stop {
|
||||
s.plan.AppendUpdate(e.Alloc, structs.AllocDesiredStatusStop, allocNotNeeded)
|
||||
}
|
||||
|
||||
// Attempt to do the upgrades in place
|
||||
diff.update = inplaceUpdate(s.ctx, s.eval, s.job, s.stack, diff.update)
|
||||
|
||||
// Check if a rolling upgrade strategy is being used
|
||||
limit := len(diff.update)
|
||||
if s.job != nil && s.job.Update.Rolling() {
|
||||
limit = s.job.Update.MaxParallel
|
||||
}
|
||||
|
||||
// Treat non in-place updates as an eviction and new placement.
|
||||
s.limitReached = evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit)
|
||||
|
||||
// Nothing remaining to do if placement is not required
|
||||
if len(diff.place) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compute the placements
|
||||
return s.computePlacements(diff.place)
|
||||
}
|
||||
|
||||
// computePlacements computes placements for allocations
|
||||
func (s *SystemScheduler) computePlacements(place []allocTuple) error {
|
||||
nodeByID := make(map[string]*structs.Node, len(s.nodes))
|
||||
for _, node := range s.nodes {
|
||||
nodeByID[node.ID] = node
|
||||
}
|
||||
|
||||
// Track the failed task groups so that we can coalesce
|
||||
// the failures together to avoid creating many failed allocs.
|
||||
failedTG := make(map[*structs.TaskGroup]*structs.Allocation)
|
||||
|
||||
nodes := make([]*structs.Node, 1)
|
||||
for _, missing := range place {
|
||||
node, ok := nodeByID[missing.Alloc.NodeID]
|
||||
if !ok {
|
||||
return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)
|
||||
}
|
||||
|
||||
// Update the set of placement nodes
|
||||
nodes[0] = node
|
||||
s.stack.SetNodes(nodes)
|
||||
|
||||
// Attempt to match the task group
|
||||
option, size := s.stack.Select(missing.TaskGroup)
|
||||
|
||||
if option == nil {
|
||||
// Check if this task group has already failed
|
||||
if alloc, ok := failedTG[missing.TaskGroup]; ok {
|
||||
alloc.Metrics.CoalescedFailures += 1
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Create an allocation for this
|
||||
alloc := &structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
EvalID: s.eval.ID,
|
||||
Name: missing.Name,
|
||||
JobID: s.job.ID,
|
||||
Job: s.job,
|
||||
TaskGroup: missing.TaskGroup.Name,
|
||||
Resources: size,
|
||||
Metrics: s.ctx.Metrics(),
|
||||
}
|
||||
|
||||
// Set fields based on whether we found an allocation option
|
||||
if option != nil {
|
||||
alloc.NodeID = option.Node.ID
|
||||
alloc.TaskResources = option.TaskResources
|
||||
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
||||
alloc.ClientStatus = structs.AllocClientStatusPending
|
||||
s.plan.AppendAlloc(alloc)
|
||||
} else {
|
||||
alloc.DesiredStatus = structs.AllocDesiredStatusFailed
|
||||
alloc.DesiredDescription = "failed to find a node for placement"
|
||||
alloc.ClientStatus = structs.AllocClientStatusFailed
|
||||
s.plan.AppendFailed(alloc)
|
||||
failedTG[missing.TaskGroup] = alloc
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
651
scheduler/system_sched_test.go
Normal file
@@ -0,0 +1,651 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
func TestSystemSched_JobRegister(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create some nodes
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Create a job
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
// Create a mock evaluation to register the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
TriggeredBy: structs.EvalTriggerJobRegister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan allocated
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeAllocation {
|
||||
planned = append(planned, allocList...)
|
||||
}
|
||||
if len(planned) != 10 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure all allocations placed
|
||||
if len(out) != 10 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
}
|
||||
|
||||
func TestSystemSched_JobRegister_AddNode(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create some nodes
|
||||
var nodes []*structs.Node
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
nodes = append(nodes, node)
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Generate a fake job with allocations
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
var allocs []*structs.Allocation
|
||||
for _, node := range nodes {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = node.ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
|
||||
|
||||
// Add a new node.
|
||||
node := mock.Node()
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
|
||||
// Create a mock evaluation to deal with the node update
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: 50,
|
||||
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan had no node updates
|
||||
var update []*structs.Allocation
|
||||
for _, updateList := range plan.NodeUpdate {
|
||||
update = append(update, updateList...)
|
||||
}
|
||||
if len(update) != 0 {
|
||||
t.Log(len(update))
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Ensure the plan allocated on the new node
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeAllocation {
|
||||
planned = append(planned, allocList...)
|
||||
}
|
||||
if len(planned) != 1 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Ensure it allocated on the right node
|
||||
if _, ok := plan.NodeAllocation[node.ID]; !ok {
|
||||
t.Fatalf("allocated on wrong node: %#v", plan)
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure all allocations placed
|
||||
out = structs.FilterTerminalAllocs(out)
|
||||
if len(out) != 11 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
}
|
||||
|
||||
func TestSystemSched_JobRegister_AllocFail(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create NO nodes
|
||||
// Create a job
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
// Create a mock evaluation to register the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
TriggeredBy: structs.EvalTriggerJobRegister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure no plan as this should be a no-op.
|
||||
if len(h.Plans) != 0 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
}
|
||||
|
||||
func TestSystemSched_JobModify(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create some nodes
|
||||
var nodes []*structs.Node
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
nodes = append(nodes, node)
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Generate a fake job with allocations
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
var allocs []*structs.Allocation
|
||||
for _, node := range nodes {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = node.ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
|
||||
|
||||
// Add a few terminal status allocations; these should be ignored
|
||||
var terminal []*structs.Allocation
|
||||
for i := 0; i < 5; i++ {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = nodes[i].ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
alloc.DesiredStatus = structs.AllocDesiredStatusFailed
|
||||
terminal = append(terminal, alloc)
|
||||
}
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))
|
||||
|
||||
// Update the job
|
||||
job2 := mock.SystemJob()
|
||||
job2.ID = job.ID
|
||||
|
||||
// Update the task, such that it cannot be done in-place
|
||||
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))
|
||||
|
||||
// Create a mock evaluation to deal with the job update
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: 50,
|
||||
TriggeredBy: structs.EvalTriggerJobRegister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan evicted all allocs
|
||||
var update []*structs.Allocation
|
||||
for _, updateList := range plan.NodeUpdate {
|
||||
update = append(update, updateList...)
|
||||
}
|
||||
if len(update) != len(allocs) {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Ensure the plan allocated
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeAllocation {
|
||||
planned = append(planned, allocList...)
|
||||
}
|
||||
if len(planned) != 10 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure all allocations placed
|
||||
out = structs.FilterTerminalAllocs(out)
|
||||
if len(out) != 10 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
}
|
||||
|
||||
func TestSystemSched_JobModify_Rolling(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create some nodes
|
||||
var nodes []*structs.Node
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
nodes = append(nodes, node)
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Generate a fake job with allocations
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
var allocs []*structs.Allocation
|
||||
for _, node := range nodes {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = node.ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
|
||||
|
||||
// Update the job
|
||||
job2 := mock.SystemJob()
|
||||
job2.ID = job.ID
|
||||
job2.Update = structs.UpdateStrategy{
|
||||
Stagger: 30 * time.Second,
|
||||
MaxParallel: 5,
|
||||
}
|
||||
|
||||
// Update the task, such that it cannot be done in-place
|
||||
job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))
|
||||
|
||||
// Create a mock evaluation to deal with the job update
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: 50,
|
||||
TriggeredBy: structs.EvalTriggerJobRegister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan evicted only MaxParallel
|
||||
var update []*structs.Allocation
|
||||
for _, updateList := range plan.NodeUpdate {
|
||||
update = append(update, updateList...)
|
||||
}
|
||||
if len(update) != job2.Update.MaxParallel {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Ensure the plan allocated
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeAllocation {
|
||||
planned = append(planned, allocList...)
|
||||
}
|
||||
if len(planned) != job2.Update.MaxParallel {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
|
||||
// Ensure a follow up eval was created
|
||||
eval = h.Evals[0]
|
||||
if eval.NextEval == "" {
|
||||
t.Fatalf("missing next eval")
|
||||
}
|
||||
|
||||
// Check for create
|
||||
if len(h.CreateEvals) == 0 {
|
||||
t.Fatalf("missing created eval")
|
||||
}
|
||||
create := h.CreateEvals[0]
|
||||
if eval.NextEval != create.ID {
|
||||
t.Fatalf("ID mismatch")
|
||||
}
|
||||
if create.PreviousEval != eval.ID {
|
||||
t.Fatalf("missing previous eval")
|
||||
}
|
||||
|
||||
if create.TriggeredBy != structs.EvalTriggerRollingUpdate {
|
||||
t.Fatalf("bad: %#v", create)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemSched_JobModify_InPlace(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create some nodes
|
||||
var nodes []*structs.Node
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
nodes = append(nodes, node)
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Generate a fake job with allocations
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
var allocs []*structs.Allocation
|
||||
for _, node := range nodes {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = node.ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
|
||||
|
||||
// Update the job
|
||||
job2 := mock.SystemJob()
|
||||
job2.ID = job.ID
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))
|
||||
|
||||
// Create a mock evaluation to deal with the job update
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: 50,
|
||||
TriggeredBy: structs.EvalTriggerJobRegister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan did not evict any allocs
|
||||
var update []*structs.Allocation
|
||||
for _, updateList := range plan.NodeUpdate {
|
||||
update = append(update, updateList...)
|
||||
}
|
||||
if len(update) != 0 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Ensure the plan updated the existing allocs
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeAllocation {
|
||||
planned = append(planned, allocList...)
|
||||
}
|
||||
if len(planned) != 10 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
for _, p := range planned {
|
||||
if p.Job != job2 {
|
||||
t.Fatalf("should update job")
|
||||
}
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure all allocations placed
|
||||
if len(out) != 10 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
|
||||
// Verify the network did not change
|
||||
for _, alloc := range out {
|
||||
for _, resources := range alloc.TaskResources {
|
||||
if resources.Networks[0].ReservedPorts[0] != 5000 {
|
||||
t.Fatalf("bad: %#v", alloc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemSched_JobDeregister(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Create some nodes
|
||||
var nodes []*structs.Node
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
nodes = append(nodes, node)
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Generate a fake job with allocations
|
||||
job := mock.SystemJob()
|
||||
|
||||
var allocs []*structs.Allocation
|
||||
for _, node := range nodes {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = node.ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
|
||||
|
||||
// Create a mock evaluation to deregister the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: 50,
|
||||
TriggeredBy: structs.EvalTriggerJobDeregister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan evicted the job from all nodes.
|
||||
for _, node := range nodes {
|
||||
if len(plan.NodeUpdate[node.ID]) != 1 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure no remaining allocations
|
||||
out = structs.FilterTerminalAllocs(out)
|
||||
if len(out) != 0 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
}
|
||||
|
||||
func TestSystemSched_NodeDrain(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
|
||||
// Register a draining node
|
||||
node := mock.Node()
|
||||
node.Drain = true
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
|
||||
// Generate a fake job allocated on that node.
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = node.ID
|
||||
alloc.Name = "my-job.web[0]"
|
||||
noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))
|
||||
|
||||
// Create a mock evaluation to deal with drain
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: 50,
|
||||
TriggeredBy: structs.EvalTriggerNodeUpdate,
|
||||
JobID: job.ID,
|
||||
NodeID: node.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure a single plan
|
||||
if len(h.Plans) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
plan := h.Plans[0]
|
||||
|
||||
// Ensure the plan evicted all allocs
|
||||
if len(plan.NodeUpdate[node.ID]) != 1 {
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Ensure the plan updated the allocation.
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeUpdate {
|
||||
planned = append(planned, allocList...)
|
||||
}
|
||||
if len(planned) != 1 {
|
||||
t.Log(len(planned))
|
||||
t.Fatalf("bad: %#v", plan)
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure the allocation is stopped
|
||||
if planned[0].DesiredStatus != structs.AllocDesiredStatusStop {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
}
|
||||
|
||||
func TestSystemSched_RetryLimit(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
h.Planner = &RejectPlan{h}
|
||||
|
||||
// Create some nodes
|
||||
for i := 0; i < 10; i++ {
|
||||
node := mock.Node()
|
||||
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
|
||||
}
|
||||
|
||||
// Create a job
|
||||
job := mock.SystemJob()
|
||||
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
|
||||
// Create a mock evaluation to register the job
|
||||
eval := &structs.Evaluation{
|
||||
ID: structs.GenerateUUID(),
|
||||
Priority: job.Priority,
|
||||
TriggeredBy: structs.EvalTriggerJobRegister,
|
||||
JobID: job.ID,
|
||||
}
|
||||
|
||||
// Process the evaluation
|
||||
err := h.Process(NewSystemScheduler, eval)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Ensure multiple plans
|
||||
if len(h.Plans) == 0 {
|
||||
t.Fatalf("bad: %#v", h.Plans)
|
||||
}
|
||||
|
||||
// Lookup the allocations by JobID
|
||||
out, err := h.State.AllocsByJob(job.ID)
|
||||
noErr(t, err)
|
||||
|
||||
// Ensure no allocations placed
|
||||
if len(out) != 0 {
|
||||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
// Should hit the retry limit
|
||||
h.AssertEvalStatus(t, structs.EvalStatusFailed)
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"reflect"
|
||||
|
||||
@@ -19,6 +20,10 @@ type allocTuple struct {
|
||||
// a job requires. This is used to do the count expansion.
|
||||
func materializeTaskGroups(job *structs.Job) map[string]*structs.TaskGroup {
|
||||
out := make(map[string]*structs.TaskGroup)
|
||||
if job == nil {
|
||||
return out
|
||||
}
|
||||
|
||||
for _, tg := range job.TaskGroups {
|
||||
for i := 0; i < tg.Count; i++ {
|
||||
name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i)
|
||||
@@ -38,6 +43,14 @@ func (d *diffResult) GoString() string {
|
||||
len(d.place), len(d.update), len(d.migrate), len(d.stop), len(d.ignore))
|
||||
}
|
||||
|
||||
func (d *diffResult) Append(other *diffResult) {
|
||||
d.place = append(d.place, other.place...)
|
||||
d.update = append(d.update, other.update...)
|
||||
d.migrate = append(d.migrate, other.migrate...)
|
||||
d.stop = append(d.stop, other.stop...)
|
||||
d.ignore = append(d.ignore, other.ignore...)
|
||||
}
|
||||
|
||||
// diffAllocs is used to do a set difference between the target allocations
|
||||
// and the existing allocations. This returns 5 sets of results, the list of
|
||||
// named task groups that need to be placed (no existing allocation), the
|
||||
@@ -117,6 +130,48 @@ func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
|
||||
return result
|
||||
}
|
||||
|
||||
// diffSystemAllocs is like diffAllocs however, the allocations in the
|
||||
// diffResult contain the specific nodeID they should be allocated on.
|
||||
func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[string]bool,
|
||||
allocs []*structs.Allocation) *diffResult {
|
||||
|
||||
// Build a mapping of nodes to all their allocs.
|
||||
nodeAllocs := make(map[string][]*structs.Allocation, len(allocs))
|
||||
for _, alloc := range allocs {
|
||||
nallocs := append(nodeAllocs[alloc.NodeID], alloc)
|
||||
nodeAllocs[alloc.NodeID] = nallocs
|
||||
}
|
||||
|
||||
for _, node := range nodes {
|
||||
if _, ok := nodeAllocs[node.ID]; !ok {
|
||||
nodeAllocs[node.ID] = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Create the required task groups.
|
||||
required := materializeTaskGroups(job)
|
||||
|
||||
result := &diffResult{}
|
||||
for nodeID, allocs := range nodeAllocs {
|
||||
diff := diffAllocs(job, taintedNodes, required, allocs)
|
||||
|
||||
// Mark the alloc as being for a specific node.
|
||||
for i := range diff.place {
|
||||
alloc := &diff.place[i]
|
||||
alloc.Alloc = &structs.Allocation{NodeID: nodeID}
|
||||
}
|
||||
|
||||
// Migrate does not apply to system jobs and instead should be marked as
|
||||
// stop because if a node is tainted, the job is invalid on that node.
|
||||
diff.stop = append(diff.stop, diff.migrate...)
|
||||
diff.migrate = nil
|
||||
|
||||
result.Append(diff)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// readyNodesInDCs returns all the ready nodes in the given datacenters
|
||||
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
|
||||
// Index the DCs
|
||||
@@ -208,7 +263,7 @@ func shuffleNodes(nodes []*structs.Node) {
|
||||
}
|
||||
|
||||
// tasksUpdated does a diff between task groups to see if the
|
||||
// tasks, their drivers or config have updated.
|
||||
// tasks, their drivers, environment variables or config have updated.
|
||||
func tasksUpdated(a, b *structs.TaskGroup) bool {
|
||||
// If the number of tasks do not match, clearly there is an update
|
||||
if len(a.Tasks) != len(b.Tasks) {
|
||||
@@ -227,6 +282,9 @@ func tasksUpdated(a, b *structs.TaskGroup) bool {
|
||||
if !reflect.DeepEqual(at.Config, bt.Config) {
|
||||
return true
|
||||
}
|
||||
if !reflect.DeepEqual(at.Env, bt.Env) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Inspect the network to see if the dynamic ports are different
|
||||
if len(at.Resources.Networks) != len(bt.Resources.Networks) {
|
||||
@@ -242,3 +300,148 @@ func tasksUpdated(a, b *structs.TaskGroup) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// setStatus is used to update the status of the evaluation
|
||||
func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
|
||||
logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
|
||||
newEval := eval.Copy()
|
||||
newEval.Status = status
|
||||
newEval.StatusDescription = desc
|
||||
if nextEval != nil {
|
||||
newEval.NextEval = nextEval.ID
|
||||
}
|
||||
return planner.UpdateEval(newEval)
|
||||
}
|
||||
|
||||
// inplaceUpdate attempts to update allocations in-place where possible.
|
||||
func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
|
||||
stack Stack, updates []allocTuple) []allocTuple {
|
||||
|
||||
n := len(updates)
|
||||
inplace := 0
|
||||
for i := 0; i < n; i++ {
|
||||
// Get the update
|
||||
update := updates[i]
|
||||
|
||||
// Check if the task drivers or config has changed, requires
|
||||
// a rolling upgrade since that cannot be done in-place.
|
||||
existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)
|
||||
if tasksUpdated(update.TaskGroup, existing) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get the existing node
|
||||
node, err := ctx.State().NodeByID(update.Alloc.NodeID)
|
||||
if err != nil {
|
||||
ctx.Logger().Printf("[ERR] sched: %#v failed to get node '%s': %v",
|
||||
eval, update.Alloc.NodeID, err)
|
||||
continue
|
||||
}
|
||||
if node == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Set the existing node as the base set
|
||||
stack.SetNodes([]*structs.Node{node})
|
||||
|
||||
// Stage an eviction of the current allocation. This is done so that
|
||||
// the current allocation is discounted when checking for feasibility.
|
||||
// Otherwise we would be trying to fit the tasks current resources and
|
||||
// updated resources. After select is called we can remove the evict.
|
||||
ctx.Plan().AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop,
|
||||
allocInPlace)
|
||||
|
||||
// Attempt to match the task group
|
||||
option, size := stack.Select(update.TaskGroup)
|
||||
|
||||
// Pop the allocation
|
||||
ctx.Plan().PopUpdate(update.Alloc)
|
||||
|
||||
// Skip if we could not do an in-place update
|
||||
if option == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Restore the network offers from the existing allocation.
|
||||
// We do not allow network resources (reserved/dynamic ports)
|
||||
// to be updated. This is guarded in taskUpdated, so we can
|
||||
// safely restore those here.
|
||||
for task, resources := range option.TaskResources {
|
||||
existing := update.Alloc.TaskResources[task]
|
||||
resources.Networks = existing.Networks
|
||||
}
|
||||
|
||||
// Create a shallow copy
|
||||
newAlloc := new(structs.Allocation)
|
||||
*newAlloc = *update.Alloc
|
||||
|
||||
// Update the allocation
|
||||
newAlloc.EvalID = eval.ID
|
||||
newAlloc.Job = job
|
||||
newAlloc.Resources = size
|
||||
newAlloc.TaskResources = option.TaskResources
|
||||
newAlloc.Metrics = ctx.Metrics()
|
||||
newAlloc.DesiredStatus = structs.AllocDesiredStatusRun
|
||||
newAlloc.ClientStatus = structs.AllocClientStatusPending
|
||||
ctx.Plan().AppendAlloc(newAlloc)
|
||||
|
||||
// Remove this allocation from the slice
|
||||
updates[i] = updates[n-1]
|
||||
i--
|
||||
n--
|
||||
inplace++
|
||||
}
|
||||
if len(updates) > 0 {
|
||||
ctx.Logger().Printf("[DEBUG] sched: %#v: %d in-place updates of %d", eval, inplace, len(updates))
|
||||
}
|
||||
return updates[:n]
|
||||
}
|
||||
|
||||
// evictAndPlace is used to mark allocations for evicts and add them to the
|
||||
// placement queue. evictAndPlace modifies both the diffResult and the
|
||||
// limit. It returns true if the limit has been reached.
|
||||
func evictAndPlace(ctx Context, diff *diffResult, allocs []allocTuple, desc string, limit *int) bool {
|
||||
n := len(allocs)
|
||||
for i := 0; i < n && i < *limit; i++ {
|
||||
a := allocs[i]
|
||||
ctx.Plan().AppendUpdate(a.Alloc, structs.AllocDesiredStatusStop, desc)
|
||||
diff.place = append(diff.place, a)
|
||||
}
|
||||
if n <= *limit {
|
||||
*limit -= n
|
||||
return false
|
||||
}
|
||||
*limit = 0
|
||||
return true
|
||||
}
|
||||
|
||||
// tgConstrainTuple is used to store the total constraints of a task group.
|
||||
type tgConstrainTuple struct {
|
||||
// Holds the combined constraints of the task group and all its sub-tasks.
|
||||
constraints []*structs.Constraint
|
||||
|
||||
// The set of required drivers within the task group.
|
||||
drivers map[string]struct{}
|
||||
|
||||
// The combined resources of all tasks within the task group.
|
||||
size *structs.Resources
|
||||
}
|
||||
|
||||
// taskGroupConstraints collects the constraints, drivers and resources required by each
|
||||
// sub-task to aggregate the TaskGroup totals
|
||||
func taskGroupConstraints(tg *structs.TaskGroup) tgConstrainTuple {
|
||||
c := tgConstrainTuple{
|
||||
constraints: make([]*structs.Constraint, 0, len(tg.Constraints)),
|
||||
drivers: make(map[string]struct{}),
|
||||
size: new(structs.Resources),
|
||||
}
|
||||
|
||||
c.constraints = append(c.constraints, tg.Constraints...)
|
||||
for _, task := range tg.Tasks {
|
||||
c.drivers[task.Driver] = struct{}{}
|
||||
c.constraints = append(c.constraints, task.Constraints...)
|
||||
c.size.Add(task.Resources)
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
@@ -109,6 +110,80 @@ func TestDiffAllocs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDiffSystemAllocs(t *testing.T) {
|
||||
job := mock.SystemJob()
|
||||
|
||||
// Create three alive nodes.
|
||||
nodes := []*structs.Node{{ID: "foo"}, {ID: "bar"}, {ID: "baz"}}
|
||||
|
||||
// The "old" job has a previous modify index
|
||||
oldJob := new(structs.Job)
|
||||
*oldJob = *job
|
||||
oldJob.ModifyIndex -= 1
|
||||
|
||||
tainted := map[string]bool{
|
||||
"dead": true,
|
||||
"baz": false,
|
||||
}
|
||||
|
||||
allocs := []*structs.Allocation{
|
||||
// Update allocation on baz
|
||||
&structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
NodeID: "baz",
|
||||
Name: "my-job.web[0]",
|
||||
Job: oldJob,
|
||||
},
|
||||
|
||||
// Ignore allocation on bar
|
||||
&structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
NodeID: "bar",
|
||||
Name: "my-job.web[0]",
|
||||
Job: job,
|
||||
},
|
||||
|
||||
// Stop allocation on dead.
|
||||
&structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
NodeID: "dead",
|
||||
Name: "my-job.web[0]",
|
||||
},
|
||||
}
|
||||
|
||||
diff := diffSystemAllocs(job, nodes, tainted, allocs)
|
||||
place := diff.place
|
||||
update := diff.update
|
||||
migrate := diff.migrate
|
||||
stop := diff.stop
|
||||
ignore := diff.ignore
|
||||
|
||||
// We should update the first alloc
|
||||
if len(update) != 1 || update[0].Alloc != allocs[0] {
|
||||
t.Fatalf("bad: %#v", update)
|
||||
}
|
||||
|
||||
// We should ignore the second alloc
|
||||
if len(ignore) != 1 || ignore[0].Alloc != allocs[1] {
|
||||
t.Fatalf("bad: %#v", ignore)
|
||||
}
|
||||
|
||||
// We should stop the third alloc
|
||||
if len(stop) != 1 || stop[0].Alloc != allocs[2] {
|
||||
t.Fatalf("bad: %#v", stop)
|
||||
}
|
||||
|
||||
// There should be no migrates.
|
||||
if len(migrate) != 0 {
|
||||
t.Fatalf("bad: %#v", migrate)
|
||||
}
|
||||
|
||||
// We should place 1
|
||||
if len(place) != 1 {
|
||||
t.Fatalf("bad: %#v", place)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadyNodesInDCs(t *testing.T) {
|
||||
state, err := state.NewStateStore(os.Stderr)
|
||||
if err != nil {
|
||||
@@ -213,18 +288,25 @@ func TestTaintedNodes(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestShuffleNodes(t *testing.T) {
|
||||
// Use a large number of nodes to make the probability of shuffling to the
|
||||
// original order very low.
|
||||
nodes := []*structs.Node{
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
mock.Node(),
|
||||
}
|
||||
orig := make([]*structs.Node, len(nodes))
|
||||
copy(orig, nodes)
|
||||
shuffleNodes(nodes)
|
||||
if reflect.DeepEqual(nodes, orig) {
|
||||
t.Fatalf("shoudl not match")
|
||||
t.Fatalf("should not match")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,4 +347,304 @@ func TestTasksUpdated(t *testing.T) {
|
||||
if !tasksUpdated(j1.TaskGroups[0], j6.TaskGroups[0]) {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
|
||||
j7 := mock.Job()
|
||||
j7.TaskGroups[0].Tasks[0].Env["NEW_ENV"] = "NEW_VALUE"
|
||||
if !tasksUpdated(j1.TaskGroups[0], j7.TaskGroups[0]) {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvictAndPlace_LimitLessThanAllocs(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
allocs := []allocTuple{
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
}
|
||||
diff := &diffResult{}
|
||||
|
||||
limit := 2
|
||||
if !evictAndPlace(ctx, diff, allocs, "", &limit) {
|
||||
t.Fatal("evictAndReplace() should have returned true")
|
||||
}
|
||||
|
||||
if limit != 0 {
|
||||
t.Fatalf("evictAndReplace() should decremented limit; got %v; want 0", limit)
|
||||
}
|
||||
|
||||
if len(diff.place) != 2 {
|
||||
t.Fatalf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvictAndPlace_LimitEqualToAllocs(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
allocs := []allocTuple{
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
}
|
||||
diff := &diffResult{}
|
||||
|
||||
limit := 4
|
||||
if evictAndPlace(ctx, diff, allocs, "", &limit) {
|
||||
t.Fatal("evictAndReplace() should have returned false")
|
||||
}
|
||||
|
||||
if limit != 0 {
|
||||
t.Fatalf("evictAndReplace() should decremented limit; got %v; want 0", limit)
|
||||
}
|
||||
|
||||
if len(diff.place) != 4 {
|
||||
t.Fatalf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetStatus(t *testing.T) {
|
||||
h := NewHarness(t)
|
||||
logger := log.New(os.Stderr, "", log.LstdFlags)
|
||||
eval := mock.Eval()
|
||||
status := "a"
|
||||
desc := "b"
|
||||
if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
|
||||
t.Fatalf("setStatus() failed: %v", err)
|
||||
}
|
||||
|
||||
if len(h.Evals) != 1 {
|
||||
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
|
||||
}
|
||||
|
||||
newEval := h.Evals[0]
|
||||
if newEval.ID != eval.ID || newEval.Status != status || newEval.StatusDescription != desc {
|
||||
t.Fatalf("setStatus() submited invalid eval: %v", newEval)
|
||||
}
|
||||
|
||||
h = NewHarness(t)
|
||||
next := mock.Eval()
|
||||
if err := setStatus(logger, h, eval, next, status, desc); err != nil {
|
||||
t.Fatalf("setStatus() failed: %v", err)
|
||||
}
|
||||
|
||||
if len(h.Evals) != 1 {
|
||||
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
|
||||
}
|
||||
|
||||
newEval = h.Evals[0]
|
||||
if newEval.NextEval != next.ID {
|
||||
t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {
|
||||
state, ctx := testContext(t)
|
||||
eval := mock.Eval()
|
||||
job := mock.Job()
|
||||
|
||||
node := mock.Node()
|
||||
noErr(t, state.UpsertNode(1000, node))
|
||||
|
||||
// Register an alloc
|
||||
alloc := &structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
EvalID: eval.ID,
|
||||
NodeID: node.ID,
|
||||
JobID: job.ID,
|
||||
Job: job,
|
||||
Resources: &structs.Resources{
|
||||
CPU: 2048,
|
||||
MemoryMB: 2048,
|
||||
},
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
}
|
||||
alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources}
|
||||
noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
|
||||
|
||||
// Create a new task group that prevents in-place updates.
|
||||
tg := &structs.TaskGroup{}
|
||||
*tg = *job.TaskGroups[0]
|
||||
task := &structs.Task{Name: "FOO"}
|
||||
tg.Tasks = nil
|
||||
tg.Tasks = append(tg.Tasks, task)
|
||||
|
||||
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
|
||||
stack := NewGenericStack(false, ctx)
|
||||
|
||||
// Do the inplace update.
|
||||
unplaced := inplaceUpdate(ctx, eval, job, stack, updates)
|
||||
|
||||
if len(unplaced) != 1 {
|
||||
t.Fatal("inplaceUpdate incorrectly did an inplace update")
|
||||
}
|
||||
|
||||
if len(ctx.plan.NodeAllocation) != 0 {
|
||||
t.Fatal("inplaceUpdate incorrectly did an inplace update")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInplaceUpdate_NoMatch(t *testing.T) {
|
||||
state, ctx := testContext(t)
|
||||
eval := mock.Eval()
|
||||
job := mock.Job()
|
||||
|
||||
node := mock.Node()
|
||||
noErr(t, state.UpsertNode(1000, node))
|
||||
|
||||
// Register an alloc
|
||||
alloc := &structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
EvalID: eval.ID,
|
||||
NodeID: node.ID,
|
||||
JobID: job.ID,
|
||||
Job: job,
|
||||
Resources: &structs.Resources{
|
||||
CPU: 2048,
|
||||
MemoryMB: 2048,
|
||||
},
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
}
|
||||
alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources}
|
||||
noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
|
||||
|
||||
// Create a new task group that requires too much resources.
|
||||
tg := &structs.TaskGroup{}
|
||||
*tg = *job.TaskGroups[0]
|
||||
resource := &structs.Resources{CPU: 9999}
|
||||
tg.Tasks[0].Resources = resource
|
||||
|
||||
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
|
||||
stack := NewGenericStack(false, ctx)
|
||||
|
||||
// Do the inplace update.
|
||||
unplaced := inplaceUpdate(ctx, eval, job, stack, updates)
|
||||
|
||||
if len(unplaced) != 1 {
|
||||
t.Fatal("inplaceUpdate incorrectly did an inplace update")
|
||||
}
|
||||
|
||||
if len(ctx.plan.NodeAllocation) != 0 {
|
||||
t.Fatal("inplaceUpdate incorrectly did an inplace update")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInplaceUpdate_Success(t *testing.T) {
|
||||
state, ctx := testContext(t)
|
||||
eval := mock.Eval()
|
||||
job := mock.Job()
|
||||
|
||||
node := mock.Node()
|
||||
noErr(t, state.UpsertNode(1000, node))
|
||||
|
||||
// Register an alloc
|
||||
alloc := &structs.Allocation{
|
||||
ID: structs.GenerateUUID(),
|
||||
EvalID: eval.ID,
|
||||
NodeID: node.ID,
|
||||
JobID: job.ID,
|
||||
Job: job,
|
||||
Resources: &structs.Resources{
|
||||
CPU: 2048,
|
||||
MemoryMB: 2048,
|
||||
},
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
}
|
||||
alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources}
|
||||
noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
|
||||
|
||||
// Create a new task group that updates the resources.
|
||||
tg := &structs.TaskGroup{}
|
||||
*tg = *job.TaskGroups[0]
|
||||
resource := &structs.Resources{CPU: 737}
|
||||
tg.Tasks[0].Resources = resource
|
||||
|
||||
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}}
|
||||
stack := NewGenericStack(false, ctx)
|
||||
stack.SetJob(job)
|
||||
|
||||
// Do the inplace update.
|
||||
unplaced := inplaceUpdate(ctx, eval, job, stack, updates)
|
||||
|
||||
if len(unplaced) != 0 {
|
||||
t.Fatal("inplaceUpdate did not do an inplace update")
|
||||
}
|
||||
|
||||
if len(ctx.plan.NodeAllocation) != 1 {
|
||||
t.Fatal("inplaceUpdate did not do an inplace update")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvictAndPlace_LimitGreaterThanAllocs(t *testing.T) {
|
||||
_, ctx := testContext(t)
|
||||
allocs := []allocTuple{
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
allocTuple{Alloc: &structs.Allocation{ID: structs.GenerateUUID()}},
|
||||
}
|
||||
diff := &diffResult{}
|
||||
|
||||
limit := 6
|
||||
if evictAndPlace(ctx, diff, allocs, "", &limit) {
|
||||
t.Fatal("evictAndReplace() should have returned false")
|
||||
}
|
||||
|
||||
if limit != 2 {
|
||||
t.Fatalf("evictAndReplace() should decremented limit; got %v; want 2", limit)
|
||||
}
|
||||
|
||||
if len(diff.place) != 4 {
|
||||
t.Fatalf("evictAndReplace() didn't insert into diffResult properly: %v", diff.place)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTaskGroupConstraints(t *testing.T) {
|
||||
constr := &structs.Constraint{Hard: true}
|
||||
constr2 := &structs.Constraint{LTarget: "foo"}
|
||||
constr3 := &structs.Constraint{Weight: 10}
|
||||
|
||||
tg := &structs.TaskGroup{
|
||||
Name: "web",
|
||||
Count: 10,
|
||||
Constraints: []*structs.Constraint{constr},
|
||||
Tasks: []*structs.Task{
|
||||
&structs.Task{
|
||||
Driver: "exec",
|
||||
Resources: &structs.Resources{
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
},
|
||||
Constraints: []*structs.Constraint{constr2},
|
||||
},
|
||||
&structs.Task{
|
||||
Driver: "docker",
|
||||
Resources: &structs.Resources{
|
||||
CPU: 500,
|
||||
MemoryMB: 256,
|
||||
},
|
||||
Constraints: []*structs.Constraint{constr3},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Build the expected values.
|
||||
expConstr := []*structs.Constraint{constr, constr2, constr3}
|
||||
expDrivers := map[string]struct{}{"exec": struct{}{}, "docker": struct{}{}}
|
||||
expSize := &structs.Resources{
|
||||
CPU: 1000,
|
||||
MemoryMB: 512,
|
||||
}
|
||||
|
||||
actConstrains := taskGroupConstraints(tg)
|
||||
if !reflect.DeepEqual(actConstrains.constraints, expConstr) {
|
||||
t.Fatalf("taskGroupConstraints(%v) returned %v; want %v", tg, actConstrains.constraints, expConstr)
|
||||
}
|
||||
if !reflect.DeepEqual(actConstrains.drivers, expDrivers) {
|
||||
t.Fatalf("taskGroupConstraints(%v) returned %v; want %v", tg, actConstrains.drivers, expDrivers)
|
||||
}
|
||||
if !reflect.DeepEqual(actConstrains.size, expSize) {
|
||||
t.Fatalf("taskGroupConstraints(%v) returned %v; want %v", tg, actConstrains.size, expSize)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@ gox \
|
||||
-arch="${XC_ARCH}" \
|
||||
-osarch="!linux/arm !darwin/386" \
|
||||
-ldflags "-X main.GitCommit ${GIT_COMMIT}${GIT_DIRTY}" \
|
||||
-cgo \
|
||||
-output "pkg/{{.OS}}_{{.Arch}}/nomad" \
|
||||
.
|
||||
|
||||
|
||||
@@ -10,4 +10,4 @@ go build -o $TEMPDIR/nomad || exit 1
|
||||
|
||||
# Run the tests
|
||||
echo "--> Running tests"
|
||||
go list ./... | PATH=$TEMPDIR:$PATH xargs -n1 go test -timeout=40s
|
||||
go list ./... | PATH=$TEMPDIR:$PATH xargs -n1 go test -cover -timeout=40s
|
||||
|
||||
@@ -16,7 +16,8 @@ while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
|
||||
DIR="$( cd -P "$( dirname "$SOURCE" )/.." && pwd )"
|
||||
|
||||
# Copy into tmpdir
|
||||
cp -R $DIR/website/ $DEPLOY/
|
||||
shopt -s dotglob
|
||||
cp -r $DIR/website/* $DEPLOY/
|
||||
|
||||
# Change into that directory
|
||||
pushd $DEPLOY &>/dev/null
|
||||
@@ -25,6 +26,7 @@ pushd $DEPLOY &>/dev/null
|
||||
touch .gitignore
|
||||
echo ".sass-cache" >> .gitignore
|
||||
echo "build" >> .gitignore
|
||||
echo "vendor" >> .gitignore
|
||||
|
||||
# Add everything
|
||||
git init -q .
|
||||
|
||||
@@ -22,6 +22,8 @@ import (
|
||||
"os/exec"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
)
|
||||
|
||||
// offset is used to atomically increment the port numbers.
|
||||
@@ -156,8 +158,7 @@ func NewTestServer(t *testing.T, cb ServerConfigCallback) *TestServer {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
var client *http.Client
|
||||
client = http.DefaultClient
|
||||
client := cleanhttp.DefaultClient()
|
||||
|
||||
server := &TestServer{
|
||||
Config: nomadConfig,
|
||||
|
||||
1
website/.ruby-version
Normal file
1
website/.ruby-version
Normal file
@@ -0,0 +1 @@
|
||||
2.2.2
|
||||
@@ -1,12 +1,12 @@
|
||||
GIT
|
||||
remote: git://github.com/hashicorp/middleman-hashicorp.git
|
||||
revision: 76f0f284ad44cea0457484ea83467192f02daf87
|
||||
revision: 15cbda0cf1d963fa71292dee921229e7ee618272
|
||||
specs:
|
||||
middleman-hashicorp (0.1.0)
|
||||
middleman-hashicorp (0.2.0)
|
||||
bootstrap-sass (~> 3.3)
|
||||
builder (~> 3.2)
|
||||
less (~> 2.6)
|
||||
middleman (~> 3.3)
|
||||
middleman (~> 3.4)
|
||||
middleman-livereload (~> 3.4)
|
||||
middleman-minify-html (~> 3.4)
|
||||
middleman-syntax (~> 2.0)
|
||||
@@ -21,21 +21,25 @@ GIT
|
||||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
activesupport (4.1.12)
|
||||
i18n (~> 0.6, >= 0.6.9)
|
||||
activesupport (4.2.4)
|
||||
i18n (~> 0.7)
|
||||
json (~> 1.7, >= 1.7.7)
|
||||
minitest (~> 5.1)
|
||||
thread_safe (~> 0.1)
|
||||
thread_safe (~> 0.3, >= 0.3.4)
|
||||
tzinfo (~> 1.1)
|
||||
autoprefixer-rails (5.2.1)
|
||||
autoprefixer-rails (6.0.3)
|
||||
execjs
|
||||
json
|
||||
bootstrap-sass (3.3.5.1)
|
||||
autoprefixer-rails (>= 5.0.0.1)
|
||||
sass (>= 3.3.0)
|
||||
builder (3.2.2)
|
||||
celluloid (0.16.0)
|
||||
timers (~> 4.0.0)
|
||||
capybara (2.4.4)
|
||||
mime-types (>= 1.16)
|
||||
nokogiri (>= 1.3.3)
|
||||
rack (>= 1.0.0)
|
||||
rack-test (>= 0.5.4)
|
||||
xpath (~> 2.0)
|
||||
chunky_png (1.3.4)
|
||||
coffee-script (2.4.1)
|
||||
coffee-script-source
|
||||
@@ -59,52 +63,50 @@ GEM
|
||||
eventmachine (>= 0.12.9)
|
||||
http_parser.rb (~> 0.6.0)
|
||||
erubis (2.7.0)
|
||||
eventmachine (1.0.7)
|
||||
execjs (2.5.2)
|
||||
eventmachine (1.0.8)
|
||||
execjs (2.6.0)
|
||||
ffi (1.9.10)
|
||||
git-version-bump (0.15.1)
|
||||
haml (4.0.6)
|
||||
haml (4.0.7)
|
||||
tilt
|
||||
hike (1.2.3)
|
||||
hitimes (1.2.2)
|
||||
hooks (0.4.0)
|
||||
uber (~> 0.0.4)
|
||||
hooks (0.4.1)
|
||||
uber (~> 0.0.14)
|
||||
htmlcompressor (0.2.0)
|
||||
http_parser.rb (0.6.0)
|
||||
i18n (0.7.0)
|
||||
json (1.8.3)
|
||||
kramdown (1.8.0)
|
||||
kramdown (1.9.0)
|
||||
less (2.6.0)
|
||||
commonjs (~> 0.2.7)
|
||||
libv8 (3.16.14.11)
|
||||
listen (2.10.1)
|
||||
celluloid (~> 0.16.0)
|
||||
libv8 (3.16.14.13)
|
||||
listen (3.0.3)
|
||||
rb-fsevent (>= 0.9.3)
|
||||
rb-inotify (>= 0.9)
|
||||
middleman (3.3.12)
|
||||
middleman (3.4.0)
|
||||
coffee-script (~> 2.2)
|
||||
compass (>= 1.0.0, < 2.0.0)
|
||||
compass-import-once (= 1.0.5)
|
||||
execjs (~> 2.0)
|
||||
haml (>= 4.0.5)
|
||||
kramdown (~> 1.2)
|
||||
middleman-core (= 3.3.12)
|
||||
middleman-core (= 3.4.0)
|
||||
middleman-sprockets (>= 3.1.2)
|
||||
sass (>= 3.4.0, < 4.0)
|
||||
uglifier (~> 2.5)
|
||||
middleman-core (3.3.12)
|
||||
activesupport (~> 4.1.0)
|
||||
middleman-core (3.4.0)
|
||||
activesupport (~> 4.1)
|
||||
bundler (~> 1.1)
|
||||
capybara (~> 2.4.4)
|
||||
erubis
|
||||
hooks (~> 0.3)
|
||||
i18n (~> 0.7.0)
|
||||
listen (>= 2.7.9, < 3.0)
|
||||
listen (~> 3.0.3)
|
||||
padrino-helpers (~> 0.12.3)
|
||||
rack (>= 1.4.5, < 2.0)
|
||||
rack-test (~> 0.6.2)
|
||||
thor (>= 0.15.2, < 2.0)
|
||||
tilt (~> 1.4.1, < 2.0)
|
||||
middleman-livereload (3.4.2)
|
||||
middleman-livereload (3.4.3)
|
||||
em-websocket (~> 0.5.1)
|
||||
middleman-core (>= 3.3)
|
||||
rack-livereload (~> 0.3.15)
|
||||
@@ -119,8 +121,12 @@ GEM
|
||||
middleman-syntax (2.0.0)
|
||||
middleman-core (~> 3.2)
|
||||
rouge (~> 1.0)
|
||||
minitest (5.7.0)
|
||||
mime-types (2.6.2)
|
||||
mini_portile (0.6.2)
|
||||
minitest (5.8.1)
|
||||
multi_json (1.11.2)
|
||||
nokogiri (1.6.6.2)
|
||||
mini_portile (~> 0.6.0)
|
||||
padrino-helpers (0.12.5)
|
||||
i18n (~> 0.6, >= 0.6.7)
|
||||
padrino-support (= 0.12.5)
|
||||
@@ -128,7 +134,7 @@ GEM
|
||||
padrino-support (0.12.5)
|
||||
activesupport (>= 3.1)
|
||||
rack (1.6.4)
|
||||
rack-contrib (1.3.0)
|
||||
rack-contrib (1.4.0)
|
||||
git-version-bump (~> 0.15)
|
||||
rack (~> 1.4)
|
||||
rack-livereload (0.3.16)
|
||||
@@ -136,16 +142,16 @@ GEM
|
||||
rack-protection (1.5.3)
|
||||
rack
|
||||
rack-rewrite (1.5.1)
|
||||
rack-ssl-enforcer (0.2.8)
|
||||
rack-ssl-enforcer (0.2.9)
|
||||
rack-test (0.6.3)
|
||||
rack (>= 1.0)
|
||||
rb-fsevent (0.9.5)
|
||||
rb-fsevent (0.9.6)
|
||||
rb-inotify (0.9.5)
|
||||
ffi (>= 0.5.0)
|
||||
redcarpet (3.3.2)
|
||||
redcarpet (3.3.3)
|
||||
ref (2.0.0)
|
||||
rouge (1.9.1)
|
||||
sass (3.4.16)
|
||||
rouge (1.10.1)
|
||||
sass (3.4.19)
|
||||
sprockets (2.12.4)
|
||||
hike (~> 1.2)
|
||||
multi_json (~> 1.0)
|
||||
@@ -159,21 +165,21 @@ GEM
|
||||
therubyracer (0.12.2)
|
||||
libv8 (~> 3.16.14.0)
|
||||
ref
|
||||
thin (1.6.3)
|
||||
thin (1.6.4)
|
||||
daemons (~> 1.0, >= 1.0.9)
|
||||
eventmachine (~> 1.0)
|
||||
eventmachine (~> 1.0, >= 1.0.4)
|
||||
rack (~> 1.0)
|
||||
thor (0.19.1)
|
||||
thread_safe (0.3.5)
|
||||
tilt (1.4.1)
|
||||
timers (4.0.1)
|
||||
hitimes
|
||||
tzinfo (1.2.2)
|
||||
thread_safe (~> 0.1)
|
||||
uber (0.0.13)
|
||||
uglifier (2.7.1)
|
||||
uber (0.0.15)
|
||||
uglifier (2.7.2)
|
||||
execjs (>= 0.3.0)
|
||||
json (>= 1.8.0)
|
||||
xpath (2.0.0)
|
||||
nokogiri (~> 1.3)
|
||||
|
||||
PLATFORMS
|
||||
ruby
|
||||
|
||||
10
website/Makefile
Normal file
10
website/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
all: build
|
||||
|
||||
init:
|
||||
bundle
|
||||
|
||||
dev: init
|
||||
bundle exec middleman server
|
||||
|
||||
build: init
|
||||
bundle exec middleman build
|
||||
@@ -12,13 +12,7 @@ requests like any normal GitHub project, and we'll merge it in.
|
||||
|
||||
## Running the Site Locally
|
||||
|
||||
Running the site locally is simple. Clone this repo and run the following
|
||||
commands:
|
||||
|
||||
```
|
||||
$ bundle
|
||||
$ bundle exec middleman server
|
||||
```
|
||||
Running the site locally is simple. Clone this repo and run `make dev`.
|
||||
|
||||
Then open up `http://localhost:4567`. Note that some URLs you may need to append
|
||||
".html" to make them work (in the navigation).
|
||||
|
||||
@@ -1,22 +1,9 @@
|
||||
#-------------------------------------------------------------------------
|
||||
# Configure Middleman
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
helpers do
|
||||
def livestream_active?
|
||||
# Must set key for date
|
||||
ENV["LIVESTREAM_ACTIVE"].present?
|
||||
end
|
||||
end
|
||||
|
||||
set :base_url, "https://www.nomadproject.io/"
|
||||
|
||||
activate :hashicorp do |h|
|
||||
h.version = ENV["NOMAD_VERSION"]
|
||||
h.bintray_enabled = ENV["BINTRAY_ENABLED"] == "1"
|
||||
h.bintray_repo = "mitchellh/nomad"
|
||||
h.bintray_user = "mitchellh"
|
||||
h.bintray_key = ENV["BINTRAY_API_KEY"]
|
||||
h.name = "nomad"
|
||||
h.version = "0.1.2"
|
||||
h.github_slug = "hashicorp/nomad"
|
||||
|
||||
h.minify_javascript = false
|
||||
end
|
||||
|
||||
@@ -207,8 +207,8 @@ configured on server nodes.
|
||||
option is not required and has no default.
|
||||
* <a id="meta">`meta`</a>: This is a key/value mapping of metadata pairs. This
|
||||
is a free-form map and can contain any string values.
|
||||
* `options`: This is a key/value mapping of internal configuration for clients,
|
||||
such as for driver configuration.
|
||||
* <a id="options">`options`</a>: This is a key/value mapping of internal
|
||||
configuration for clients, such as for driver configuration.
|
||||
* <a id="network_interface">`network_interface`</a>: This is a string to force
|
||||
network fingerprinting to use a specific network interface
|
||||
* <a id="network_speed">`network_speed`</a>: This is an int that sets the
|
||||
|
||||
@@ -9,7 +9,7 @@ description: >
|
||||
# Command: init
|
||||
|
||||
The `init` command creates an example [job specification](/docs/jobspec/) in the current
|
||||
directory that demonstrates some common configurations for tasks, tasks groups,
|
||||
directory that demonstrates some common configurations for tasks, task groups,
|
||||
runtime constraints, and resource allocation.
|
||||
|
||||
Please refer to the [jobspec](/docs/jobspec/) and [drivers](/docs/drivers/)
|
||||
|
||||
@@ -16,7 +16,7 @@ The `status` command displays status information for jobs.
|
||||
nomad status [options] [job]
|
||||
```
|
||||
|
||||
This command accepts an option job ID as the sole argument. If the job ID is
|
||||
This command accepts an optional job ID as the sole argument. If the job ID is
|
||||
provided, information about the specific job is queried and displayed. If the ID
|
||||
is omitted, the command lists out all of the existing jobs and a few of the most
|
||||
useful status fields for each.
|
||||
|
||||
@@ -20,7 +20,7 @@ nomad stop [options] <job>
|
||||
The stop command requires a single argument, specifying the job ID to
|
||||
cancel.
|
||||
|
||||
Upon successful deregistraion, an interactive monitor session will start to
|
||||
Upon successful deregistration, an interactive monitor session will start to
|
||||
display log lines as the job unwinds its allocations and completes shutting
|
||||
down. The monitor will exit once all allocations are stopped and the job has
|
||||
reached a terminal state. It is safe to exit the monitor early using ctrl+c.
|
||||
|
||||
@@ -20,7 +20,7 @@ nomad version
|
||||
## Output
|
||||
|
||||
This command prints both the version number as well as the exact commit SHA used
|
||||
during the build. The SHA may also have a the string `+CHANGES` appended to the
|
||||
during the build. The SHA may also have the string `+CHANGES` appended to the
|
||||
end, indicating that local, uncommitted changes were detected at build time.
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -23,10 +23,14 @@ The `docker` driver supports the following configuration in the job specificatio
|
||||
|
||||
* `command` - (Optional) The command to run when starting the container.
|
||||
|
||||
* `args` - (Optional) Arguments to the optional `command`. If no `command` is
|
||||
present, `args` are ignored.
|
||||
|
||||
* `network_mode` - (Optional) The network mode to be used for the container.
|
||||
Valid options are `default`, `bridge`, `host` or `none`. If nothing is
|
||||
specified, the container will start in `bridge` mode. The `container`
|
||||
network mode is not supported right now.
|
||||
network mode is not supported right now and is reported as an invalid
|
||||
option.
|
||||
|
||||
### Port Mapping
|
||||
|
||||
@@ -47,8 +51,8 @@ port mapping will still be able to make outbound network connections.
|
||||
|
||||
Typically when you create a Docker container you configure the service to start
|
||||
listening on a port (or ports) when you start the container. For example, redis
|
||||
starts listening on `6379` when you `Docker run redis`. Nomad supports this by
|
||||
mapping the random port to the port inside the container.
|
||||
starts listening on `6379` when you `docker run redis`. Nomad can support this by
|
||||
mapping a random port on the host machine to the port inside the container.
|
||||
|
||||
You need to tell Nomad which ports your container is using so Nomad can map
|
||||
allocated ports for you. You do so by specifying a **numeric port value** for
|
||||
@@ -111,12 +115,24 @@ The `docker` driver has the following configuration options:
|
||||
* `docker.endpoint` - Defaults to `unix:///var/run/docker.sock`. You will need
|
||||
to customize this if you use a non-standard socket (http or another location).
|
||||
|
||||
* `docker.cleanup.container` Defaults to `true`. Changing this to `false` will
|
||||
prevent Nomad from removing containers from stopped tasks.
|
||||
|
||||
* `docker.cleanup.image` Defaults to `true`. Changing this to `false` will
|
||||
prevent Nomad from removing images from stopped tasks.
|
||||
|
||||
Note: When testing or using the `-dev` flag you can use `DOCKER_HOST`,
|
||||
`DOCKER_TLS_VERIFY`, and `DOCKER_CERT_PATH` to customize Nomad's behavior. In
|
||||
production Nomad will always read `docker.endpoint`.
|
||||
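As an illustrative sketch only (not text from this commit), the cleanup options above could be set in the client `options` block, mirroring the style of the `raw_exec` example later in these docs; the quoted boolean value is an assumption about how such options are written:

```
options = {
    # keep images from stopped tasks on the host (assumed quoting style)
    docker.cleanup.image = "false"
}
```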
|
||||
## Client Attributes
|
||||
|
||||
The `docker` driver will set the following client attributes:
|
||||
|
||||
* `driver.Docker` - This will be set to "1", indicating the
|
||||
* `driver.docker` - This will be set to "1", indicating the
|
||||
driver is available.
|
||||
* `driver.docker.version` - This will be set to version of the
|
||||
docker server
|
||||
|
||||
## Resource Isolation
|
||||
|
||||
@@ -125,7 +141,7 @@ The `docker` driver will set the following client attributes:
|
||||
Nomad limits containers' CPU based on CPU shares. CPU shares allow containers to
|
||||
burst past their CPU limits. CPU limits will only be imposed when there is
|
||||
contention for resources. When the host is under load your process may be
|
||||
throttled to stabilize QOS depending how how many shares it has. You can see how
|
||||
throttled to stabilize QOS depending on how many shares it has. You can see how
|
||||
many CPU shares are available to your process by reading `NOMAD_CPU_LIMIT`. 1000
|
||||
shares are approximately equal to 1Ghz.
|
||||
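For example, a task that should receive roughly one core's worth of shares might declare a `resources` block like the sketch below. This is a minimal illustration based on the units described above, not text from the original page; the exact values are placeholders.

```
resources {
    cpu = 1000    # ~1 GHz of CPU shares
    memory = 256  # MB
}
```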
|
||||
|
||||
@@ -6,21 +6,24 @@ description: |-
|
||||
The Exec task driver is used to run binaries using OS isolation primitives.
|
||||
---
|
||||
|
||||
# Fork/Exec Driver
|
||||
# Isolated Fork/Exec Driver
|
||||
|
||||
Name: `exec`
|
||||
|
||||
The `exec` driver is used to simply execute a particular command for a task.
|
||||
This is the simplest driver and is extremely flexible. In particlar, because
|
||||
it can invoke any command, it can be used to call scripts or other wrappers
|
||||
which provide higher level features.
|
||||
However unlike [`raw_exec`](raw_exec.html) it uses the underlying isolation
|
||||
primitives of the operating system to limit the task's access to resources. While
|
||||
simple, since the `exec` driver can invoke any command, it can be used to call
|
||||
scripts or other wrappers which provide higher level features.
|
||||
|
||||
## Task Configuration
|
||||
|
||||
The `exec` driver supports the following configuration in the job spec:
|
||||
|
||||
* `command` - The command to execute. Must be provided.
|
||||
|
||||
* `command` - (Required) The command to execute. Must be provided.
|
||||
* `artifact_source` – (Optional) Source location of an executable artifact. Must be accessible
|
||||
from the Nomad client. If you specify an `artifact_source` to be executed, you
|
||||
must reference it in the `command` as shown in the examples below.
|
||||
* `args` - The argument list to the command, space separated. Optional.
|
||||
|
||||
## Client Requirements
|
||||
@@ -29,6 +32,30 @@ The `exec` driver can run on all supported operating systems but to provide
|
||||
proper isolation the client must be run as root on non-Windows operating systems.
|
||||
Further, to support cgroups, `/sys/fs/cgroups/` must be mounted.
|
||||
|
||||
You must specify a `command` to be executed. Optionally you can specify an
|
||||
`artifact_source` to be downloaded as well. Any `command` is assumed to be present on the
|
||||
running client, or a downloaded artifact.
|
||||
|
||||
## Examples
|
||||
|
||||
To run a binary present on the Node:
|
||||
|
||||
```
|
||||
config {
|
||||
command = "/bin/sleep"
|
||||
args = 1
|
||||
}
|
||||
```
|
||||
|
||||
To execute a binary specified by `artifact_source`:
|
||||
|
||||
```
|
||||
config {
|
||||
artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin"
|
||||
command = "$NOMAD_TASK_DIR/binary.bin"
|
||||
}
|
||||
```
|
||||
|
||||
## Client Attributes
|
||||
|
||||
The `exec` driver will set the following client attributes:
|
||||
|
||||
@@ -19,9 +19,12 @@ HTTP from the Nomad client.
|
||||
The `java` driver supports the following configuration in the job spec:
|
||||
|
||||
* `jar_source` - **(Required)** The hosted location of the source Jar file. Must be accessible
|
||||
from the Nomad client, via HTTP
|
||||
from the Nomad client
|
||||
|
||||
* `args` - (Optional) The argument list for the `java` command, space separated.
|
||||
* `args` - **(Optional)** The argument list for the `java` command, space separated.
|
||||
|
||||
* `jvm_options` - **(Optional)** JVM options to be passed while invoking java. These options
|
||||
are passed through to the JVM and are not validated in any way by Nomad. See the sketch below.
|
||||
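A minimal configuration sketch using the keys above might look as follows; the URL, heap options, and argument string are hypothetical placeholders rather than values from this commit, and the string form of `jvm_options` is an assumption:

```
config {
    # hypothetical, internally hosted jar
    jar_source = "https://internal.example.com/jars/hello.jar"
    jvm_options = "-Xmx256m -Xms128m"
    args = "8080"
}
```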
|
||||
## Client Requirements
|
||||
|
||||
@@ -49,6 +52,6 @@ On Linux, Nomad will attempt to use cgroups, namespaces, and chroot
|
||||
to isolate the resources of a process. If the Nomad agent is not
|
||||
running as root many of these mechanisms cannot be used.
|
||||
|
||||
As a baseline, the Java jars will be ran inside a Java Virtual Machine,
|
||||
As a baseline, the Java jars will be run inside a Java Virtual Machine,
|
||||
providing a minimum amount of isolation.
|
||||
|
||||
|
||||
@@ -25,8 +25,8 @@ The `Qemu` driver supports the following configuration in the job spec:
|
||||
|
||||
* `image_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible
|
||||
from the Nomad client, via HTTP.
|
||||
* `checksum` - **(Required)** The MD5 checksum of the `qemu` image. If the
|
||||
checksums do not match, the `Qemu` diver will fail to start the image
|
||||
* `checksum` - **(Required)** The SHA256 checksum of the `qemu` image. If the
|
||||
checksums do not match, the `Qemu` driver will fail to start the image
|
||||
* `accelerator` - (Optional) The type of accelerator to use in the invocation.
|
||||
If the host machine has `Qemu` installed with KVM support, users can specify `kvm` for the `accelerator`. Default is `tcg`
|
||||
* `host_port` - **(Required)** Port on the host machine to forward to the guest
|
||||
@@ -37,7 +37,7 @@ in the `Task` specification
|
||||
|
||||
## Client Requirements
|
||||
|
||||
The `Qemu` driver requires Qemu to be installed and in your systems `$PATH`.
|
||||
The `Qemu` driver requires Qemu to be installed and in your system's `$PATH`.
|
||||
The `image_source` must be accessible by the node running Nomad. This can be an
|
||||
internal source, private to your cluster, but it must be reachable by the client
|
||||
over HTTP.
|
||||
@@ -48,7 +48,7 @@ The `Qemu` driver will set the following client attributes:
|
||||
|
||||
* `driver.qemu` - Set to `1` if Qemu is found on the host node. Nomad determines
|
||||
this by executing `qemu-system-x86_64 -version` on the host and parsing the output
|
||||
* `driver.qemu.version` - Version of `qemu-system-x86_64, ex: `2.4.0`
|
||||
* `driver.qemu.version` - Version of `qemu-system-x86_64`, ex: `2.4.0`
|
||||
|
||||
## Resource Isolation
|
||||
|
||||
@@ -57,6 +57,6 @@ workloads. Nomad can use Qemu KVM's hardware-assisted virtualization to deliver
|
||||
better performance.
|
||||
|
||||
Virtualization provides the highest level of isolation for workloads that
|
||||
require additional security, and resources use is constrained by the Qemu
|
||||
require additional security, and resource use is constrained by the Qemu
|
||||
hypervisor rather than the host kernel. VM network traffic still flows through
|
||||
the host's interface(s).
|
||||
the host's interface(s).
|
||||
|
||||
73
website/source/docs/drivers/raw_exec.html.md
Normal file
73
website/source/docs/drivers/raw_exec.html.md
Normal file
@@ -0,0 +1,73 @@
|
||||
---
|
||||
layout: "docs"
|
||||
page_title: "Drivers: Raw Exec"
|
||||
sidebar_current: "docs-drivers-raw-exec"
|
||||
description: |-
|
||||
The Raw Exec task driver simply fork/execs and provides no isolation.
|
||||
---
|
||||
|
||||
# Raw Fork/Exec Driver
|
||||
|
||||
Name: `raw_exec`
|
||||
|
||||
The `raw_exec` driver is used to execute a command for a task without any
|
||||
isolation. Further, the task is started as the same user as the Nomad process.
|
||||
As such, it should be used with extreme care and is disabled by default.
|
||||
|
||||
## Task Configuration
|
||||
|
||||
The `raw_exec` driver supports the following configuration in the job spec:
|
||||
|
||||
* `command` - (Required) The command to execute. Must be provided.
|
||||
* `artifact_source` – (Optional) Source location of an executable artifact. Must be accessible
|
||||
from the Nomad client. If you specify an `artifact_source` to be executed, you
|
||||
must reference it in the `command` as shown in the examples below.
|
||||
* `args` - The argument list to the command, space separated. Optional.
|
||||
|
||||
## Client Requirements
|
||||
|
||||
The `raw_exec` driver can run on all supported operating systems. It is however
|
||||
disabled by default. In order to be enabled, the Nomad client configuration must
|
||||
explicitly enable the `raw_exec` driver in the
|
||||
[options](../agent/config.html#options) field:
|
||||
|
||||
```
|
||||
options = {
|
||||
driver.raw_exec.enable = "1"
|
||||
}
|
||||
```
|
||||
|
||||
You must specify a `command` to be executed. Optionally you can specify an
|
||||
`artifact_source` to be executed. Any `command` is assumed to be present on the
|
||||
running client, or a downloaded artifact.
|
||||
|
||||
## Examples
|
||||
|
||||
To run a binary present on the Node:
|
||||
|
||||
```
|
||||
config {
|
||||
command = "/bin/sleep"
|
||||
args = 1
|
||||
}
|
||||
```
|
||||
|
||||
To execute a binary specified by `artifact_source`:
|
||||
|
||||
```
|
||||
config {
|
||||
artifact_source = "https://dl.dropboxusercontent.com/u/1234/binary.bin"
|
||||
command = "$NOMAD_TASK_DIR/binary.bin"
|
||||
}
|
||||
```
|
||||
|
||||
## Client Attributes
|
||||
|
||||
The `raw_exec` driver will set the following client attributes:
|
||||
|
||||
* `driver.raw_exec` - This will be set to "1", indicating the
|
||||
driver is available.
|
||||
|
||||
## Resource Isolation
|
||||
|
||||
The `raw_exec` driver provides no isolation.
|
||||
@@ -18,10 +18,20 @@ containers.
|
||||
|
||||
The `Rkt` driver supports the following configuration in the job spec:
|
||||
|
||||
* `trust_prefix` - **(Required)** The trust prefix to be passed to rkt. Must be reachable from
|
||||
the box running the nomad agent.
|
||||
* `name` - **(Required)** Fully qualified name of an image to run using rkt
|
||||
* `exec` - **(Optional**) A command to execute on the ACI
|
||||
* `trust_prefix` - **(Optional)** The trust prefix to be passed to rkt. Must be reachable from
|
||||
the box running the nomad agent. If not specified, the image is run without
|
||||
verifying the image signature.
|
||||
* `image` - **(Required)** The image to run which may be specified by name,
|
||||
hash, ACI address or docker registry.
|
||||
* `command` - **(Optional)** A command to execute on the ACI.
|
||||
* `args` - **(Optional)** A string of args to pass into the image. See the sketch below.
|
||||
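As a non-authoritative sketch of these keys, the image name, trust prefix, and command below are illustrative placeholders only:

```
config {
    trust_prefix = "coreos.com/etcd"
    image = "coreos.com/etcd:v2.0.4"
    command = "/etcd"
}
```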
|
||||
## Task Directories
|
||||
|
||||
The `Rkt` driver does not currently support mounting the `alloc/` and `local/`
|
||||
directory. It is currently blocked by this [Rkt
|
||||
issue](https://github.com/coreos/rkt/issues/761). As such the corresponding
|
||||
[environment variables](/docs/jobspec/environment.html#task_dir) are not set.
|
||||
|
||||
## Client Requirements
|
||||
|
||||
@@ -34,7 +44,7 @@ over HTTP.
|
||||
|
||||
The `Rkt` driver will set the following client attributes:
|
||||
|
||||
* `driver.rkt` - Set to `true` if Rkt is found on the host node. Nomad determines
|
||||
* `driver.rkt` - Set to `1` if Rkt is found on the host node. Nomad determines
|
||||
this by executing `rkt version` on the host and parsing the output
|
||||
* `driver.rkt.version` - Version of `rkt` eg: `0.8.1`
|
||||
* `driver.rkt.appc.version` - Version of `appc` that `rkt` is using eg: `0.8.1`
|
||||
|
||||
@@ -52,7 +52,7 @@ be specified using the `?region=` query parameter.
|
||||
"driver.java.vm": "Java HotSpot(TM) 64-Bit Server VM (build 25.5-b02, mixed mode)",
|
||||
"hostname": "Armons-MacBook-Air.local",
|
||||
"kernel.name": "darwin",
|
||||
"kernel.version": "14.4.0\n",
|
||||
"kernel.version": "14.4.0",
|
||||
"memory.totalbytes": "8589934592",
|
||||
"os.name": "darwin",
|
||||
"os.version": "14.4.0",
|
||||
|
||||
@@ -121,15 +121,15 @@ specified by the job. Resource utilization is maximized by bin packing, in which
|
||||
the scheduling tries to make use of all the resources of a machine without
|
||||
exhausting any dimension. Job constraints can be used to ensure an application is
|
||||
running in an appropriate environment. Constraints can be technical requirements based
|
||||
on hardware features such as architecture, availability of GPUs, or software features
|
||||
on hardware features such as architecture and availability of GPUs, or software features
|
||||
like operating system and kernel version, or they can be business constraints like
|
||||
ensuring PCI compliant workloads run on appropriate servers.
|
||||
|
||||
# Getting in Depth
|
||||
|
||||
This has been a brief high-level overview of the architecture of Nomad. There
|
||||
are more details available for each of the sub-systems. The [scheduler design](/docs/internals/scheduling.html),
|
||||
[consensus protocol](/docs/internals/consensus.html), and [gossip protocol](/docs/internals/gossip.html)
|
||||
are more details available for each of the sub-systems. The [consensus protocol](/docs/internals/consensus.html),
|
||||
[gossip protocol](/docs/internals/gossip.html), and [scheduler design](/docs/internals/scheduling.html)
|
||||
are all documented in more detail.
|
||||
|
||||
For other details, either consult the code, ask in IRC or reach out to the mailing list.
|
||||
|
||||
@@ -45,7 +45,7 @@ same sequence of logs must result in the same state, meaning behavior must be de
|
||||
For Nomad's purposes, all server nodes are in the peer set of the local region.
|
||||
|
||||
* **Quorum** - A quorum is a majority of members from a peer set: for a set of size `n`,
|
||||
quorum requires at least `(n/2)+1` members.
|
||||
quorum requires at least `⌊(n/2)+1⌋` members.
|
||||
For example, if there are 5 members in the peer set, we would need 3 nodes
|
||||
to form a quorum. If a quorum of nodes is unavailable for any reason, the
|
||||
cluster becomes *unavailable* and no new logs can be committed.
|
||||
|
||||
@@ -53,10 +53,11 @@ and ensure at least once delivery.
|
||||
|
||||
Nomad servers run scheduling workers, defaulting to one per CPU core, which are used to
|
||||
process evaluations. The workers dequeue evaluations from the broker, and then invoke
|
||||
the appropriate schedule as specified by the job. Nomad ships with a `service` scheduler
|
||||
the appropriate scheduler as specified by the job. Nomad ships with a `service` scheduler
|
||||
that optimizes for long-lived services, a `batch` scheduler that is used for fast placement
|
||||
of batch jobs, and a `core` scheduler which is used for internal maintenance. Nomad can
|
||||
be extended to support custom schedulers as well.
|
||||
of batch jobs, a `system` scheduler that is used to run jobs on every node,
|
||||
and a `core` scheduler which is used for internal maintenance.
|
||||
Nomad can be extended to support custom schedulers as well.
|
||||
|
||||
Schedulers are responsible for processing an evaluation and generating an allocation _plan_.
|
||||
The plan is the set of allocations to evict, update, or create. The specific logic used to
|
||||
@@ -75,8 +76,8 @@ and density of applications, but is also augmented by affinity and anti-affinity
|
||||
Once the scheduler has ranked enough nodes, the highest ranking node is selected and
|
||||
added to the allocation plan.
|
||||
|
||||
When planning is complete, the scheduler submits the plan to the leader and
|
||||
gets added to the plan queue. The plan queue manages pending plans, provides priority
|
||||
When planning is complete, the scheduler submits the plan to the leader which adds
|
||||
the plan to the plan queue. The plan queue manages pending plans, provides priority
|
||||
ordering, and allows Nomad to handle concurrency races. Multiple schedulers are running
|
||||
in parallel without locking or reservations, making Nomad optimistically concurrent.
|
||||
As a result, schedulers might overlap work on the same node and cause resource
|
||||
|
||||
@@ -17,7 +17,7 @@ environment variables.
|
||||
|
||||
When you request resources for a job, Nomad creates a resource offer. The final
|
||||
resources for your job are not determined until it is scheduled. Nomad will
|
||||
tell you which resources have been allocated after evaulation and placement.
|
||||
tell you which resources have been allocated after evaluation and placement.
|
||||
|
||||
### CPU and Memory
|
||||
|
||||
@@ -28,7 +28,7 @@ the memory limit to inform how large your in-process cache should be, or to
|
||||
decide when to flush buffers to disk.
|
||||
|
||||
Both CPU and memory are presented as integers. The unit for CPU limit is
|
||||
`1024 = 1Ghz`. The unit for memory `1 = 1 megabytes`.
|
||||
`1024 = 1Ghz`. The unit for memory is `1 = 1 megabytes`.
|
||||
|
||||
Writing your applications to adjust to these values at runtime provides greater
|
||||
scheduling flexibility since you can adjust the resource allocations in your
|
||||
@@ -56,6 +56,27 @@ exported as environment variables for consistency, e.g. `NOMAD_PORT_5000`.
|
||||
|
||||
Please see the relevant driver documentation for details.
|
||||
|
||||
<a id="task_dir">### Task Directories</a>
|
||||
|
||||
Nomad makes the following two directories available to tasks:
|
||||
|
||||
* `alloc/`: This directory is shared across all tasks in a task group and can be
|
||||
used to store data that needs to be used by multiple tasks, such as a log
|
||||
shipper.
|
||||
* `local/`: This directory is private to each task. It can be used to store
|
||||
arbitrary data that shouldn't be shared by tasks in the task group.
|
||||
|
||||
Both these directories are persisted until the allocation is removed, which
|
||||
occurs hours after all the tasks in the task group enter terminal states. This
|
||||
gives time to view the data produced by tasks.
|
||||
|
||||
Depending on the driver and operating system being targeted, the directories are
|
||||
made available in various ways. For example, on `docker` the directories are
|
||||
bind-mounted into the container, while with `exec` on Linux the directories are mounted into the
|
||||
chroot. Regardless of how the directories are made available, the path to the
|
||||
directories can be read through the following environment variables:
|
||||
`NOMAD_ALLOC_DIR` and `NOMAD_TASK_DIR`.
|
||||
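As a hedged example of reading these variables, the command and arguments below are placeholders in the same style as the driver examples above, and the shell quoting is an assumption:

```
config {
    command = "/bin/sh"
    args = "-c 'cp /opt/seed.db $NOMAD_TASK_DIR/seed.db'"
}
```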
|
||||
## Meta
|
||||
|
||||
The job specification also allows you to specify a `meta` block to supply arbitrary
|
||||
|
||||
@@ -9,8 +9,7 @@ description: |-
|
||||
# Job Specification
|
||||
|
||||
Jobs can be specified either in [HCL](https://github.com/hashicorp/hcl) or JSON.
|
||||
HCL is meant to strike a balance between human readable and editable, as well
|
||||
as being machine-friendly.
|
||||
HCL is meant to strike a balance between human readable and editable, and machine-friendly.
|
||||
|
||||
For machine-friendliness, Nomad can also read JSON configurations. In general, we recommend
|
||||
using the HCL syntax.
|
||||
@@ -29,6 +28,9 @@ job "my-service" {
|
||||
# Spread tasks between us-west-1 and us-east-1
|
||||
datacenters = ["us-west-1", "us-east-1"]
|
||||
|
||||
# run this job globally
|
||||
type = "system"
|
||||
|
||||
# Rolling updates should be sequential
|
||||
update {
|
||||
stagger = "30s"
|
||||
@@ -132,7 +134,7 @@ The `job` object supports the following keys:
|
||||
a task group of the same name.
|
||||
|
||||
* `type` - Specifies the job type and switches which scheduler
|
||||
is used. Nomad provides the `service` and `batch` schedulers,
|
||||
is used. Nomad provides the `service`, `system` and `batch` schedulers,
|
||||
and defaults to `service`.
|
||||
|
||||
* `update` - Specifies the task update strategy. This requires providing
|
||||
@@ -218,11 +220,35 @@ The `constraint` object supports the following keys:
|
||||
to true. Soft constraints are not currently supported.
|
||||
|
||||
* `operator` - Specifies the comparison operator. Defaults to equality,
|
||||
and can be `=`, `==`, `is`, `!=`, `not`.
|
||||
and can be `=`, `==`, `is`, `!=`, `not`, `>`, `>=`, `<`, `<=`. The
|
||||
ordering is compared lexically.
|
||||
|
||||
* `value` - Specifies the value to compare the attribute against.
|
||||
This can be a literal value or another attribute.
|
||||
|
||||
* `version` - Specifies a version constraint against the attribute.
|
||||
This sets the operator to "version" and the `value` to what is
|
||||
specified. This supports a comma-separated list of constraints,
|
||||
including the pessimistic operator. See the
|
||||
[go-version](https://github.com/hashicorp/go-version) repository
|
||||
for examples. A short constraint sketch follows this list.
|
||||
|
||||
* `regexp` - Specifies a regular expression constraint against
|
||||
the attribute. This sets the operator to "regexp" and the `value`
|
||||
to the regular expression.
|
||||
|
||||
* `distinct_hosts` - `distinct_hosts` accepts a boolean `true`. The default is
|
||||
`false`.
|
||||
|
||||
When `distinct_hosts` is `true` at the Job level, each instance of all Task
|
||||
Groups specified in the job is placed on a separate host.
|
||||
|
||||
When `distinct_hosts` is `true` at the Task Group level with count > 1, each
|
||||
instance of a Task Group is placed on a separate host. Different task groups in
|
||||
the same job _may_ be co-scheduled.
|
||||
|
||||
Tasks within a task group are always co-scheduled.
|
||||
|
||||
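The sketch referenced above shows one possible `constraint` block using the `version` operator; the attribute name and version range are illustrative assumptions, not values taken from this commit:

```
constraint {
    # assumed attribute name, for illustration only
    attribute = "$attr.kernel.version"
    version = ">= 3.2"
}
```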
Below is a table documenting the variables that can be interpreted:
|
||||
|
||||
<table class="table table-bordered table-striped">
|
||||
|
||||
@@ -9,40 +9,51 @@ description: |-
|
||||
<h1>Download Nomad</h1>
|
||||
|
||||
<section class="downloads">
|
||||
<div class="description row">
|
||||
<div class="col-md-12">
|
||||
<p>
|
||||
Below are all available downloads for the latest version of Nomad
|
||||
(<%= latest_version %>). Please download the proper package for your
|
||||
operating system and architecture. You can find SHA256 checksums
|
||||
for packages <a href="https://dl.bintray.com/mitchellh/nomad/nomad_<%= latest_version %>_SHA256SUMS?direct">here</a>. You can verify the SHA256 checksums using <a href="https://hashicorp.com/security.html">our PGP public key</a> and the <a href="https://dl.bintray.com/mitchellh/nomad/nomad_<%= latest_version %>_SHA256SUMS.sig?direct">SHA256SUMs signature file</a>.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<% product_versions.each do |os, versions| %>
|
||||
<div class="row">
|
||||
<div class="col-md-12 download">
|
||||
<div class="icon pull-left"><%= system_icon(os) %>
|
||||
</div>
|
||||
<div class="details">
|
||||
<h2 class="os-name"><%= os %></h2>
|
||||
<ul>
|
||||
<% versions.each do |url| %>
|
||||
<li><a href="<%= url %>"><%= arch_for_filename(url) %></a></li>
|
||||
<% end %>
|
||||
</ul>
|
||||
<div class="clearfix">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<% end %>
|
||||
<div class="description row">
|
||||
<div class="col-md-12">
|
||||
<p>
|
||||
Below are the available downloads for the latest version of Nomad
|
||||
(<%= latest_version %>). Please download the proper package for your
|
||||
operating system and architecture.
|
||||
</p>
|
||||
<p>
|
||||
You can find the
|
||||
<a href="https://releases.hashicorp.com/nomad/<%= latest_version %>/nomad_<%= latest_version %>_SHA256SUMS">
|
||||
SHA256 checksums for Nomad <%= latest_version %>
|
||||
</a>
|
||||
online and you can
|
||||
<a href="https://releases.hashicorp.com/nomad/<%= latest_version %>/nomad_<%= latest_version %>_SHA256SUMS.sig">
|
||||
verify the checksums signature file
|
||||
</a>
|
||||
which has been signed using <a href="https://hashicorp.com/security.html" target="_TOP">HashiCorp's GPG key</a>.
|
||||
You can also <a href="https://releases.hashicorp.com/nomad" target="_TOP">download older versions of Nomad</a> from the releases service.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-12 poweredby">
|
||||
<a href='http://www.bintray.com'>
|
||||
<img src='https://www.bintray.com/docs/images/poweredByBintray_ColorTransparent.png'>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<% product_versions.each do |os, arches| %>
|
||||
<% next if os == "web" %>
|
||||
<div class="row">
|
||||
<div class="col-md-12 download">
|
||||
<div class="icon pull-left"><%= system_icon(os) %></div>
|
||||
<div class="details">
|
||||
<h2 class="os-name"><%= pretty_os(os) %></h2>
|
||||
<ul>
|
||||
<% arches.each do |arch, url| %>
|
||||
<li><a href="<%= url %>"><%= pretty_arch(arch) %></a></li>
|
||||
<% end %>
|
||||
</ul>
|
||||
<div class="clearfix"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<% end %>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-12 poweredby">
|
||||
<a href="https://www.fastly.com?utm_source=hashicorp" target="_TOP">
|
||||
<%= image_tag "fastly_logo.png" %>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.