diff --git a/CHANGELOG.md b/CHANGELOG.md index 0963b992d..6597cf4eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,17 +40,13 @@ IMPROVEMENTS: * client: Task State is tracked by client [GH-416] * client: Test Skip Detection [GH-221] * driver/docker: Advanced docker driver options [GH-390] - * driver/docker: Docker hostname can be set [GH-426] -<<<<<<< Updated upstream - * driver/docker: Mount task local and alloc directory to docker containers - [GH-290] - * driver/docker: Pass JVM options in java driver [GH-293, GH-297] - * drivers: Use BlkioWeight rather than BlkioThrottleReadIopsDevice [GH-222] - * jobspec and drivers: Driver configuration supports arbitrary struct to be - passed in jobspec [GH-415] -======= * driver/docker: Docker container name can be set [GH-389] ->>>>>>> Stashed changes + * driver/docker: Docker hostname can be set [GH-426] + * driver/docker: Mount task local and alloc directory to docker containers [GH-290] + * driver/docker: Now accepts any value for `network_mode` to support userspace networking plugins in docker 1.9 + * driver/java: Pass JVM options in java driver [GH-293, GH-297] + * drivers: Use BlkioWeight rather than BlkioThrottleReadIopsDevice [GH-222] + * jobspec and drivers: Driver configuration supports arbitrary struct to be passed in jobspec [GH-415] BUG FIXES: diff --git a/api/tasks.go b/api/tasks.go index e5ae46b5c..2990b5433 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -20,6 +20,28 @@ func NewRestartPolicy() *RestartPolicy { } } +// The ServiceCheck data model represents the consul health check that +// Nomad registers for a Task +type ServiceCheck struct { + Id string + Name string + Type string + Script string + Http string + Protocol string + Interval time.Duration + Timeout time.Duration +} + +// The Service model represents a Consul service definition +type Service struct { + Id string + Name string + Tags []string + PortLabel string `mapstructure:"port"` + Checks []ServiceCheck +} + // TaskGroup is the unit of
scheduling. type TaskGroup struct { Name string @@ -68,6 +90,7 @@ type Task struct { Config map[string]interface{} Constraints []*Constraint Env map[string]string + Services []Service Resources *Resources Meta map[string]string } diff --git a/client/driver/docker.go b/client/driver/docker.go index 96d712c54..fcbe14418 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -103,7 +103,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool // Initialize docker API client client, err := d.dockerClient() if err != nil { - d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s\n", err) + d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err) return false, nil } @@ -120,7 +120,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool // Docker isn't available so we'll simply disable the docker driver. env, err := client.Version() if err != nil { - d.logger.Printf("[INFO] driver.docker: could not connect to docker daemon at %s: %s\n", client.Endpoint(), err) + d.logger.Printf("[INFO] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err) return false, nil } node.Attributes["driver.docker"] = "1" @@ -205,14 +205,14 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, dri Binds: binds, } - d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s\n", hostConfig.Memory, task.Config["image"]) - d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s\n", hostConfig.CPUShares, task.Config["image"]) - d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s\n", hostConfig.Binds, task.Config["image"]) + d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"]) + d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) + d.logger.Printf("[DEBUG] driver.docker: binding 
directories %#v for %s", hostConfig.Binds, task.Config["image"]) // set privileged mode hostPrivileged := d.config.ReadBoolDefault("docker.privileged.enabled", false) if driverConfig.Privileged && !hostPrivileged { - return c, fmt.Errorf(`Unable to set privileged flag since "docker.privileged.enabled" is false`) + return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) } hostConfig.Privileged = hostPrivileged @@ -221,7 +221,7 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, dri if net.ParseIP(ip) != nil { hostConfig.DNS = append(hostConfig.DNS, ip) } else { - d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s\n", ip) + d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip) } } @@ -230,27 +230,17 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, dri hostConfig.DNSSearch = append(hostConfig.DNSSearch, domain) } - mode := driverConfig.NetworkMode - if mode == "" { + hostConfig.NetworkMode = driverConfig.NetworkMode + if hostConfig.NetworkMode == "" { // docker default - d.logger.Println("[DEBUG] driver.docker: no mode specified for networking, defaulting to bridge") - mode = "bridge" + d.logger.Println("[DEBUG] driver.docker: networking mode not specified; defaulting to bridge") + hostConfig.NetworkMode = "bridge" } - // Ignore the container mode for now - switch mode { - case "default", "bridge", "none", "host": - d.logger.Printf("[DEBUG] driver.docker: using %s as network mode\n", mode) - default: - d.logger.Printf("[ERR] driver.docker: invalid setting for network mode: %s\n", mode) - return c, fmt.Errorf("Invalid setting for network mode: %s", mode) - } - hostConfig.NetworkMode = mode - // Setup port mapping and exposed ports if len(task.Resources.Networks) == 0 { d.logger.Println("[DEBUG] driver.docker: No network interfaces are available") - if len(driverConfig.PortMap) > 0 { + if len(driverConfig.PortMap) == 
1 && len(driverConfig.PortMap[0]) > 0 { return c, fmt.Errorf("Trying to map ports but no network interface is available") } } else { @@ -261,39 +251,42 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, dri for _, port := range network.ReservedPorts { hostPortStr := strconv.Itoa(port.Value) - dockerPort := docker.Port(hostPortStr) + containerPort := docker.Port(hostPortStr) - publishedPorts[dockerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} - publishedPorts[dockerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} - d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port.Value, port.Value) + publishedPorts[containerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} + publishedPorts[containerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} + d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)", network.IP, port.Value, port.Value) - exposedPorts[dockerPort+"/tcp"] = struct{}{} - exposedPorts[dockerPort+"/udp"] = struct{}{} - d.logger.Printf("[DEBUG] driver.docker: exposed port %d\n", port.Value) + exposedPorts[containerPort+"/tcp"] = struct{}{} + exposedPorts[containerPort+"/udp"] = struct{}{} + d.logger.Printf("[DEBUG] driver.docker: exposed port %d", port.Value) } containerToHostPortMap := make(map[string]int) for _, port := range network.DynamicPorts { - if len(driverConfig.PortMap) == 0 { - continue - } - containerPort, ok := driverConfig.PortMap[0][port.Label] - if !ok { - containerPort = port.Value + // By default we will map the allocated port 1:1 to the container + containerPortInt := port.Value + + // If the user has mapped a port using port_map we'll change it here + if len(driverConfig.PortMap) == 1 { + mapped, ok := driverConfig.PortMap[0][port.Label] + if ok { + 
containerPortInt = mapped + } } - containerPortStr := docker.Port(strconv.Itoa(containerPort)) hostPortStr := strconv.Itoa(port.Value) + containerPort := docker.Port(hostPortStr) - publishedPorts[containerPortStr+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} - publishedPorts[containerPortStr+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} - d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)\n", network.IP, port.Value, containerPort) + publishedPorts[containerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} + publishedPorts[containerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} + d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)", network.IP, port.Value, containerPortInt) - exposedPorts[containerPortStr+"/tcp"] = struct{}{} - exposedPorts[containerPortStr+"/udp"] = struct{}{} - d.logger.Printf("[DEBUG] driver.docker: exposed port %s\n", hostPortStr) + exposedPorts[containerPort+"/tcp"] = struct{}{} + exposedPorts[containerPort+"/udp"] = struct{}{} + d.logger.Printf("[DEBUG] driver.docker: exposed port %s", hostPortStr) - containerToHostPortMap[string(containerPortStr)] = port.Value + containerToHostPortMap[string(containerPort)] = port.Value } env.SetPorts(containerToHostPortMap) @@ -313,7 +306,7 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, dri if driverConfig.Args != "" { cmd = append(cmd, parsedArgs...) 
} - d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s\n", strings.Join(cmd, " ")) + d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s", strings.Join(cmd, " ")) config.Cmd = cmd } else if driverConfig.Args != "" { d.logger.Println("[DEBUG] driver.docker: ignoring command arguments because command is not specified") @@ -321,12 +314,16 @@ func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, dri if len(driverConfig.Labels) == 1 { config.Labels = driverConfig.Labels[0] - d.logger.Println("[DEBUG] driver.docker: applied labels on the container") + d.logger.Printf("[DEBUG] driver.docker: applied labels on the container: %+v", config.Labels) } config.Env = env.List() + + containerName := fmt.Sprintf("%s-%s", task.Name, ctx.AllocID) + d.logger.Printf("[DEBUG] driver.docker: setting container name to: %s", containerName) + return docker.CreateContainerOptions{ - Name: fmt.Sprintf("%s-%s", task.Name, ctx.AllocID), + Name: containerName, Config: config, HostConfig: hostConfig, }, nil @@ -392,40 +389,78 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle err = client.PullImage(pullOptions, authOptions) if err != nil { - d.logger.Printf("[ERR] driver.docker: failed pulling container %s:%s: %s\n", repo, tag, err) + d.logger.Printf("[ERR] driver.docker: failed pulling container %s:%s: %s", repo, tag, err) return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err) } - d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded\n", repo, tag) + d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag) // Now that we have the image we can get the image id dockerImage, err = client.InspectImage(image) if err != nil { - d.logger.Printf("[ERR] driver.docker: failed getting image id for %s\n", image) + d.logger.Printf("[ERR] driver.docker: failed getting image id for %s: %s", image, err) return nil, fmt.Errorf("Failed to determine image 
id for `%s`: %s", image, err) } } - d.logger.Printf("[DEBUG] driver.docker: identified image %s as %s\n", image, dockerImage.ID) + d.logger.Printf("[DEBUG] driver.docker: identified image %s as %s", image, dockerImage.ID) config, err := d.createContainer(ctx, task, &driverConfig) if err != nil { - d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %s: %s\n", image, err) + d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %s: %s", image, err) return nil, fmt.Errorf("Failed to create container configuration for image %s: %s", image, err) } // Create a container container, err := client.CreateContainer(config) if err != nil { - d.logger.Printf("[ERR] driver.docker: failed to create container from image %s: %s\n", image, err) - return nil, fmt.Errorf("Failed to create container from image %s", image) + // If the container already exists because of a previous failure we'll + // try to purge it and re-create it. + if strings.Contains(err.Error(), "container already exists") { + // Get the ID of the existing container so we can delete it + containers, err := client.ListContainers(docker.ListContainersOptions{ + // The image might be in use by a stopped container, so check everything + All: true, + Filters: map[string][]string{ + "name": []string{config.Name}, + }, + }) + if err != nil { + log.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name) + return nil, fmt.Errorf("Failed to query list of containers: %s", err) + } + + if len(containers) != 1 { + log.Printf("[ERR] driver.docker: failed to get id for container %s", config.Name) + return nil, fmt.Errorf("Failed to get id for container %s", config.Name, err) + } + + log.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name) + err = client.RemoveContainer(docker.RemoveContainerOptions{ + ID: containers[0].ID, + }) + if err != nil { + 
log.Printf("[ERR] driver.docker: failed to purge container %s", config.Name) + return nil, fmt.Errorf("Failed to purge container %s: %s", config.Name, err) + } + log.Printf("[INFO] driver.docker: purged container %s", config.Name) + container, err = client.CreateContainer(config) + if err != nil { + log.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name) + return nil, fmt.Errorf("Failed to re-create container %s; aborting", config.Name) + } + } else { + // We failed to create the container for some other reason. + d.logger.Printf("[ERR] driver.docker: failed to create container from image %s: %s", image, err) + return nil, fmt.Errorf("Failed to create container from image %s: %s", image, err) + } } - d.logger.Printf("[INFO] driver.docker: created container %s\n", container.ID) + d.logger.Printf("[INFO] driver.docker: created container %s", container.ID) // Start the container err = client.StartContainer(container.ID, container.HostConfig) if err != nil { - d.logger.Printf("[ERR] driver.docker: starting container %s\n", container.ID) - return nil, fmt.Errorf("Failed to start container %s", container.ID) + d.logger.Printf("[ERR] driver.docker: failed to start container %s: %s", container.ID, err) + return nil, fmt.Errorf("Failed to start container %s: %s", container.ID, err) } - d.logger.Printf("[INFO] driver.docker: started container %s\n", container.ID) + d.logger.Printf("[INFO] driver.docker: started container %s", container.ID) // Return a driver handle h := &dockerHandle{ @@ -452,7 +487,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er if err := json.Unmarshal(pidBytes, pid); err != nil { return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) } - d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s\n", handleID) + d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID) // Initialize docker API client client, err := 
d.dockerClient() @@ -503,7 +538,7 @@ func (h *dockerHandle) ID() string { } data, err := json.Marshal(pid) if err != nil { - h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s\n", err) + h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err) } return fmt.Sprintf("DOCKER:%s", string(data)) } @@ -553,16 +588,17 @@ func (h *dockerHandle) Kill() error { }, }) if err != nil { - return fmt.Errorf("Unable to query list of containers: %s", err) + log.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID) + return fmt.Errorf("Failed to query list of containers: %s", err) } inUse := len(containers) if inUse > 0 { - log.Printf("[INFO] driver.docker: image %s is still in use by %d containers\n", h.imageID, inUse) + log.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse) } else { return fmt.Errorf("Failed to remove image %s", h.imageID) } } else { - log.Printf("[INFO] driver.docker: removed image %s\n", h.imageID) + log.Printf("[INFO] driver.docker: removed image %s", h.imageID) } } return nil @@ -572,7 +608,7 @@ func (h *dockerHandle) run() { // Wait for it... 
exitCode, err := h.client.WaitContainer(h.containerID) if err != nil { - h.logger.Printf("[ERR] driver.docker: unable to wait for %s; container already terminated\n", h.containerID) + h.logger.Printf("[ERR] driver.docker: failed to wait for %s; container already terminated", h.containerID) } if exitCode != 0 { diff --git a/client/driver/docker_test.go b/client/driver/docker_test.go index 80b37632d..bc1d93852 100644 --- a/client/driver/docker_test.go +++ b/client/driver/docker_test.go @@ -293,7 +293,7 @@ func taskTemplate() *structs.Task { &structs.NetworkResource{ IP: "127.0.0.1", ReservedPorts: []structs.Port{{"main", 11110}}, - DynamicPorts: []structs.Port{{"REDIS", 0}}, + DynamicPorts: []structs.Port{{"REDIS", 43330}}, }, }, }, @@ -307,12 +307,15 @@ func TestDocker_StartN(t *testing.T) { task1 := taskTemplate() task1.Resources.Networks[0].ReservedPorts[0] = structs.Port{"main", 11110} + task1.Resources.Networks[0].DynamicPorts[0] = structs.Port{"REDIS", 43331} task2 := taskTemplate() task2.Resources.Networks[0].ReservedPorts[0] = structs.Port{"main", 22222} + task2.Resources.Networks[0].DynamicPorts[0] = structs.Port{"REDIS", 43332} task3 := taskTemplate() task3.Resources.Networks[0].ReservedPorts[0] = structs.Port{"main", 33333} + task3.Resources.Networks[0].DynamicPorts[0] = structs.Port{"REDIS", 43333} taskList := []*structs.Task{task1, task2, task3} @@ -359,14 +362,17 @@ func TestDocker_StartNVersions(t *testing.T) { task1 := taskTemplate() task1.Config["image"] = "redis" task1.Resources.Networks[0].ReservedPorts[0] = structs.Port{"main", 11110} + task1.Resources.Networks[0].DynamicPorts[0] = structs.Port{"REDIS", 43331} task2 := taskTemplate() task2.Config["image"] = "redis:latest" task2.Resources.Networks[0].ReservedPorts[0] = structs.Port{"main", 22222} + task2.Resources.Networks[0].DynamicPorts[0] = structs.Port{"REDIS", 43332} task3 := taskTemplate() task3.Config["image"] = "redis:3.0" task3.Resources.Networks[0].ReservedPorts[0] = structs.Port{"main", 
33333} + task3.Resources.Networks[0].DynamicPorts[0] = structs.Port{"REDIS", 43333} taskList := []*structs.Task{task1, task2, task3} diff --git a/client/driver/driver_test.go b/client/driver/driver_test.go index 7065153a1..fd4f30569 100644 --- a/client/driver/driver_test.go +++ b/client/driver/driver_test.go @@ -1,7 +1,9 @@ package driver import ( + "fmt" "log" + "math/rand" "os" "path/filepath" "reflect" @@ -19,11 +21,15 @@ var basicResources = &structs.Resources{ &structs.NetworkResource{ IP: "0.0.0.0", ReservedPorts: []structs.Port{{"main", 12345}}, - DynamicPorts: []structs.Port{{"HTTP", 0}}, + DynamicPorts: []structs.Port{{"HTTP", 43330}}, }, }, } +func init() { + rand.Seed(49875) +} + func testLogger() *log.Logger { return log.New(os.Stderr, "", log.LstdFlags) } @@ -43,7 +49,7 @@ func testDriverContext(task string) *DriverContext { func testDriverExecContext(task *structs.Task, driverCtx *DriverContext) *ExecContext { allocDir := allocdir.NewAllocDir(filepath.Join(driverCtx.config.AllocDir, structs.GenerateUUID())) allocDir.Build([]*structs.Task{task}) - ctx := NewExecContext(allocDir, "dummyAllocId") + ctx := NewExecContext(allocDir, fmt.Sprintf("alloc-id-%d", int(rand.Int31()))) return ctx } diff --git a/client/driver/qemu.go b/client/driver/qemu.go index f51907f75..153051c3c 100644 --- a/client/driver/qemu.go +++ b/client/driver/qemu.go @@ -6,7 +6,6 @@ import ( "path/filepath" "regexp" "runtime" - "strconv" "strings" "time" @@ -33,10 +32,10 @@ type QemuDriver struct { } type QemuDriverConfig struct { - ArtifactSource string `mapstructure:"artifact_source"` - Checksum string `mapstructure:"checksum"` - Accelerator string `mapstructure:"accelerator"` - GuestPorts string `mapstructure:"guest_ports"` + ArtifactSource string `mapstructure:"artifact_source"` + Checksum string `mapstructure:"checksum"` + Accelerator string `mapstructure:"accelerator"` + PortMap []map[string]int `mapstructure:"port_map"` // A map of host port labels to guest ports.
} // qemuHandle is returned from Start/Open as a handle to the PID @@ -82,6 +81,11 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil { return nil, err } + + if len(driverConfig.PortMap) > 1 { + return nil, fmt.Errorf("Only one port_map block is allowed in the qemu driver config") + } + // Get the image source source, ok := task.Config["artifact_source"] if !ok || source == "" { @@ -138,42 +142,31 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, // the outside world to be able to reach it. VMs ran without port mappings can // still reach out to the world, but without port mappings it is effectively // firewalled - if len(task.Resources.Networks) > 0 { - // TODO: Consolidate these into map of host/guest port when we have HCL - // Note: Host port must be open and available - // Get and split guest ports. The guest_ports configuration must match up with - // the Reserved ports in the Task Resources - // Users can supply guest_hosts as a list of posts to map on the guest vm. - // These map 1:1 with the requested Reserved Ports from the hostmachine. 
- ports := strings.Split(driverConfig.GuestPorts, ",") - if len(ports) == 0 { - return nil, fmt.Errorf("[ERR] driver.qemu: Error parsing required Guest Ports") - } - - // TODO: support more than a single, default Network - if len(ports) != len(task.Resources.Networks[0].ReservedPorts) { - return nil, fmt.Errorf("[ERR] driver.qemu: Error matching Guest Ports with Reserved ports") - } - - // Loop through the reserved ports and construct the hostfwd string, to map + protocols := []string{"udp", "tcp"} + if len(task.Resources.Networks) > 0 && len(driverConfig.PortMap) == 1 { + // Loop through the port map and construct the hostfwd string, to map // reserved ports to the ports listenting in the VM - // Ex: - // hostfwd=tcp::22000-:22,hostfwd=tcp::80-:8080 - reservedPorts := task.Resources.Networks[0].ReservedPorts - var forwarding string - for i, p := range ports { - forwarding = fmt.Sprintf("%s,hostfwd=tcp::%s-:%s", forwarding, strconv.Itoa(reservedPorts[i].Value), p) + // Ex: hostfwd=tcp::22000-:22,hostfwd=tcp::80-:8080 + var forwarding []string + taskPorts := task.Resources.Networks[0].MapLabelToValues() + for label, guest := range driverConfig.PortMap[0] { + host, ok := taskPorts[label] + if !ok { + return nil, fmt.Errorf("Unknown port label %q", label) + } + + for _, p := range protocols { + forwarding = append(forwarding, fmt.Sprintf("hostfwd=%s::%d-:%d", p, host, guest)) + } } - if "" == forwarding { - return nil, fmt.Errorf("[ERR] driver.qemu: Error constructing port forwarding") + if len(forwarding) != 0 { + args = append(args, + "-netdev", + fmt.Sprintf("user,id=user.0%s", strings.Join(forwarding, ",")), + "-device", "virtio-net,netdev=user.0", + ) } - - args = append(args, - "-netdev", - fmt.Sprintf("user,id=user.0%s", forwarding), - "-device", "virtio-net,netdev=user.0", - ) } // If using KVM, add optimization args diff --git a/client/driver/qemu_test.go b/client/driver/qemu_test.go index 543bf247b..cecca4357 100644 --- a/client/driver/qemu_test.go +++ 
b/client/driver/qemu_test.go @@ -41,7 +41,10 @@ func TestQemuDriver_StartOpen_Wait(t *testing.T) { "artifact_source": "https://dl.dropboxusercontent.com/u/47675/jar_thing/linux-0.2.img", "checksum": "sha256:a5e836985934c3392cbbd9b26db55a7d35a8d7ae1deb7ca559dd9c0159572544", "accelerator": "tcg", - "guest_ports": "22,8080", + "port_map": []map[string]int{{ + "main": 22, + "web": 8080, + }}, }, Resources: &structs.Resources{ CPU: 500, diff --git a/command/init.go b/command/init.go index c911c95cf..92554ce45 100644 --- a/command/init.go +++ b/command/init.go @@ -128,6 +128,18 @@ job "example" { } } + service { + # name = redis + tags = ["global", "cache"] + port = "db" + check { + name = "alive" + type = "tcp" + interval = "10s" + timeout = "2s" + } + } + # We must specify the resources required for # this task to ensure it runs on a machine with # enough capacity. diff --git a/jobspec/parse.go b/jobspec/parse.go index 24772364f..92f3c5048 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -144,7 +144,7 @@ func parseJob(result *structs.Job, list *ast.ObjectList) error { // If we have tasks outside, create TaskGroups for them if o := listVal.Filter("task"); len(o.Items) > 0 { var tasks []*structs.Task - if err := parseTasks(&tasks, o); err != nil { + if err := parseTasks(result.Name, "", &tasks, o); err != nil { return err } @@ -247,7 +247,7 @@ func parseGroups(result *structs.Job, list *ast.ObjectList) error { // Parse tasks if o := listVal.Filter("task"); len(o.Items) > 0 { - if err := parseTasks(&g.Tasks, o); err != nil { + if err := parseTasks(result.Name, g.Name, &g.Tasks, o); err != nil { return err } } @@ -346,7 +346,7 @@ func parseConstraints(result *[]*structs.Constraint, list *ast.ObjectList) error return nil } -func parseTasks(result *[]*structs.Task, list *ast.ObjectList) error { +func parseTasks(jobName string, taskGroupName string, result *[]*structs.Task, list *ast.ObjectList) error { list = list.Children() if len(list.Items) == 0 { return nil @@ 
-378,12 +378,16 @@ func parseTasks(result *[]*structs.Task, list *ast.ObjectList) error { delete(m, "config") delete(m, "env") delete(m, "constraint") + delete(m, "service") delete(m, "meta") delete(m, "resources") // Build the task var t structs.Task t.Name = n + if taskGroupName == "" { + taskGroupName = n + } if err := mapstructure.WeakDecode(m, &t); err != nil { return err } @@ -401,6 +405,12 @@ func parseTasks(result *[]*structs.Task, list *ast.ObjectList) error { } } + if o := listVal.Filter("service"); len(o.Items) > 0 { + if err := parseServices(jobName, taskGroupName, &t, o); err != nil { + return err + } + } + // If we have config, then parse that if o := listVal.Filter("config"); len(o.Items) > 0 { for _, o := range o.Elem().Items { @@ -452,6 +462,79 @@ func parseTasks(result *[]*structs.Task, list *ast.ObjectList) error { return nil } +func parseServices(jobName string, taskGroupName string, task *structs.Task, serviceObjs *ast.ObjectList) error { + task.Services = make([]structs.Service, len(serviceObjs.Items)) + var defaultServiceName bool + for idx, o := range serviceObjs.Items { + var service structs.Service + var m map[string]interface{} + if err := hcl.DecodeObject(&m, o.Val); err != nil { + return err + } + + delete(m, "check") + + if err := mapstructure.WeakDecode(m, &service); err != nil { + return err + } + + if defaultServiceName && service.Name == "" { + return fmt.Errorf("Only one service block may omit the Name field") + } + + if service.Name == "" { + defaultServiceName = true + service.Name = fmt.Sprintf("%s-%s-%s", jobName, taskGroupName, task.Name) + } else { + service.Name = fmt.Sprintf("%s-%s-%s-%s", jobName, taskGroupName, task.Name, service.Name) + } + + // Filter checks + var checkList *ast.ObjectList + if ot, ok := o.Val.(*ast.ObjectType); ok { + checkList = ot.List + } else { + return fmt.Errorf("service '%s': should be an object", service.Name) + } + + if co := checkList.Filter("check"); len(co.Items) > 0 { + if err :=
parseChecks(&service, co); err != nil { + return err + } + } + + task.Services[idx] = service + } + + return nil +} + +func parseChecks(service *structs.Service, checkObjs *ast.ObjectList) error { + service.Checks = make([]structs.ServiceCheck, len(checkObjs.Items)) + for idx, co := range checkObjs.Items { + var check structs.ServiceCheck + var cm map[string]interface{} + if err := hcl.DecodeObject(&cm, co.Val); err != nil { + return err + } + dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: mapstructure.StringToTimeDurationHookFunc(), + WeaklyTypedInput: true, + Result: &check, + }) + if err != nil { + return err + } + if err := dec.Decode(cm); err != nil { + return err + } + + service.Checks[idx] = check + } + + return nil +} + func parseResources(result *structs.Resources, list *ast.ObjectList) error { list = list.Elem() if len(list.Items) == 0 { diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 125127de5..6eb19af11 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -94,6 +94,23 @@ func TestParse(t *testing.T) { Config: map[string]interface{}{ "image": "hashicorp/binstore", }, + Services: []structs.Service{ + { + Id: "", + Name: "binstore-storagelocker-binsl-binstore", + Tags: []string{"foo", "bar"}, + PortLabel: "http", + Checks: []structs.ServiceCheck{ + { + Id: "", + Name: "check-name", + Type: "tcp", + Interval: 10 * time.Second, + Timeout: 2 * time.Second, + }, + }, + }, + }, Env: map[string]string{ "HELLO": "world", "LOREM": "ipsum", @@ -301,7 +318,7 @@ func TestBadPorts(t *testing.T) { func TestOverlappingPorts(t *testing.T) { path, err := filepath.Abs(filepath.Join("./test-fixtures", "overlapping-ports.hcl")) if err != nil { - t.Fatalf("Can't get absoluate path for file: %s", err) + t.Fatalf("Can't get absolute path for file: %s", err) } _, err = ParseFile(path) @@ -314,3 +331,20 @@ func TestOverlappingPorts(t *testing.T) { t.Fatalf("Expected collision error; got %v", err) } } + +func 
TestIncompleteServiceDefn(t *testing.T) { + path, err := filepath.Abs(filepath.Join("./test-fixtures", "incorrect-service-def.hcl")) + if err != nil { + t.Fatalf("Can't get absolute path for file: %s", err) + } + + _, err = ParseFile(path) + + if err == nil { + t.Fatalf("Expected an error") + } + + if !strings.Contains(err.Error(), "Only one service block may omit the Name field") { + t.Fatalf("Expected collision error; got %v", err) + } +} diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 236f4829a..9696fdef8 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -45,6 +45,16 @@ job "binstore-storagelocker" { HELLO = "world" LOREM = "ipsum" } + service { + tags = ["foo", "bar"] + port = "http" + check { + name = "check-name" + type = "tcp" + interval = "10s" + timeout = "2s" + } + } resources { cpu = 500 memory = 128 diff --git a/jobspec/test-fixtures/incorrect-service-def.hcl b/jobspec/test-fixtures/incorrect-service-def.hcl new file mode 100644 index 000000000..8a0029842 --- /dev/null +++ b/jobspec/test-fixtures/incorrect-service-def.hcl @@ -0,0 +1,77 @@ +job "binstore-storagelocker" { + region = "global" + type = "service" + priority = 50 + all_at_once = true + datacenters = ["us2", "eu1"] + + meta { + foo = "bar" + } + + constraint { + attribute = "kernel.os" + value = "windows" + } + + update { + stagger = "60s" + max_parallel = 2 + } + + task "outside" { + driver = "java" + config { + jar = "s3://my-cool-store/foo.jar" + } + meta { + my-cool-key = "foobar" + } + } + + group "binsl" { + count = 5 + restart { + attempts = 5 + interval = "10m" + delay = "15s" + } + task "binstore" { + driver = "docker" + config { + image = "hashicorp/binstore" + } + env { + HELLO = "world" + LOREM = "ipsum" + } + service { + tags = ["foo", "bar"] + port = "http" + check { + name = "check-name" + type = "http" + interval = "10s" + timeout = "2s" + } + } + service { + port = "one" + } + resources { + cpu = 500 + 
memory = 128 + + network { + mbits = "100" + port "one" { + static = 1 + } + port "three" { + static = 3 + } + port "http" {} + } + } + } +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 746b9b6c8..4a0f29f88 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -995,6 +995,60 @@ func (tg *TaskGroup) GoString() string { return fmt.Sprintf("*%#v", *tg) } +const ( + ServiceCheckHTTP = "http" + ServiceCheckTCP = "tcp" + ServiceCheckDocker = "docker" + ServiceCheckScript = "script" +) + +// The ServiceCheck data model represents the consul health check that +// Nomad registers for a Task +type ServiceCheck struct { + Id string // Id of the check, must be unique and it is autogenerated + Name string // Name of the check, defaults to id + Type string // Type of the check - tcp, http, docker and script + Script string // Script to invoke for script check + Http string // path of the health check url for http type check + Protocol string // Protocol to use if check is http, defaults to http + Interval time.Duration // Interval of the check + Timeout time.Duration // Timeout of the response from the check before consul fails the check +} + +func (sc *ServiceCheck) Validate() error { + t := strings.ToLower(sc.Type) + if sc.Type == ServiceCheckHTTP && sc.Http == "" { + return fmt.Errorf("http checks needs the Http path information.") + } + + if sc.Type == ServiceCheckScript && sc.Script == "" { + return fmt.Errorf("Script checks need the script to invoke") + } + if t != ServiceCheckTCP && t != ServiceCheckHTTP && t != ServiceCheckDocker && t != ServiceCheckScript { + return fmt.Errorf("Check with name %v has invalid check type: %s ", sc.Name, sc.Type) + } + return nil +} + +// The Service model represents a Consul service definition +type Service struct { + Id string // Id of the service, this needs to be unique on a local machine + Name string // Name of the service, defaults to id + Tags []string // List of tags for the service +
PortLabel string `mapstructure:"port"` // port for the service + Checks []ServiceCheck // List of checks associated with the service +} + +func (s *Service) Validate() error { + var mErr multierror.Error + for _, c := range s.Checks { + if err := c.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + } + return mErr.ErrorOrNil() +} + // Task is a single process typically that is executed as part of a task group. type Task struct { // Name of the task @@ -1009,6 +1063,9 @@ type Task struct { // Map of environment variables to be used by the driver Env map[string]string + // List of service definitions exposed by the Task + Services []Service + // Constraints can be specified at a task level and apply only to // the particular task. Constraints []*Constraint @@ -1132,6 +1189,12 @@ func (t *Task) Validate() error { mErr.Errors = append(mErr.Errors, outer) } } + + for _, service := range t.Services { + if err := service.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + } return mErr.ErrorOrNil() } diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 8221c40fd..84af2a198 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -357,9 +357,21 @@ func TestEncodeDecode(t *testing.T) { } } -func TestBatchRestartPolicyValidate(t *testing.T) { - rp := RestartPolicy{Attempts: 10, Delay: 25 * time.Second} - if err := rp.Validate(); err != nil { - t.Fatalf("err: %v", err) +func TestInvalidServiceCheck(t *testing.T) { + s := Service{ + Id: "service-id", + Name: "service-name", + PortLabel: "bar", + Checks: []ServiceCheck{ + { + + Id: "check-id", + Name: "check-name", + Type: "lol", + }, + }, + } + if err := s.Validate(); err == nil { + t.Fatalf("Service should be invalid") } } diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index 8d4abd035..313b59c1f 100644 --- a/website/source/docs/drivers/docker.html.md +++ 
b/website/source/docs/drivers/docker.html.md @@ -16,7 +16,8 @@ and cleaning up after containers. ## Task Configuration -The `docker` driver supports the following configuration in the job specification: +The `docker` driver supports the following configuration in the job +specification: * `image` - (Required) The Docker image to run. The image may include a tag or custom URL. By default it will be fetched from Docker Hub. @@ -26,38 +27,40 @@ The `docker` driver supports the following configuration in the job specificatio * `args` - (Optional) Arguments to the optional `command`. If no `command` is present, `args` are ignored. -* `network_mode` - (Optional) The network mode to be used for the container. - Valid options are `default`, `bridge`, `host` or `none`. If nothing is - specified, the container will start in `bridge` mode. The `container` - network mode is not supported right now and is reported as an invalid - option. +* `network_mode` - (Optional) The network mode to be used for the container. In + order to support userspace networking plugins in Docker 1.9 this accepts any + value. The default is `bridge`. Other networking modes may not work without + additional configuration on the host (which is outside the scope of Nomad). + Valid values pre-docker 1.9 are `default`, `bridge`, `host`, `none`, or + `container:name`. * `privileged` - (Optional) Privileged mode gives the container full access to - the host. Valid options are `"true"` and `"false"` (defaults to `"false"`). - Tasks with `privileged` set can only run on Nomad Agents with - `docker.privileged.enabled = "true"`. + the host. Valid options are `"true"` and `"false"` (defaults to `"false"`). + Tasks with `privileged` set can only run on Nomad Agents with + `docker.privileged.enabled = "true"`. -* `dns-servers` - (Optional) A comma separated list of DNS servers for the container - to use (e.g. "8.8.8.8,8.8.4.4"). 
*Docker API v1.10 and above only* +* `dns_servers` - (Optional) A comma separated list of DNS servers for the + container to use (e.g. "8.8.8.8,8.8.4.4"). *Docker API v1.10 and above only* -* `search-domains` - (Optional) A comma separated list of DNS search domains for the - container to use. +* `search_domains` - (Optional) A comma separated list of DNS search domains + for the container to use. -* `hostname` - (optional) The hostname to assign to the container. When launching more - than one of a task (using `count`) with this option set, every container the task - starts will have the same hostname. +* `hostname` - (Optional) The hostname to assign to the container. When + launching more than one of a task (using `count`) with this option set, every + container the task starts will have the same hostname. -* `labels` - (Optional) A key/value map of labels to set to the containers on start. +* `labels` - (Optional) A key/value map of labels to set to the containers on + start. - -**Authentication** -Registry authentication can be set per task with the following authentication -parameters. These options can provide access to private repositories that -utilize the docker remote api (e.g. dockerhub, quay.io) - - `auth.username` - (Optional) The account username - - `auth.password` - (Optional) The account password - - `auth.email` - (Optional) The account email - - `auth.server-address` - (Optional) The server domain/ip without the protocol +**Authentication** Registry authentication can be set per task with the +following authentication parameters. These options can provide access to +private repositories that utilize the docker remote api (e.g. 
dockerhub, +quay.io) + - `auth.username` - (Optional) The account username + - `auth.password` - (Optional) The account password + - `auth.email` - (Optional) The account email + - `auth.server-address` - (Optional) The server domain/ip without the + protocol ### Port Mapping @@ -70,16 +73,16 @@ Nomad provides automatic and manual mapping schemes for Docker. You can use either or both schemes for a task. Nomad binds both tcp and udp protocols to ports used for Docker containers. This is not configurable. -Note: You are not required to map any ports, for example if your task is running -a crawler or aggregator and does not provide a network service. Tasks without a -port mapping will still be able to make outbound network connections. +Note: You are not required to map any ports, for example if your task is +running a crawler or aggregator and does not provide a network service. Tasks +without a port mapping will still be able to make outbound network connections. #### Automatic Port Mapping Typically when you create a Docker container you configure the service to start listening on a port (or ports) when you start the container. For example, redis -starts listening on `6379` when you `docker run redis`. Nomad can support this by -mapping a random port on the host machine to the port inside the container. +starts listening on `6379` when you `docker run redis`. Nomad can support this +by mapping a random port on the host machine to the port inside the container. You need to tell Nomad which ports your container is using so Nomad can map allocated ports for you. You do so by specifying a **numeric port value** for @@ -94,17 +97,17 @@ dynamic_ports = [6379] This instructs Nomad to create a port mapping from the random port on the host to the port inside the container. So in our example above, when you contact the host on `1.2.3.4:22333` you will actually hit the service running inside the -container on port `6379`. 
You can see which port was actually bound by reading the -`NOMAD_PORT_6379` [environment variable](/docs/jobspec/environment.html). +container on port `6379`. You can see which port was actually bound by reading +the `NOMAD_PORT_6379` [environment variable](/docs/jobspec/environment.html). In most cases, the automatic port mapping will be the easiest to use, but you can also use manual port mapping (described below). #### Manual Port Mapping -The `dynamic_ports` option takes any alphanumeric string as a label, so you could -also specify a label for the port like `http` or `admin` to designate how the -port will be used. +The `dynamic_ports` option takes any alphanumeric string as a label, so you +could also specify a label for the port like `http` or `admin` to designate how +the port will be used. In this case, Nomad doesn't know which container port to map to, so it maps 1:1 with the host port. For example, `1.2.3.4:22333` will map to `22333` inside the @@ -119,28 +122,29 @@ determine which port to bind to. ## Client Requirements -Nomad requires Docker to be installed and running on the host alongside the Nomad -agent. Nomad was developed against Docker `1.8.2`. +Nomad requires Docker to be installed and running on the host alongside the +Nomad agent. Nomad was developed against Docker `1.8.2`. -By default Nomad communicates with the Docker daemon using the daemon's -unix socket. Nomad will need to be able to read/write to this socket. If you do -not run Nomad as root, make sure you add the Nomad user to the Docker group so +By default Nomad communicates with the Docker daemon using the daemon's unix +socket. Nomad will need to be able to read/write to this socket. If you do not +run Nomad as root, make sure you add the Nomad user to the Docker group so Nomad can communicate with the Docker daemon. 
-For example, on ubuntu you can use the `usermod` command to add the `vagrant` user to the -`docker` group so you can run Nomad without root: +For example, on ubuntu you can use the `usermod` command to add the `vagrant` +user to the `docker` group so you can run Nomad without root: sudo usermod -G docker -a vagrant -For the best performance and security features you should use recent versions of -the Linux Kernel and Docker daemon. +For the best performance and security features you should use recent versions +of the Linux Kernel and Docker daemon. ## Client Configuration The `docker` driver has the following configuration options: * `docker.endpoint` - Defaults to `unix:///var/run/docker.sock`. You will need - to customize this if you use a non-standard socket (http or another location). + to customize this if you use a non-standard socket (http or another + location). * `docker.cleanup.container` Defaults to `true`. Changing this to `false` will prevent Nomad from removing containers from stopped tasks. @@ -149,9 +153,8 @@ The `docker` driver has the following configuration options: prevent Nomad from removing images from stopped tasks. * `docker.privileged.enabled` Defaults to `false`. Changing this to `true` will - allow containers to use "privileged" mode, which gives the containers full access - to the host. - + allow containers to use "privileged" mode, which gives the containers full + access to the host. Note: When testing or using the `-dev` flag you can use `DOCKER_HOST`, `DOCKER_TLS_VERIFY`, and `DOCKER_CERT_PATH` to customize Nomad's behavior. In @@ -161,21 +164,20 @@ production Nomad will always read `docker.endpoint`. The `docker` driver will set the following client attributes: -* `driver.docker` - This will be set to "1", indicating the - driver is available. -* `driver.docker.version` - This will be set to version of the - docker server +* `driver.docker` - This will be set to "1", indicating the driver is + available. 
+* `driver.docker.version` - This will be set to version of the docker server ## Resource Isolation ### CPU -Nomad limits containers' CPU based on CPU shares. CPU shares allow containers to -burst past their CPU limits. CPU limits will only be imposed when there is +Nomad limits containers' CPU based on CPU shares. CPU shares allow containers +to burst past their CPU limits. CPU limits will only be imposed when there is contention for resources. When the host is under load your process may be throttled to stabilize QOS depending on how many shares it has. You can see how -many CPU shares are available to your process by reading `NOMAD_CPU_LIMIT`. 1000 -shares are approximately equal to 1Ghz. +many CPU shares are available to your process by reading `NOMAD_CPU_LIMIT`. +1000 shares are approximately equal to 1Ghz. Please keep the implications of CPU shares in mind when you load test workloads on Nomad. @@ -184,7 +186,8 @@ on Nomad. Nomad limits containers' memory usage based on total virtual memory. This means that containers scheduled by Nomad cannot use swap. This is to ensure that a -swappy process does not degrade performance for other workloads on the same host. +swappy process does not degrade performance for other workloads on the same +host. Since memory is not an elastic resource, you will need to make sure your container does not exceed the amount of memory allocated to it, or it will be @@ -201,6 +204,7 @@ filesystem IO. These will be added in a later release. Docker provides resource isolation by way of [cgroups and namespaces](https://docs.docker.com/introduction/understanding-docker/#the-underlying-technology). -Containers essentially have a virtual file system all to themselves. If you need -a higher degree of isolation between processes for security or other reasons, it -is recommended to use full virtualization like [QEMU](/docs/drivers/qemu.html). +Containers essentially have a virtual file system all to themselves. 
If you +need a higher degree of isolation between processes for security or other +reasons, it is recommended to use full virtualization like +[QEMU](/docs/drivers/qemu.html). diff --git a/website/source/docs/drivers/qemu.html.md b/website/source/docs/drivers/qemu.html.md index 403926b4c..84909e331 100644 --- a/website/source/docs/drivers/qemu.html.md +++ b/website/source/docs/drivers/qemu.html.md @@ -24,18 +24,18 @@ The `Qemu` driver can execute any regular `qemu` image (e.g. `qcow`, `img`, The `Qemu` driver supports the following configuration in the job spec: * `artifact_source` - **(Required)** The hosted location of the source Qemu image. Must be accessible -from the Nomad client, via HTTP. + from the Nomad client, via HTTP. * `checksum` - **(Optional)** The checksum type and value for the `artifact_source` image. -The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`, -and the value is the computed checksum. If a checksum is supplied and does not -match the downloaded artifact, the driver will fail to start + The format is `type:value`, where type is any of `md5`, `sha1`, `sha256`, or `sha512`, + and the value is the computed checksum. If a checksum is supplied and does not + match the downloaded artifact, the driver will fail to start * `accelerator` - (Optional) The type of accelerator to use in the invocation. - If the host machine has `Qemu` installed with KVM support, users can specify `kvm` for the `accelerator`. Default is `tcg` -* `host_port` - **(Required)** Port on the host machine to forward to the guest -VM -* `guest_ports` - **(Optional)** Ports on the guest machine that are listening for -traffic from the host. These ports match up with any `ReservedPorts` requested -in the `Task` specification + If the host machine has `Qemu` installed with KVM support, users can specify + `kvm` for the `accelerator`. Default is `tcg` +* `port_map` - **(Optional)** A `map[string]int` that maps port labels to ports + on the guest. 
This forwards the host port to the guest vm. For example, + `port_map { db = 6539 }` would forward the host port with label `db` to the + guest vm's port 6539. ## Client Requirements diff --git a/website/source/docs/http/alloc.html.md b/website/source/docs/http/alloc.html.md index 822858a8c..00776b606 100644 --- a/website/source/docs/http/alloc.html.md +++ b/website/source/docs/http/alloc.html.md @@ -41,144 +41,193 @@ be specified using the `?region=` query parameter. ```javascript { - "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", - "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Name": "binstore-storagelocker.binsl[3]", - "NodeID": "", - "JobID": "binstore-storagelocker", - "Job": { + "ID": "203266e5-e0d6-9486-5e05-397ed2b184af", + "EvalID": "e68125ed-3fba-fb46-46cc-291addbc4455", + "Name": "example.cache[0]", + "NodeID": "e02b6169-83bd-9df6-69bd-832765f333eb", + "JobID": "example", + "ModifyIndex": 9, + "Resources": { + "Networks": [ + { + "DynamicPorts": [ + { + "Value": 20802, + "Label": "db" + } + ], + "ReservedPorts": null, + "MBits": 10, + "IP": "", + "CIDR": "", + "Device": "" + } + ], + "IOPS": 0, + "DiskMB": 0, + "MemoryMB": 256, + "CPU": 500 + }, + "TaskGroup": "cache", + "Job": { + "ModifyIndex": 5, + "CreateIndex": 5, + "StatusDescription": "", + "Status": "", + "Meta": null, + "Update": { + "MaxParallel": 1, + "Stagger": 1e+10 + }, + "TaskGroups": [ + { + "Meta": null, + "Tasks": [ + { + "Meta": null, + "Resources": { + "Networks": [ + { + "DynamicPorts": [ + { + "Value": 20802, + "Label": "db" + } + ], + "ReservedPorts": null, + "MBits": 0, + "IP": "127.0.0.1", + "CIDR": "", + "Device": "lo" + } + ], + "IOPS": 0, + "DiskMB": 0, + "MemoryMB": 256, + "CPU": 500 + }, + "Constraints": null, + "Services": [ + { + "Checks": [ + { + "Timeout": 2e+09, + "Interval": 1e+10, + "Protocol": "", + "Http": "", + "Script": "", + "Type": "tcp", + "Name": "alive", + "Id": "" + } + ], + "PortLabel": "db", + "Tags": [ + "global", + "cache" + ], + "Name": 
"example-cache-redis", + "Id": "" + } + ], + "Env": null, + "Config": { + "port_map": [ + { + "db": 6379 + } + ], + "image": "redis:latest" + }, + "Driver": "docker", + "Name": "redis" + } + ], + "RestartPolicy": { + "Delay": 2.5e+10, + "Interval": 3e+11, + "Attempts": 10 + }, + "Constraints": null, + "Count": 1, + "Name": "cache" + } + ], "Region": "global", - "ID": "binstore-storagelocker", - "Name": "binstore-storagelocker", + "ID": "example", + "Name": "example", "Type": "service", "Priority": 50, "AllAtOnce": false, "Datacenters": [ - "us2", - "eu1" + "dc1" ], "Constraints": [ - { - "LTarget": "kernel.os", - "RTarget": "windows", - "Operand": "=", - } - ], - "TaskGroups": [ - { - "Name": "binsl", - "Count": 5, - "Constraints": [ - { - "LTarget": "kernel.os", - "RTarget": "linux", - "Operand": "=", - } - ], - "Tasks": [ - { - "Name": "binstore", - "Driver": "docker", - "Config": { - "image": "hashicorp/binstore" - }, - "Constraints": null, - "Resources": { - "CPU": 500, - "MemoryMB": 0, - "DiskMB": 0, - "IOPS": 0, - "Networks": [ - { - "Device": "", - "CIDR": "", - "IP": "", - "MBits": 100, - "ReservedPorts": null, - "DynamicPorts": 0 - } - ] - }, - "Meta": null - }, - { - "Name": "storagelocker", - "Driver": "java", - "Config": { - "image": "hashicorp/storagelocker" - }, - "Constraints": [ - { - "LTarget": "kernel.arch", - "RTarget": "amd64", - "Operand": "=", - } - ], - "Resources": { - "CPU": 500, - "MemoryMB": 0, - "DiskMB": 0, - "IOPS": 0, - "Networks": null - }, - "Meta": null - } - ], - "Meta": { - "elb_checks": "3", - "elb_interval": "10", - "elb_mode": "tcp" - } - } - ], - "Update": { - "Stagger": 0, - "MaxParallel": 0 - }, - "Meta": { - "foo": "bar" - }, - "Status": "", - "StatusDescription": "", - "CreateIndex": 14, - "ModifyIndex": 14 - }, - "TaskGroup": "binsl", - "Resources": { - "CPU": 1000, - "MemoryMB": 0, - "DiskMB": 0, - "IOPS": 0, - "Networks": [ - { - "Device": "", - "CIDR": "", - "IP": "", - "MBits": 100, - "ReservedPorts": null, - 
"DynamicPorts": 0 - } + { + "Operand": "=", + "RTarget": "linux", + "LTarget": "$attr.kernel.name" + } ] - }, - "TaskResources": null, - "Metrics": { - "NodesEvaluated": 0, + }, + "TaskResources": { + "redis": { + "Networks": [ + { + "DynamicPorts": [ + { + "Value": 20802, + "Label": "db" + } + ], + "ReservedPorts": null, + "MBits": 0, + "IP": "127.0.0.1", + "CIDR": "", + "Device": "lo" + } + ], + "IOPS": 0, + "DiskMB": 0, + "MemoryMB": 256, + "CPU": 500 + } + }, + "Metrics": { + "CoalescedFailures": 0, + "AllocationTime": 1590406, + "NodesEvaluated": 1, "NodesFiltered": 0, "ClassFiltered": null, "ConstraintFiltered": null, "NodesExhausted": 0, "ClassExhausted": null, "DimensionExhausted": null, - "Scores": null, - "AllocationTime": 9408, - "CoalescedFailures": 4 - }, - "DesiredStatus": "failed", - "DesiredDescription": "failed to find a node for placement", - "ClientStatus": "failed", - "ClientDescription": "", - "CreateIndex": 16, - "ModifyIndex": 16 + "Scores": { + "e02b6169-83bd-9df6-69bd-832765f333eb.binpack": 6.133651487695705 + } + }, + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "running", + "ClientDescription": "", + "TaskStates": { + "redis": { + "Events": [ + { + "KillError": "", + "Message": "", + "Signal": 0, + "ExitCode": 0, + "DriverError": "", + "Time": 1447806038427841000, + "Type": "Started" + } + ], + "State": "running" + } + }, + "CreateIndex": 7 } ``` diff --git a/website/source/docs/http/allocs.html.md b/website/source/docs/http/allocs.html.md index b59a4f204..7cb38ab66 100644 --- a/website/source/docs/http/allocs.html.md +++ b/website/source/docs/http/allocs.html.md @@ -42,19 +42,35 @@ be specified using the `?region=` query parameter. 
```javascript [ { - "ID": "3575ba9d-7a12-0c96-7b28-add168c67984", - "EvalID": "151accaa-1ac6-90fe-d427-313e70ccbb88", - "Name": "binstore-storagelocker.binsl[3]", - "NodeID": "c9972143-861d-46e6-df73-1d8287bc3e66", - "JobID": "binstore-storagelocker", - "TaskGroup": "binsl", - "DesiredStatus": "run", - "DesiredDescription": "", - "ClientStatus": "running", - "ClientDescription": "", - "CreateIndex": 16, - "ModifyIndex": 16 - }, + "ID": "203266e5-e0d6-9486-5e05-397ed2b184af", + "EvalID": "e68125ed-3fba-fb46-46cc-291addbc4455", + "Name": "example.cache[0]", + "NodeID": "e02b6169-83bd-9df6-69bd-832765f333eb", + "JobID": "example", + "TaskGroup": "cache", + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientDescription": "", + "ClientStatus": "running", + "TaskStates": { + "redis": { + "Events": [ + { + "KillError": "", + "Message": "", + "Signal": 0, + "ExitCode": 0, + "DriverError": "", + "Time": 1447806038427841000, + "Type": "Started" + } + ], + "State": "running" + } + }, + "CreateIndex": 7, + "ModifyIndex": 9 + } ... ] ``` diff --git a/website/source/docs/http/job.html.md b/website/source/docs/http/job.html.md index cbf0f5097..05484aec9 100644 --- a/website/source/docs/http/job.html.md +++ b/website/source/docs/http/job.html.md @@ -88,7 +88,7 @@ region is used; another region can be specified using the `?region=` query param "IP": "", "MBits": 100, "ReservedPorts": null, - "DynamicPorts": 0 + "DynamicPorts": null } ] }, diff --git a/website/source/docs/http/node.html.md b/website/source/docs/http/node.html.md index ed69d96e9..794e0283a 100644 --- a/website/source/docs/http/node.html.md +++ b/website/source/docs/http/node.html.md @@ -59,6 +59,7 @@ be specified using the `?region=` query parameter.
"kernel.name": "darwin", "kernel.version": "14.4.0", "memory.totalbytes": "8589934592", + "network.ip-address": "127.0.0.1", "os.name": "darwin", "os.version": "14.4.0", "storage.bytesfree": "35888713728", @@ -114,141 +115,195 @@ be specified using the `?region=` query parameter. ```javascript [ - { - "ID": "8a0c24d9-cdfc-ce67-1208-8d4524b1a9b3", - "EvalID": "2c699410-8697-6109-86b7-430909b00bb9", - "Name": "example.cache[0]", - "NodeID": "12d3409b-9d27-fcad-a03d-b3c18887d153", - "JobID": "example", - "Job": { - "Region": "global", - "ID": "example", - "Name": "example", - "Type": "service", - "Priority": 50, - "AllAtOnce": false, - "Datacenters": [ - "lon1" - ], - "Constraints": [ - { - "Hard": true, - "LTarget": "$attr.kernel.name", - "RTarget": "linux", - "Operand": "=", - "Weight": 0 - } - ], - "TaskGroups": [ - { - "Name": "cache", - "Count": 1, - "Constraints": null, - "Tasks": [ - { - "Name": "redis", - "Driver": "docker", - "Config": { - "image": "redis:latest" - }, - "Env": null, - "Constraints": null, - "Resources": { - "CPU": 500, - "MemoryMB": 256, - "DiskMB": 0, - "IOPS": 0, - "Networks": [ - { - "Device": "", - "CIDR": "", - "IP": "", - "MBits": 10, - "ReservedPorts": null, - "DynamicPorts": [ - "6379" - ] - } - ] - }, - "Meta": null - } - ], - "Meta": null - } - ], - "Update": { - "Stagger": 0, - "MaxParallel": 0 - }, - "Meta": null, - "Status": "", - "StatusDescription": "", - "CreateIndex": 6, - "ModifyIndex": 6 + { + "ID": "203266e5-e0d6-9486-5e05-397ed2b184af", + "EvalID": "e68125ed-3fba-fb46-46cc-291addbc4455", + "Name": "example.cache[0]", + "NodeID": "e02b6169-83bd-9df6-69bd-832765f333eb", + "JobID": "example", + "ModifyIndex": 9, + "Resources": { + "Networks": [ + { + "DynamicPorts": [ + { + "Value": 20802, + "Label": "db" + } + ], + "ReservedPorts": null, + "MBits": 10, + "IP": "", + "CIDR": "", + "Device": "" + } + ], + "IOPS": 0, + "DiskMB": 0, + "MemoryMB": 256, + "CPU": 500 + }, + "TaskGroup": "cache", + "Job": { + "ModifyIndex": 5, + 
"CreateIndex": 5, + "StatusDescription": "", + "Status": "", + "Meta": null, + "Update": { + "MaxParallel": 1, + "Stagger": 1e+10 }, - "TaskGroup": "cache", - "Resources": { - "CPU": 500, - "MemoryMB": 256, - "DiskMB": 0, - "IOPS": 0, + "TaskGroups": [ + { + "Meta": null, + "Tasks": [ + { + "Meta": null, + "Resources": { + "Networks": [ + { + "DynamicPorts": [ + { + "Value": 20802, + "Label": "db" + } + ], + "ReservedPorts": null, + "MBits": 0, + "IP": "127.0.0.1", + "CIDR": "", + "Device": "lo" + } + ], + "IOPS": 0, + "DiskMB": 0, + "MemoryMB": 256, + "CPU": 500 + }, + "Constraints": null, + "Services": [ + { + "Checks": [ + { + "Timeout": 2e+09, + "Interval": 1e+10, + "Protocol": "", + "Http": "", + "Script": "", + "Type": "tcp", + "Name": "alive", + "Id": "" + } + ], + "PortLabel": "db", + "Tags": [ + "global", + "cache" + ], + "Name": "example-cache-redis", + "Id": "" + } + ], + "Env": null, + "Config": { + "port_map": [ + { + "db": 6379 + } + ], + "image": "redis:latest" + }, + "Driver": "docker", + "Name": "redis" + } + ], + "RestartPolicy": { + "Delay": 2.5e+10, + "Interval": 3e+11, + "Attempts": 10 + }, + "Constraints": null, + "Count": 1, + "Name": "cache" + } + ], + "Region": "global", + "ID": "example", + "Name": "example", + "Type": "service", + "Priority": 50, + "AllAtOnce": false, + "Datacenters": [ + "dc1" + ], + "Constraints": [ + { + "Operand": "=", + "RTarget": "linux", + "LTarget": "$attr.kernel.name" + } + ] + }, + "TaskResources": { + "redis": { "Networks": [ { - "Device": "", - "CIDR": "", - "IP": "", - "MBits": 10, - "ReservedPorts": null, "DynamicPorts": [ - "6379" - ] + { + "Value": 20802, + "Label": "db" + } + ], + "ReservedPorts": null, + "MBits": 0, + "IP": "127.0.0.1", + "CIDR": "", + "Device": "lo" } - ] - }, - "TaskResources": { - "redis": { - "CPU": 500, - "MemoryMB": 256, - "DiskMB": 0, - "IOPS": 0, - "Networks": [ - { - "Device": "eth0", - "CIDR": "", - "IP": "10.16.0.222", - "MBits": 0, - "ReservedPorts": [ - 23889 - ], - 
"DynamicPorts": [ - "6379" - ] - } - ] - } - }, - "Metrics": { - "NodesEvaluated": 1, - "NodesFiltered": 0, - "ClassFiltered": null, - "ConstraintFiltered": null, - "NodesExhausted": 0, - "ClassExhausted": null, - "DimensionExhausted": null, - "Scores": { - "12d3409b-9d27-fcad-a03d-b3c18887d153.binpack": 10.779215064231561 - }, - "AllocationTime": 75232, - "CoalescedFailures": 0 - }, - "DesiredStatus": "run", - "DesiredDescription": "", - "ClientStatus": "pending", - "ClientDescription": "", - "CreateIndex": 8, - "ModifyIndex": 8 + ], + "IOPS": 0, + "DiskMB": 0, + "MemoryMB": 256, + "CPU": 500 + } }, + "Metrics": { + "CoalescedFailures": 0, + "AllocationTime": 1590406, + "NodesEvaluated": 1, + "NodesFiltered": 0, + "ClassFiltered": null, + "ConstraintFiltered": null, + "NodesExhausted": 0, + "ClassExhausted": null, + "DimensionExhausted": null, + "Scores": { + "e02b6169-83bd-9df6-69bd-832765f333eb.binpack": 6.133651487695705 + } + }, + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "running", + "ClientDescription": "", + "TaskStates": { + "redis": { + "Events": [ + { + "KillError": "", + "Message": "", + "Signal": 0, + "ExitCode": 0, + "DriverError": "", + "Time": 1447806038427841000, + "Type": "Started" + } + ], + "State": "running" + } + }, + "CreateIndex": 7 + }, ... ] ``` diff --git a/website/source/docs/jobspec/environment.html.md b/website/source/docs/jobspec/environment.html.md index fc9cc91a6..9ef01f8f3 100644 --- a/website/source/docs/jobspec/environment.html.md +++ b/website/source/docs/jobspec/environment.html.md @@ -41,22 +41,21 @@ cluster gets more or less busy. Each task will receive port allocations on a single IP address. The IP is made available through `NOMAD_IP.` -If you requested reserved ports in your job specification and your task is successfully -scheduled, these ports are available for your use. Ports from `reserved_ports` -in the job spec are not exposed through the environment. 
If you requested -dynamic ports in your job specification these are made known to your application via -environment variables `NOMAD_PORT_{LABEL}`. For example -`dynamic_ports = ["HTTP"]` becomes `NOMAD_PORT_HTTP`. +Both dynamic and reserved ports are exposed through environment variables in the +following format, `NOMAD_PORT_{LABEL}={PORT}`. For example, a dynamic port +`port "HTTP" {}` becomes `NOMAD_PORT_HTTP=48907`. Some drivers such as Docker and QEMU use port mapping. If a driver supports port -mapping and you specify a numeric label, the label will be automatically used as -the private port number. For example, `dynamic_ports = ["5000"]` will have a -random port mapped to port 5000 inside the container or VM. These ports are also -exported as environment variables for consistency, e.g. `NOMAD_PORT_5000`. +mapping and it has been set in the driver configuration, container ports and +their mapped host port are exposed as environment variables with the following +format, `NOMAD_PORT_{CONTAINER_PORT}={HOST_PORT}`. To give a concrete example, +imagine you had the following port configuration, `port "db" { static = 8181 }` +and you had the following port mapping, `port_map { db = 6379 }`. The following +environment variable would then be set, `NOMAD_PORT_6379=8181`. Please see the relevant driver documentation for details. -### Task Directories +### Task Directories Nomad makes the following two directories available to tasks: diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md index 3d05c909b..64228ed21 100644 --- a/website/source/docs/jobspec/index.html.md +++ b/website/source/docs/jobspec/index.html.md @@ -81,7 +81,7 @@ where a task is eligible for running. 
An example constraint looks like: ``` # Restrict to only nodes running linux constraint { - attribute = "$attr.kernel.os" + attribute = "$attr.kernel.name" value = "linux" } ``` @@ -153,6 +153,10 @@ The `group` object supports the following keys: * `constraint` - This can be provided multiple times to define additional constraints. See the constraint reference for more details. +* `restart` - Specifies the restart policy to be applied to tasks in this group. + If omitted, a default policy for batch and non-batch jobs is used based on the + job type. See the restart policy reference for more details. + * `task` - This can be specified multiple times, to add a task as part of the group. @@ -209,6 +213,43 @@ The `network` object supports the following keys: For applications that cannot use a dynamic port, they can request a specific port. +### Restart Policy + +The `restart` object supports the following keys: + +* `attempts` - For `batch` jobs, `attempts` is the maximum number of restarts + allowed before the task is failed. For non-batch jobs, the `attempts` is the + number of restarts allowed in an `interval` before a restart delay is added. + +* `interval` - `interval` is only valid on non-batch jobs and is a time duration + that can be specified using the "s", "m", and "h" suffixes, such as "30s". + The `interval` begins when the first task starts and ensures that only + `attempts` number of restarts happens within it. If more than `attempts` + number of failures happen, the restart is delayed till after the `interval`, + which is then reset. + +* `delay` - A duration to wait before restarting a task. It is specified as a + time duration using the "s", "m", and "h" suffixes, such as "30s". 
+ +The default `batch` restart policy is: + +``` +restart { + attempts = 15 + delay = "15s" +} +``` + +The default non-batch restart policy is: + +``` +restart { + interval = "1m" + attempts = 2 + delay = "15s" +} +``` + ### Constraint The `constraint` object supports the following keys: @@ -234,17 +275,17 @@ The `constraint` object supports the following keys: the attribute. This sets the operator to "regexp" and the `value` to the regular expression. -* `distinct_hosts` - `distinct_hosts` accepts a boolean `true`. The default is - `false`. +* `distinct_hosts` - `distinct_hosts` accepts a boolean `true`. The default is + `false`. - When `distinct_hosts` is `true` at the Job level, each instance of all Task - Groups specified in the job is placed on a separate host. + When `distinct_hosts` is `true` at the Job level, each instance of all Task + Groups specified in the job is placed on a separate host. - When `distinct_hosts` is `true` at the Task Group level with count > 1, each - instance of a Task Group is placed on a separate host. Different task groups in - the same job _may_ be co-scheduled. + When `distinct_hosts` is `true` at the Task Group level with count > 1, each + instance of a Task Group is placed on a separate host. Different task groups in + the same job _may_ be co-scheduled. - Tasks within a task group are always co-scheduled. + Tasks within a task group are always co-scheduled. Below is a table documenting the variables that can be interpreted: @@ -302,6 +343,14 @@ Below is a table documenting common node attributes: hostname Hostname of the client + + kernel.name + Kernel of the client. Examples: "linux", "darwin" + + + kernel.version + Version of the client kernel. Examples: "3.19.0-25-generic", "15.0.0" + platform.aws.ami-id On EC2, the AMI ID of the client node @@ -312,7 +361,7 @@ Below is a table documenting common node attributes: os.name - Operating system of the client. 
Examples: "linux", "windows", "darwin" + Operating system of the client. Examples: "ubuntu", "windows", "darwin" os.version