Merge branch 'master' into release-0101

Michael Schurter
2019-11-04 11:57:12 -08:00
committed by GitHub
421 changed files with 8326 additions and 4061 deletions

View File

@@ -61,8 +61,6 @@ workflows:
test_packages: "./client/fingerprint"
goarch: "386"
<<: *IGNORE_FOR_UI_BRANCHES
- test-rkt:
<<: *IGNORE_FOR_UI_BRANCHES
- test-e2e:
<<: *IGNORE_FOR_UI_BRANCHES
- test-ui
@@ -178,32 +176,6 @@ jobs:
- checkout
- run: make test-website
test-rkt:
executor: go-machine-recent
environment:
<<: *COMMON_ENVS
GOTEST_PKGS: "./drivers/rkt"
GOPATH: /home/circleci/go
RKT_VERSION: 1.29.0
steps:
- checkout
- install-golang
- install-protoc
- run:
name: install rkt
command: |
gpg --recv-key 18AD5014C99EF7E3BA5F6CE950BDD3E0FC8A365E
wget https://github.com/rkt/rkt/releases/download/v$RKT_VERSION/rkt_$RKT_VERSION-1_amd64.deb
wget https://github.com/rkt/rkt/releases/download/v$RKT_VERSION/rkt_$RKT_VERSION-1_amd64.deb.asc
gpg --verify rkt_$RKT_VERSION-1_amd64.deb.asc
sudo dpkg -i rkt_$RKT_VERSION-1_amd64.deb
- run: PATH="$GOPATH/bin:/usr/local/go/bin:$PATH" make bootstrap
- run-tests
- store_test_results:
path: /tmp/test-reports
- store_artifacts:
path: /tmp/test-reports
test-machine:
executor: "<< parameters.executor >>"
parameters:
@@ -326,7 +298,7 @@ commands:
parameters:
version:
type: string
default: 1.0.0
default: 1.2.3
steps:
- run:
name: Install Vault << parameters.version >>

View File

@@ -1,3 +1,5 @@
For reporting security vulnerabilities [please refer to the website.](https://www.nomadproject.io/security.html)
If you have a question, prepend your issue with `[question]` or preferably use the [nomad mailing list](https://www.nomadproject.io/community.html).
If filing a bug please include the following:

3
.github/SECURITY.md vendored Normal file
View File

@@ -0,0 +1,3 @@
# Security Policy
Please see https://www.nomadproject.io/security.html

2
.gitignore vendored
View File

@@ -86,6 +86,8 @@ rkt-*
/ui/libpeerconnection.log
/ui/npm-debug.log*
/ui/testem.log
/ui/.env*
/ui/.pnp*
.ignore
# ember-try

View File

@@ -1,5 +1,12 @@
## 0.10.2 (Unreleased)
IMPROVEMENTS:
* client: Enable setting tags on Consul Connect sidecar service
BUG FIXES:
* cli: Make scoring column orders consistent in `nomad alloc status` [[GH-6609](https://github.com/hashicorp/nomad/issues/6609)]
## 0.10.1 (November 4, 2019)
BUG FIXES:
@@ -46,6 +53,8 @@ BUG FIXES:
* client: Fixed failure to start if another client is already running with the same data directory [[GH-6348](https://github.com/hashicorp/nomad/pull/6348)]
* devices: Fixed a bug causing CPU usage spike when a device is detected [[GH-6201](https://github.com/hashicorp/nomad/issues/6201)]
* drivers/docker: Set gc image_delay default to 3 minutes [[GH-6078](https://github.com/hashicorp/nomad/pull/6078)]
* ui: Fixed a bug where the allocation log viewer would render HTML or hide content that matched XML syntax [[GH-6048](https://github.com/hashicorp/nomad/issues/6048)]
* ui: Fixed a bug where allocation log viewer doesn't show all content in Firefox [[GH-6466](https://github.com/hashicorp/nomad/issues/6466)]
* ui: Fixed navigation via clicking recent allocation row [[GH-6087](https://github.com/hashicorp/nomad/pull/6087)]

View File

@@ -6,7 +6,7 @@ GIT_COMMIT := $(shell git rev-parse HEAD)
GIT_DIRTY := $(if $(shell git status --porcelain),+CHANGES)
GO_LDFLAGS := "-X github.com/hashicorp/nomad/version.GitCommit=$(GIT_COMMIT)$(GIT_DIRTY)"
GO_TAGS ?=
GO_TAGS ?= codegen_generated
GO_TEST_CMD = $(if $(shell which gotestsum),gotestsum --,go test)
@@ -147,6 +147,7 @@ deps: ## Install build and development dependencies
go get -u github.com/a8m/tree/cmd/tree
go get -u github.com/magiconair/vendorfmt/cmd/vendorfmt
go get -u gotest.tools/gotestsum
go get -u github.com/fatih/hclfmt
@bash -C "$(PROJECT_ROOT)/scripts/install-codecgen.sh"
@bash -C "$(PROJECT_ROOT)/scripts/install-protoc-gen-go.sh"
@@ -224,21 +225,30 @@ generate-examples: command/job_init.bindata_assetfs.go
command/job_init.bindata_assetfs.go: command/assets/*
go-bindata-assetfs -pkg command -o command/job_init.bindata_assetfs.go ./command/assets/...
.PHONY: vendorfmt
vendorfmt:
@echo "--> Formatting vendor/vendor.json"
test -x $(GOPATH)/bin/vendorfmt || go get -u github.com/magiconair/vendorfmt/cmd/vendorfmt
vendorfmt
.PHONY: changelogfmt
changelogfmt:
@echo "--> Making [GH-xxxx] references clickable..."
@sed -E 's|([^\[])\[GH-([0-9]+)\]|\1[[GH-\2](https://github.com/hashicorp/nomad/issues/\2)]|g' CHANGELOG.md > changelog.tmp && mv changelog.tmp CHANGELOG.md
## We skip the terraform directory as there are templated hcl configurations
## that do not successfully compile without rendering
.PHONY: hclfmt
hclfmt:
@echo "--> Formatting HCL"
@find . -path ./terraform -prune -o -name 'upstart.nomad' -prune -o \( -name '*.nomad' -o -name '*.hcl' \) -exec hclfmt -w {} +
.PHONY: dev
dev: GOOS=$(shell go env GOOS)
dev: GOARCH=$(shell go env GOARCH)
dev: GOPATH=$(shell go env GOPATH)
dev: DEV_TARGET=pkg/$(GOOS)_$(GOARCH)/nomad
dev: vendorfmt changelogfmt ## Build for the current development platform
dev: vendorfmt changelogfmt hclfmt ## Build for the current development platform
@echo "==> Removing old development build..."
@rm -f $(PROJECT_ROOT)/$(DEV_TARGET)
@rm -f $(PROJECT_ROOT)/bin/nomad
@@ -252,11 +262,11 @@ dev: vendorfmt changelogfmt ## Build for the current development platform
@cp $(PROJECT_ROOT)/$(DEV_TARGET) $(GOPATH)/bin
.PHONY: prerelease
prerelease: GO_TAGS=ui release
prerelease: GO_TAGS=ui codegen_generated release
prerelease: generate-all ember-dist static-assets ## Generate all the static assets for a Nomad release
.PHONY: release
release: GO_TAGS=ui release
release: GO_TAGS=ui codegen_generated release
release: clean $(foreach t,$(ALL_TARGETS),pkg/$(t).zip) ## Build all release packages which can be built on this platform.
@echo "==> Results:"
@tree --dirsfirst $(PROJECT_ROOT)/pkg
@@ -283,6 +293,7 @@ test-nomad: dev ## Run Nomad test suites
$(if $(ENABLE_RACE),-race) $(if $(VERBOSE),-v) \
-cover \
-timeout=15m \
-tags "$(GO_TAGS)" \
$(GOTEST_PKGS) $(if $(VERBOSE), >test.log ; echo $$? > exit-code)
@if [ $(VERBOSE) ] ; then \
bash -C "$(PROJECT_ROOT)/scripts/test_check.sh" ; \
@@ -295,6 +306,7 @@ e2e-test: dev ## Run the Nomad e2e test suite
$(if $(ENABLE_RACE),-race) $(if $(VERBOSE),-v) \
-cover \
-timeout=900s \
-tags "$(GO_TAGS)" \
github.com/hashicorp/nomad/e2e/vault/ \
-integration

View File

@@ -107,7 +107,7 @@ Who Uses Nomad
* [Nomad at Target: Scaling Microservices Across Public and Private Clouds](https://www.hashicorp.com/resources/nomad-scaling-target-microservices-across-cloud)
* [Playing with Nomad from HashiCorp](https://danielparker.me/nomad/hashicorp/schedulers/nomad/)
* Trivago
* [Maybe You Don't Need Kubernetes](https://matthias-endler.de/2019/maybe-you-dont-need-kubernetes/)
* [Maybe You Don't Need Kubernetes](https://endler.dev/2019/maybe-you-dont-need-kubernetes/)
* [Nomad - Our Experiences and Best Practices](https://tech.trivago.com/2019/01/25/nomad-our-experiences-and-best-practices/)
* Roblox
* [How Roblox runs a platform for 70 million gamers on HashiCorp Nomad](https://portworx.com/architects-corner-roblox-runs-platform-70-million-gamers-hashicorp-nomad/)
@@ -123,8 +123,6 @@ Who Uses Nomad
* [Tech at N26 - The Bank in the Cloud](https://medium.com/insiden26/tech-at-n26-the-bank-in-the-cloud-e5ff818b528b)
* Elsevier
* [Elsevier's Container Framework with Nomad, Terraform, and Consul](https://www.hashicorp.com/resources/elsevier-nomad-container-framework-demo)
* Palantir
* [Enterprise Security at Palantir with the HashiCorp stack](https://www.hashicorp.com/resources/enterprise-security-hashicorp-stack)
* Graymeta
* [Backend Batch Processing At Scale with Nomad](https://www.hashicorp.com/resources/backend-batch-processing-nomad)
* NIH NCBI

View File

@@ -210,12 +210,7 @@ func (a *Allocations) Exec(ctx context.Context,
func (a *Allocations) execFrames(ctx context.Context, alloc *Allocation, task string, tty bool, command []string,
errCh chan<- error, q *QueryOptions) (sendFn func(*ExecStreamingInput) error, output <-chan *ExecStreamingOutput) {
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if err != nil {
errCh <- err
return nil, nil
}
nodeClient, _ := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if q == nil {
q = &QueryOptions{}
@@ -236,15 +231,17 @@ func (a *Allocations) execFrames(ctx context.Context, alloc *Allocation, task st
reqPath := fmt.Sprintf("/v1/client/allocation/%s/exec", alloc.ID)
conn, _, err := nodeClient.websocket(reqPath, q)
if err != nil {
// There was an error talking directly to the client. Non-network
// errors are fatal, but network errors can attempt to route via RPC.
if _, ok := err.(net.Error); !ok {
var conn *websocket.Conn
if nodeClient != nil {
conn, _, err = nodeClient.websocket(reqPath, q)
if _, ok := err.(net.Error); err != nil && !ok {
errCh <- err
return nil, nil
}
}
if conn == nil {
conn, _, err = a.client.websocket(reqPath, q)
if err != nil {
errCh <- err

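A pattern recurs throughout this change set: query the client node directly, treat non-network errors as fatal, and fall back to routing through the servers only when the client was unreachable. A minimal standalone sketch of that idiom (the helper and its names are illustrative, not part of the Nomad API):

```go
package main

import (
	"fmt"
	"io"
	"net"
	"strings"
)

// fallbackQuery sketches the routing idiom above: try the client node
// directly, treat non-network errors as fatal, and retry via the servers
// only when the client was unreachable.
func fallbackQuery(direct, viaServer func() (io.ReadCloser, error)) (io.ReadCloser, error) {
	r, err := direct()
	if err == nil {
		return r, nil
	}
	if _, ok := err.(net.Error); !ok {
		// The client answered and refused; retrying elsewhere won't help.
		return nil, err
	}
	// The client was unreachable; route the same request via a server.
	return viaServer()
}

func main() {
	r, err := fallbackQuery(
		func() (io.ReadCloser, error) {
			return nil, &net.OpError{Op: "dial", Err: fmt.Errorf("connection refused")}
		},
		func() (io.ReadCloser, error) {
			return io.NopCloser(strings.NewReader("via server")), nil
		},
	)
	if err == nil {
		defer r.Close()
		b, _ := io.ReadAll(r)
		fmt.Println(string(b)) // via server
	}
}
```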
179
api/fs.go
View File

@@ -92,72 +92,24 @@ func (a *AllocFS) Stat(alloc *Allocation, path string, q *QueryOptions) (*AllocF
// ReadAt is used to read bytes at a given offset until limit at the given path
// in an allocation directory. If limit is <= 0, there is no limit.
func (a *AllocFS) ReadAt(alloc *Allocation, path string, offset int64, limit int64, q *QueryOptions) (io.ReadCloser, error) {
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if err != nil {
return nil, err
}
if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}
q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["limit"] = strconv.FormatInt(limit, 10)
reqPath := fmt.Sprintf("/v1/client/fs/readat/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
return nil, err
}
// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
return nil, err
}
}
return r, nil
return queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["limit"] = strconv.FormatInt(limit, 10)
})
}
// Cat is used to read contents of a file at the given path in an allocation
// directory
func (a *AllocFS) Cat(alloc *Allocation, path string, q *QueryOptions) (io.ReadCloser, error) {
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if err != nil {
return nil, err
}
if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}
q.Params["path"] = path
reqPath := fmt.Sprintf("/v1/client/fs/cat/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
return nil, err
}
// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
return nil, err
}
}
return r, nil
return queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["path"] = path
})
}
// Stream streams the content of a file blocking on EOF.
@@ -172,40 +124,19 @@ func (a *AllocFS) Stream(alloc *Allocation, path, origin string, offset int64,
cancel <-chan struct{}, q *QueryOptions) (<-chan *StreamFrame, <-chan error) {
errCh := make(chan error, 1)
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
reqPath := fmt.Sprintf("/v1/client/fs/stream/%s", alloc.ID)
r, err := queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["origin"] = origin
})
if err != nil {
errCh <- err
return nil, errCh
}
if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}
q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["origin"] = origin
reqPath := fmt.Sprintf("/v1/client/fs/stream/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
errCh <- err
return nil, errCh
}
// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
errCh <- err
return nil, errCh
}
}
// Create the output channel
frames := make(chan *StreamFrame, 10)
@@ -244,6 +175,40 @@ func (a *AllocFS) Stream(alloc *Allocation, path, origin string, offset int64,
return frames, errCh
}
func queryClientNode(c *Client, alloc *Allocation, reqPath string, q *QueryOptions, customizeQ func(*QueryOptions)) (io.ReadCloser, error) {
nodeClient, _ := c.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}
if customizeQ != nil {
customizeQ(q)
}
var r io.ReadCloser
var err error
if nodeClient != nil {
r, err = nodeClient.rawQuery(reqPath, q)
if _, ok := err.(net.Error); err != nil && !ok {
// found a non-network error talking to the client directly; treat it as fatal
return nil, err
}
}
// Fall back to the server when no node client was available or when a
// network error occurred talking to the client directly.
if r == nil {
return c.rawQuery(reqPath, q)
}
return r, err
}
// Logs streams the content of a tasks logs blocking on EOF.
// The parameters are:
// * allocation: the allocation to stream from.
@@ -264,42 +229,20 @@ func (a *AllocFS) Logs(alloc *Allocation, follow bool, task, logType, origin str
errCh := make(chan error, 1)
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
reqPath := fmt.Sprintf("/v1/client/fs/logs/%s", alloc.ID)
r, err := queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["follow"] = strconv.FormatBool(follow)
q.Params["task"] = task
q.Params["type"] = logType
q.Params["origin"] = origin
q.Params["offset"] = strconv.FormatInt(offset, 10)
})
if err != nil {
errCh <- err
return nil, errCh
}
if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}
q.Params["follow"] = strconv.FormatBool(follow)
q.Params["task"] = task
q.Params["type"] = logType
q.Params["origin"] = origin
q.Params["offset"] = strconv.FormatInt(offset, 10)
reqPath := fmt.Sprintf("/v1/client/fs/logs/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
errCh <- err
return nil, errCh
}
// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
errCh <- err
return nil, errCh
}
}
// Create the output channel
frames := make(chan *StreamFrame, 10)

View File

@@ -143,6 +143,7 @@ type ConsulConnect struct {
// ConsulSidecarService represents a Consul Connect SidecarService jobspec
// stanza.
type ConsulSidecarService struct {
Tags []string
Port string
Proxy *ConsulProxy
}

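With the new field, callers of the Go API can set sidecar tags directly; a brief sketch (field names are from the diff, the values are hypothetical):

```go
package main

import "github.com/hashicorp/nomad/api"

// sidecarWithTags builds a Connect block whose sidecar service carries
// custom Consul tags, now that Tags is copied through to the registration.
func sidecarWithTags() *api.ConsulConnect {
	return &api.ConsulConnect{
		SidecarService: &api.ConsulSidecarService{
			Tags: []string{"metrics", "http"},
			Port: "9000",
		},
	}
}
```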
View File

@@ -370,12 +370,28 @@ type VolumeRequest struct {
ReadOnly bool `mapstructure:"read_only"`
}
const (
VolumeMountPropagationPrivate = "private"
VolumeMountPropagationHostToTask = "host-to-task"
VolumeMountPropagationBidirectional = "bidirectional"
)
// VolumeMount represents the relationship between a destination path in a task
// and the task group volume that should be mounted there.
type VolumeMount struct {
Volume string
Destination string
ReadOnly bool `mapstructure:"read_only"`
Volume *string
Destination *string
ReadOnly *bool `mapstructure:"read_only"`
PropagationMode *string `mapstructure:"propagation_mode"`
}
func (vm *VolumeMount) Canonicalize() {
if vm.PropagationMode == nil {
vm.PropagationMode = stringToPtr(VolumeMountPropagationPrivate)
}
if vm.ReadOnly == nil {
vm.ReadOnly = boolToPtr(false)
}
}
// TaskGroup is the unit of scheduling.
@@ -632,6 +648,9 @@ func (t *Task) Canonicalize(tg *TaskGroup, job *Job) {
for _, a := range t.Affinities {
a.Canonicalize()
}
for _, vm := range t.VolumeMounts {
vm.Canonicalize()
}
}
// TaskArtifact is used to download artifacts before running a task.

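Canonicalize backfills unset pointer fields with defaults, so API consumers always see concrete values; a small sketch assuming the behavior shown in the diff:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	vol, dst := "data", "/srv/data"
	vm := &api.VolumeMount{Volume: &vol, Destination: &dst}

	// Canonicalize fills the unset pointers with the defaults above:
	// ReadOnly=false, PropagationMode="private".
	vm.Canonicalize()
	fmt.Println(*vm.ReadOnly, *vm.PropagationMode) // false private
}
```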
View File

@@ -368,6 +368,14 @@ func TestTask_Artifact(t *testing.T) {
}
}
func TestTask_VolumeMount(t *testing.T) {
t.Parallel()
vm := &VolumeMount{}
vm.Canonicalize()
require.NotNil(t, vm.PropagationMode)
require.Equal(t, *vm.PropagationMode, "private")
}
// Ensures no regression on https://github.com/hashicorp/nomad/issues/3132
func TestTaskGroup_Canonicalize_Update(t *testing.T) {
// Job with an Empty() Update

View File

@@ -89,7 +89,7 @@ type TaskPrestartHook interface {
// Prestart is called before the task is started including after every
// restart. Prestart is not called if the allocation is terminal.
//
// The context is cancelled if the task is killed.
// The context is cancelled if the task is killed or shutdown.
Prestart(context.Context, *TaskPrestartRequest, *TaskPrestartResponse) error
}

View File

@@ -6,6 +6,7 @@ import (
"sync"
"time"
"github.com/LK4D4/joincontext"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
@@ -192,8 +193,11 @@ func (tr *TaskRunner) prestart() error {
}
// Run the prestart hook
// use a joint context to allow any blocking pre-start hooks
// to be canceled by either killCtx or shutdownCtx
joinedCtx, _ := joincontext.Join(tr.killCtx, tr.shutdownCtx)
var resp interfaces.TaskPrestartResponse
if err := pre.Prestart(tr.killCtx, &req, &resp); err != nil {
if err := pre.Prestart(joinedCtx, &req, &resp); err != nil {
tr.emitHookError(err, name)
return structs.WrapRecoverable(fmt.Sprintf("prestart hook %q failed: %v", name, err), err)
}

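joincontext.Join produces a context that is cancelled as soon as either parent is, which is what lets a blocked prestart hook respond to both kill and shutdown. A minimal sketch, assuming the library's documented Join signature:

```go
package main

import (
	"context"
	"fmt"

	"github.com/LK4D4/joincontext"
)

func main() {
	killCtx, kill := context.WithCancel(context.Background())
	shutdownCtx, shutdown := context.WithCancel(context.Background())
	defer kill()

	// The joined context is cancelled when either parent is cancelled.
	joined, cancel := joincontext.Join(killCtx, shutdownCtx)
	defer cancel()

	shutdown() // cancelling one parent...
	<-joined.Done()
	fmt.Println(joined.Err()) // ...cancels the join: context canceled
}
```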
View File

@@ -1742,6 +1742,69 @@ func TestTaskRunner_Template_Artifact(t *testing.T) {
require.NoErrorf(t, err, "%v not rendered", f2)
}
// TestTaskRunner_Template_BlockingPreStart asserts that a task blocked in
// PreStart on template rendering can be gracefully shut down via either
// killCtx or shutdownCtx
func TestTaskRunner_Template_BlockingPreStart(t *testing.T) {
t.Parallel()
alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.Templates = []*structs.Template{
{
EmbeddedTmpl: `{{ with secret "foo/secret" }}{{ .Data.certificate }}{{ end }}`,
DestPath: "local/test",
ChangeMode: structs.TemplateChangeModeNoop,
},
}
task.Vault = &structs.Vault{Policies: []string{"default"}}
conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
defer cleanup()
tr, err := NewTaskRunner(conf)
require.NoError(t, err)
go tr.Run()
defer tr.Shutdown()
testutil.WaitForResult(func() (bool, error) {
ts := tr.TaskState()
if len(ts.Events) == 0 {
return false, fmt.Errorf("no events yet")
}
for _, e := range ts.Events {
if e.Type == "Template" && strings.Contains(e.DisplayMessage, "vault.read(foo/secret)") {
return true, nil
}
}
return false, fmt.Errorf("no missing vault secret template event yet: %#v", ts.Events)
}, func(err error) {
require.NoError(t, err)
})
shutdown := func() <-chan bool {
finished := make(chan bool)
go func() {
tr.Shutdown()
finished <- true
}()
return finished
}
select {
case <-shutdown():
// it shut down like it should have
case <-time.After(10 * time.Second):
require.Fail(t, "timeout shutting down task")
}
}
// TestTaskRunner_Template_NewVaultToken asserts that a new vault token is
// created when rendering template and that it is revoked on alloc completion
func TestTaskRunner_Template_NewVaultToken(t *testing.T) {

View File

@@ -76,6 +76,10 @@ func TestPrevAlloc_StreamAllocDir_Ok(t *testing.T) {
tw := tar.NewWriter(buf)
walkFn := func(path string, fileInfo os.FileInfo, err error) error {
// filepath.Walk passes in an error
if err != nil {
return fmt.Errorf("error from filepath.Walk(): %s", err)
}
// Include the path of the file name relative to the alloc dir
// so that we can put the files in the right directories
link := ""

View File

@@ -1,6 +1,6 @@
package structs
//go:generate codecgen -d 102 -o structs.generated.go structs.go
//go:generate codecgen -d 102 -t codegen_generated -o structs.generated.go structs.go
import (
"errors"

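codecgen's `-t` flag stamps the generated file with a build tag, which is why the Makefile above adds `codegen_generated` to GO_TAGS and passes `-tags "$(GO_TAGS)"` to the test targets: without the tag, the generated encoders are excluded from the build and the slower reflection path is used instead. The generated file's header looks roughly like this (sketch):

```go
// +build codegen_generated

package structs
```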
View File

@@ -45,14 +45,6 @@ type VaultClient interface {
// StopRenewToken removes the token from the min-heap, stopping its
// renewal.
StopRenewToken(string) error
// RenewLease renews a vault secret's lease and adds the lease
// identifier to the min-heap for periodic renewal.
RenewLease(string, int) (<-chan error, error)
// StopRenewLease removes a secret's lease ID from the min-heap,
// stopping its renewal.
StopRenewLease(string) error
}
// Implementation of VaultClient interface to interact with vault and perform
@@ -325,44 +317,6 @@ func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, err
return errCh, nil
}
// RenewLease renews the supplied lease identifier for a supplied duration (in
// seconds) and adds it to the min-heap so that it gets renewed periodically by
// the renewal loop. Any error returned during renewal will be written to a
// buffered channel and the channel is returned instead of an actual error.
// This helps the caller be notified of a renewal failure asynchronously for
// appropriate actions to be taken. The caller of this function need not
// close the error channel.
func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) {
if leaseId == "" {
err := fmt.Errorf("missing lease ID")
return nil, err
}
if increment < 1 {
err := fmt.Errorf("increment cannot be less than 1")
return nil, err
}
// Create a buffered error channel
errCh := make(chan error, 1)
// Create a renewal request using the supplied lease and duration
renewalReq := &vaultClientRenewalRequest{
errCh: errCh,
id: leaseId,
increment: increment,
}
// Renew the secret and send any error to the dedicated error channel
if err := c.renew(renewalReq); err != nil {
c.logger.Error("error during renewal of lease", "error", err)
metrics.IncrCounter([]string{"client", "vault", "renew_lease_error"}, 1)
return nil, err
}
return errCh, nil
}
// renew is a common method to handle renewal of both tokens and secret leases.
// It invokes a token renewal or a secret's lease renewal. If renewal is
// successful, min-heap is updated based on the duration after which it needs
@@ -558,12 +512,6 @@ func (c *vaultClient) StopRenewToken(token string) error {
return c.stopRenew(token)
}
// StopRenewLease removes the item from the heap which represents the given
// lease identifier.
func (c *vaultClient) StopRenewLease(leaseId string) error {
return c.stopRenew(leaseId)
}
// stopRenew removes the given identifier from the heap and signals the renewal
// loop to compute the next best candidate for renewal.
func (c *vaultClient) stopRenew(id string) error {

View File

@@ -111,10 +111,6 @@ func (vc *MockVaultClient) StopRenewToken(token string) error {
return nil
}
func (vc *MockVaultClient) RenewLease(leaseId string, interval int) (<-chan error, error) {
return nil, nil
}
func (vc *MockVaultClient) StopRenewLease(leaseId string) error { return nil }
func (vc *MockVaultClient) Start() {}
func (vc *MockVaultClient) Stop() {}
func (vc *MockVaultClient) GetConsulACL(string, string) (*vaultapi.Secret, error) { return nil, nil }

View File

@@ -278,14 +278,14 @@ func (c *Command) readConfig() *Config {
config.PluginDir = filepath.Join(config.DataDir, "plugins")
}
if !c.isValidConfig(config) {
if !c.isValidConfig(config, cmdConfig) {
return nil
}
return config
}
func (c *Command) isValidConfig(config *Config) bool {
func (c *Command) isValidConfig(config, cmdConfig *Config) bool {
// Check that the server is running in at least one mode.
if !(config.Server.Enabled || config.Client.Enabled) {
@@ -361,11 +361,12 @@ func (c *Command) isValidConfig(config *Config) bool {
}
// Check the bootstrap flags
if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
if !config.Server.Enabled && cmdConfig.Server.BootstrapExpect > 0 {
// report an error if BootstrapExpect is set in CLI but server is disabled
c.Ui.Error("Bootstrap requires server mode to be enabled")
return false
}
if config.Server.BootstrapExpect == 1 {
if config.Server.Enabled && config.Server.BootstrapExpect == 1 {
c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
}
@@ -391,28 +392,58 @@ func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter,
return nil, nil, nil
}
// Create a log writer, and wrap a logOutput around it
logWriter := NewLogWriter(512)
writers := []io.Writer{c.logFilter, logWriter}
// Check if syslog is enabled
var syslog io.Writer
if config.EnableSyslog {
l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
if err != nil {
c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
return nil, nil, nil
}
syslog = &SyslogWrapper{l, c.logFilter}
writers = append(writers, &SyslogWrapper{l, c.logFilter})
}
// Create a log writer, and wrap a logOutput around it
logWriter := NewLogWriter(512)
var logOutput io.Writer
if syslog != nil {
logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
} else {
logOutput = io.MultiWriter(c.logFilter, logWriter)
// Check if file logging is enabled
if config.LogFile != "" {
dir, fileName := filepath.Split(config.LogFile)
// If a path is provided but has no filename, a default is used.
if fileName == "" {
fileName = "nomad.log"
}
// Use the user-specified log rotation duration first, if provided
var logRotateDuration time.Duration
if config.LogRotateDuration != "" {
duration, err := time.ParseDuration(config.LogRotateDuration)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to parse log rotation duration: %v", err))
return nil, nil, nil
}
logRotateDuration = duration
} else {
// Default to 24 hrs if no rotation period is specified
logRotateDuration = 24 * time.Hour
}
logFile := &logFile{
logFilter: c.logFilter,
fileName: fileName,
logPath: dir,
duration: logRotateDuration,
MaxBytes: config.LogRotateBytes,
MaxFiles: config.LogRotateMaxFiles,
}
writers = append(writers, logFile)
}
c.logOutput = logOutput
log.SetOutput(logOutput)
return logGate, logWriter, logOutput
c.logOutput = io.MultiWriter(writers...)
log.SetOutput(c.logOutput)
return logGate, logWriter, c.logOutput
}
// setupAgent is used to start the agent and various interfaces

View File

@@ -57,6 +57,18 @@ type Config struct {
// LogJson enables log output in a JSON format
LogJson bool `hcl:"log_json"`
// LogFile enables logging to a file
LogFile string `hcl:"log_file"`
// LogRotateDuration is the period after which the log file is rotated
LogRotateDuration string `hcl:"log_rotate_duration"`
// LogRotateBytes is the max number of bytes that should be written to a file
LogRotateBytes int `hcl:"log_rotate_bytes"`
// LogRotateMaxFiles is the max number of log files to keep
LogRotateMaxFiles int `hcl:"log_rotate_max_files"`
// BindAddr is the address on which all of nomad's services will
// be bound. If not specified, this defaults to 127.0.0.1.
BindAddr string `hcl:"bind_addr"`
@@ -898,6 +910,18 @@ func (c *Config) Merge(b *Config) *Config {
if b.LogJson {
result.LogJson = true
}
if b.LogFile != "" {
result.LogFile = b.LogFile
}
if b.LogRotateDuration != "" {
result.LogRotateDuration = b.LogRotateDuration
}
if b.LogRotateBytes != 0 {
result.LogRotateBytes = b.LogRotateBytes
}
if b.LogRotateMaxFiles != 0 {
result.LogRotateMaxFiles = b.LogRotateMaxFiles
}
if b.BindAddr != "" {
result.BindAddr = b.BindAddr
}

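The Merge additions follow the file's existing precedence rule: a field from the overriding config wins only when it is set. A hedged sketch of the effect (the helper function itself is illustrative):

```go
package agent

// mergeExample sketches how the new log fields merge: zero values in the
// override leave the base untouched, set values replace it.
func mergeExample() *Config {
	base := &Config{LogFile: "/var/log/nomad.log"}
	override := &Config{LogRotateBytes: 10 << 20, LogRotateMaxFiles: 5}

	merged := base.Merge(override)
	// merged.LogFile == "/var/log/nomad.log" (kept from base)
	// merged.LogRotateBytes == 10485760, merged.LogRotateMaxFiles == 5
	return merged
}
```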
View File

@@ -19,6 +19,7 @@ var basicConfig = &Config{
NodeName: "my-web",
DataDir: "/tmp/nomad",
PluginDir: "/tmp/nomad-plugins",
LogFile: "/var/log/nomad.log",
LogLevel: "ERR",
LogJson: true,
BindAddr: "192.168.0.1",
@@ -409,14 +410,10 @@ func TestConfig_Parse(t *testing.T) {
t.Run(tc.File, func(t *testing.T) {
require := require.New(t)
path, err := filepath.Abs(filepath.Join("./testdata", tc.File))
if err != nil {
t.Fatalf("file: %s\n\n%s", tc.File, err)
}
require.NoError(err)
actual, err := ParseConfigFile(path)
if (err != nil) != tc.Err {
t.Fatalf("file: %s\n\n%s", tc.File, err)
}
require.NoError(err)
// ParseConfig used to re-merge defaults for these three objects,
// despite them already being merged in LoadConfig. The test structs

View File

@@ -1480,6 +1480,7 @@ func newConnect(serviceName string, nc *structs.ConsulConnect, networks structs.
// Advertise host IP:port
cc.SidecarService = &api.AgentServiceRegistration{
Tags: helper.CopySliceString(nc.SidecarService.Tags),
Address: net.IP,
Port: port.Value,

View File

@@ -17,12 +17,12 @@ import (
)
var (
allocIDNotPresentErr = fmt.Errorf("must provide a valid alloc id")
fileNameNotPresentErr = fmt.Errorf("must provide a file name")
taskNotPresentErr = fmt.Errorf("must provide task name")
logTypeNotPresentErr = fmt.Errorf("must provide log type (stdout/stderr)")
clientNotRunning = fmt.Errorf("node is not running a Nomad Client")
invalidOrigin = fmt.Errorf("origin must be start or end")
allocIDNotPresentErr = CodedError(400, "must provide a valid alloc id")
fileNameNotPresentErr = CodedError(400, "must provide a file name")
taskNotPresentErr = CodedError(400, "must provide task name")
logTypeNotPresentErr = CodedError(400, "must provide log type (stdout/stderr)")
clientNotRunning = CodedError(400, "node is not running a Nomad Client")
invalidOrigin = CodedError(400, "origin must be start or end")
)
func (s *HTTPServer) FsRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
@@ -273,13 +273,13 @@ func (s *HTTPServer) Logs(resp http.ResponseWriter, req *http.Request) (interfac
if followStr := q.Get("follow"); followStr != "" {
if follow, err = strconv.ParseBool(followStr); err != nil {
return nil, fmt.Errorf("failed to parse follow field to boolean: %v", err)
return nil, CodedError(400, fmt.Sprintf("failed to parse follow field to boolean: %v", err))
}
}
if plainStr := q.Get("plain"); plainStr != "" {
if plain, err = strconv.ParseBool(plainStr); err != nil {
return nil, fmt.Errorf("failed to parse plain field to boolean: %v", err)
return nil, CodedError(400, fmt.Sprintf("failed to parse plain field to boolean: %v", err))
}
}
@@ -295,7 +295,7 @@ func (s *HTTPServer) Logs(resp http.ResponseWriter, req *http.Request) (interfac
if offsetString != "" {
var err error
if offset, err = strconv.ParseInt(offsetString, 10, 64); err != nil {
return nil, fmt.Errorf("error parsing offset: %v", err)
return nil, CodedError(400, fmt.Sprintf("error parsing offset: %v", err))
}
}
@@ -388,10 +388,13 @@ func (s *HTTPServer) fsStreamImpl(resp http.ResponseWriter,
decoder.Reset(httpPipe)
if err := res.Error; err != nil {
code := 500
if err.Code != nil {
errCh <- CodedError(int(*err.Code), err.Error())
return
code = int(*err.Code)
}
errCh <- CodedError(code, err.Error())
return
}
if _, err := io.Copy(output, bytes.NewReader(res.Payload)); err != nil {

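Returning CodedError lets the HTTP layer propagate a real status code instead of a blanket 500. Nomad defines HTTPCodedError in the agent package; a rough standalone sketch of the shape (illustrative, not the actual implementation):

```go
package main

import "fmt"

// HTTPCodedError pairs an HTTP status code with an error message.
type HTTPCodedError interface {
	error
	Code() int
}

type codedError struct {
	code int
	msg  string
}

func (e *codedError) Error() string { return e.msg }
func (e *codedError) Code() int     { return e.code }

// CodedError mirrors the constructor used throughout the diff.
func CodedError(code int, msg string) HTTPCodedError {
	return &codedError{code: code, msg: msg}
}

func main() {
	err := CodedError(400, "must provide a valid alloc id")
	fmt.Println(err.Code(), err) // 400 must provide a valid alloc id
}
```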
View File

@@ -3,7 +3,6 @@ package agent
import (
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/http/httptest"
@@ -12,6 +11,7 @@ import (
"time"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
@@ -189,25 +189,26 @@ func TestHTTP_FS_Stream_MissingParams(t *testing.T) {
require := require.New(t)
httpTest(t, nil, func(s *TestAgent) {
req, err := http.NewRequest("GET", "/v1/client/fs/stream/", nil)
require.Nil(err)
require.NoError(err)
respW := httptest.NewRecorder()
_, err = s.Server.Stream(respW, req)
require.EqualError(err, allocIDNotPresentErr.Error())
req, err = http.NewRequest("GET", "/v1/client/fs/stream/foo", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()
_, err = s.Server.Stream(respW, req)
require.EqualError(err, fileNameNotPresentErr.Error())
req, err = http.NewRequest("GET", "/v1/client/fs/stream/foo?path=/path/to/file", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()
_, err = s.Server.Stream(respW, req)
require.Nil(err)
require.Error(err)
require.Contains(err.Error(), "alloc lookup failed")
})
}
@@ -219,38 +220,39 @@ func TestHTTP_FS_Logs_MissingParams(t *testing.T) {
httpTest(t, nil, func(s *TestAgent) {
// AllocID Not Present
req, err := http.NewRequest("GET", "/v1/client/fs/logs/", nil)
require.Nil(err)
require.NoError(err)
respW := httptest.NewRecorder()
s.Server.mux.ServeHTTP(respW, req)
require.Equal(respW.Body.String(), allocIDNotPresentErr.Error())
require.Equal(500, respW.Code) // 500 for backward compat
require.Equal(400, respW.Code)
// Task Not Present
req, err = http.NewRequest("GET", "/v1/client/fs/logs/foo", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()
s.Server.mux.ServeHTTP(respW, req)
require.Equal(respW.Body.String(), taskNotPresentErr.Error())
require.Equal(500, respW.Code) // 500 for backward compat
require.Equal(400, respW.Code)
// Log Type Not Present
req, err = http.NewRequest("GET", "/v1/client/fs/logs/foo?task=foo", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()
s.Server.mux.ServeHTTP(respW, req)
require.Equal(respW.Body.String(), logTypeNotPresentErr.Error())
require.Equal(500, respW.Code) // 500 for backward compat
require.Equal(400, respW.Code)
// Ok
// case where all parameters are set but alloc isn't found
req, err = http.NewRequest("GET", "/v1/client/fs/logs/foo?task=foo&type=stdout", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()
s.Server.mux.ServeHTTP(respW, req)
require.Equal(200, respW.Code)
require.Equal(500, respW.Code)
require.Contains(respW.Body.String(), "alloc lookup failed")
})
}
@@ -354,8 +356,7 @@ func TestHTTP_FS_Stream_NoFollow(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/stream/%s?path=alloc/logs/web.stdout.0&offset=%d&origin=end&follow=false",
a.ID, offset)
p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := testutil.NewResponseRecorder()
doneCh := make(chan struct{})
@@ -383,8 +384,6 @@ func TestHTTP_FS_Stream_NoFollow(t *testing.T) {
case <-time.After(1 * time.Second):
t.Fatal("should close but did not")
}
p.Close()
})
}
@@ -401,9 +400,7 @@ func TestHTTP_FS_Stream_Follow(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/stream/%s?path=alloc/logs/web.stdout.0&offset=%d&origin=end",
a.ID, offset)
p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := httptest.NewRecorder()
doneCh := make(chan struct{})
@@ -431,8 +428,6 @@ func TestHTTP_FS_Stream_Follow(t *testing.T) {
t.Fatal("shouldn't close")
case <-time.After(1 * time.Second):
}
p.Close()
})
}
@@ -448,8 +443,7 @@ func TestHTTP_FS_Logs(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/logs/%s?type=stdout&task=web&offset=%d&origin=end&plain=true",
a.ID, offset)
p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := testutil.NewResponseRecorder()
go func() {
@@ -469,8 +463,6 @@ func TestHTTP_FS_Logs(t *testing.T) {
}, func(err error) {
t.Fatal(err)
})
p.Close()
})
}
@@ -486,8 +478,7 @@ func TestHTTP_FS_Logs_Follow(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/logs/%s?type=stdout&task=web&offset=%d&origin=end&plain=true&follow=true",
a.ID, offset)
p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := testutil.NewResponseRecorder()
errCh := make(chan error)
@@ -514,7 +505,23 @@ func TestHTTP_FS_Logs_Follow(t *testing.T) {
t.Fatalf("shouldn't exit: %v", err)
case <-time.After(1 * time.Second):
}
p.Close()
})
}
func TestHTTP_FS_Logs_PropagatesErrors(t *testing.T) {
t.Parallel()
httpTest(t, nil, func(s *TestAgent) {
path := fmt.Sprintf("/v1/client/fs/logs/%s?type=stdout&task=web&offset=0&origin=end&plain=true",
uuid.Generate())
req, err := http.NewRequest("GET", path, nil)
require.NoError(t, err)
respW := testutil.NewResponseRecorder()
_, err = s.Server.Logs(respW, req)
require.Error(t, err)
_, ok := err.(HTTPCodedError)
require.Truef(t, ok, "expected a coded error but found: %#+v", err)
})
}

View File

@@ -812,9 +812,10 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
structsTask.VolumeMounts = make([]*structs.VolumeMount, l)
for i, mount := range apiTask.VolumeMounts {
structsTask.VolumeMounts[i] = &structs.VolumeMount{
Volume: mount.Volume,
Destination: mount.Destination,
ReadOnly: mount.ReadOnly,
Volume: *mount.Volume,
Destination: *mount.Destination,
ReadOnly: *mount.ReadOnly,
PropagationMode: *mount.PropagationMode,
}
}
}
@@ -1062,6 +1063,7 @@ func ApiConsulConnectToStructs(in *api.ConsulConnect) *structs.ConsulConnect {
if in.SidecarService != nil {
out.SidecarService = &structs.ConsulSidecarService{
Tags: helper.CopySliceString(in.SidecarService.Tags),
Port: in.SidecarService.Port,
}

View File

@@ -1537,6 +1537,13 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
TaskName: "task1",
},
},
Connect: &api.ConsulConnect{
Native: false,
SidecarService: &api.ConsulSidecarService{
Tags: []string{"f", "g"},
Port: "9000",
},
},
},
},
Tasks: []*api.Task{
@@ -1877,6 +1884,13 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
TaskName: "task1",
},
},
Connect: &structs.ConsulConnect{
Native: false,
SidecarService: &structs.ConsulSidecarService{
Tags: []string{"f", "g"},
Port: "9000",
},
},
},
},
Tasks: []*structs.Task{

146
command/agent/log_file.go Normal file
View File

@@ -0,0 +1,146 @@
package agent
import (
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/hashicorp/logutils"
)
var (
now = time.Now
)
// logFile is used to setup a file based logger that also performs log rotation
type logFile struct {
// Log level Filter to filter out logs that do not match LogLevel criteria
logFilter *logutils.LevelFilter
// Name of the log file
fileName string
// Path to the log file
logPath string
// Duration between each file rotation operation
duration time.Duration
// LastCreated represents the creation time of the latest log
LastCreated time.Time
// FileInfo is the pointer to the current file being written to
FileInfo *os.File
// MaxBytes is the maximum number of desired bytes for a log file
MaxBytes int
// BytesWritten is the number of bytes written in the current log file
BytesWritten int64
// MaxFiles is the maximum number of rotated files to keep before removing them
MaxFiles int
// acquire is the mutex that guards against concurrent writes
acquire sync.Mutex
}
func (l *logFile) fileNamePattern() string {
// Extract the file extension
fileExt := filepath.Ext(l.fileName)
// If we have no file extension we append .log
if fileExt == "" {
fileExt = ".log"
}
// Remove the file extension from the filename
return strings.TrimSuffix(l.fileName, fileExt) + "-%s" + fileExt
}
func (l *logFile) openNew() error {
fileNamePattern := l.fileNamePattern()
// New file name has the format: filename-timestamp.extension
createTime := now()
newfileName := fmt.Sprintf(fileNamePattern, strconv.FormatInt(createTime.UnixNano(), 10))
newfilePath := filepath.Join(l.logPath, newfileName)
// Try creating a file. We truncate the file because we are the only authority to write the logs
filePointer, err := os.OpenFile(newfilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0640)
if err != nil {
return err
}
l.FileInfo = filePointer
// New file, new bytes tracker, new creation time :)
l.LastCreated = createTime
l.BytesWritten = 0
return nil
}
func (l *logFile) rotate() error {
// Get the time from the last point of contact
timeElapsed := time.Since(l.LastCreated)
// Rotate if we hit the byte file limit or the time limit
if (l.BytesWritten >= int64(l.MaxBytes) && (l.MaxBytes > 0)) || timeElapsed >= l.duration {
l.FileInfo.Close()
if err := l.pruneFiles(); err != nil {
return err
}
return l.openNew()
}
return nil
}
func (l *logFile) pruneFiles() error {
if l.MaxFiles == 0 {
return nil
}
pattern := l.fileNamePattern()
// Get all the files that match the log file pattern
globExpression := filepath.Join(l.logPath, fmt.Sprintf(pattern, "*"))
matches, err := filepath.Glob(globExpression)
if err != nil {
return err
}
// Sort the strings, as filepath.Glob does not publicly guarantee that files
// are sorted, so here we add an extra defensive sort.
sort.Strings(matches)
// Prune if there are more files stored than the configured max
stale := len(matches) - l.MaxFiles
for i := 0; i < stale; i++ {
if err := os.Remove(matches[i]); err != nil {
return err
}
}
return nil
}
// Write is used to implement io.Writer
func (l *logFile) Write(b []byte) (int, error) {
// Filter out log entries that do not match log level criteria
if !l.logFilter.Check(b) {
return 0, nil
}
l.acquire.Lock()
defer l.acquire.Unlock()
// Create a new file if we have no file to write to
if l.FileInfo == nil {
if err := l.openNew(); err != nil {
return 0, err
}
}
// Check for the last contact and rotate if necessary
if err := l.rotate(); err != nil {
return 0, err
}
n, err := l.FileInfo.Write(b)
l.BytesWritten += int64(n)
return n, err
}

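A hedged usage sketch of the new writer: wired up the way setupLoggers does above, the rotating file simply joins the MultiWriter alongside the other sinks (LevelFilter is the agent package's existing helper, as used in the tests below):

```go
package agent

import (
	"io"
	"log"
	"os"
	"time"
)

func exampleFileLogging() {
	lf := &logFile{
		logFilter: LevelFilter(), // same level filter as the other writers
		fileName:  "nomad.log",
		logPath:   "/var/log/nomad",
		duration:  24 * time.Hour, // rotate at least daily...
		MaxBytes:  10 << 20,       // ...or at ~10 MiB, whichever comes first
		MaxFiles:  5,              // keep the five newest archives
	}
	// The rotating file is just another io.Writer in the fan-out.
	log.SetOutput(io.MultiWriter(os.Stderr, lf))
}
```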
View File

@@ -0,0 +1,171 @@
package agent
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
"github.com/hashicorp/logutils"
"github.com/stretchr/testify/require"
)
const (
testFileName = "Nomad.log"
testDuration = 2 * time.Second
testBytes = 10
)
func TestLogFile_timeRotation(t *testing.T) {
t.Parallel()
require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterTimeTest")
require.NoError(err)
defer os.Remove(tempDir)
filt := LevelFilter()
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
duration: testDuration,
}
logFile.Write([]byte("Hello World"))
time.Sleep(2 * time.Second)
logFile.Write([]byte("Second File"))
want := 2
if got, _ := ioutil.ReadDir(tempDir); len(got) != want {
t.Errorf("Expected %d files, got %v file(s)", want, len(got))
}
}
func TestLogFile_openNew(t *testing.T) {
t.Parallel()
require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterOpenTest")
require.NoError(err)
defer os.Remove(tempDir)
logFile := logFile{fileName: testFileName, logPath: tempDir, duration: testDuration}
require.NoError(logFile.openNew())
_, err = ioutil.ReadFile(logFile.FileInfo.Name())
require.NoError(err)
}
func TestLogFile_byteRotation(t *testing.T) {
t.Parallel()
require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterByteTest")
require.NoError(err)
defer os.Remove(tempDir)
filt := LevelFilter()
filt.MinLevel = logutils.LogLevel("INFO")
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: 24 * time.Hour,
}
logFile.Write([]byte("Hello World"))
logFile.Write([]byte("Second File"))
want := 2
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
}
func TestLogFile_logLevelFiltering(t *testing.T) {
t.Parallel()
require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterFilterTest")
require.NoError(err)
defer os.Remove(tempDir)
filt := LevelFilter()
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: testDuration,
}
logFile.Write([]byte("[INFO] This is an info message"))
logFile.Write([]byte("[DEBUG] This is a debug message"))
logFile.Write([]byte("[ERR] This is an error message"))
want := 2
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
}
func TestLogFile_deleteArchives(t *testing.T) {
t.Parallel()
require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterDeleteArchivesTest")
require.NoError(err)
defer os.Remove(tempDir)
filt := LevelFilter()
filt.MinLevel = logutils.LogLevel("INFO")
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: 24 * time.Hour,
MaxFiles: 1,
}
logFile.Write([]byte("[INFO] Hello World"))
logFile.Write([]byte("[INFO] Second File"))
logFile.Write([]byte("[INFO] Third File"))
want := 2
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
for _, tempFile := range tempFiles {
var bytes []byte
var err error
path := filepath.Join(tempDir, tempFile.Name())
if bytes, err = ioutil.ReadFile(path); err != nil {
t.Error(err)
return
}
contents := string(bytes)
require.NotEqual("[INFO] Hello World", contents, "oldest log should have been deleted")
}
}
func TestLogFile_deleteArchivesDisabled(t *testing.T) {
t.Parallel()
require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterDeleteArchivesDisabledTest")
require.NoError(err)
defer os.Remove(tempDir)
filt := LevelFilter()
filt.MinLevel = logutils.LogLevel("INFO")
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: 24 * time.Hour,
MaxFiles: 0,
}
logFile.Write([]byte("[INFO] Hello World"))
logFile.Write([]byte("[INFO] Second File"))
logFile.Write([]byte("[INFO] Third File"))
want := 3
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
}

View File

@@ -13,6 +13,8 @@ log_level = "ERR"
log_json = true
log_file = "/var/log/nomad.log"
bind_addr = "192.168.0.1"
enable_debug = true

View File

@@ -143,6 +143,7 @@
],
"leave_on_interrupt": true,
"leave_on_terminate": true,
"log_file": "/var/log/nomad.log",
"log_json": true,
"log_level": "ERR",
"name": "my-web",

View File

@@ -234,7 +234,7 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength
}
basic := []string{
fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)),
fmt.Sprintf("ID|%s", alloc.ID),
fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)),
fmt.Sprintf("Name|%s", alloc.Name),
fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)),

View File

@@ -286,6 +286,9 @@ func TestAllocStatusCommand_ScoreMetrics(t *testing.T) {
require.Contains(out, "Placement Metrics")
require.Contains(out, mockNode1.ID)
require.Contains(out, mockNode2.ID)
// assert we sort headers alphabetically
require.Contains(out, "binpack node-affinity")
require.Contains(out, "final score")
}

View File

@@ -2,6 +2,7 @@ package command
import (
"fmt"
"sort"
"strings"
"sync"
"time"
@@ -380,7 +381,16 @@ func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix strin
// Add header as first row
if i == 0 {
scoreOutput[0] = "Node|"
for scorerName := range scoreMeta.Scores {
// sort scores alphabetically
scores := make([]string, 0, len(scoreMeta.Scores))
for score := range scoreMeta.Scores {
scores = append(scores, score)
}
sort.Strings(scores)
// build score header output
for _, scorerName := range scores {
scoreOutput[0] += fmt.Sprintf("%v|", scorerName)
scorerNames = append(scorerNames, scorerName)
}

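Go's map iteration order is unspecified (and randomized by the runtime), so a table header built by ranging over a map must collect and sort the keys first, exactly as this fix does. The general idiom:

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	scores := map[string]float64{"node-affinity": 0.5, "binpack": 0.3, "final score": 0.4}

	// Collect the keys, sort them, then iterate deterministically.
	names := make([]string, 0, len(scores))
	for name := range scores {
		names = append(names, name)
	}
	sort.Strings(names)

	header := "Node|"
	for _, name := range names {
		header += name + "|"
	}
	fmt.Println(header) // Node|binpack|final score|node-affinity|
}
```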
View File

@@ -333,7 +333,7 @@ func formatDrain(n *api.Node) string {
func (c *NodeStatusCommand) formatNode(client *api.Client, node *api.Node) int {
// Format the header output
basic := []string{
fmt.Sprintf("ID|%s", limit(node.ID, c.length)),
fmt.Sprintf("ID|%s", node.ID),
fmt.Sprintf("Name|%s", node.Name),
fmt.Sprintf("Class|%s", node.NodeClass),
fmt.Sprintf("DC|%s", node.Datacenter),

View File

@@ -137,11 +137,8 @@ func TestNodeStatusCommand_Run(t *testing.T) {
if !strings.Contains(out, "mynode") {
t.Fatalf("expect to find mynode, got: %s", out)
}
if strings.Contains(out, nodeID) {
t.Fatalf("expected truncated node id, got: %s", out)
}
if !strings.Contains(out, nodeID[:8]) {
t.Fatalf("expected node id %q, got: %s", nodeID[:8], out)
if !strings.Contains(out, nodeID) {
t.Fatalf("expected node id %q, got: %s", nodeID, out)
}
ui.OutputWriter.Reset()

View File

@@ -13,6 +13,7 @@ import (
"github.com/hashicorp/hcl/hcl/ast"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/jobspec"
"github.com/mitchellh/mapstructure"
"github.com/posener/complete"
)
@@ -261,6 +262,7 @@ func parseQuotaResource(result *api.Resources, list *ast.ObjectList) error {
valid := []string{
"cpu",
"memory",
"network",
}
if err := helper.CheckHCLKeys(listVal, valid); err != nil {
return multierror.Prefix(err, "resources ->")
@@ -275,5 +277,20 @@ func parseQuotaResource(result *api.Resources, list *ast.ObjectList) error {
return err
}
// Find the network ObjectList, parse it
nw := listVal.Filter("network")
if len(nw.Items) > 0 {
rl, err := jobspec.ParseNetwork(nw)
if err != nil {
return multierror.Prefix(err, "resources ->")
}
if rl != nil {
if rl.Mode != "" || rl.HasPorts() {
return fmt.Errorf("resources -> network only allows mbits")
}
result.Networks = []*api.NetworkResource{rl}
}
}
return nil
}

View File

@@ -8,8 +8,10 @@ import (
"strings"
"testing"
"github.com/hashicorp/nomad/api"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestQuotaApplyCommand_Implements(t *testing.T) {
@@ -97,3 +99,42 @@ func TestQuotaApplyCommand_Good_JSON(t *testing.T) {
assert.Nil(t, err)
assert.Len(t, quotas, 1)
}
func TestQuotaApplyNetwork(t *testing.T) {
t.Parallel()
mbits := 20
cases := []struct {
hcl string
q *api.QuotaSpec
err string
}{{
hcl: `limit {region = "global", region_limit {network {mbits = 20}}}`,
q: &api.QuotaSpec{
Limits: []*api.QuotaLimit{{
Region: "global",
RegionLimit: &api.Resources{
Networks: []*api.NetworkResource{{
MBits: &mbits,
}},
},
}},
},
err: "",
}, {
hcl: `limit {region = "global", region_limit {network { mbits = 20, device = "eth0"}}}`,
q: nil,
err: "1 error(s) occurred:\n\n* limit -> region_limit -> resources -> network -> invalid key: device",
}}
for _, c := range cases {
t.Run(c.hcl, func(t *testing.T) {
q, err := parseQuotaSpec([]byte(c.hcl))
require.Equal(t, c.q, q)
if c.err != "" {
require.EqualError(t, err, c.err)
}
})
}
}

View File

@@ -3,8 +3,8 @@
This directory contains some documentation about the Nomad codebase,
aimed at readers who are interested in making code contributions.
If you're looking for information on _using_ using, please instead refer
to [the main Nomad website](https://nomadproject.io).
If you're looking for information on _using_ Nomad, please instead refer
to the [Nomad website](https://nomadproject.io).
## Architecture
@@ -38,3 +38,4 @@ developers and reviewers confidence that the proper changes have been made:
* [New `jobspec` entry](checklist-jobspec.md)
* [New CLI command](checklist-command.md)
* [New RPC endpoint](checklist-rpc-endpoint.md)

View File

@@ -20,6 +20,7 @@
* [ ] Changelog
* [ ] Jobspec entry https://www.nomadproject.io/docs/job-specification/index.html
* [ ] Jobspec sidebar entry https://github.com/hashicorp/nomad/blob/master/website/source/layouts/docs.erb
* [ ] Job JSON API entry https://www.nomadproject.io/api/json-jobs.html
* [ ] Sample Response output in API https://www.nomadproject.io/api/jobs.html
* [ ] Consider if it needs a guide https://www.nomadproject.io/guides/index.html

View File

@@ -24,7 +24,7 @@ sudo docker --version
sudo apt-get install unzip curl vim -y
echo "Installing Nomad..."
NOMAD_VERSION=0.8.6
NOMAD_VERSION=0.10.0
cd /tmp/
curl -sSL https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -o nomad.zip
unzip nomad.zip
@@ -34,7 +34,7 @@ sudo chmod a+w /etc/nomad.d
echo "Installing Consul..."
CONSUL_VERSION=1.4.0
CONSUL_VERSION=1.6.1
curl -sSL https://releases.hashicorp.com/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip > consul.zip
unzip /tmp/consul.zip
sudo install consul /usr/bin/consul
@@ -68,7 +68,7 @@ nomad -autocomplete-install
SCRIPT
Vagrant.configure(2) do |config|
config.vm.box = "bento/ubuntu-16.04" # 16.04 LTS
config.vm.box = "bento/ubuntu-18.04" # 18.04 LTS
config.vm.hostname = "nomad"
config.vm.provision "shell", inline: $script, privileged: false

View File

@@ -134,6 +134,25 @@ var (
Name: pluginName,
}
danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewDefault(
hclspec.NewAttr("enabled", "bool", false),
hclspec.NewLiteral(`true`),
),
"period": hclspec.NewDefault(
hclspec.NewAttr("period", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"creation_grace": hclspec.NewDefault(
hclspec.NewAttr("creation_grace", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"dry_run": hclspec.NewDefault(
hclspec.NewAttr("dry_run", "bool", false),
hclspec.NewLiteral(`false`),
),
})
// configSpec is the hcl specification returned by the ConfigSchema RPC
// and is used to parse the contents of the 'plugin "docker" {...}' block.
// Example:
@@ -195,9 +214,22 @@ var (
hclspec.NewAttr("container", "bool", false),
hclspec.NewLiteral("true"),
),
"dangling_containers": hclspec.NewDefault(
hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
hclspec.NewLiteral(`{
enabled = true
period = "5m"
creation_grace = "5m"
}`),
),
})), hclspec.NewLiteral(`{
image = true
container = true
dangling_containers = {
enabled = true
period = "5m"
creation_grace = "5m"
}
}`)),
// docker volume options
@@ -491,6 +523,28 @@ type DockerVolumeDriverConfig struct {
Options hclutils.MapStrStr `codec:"options"`
}
// ContainerGCConfig controls the behavior of the GC reconciler that detects
// dangling Nomad containers that aren't tracked due to Docker/Nomad bugs
type ContainerGCConfig struct {
// Enabled controls whether container reconciler is enabled
Enabled bool `codec:"enabled"`
// DryRun indicates that the reconciler should log unexpectedly running
// containers, if found, without actually killing them
DryRun bool `codec:"dry_run"`
// PeriodStr controls the frequency of scanning containers
PeriodStr string `codec:"period"`
period time.Duration `codec:"-"`
// CreationGraceStr is the duration allowed for a newly created container
// to live without being registered as a running task in nomad.
// A container is treated as leaked if it has lived longer than the grace
// duration and hasn't been registered in tasks.
CreationGraceStr string `codec:"creation_grace"`
CreationGrace time.Duration `codec:"-"`
}
type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
@@ -519,6 +573,8 @@ type GCConfig struct {
ImageDelay string `codec:"image_delay"`
imageDelayDuration time.Duration `codec:"-"`
Container bool `codec:"container"`
DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
}
type VolumeConfig struct {
@@ -534,6 +590,8 @@ func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
return configSpec, nil
}
const danglingContainersCreationGraceMinimum = 1 * time.Minute
func (d *Driver) SetConfig(c *base.Config) error {
var config DriverConfig
if len(c.PluginConfig) != 0 {
@@ -551,6 +609,25 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.config.GC.imageDelayDuration = dur
}
if len(d.config.GC.DanglingContainers.PeriodStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
if err != nil {
return fmt.Errorf("failed to parse 'period' duration: %v", err)
}
d.config.GC.DanglingContainers.period = dur
}
if len(d.config.GC.DanglingContainers.CreationGraceStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationGraceStr)
if err != nil {
return fmt.Errorf("failed to parse 'creation_grace' duration: %v", err)
}
if dur < danglingContainersCreationGraceMinimum {
return fmt.Errorf("creation_grace is less than minimum, %v", danglingContainersCreationGraceMinimum)
}
d.config.GC.DanglingContainers.CreationGrace = dur
}
if c.AgentConfig != nil {
d.clientConfig = c.AgentConfig.Driver
}
@@ -568,6 +645,8 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.coordinator = newDockerCoordinator(coordinatorConfig)
d.reconciler = newReconciler(d)
return nil
}

View File

@@ -423,3 +423,63 @@ config {
require.EqualValues(t, expected, tc)
}
// TestConfig_DriverConfig_DanglingContainers asserts that dangling_containers is parsed
// and populated with defaults as expected
func TestConfig_DriverConfig_DanglingContainers(t *testing.T) {
cases := []struct {
name string
config string
expected ContainerGCConfig
}{
{
name: "pure default",
config: `{}`,
expected: ContainerGCConfig{Enabled: true, PeriodStr: "5m", CreationGraceStr: "5m"},
},
{
name: "partial gc",
config: `{ gc { } }`,
expected: ContainerGCConfig{Enabled: true, PeriodStr: "5m", CreationGraceStr: "5m"},
},
{
name: "partial gc",
config: `{ gc { dangling_containers { } } }`,
expected: ContainerGCConfig{Enabled: true, PeriodStr: "5m", CreationGraceStr: "5m"},
},
{
name: "partial dangling_containers",
config: `{ gc { dangling_containers { enabled = false } } }`,
expected: ContainerGCConfig{Enabled: false, PeriodStr: "5m", CreationGraceStr: "5m"},
},
{
name: "incomplete dangling_containers 2",
config: `{ gc { dangling_containers { period = "10m" } } }`,
expected: ContainerGCConfig{Enabled: true, PeriodStr: "10m", CreationGraceStr: "5m"},
},
{
name: "full default",
config: `{ gc { dangling_containers {
enabled = false
dry_run = true
period = "10m"
creation_grace = "20m"
}}}`,
expected: ContainerGCConfig{
Enabled: false,
DryRun: true,
PeriodStr: "10m",
CreationGraceStr: "20m",
},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
var tc DriverConfig
hclutils.NewConfigParser(configSpec).ParseHCL(t, "config "+c.config, &tc)
require.EqualValues(t, c.expected, tc.GC.DanglingContainers)
})
}
}

View File

@@ -66,6 +66,10 @@ var (
nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)
const (
dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
)
type Driver struct {
// eventer is used to handle multiplexing of TaskEvents calls such that an
// event can be broadcast to all callers
@@ -108,6 +112,8 @@ type Driver struct {
// for use during fingerprinting.
detected bool
detectedLock sync.RWMutex
reconciler *containerReconciler
}
// NewDockerDriver returns a docker implementation of a driver plugin
@@ -309,6 +315,10 @@ CREATE:
// the container is started
runningContainer, err := client.InspectContainer(container.ID)
if err != nil {
client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
Force: true,
})
msg := "failed to inspect started container"
d.logger.Error(msg, "error", err)
client.RemoveContainer(docker.RemoveContainerOptions{
@@ -642,6 +652,15 @@ func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConf
return binds, nil
}
var userMountToUnixMount = map[string]string{
// Empty string maps to `rprivate` for backwards compatibility in restored
// older tasks, where mount propagation will not be present.
"": "rprivate",
nstructs.VolumeMountPropagationPrivate: "rprivate",
nstructs.VolumeMountPropagationHostToTask: "rslave",
nstructs.VolumeMountPropagationBidirectional: "rshared",
}
func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
imageID string) (docker.CreateContainerOptions, error) {
@@ -833,13 +852,24 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
hostConfig.Mounts = append(hostConfig.Mounts, hm)
}
for _, m := range task.Mounts {
hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{
hm := docker.HostMount{
Type: "bind",
Target: m.TaskPath,
Source: m.HostPath,
ReadOnly: m.Readonly,
})
}
// MountPropagation is only supported by Docker on Linux:
// https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
if runtime.GOOS == "linux" {
hm.BindOptions = &docker.BindOptions{
Propagation: userMountToUnixMount[m.PropagationMode],
}
}
hostConfig.Mounts = append(hostConfig.Mounts, hm)
}
// set DNS search domains and extra hosts
@@ -882,7 +912,6 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
// Setup port mapping and exposed ports
if len(task.Resources.NomadResources.Networks) == 0 {
logger.Debug("no network interfaces are available")
if len(driverConfig.PortMap) > 0 {
return c, fmt.Errorf("Trying to map ports but no network interface is available")
}
@@ -957,9 +986,16 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
if len(driverConfig.Labels) > 0 {
config.Labels = driverConfig.Labels
logger.Debug("applied labels on the container", "labels", config.Labels)
}
labels := make(map[string]string, len(driverConfig.Labels)+1)
for k, v := range driverConfig.Labels {
labels[k] = v
}
labels[dockerLabelAllocID] = task.AllocID
config.Labels = labels
logger.Debug("applied labels on the container", "labels", config.Labels)
config.Env = task.EnvList()
containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
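One detail worth calling out in this file: the empty-string key in the userMountToUnixMount map above is what keeps tasks restored from pre-0.10 state, which carry no propagation mode, on `rprivate`. A standalone sketch of the lookup, with the nstructs constants inlined as assumed string values:

```go
package main

import "fmt"

// Inlined copy of the docker driver's propagation mapping above;
// the keys are the strings the nstructs constants are assumed to
// resolve to.
var userMountToUnixMount = map[string]string{
	"":              "rprivate", // restored pre-0.10 tasks have no propagation mode
	"private":       "rprivate",
	"host-to-task":  "rslave",
	"bidirectional": "rshared",
}

func main() {
	for _, mode := range []string{"", "host-to-task", "bidirectional"} {
		fmt.Printf("%q -> %s\n", mode, userMountToUnixMount[mode])
	}
}
```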

View File

@@ -905,7 +905,8 @@ func TestDockerDriver_Labels(t *testing.T) {
t.Fatalf("err: %v", err)
}
require.Equal(t, 2, len(container.Config.Labels))
// expect to see 1 additional standard label
require.Equal(t, len(cfg.Labels)+1, len(container.Config.Labels))
for k, v := range cfg.Labels {
require.Equal(t, v, container.Config.Labels[k])
}
@@ -1008,6 +1009,56 @@ func TestDockerDriver_CreateContainerConfig(t *testing.T) {
require.Equal(t, containerName, c.Name)
}
func TestDockerDriver_CreateContainerConfig_User(t *testing.T) {
t.Parallel()
task, cfg, _ := dockerTask(t)
task.User = "random-user-1"
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
dh := dockerDriverHarness(t, nil)
driver := dh.Impl().(*Driver)
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)
require.Equal(t, task.User, c.Config.User)
}
func TestDockerDriver_CreateContainerConfig_Labels(t *testing.T) {
t.Parallel()
task, cfg, _ := dockerTask(t)
task.AllocID = uuid.Generate()
task.JobName = "redis-demo-job"
cfg.Labels = map[string]string{
"user_label": "user_value",
// com.hashicorp.nomad. labels are reserved and
// cannot be overridden
"com.hashicorp.nomad.alloc_id": "bad_value",
}
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
dh := dockerDriverHarness(t, nil)
driver := dh.Impl().(*Driver)
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)
expectedLabels := map[string]string{
// user provided labels
"user_label": "user_value",
// default labels
"com.hashicorp.nomad.alloc_id": task.AllocID,
}
require.Equal(t, expectedLabels, c.Config.Labels)
}
func TestDockerDriver_CreateContainerConfig_Logging(t *testing.T) {
t.Parallel()

View File

@@ -7,6 +7,7 @@ import (
"io"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
@@ -598,22 +599,32 @@ func TestDockerDriver_CreateContainerConfig_MountsCombined(t *testing.T) {
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)
expectedMounts := []docker.HostMount{
{
Type: "bind",
Source: "/tmp/cfg-mount",
Target: "/container/tmp/cfg-mount",
ReadOnly: false,
BindOptions: &docker.BindOptions{},
Type: "bind",
Source: "/tmp/cfg-mount",
Target: "/container/tmp/cfg-mount",
ReadOnly: false,
BindOptions: &docker.BindOptions{
Propagation: "",
},
},
{
Type: "bind",
Source: "/tmp/task-mount",
Target: "/container/tmp/task-mount",
ReadOnly: true,
BindOptions: &docker.BindOptions{
Propagation: "rprivate",
},
},
}
if runtime.GOOS != "linux" {
expectedMounts[0].BindOptions = &docker.BindOptions{}
expectedMounts[1].BindOptions = &docker.BindOptions{}
}
foundMounts := c.HostConfig.Mounts
sort.Slice(foundMounts, func(i, j int) bool {
return foundMounts[i].Target < foundMounts[j].Target

View File

@@ -13,6 +13,10 @@ import (
)
func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
// start the reconciler when we start fingerprinting;
// this is the only method called when the driver is launched properly
d.reconciler.Start()
ch := make(chan *drivers.Fingerprint)
go d.handleFingerprint(ctx, ch)
return ch, nil
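Since Fingerprint is the entry point each time the driver launches, the reconciler's Start (shown in the new file below) guards its background goroutine with sync.Once so repeated calls stay safe. A minimal standalone sketch of that idempotency, assuming nothing beyond the standard library:

```go
package main

import (
	"fmt"
	"sync"
)

// starter mimics the reconciler's Start: however many times it is
// called, the background work is launched at most once.
type starter struct {
	once sync.Once
}

func (s *starter) Start() {
	s.once.Do(func() {
		fmt.Println("background goroutine launched")
	})
}

func main() {
	s := &starter{}
	s.Start() // launches
	s.Start() // no-op
}
```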

View File

@@ -0,0 +1,228 @@
package docker
import (
"context"
"fmt"
"regexp"
"sync"
"time"
docker "github.com/fsouza/go-dockerclient"
hclog "github.com/hashicorp/go-hclog"
)
// containerReconciler detects and kills unexpectedly running containers.
//
// Due to Docker's architecture and network-based communication, it is
// possible for Docker to start a container successfully while the
// creation API call fails with a network error. containerReconciler
// scans for these untracked containers and kills them.
type containerReconciler struct {
ctx context.Context
config *ContainerGCConfig
client *docker.Client
logger hclog.Logger
isDriverHealthy func() bool
trackedContainers func() map[string]bool
isNomadContainer func(c docker.APIContainers) bool
once sync.Once
}
func newReconciler(d *Driver) *containerReconciler {
return &containerReconciler{
ctx: d.ctx,
config: &d.config.GC.DanglingContainers,
client: client,
logger: d.logger,
isDriverHealthy: func() bool { return d.previouslyDetected() && d.fingerprintSuccessful() },
trackedContainers: d.trackedContainers,
isNomadContainer: isNomadContainer,
}
}
func (r *containerReconciler) Start() {
if !r.config.Enabled {
r.logger.Debug("skipping dangling containers handling; is disabled")
return
}
r.once.Do(func() {
go r.removeDanglingContainersGoroutine()
})
}
func (r *containerReconciler) removeDanglingContainersGoroutine() {
period := r.config.period
lastIterSucceeded := true
// ensure that we wait for at least one period or the creation grace,
// whichever is longer, before the first container GC iteration
// (e.g. with a 5m period and a 10m creation grace, the first scan
// runs after 10m). This initial delay is a grace period for restoring
// allocations before the driver may kill containers launched by an
// earlier Nomad process.
initialDelay := period
if r.config.CreationGrace > initialDelay {
initialDelay = r.config.CreationGrace
}
timer := time.NewTimer(initialDelay)
for {
select {
case <-timer.C:
if r.isDriverHealthy() {
err := r.removeDanglingContainersIteration()
if err != nil && lastIterSucceeded {
r.logger.Warn("failed to remove dangling containers", "error", err)
}
lastIterSucceeded = (err == nil)
}
timer.Reset(period)
case <-r.ctx.Done():
return
}
}
}
func (r *containerReconciler) removeDanglingContainersIteration() error {
cutoff := time.Now().Add(-r.config.CreationGrace)
tracked := r.trackedContainers()
untracked, err := r.untrackedContainers(tracked, cutoff)
if err != nil {
return fmt.Errorf("failed to find untracked containers: %v", err)
}
if len(untracked) == 0 {
return nil
}
if r.config.DryRun {
r.logger.Info("detected untracked containers", "container_ids", untracked)
return nil
}
for _, id := range untracked {
ctx, cancel := r.dockerAPIQueryContext()
err := client.RemoveContainer(docker.RemoveContainerOptions{
Context: ctx,
ID: id,
Force: true,
})
cancel()
if err != nil {
r.logger.Warn("failed to remove untracked container", "container_id", id, "error", err)
} else {
r.logger.Info("removed untracked container", "container_id", id)
}
}
return nil
}
// untrackedContainers returns the IDs of containers that are suspected
// to have been started by Nomad but aren't tracked by this driver.
func (r *containerReconciler) untrackedContainers(tracked map[string]bool, cutoffTime time.Time) ([]string, error) {
result := []string{}
ctx, cancel := r.dockerAPIQueryContext()
defer cancel()
cc, err := client.ListContainers(docker.ListContainersOptions{
Context: ctx,
All: false, // only reconcile running containers
})
if err != nil {
return nil, fmt.Errorf("failed to list containers: %v", err)
}
cutoff := cutoffTime.Unix()
for _, c := range cc {
if tracked[c.ID] {
continue
}
if c.Created > cutoff {
continue
}
if !r.isNomadContainer(c) {
continue
}
result = append(result, c.ID)
}
return result, nil
}
// dockerAPIQueryContext returns a context for a docker API call with an
// appropriate timeout to protect against a wedged, locked-up API call.
//
// If a call times out, we'll simply retry the Docker API on a subsequent
// iteration.
func (r *containerReconciler) dockerAPIQueryContext() (context.Context, context.CancelFunc) {
// use a reasonable floor to avoid a very small limit
timeout := 30 * time.Second
if timeout < r.config.period {
timeout = r.config.period
}
return context.WithTimeout(context.Background(), timeout)
}
func isNomadContainer(c docker.APIContainers) bool {
if _, ok := c.Labels[dockerLabelAllocID]; ok {
return true
}
// pre-0.10 containers aren't tagged or labeled in any way,
// so use a cheap heuristic based on mount paths and container
// names before inspecting container details
if !hasMount(c, "/alloc") ||
!hasMount(c, "/local") ||
!hasMount(c, "/secrets") ||
!hasNomadName(c) {
return false
}
return true
}
func hasMount(c docker.APIContainers, p string) bool {
for _, m := range c.Mounts {
if m.Destination == p {
return true
}
}
return false
}
var nomadContainerNamePattern = regexp.MustCompile(`\/.*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)
func hasNomadName(c docker.APIContainers) bool {
for _, n := range c.Names {
if nomadContainerNamePattern.MatchString(n) {
return true
}
}
return false
}
func (d *Driver) trackedContainers() map[string]bool {
d.tasks.lock.RLock()
defer d.tasks.lock.RUnlock()
r := make(map[string]bool, len(d.tasks.store))
for _, h := range d.tasks.store {
r[h.containerID] = true
}
return r
}
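The name heuristic above relies on Nomad's `<task>-<alloc-uuid>` container naming convention (see the containerName construction in the driver file earlier). A standalone sketch checking the pattern against the two names that appear in the JSON fixture further below:

```go
package main

import (
	"fmt"
	"regexp"
)

// Same pattern as nomadContainerNamePattern above: a task name
// followed by a UUID-shaped allocation ID.
var pattern = regexp.MustCompile(`\/.*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)

func main() {
	fmt.Println(pattern.MatchString("/redis-72bfa388-024e-a903-45b8-2bc28b74ed69")) // true
	fmt.Println(pattern.MatchString("/serene_keller"))                              // false
}
```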

View File

@@ -0,0 +1,202 @@
package docker
import (
"encoding/json"
"os"
"testing"
"time"
docker "github.com/fsouza/go-dockerclient"
"github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/stretchr/testify/require"
)
func fakeContainerList(t *testing.T) (nomadContainer, nonNomadContainer docker.APIContainers) {
path := "./test-resources/docker/reconciler_containers_list.json"
f, err := os.Open(path)
if err != nil {
t.Fatalf("failed to open file: %v", err)
}
defer f.Close()
var sampleContainerList []docker.APIContainers
err = json.NewDecoder(f).Decode(&sampleContainerList)
if err != nil {
t.Fatalf("failed to decode container list: %v", err)
}
return sampleContainerList[0], sampleContainerList[1]
}
func Test_HasMount(t *testing.T) {
nomadContainer, nonNomadContainer := fakeContainerList(t)
require.True(t, hasMount(nomadContainer, "/alloc"))
require.True(t, hasMount(nomadContainer, "/data"))
require.True(t, hasMount(nomadContainer, "/secrets"))
require.False(t, hasMount(nomadContainer, "/random"))
require.False(t, hasMount(nonNomadContainer, "/alloc"))
require.False(t, hasMount(nonNomadContainer, "/data"))
require.False(t, hasMount(nonNomadContainer, "/secrets"))
require.False(t, hasMount(nonNomadContainer, "/random"))
}
func Test_HasNomadName(t *testing.T) {
nomadContainer, nonNomadContainer := fakeContainerList(t)
require.True(t, hasNomadName(nomadContainer))
require.False(t, hasNomadName(nonNomadContainer))
}
// TestDanglingContainerRemoval asserts containers without corresponding tasks
// are removed after the creation grace period.
func TestDanglingContainerRemoval(t *testing.T) {
testutil.DockerCompatible(t)
// start three containers: one tracked nomad container, one untracked
// nomad-labeled container, and one unrelated container
task, cfg, _ := dockerTask(t)
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
client, d, handle, cleanup := dockerSetup(t, task)
defer cleanup()
require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
nonNomadContainer, err := client.CreateContainer(docker.CreateContainerOptions{
Name: "mytest-image-" + uuid.Generate(),
Config: &docker.Config{
Image: cfg.Image,
Cmd: append([]string{cfg.Command}, cfg.Args...),
},
})
require.NoError(t, err)
defer client.RemoveContainer(docker.RemoveContainerOptions{
ID: nonNomadContainer.ID,
Force: true,
})
err = client.StartContainer(nonNomadContainer.ID, nil)
require.NoError(t, err)
untrackedNomadContainer, err := client.CreateContainer(docker.CreateContainerOptions{
Name: "mytest-image-" + uuid.Generate(),
Config: &docker.Config{
Image: cfg.Image,
Cmd: append([]string{cfg.Command}, cfg.Args...),
Labels: map[string]string{
dockerLabelAllocID: uuid.Generate(),
},
},
})
require.NoError(t, err)
defer client.RemoveContainer(docker.RemoveContainerOptions{
ID: untrackedNomadContainer.ID,
Force: true,
})
err = client.StartContainer(untrackedNomadContainer.ID, nil)
require.NoError(t, err)
dd := d.Impl().(*Driver)
reconciler := newReconciler(dd)
trackedContainers := map[string]bool{handle.containerID: true}
tf := reconciler.trackedContainers()
require.Contains(t, tf, handle.containerID)
require.NotContains(t, tf, untrackedNomadContainer)
require.NotContains(t, tf, nonNomadContainer.ID)
// assert that tracked containers are never reported as untracked
untracked, err := reconciler.untrackedContainers(trackedContainers, time.Now())
require.NoError(t, err)
require.NotContains(t, untracked, handle.containerID)
require.NotContains(t, untracked, nonNomadContainer.ID)
require.Contains(t, untracked, untrackedNomadContainer.ID)
// assert we recognize nomad containers given an appropriate cutoff
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now())
require.NoError(t, err)
require.Contains(t, untracked, handle.containerID)
require.Contains(t, untracked, untrackedNomadContainer.ID)
require.NotContains(t, untracked, nonNomadContainer.ID)
// but ignore them if creation happened before the cutoff
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now().Add(-1*time.Minute))
require.NoError(t, err)
require.NotContains(t, untracked, handle.containerID)
require.NotContains(t, untracked, untrackedNomadContainer.ID)
require.NotContains(t, untracked, nonNomadContainer.ID)
// a full integration test to assert that containers are removed
pristineDriver := dockerDriverHarness(t, nil).Impl().(*Driver)
pristineDriver.config.GC.DanglingContainers = ContainerGCConfig{
Enabled: true,
period: 1 * time.Second,
CreationGrace: 0 * time.Second,
}
nReconciler := newReconciler(pristineDriver)
require.NoError(t, nReconciler.removeDanglingContainersIteration())
_, err = client.InspectContainer(nonNomadContainer.ID)
require.NoError(t, err)
_, err = client.InspectContainer(handle.containerID)
require.Error(t, err)
require.Contains(t, err.Error(), NoSuchContainerError)
_, err = client.InspectContainer(untrackedNomadContainer.ID)
require.Error(t, err)
require.Contains(t, err.Error(), NoSuchContainerError)
}
// TestDanglingContainerRemoval_Stopped asserts stopped containers without
// corresponding tasks are not removed even after the creation grace period.
func TestDanglingContainerRemoval_Stopped(t *testing.T) {
testutil.DockerCompatible(t)
_, cfg, _ := dockerTask(t)
client := newTestDockerClient(t)
container, err := client.CreateContainer(docker.CreateContainerOptions{
Name: "mytest-image-" + uuid.Generate(),
Config: &docker.Config{
Image: cfg.Image,
Cmd: append([]string{cfg.Command}, cfg.Args...),
Labels: map[string]string{
dockerLabelAllocID: uuid.Generate(),
},
},
})
require.NoError(t, err)
defer client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
Force: true,
})
err = client.StartContainer(container.ID, nil)
require.NoError(t, err)
err = client.StopContainer(container.ID, 60)
require.NoError(t, err)
dd := dockerDriverHarness(t, nil).Impl().(*Driver)
reconciler := newReconciler(dd)
// assert the stopped container is neither tracked nor reported as untracked
tf := reconciler.trackedContainers()
require.NotContains(t, tf, container.ID)
untracked, err := reconciler.untrackedContainers(map[string]bool{}, time.Now())
require.NoError(t, err)
require.NotContains(t, untracked, container.ID)
// if we start the container again, it'll be marked as untracked
require.NoError(t, client.StartContainer(container.ID, nil))
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now())
require.NoError(t, err)
require.Contains(t, untracked, container.ID)
}

View File

@@ -0,0 +1,116 @@
[
{
"Id": "eb23be71498c2dc0254c029f32b360a000caf33157d1c93e226f4c1a4c9d2218",
"Names": [
"/redis-72bfa388-024e-a903-45b8-2bc28b74ed69"
],
"Image": "redis:3.2",
"ImageID": "sha256:87856cc39862cec77541d68382e4867d7ccb29a85a17221446c857ddaebca916",
"Command": "docker-entrypoint.sh redis-server",
"Created": 1568383081,
"Ports": [
{
"PrivatePort": 6379,
"Type": "tcp"
}
],
"Labels": {},
"State": "running",
"Status": "Up 9 seconds",
"HostConfig": {
"NetworkMode": "default"
},
"NetworkSettings": {
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
"EndpointID": "ed830b4f2f33ab4134aea941611b00b9e576b35a4325d52bacfedd1e2e1ba213",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.3",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:03",
"DriverOpts": null
}
}
},
"Mounts": [
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/alloc",
"Destination": "/alloc",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "volume",
"Name": "d5d7f0f9a3326414257c57cfca01db96c53a424b43e251516511694554309681",
"Source": "",
"Destination": "/data",
"Driver": "local",
"Mode": "",
"RW": true,
"Propagation": ""
},
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/local",
"Destination": "/local",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/secrets",
"Destination": "/secrets",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
}
]
},
{
"Id": "99c49fbe999f6df7b7d6a891d69fe57d7b771a30d5d2899a922b44698084e5c9",
"Names": [
"/serene_keller"
],
"Image": "ubuntu:16.04",
"ImageID": "sha256:9361ce633ff193349d54bed380a5afe86043b09fd6ea8da7549dbbedfc2a7077",
"Command": "/bin/bash",
"Created": 1567795217,
"Ports": [],
"Labels": {},
"State": "running",
"Status": "Up 6 days",
"HostConfig": {
"NetworkMode": "default"
},
"NetworkSettings": {
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
"EndpointID": "fab83a0d4089ca9944ca53c882bdf40ad310c6fda30dda0092731feb9bc9fab6",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:02",
"DriverOpts": null
}
}
},
"Mounts": []
}
]

View File

@@ -813,6 +813,15 @@ func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) {
return r, nil
}
var userMountToUnixMount = map[string]int{
// Empty string maps to `rprivate` for backwards compatibility in restored
// older tasks, where mount propagation will not be present.
"": unix.MS_PRIVATE | unix.MS_REC, // rprivate
structs.VolumeMountPropagationPrivate: unix.MS_PRIVATE | unix.MS_REC, // rprivate
structs.VolumeMountPropagationHostToTask: unix.MS_SLAVE | unix.MS_REC, // rslave
structs.VolumeMountPropagationBidirectional: unix.MS_SHARED | unix.MS_REC, // rshared
}
// cmdMounts converts a list of driver.MountConfigs into executor.Mounts.
func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
if len(mounts) == 0 {
@@ -826,11 +835,13 @@ func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
if m.Readonly {
flags |= unix.MS_RDONLY
}
r[i] = &lconfigs.Mount{
Source: m.HostPath,
Destination: m.TaskPath,
Device: "bind",
Flags: flags,
Source: m.HostPath,
Destination: m.TaskPath,
Device: "bind",
Flags: flags,
PropagationFlags: []int{userMountToUnixMount[m.PropagationMode]},
}
}

View File

@@ -467,16 +467,18 @@ func TestExecutor_cmdMounts(t *testing.T) {
expected := []*lconfigs.Mount{
{
Source: "/host/path-ro",
Destination: "/task/path-ro",
Flags: unix.MS_BIND | unix.MS_RDONLY,
Device: "bind",
Source: "/host/path-ro",
Destination: "/task/path-ro",
Flags: unix.MS_BIND | unix.MS_RDONLY,
Device: "bind",
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
},
{
Source: "/host/path-rw",
Destination: "/task/path-rw",
Flags: unix.MS_BIND,
Device: "bind",
Source: "/host/path-rw",
Destination: "/task/path-rw",
Flags: unix.MS_BIND,
Device: "bind",
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
},
}

View File

@@ -1,20 +1,24 @@
job "test1" {
datacenters = ["dc1", "dc2"]
type = "service"
type = "service"
affinity {
attribute ="${meta.rack}"
operator = "="
value = "r1"
weight = -50
attribute = "${meta.rack}"
operator = "="
value = "r1"
weight = -50
}
group "test1" {
count = 4
affinity {
attribute ="${node.datacenter}"
operator = "="
value = "dc1"
weight = -50
attribute = "${node.datacenter}"
operator = "="
value = "dc1"
weight = -50
}
task "test" {
driver = "raw_exec"
@@ -24,4 +28,4 @@ job "test1" {
}
}
}
}
}

View File

@@ -1,20 +1,24 @@
job "test1" {
datacenters = ["dc1", "dc2"]
type = "service"
type = "service"
affinity {
attribute ="${meta.rack}"
operator = "="
value = "r1"
weight = 100
attribute = "${meta.rack}"
operator = "="
value = "r1"
weight = 100
}
group "test1" {
count = 4
affinity {
attribute ="${node.datacenter}"
operator = "="
value = "dc1"
weight = 100
attribute = "${node.datacenter}"
operator = "="
value = "dc1"
weight = 100
}
task "test" {
driver = "raw_exec"
@@ -24,4 +28,4 @@ job "test1" {
}
}
}
}
}

View File

@@ -1,15 +1,17 @@
job "test1" {
datacenters = ["dc1", "dc2"]
type = "service"
type = "service"
group "test1" {
count = 5
affinity {
attribute ="${node.datacenter}"
operator = "="
value = "dc1"
weight = 100
attribute = "${node.datacenter}"
operator = "="
value = "dc1"
weight = 100
}
task "test" {
driver = "raw_exec"
@@ -19,4 +21,4 @@ job "test1" {
}
}
}
}
}

View File

@@ -3,15 +3,15 @@ job "test_raw" {
type = "service"
group "test" {
count = 1
count = 1
task "test1" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "var=10000;while true; do a=$(awk -v x=$var 'BEGIN{print sqrt(x)}'); ((var++)); done" ]
}
args = ["-c", "var=10000;while true; do a=$(awk -v x=$var 'BEGIN{print sqrt(x)}'); ((var++)); done"]
}
}
}
}
}

View File

@@ -4,13 +4,16 @@
job "restarter" {
datacenters = ["dc1"]
group "restarter" {
restart {
attempts = 100
delay = "3s"
}
task "restarter" {
driver = "raw_exec"
config {
command = "/bin/bash"
args = ["-c", "echo $$ >> pid && sleep 1 && exit 99"]

View File

@@ -3,8 +3,10 @@
job "sleeper" {
datacenters = ["dc1"]
task "sleeper" {
driver = "raw_exec"
config {
command = "/bin/bash"
args = ["-c", "echo $$ >> pid && sleep 999999"]

View File

@@ -13,8 +13,8 @@ job "consul_canary_test" {
}
service {
name = "canarytest"
tags = ["foo", "bar"]
name = "canarytest"
tags = ["foo", "bar"]
canary_tags = ["foo", "canary"]
}
}

View File

@@ -1,30 +1,31 @@
job "consul-example" {
datacenters = ["dc1"]
type = "service"
type = "service"
update {
max_parallel = 1
min_healthy_time = "10s"
healthy_deadline = "3m"
max_parallel = 1
min_healthy_time = "10s"
healthy_deadline = "3m"
progress_deadline = "10m"
auto_revert = false
canary = 0
auto_revert = false
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
group "cache" {
count = 3
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
delay = "15s"
mode = "fail"
}
ephemeral_disk {
@@ -33,18 +34,22 @@ job "consul-example" {
task "redis" {
driver = "docker"
config {
image = "redis:3.2"
port_map {
db = 6379
}
}
resources {
cpu = 500 # 500 MHz
memory = 256 # 256MB
network {
mbits = 10
port "db" {}
port "db" {}
}
}
@@ -52,6 +57,7 @@ job "consul-example" {
name = "redis-cache"
tags = ["global", "cache"]
port = "db"
check {
name = "alive"
type = "tcp"
@@ -61,4 +67,4 @@ job "consul-example" {
}
}
}
}
}

View File

@@ -7,21 +7,22 @@ job "deployment_auto.nomad" {
update {
max_parallel = 3
auto_promote = true
canary = 2
canary = 2
}
task "one" {
driver = "raw_exec"
config {
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["1000000"]
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["1000000"]
}
resources {
cpu = 20
memory = 20
cpu = 20
memory = 20
}
}
}
@@ -30,9 +31,9 @@ job "deployment_auto.nomad" {
count = 3
update {
max_parallel = 2
auto_promote = true
canary = 2
max_parallel = 2
auto_promote = true
canary = 2
min_healthy_time = "2s"
}
@@ -40,14 +41,15 @@ job "deployment_auto.nomad" {
driver = "raw_exec"
config {
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["2000000"]
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["2000000"]
}
resources {
cpu = 20
memory = 20
cpu = 20
memory = 20
}
}
}

View File

@@ -7,21 +7,22 @@ job "deployment_auto.nomad" {
update {
max_parallel = 3
auto_promote = true
canary = 2
canary = 2
}
task "one" {
driver = "raw_exec"
config {
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["1000001"]
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["1000001"]
}
resources {
cpu = 20
memory = 20
cpu = 20
memory = 20
}
}
}
@@ -30,9 +31,9 @@ job "deployment_auto.nomad" {
count = 3
update {
max_parallel = 2
auto_promote = true
canary = 2
max_parallel = 2
auto_promote = true
canary = 2
min_healthy_time = "2s"
}
@@ -40,14 +41,15 @@ job "deployment_auto.nomad" {
driver = "raw_exec"
config {
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["2000001"]
command = "/bin/sleep"
# change args to update the job, the only changes
args = ["2000001"]
}
resources {
cpu = 20
memory = 20
cpu = 20
memory = 20
}
}
}

View File

@@ -1,23 +1,27 @@
job "fabio" {
datacenters = ["dc1", "dc2"]
type = "system"
type = "system"
group "fabio" {
task "fabio" {
driver = "docker"
config {
image = "fabiolb/fabio"
image = "fabiolb/fabio"
network_mode = "host"
}
resources {
cpu = 100
memory = 64
network {
mbits = 20
port "lb" {
static = 9999
}
port "ui" {
static = 9998
}

View File

@@ -1,27 +1,33 @@
job "cpustress" {
datacenters = ["dc1", "dc2"]
type = "batch"
type = "batch"
group "cpustress" {
count = 1
restart {
mode = "fail"
mode = "fail"
attempts = 0
}
reschedule {
attempts = 3
interval = "10m"
attempts = 3
interval = "10m"
unlimited = false
}
task "cpustress" {
driver = "docker"
config {
image = "progrium/stress"
args = [
"-c", "4",
"-t", "600"
]
args = [
"-c",
"4",
"-t",
"600",
]
}
resources {
@@ -30,4 +36,4 @@ job "cpustress" {
}
}
}
}
}

View File

@@ -1,32 +1,39 @@
job "diskstress" {
datacenters = ["dc1", "dc2"]
type = "batch"
type = "batch"
group "diskstress" {
count = 1
restart {
mode = "fail"
mode = "fail"
attempts = 0
}
reschedule {
attempts = 3
interval = "10m"
attempts = 3
interval = "10m"
unlimited = false
}
task "diskstress" {
driver = "docker"
config {
image = "progrium/stress"
args = [
"-d", "2",
"-t", "30"
]
args = [
"-d",
"2",
"-t",
"30",
]
}
resources {
cpu = 4096
memory = 256
}
}
}
}
}

View File

@@ -2,13 +2,12 @@ job "hello" {
datacenters = ["dc1"]
update {
max_parallel = 1
max_parallel = 1
min_healthy_time = "15s"
auto_revert = true
auto_revert = true
}
group "hello" {
count = 3
task "hello" {
@@ -19,17 +18,18 @@ job "hello" {
}
artifact {
source = "https://nomad-community-demo.s3.amazonaws.com/hellov1"
source = "https://nomad-community-demo.s3.amazonaws.com/hellov1"
destination = "local/hello"
mode = "file"
mode = "file"
}
resources {
cpu = 500
memory = 256
network {
mbits = 10
port "web" {}
port "web" {}
}
}
@@ -37,6 +37,7 @@ job "hello" {
name = "hello"
tags = ["urlprefix-hello/"]
port = "web"
check {
name = "alive"
type = "http"

View File

@@ -1,32 +1,39 @@
job "memstress" {
datacenters = ["dc1", "dc2"]
type = "batch"
type = "batch"
group "memstress" {
count = 1
restart {
mode = "fail"
mode = "fail"
attempts = 0
}
reschedule {
attempts = 3
interval = "10m"
attempts = 3
interval = "10m"
unlimited = false
}
task "memstress" {
driver = "docker"
config {
image = "progrium/stress"
args = [
"-m", "2",
"-t", "120"
]
args = [
"-m",
"2",
"-t",
"120",
]
}
resources {
cpu = 4096
memory = 1024
}
}
}
}
}

View File

@@ -3,26 +3,31 @@ job "redis" {
group "cache" {
count = 4
update {
max_parallel = 1
min_healthy_time = "5s"
healthy_deadline = "30s"
max_parallel = 1
min_healthy_time = "5s"
healthy_deadline = "30s"
progress_deadline = "1m"
}
restart {
mode = "fail"
mode = "fail"
attempts = 0
}
reschedule {
attempts = 3
interval = "10m"
attempts = 3
interval = "10m"
unlimited = false
}
task "redis" {
driver = "docker"
config {
image = "redis:4.0"
port_map {
db = 6379
}
@@ -31,9 +36,10 @@ job "redis" {
resources {
cpu = 500
memory = 256
network {
mbits = 10
port "db" {}
port "db" {}
}
}
@@ -41,6 +47,7 @@ job "redis" {
name = "redis-cache"
tags = ["global", "cache"]
port = "db"
check {
name = "alive"
type = "tcp"
@@ -50,4 +57,4 @@ job "redis" {
}
}
}
}
}

View File

@@ -1,9 +1,8 @@
job "nginx" {
datacenters = ["dc1"]
type = "system"
type = "system"
group "simpleweb" {
update {
stagger = "5s"
max_parallel = 1
@@ -29,8 +28,7 @@ job "nginx" {
network {
mbits = 1
port "http" {
}
port "http"{}
}
}
@@ -49,4 +47,3 @@ job "nginx" {
}
}
}

View File

@@ -16,4 +16,3 @@ job "sleep" {
}
}
}

View File

@@ -7,7 +7,7 @@ job "sleep" {
config {
command = "sleep"
args = ["10000"]
args = ["10000"]
}
resources {
@@ -17,4 +17,3 @@ job "sleep" {
}
}
}

View File

@@ -7,7 +7,7 @@ job "sleep" {
config {
command = "sleep"
args = ["10000"]
args = ["10000"]
}
resources {
@@ -17,4 +17,3 @@ job "sleep" {
}
}
}

View File

@@ -6,9 +6,9 @@ job "nomadexec-docker" {
driver = "docker"
config {
image = "busybox:1.29.2"
image = "busybox:1.29.2"
command = "/bin/sleep"
args = ["1000"]
args = ["1000"]
}
resources {
@@ -17,4 +17,4 @@ job "nomadexec-docker" {
}
}
}
}
}

View File

@@ -1,15 +1,17 @@
job "prometheus" {
datacenters = ["dc1", "dc2"]
type = "service"
type = "service"
group "monitoring" {
count = 1
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
delay = "15s"
mode = "fail"
}
ephemeral_disk {
size = 300
}
@@ -18,6 +20,7 @@ job "prometheus" {
template {
change_mode = "noop"
destination = "local/prometheus.yml"
data = <<EOH
---
global:
@@ -43,26 +46,33 @@ scrape_configs:
format: ['prometheus']
EOH
}
driver = "docker"
config {
image = "prom/prometheus:latest"
volumes = [
"local/prometheus.yml:/etc/prometheus/prometheus.yml"
"local/prometheus.yml:/etc/prometheus/prometheus.yml",
]
port_map {
prometheus_ui = 9090
}
}
resources {
network {
mbits = 10
port "prometheus_ui" {}
port "prometheus_ui"{}
}
}
service {
name = "prometheus"
tags = ["urlprefix-/"]
port = "prometheus_ui"
check {
name = "prometheus_ui port alive"
type = "http"

View File

@@ -1,13 +1,15 @@
job "r1" {
datacenters = ["dc1", "dc2"]
type = "service"
type = "service"
group "r1" {
count = 6
spread {
attribute ="${node.datacenter}"
weight = 100
attribute = "${node.datacenter}"
weight = 100
}
task "test" {
driver = "raw_exec"
@@ -17,4 +19,4 @@ job "r1" {
}
}
}
}
}

View File

@@ -1,22 +1,28 @@
job "r1" {
datacenters = ["dc1", "dc2"]
type = "service"
type = "service"
spread {
attribute ="${node.datacenter}"
weight = 100
attribute = "${node.datacenter}"
weight = 100
}
group "test1" {
count = 10
spread {
attribute ="${meta.rack}"
weight = 100
attribute = "${meta.rack}"
weight = 100
target "r1" {
percent = 70
}
target "r2" {
percent = 30
}
}
task "test" {
driver = "raw_exec"
@@ -26,4 +32,4 @@ job "r1" {
}
}
}
}
}

View File

@@ -1,15 +1,16 @@
job "completed_leader" {
type = "batch"
datacenters = ["dc1"]
type = "batch"
datacenters = ["dc1"]
group "completed_leader" {
restart {
attempts = 0
}
# Only the task named the same as the job has its events tested.
task "completed_leader" {
driver = "raw_exec"
config {
command = "sleep"
args = ["1000"]
@@ -19,6 +20,7 @@ job "completed_leader" {
task "leader" {
leader = true
driver = "raw_exec"
config {
command = "sleep"
args = ["1"]

View File

@@ -1,14 +1,15 @@
job "failed_batch" {
type = "batch"
datacenters = ["dc1"]
type = "batch"
datacenters = ["dc1"]
group "failed_batch" {
restart {
attempts = 0
}
task "failed_batch" {
driver = "raw_exec"
config {
command = "SomeInvalidCommand"
}

View File

@@ -1,15 +1,16 @@
job "failed_sibling" {
type = "service"
datacenters = ["dc1"]
type = "service"
datacenters = ["dc1"]
group "failed_sibling" {
restart {
attempts = 0
}
# Only the task named the same as the job has its events tested.
task "failed_sibling" {
driver = "raw_exec"
config {
command = "sleep"
args = ["1000"]
@@ -18,6 +19,7 @@ job "failed_sibling" {
task "failure" {
driver = "raw_exec"
config {
command = "/bin/sh"
args = ["-c", "sleep 1 && exit 99"]

View File

@@ -1,9 +1,10 @@
job "simple_batch" {
type = "batch"
datacenters = ["dc1"]
type = "batch"
datacenters = ["dc1"]
task "simple_batch" {
driver = "raw_exec"
config {
command = "sleep"
args = ["1"]

View File

@@ -1,142 +1,97 @@
data "template_file" "user_data_server" {
template = "${file("${path.root}/user-data-server.sh")}"
vars {
server_count = "${var.server_count}"
region = "${var.region}"
retry_join = "${var.retry_join}"
}
}
data "template_file" "user_data_client" {
template = "${file("${path.root}/user-data-client.sh")}"
count = "${var.client_count}"
vars {
region = "${var.region}"
retry_join = "${var.retry_join}"
}
}
data "template_file" "nomad_client_config" {
template = "${file("${path.root}/configs/client.hcl")}"
}
data "template_file" "nomad_server_config" {
template = "}"
}
resource "aws_instance" "server" {
ami = "${data.aws_ami.main.image_id}"
instance_type = "${var.instance_type}"
key_name = "${module.keys.key_name}"
vpc_security_group_ids = ["${aws_security_group.primary.id}"]
count = "${var.server_count}"
ami = data.aws_ami.main.image_id
instance_type = var.instance_type
key_name = module.keys.key_name
vpc_security_group_ids = [aws_security_group.primary.id]
count = var.server_count
# Instance tags
tags {
tags = {
Name = "${local.random_name}-server-${count.index}"
ConsulAutoJoin = "auto-join"
SHA = "${var.nomad_sha}"
User = "${data.aws_caller_identity.current.arn}"
SHA = var.nomad_sha
User = data.aws_caller_identity.current.arn
}
user_data = "${data.template_file.user_data_server.rendered}"
iam_instance_profile = "${aws_iam_instance_profile.instance_profile.name}"
iam_instance_profile = aws_iam_instance_profile.instance_profile.name
# copy up all provisioning scripts and configs
provisioner "file" {
content = "${file("${path.root}/configs/${var.indexed == false ? "server.hcl" : "indexed/server-${count.index}.hcl"}")}"
destination = "/tmp/server.hcl"
source = "shared/"
destination = "/ops/shared"
connection {
host = coalesce(self.public_ip, self.private_ip)
type = "ssh"
user = "ubuntu"
private_key = "${module.keys.private_key_pem}"
private_key = module.keys.private_key_pem
}
}
provisioner "remote-exec" {
inline = [
"aws s3 cp s3://nomad-team-test-binary/builds-oss/${var.nomad_sha}.tar.gz nomad.tar.gz",
"sudo cp /ops/shared/config/nomad.service /etc/systemd/system/nomad.service",
"sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/",
"sudo cp /tmp/server.hcl /etc/nomad.d/nomad.hcl",
"sudo chmod 0755 /usr/local/bin/nomad",
"sudo chown root:root /usr/local/bin/nomad",
"sudo systemctl enable nomad.service",
"sudo systemctl start nomad.service",
"chmod +x /ops/shared/config/provision-server.sh",
"/ops/shared/config/provision-server.sh aws ${var.server_count} '${var.nomad_sha}' '${var.indexed == false ? "server.hcl" : "indexed/server-${count.index}.hcl"}'",
]
connection {
host = coalesce(self.public_ip, self.private_ip)
type = "ssh"
user = "ubuntu"
private_key = "${module.keys.private_key_pem}"
private_key = module.keys.private_key_pem
}
}
}
resource "aws_instance" "client" {
ami = "${data.aws_ami.main.image_id}"
instance_type = "${var.instance_type}"
key_name = "${module.keys.key_name}"
vpc_security_group_ids = ["${aws_security_group.primary.id}"]
count = "${var.client_count}"
depends_on = ["aws_instance.server"]
ami = data.aws_ami.main.image_id
instance_type = var.instance_type
key_name = module.keys.key_name
vpc_security_group_ids = [aws_security_group.primary.id]
count = var.client_count
depends_on = [aws_instance.server]
# Instance tags
tags {
tags = {
Name = "${local.random_name}-client-${count.index}"
ConsulAutoJoin = "auto-join"
SHA = "${var.nomad_sha}"
User = "${data.aws_caller_identity.current.arn}"
SHA = var.nomad_sha
User = data.aws_caller_identity.current.arn
}
ebs_block_device = {
ebs_block_device {
device_name = "/dev/xvdd"
volume_type = "gp2"
volume_size = "50"
delete_on_termination = "true"
}
user_data = "${element(data.template_file.user_data_client.*.rendered, count.index)}"
iam_instance_profile = "${aws_iam_instance_profile.instance_profile.name}"
iam_instance_profile = aws_iam_instance_profile.instance_profile.name
# copy up all provisioning scripts and configs
provisioner "file" {
content = "${file("${path.root}/configs/${var.indexed == false ? "client.hcl" : "indexed/client-${count.index}.hcl"}")}"
destination = "/tmp/client.hcl"
source = "shared/"
destination = "/ops/shared"
connection {
host = coalesce(self.public_ip, self.private_ip)
type = "ssh"
user = "ubuntu"
private_key = "${module.keys.private_key_pem}"
private_key = module.keys.private_key_pem
}
}
provisioner "remote-exec" {
inline = [
"aws s3 cp s3://nomad-team-test-binary/builds-oss/${var.nomad_sha}.tar.gz nomad.tar.gz",
"sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/",
"sudo cp /ops/shared/config/nomad.service /etc/systemd/system/nomad.service",
"sudo cp /tmp/client.hcl /etc/nomad.d/nomad.hcl",
"sudo chmod 0755 /usr/local/bin/nomad",
"sudo chown root:root /usr/local/bin/nomad",
# Setup Host Volumes
"sudo mkdir /tmp/data",
# Run Nomad Service
"sudo systemctl enable nomad.service",
"sudo systemctl start nomad.service",
# Install CNI plugins
"sudo mkdir -p /opt/cni/bin",
"wget -q -O - https://github.com/containernetworking/plugins/releases/download/v0.8.2/cni-plugins-linux-amd64-v0.8.2.tgz | sudo tar -C /opt/cni/bin -xz",
"chmod +x /ops/shared/config/provision-client.sh",
"/ops/shared/config/provision-client.sh aws '${var.nomad_sha}' '${var.indexed == false ? "client.hcl" : "indexed/client-${count.index}.hcl"}'",
]
# Setup host volumes
connection {
host = coalesce(self.public_ip, self.private_ip)
type = "ssh"
user = "ubuntu"
private_key = "${module.keys.private_key_pem}"
private_key = module.keys.private_key_pem
}
}
}

View File

@@ -1,42 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
# Enable the client
client {
enabled = true
options {
"driver.raw_exec.enable" = "1"
"docker.privileged.enabled" = "true"
}
meta {
"rack" = "r1"
}
host_volume "shared_data" {
path = "/tmp/data"
}
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = true
address = "http://active.vault.service.consul:8200"
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,40 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
datacenter = "dc2"
# Enable the client
client {
enabled = true
options {
"driver.raw_exec.enable" = "1"
"docker.privileged.enabled" = "true"
}
meta {
"rack" = "r1"
}
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = true
address = "http://active.vault.service.consul:8200"
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,40 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
datacenter = "dc2"
# Enable the client
client {
enabled = true
options {
"driver.raw_exec.enable" = "1"
"docker.privileged.enabled" = "true"
}
meta {
"rack" = "r2"
}
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = true
address = "http://active.vault.service.consul:8200"
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,33 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
# Enable the server
server {
enabled = true
bootstrap_expect = 3
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,33 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
# Enable the server
server {
enabled = true
bootstrap_expect = 3
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,33 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
# Enable the server
server {
enabled = true
bootstrap_expect = 3
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,33 +0,0 @@
enable_debug = true
log_level = "debug"
data_dir = "/opt/nomad/data"
bind_addr = "0.0.0.0"
# Enable the server
server {
enabled = true
bootstrap_expect = 3
}
consul {
address = "127.0.0.1:8500"
}
vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}

View File

@@ -1,11 +1,11 @@
resource "aws_iam_instance_profile" "instance_profile" {
name_prefix = "${local.random_name}"
role = "${aws_iam_role.instance_role.name}"
name_prefix = local.random_name
role = aws_iam_role.instance_role.name
}
resource "aws_iam_role" "instance_role" {
name_prefix = "${local.random_name}"
assume_role_policy = "${data.aws_iam_policy_document.instance_role.json}"
name_prefix = local.random_name
assume_role_policy = data.aws_iam_policy_document.instance_role.json
}
data "aws_iam_policy_document" "instance_role" {
@@ -22,8 +22,8 @@ data "aws_iam_policy_document" "instance_role" {
resource "aws_iam_role_policy" "auto_discover_cluster" {
name = "auto-discover-cluster"
role = "${aws_iam_role.instance_role.id}"
policy = "${data.aws_iam_policy_document.auto_discover_cluster.json}"
role = aws_iam_role.instance_role.id
policy = data.aws_iam_policy_document.auto_discover_cluster.json
}
# Note: Overloading this instance profile to access
@@ -55,10 +55,11 @@ data "aws_iam_policy_document" "auto_discover_cluster" {
effect = "Allow"
actions = [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject",
]
resources = ["arn:aws:s3:::nomad-team-test-binary/*"]
}
}

View File

@@ -28,20 +28,16 @@ variable "client_count" {
default = "4"
}
variable "retry_join" {
description = "Used by Consul to automatically form a cluster."
default = "provider=aws tag_key=ConsulAutoJoin tag_value=auto-join"
}
variable "nomad_sha" {
description = "The sha of Nomad to run"
}
provider "aws" {
region = "${var.region}"
region = var.region
}
resource "random_pet" "e2e" {}
resource "random_pet" "e2e" {
}
locals {
random_name = "${var.name}-${random_pet.e2e.id}"
@@ -49,10 +45,10 @@ locals {
# Generates keys to use for provisioning and access
module "keys" {
name = "${local.random_name}"
path = "${path.root}/keys"
source = "mitchellh/dynamic-keys/aws"
version = "v1.0.0"
name = local.random_name
path = "${path.root}/keys"
source = "mitchellh/dynamic-keys/aws"
version = "v2.0.0"
}
data "aws_ami" "main" {
@@ -68,17 +64,17 @@ data "aws_ami" "main" {
name = "tag:OS"
values = ["Ubuntu"]
}
}
data "aws_caller_identity" "current" {}
data "aws_caller_identity" "current" {
}
output "servers" {
value = "${aws_instance.server.*.public_ip}"
value = aws_instance.server.*.public_ip
}
output "clients" {
value = "${aws_instance.client.*.public_ip}"
value = aws_instance.client.*.public_ip
}
output "message" {
@@ -87,8 +83,8 @@ Your cluster has been provisioned! - To prepare your environment, run the
following:
```
export NOMAD_ADDR=http://${aws_instance.client.0.public_ip}:4646
export CONSUL_HTTP_ADDR=http://${aws_instance.client.0.public_ip}:8500
export NOMAD_ADDR=http://${aws_instance.client[0].public_ip}:4646
export CONSUL_HTTP_ADDR=http://${aws_instance.client[0].public_ip}:8500
export NOMAD_E2E=1
```
@@ -100,7 +96,8 @@ go test -v ./e2e
ssh into nodes with:
```
ssh -i keys/${local.random_name}.pem ubuntu@${aws_instance.client.0.public_ip}
ssh -i keys/${local.random_name}.pem ubuntu@${aws_instance.client[0].public_ip}
```
EOM
}

View File

@@ -3,8 +3,8 @@ data "aws_vpc" "default" {
}
resource "aws_security_group" "primary" {
name = "${local.random_name}"
vpc_id = "${data.aws_vpc.default.id}"
name = local.random_name
vpc_id = data.aws_vpc.default.id
ingress {
from_port = 22

View File

@@ -1,31 +0,0 @@
{
"builders": [{
"type": "amazon-ebs",
"region": "us-east-1",
"source_ami": "ami-80861296",
"instance_type": "t2.medium",
"ssh_username": "ubuntu",
"ami_name": "nomad-e2e-{{timestamp}}",
"ami_groups": ["all"],
"tags": {
"OS": "Ubuntu"
}
}],
"provisioners": [
{
"type": "shell",
"inline": [
"sudo mkdir /ops",
"sudo chmod 777 /ops"
]
},
{
"type": "file",
"source": "shared",
"destination": "/ops"
},
{
"type": "shell",
"script": "shared/scripts/setup.sh"
}]
}

View File

@@ -0,0 +1,21 @@
# Packer Builds
These builds are run as-needed to update the AMIs used by the end-to-end test infrastructure.
## What goes here?
* steps that aren't specific to a given Nomad build: ex. all Linux instances need `jq` and `awscli`.
* steps that aren't specific to a given EC2 instance: nothing that includes an IP address.
* steps that infrequently change: the version of Consul or Vault we ship.
## Running Packer builds
```sh
$ packer --version
1.4.4
# build linux AMI
$ packer build packer.json
```

View File

@@ -5,9 +5,10 @@ set -e
# Disable interactive apt prompts
export DEBIAN_FRONTEND=noninteractive
cd /ops
sudo mkdir -p /ops/shared
sudo chown -R ubuntu:ubuntu /ops/shared
CONFIGDIR=/ops/shared/config
cd /ops
CONSULVERSION=1.6.0
CONSULDOWNLOAD=https://releases.hashicorp.com/consul/${CONSULVERSION}/consul_${CONSULVERSION}_linux_amd64.zip
@@ -37,13 +38,18 @@ sudo apt-get install -y python-setuptools
sudo easy_install pip
sudo pip install numpy
# Disable the firewall
# Install sockaddr
aws s3 cp "s3://nomad-team-test-binary/tools/sockaddr_linux_amd64" /tmp/sockaddr
sudo mv /tmp/sockaddr /usr/local/bin
sudo chmod +x /usr/local/bin/sockaddr
sudo chown root:root /usr/local/bin/sockaddr
# Disable the firewall
sudo ufw disable || echo "ufw not installed"
echo "Install Consul"
curl -L $CONSULDOWNLOAD > consul.zip
sudo unzip consul.zip -d /usr/local/bin
curl -L -o /tmp/consul.zip $CONSULDOWNLOAD
sudo unzip /tmp/consul.zip -d /usr/local/bin
sudo chmod 0755 /usr/local/bin/consul
sudo chown root:root /usr/local/bin/consul
@@ -54,8 +60,8 @@ sudo mkdir -p $CONSULDIR
sudo chmod 755 $CONSULDIR
echo "Install Vault"
curl -L $VAULTDOWNLOAD > vault.zip
sudo unzip vault.zip -d /usr/local/bin
curl -L -o /tmp/vault.zip $VAULTDOWNLOAD
sudo unzip /tmp/vault.zip -d /usr/local/bin
sudo chmod 0755 /usr/local/bin/vault
sudo chown root:root /usr/local/bin/vault
@@ -66,8 +72,8 @@ sudo mkdir -p $VAULTDIR
sudo chmod 755 $VAULTDIR
echo "Install Nomad"
curl -L $NOMADDOWNLOAD > nomad.zip
sudo unzip nomad.zip -d /usr/local/bin
curl -L -o /tmp/nomad.zip $NOMADDOWNLOAD
sudo unzip /tmp/nomad.zip -d /usr/local/bin
sudo chmod 0755 /usr/local/bin/nomad
sudo chown root:root /usr/local/bin/nomad

View File

@@ -0,0 +1,25 @@
{
"builders": [
{
"type": "amazon-ebs",
"region": "us-east-1",
"source_ami": "ami-80861296",
"instance_type": "t2.medium",
"ssh_username": "ubuntu",
"iam_instance_profile": "packer-builder",
"ami_name": "nomad-e2e-{{timestamp}}",
"ami_groups": [
"all"
],
"tags": {
"OS": "Ubuntu"
}
}
],
"provisioners": [
{
"type": "shell",
"script": "./linux/setup.sh"
}
]
}

View File

@@ -0,0 +1,10 @@
# Terraform Provisioning
These scripts are copied up to instances via Terraform provisioning and executed after launch. This allows us to update the Nomad configurations for features that land on master without having to re-bake AMIs.
## What goes here?
* steps that are specific to a given Nomad build: ex. all Nomad configuration files.
* steps that are specific to a given EC2 instance: configuring IP addresses.
These scripts *should* be idempotent: copy configurations from `/ops/shared` to their destinations where the services expect them to be, rather than moving them.

View File

@@ -0,0 +1,83 @@
#!/bin/bash
# installs and configures the desired build of Nomad as a client
set -o errexit
set -o nounset
CONFIGDIR=/ops/shared/config
HADOOP_VERSION=hadoop-2.7.7
HADOOPCONFIGDIR=/usr/local/$HADOOP_VERSION/etc/hadoop
HOME_DIR=ubuntu
IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetPrivateIP')
DOCKER_BRIDGE_IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetInterfaceIP "docker0"')
CLOUD="$1"
NOMAD_SHA="$2"
NOMAD_CONFIG="$3"
# Consul
CONSUL_SRC=/ops/shared/consul
CONSUL_DEST=/etc/consul.d
sudo cp "$CONSUL_SRC/base.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/retry_$CLOUD.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/consul_$CLOUD.service" /etc/systemd/system/consul.service
sudo systemctl enable consul.service
sudo systemctl start consul.service
sleep 10
# Add hostname to /etc/hosts
echo "127.0.0.1 $(hostname)" | sudo tee --append /etc/hosts
# Add Docker bridge network IP to /etc/resolv.conf (at the top)
echo "nameserver $DOCKER_BRIDGE_IP_ADDRESS" | sudo tee /etc/resolv.conf.new
cat /etc/resolv.conf | sudo tee --append /etc/resolv.conf.new
sudo mv /etc/resolv.conf.new /etc/resolv.conf
# Hadoop config file to enable HDFS CLI
sudo cp $CONFIGDIR/core-site.xml $HADOOPCONFIGDIR
# Move examples directory to $HOME
sudo mv /ops/examples /home/$HOME_DIR
sudo chown -R $HOME_DIR:$HOME_DIR /home/$HOME_DIR/examples
sudo chmod -R 775 /home/$HOME_DIR/examples
# Set env vars for tool CLIs
echo "export NOMAD_ADDR=http://$IP_ADDRESS:4646" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre" | sudo tee --append /home/$HOME_DIR/.bashrc
# Update PATH
echo "export PATH=$PATH:/usr/local/bin/spark/bin:/usr/local/$HADOOP_VERSION/bin" | sudo tee --append /home/$HOME_DIR/.bashrc
# Nomad
NOMAD_SRC=/ops/shared/nomad
NOMAD_DEST=/etc/nomad.d
NOMAD_CONFIG_FILENAME=$(basename "$NOMAD_CONFIG")
# download
aws s3 cp "s3://nomad-team-test-binary/builds-oss/nomad_linux_amd64_${NOMAD_SHA}.tar.gz" nomad.tar.gz
# unpack and install
sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/
sudo chmod 0755 /usr/local/bin/nomad
sudo chown root:root /usr/local/bin/nomad
sudo cp "$NOMAD_SRC/base.hcl" "$NOMAD_DEST/"
sudo cp "$NOMAD_SRC/$NOMAD_CONFIG" "$NOMAD_DEST/$NOMAD_CONFIG_FILENAME"
# Setup Host Volumes
sudo mkdir /tmp/data
# Install CNI plugins
sudo mkdir -p /opt/cni/bin
wget -q -O - \
https://github.com/containernetworking/plugins/releases/download/v0.8.2/cni-plugins-linux-amd64-v0.8.2.tgz \
| sudo tar -C /opt/cni/bin -xz
# enable as a systemd service
sudo cp "$NOMAD_SRC/nomad.service" /etc/systemd/system/nomad.service
sudo systemctl enable nomad.service
sudo systemctl start nomad.service

View File

@@ -0,0 +1,96 @@
#!/bin/bash
# installs and configures the desired build of Nomad as a server
set -o errexit
set -o nounset
CONFIGDIR=/ops/shared/config
HADOOP_VERSION=hadoop-2.7.7
HADOOPCONFIGDIR=/usr/local/$HADOOP_VERSION/etc/hadoop
HOME_DIR=ubuntu
IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetPrivateIP')
DOCKER_BRIDGE_IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetInterfaceIP "docker0"')
CLOUD="$1"
SERVER_COUNT="$2"
NOMAD_SHA="$3"
NOMAD_CONFIG="$4"
# Consul
CONSUL_SRC=/ops/shared/consul
CONSUL_DEST=/etc/consul.d
sed "s/SERVER_COUNT/$SERVER_COUNT/g" "$CONSUL_SRC/server.json" > /tmp/server.json
sudo mv /tmp/server.json "$CONSUL_DEST/server.json"
sudo cp "$CONSUL_SRC/base.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/retry_$CLOUD.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/consul_$CLOUD.service" /etc/systemd/system/consul.service
sudo systemctl enable consul.service
sudo systemctl start consul.service
sleep 10
export CONSUL_HTTP_ADDR=$IP_ADDRESS:8500
export CONSUL_RPC_ADDR=$IP_ADDRESS:8400
# Vault
VAULT_SRC=/ops/shared/vault
VAULT_DEST=/etc/vault.d
sudo cp "$VAULT_SRC/vault.hcl" "$VAULT_DEST"
sudo cp "$VAULT_SRC/vault.service" /etc/systemd/system/vault.service
sudo systemctl enable vault.service
sudo systemctl start vault.service
# Add hostname to /etc/hosts
echo "127.0.0.1 $(hostname)" | sudo tee --append /etc/hosts
# Add Docker bridge network IP to /etc/resolv.conf (at the top)
echo "nameserver $DOCKER_BRIDGE_IP_ADDRESS" | sudo tee /etc/resolv.conf.new
cat /etc/resolv.conf | sudo tee --append /etc/resolv.conf.new
sudo mv /etc/resolv.conf.new /etc/resolv.conf
# Hadoop
sudo cp $CONFIGDIR/core-site.xml $HADOOPCONFIGDIR
# Move examples directory to $HOME
sudo mv /ops/examples /home/$HOME_DIR
sudo chown -R $HOME_DIR:$HOME_DIR /home/$HOME_DIR/examples
sudo chmod -R 775 /home/$HOME_DIR/examples
# Set env vars for tool CLIs
echo "export CONSUL_RPC_ADDR=$IP_ADDRESS:8400" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export CONSUL_HTTP_ADDR=$IP_ADDRESS:8500" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export VAULT_ADDR=http://$IP_ADDRESS:8200" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export NOMAD_ADDR=http://$IP_ADDRESS:4646" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre" | sudo tee --append /home/$HOME_DIR/.bashrc
# Update PATH
echo "export PATH=$PATH:/usr/local/bin/spark/bin:/usr/local/$HADOOP_VERSION/bin" | sudo tee --append /home/$HOME_DIR/.bashrc
# Nomad
NOMAD_SRC=/ops/shared/nomad
NOMAD_DEST=/etc/nomad.d
NOMAD_CONFIG_FILENAME=$(basename "$NOMAD_CONFIG")
# download
aws s3 cp "s3://nomad-team-test-binary/builds-oss/nomad_linux_amd64_${NOMAD_SHA}.tar.gz" nomad.tar.gz
# unpack and install
sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/
sudo chmod 0755 /usr/local/bin/nomad
sudo chown root:root /usr/local/bin/nomad
sudo cp "$NOMAD_SRC/base.hcl" "$NOMAD_DEST/"
sed "s/3 # SERVER_COUNT/$SERVER_COUNT/g" "$NOMAD_SRC/$NOMAD_CONFIG" \
> "/tmp/$NOMAD_CONFIG_FILENAME"
sudo mv "/tmp/$NOMAD_CONFIG_FILENAME" "$NOMAD_DEST/$NOMAD_CONFIG_FILENAME"
# enable as a systemd service
sudo cp "$NOMAD_SRC/nomad.service" /etc/systemd/system/nomad.service
sudo systemctl enable nomad.service
sudo systemctl start nomad.service

View File

@@ -1,12 +0,0 @@
backend "consul" {
path = "vault/"
address = "IP_ADDRESS:8500"
cluster_addr = "https://IP_ADDRESS:8201"
redirect_addr = "http://IP_ADDRESS:8200"
}
listener "tcp" {
address = "IP_ADDRESS:8200"
cluster_address = "IP_ADDRESS:8201"
tls_disable = 1
}

View File

@@ -0,0 +1,13 @@
{
"log_level": "INFO",
"data_dir": "/opt/consul/data",
"bind_addr": "0.0.0.0",
"client_addr": "0.0.0.0",
"advertise_addr": "{{ GetPrivateIP }}",
"connect": {
"enabled": true
},
"ports": {
"grpc": 8502
}
}

Some files were not shown because too many files have changed in this diff.