Merge branch 'master' into release-0101
@@ -61,8 +61,6 @@ workflows:
test_packages: "./client/fingerprint"
goarch: "386"
<<: *IGNORE_FOR_UI_BRANCHES
- test-rkt:
<<: *IGNORE_FOR_UI_BRANCHES
- test-e2e:
<<: *IGNORE_FOR_UI_BRANCHES
- test-ui

@@ -178,32 +176,6 @@ jobs:
- checkout
- run: make test-website

test-rkt:
executor: go-machine-recent
environment:
<<: *COMMON_ENVS
GOTEST_PKGS: "./drivers/rkt"
GOPATH: /home/circleci/go
RKT_VERSION: 1.29.0
steps:
- checkout
- install-golang
- install-protoc
- run:
name: install rkt
command: |
gpg --recv-key 18AD5014C99EF7E3BA5F6CE950BDD3E0FC8A365E
wget https://github.com/rkt/rkt/releases/download/v$RKT_VERSION/rkt_$RKT_VERSION-1_amd64.deb
wget https://github.com/rkt/rkt/releases/download/v$RKT_VERSION/rkt_$RKT_VERSION-1_amd64.deb.asc
gpg --verify rkt_$RKT_VERSION-1_amd64.deb.asc
sudo dpkg -i rkt_$RKT_VERSION-1_amd64.deb
- run: PATH="$GOPATH/bin:/usr/local/go/bin:$PATH" make bootstrap
- run-tests
- store_test_results:
path: /tmp/test-reports
- store_artifacts:
path: /tmp/test-reports

test-machine:
executor: "<< parameters.executor >>"
parameters:

@@ -326,7 +298,7 @@ commands:
parameters:
version:
type: string
default: 1.0.0
default: 1.2.3
steps:
- run:
name: Install Vault << parameters.version >>
.github/ISSUE_TEMPLATE.md | 2

@@ -1,3 +1,5 @@
For reporting security vulnerabilities [please refer to the website.](https://www.nomadproject.io/security.html)

If you have a question, prepend your issue with `[question]` or preferably use the [nomad mailing list](https://www.nomadproject.io/community.html).

If filing a bug please include the following:
.github/SECURITY.md | 3 (new file)

@@ -0,0 +1,3 @@
# Security Policy

Please see https://www.nomadproject.io/security.html
.gitignore | 2

@@ -86,6 +86,8 @@ rkt-*
/ui/libpeerconnection.log
/ui/npm-debug.log*
/ui/testem.log
/ui/.env*
/ui/.pnp*
.ignore

# ember-try
@@ -1,5 +1,12 @@
## 0.10.2 (Unreleased)

IMPROVEMENTS:
* client: Enable setting tags on Consul Connect sidecar service

BUG FIXES:

* cli: Make scoring column orders consistent `nomad alloc status` [[GH-6609](https://github.com/hashicorp/nomad/issues/6609)]

## 0.10.1 (November 4, 2019)

BUG FIXES:

@@ -46,6 +53,8 @@ BUG FIXES:
* client: Fixed failure to start if another client is already running with the same data directory [[GH-6348](https://github.com/hashicorp/nomad/pull/6348)]
* devices: Fixed a bug causing CPU usage spike when a device is detected [[GH-6201](https://github.com/hashicorp/nomad/issues/6201)]
* drivers/docker: Set gc image_delay default to 3 minutes [[GH-6078](https://github.com/hashicorp/nomad/pull/6078)]
* ui: Fixed a bug where the allocation log viewer would render HTML or hide content that matched XML syntax [[GH-6048](https://github.com/hashicorp/nomad/issues/6048)]
* ui: Fixed a bug where allocation log viewer doesn't show all content in Firefox [[GH-6466](https://github.com/hashicorp/nomad/issues/6466)]
* ui: Fixed navigation via clicking recent allocation row [[GH-6087](https://github.com/hashicorp/nomad/pull/6087)]
* ui: Fixed a bug where the allocation log viewer would render HTML or hide content that matched XML syntax [[GH-6048](https://github.com/hashicorp/nomad/issues/6048)]
* ui: Fixed a bug where allocation log viewer doesn't show all content in Firefox [[GH-6466](https://github.com/hashicorp/nomad/issues/6466)]
GNUmakefile | 20

@@ -6,7 +6,7 @@ GIT_COMMIT := $(shell git rev-parse HEAD)
GIT_DIRTY := $(if $(shell git status --porcelain),+CHANGES)

GO_LDFLAGS := "-X github.com/hashicorp/nomad/version.GitCommit=$(GIT_COMMIT)$(GIT_DIRTY)"
GO_TAGS ?=
GO_TAGS ?= codegen_generated

GO_TEST_CMD = $(if $(shell which gotestsum),gotestsum --,go test)

@@ -147,6 +147,7 @@ deps: ## Install build and development dependencies
go get -u github.com/a8m/tree/cmd/tree
go get -u github.com/magiconair/vendorfmt/cmd/vendorfmt
go get -u gotest.tools/gotestsum
go get -u github.com/fatih/hclfmt
@bash -C "$(PROJECT_ROOT)/scripts/install-codecgen.sh"
@bash -C "$(PROJECT_ROOT)/scripts/install-protoc-gen-go.sh"

@@ -224,21 +225,30 @@ generate-examples: command/job_init.bindata_assetfs.go
command/job_init.bindata_assetfs.go: command/assets/*
go-bindata-assetfs -pkg command -o command/job_init.bindata_assetfs.go ./command/assets/...

.PHONY: vendorfmt
vendorfmt:
@echo "--> Formatting vendor/vendor.json"
test -x $(GOPATH)/bin/vendorfmt || go get -u github.com/magiconair/vendorfmt/cmd/vendorfmt
vendorfmt

.PHONY: changelogfmt
changelogfmt:
@echo "--> Making [GH-xxxx] references clickable..."
@sed -E 's|([^\[])\[GH-([0-9]+)\]|\1[[GH-\2](https://github.com/hashicorp/nomad/issues/\2)]|g' CHANGELOG.md > changelog.tmp && mv changelog.tmp CHANGELOG.md

## We skip the terraform directory as there are templated hcl configurations
## that do not successfully compile without rendering
.PHONY: hclfmt
hclfmt:
@echo "--> Formatting HCL"
@find . -path ./terraform -prune -o -name 'upstart.nomad' -prune -o \( -name '*.nomad' -o -name '*.hcl' \) -exec hclfmt -w {} +

.PHONY: dev
dev: GOOS=$(shell go env GOOS)
dev: GOARCH=$(shell go env GOARCH)
dev: GOPATH=$(shell go env GOPATH)
dev: DEV_TARGET=pkg/$(GOOS)_$(GOARCH)/nomad
dev: vendorfmt changelogfmt ## Build for the current development platform
dev: vendorfmt changelogfmt hclfmt ## Build for the current development platform
@echo "==> Removing old development build..."
@rm -f $(PROJECT_ROOT)/$(DEV_TARGET)
@rm -f $(PROJECT_ROOT)/bin/nomad

@@ -252,11 +262,11 @@ dev: vendorfmt changelogfmt ## Build for the current development platform
@cp $(PROJECT_ROOT)/$(DEV_TARGET) $(GOPATH)/bin

.PHONY: prerelease
prerelease: GO_TAGS=ui release
prerelease: GO_TAGS=ui codegen_generated release
prerelease: generate-all ember-dist static-assets ## Generate all the static assets for a Nomad release

.PHONY: release
release: GO_TAGS=ui release
release: GO_TAGS=ui codegen_generated release
release: clean $(foreach t,$(ALL_TARGETS),pkg/$(t).zip) ## Build all release packages which can be built on this platform.
@echo "==> Results:"
@tree --dirsfirst $(PROJECT_ROOT)/pkg

@@ -283,6 +293,7 @@ test-nomad: dev ## Run Nomad test suites
$(if $(ENABLE_RACE),-race) $(if $(VERBOSE),-v) \
-cover \
-timeout=15m \
-tags "$(GO_TAGS)" \
$(GOTEST_PKGS) $(if $(VERBOSE), >test.log ; echo $$? > exit-code)
@if [ $(VERBOSE) ] ; then \
bash -C "$(PROJECT_ROOT)/scripts/test_check.sh" ; \

@@ -295,6 +306,7 @@ e2e-test: dev ## Run the Nomad e2e test suite
$(if $(ENABLE_RACE),-race) $(if $(VERBOSE),-v) \
-cover \
-timeout=900s \
-tags "$(GO_TAGS)" \
github.com/hashicorp/nomad/e2e/vault/ \
-integration
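For reference, the substitution the new changelogfmt target performs with sed can be reproduced in Go; the snippet below is an illustrative sketch (not part of the patch) showing how a bare [GH-1234] reference becomes a clickable link while an already-wrapped reference is left untouched.

package main

import (
	"fmt"
	"regexp"
)

// Mirrors the changelogfmt rewrite: bare [GH-1234] references become
// Markdown links to the matching issue; references already preceded by
// "[" are left alone.
func main() {
	re := regexp.MustCompile(`([^\[])\[GH-([0-9]+)\]`)
	line := "* cli: Make scoring column orders consistent [GH-6609]"
	fmt.Println(re.ReplaceAllString(line,
		"$1[[GH-$2](https://github.com/hashicorp/nomad/issues/$2)]"))
	// * cli: Make scoring column orders consistent [[GH-6609](https://github.com/hashicorp/nomad/issues/6609)]
}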
@@ -107,7 +107,7 @@ Who Uses Nomad
* [Nomad at Target: Scaling Microservices Across Public and Private Clouds](https://www.hashicorp.com/resources/nomad-scaling-target-microservices-across-cloud)
* [Playing with Nomad from HashiCorp](https://danielparker.me/nomad/hashicorp/schedulers/nomad/)
* Trivago
* [Maybe You Don’t Need Kubernetes](https://matthias-endler.de/2019/maybe-you-dont-need-kubernetes/)
* [Maybe You Don’t Need Kubernetes](https://endler.dev/2019/maybe-you-dont-need-kubernetes/)
* [Nomad - Our Experiences and Best Practices](https://tech.trivago.com/2019/01/25/nomad-our-experiences-and-best-practices/)
* Roblox
* [How Roblox runs a platform for 70 million gamers on HashiCorp Nomad](https://portworx.com/architects-corner-roblox-runs-platform-70-million-gamers-hashicorp-nomad/)

@@ -123,8 +123,6 @@ Who Uses Nomad
* [Tech at N26 - The Bank in the Cloud](https://medium.com/insiden26/tech-at-n26-the-bank-in-the-cloud-e5ff818b528b)
* Elsevier
* [Eslevier’s Container Framework with Nomad, Terraform, and Consul](https://www.hashicorp.com/resources/elsevier-nomad-container-framework-demo)
* Palantir
* [Enterprise Security at Palantir with the HashiCorp stack](https://www.hashicorp.com/resources/enterprise-security-hashicorp-stack)
* Graymeta
* [Backend Batch Processing At Scale with Nomad](https://www.hashicorp.com/resources/backend-batch-processing-nomad)
* NIH NCBI
@@ -210,12 +210,7 @@ func (a *Allocations) Exec(ctx context.Context,

func (a *Allocations) execFrames(ctx context.Context, alloc *Allocation, task string, tty bool, command []string,
errCh chan<- error, q *QueryOptions) (sendFn func(*ExecStreamingInput) error, output <-chan *ExecStreamingOutput) {

nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if err != nil {
errCh <- err
return nil, nil
}
nodeClient, _ := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)

if q == nil {
q = &QueryOptions{}

@@ -236,15 +231,17 @@ func (a *Allocations) execFrames(ctx context.Context, alloc *Allocation, task st

reqPath := fmt.Sprintf("/v1/client/allocation/%s/exec", alloc.ID)

conn, _, err := nodeClient.websocket(reqPath, q)
if err != nil {
// There was an error talking directly to the client. Non-network
// errors are fatal, but network errors can attempt to route via RPC.
if _, ok := err.(net.Error); !ok {
var conn *websocket.Conn

if nodeClient != nil {
conn, _, err = nodeClient.websocket(reqPath, q)
if _, ok := err.(net.Error); err != nil && !ok {
errCh <- err
return nil, nil
}
}

if conn == nil {
conn, _, err = a.client.websocket(reqPath, q)
if err != nil {
errCh <- err
api/fs.go | 179

@@ -92,72 +92,24 @@ func (a *AllocFS) Stat(alloc *Allocation, path string, q *QueryOptions) (*AllocF
// ReadAt is used to read bytes at a given offset until limit at the given path
// in an allocation directory. If limit is <= 0, there is no limit.
func (a *AllocFS) ReadAt(alloc *Allocation, path string, offset int64, limit int64, q *QueryOptions) (io.ReadCloser, error) {
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if err != nil {
return nil, err
}

if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}

q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["limit"] = strconv.FormatInt(limit, 10)

reqPath := fmt.Sprintf("/v1/client/fs/readat/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
return nil, err
}

// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
return nil, err
}
}

return r, nil
return queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["limit"] = strconv.FormatInt(limit, 10)
})
}

// Cat is used to read contents of a file at the given path in an allocation
// directory
func (a *AllocFS) Cat(alloc *Allocation, path string, q *QueryOptions) (io.ReadCloser, error) {
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
if err != nil {
return nil, err
}

if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}

q.Params["path"] = path
reqPath := fmt.Sprintf("/v1/client/fs/cat/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
return nil, err
}

// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
return nil, err
}
}

return r, nil
return queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["path"] = path
})
}

// Stream streams the content of a file blocking on EOF.

@@ -172,40 +124,19 @@ func (a *AllocFS) Stream(alloc *Allocation, path, origin string, offset int64,
cancel <-chan struct{}, q *QueryOptions) (<-chan *StreamFrame, <-chan error) {

errCh := make(chan error, 1)
nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)

reqPath := fmt.Sprintf("/v1/client/fs/stream/%s", alloc.ID)
r, err := queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["origin"] = origin
})
if err != nil {
errCh <- err
return nil, errCh
}

if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}

q.Params["path"] = path
q.Params["offset"] = strconv.FormatInt(offset, 10)
q.Params["origin"] = origin

reqPath := fmt.Sprintf("/v1/client/fs/stream/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
errCh <- err
return nil, errCh
}

// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
errCh <- err
return nil, errCh
}
}

// Create the output channel
frames := make(chan *StreamFrame, 10)

@@ -244,6 +175,40 @@ func (a *AllocFS) Stream(alloc *Allocation, path, origin string, offset int64,
return frames, errCh
}

func queryClientNode(c *Client, alloc *Allocation, reqPath string, q *QueryOptions, customizeQ func(*QueryOptions)) (io.ReadCloser, error) {
nodeClient, _ := c.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)

if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}
if customizeQ != nil {
customizeQ(q)
}

var r io.ReadCloser
var err error

if nodeClient != nil {
r, err = nodeClient.rawQuery(reqPath, q)
if _, ok := err.(net.Error); err != nil && !ok {
// found a non networking error talking to client directly
return nil, err
}

}

// failed to query node, access through server directly
// or network error when talking to the client directly
if r == nil {
return c.rawQuery(reqPath, q)
}

return r, err
}

// Logs streams the content of a tasks logs blocking on EOF.
// The parameters are:
// * allocation: the allocation to stream from.

@@ -264,42 +229,20 @@ func (a *AllocFS) Logs(alloc *Allocation, follow bool, task, logType, origin str

errCh := make(chan error, 1)

nodeClient, err := a.client.GetNodeClientWithTimeout(alloc.NodeID, ClientConnTimeout, q)
reqPath := fmt.Sprintf("/v1/client/fs/logs/%s", alloc.ID)
r, err := queryClientNode(a.client, alloc, reqPath, q,
func(q *QueryOptions) {
q.Params["follow"] = strconv.FormatBool(follow)
q.Params["task"] = task
q.Params["type"] = logType
q.Params["origin"] = origin
q.Params["offset"] = strconv.FormatInt(offset, 10)
})
if err != nil {
errCh <- err
return nil, errCh
}

if q == nil {
q = &QueryOptions{}
}
if q.Params == nil {
q.Params = make(map[string]string)
}

q.Params["follow"] = strconv.FormatBool(follow)
q.Params["task"] = task
q.Params["type"] = logType
q.Params["origin"] = origin
q.Params["offset"] = strconv.FormatInt(offset, 10)

reqPath := fmt.Sprintf("/v1/client/fs/logs/%s", alloc.ID)
r, err := nodeClient.rawQuery(reqPath, q)
if err != nil {
// There was a networking error when talking directly to the client.
if _, ok := err.(net.Error); !ok {
errCh <- err
return nil, errCh
}

// Try via the server
r, err = a.client.rawQuery(reqPath, q)
if err != nil {
errCh <- err
return nil, errCh
}
}

// Create the output channel
frames := make(chan *StreamFrame, 10)
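A minimal consumer-side sketch of the refactored fs API (illustrative only; the allocation ID and the task name "web" are placeholders): the client-direct request and the server-routed fallback now both live inside queryClientNode, so a caller of AllocFS().Logs only deals with frames and errors.

package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

// tailLogs follows a task's stdout; the direct-to-client vs. routed-via-server
// decision is handled transparently by the api package.
func tailLogs(allocID string) error {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		return err
	}

	alloc, _, err := client.Allocations().Info(allocID, nil)
	if err != nil {
		return err
	}

	cancel := make(chan struct{})
	defer close(cancel)

	frames, errCh := client.AllocFS().Logs(alloc, true, "web", "stdout", "end", 0, cancel, nil)
	for {
		select {
		case frame, ok := <-frames:
			if !ok {
				return nil
			}
			fmt.Print(string(frame.Data))
		case err := <-errCh:
			return err
		}
	}
}

func main() {
	if err := tailLogs("example-alloc-id"); err != nil {
		log.Fatal(err)
	}
}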
@@ -143,6 +143,7 @@ type ConsulConnect struct {
// ConsulSidecarService represents a Consul Connect SidecarService jobspec
// stanza.
type ConsulSidecarService struct {
Tags []string
Port string
Proxy *ConsulProxy
}
api/tasks.go | 25

@@ -370,12 +370,28 @@ type VolumeRequest struct {
ReadOnly bool `mapstructure:"read_only"`
}

const (
VolumeMountPropagationPrivate = "private"
VolumeMountPropagationHostToTask = "host-to-task"
VolumeMountPropagationBidirectional = "bidirectional"
)

// VolumeMount represents the relationship between a destination path in a task
// and the task group volume that should be mounted there.
type VolumeMount struct {
Volume string
Destination string
ReadOnly bool `mapstructure:"read_only"`
Volume *string
Destination *string
ReadOnly *bool `mapstructure:"read_only"`
PropagationMode *string `mapstructure:"propagation_mode"`
}

func (vm *VolumeMount) Canonicalize() {
if vm.PropagationMode == nil {
vm.PropagationMode = stringToPtr(VolumeMountPropagationPrivate)
}
if vm.ReadOnly == nil {
vm.ReadOnly = boolToPtr(false)
}
}

// TaskGroup is the unit of scheduling.

@@ -632,6 +648,9 @@ func (t *Task) Canonicalize(tg *TaskGroup, job *Job) {
for _, a := range t.Affinities {
a.Canonicalize()
}
for _, vm := range t.VolumeMounts {
vm.Canonicalize()
}
}

// TaskArtifact is used to download artifacts before running a task.
@@ -368,6 +368,14 @@ func TestTask_Artifact(t *testing.T) {
}
}

func TestTask_VolumeMount(t *testing.T) {
t.Parallel()
vm := &VolumeMount{}
vm.Canonicalize()
require.NotNil(t, vm.PropagationMode)
require.Equal(t, *vm.PropagationMode, "private")
}

// Ensures no regression on https://github.com/hashicorp/nomad/issues/3132
func TestTaskGroup_Canonicalize_Update(t *testing.T) {
// Job with an Empty() Update
@@ -89,7 +89,7 @@ type TaskPrestartHook interface {
// Prestart is called before the task is started including after every
// restart. Prestart is not called if the allocation is terminal.
//
// The context is cancelled if the task is killed.
// The context is cancelled if the task is killed or shutdown.
Prestart(context.Context, *TaskPrestartRequest, *TaskPrestartResponse) error
}
@@ -6,6 +6,7 @@ import (
"sync"
"time"

"github.com/LK4D4/joincontext"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"

@@ -192,8 +193,11 @@ func (tr *TaskRunner) prestart() error {
}

// Run the prestart hook
// use a joint context to allow any blocking pre-start hooks
// to be canceled by either killCtx or shutdownCtx
joinedCtx, _ := joincontext.Join(tr.killCtx, tr.shutdownCtx)
var resp interfaces.TaskPrestartResponse
if err := pre.Prestart(tr.killCtx, &req, &resp); err != nil {
if err := pre.Prestart(joinedCtx, &req, &resp); err != nil {
tr.emitHookError(err, name)
return structs.WrapRecoverable(fmt.Sprintf("prestart hook %q failed: %v", name, err), err)
}
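The hunk above relies on the joined context being cancelled as soon as either parent is. A small illustrative sketch of that behaviour, assuming joincontext.Join returns the (context, cancel) pair used in the patch:

package main

import (
	"context"
	"fmt"

	"github.com/LK4D4/joincontext"
)

// A context joined from killCtx and shutdownCtx is done once either parent
// is cancelled, which is what lets a blocking prestart hook unblock on
// agent shutdown as well as on task kill.
func main() {
	killCtx, killCancel := context.WithCancel(context.Background())
	shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
	defer killCancel()

	joined, cancel := joincontext.Join(killCtx, shutdownCtx)
	defer cancel()

	// Cancelling only the shutdown context is enough.
	shutdownCancel()

	<-joined.Done()
	fmt.Println("joined context done:", joined.Err())
}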
@@ -1742,6 +1742,69 @@ func TestTaskRunner_Template_Artifact(t *testing.T) {
require.NoErrorf(t, err, "%v not rendered", f2)
}

// TestTaskRunner_Template_BlockingPreStart asserts that a template
// that fails to render in PreStart can gracefully be shutdown by
// either killCtx or shutdownCtx
func TestTaskRunner_Template_BlockingPreStart(t *testing.T) {
t.Parallel()

alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.Templates = []*structs.Template{
{
EmbeddedTmpl: `{{ with secret "foo/secret" }}{{ .Data.certificate }}{{ end }}`,
DestPath: "local/test",
ChangeMode: structs.TemplateChangeModeNoop,
},
}

task.Vault = &structs.Vault{Policies: []string{"default"}}

conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
defer cleanup()

tr, err := NewTaskRunner(conf)
require.NoError(t, err)
go tr.Run()
defer tr.Shutdown()

testutil.WaitForResult(func() (bool, error) {
ts := tr.TaskState()

if len(ts.Events) == 0 {
return false, fmt.Errorf("no events yet")
}

for _, e := range ts.Events {
if e.Type == "Template" && strings.Contains(e.DisplayMessage, "vault.read(foo/secret)") {
return true, nil
}
}

return false, fmt.Errorf("no missing vault secret template event yet: %#v", ts.Events)

}, func(err error) {
require.NoError(t, err)
})

shutdown := func() <-chan bool {
finished := make(chan bool)
go func() {
tr.Shutdown()
finished <- true
}()

return finished
}

select {
case <-shutdown():
// it shut down like it should have
case <-time.After(10 * time.Second):
require.Fail(t, "timeout shutting down task")
}
}

// TestTaskRunner_Template_NewVaultToken asserts that a new vault token is
// created when rendering template and that it is revoked on alloc completion
func TestTaskRunner_Template_NewVaultToken(t *testing.T) {
@@ -76,6 +76,10 @@ func TestPrevAlloc_StreamAllocDir_Ok(t *testing.T) {
tw := tar.NewWriter(buf)

walkFn := func(path string, fileInfo os.FileInfo, err error) error {
// filepath.Walk passes in an error
if err != nil {
return fmt.Errorf("error from filepath.Walk(): %s", err)
}
// Include the path of the file name relative to the alloc dir
// so that we can put the files in the right directories
link := ""
@@ -1,6 +1,6 @@
package structs

//go:generate codecgen -d 102 -o structs.generated.go structs.go
//go:generate codecgen -d 102 -t codec_generated -o structs.generated.go structs.go

import (
"errors"
@@ -45,14 +45,6 @@ type VaultClient interface {
// StopRenewToken removes the token from the min-heap, stopping its
// renewal.
StopRenewToken(string) error

// RenewLease renews a vault secret's lease and adds the lease
// identifier to the min-heap for periodic renewal.
RenewLease(string, int) (<-chan error, error)

// StopRenewLease removes a secret's lease ID from the min-heap,
// stopping its renewal.
StopRenewLease(string) error
}

// Implementation of VaultClient interface to interact with vault and perform

@@ -325,44 +317,6 @@ func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, err
return errCh, nil
}

// RenewLease renews the supplied lease identifier for a supplied duration (in
// seconds) and adds it to the min-heap so that it gets renewed periodically by
// the renewal loop. Any error returned during renewal will be written to a
// buffered channel and the channel is returned instead of an actual error.
// This helps the caller be notified of a renewal failure asynchronously for
// appropriate actions to be taken. The caller of this function need not have
// to close the error channel.
func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) {
if leaseId == "" {
err := fmt.Errorf("missing lease ID")
return nil, err
}

if increment < 1 {
err := fmt.Errorf("increment cannot be less than 1")
return nil, err
}

// Create a buffered error channel
errCh := make(chan error, 1)

// Create a renewal request using the supplied lease and duration
renewalReq := &vaultClientRenewalRequest{
errCh: errCh,
id: leaseId,
increment: increment,
}

// Renew the secret and send any error to the dedicated error channel
if err := c.renew(renewalReq); err != nil {
c.logger.Error("error during renewal of lease", "error", err)
metrics.IncrCounter([]string{"client", "vault", "renew_lease_error"}, 1)
return nil, err
}

return errCh, nil
}

// renew is a common method to handle renewal of both tokens and secret leases.
// It invokes a token renewal or a secret's lease renewal. If renewal is
// successful, min-heap is updated based on the duration after which it needs

@@ -558,12 +512,6 @@ func (c *vaultClient) StopRenewToken(token string) error {
return c.stopRenew(token)
}

// StopRenewLease removes the item from the heap which represents the given
// lease identifier.
func (c *vaultClient) StopRenewLease(leaseId string) error {
return c.stopRenew(leaseId)
}

// stopRenew removes the given identifier from the heap and signals the renewal
// loop to compute the next best candidate for renewal.
func (c *vaultClient) stopRenew(id string) error {
@@ -111,10 +111,6 @@ func (vc *MockVaultClient) StopRenewToken(token string) error {
return nil
}

func (vc *MockVaultClient) RenewLease(leaseId string, interval int) (<-chan error, error) {
return nil, nil
}
func (vc *MockVaultClient) StopRenewLease(leaseId string) error { return nil }
func (vc *MockVaultClient) Start() {}
func (vc *MockVaultClient) Stop() {}
func (vc *MockVaultClient) GetConsulACL(string, string) (*vaultapi.Secret, error) { return nil, nil }
@@ -278,14 +278,14 @@ func (c *Command) readConfig() *Config {
config.PluginDir = filepath.Join(config.DataDir, "plugins")
}

if !c.isValidConfig(config) {
if !c.isValidConfig(config, cmdConfig) {
return nil
}

return config
}

func (c *Command) isValidConfig(config *Config) bool {
func (c *Command) isValidConfig(config, cmdConfig *Config) bool {

// Check that the server is running in at least one mode.
if !(config.Server.Enabled || config.Client.Enabled) {

@@ -361,11 +361,12 @@ func (c *Command) isValidConfig(config *Config) bool {
}

// Check the bootstrap flags
if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
if !config.Server.Enabled && cmdConfig.Server.BootstrapExpect > 0 {
// report an error if BootstrapExpect is set in CLI but server is disabled
c.Ui.Error("Bootstrap requires server mode to be enabled")
return false
}
if config.Server.BootstrapExpect == 1 {
if config.Server.Enabled && config.Server.BootstrapExpect == 1 {
c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
}

@@ -391,28 +392,58 @@ func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter,
return nil, nil, nil
}

// Create a log writer, and wrap a logOutput around it
logWriter := NewLogWriter(512)
writers := []io.Writer{c.logFilter, logWriter}

// Check if syslog is enabled
var syslog io.Writer
if config.EnableSyslog {
l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
if err != nil {
c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
return nil, nil, nil
}
syslog = &SyslogWrapper{l, c.logFilter}
writers = append(writers, &SyslogWrapper{l, c.logFilter})
}

// Create a log writer, and wrap a logOutput around it
logWriter := NewLogWriter(512)
var logOutput io.Writer
if syslog != nil {
logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
} else {
logOutput = io.MultiWriter(c.logFilter, logWriter)
// Check if file logging is enabled
if config.LogFile != "" {
dir, fileName := filepath.Split(config.LogFile)

// if a path is provided, but has no filename, then a default is used.
if fileName == "" {
fileName = "nomad.log"
}

// Try to enter the user specified log rotation duration first
var logRotateDuration time.Duration
if config.LogRotateDuration != "" {
duration, err := time.ParseDuration(config.LogRotateDuration)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to parse log rotation duration: %v", err))
return nil, nil, nil
}
logRotateDuration = duration
} else {
// Default to 24 hrs if no rotation period is specified
logRotateDuration = 24 * time.Hour
}

logFile := &logFile{
logFilter: c.logFilter,
fileName: fileName,
logPath: dir,
duration: logRotateDuration,
MaxBytes: config.LogRotateBytes,
MaxFiles: config.LogRotateMaxFiles,
}

writers = append(writers, logFile)
}
c.logOutput = logOutput
log.SetOutput(logOutput)
return logGate, logWriter, logOutput

c.logOutput = io.MultiWriter(writers...)
log.SetOutput(c.logOutput)
return logGate, logWriter, c.logOutput
}

// setupAgent is used to start the agent and various interfaces
@@ -57,6 +57,18 @@ type Config struct {
// LogJson enables log output in a JSON format
LogJson bool `hcl:"log_json"`

// LogFile enables logging to a file
LogFile string `hcl:"log_file"`

// LogRotateDuration is the time period that logs should be rotated in
LogRotateDuration string `hcl:"log_rotate_duration"`

// LogRotateBytes is the max number of bytes that should be written to a file
LogRotateBytes int `hcl:"log_rotate_bytes"`

// LogRotateMaxFiles is the max number of log files to keep
LogRotateMaxFiles int `hcl:"log_rotate_max_files"`

// BindAddr is the address on which all of nomad's services will
// be bound. If not specified, this defaults to 127.0.0.1.
BindAddr string `hcl:"bind_addr"`

@@ -898,6 +910,18 @@ func (c *Config) Merge(b *Config) *Config {
if b.LogJson {
result.LogJson = true
}
if b.LogFile != "" {
result.LogFile = b.LogFile
}
if b.LogRotateDuration != "" {
result.LogRotateDuration = b.LogRotateDuration
}
if b.LogRotateBytes != 0 {
result.LogRotateBytes = b.LogRotateBytes
}
if b.LogRotateMaxFiles != 0 {
result.LogRotateMaxFiles = b.LogRotateMaxFiles
}
if b.BindAddr != "" {
result.BindAddr = b.BindAddr
}
@@ -19,6 +19,7 @@ var basicConfig = &Config{
NodeName: "my-web",
DataDir: "/tmp/nomad",
PluginDir: "/tmp/nomad-plugins",
LogFile: "/var/log/nomad.log",
LogLevel: "ERR",
LogJson: true,
BindAddr: "192.168.0.1",

@@ -409,14 +410,10 @@ func TestConfig_Parse(t *testing.T) {
t.Run(tc.File, func(t *testing.T) {
require := require.New(t)
path, err := filepath.Abs(filepath.Join("./testdata", tc.File))
if err != nil {
t.Fatalf("file: %s\n\n%s", tc.File, err)
}
require.NoError(err)

actual, err := ParseConfigFile(path)
if (err != nil) != tc.Err {
t.Fatalf("file: %s\n\n%s", tc.File, err)
}
require.NoError(err)

// ParseConfig used to re-merge defaults for these three objects,
// despite them already being merged in LoadConfig. The test structs
@@ -1480,6 +1480,7 @@ func newConnect(serviceName string, nc *structs.ConsulConnect, networks structs.

// Advertise host IP:port
cc.SidecarService = &api.AgentServiceRegistration{
Tags: helper.CopySliceString(nc.SidecarService.Tags),
Address: net.IP,
Port: port.Value,
@@ -17,12 +17,12 @@ import (
)

var (
allocIDNotPresentErr = fmt.Errorf("must provide a valid alloc id")
fileNameNotPresentErr = fmt.Errorf("must provide a file name")
taskNotPresentErr = fmt.Errorf("must provide task name")
logTypeNotPresentErr = fmt.Errorf("must provide log type (stdout/stderr)")
clientNotRunning = fmt.Errorf("node is not running a Nomad Client")
invalidOrigin = fmt.Errorf("origin must be start or end")
allocIDNotPresentErr = CodedError(400, "must provide a valid alloc id")
fileNameNotPresentErr = CodedError(400, "must provide a file name")
taskNotPresentErr = CodedError(400, "must provide task name")
logTypeNotPresentErr = CodedError(400, "must provide log type (stdout/stderr)")
clientNotRunning = CodedError(400, "node is not running a Nomad Client")
invalidOrigin = CodedError(400, "origin must be start or end")
)

func (s *HTTPServer) FsRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {

@@ -273,13 +273,13 @@ func (s *HTTPServer) Logs(resp http.ResponseWriter, req *http.Request) (interfac

if followStr := q.Get("follow"); followStr != "" {
if follow, err = strconv.ParseBool(followStr); err != nil {
return nil, fmt.Errorf("failed to parse follow field to boolean: %v", err)
return nil, CodedError(400, fmt.Sprintf("failed to parse follow field to boolean: %v", err))
}
}

if plainStr := q.Get("plain"); plainStr != "" {
if plain, err = strconv.ParseBool(plainStr); err != nil {
return nil, fmt.Errorf("failed to parse plain field to boolean: %v", err)
return nil, CodedError(400, fmt.Sprintf("failed to parse plain field to boolean: %v", err))
}
}

@@ -295,7 +295,7 @@ func (s *HTTPServer) Logs(resp http.ResponseWriter, req *http.Request) (interfac
if offsetString != "" {
var err error
if offset, err = strconv.ParseInt(offsetString, 10, 64); err != nil {
return nil, fmt.Errorf("error parsing offset: %v", err)
return nil, CodedError(400, fmt.Sprintf("error parsing offset: %v", err))
}
}

@@ -388,10 +388,13 @@ func (s *HTTPServer) fsStreamImpl(resp http.ResponseWriter,
decoder.Reset(httpPipe)

if err := res.Error; err != nil {
code := 500
if err.Code != nil {
errCh <- CodedError(int(*err.Code), err.Error())
return
code = int(*err.Code)
}

errCh <- CodedError(code, err.Error())
return
}

if _, err := io.Copy(output, bytes.NewReader(res.Payload)); err != nil {
@@ -3,7 +3,6 @@ package agent
import (
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/http/httptest"

@@ -12,6 +11,7 @@ import (
"time"

cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"

@@ -189,25 +189,26 @@ func TestHTTP_FS_Stream_MissingParams(t *testing.T) {
require := require.New(t)
httpTest(t, nil, func(s *TestAgent) {
req, err := http.NewRequest("GET", "/v1/client/fs/stream/", nil)
require.Nil(err)
require.NoError(err)
respW := httptest.NewRecorder()

_, err = s.Server.Stream(respW, req)
require.EqualError(err, allocIDNotPresentErr.Error())

req, err = http.NewRequest("GET", "/v1/client/fs/stream/foo", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()

_, err = s.Server.Stream(respW, req)
require.EqualError(err, fileNameNotPresentErr.Error())

req, err = http.NewRequest("GET", "/v1/client/fs/stream/foo?path=/path/to/file", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()

_, err = s.Server.Stream(respW, req)
require.Nil(err)
require.Error(err)
require.Contains(err.Error(), "alloc lookup failed")
})
}

@@ -219,38 +220,39 @@ func TestHTTP_FS_Logs_MissingParams(t *testing.T) {
httpTest(t, nil, func(s *TestAgent) {
// AllocID Not Present
req, err := http.NewRequest("GET", "/v1/client/fs/logs/", nil)
require.Nil(err)
require.NoError(err)
respW := httptest.NewRecorder()

s.Server.mux.ServeHTTP(respW, req)
require.Equal(respW.Body.String(), allocIDNotPresentErr.Error())
require.Equal(500, respW.Code) // 500 for backward compat
require.Equal(400, respW.Code)

// Task Not Present
req, err = http.NewRequest("GET", "/v1/client/fs/logs/foo", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()

s.Server.mux.ServeHTTP(respW, req)
require.Equal(respW.Body.String(), taskNotPresentErr.Error())
require.Equal(500, respW.Code) // 500 for backward compat
require.Equal(400, respW.Code)

// Log Type Not Present
req, err = http.NewRequest("GET", "/v1/client/fs/logs/foo?task=foo", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()

s.Server.mux.ServeHTTP(respW, req)
require.Equal(respW.Body.String(), logTypeNotPresentErr.Error())
require.Equal(500, respW.Code) // 500 for backward compat
require.Equal(400, respW.Code)

// Ok
// case where all parameters are set but alloc isn't found
req, err = http.NewRequest("GET", "/v1/client/fs/logs/foo?task=foo&type=stdout", nil)
require.Nil(err)
require.NoError(err)
respW = httptest.NewRecorder()

s.Server.mux.ServeHTTP(respW, req)
require.Equal(200, respW.Code)
require.Equal(500, respW.Code)
require.Contains(respW.Body.String(), "alloc lookup failed")
})
}

@@ -354,8 +356,7 @@ func TestHTTP_FS_Stream_NoFollow(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/stream/%s?path=alloc/logs/web.stdout.0&offset=%d&origin=end&follow=false",
a.ID, offset)

p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := testutil.NewResponseRecorder()
doneCh := make(chan struct{})

@@ -383,8 +384,6 @@ func TestHTTP_FS_Stream_NoFollow(t *testing.T) {
case <-time.After(1 * time.Second):
t.Fatal("should close but did not")
}

p.Close()
})
}

@@ -401,9 +400,7 @@ func TestHTTP_FS_Stream_Follow(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/stream/%s?path=alloc/logs/web.stdout.0&offset=%d&origin=end",
a.ID, offset)

p, _ := io.Pipe()

req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := httptest.NewRecorder()
doneCh := make(chan struct{})

@@ -431,8 +428,6 @@ func TestHTTP_FS_Stream_Follow(t *testing.T) {
t.Fatal("shouldn't close")
case <-time.After(1 * time.Second):
}

p.Close()
})
}

@@ -448,8 +443,7 @@ func TestHTTP_FS_Logs(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/logs/%s?type=stdout&task=web&offset=%d&origin=end&plain=true",
a.ID, offset)

p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := testutil.NewResponseRecorder()
go func() {

@@ -469,8 +463,6 @@ func TestHTTP_FS_Logs(t *testing.T) {
}, func(err error) {
t.Fatal(err)
})

p.Close()
})
}

@@ -486,8 +478,7 @@ func TestHTTP_FS_Logs_Follow(t *testing.T) {
path := fmt.Sprintf("/v1/client/fs/logs/%s?type=stdout&task=web&offset=%d&origin=end&plain=true&follow=true",
a.ID, offset)

p, _ := io.Pipe()
req, err := http.NewRequest("GET", path, p)
req, err := http.NewRequest("GET", path, nil)
require.Nil(err)
respW := testutil.NewResponseRecorder()
errCh := make(chan error)

@@ -514,7 +505,23 @@ func TestHTTP_FS_Logs_Follow(t *testing.T) {
t.Fatalf("shouldn't exit: %v", err)
case <-time.After(1 * time.Second):
}

p.Close()
})
}

func TestHTTP_FS_Logs_PropagatesErrors(t *testing.T) {
t.Parallel()
httpTest(t, nil, func(s *TestAgent) {
path := fmt.Sprintf("/v1/client/fs/logs/%s?type=stdout&task=web&offset=0&origin=end&plain=true",
uuid.Generate())

req, err := http.NewRequest("GET", path, nil)
require.NoError(t, err)
respW := testutil.NewResponseRecorder()

_, err = s.Server.Logs(respW, req)
require.Error(t, err)

_, ok := err.(HTTPCodedError)
require.Truef(t, ok, "expected a coded error but found: %#+v", err)
})
}
@@ -812,9 +812,10 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
structsTask.VolumeMounts = make([]*structs.VolumeMount, l)
for i, mount := range apiTask.VolumeMounts {
structsTask.VolumeMounts[i] = &structs.VolumeMount{
Volume: mount.Volume,
Destination: mount.Destination,
ReadOnly: mount.ReadOnly,
Volume: *mount.Volume,
Destination: *mount.Destination,
ReadOnly: *mount.ReadOnly,
PropagationMode: *mount.PropagationMode,
}
}
}

@@ -1062,6 +1063,7 @@ func ApiConsulConnectToStructs(in *api.ConsulConnect) *structs.ConsulConnect {
if in.SidecarService != nil {

out.SidecarService = &structs.ConsulSidecarService{
Tags: helper.CopySliceString(in.SidecarService.Tags),
Port: in.SidecarService.Port,
}

@@ -1537,6 +1537,13 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
TaskName: "task1",
},
},
Connect: &api.ConsulConnect{
Native: false,
SidecarService: &api.ConsulSidecarService{
Tags: []string{"f", "g"},
Port: "9000",
},
},
},
},
Tasks: []*api.Task{

@@ -1877,6 +1884,13 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
TaskName: "task1",
},
},
Connect: &structs.ConsulConnect{
Native: false,
SidecarService: &structs.ConsulSidecarService{
Tags: []string{"f", "g"},
Port: "9000",
},
},
},
},
Tasks: []*structs.Task{
command/agent/log_file.go | 146 (new file)

@@ -0,0 +1,146 @@
package agent

import (
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"

"github.com/hashicorp/logutils"
)

var (
now = time.Now
)

// logFile is used to setup a file based logger that also performs log rotation
type logFile struct {
// Log level Filter to filter out logs that do not matcch LogLevel criteria
logFilter *logutils.LevelFilter

//Name of the log file
fileName string

//Path to the log file
logPath string

//Duration between each file rotation operation
duration time.Duration

//LastCreated represents the creation time of the latest log
LastCreated time.Time

//FileInfo is the pointer to the current file being written to
FileInfo *os.File

//MaxBytes is the maximum number of desired bytes for a log file
MaxBytes int

//BytesWritten is the number of bytes written in the current log file
BytesWritten int64

// Max rotated files to keep before removing them.
MaxFiles int

//acquire is the mutex utilized to ensure we have no concurrency issues
acquire sync.Mutex
}

func (l *logFile) fileNamePattern() string {
// Extract the file extension
fileExt := filepath.Ext(l.fileName)
// If we have no file extension we append .log
if fileExt == "" {
fileExt = ".log"
}
// Remove the file extension from the filename
return strings.TrimSuffix(l.fileName, fileExt) + "-%s" + fileExt
}

func (l *logFile) openNew() error {
fileNamePattern := l.fileNamePattern()
// New file name has the format : filename-timestamp.extension
createTime := now()
newfileName := fmt.Sprintf(fileNamePattern, strconv.FormatInt(createTime.UnixNano(), 10))
newfilePath := filepath.Join(l.logPath, newfileName)
// Try creating a file. We truncate the file because we are the only authority to write the logs
filePointer, err := os.OpenFile(newfilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0640)
if err != nil {
return err
}

l.FileInfo = filePointer
// New file, new bytes tracker, new creation time :)
l.LastCreated = createTime
l.BytesWritten = 0
return nil
}

func (l *logFile) rotate() error {
// Get the time from the last point of contact
timeElapsed := time.Since(l.LastCreated)
// Rotate if we hit the byte file limit or the time limit
if (l.BytesWritten >= int64(l.MaxBytes) && (l.MaxBytes > 0)) || timeElapsed >= l.duration {
l.FileInfo.Close()
if err := l.pruneFiles(); err != nil {
return err
}
return l.openNew()
}
return nil
}

func (l *logFile) pruneFiles() error {
if l.MaxFiles == 0 {
return nil
}
pattern := l.fileNamePattern()
//get all the files that match the log file pattern
globExpression := filepath.Join(l.logPath, fmt.Sprintf(pattern, "*"))
matches, err := filepath.Glob(globExpression)
if err != nil {
return err
}

// Stort the strings as filepath.Glob does not publicly guarantee that files
// are sorted, so here we add an extra defensive sort.
sort.Strings(matches)

// Prune if there are more files stored than the configured max
stale := len(matches) - l.MaxFiles
for i := 0; i < stale; i++ {
if err := os.Remove(matches[i]); err != nil {
return err
}
}
return nil
}

// Write is used to implement io.Writer
func (l *logFile) Write(b []byte) (int, error) {
// Filter out log entries that do not match log level criteria
if !l.logFilter.Check(b) {
return 0, nil
}

l.acquire.Lock()
defer l.acquire.Unlock()
//Create a new file if we have no file to write to
if l.FileInfo == nil {
if err := l.openNew(); err != nil {
return 0, err
}
}
// Check for the last contact and rotate if necessary
if err := l.rotate(); err != nil {
return 0, err
}

n, err := l.FileInfo.Write(b)
l.BytesWritten += int64(n)
return n, err
}
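An illustrative sketch (not part of the patch) of how this rotating writer composes with other log sinks, mirroring what setupLoggers does in command.go; the path, size, and retention values below are made up for the example.

package agent

import (
	"io"
	"os"
	"time"

	"github.com/hashicorp/logutils"
)

// newFileLogOutput is a hypothetical helper showing the composition: the
// rotating logFile writer is just another io.Writer fanned out alongside
// stderr (or syslog, the gated writer, etc.).
func newFileLogOutput(filter *logutils.LevelFilter) io.Writer {
	lf := &logFile{
		logFilter: filter,
		fileName:  "nomad.log",      // rotated copies become nomad-<unix-nano>.log
		logPath:   "/var/log/nomad", // example directory
		duration:  24 * time.Hour,   // rotate at least daily
		MaxBytes:  64 * 1024 * 1024, // or once 64 MiB have been written
		MaxFiles:  5,                // keep only the five newest archives
	}
	return io.MultiWriter(os.Stderr, lf)
}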
command/agent/log_file_test.go | 171 (new file)

@@ -0,0 +1,171 @@
package agent

import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"

"github.com/hashicorp/logutils"
"github.com/stretchr/testify/require"
)

const (
testFileName = "Nomad.log"
testDuration = 2 * time.Second
testBytes = 10
)

func TestLogFile_timeRotation(t *testing.T) {
t.Parallel()
require := require.New(t)

tempDir, err := ioutil.TempDir("", "LogWriterTimeTest")
require.NoError(err)

defer os.Remove(tempDir)

filt := LevelFilter()
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
duration: testDuration,
}
logFile.Write([]byte("Hello World"))
time.Sleep(2 * time.Second)
logFile.Write([]byte("Second File"))
want := 2
if got, _ := ioutil.ReadDir(tempDir); len(got) != want {
t.Errorf("Expected %d files, got %v file(s)", want, len(got))
}
}

func TestLogFile_openNew(t *testing.T) {
t.Parallel()
require := require.New(t)

tempDir, err := ioutil.TempDir("", "LogWriterOpenTest")
require.NoError(err)
defer os.Remove(tempDir)

logFile := logFile{fileName: testFileName, logPath: tempDir, duration: testDuration}
require.NoError(logFile.openNew())

_, err = ioutil.ReadFile(logFile.FileInfo.Name())
require.NoError(err)
}

func TestLogFile_byteRotation(t *testing.T) {
t.Parallel()
require := require.New(t)

tempDir, err := ioutil.TempDir("", "LogWriterByteTest")
require.NoError(err)
defer os.Remove(tempDir)

filt := LevelFilter()
filt.MinLevel = logutils.LogLevel("INFO")
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: 24 * time.Hour,
}
logFile.Write([]byte("Hello World"))
logFile.Write([]byte("Second File"))
want := 2
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
}

func TestLogFile_logLevelFiltering(t *testing.T) {
t.Parallel()
require := require.New(t)

tempDir, err := ioutil.TempDir("", "LogWriterFilterTest")
require.NoError(err)
defer os.Remove(tempDir)
filt := LevelFilter()
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: testDuration,
}
logFile.Write([]byte("[INFO] This is an info message"))
logFile.Write([]byte("[DEBUG] This is a debug message"))
logFile.Write([]byte("[ERR] This is an error message"))
want := 2
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
}

func TestLogFile_deleteArchives(t *testing.T) {
t.Parallel()
require := require.New(t)

tempDir, err := ioutil.TempDir("", "LogWriterDeleteArchivesTest")
require.NoError(err)
defer os.Remove(tempDir)

filt := LevelFilter()
filt.MinLevel = logutils.LogLevel("INFO")
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: 24 * time.Hour,
MaxFiles: 1,
}
logFile.Write([]byte("[INFO] Hello World"))
logFile.Write([]byte("[INFO] Second File"))
logFile.Write([]byte("[INFO] Third File"))
want := 2
tempFiles, _ := ioutil.ReadDir(tempDir)

require.Equal(want, len(tempFiles))

for _, tempFile := range tempFiles {
var bytes []byte
var err error
path := filepath.Join(tempDir, tempFile.Name())
if bytes, err = ioutil.ReadFile(path); err != nil {
t.Errorf(err.Error())
return
}
contents := string(bytes)

require.NotEqual("[INFO] Hello World", contents, "oldest log should have been deleted")
}
}

func TestLogFile_deleteArchivesDisabled(t *testing.T) {
t.Parallel()

require := require.New(t)
tempDir, err := ioutil.TempDir("", "LogWriterDeleteArchivesDisabledTest")
require.NoError(err)
defer os.Remove(tempDir)

filt := LevelFilter()
filt.MinLevel = logutils.LogLevel("INFO")
logFile := logFile{
logFilter: filt,
fileName: testFileName,
logPath: tempDir,
MaxBytes: testBytes,
duration: 24 * time.Hour,
MaxFiles: 0,
}
logFile.Write([]byte("[INFO] Hello World"))
logFile.Write([]byte("[INFO] Second File"))
logFile.Write([]byte("[INFO] Third File"))
want := 3
tempFiles, _ := ioutil.ReadDir(tempDir)
require.Equal(want, len(tempFiles))
}
command/agent/testdata/basic.hcl | 2

@@ -13,6 +13,8 @@ log_level = "ERR"

log_json = true

log_file = "/var/log/nomad.log"

bind_addr = "192.168.0.1"

enable_debug = true
command/agent/testdata/basic.json | 1

@@ -143,6 +143,7 @@
],
"leave_on_interrupt": true,
"leave_on_terminate": true,
"log_file": "/var/log/nomad.log",
"log_json": true,
"log_level": "ERR",
"name": "my-web",
@@ -234,7 +234,7 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength
}

basic := []string{
fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)),
fmt.Sprintf("ID|%s", alloc.ID),
fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)),
fmt.Sprintf("Name|%s", alloc.Name),
fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)),

@@ -286,6 +286,9 @@ func TestAllocStatusCommand_ScoreMetrics(t *testing.T) {
require.Contains(out, "Placement Metrics")
require.Contains(out, mockNode1.ID)
require.Contains(out, mockNode2.ID)

// assert we sort headers alphabetically
require.Contains(out, "binpack node-affinity")
require.Contains(out, "final score")
}
@@ -2,6 +2,7 @@ package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -380,7 +381,16 @@ func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix strin
|
||||
// Add header as first row
|
||||
if i == 0 {
|
||||
scoreOutput[0] = "Node|"
|
||||
for scorerName := range scoreMeta.Scores {
|
||||
|
||||
// sort scores alphabetically
|
||||
scores := make([]string, 0, len(scoreMeta.Scores))
|
||||
for score := range scoreMeta.Scores {
|
||||
scores = append(scores, score)
|
||||
}
|
||||
sort.Strings(scores)
|
||||
|
||||
// build score header output
|
||||
for _, scorerName := range scores {
|
||||
scoreOutput[0] += fmt.Sprintf("%v|", scorerName)
|
||||
scorerNames = append(scorerNames, scorerName)
|
||||
}
|
||||
|
||||
@@ -333,7 +333,7 @@ func formatDrain(n *api.Node) string {
func (c *NodeStatusCommand) formatNode(client *api.Client, node *api.Node) int {
    // Format the header output
    basic := []string{
        fmt.Sprintf("ID|%s", limit(node.ID, c.length)),
        fmt.Sprintf("ID|%s", node.ID),
        fmt.Sprintf("Name|%s", node.Name),
        fmt.Sprintf("Class|%s", node.NodeClass),
        fmt.Sprintf("DC|%s", node.Datacenter),

@@ -137,11 +137,8 @@ func TestNodeStatusCommand_Run(t *testing.T) {
    if !strings.Contains(out, "mynode") {
        t.Fatalf("expect to find mynode, got: %s", out)
    }
    if strings.Contains(out, nodeID) {
        t.Fatalf("expected truncated node id, got: %s", out)
    }
    if !strings.Contains(out, nodeID[:8]) {
        t.Fatalf("expected node id %q, got: %s", nodeID[:8], out)
    if !strings.Contains(out, nodeID) {
        t.Fatalf("expected node id %q, got: %s", nodeID, out)
    }
    ui.OutputWriter.Reset()

@@ -13,6 +13,7 @@ import (
    "github.com/hashicorp/hcl/hcl/ast"
    "github.com/hashicorp/nomad/api"
    "github.com/hashicorp/nomad/helper"
    "github.com/hashicorp/nomad/jobspec"
    "github.com/mitchellh/mapstructure"
    "github.com/posener/complete"
)
@@ -261,6 +262,7 @@ func parseQuotaResource(result *api.Resources, list *ast.ObjectList) error {
    valid := []string{
        "cpu",
        "memory",
        "network",
    }
    if err := helper.CheckHCLKeys(listVal, valid); err != nil {
        return multierror.Prefix(err, "resources ->")
@@ -275,5 +277,20 @@ func parseQuotaResource(result *api.Resources, list *ast.ObjectList) error {
        return err
    }

    // Find the network ObjectList, parse it
    nw := listVal.Filter("network")
    if len(nw.Items) > 0 {
        rl, err := jobspec.ParseNetwork(nw)
        if err != nil {
            return multierror.Prefix(err, "resources ->")
        }
        if rl != nil {
            if rl.Mode != "" || rl.HasPorts() {
                return fmt.Errorf("resources -> network only allows mbits")
            }
            result.Networks = []*api.NetworkResource{rl}
        }
    }

    return nil
}

@@ -8,8 +8,10 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/api"
|
||||
"github.com/mitchellh/cli"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestQuotaApplyCommand_Implements(t *testing.T) {
|
||||
@@ -97,3 +99,42 @@ func TestQuotaApplyCommand_Good_JSON(t *testing.T) {
|
||||
assert.Nil(t, err)
|
||||
assert.Len(t, quotas, 1)
|
||||
}
|
||||
|
||||
func TestQuotaApplyNetwork(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
mbits := 20
|
||||
|
||||
cases := []struct {
|
||||
hcl string
|
||||
q *api.QuotaSpec
|
||||
err string
|
||||
}{{
|
||||
hcl: `limit {region = "global", region_limit {network {mbits = 20}}}`,
|
||||
q: &api.QuotaSpec{
|
||||
Limits: []*api.QuotaLimit{{
|
||||
Region: "global",
|
||||
RegionLimit: &api.Resources{
|
||||
Networks: []*api.NetworkResource{{
|
||||
MBits: &mbits,
|
||||
}},
|
||||
},
|
||||
}},
|
||||
},
|
||||
err: "",
|
||||
}, {
|
||||
hcl: `limit {region = "global", region_limit {network { mbits = 20, device = "eth0"}}}`,
|
||||
q: nil,
|
||||
err: "1 error(s) occurred:\n\n* limit -> region_limit -> resources -> network -> invalid key: device",
|
||||
}}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.hcl, func(t *testing.T) {
|
||||
q, err := parseQuotaSpec([]byte(c.hcl))
|
||||
require.Equal(t, c.q, q)
|
||||
if c.err != "" {
|
||||
require.EqualError(t, err, c.err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,8 +3,8 @@
This directory contains some documentation about the Nomad codebase,
aimed at readers who are interested in making code contributions.

If you're looking for information on _using_ using, please instead refer
to [the main Nomad website](https://nomadproject.io).
If you're looking for information on _using_ Nomad, please instead refer
to the [Nomad website](https://nomadproject.io).

## Architecture

@@ -38,3 +38,4 @@ developers and reviewers confidence that the proper changes have been made:

* [New `jobspec` entry](checklist-jobspec.md)
* [New CLI command](checklist-command.md)
* [New RPC endpoint](checklist-rpc-endpoint.md)

@@ -20,6 +20,7 @@

* [ ] Changelog
* [ ] Jobspec entry https://www.nomadproject.io/docs/job-specification/index.html
* [ ] Jobspec sidebar entry https://github.com/hashicorp/nomad/blob/master/website/source/layouts/docs.erb
* [ ] Job JSON API entry https://www.nomadproject.io/api/json-jobs.html
* [ ] Sample Response output in API https://www.nomadproject.io/api/jobs.html
* [ ] Consider if it needs a guide https://www.nomadproject.io/guides/index.html

6  demo/vagrant/Vagrantfile  vendored
@@ -24,7 +24,7 @@ sudo docker --version
sudo apt-get install unzip curl vim -y

echo "Installing Nomad..."
NOMAD_VERSION=0.8.6
NOMAD_VERSION=0.10.0
cd /tmp/
curl -sSL https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -o nomad.zip
unzip nomad.zip
@@ -34,7 +34,7 @@ sudo chmod a+w /etc/nomad.d

echo "Installing Consul..."
CONSUL_VERSION=1.4.0
CONSUL_VERSION=1.6.1
curl -sSL https://releases.hashicorp.com/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip > consul.zip
unzip /tmp/consul.zip
sudo install consul /usr/bin/consul
@@ -68,7 +68,7 @@ nomad -autocomplete-install
SCRIPT

Vagrant.configure(2) do |config|
  config.vm.box = "bento/ubuntu-16.04" # 16.04 LTS
  config.vm.box = "bento/ubuntu-18.04" # 18.04 LTS
  config.vm.hostname = "nomad"
  config.vm.provision "shell", inline: $script, privileged: false

@@ -134,6 +134,25 @@ var (
        Name: pluginName,
    }

    danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
        "enabled": hclspec.NewDefault(
            hclspec.NewAttr("enabled", "bool", false),
            hclspec.NewLiteral(`true`),
        ),
        "period": hclspec.NewDefault(
            hclspec.NewAttr("period", "string", false),
            hclspec.NewLiteral(`"5m"`),
        ),
        "creation_grace": hclspec.NewDefault(
            hclspec.NewAttr("creation_grace", "string", false),
            hclspec.NewLiteral(`"5m"`),
        ),
        "dry_run": hclspec.NewDefault(
            hclspec.NewAttr("dry_run", "bool", false),
            hclspec.NewLiteral(`false`),
        ),
    })

    // configSpec is the hcl specification returned by the ConfigSchema RPC
    // and is used to parse the contents of the 'plugin "docker" {...}' block.
    // Example:
@@ -195,9 +214,22 @@ var (
            hclspec.NewAttr("container", "bool", false),
            hclspec.NewLiteral("true"),
        ),
        "dangling_containers": hclspec.NewDefault(
            hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
            hclspec.NewLiteral(`{
                enabled = true
                period = "5m"
                creation_grace = "5m"
            }`),
        ),
    })), hclspec.NewLiteral(`{
        image = true
        container = true
        dangling_containers = {
            enabled = true
            period = "5m"
            creation_grace = "5m"
        }
    }`)),

    // docker volume options
@@ -491,6 +523,28 @@ type DockerVolumeDriverConfig struct {
    Options hclutils.MapStrStr `codec:"options"`
}

// ContainerGCConfig controls the behavior of the GC reconciler that detects
// dangling nomad containers that aren't tracked due to docker/nomad bugs
type ContainerGCConfig struct {
    // Enabled controls whether the container reconciler is enabled
    Enabled bool `codec:"enabled"`

    // DryRun indicates that the reconciler should log unexpectedly running
    // containers if found, without actually killing them
    DryRun bool `codec:"dry_run"`

    // PeriodStr controls the frequency of scanning containers
    PeriodStr string        `codec:"period"`
    period    time.Duration `codec:"-"`

    // CreationGraceStr is the duration allowed for a newly created container
    // to live without being registered as a running task in nomad.
    // A container is treated as leaked if it has lived for more than the grace
    // duration without being registered as a task.
    CreationGraceStr string        `codec:"creation_grace"`
    CreationGrace    time.Duration `codec:"-"`
}
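
// For illustration only: a sketch assembled from the defaults encoded in
// configSpec above (not copied from Nomad's documentation), showing roughly how
// the corresponding agent plugin stanza would look:
//
//   plugin "docker" {
//     config {
//       gc {
//         dangling_containers {
//           enabled        = true
//           dry_run        = false
//           period         = "5m"
//           creation_grace = "5m"
//         }
//       }
//     }
//   }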
|
||||
type DriverConfig struct {
|
||||
Endpoint string `codec:"endpoint"`
|
||||
Auth AuthConfig `codec:"auth"`
|
||||
@@ -519,6 +573,8 @@ type GCConfig struct {
|
||||
ImageDelay string `codec:"image_delay"`
|
||||
imageDelayDuration time.Duration `codec:"-"`
|
||||
Container bool `codec:"container"`
|
||||
|
||||
DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
|
||||
}
|
||||
|
||||
type VolumeConfig struct {
|
||||
@@ -534,6 +590,8 @@ func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
|
||||
return configSpec, nil
|
||||
}
|
||||
|
||||
const danglingContainersCreationGraceMinimum = 1 * time.Minute
|
||||
|
||||
func (d *Driver) SetConfig(c *base.Config) error {
|
||||
var config DriverConfig
|
||||
if len(c.PluginConfig) != 0 {
|
||||
@@ -551,6 +609,25 @@ func (d *Driver) SetConfig(c *base.Config) error {
|
||||
d.config.GC.imageDelayDuration = dur
|
||||
}
|
||||
|
||||
if len(d.config.GC.DanglingContainers.PeriodStr) > 0 {
|
||||
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse 'period' duration: %v", err)
|
||||
}
|
||||
d.config.GC.DanglingContainers.period = dur
|
||||
}
|
||||
|
||||
if len(d.config.GC.DanglingContainers.CreationGraceStr) > 0 {
|
||||
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationGraceStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse 'creation_grace' duration: %v", err)
|
||||
}
|
||||
if dur < danglingContainersCreationGraceMinimum {
|
||||
return fmt.Errorf("creation_grace is less than minimum, %v", danglingContainersCreationGraceMinimum)
|
||||
}
|
||||
d.config.GC.DanglingContainers.CreationGrace = dur
|
||||
}
|
||||
|
||||
if c.AgentConfig != nil {
|
||||
d.clientConfig = c.AgentConfig.Driver
|
||||
}
|
||||
@@ -568,6 +645,8 @@ func (d *Driver) SetConfig(c *base.Config) error {
|
||||
|
||||
d.coordinator = newDockerCoordinator(coordinatorConfig)
|
||||
|
||||
d.reconciler = newReconciler(d)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -423,3 +423,63 @@ config {
|
||||
|
||||
require.EqualValues(t, expected, tc)
|
||||
}
|
||||
|
||||
// TestConfig_DriverConfig_DanglingContainers asserts that dangling_containers is parsed
|
||||
// and populated with defaults as expected
|
||||
func TestConfig_DriverConfig_DanglingContainers(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
config string
|
||||
expected ContainerGCConfig
|
||||
}{
|
||||
{
|
||||
name: "pure default",
|
||||
config: `{}`,
|
||||
expected: ContainerGCConfig{Enabled: true, PeriodStr: "5m", CreationGraceStr: "5m"},
|
||||
},
|
||||
{
|
||||
name: "partial gc",
|
||||
config: `{ gc { } }`,
|
||||
expected: ContainerGCConfig{Enabled: true, PeriodStr: "5m", CreationGraceStr: "5m"},
|
||||
},
|
||||
{
|
||||
name: "partial gc",
|
||||
config: `{ gc { dangling_containers { } } }`,
|
||||
expected: ContainerGCConfig{Enabled: true, PeriodStr: "5m", CreationGraceStr: "5m"},
|
||||
},
|
||||
{
|
||||
name: "partial dangling_containers",
|
||||
config: `{ gc { dangling_containers { enabled = false } } }`,
|
||||
expected: ContainerGCConfig{Enabled: false, PeriodStr: "5m", CreationGraceStr: "5m"},
|
||||
},
|
||||
{
|
||||
name: "incomplete dangling_containers 2",
|
||||
config: `{ gc { dangling_containers { period = "10m" } } }`,
|
||||
expected: ContainerGCConfig{Enabled: true, PeriodStr: "10m", CreationGraceStr: "5m"},
|
||||
},
|
||||
{
|
||||
name: "full default",
|
||||
config: `{ gc { dangling_containers {
|
||||
enabled = false
|
||||
dry_run = true
|
||||
period = "10m"
|
||||
creation_grace = "20m"
|
||||
}}}`,
|
||||
expected: ContainerGCConfig{
|
||||
Enabled: false,
|
||||
DryRun: true,
|
||||
PeriodStr: "10m",
|
||||
CreationGraceStr: "20m",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
var tc DriverConfig
|
||||
hclutils.NewConfigParser(configSpec).ParseHCL(t, "config "+c.config, &tc)
|
||||
require.EqualValues(t, c.expected, tc.GC.DanglingContainers)
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +66,10 @@ var (
|
||||
nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
|
||||
)
|
||||
|
||||
const (
|
||||
dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
|
||||
)
|
||||
|
||||
type Driver struct {
|
||||
// eventer is used to handle multiplexing of TaskEvents calls such that an
|
||||
// event can be broadcast to all callers
|
||||
@@ -108,6 +112,8 @@ type Driver struct {
|
||||
// for use during fingerprinting.
|
||||
detected bool
|
||||
detectedLock sync.RWMutex
|
||||
|
||||
reconciler *containerReconciler
|
||||
}
|
||||
|
||||
// NewDockerDriver returns a docker implementation of a driver plugin
|
||||
@@ -309,6 +315,10 @@ CREATE:
|
||||
// the container is started
|
||||
runningContainer, err := client.InspectContainer(container.ID)
|
||||
if err != nil {
|
||||
client.RemoveContainer(docker.RemoveContainerOptions{
|
||||
ID: container.ID,
|
||||
Force: true,
|
||||
})
|
||||
msg := "failed to inspect started container"
|
||||
d.logger.Error(msg, "error", err)
|
||||
client.RemoveContainer(docker.RemoveContainerOptions{
|
||||
@@ -642,6 +652,15 @@ func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConf
|
||||
return binds, nil
|
||||
}
|
||||
|
||||
var userMountToUnixMount = map[string]string{
|
||||
// Empty string maps to `rprivate` for backwards compatibility in restored
|
||||
// older tasks, where mount propagation will not be present.
|
||||
"": "rprivate",
|
||||
nstructs.VolumeMountPropagationPrivate: "rprivate",
|
||||
nstructs.VolumeMountPropagationHostToTask: "rslave",
|
||||
nstructs.VolumeMountPropagationBidirectional: "rshared",
|
||||
}
|
||||
|
||||
func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
|
||||
imageID string) (docker.CreateContainerOptions, error) {
|
||||
|
||||
@@ -833,13 +852,24 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
|
||||
|
||||
hostConfig.Mounts = append(hostConfig.Mounts, hm)
|
||||
}
|
||||
|
||||
for _, m := range task.Mounts {
|
||||
hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{
|
||||
hm := docker.HostMount{
|
||||
Type: "bind",
|
||||
Target: m.TaskPath,
|
||||
Source: m.HostPath,
|
||||
ReadOnly: m.Readonly,
|
||||
})
|
||||
}
|
||||
|
||||
// MountPropagation is only supported by Docker on Linux:
|
||||
// https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
|
||||
if runtime.GOOS == "linux" {
|
||||
hm.BindOptions = &docker.BindOptions{
|
||||
Propagation: userMountToUnixMount[m.PropagationMode],
|
||||
}
|
||||
}
|
||||
|
||||
hostConfig.Mounts = append(hostConfig.Mounts, hm)
|
||||
}
|
||||
|
||||
// set DNS search domains and extra hosts
|
||||
@@ -882,7 +912,6 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
|
||||
|
||||
// Setup port mapping and exposed ports
|
||||
if len(task.Resources.NomadResources.Networks) == 0 {
|
||||
logger.Debug("no network interfaces are available")
|
||||
if len(driverConfig.PortMap) > 0 {
|
||||
return c, fmt.Errorf("Trying to map ports but no network interface is available")
|
||||
}
|
||||
@@ -957,9 +986,16 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
|
||||
|
||||
if len(driverConfig.Labels) > 0 {
|
||||
config.Labels = driverConfig.Labels
|
||||
logger.Debug("applied labels on the container", "labels", config.Labels)
|
||||
}
|
||||
|
||||
labels := make(map[string]string, len(driverConfig.Labels)+1)
|
||||
for k, v := range driverConfig.Labels {
|
||||
labels[k] = v
|
||||
}
|
||||
labels[dockerLabelAllocID] = task.AllocID
|
||||
config.Labels = labels
|
||||
logger.Debug("applied labels on the container", "labels", config.Labels)
|
||||
|
||||
config.Env = task.EnvList()
|
||||
|
||||
containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
|
||||
|
||||
@@ -905,7 +905,8 @@ func TestDockerDriver_Labels(t *testing.T) {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
require.Equal(t, 2, len(container.Config.Labels))
|
||||
// expect to see 1 additional standard label
|
||||
require.Equal(t, len(cfg.Labels)+1, len(container.Config.Labels))
|
||||
for k, v := range cfg.Labels {
|
||||
require.Equal(t, v, container.Config.Labels[k])
|
||||
}
|
||||
@@ -1008,6 +1009,56 @@ func TestDockerDriver_CreateContainerConfig(t *testing.T) {
|
||||
require.Equal(t, containerName, c.Name)
|
||||
}
|
||||
|
||||
func TestDockerDriver_CreateContainerConfig_User(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
task, cfg, _ := dockerTask(t)
|
||||
task.User = "random-user-1"
|
||||
|
||||
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
|
||||
|
||||
dh := dockerDriverHarness(t, nil)
|
||||
driver := dh.Impl().(*Driver)
|
||||
|
||||
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, task.User, c.Config.User)
|
||||
}
|
||||
|
||||
func TestDockerDriver_CreateContainerConfig_Labels(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
task, cfg, _ := dockerTask(t)
|
||||
task.AllocID = uuid.Generate()
|
||||
task.JobName = "redis-demo-job"
|
||||
|
||||
cfg.Labels = map[string]string{
|
||||
"user_label": "user_value",
|
||||
|
||||
// com.hashicorp.nomad. labels are reserved and
|
||||
// cannot be overridden
|
||||
"com.hashicorp.nomad.alloc_id": "bad_value",
|
||||
}
|
||||
|
||||
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
|
||||
|
||||
dh := dockerDriverHarness(t, nil)
|
||||
driver := dh.Impl().(*Driver)
|
||||
|
||||
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedLabels := map[string]string{
|
||||
// user provided labels
|
||||
"user_label": "user_value",
|
||||
// default labels
|
||||
"com.hashicorp.nomad.alloc_id": task.AllocID,
|
||||
}
|
||||
|
||||
require.Equal(t, expectedLabels, c.Config.Labels)
|
||||
}
|
||||
|
||||
func TestDockerDriver_CreateContainerConfig_Logging(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -598,22 +599,32 @@ func TestDockerDriver_CreateContainerConfig_MountsCombined(t *testing.T) {
|
||||
|
||||
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedMounts := []docker.HostMount{
|
||||
{
|
||||
Type: "bind",
|
||||
Source: "/tmp/cfg-mount",
|
||||
Target: "/container/tmp/cfg-mount",
|
||||
ReadOnly: false,
|
||||
BindOptions: &docker.BindOptions{},
|
||||
Type: "bind",
|
||||
Source: "/tmp/cfg-mount",
|
||||
Target: "/container/tmp/cfg-mount",
|
||||
ReadOnly: false,
|
||||
BindOptions: &docker.BindOptions{
|
||||
Propagation: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "bind",
|
||||
Source: "/tmp/task-mount",
|
||||
Target: "/container/tmp/task-mount",
|
||||
ReadOnly: true,
|
||||
BindOptions: &docker.BindOptions{
|
||||
Propagation: "rprivate",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if runtime.GOOS != "linux" {
|
||||
expectedMounts[0].BindOptions = &docker.BindOptions{}
|
||||
expectedMounts[1].BindOptions = &docker.BindOptions{}
|
||||
}
|
||||
|
||||
foundMounts := c.HostConfig.Mounts
|
||||
sort.Slice(foundMounts, func(i, j int) bool {
|
||||
return foundMounts[i].Target < foundMounts[j].Target
|
||||
|
||||
@@ -13,6 +13,10 @@ import (
|
||||
)
|
||||
|
||||
func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
|
||||
// start reconciler when we start fingerprinting
|
||||
// this is the only method called when driver is launched properly
|
||||
d.reconciler.Start()
|
||||
|
||||
ch := make(chan *drivers.Fingerprint)
|
||||
go d.handleFingerprint(ctx, ch)
|
||||
return ch, nil
|
||||
|
||||
228  drivers/docker/reconciler.go  Normal file
@@ -0,0 +1,228 @@
|
||||
package docker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
hclog "github.com/hashicorp/go-hclog"
|
||||
)
|
||||
|
||||
// containerReconciler detects and kills unexpectedly running containers.
//
// Due to Docker's architecture and network-based communication, it is
// possible for Docker to start a container successfully while the
// creation API call fails with a network error. containerReconciler
// scans for these untracked containers and kills them.
|
||||
type containerReconciler struct {
|
||||
ctx context.Context
|
||||
config *ContainerGCConfig
|
||||
client *docker.Client
|
||||
logger hclog.Logger
|
||||
|
||||
isDriverHealthy func() bool
|
||||
trackedContainers func() map[string]bool
|
||||
isNomadContainer func(c docker.APIContainers) bool
|
||||
|
||||
once sync.Once
|
||||
}
|
||||
|
||||
func newReconciler(d *Driver) *containerReconciler {
|
||||
return &containerReconciler{
|
||||
ctx: d.ctx,
|
||||
config: &d.config.GC.DanglingContainers,
|
||||
client: client,
|
||||
logger: d.logger,
|
||||
|
||||
isDriverHealthy: func() bool { return d.previouslyDetected() && d.fingerprintSuccessful() },
|
||||
trackedContainers: d.trackedContainers,
|
||||
isNomadContainer: isNomadContainer,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *containerReconciler) Start() {
|
||||
if !r.config.Enabled {
|
||||
r.logger.Debug("skipping dangling containers handling; is disabled")
|
||||
return
|
||||
}
|
||||
|
||||
r.once.Do(func() {
|
||||
go r.removeDanglingContainersGoroutine()
|
||||
})
|
||||
}
|
||||
|
||||
func (r *containerReconciler) removeDanglingContainersGoroutine() {
|
||||
period := r.config.period
|
||||
|
||||
lastIterSucceeded := true
|
||||
|
||||
// ensure that we wait for at least a period or the creation grace timeout
// before the first container GC iteration.
// The initial delay is a grace period for restoring allocations before the
// driver may kill containers launched by an earlier nomad process.
|
||||
initialDelay := period
|
||||
if r.config.CreationGrace > initialDelay {
|
||||
initialDelay = r.config.CreationGrace
|
||||
}
|
||||
|
||||
timer := time.NewTimer(initialDelay)
|
||||
for {
|
||||
select {
|
||||
case <-timer.C:
|
||||
if r.isDriverHealthy() {
|
||||
err := r.removeDanglingContainersIteration()
|
||||
if err != nil && lastIterSucceeded {
|
||||
r.logger.Warn("failed to remove dangling containers", "error", err)
|
||||
}
|
||||
lastIterSucceeded = (err == nil)
|
||||
}
|
||||
|
||||
timer.Reset(period)
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *containerReconciler) removeDanglingContainersIteration() error {
|
||||
cutoff := time.Now().Add(-r.config.CreationGrace)
|
||||
tracked := r.trackedContainers()
|
||||
untracked, err := r.untrackedContainers(tracked, cutoff)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to find untracked containers: %v", err)
|
||||
}
|
||||
|
||||
if len(untracked) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.config.DryRun {
|
||||
r.logger.Info("detected untracked containers", "container_ids", untracked)
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, id := range untracked {
|
||||
ctx, cancel := r.dockerAPIQueryContext()
|
||||
err := client.RemoveContainer(docker.RemoveContainerOptions{
|
||||
Context: ctx,
|
||||
ID: id,
|
||||
Force: true,
|
||||
})
|
||||
cancel()
|
||||
if err != nil {
|
||||
r.logger.Warn("failed to remove untracked container", "container_id", id, "error", err)
|
||||
} else {
|
||||
r.logger.Info("removed untracked container", "container_id", id)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// untrackedContainers returns the ids of containers that are suspected
// to have been started by Nomad but aren't tracked by this driver
|
||||
func (r *containerReconciler) untrackedContainers(tracked map[string]bool, cutoffTime time.Time) ([]string, error) {
|
||||
result := []string{}
|
||||
|
||||
ctx, cancel := r.dockerAPIQueryContext()
|
||||
defer cancel()
|
||||
|
||||
cc, err := client.ListContainers(docker.ListContainersOptions{
|
||||
Context: ctx,
|
||||
All: false, // only reconcile running containers
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list containers: %v", err)
|
||||
}
|
||||
|
||||
cutoff := cutoffTime.Unix()
|
||||
|
||||
for _, c := range cc {
|
||||
if tracked[c.ID] {
|
||||
continue
|
||||
}
|
||||
|
||||
if c.Created > cutoff {
|
||||
continue
|
||||
}
|
||||
|
||||
if !r.isNomadContainer(c) {
|
||||
continue
|
||||
}
|
||||
|
||||
result = append(result, c.ID)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// dockerAPIQueryContext returns a context for a docker API call with an appropriate timeout
// to protect against a wedged or locked-up API call.
//
// We'll retry hitting the Docker API on the subsequent iteration.
|
||||
func (r *containerReconciler) dockerAPIQueryContext() (context.Context, context.CancelFunc) {
|
||||
// use a reasonable floor to avoid a very small limit
|
||||
timeout := 30 * time.Second
|
||||
|
||||
if timeout < r.config.period {
|
||||
timeout = r.config.period
|
||||
}
|
||||
|
||||
return context.WithTimeout(context.Background(), timeout)
|
||||
}
|
||||
|
||||
func isNomadContainer(c docker.APIContainers) bool {
|
||||
if _, ok := c.Labels[dockerLabelAllocID]; ok {
|
||||
return true
|
||||
}
|
||||
|
||||
// pre-0.10 containers aren't tagged or labeled in any way,
// so use a cheap heuristic based on mount paths
// before inspecting container details
|
||||
if !hasMount(c, "/alloc") ||
|
||||
!hasMount(c, "/local") ||
|
||||
!hasMount(c, "/secrets") ||
|
||||
!hasNomadName(c) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func hasMount(c docker.APIContainers, p string) bool {
|
||||
for _, m := range c.Mounts {
|
||||
if m.Destination == p {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
var nomadContainerNamePattern = regexp.MustCompile(`\/.*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)
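
// For illustration: a Nomad-launched container is named after its task and
// allocation UUID, e.g. "/redis-72bfa388-024e-a903-45b8-2bc28b74ed69" (see
// test-resources/docker/reconciler_containers_list.json below), which matches
// the pattern above, while a Docker-generated name such as "/serene_keller"
// does not.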
|
||||
|
||||
func hasNomadName(c docker.APIContainers) bool {
|
||||
for _, n := range c.Names {
|
||||
if nomadContainerNamePattern.MatchString(n) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (d *Driver) trackedContainers() map[string]bool {
|
||||
d.tasks.lock.RLock()
|
||||
defer d.tasks.lock.RUnlock()
|
||||
|
||||
r := make(map[string]bool, len(d.tasks.store))
|
||||
for _, h := range d.tasks.store {
|
||||
r[h.containerID] = true
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
202  drivers/docker/reconciler_test.go  Normal file
@@ -0,0 +1,202 @@
|
||||
package docker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
"github.com/hashicorp/nomad/client/testutil"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func fakeContainerList(t *testing.T) (nomadContainer, nonNomadContainer docker.APIContainers) {
|
||||
path := "./test-resources/docker/reconciler_containers_list.json"
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open file: %v", err)
|
||||
}
|
||||
|
||||
var sampleContainerList []docker.APIContainers
|
||||
err = json.NewDecoder(f).Decode(&sampleContainerList)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to decode container list: %v", err)
|
||||
}
|
||||
|
||||
return sampleContainerList[0], sampleContainerList[1]
|
||||
}
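
// Editorial note: the fixture above holds exactly two entries (it is reproduced
// later in this change): index 0 is a Nomad-managed redis container with
// /alloc, /local and /secrets bind mounts and a task-plus-UUID name, and index
// 1 is an unrelated ubuntu container with no mounts, hence the return order.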
|
||||
|
||||
func Test_HasMount(t *testing.T) {
|
||||
nomadContainer, nonNomadContainer := fakeContainerList(t)
|
||||
|
||||
require.True(t, hasMount(nomadContainer, "/alloc"))
|
||||
require.True(t, hasMount(nomadContainer, "/data"))
|
||||
require.True(t, hasMount(nomadContainer, "/secrets"))
|
||||
require.False(t, hasMount(nomadContainer, "/random"))
|
||||
|
||||
require.False(t, hasMount(nonNomadContainer, "/alloc"))
|
||||
require.False(t, hasMount(nonNomadContainer, "/data"))
|
||||
require.False(t, hasMount(nonNomadContainer, "/secrets"))
|
||||
require.False(t, hasMount(nonNomadContainer, "/random"))
|
||||
}
|
||||
|
||||
func Test_HasNomadName(t *testing.T) {
|
||||
nomadContainer, nonNomadContainer := fakeContainerList(t)
|
||||
|
||||
require.True(t, hasNomadName(nomadContainer))
|
||||
require.False(t, hasNomadName(nonNomadContainer))
|
||||
}
|
||||
|
||||
// TestDanglingContainerRemoval asserts containers without corresponding tasks
|
||||
// are removed after the creation grace period.
|
||||
func TestDanglingContainerRemoval(t *testing.T) {
|
||||
testutil.DockerCompatible(t)
|
||||
|
||||
// start two containers: one tracked nomad container, and one unrelated container
|
||||
task, cfg, _ := dockerTask(t)
|
||||
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
|
||||
|
||||
client, d, handle, cleanup := dockerSetup(t, task)
|
||||
defer cleanup()
|
||||
require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
|
||||
|
||||
nonNomadContainer, err := client.CreateContainer(docker.CreateContainerOptions{
|
||||
Name: "mytest-image-" + uuid.Generate(),
|
||||
Config: &docker.Config{
|
||||
Image: cfg.Image,
|
||||
Cmd: append([]string{cfg.Command}, cfg.Args...),
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer client.RemoveContainer(docker.RemoveContainerOptions{
|
||||
ID: nonNomadContainer.ID,
|
||||
Force: true,
|
||||
})
|
||||
|
||||
err = client.StartContainer(nonNomadContainer.ID, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
untrackedNomadContainer, err := client.CreateContainer(docker.CreateContainerOptions{
|
||||
Name: "mytest-image-" + uuid.Generate(),
|
||||
Config: &docker.Config{
|
||||
Image: cfg.Image,
|
||||
Cmd: append([]string{cfg.Command}, cfg.Args...),
|
||||
Labels: map[string]string{
|
||||
dockerLabelAllocID: uuid.Generate(),
|
||||
},
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer client.RemoveContainer(docker.RemoveContainerOptions{
|
||||
ID: untrackedNomadContainer.ID,
|
||||
Force: true,
|
||||
})
|
||||
|
||||
err = client.StartContainer(untrackedNomadContainer.ID, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
dd := d.Impl().(*Driver)
|
||||
|
||||
reconciler := newReconciler(dd)
|
||||
trackedContainers := map[string]bool{handle.containerID: true}
|
||||
|
||||
tf := reconciler.trackedContainers()
|
||||
require.Contains(t, tf, handle.containerID)
|
||||
require.NotContains(t, tf, untrackedNomadContainer)
|
||||
require.NotContains(t, tf, nonNomadContainer.ID)
|
||||
|
||||
// assert tracked containers should never be untracked
|
||||
untracked, err := reconciler.untrackedContainers(trackedContainers, time.Now())
|
||||
require.NoError(t, err)
|
||||
require.NotContains(t, untracked, handle.containerID)
|
||||
require.NotContains(t, untracked, nonNomadContainer.ID)
|
||||
require.Contains(t, untracked, untrackedNomadContainer.ID)
|
||||
|
||||
// assert we recognize nomad containers with appropriate cutoff
|
||||
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now())
|
||||
require.NoError(t, err)
|
||||
require.Contains(t, untracked, handle.containerID)
|
||||
require.Contains(t, untracked, untrackedNomadContainer.ID)
|
||||
require.NotContains(t, untracked, nonNomadContainer.ID)
|
||||
|
||||
// but ignore if creation happened before cutoff
|
||||
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now().Add(-1*time.Minute))
|
||||
require.NoError(t, err)
|
||||
require.NotContains(t, untracked, handle.containerID)
|
||||
require.NotContains(t, untracked, untrackedNomadContainer.ID)
|
||||
require.NotContains(t, untracked, nonNomadContainer.ID)
|
||||
|
||||
// a full integration test to assert that containers are removed
|
||||
prestineDriver := dockerDriverHarness(t, nil).Impl().(*Driver)
|
||||
prestineDriver.config.GC.DanglingContainers = ContainerGCConfig{
|
||||
Enabled: true,
|
||||
period: 1 * time.Second,
|
||||
CreationGrace: 0 * time.Second,
|
||||
}
|
||||
nReconciler := newReconciler(prestineDriver)
|
||||
|
||||
require.NoError(t, nReconciler.removeDanglingContainersIteration())
|
||||
|
||||
_, err = client.InspectContainer(nonNomadContainer.ID)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = client.InspectContainer(handle.containerID)
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), NoSuchContainerError)
|
||||
|
||||
_, err = client.InspectContainer(untrackedNomadContainer.ID)
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), NoSuchContainerError)
|
||||
}
|
||||
|
||||
// TestDanglingContainerRemoval_Stopped asserts stopped containers without
// corresponding tasks are not removed even after the creation grace period.
|
||||
func TestDanglingContainerRemoval_Stopped(t *testing.T) {
|
||||
testutil.DockerCompatible(t)
|
||||
|
||||
_, cfg, _ := dockerTask(t)
|
||||
|
||||
client := newTestDockerClient(t)
|
||||
container, err := client.CreateContainer(docker.CreateContainerOptions{
|
||||
Name: "mytest-image-" + uuid.Generate(),
|
||||
Config: &docker.Config{
|
||||
Image: cfg.Image,
|
||||
Cmd: append([]string{cfg.Command}, cfg.Args...),
|
||||
Labels: map[string]string{
|
||||
dockerLabelAllocID: uuid.Generate(),
|
||||
},
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer client.RemoveContainer(docker.RemoveContainerOptions{
|
||||
ID: container.ID,
|
||||
Force: true,
|
||||
})
|
||||
|
||||
err = client.StartContainer(container.ID, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.StopContainer(container.ID, 60)
|
||||
require.NoError(t, err)
|
||||
|
||||
dd := dockerDriverHarness(t, nil).Impl().(*Driver)
|
||||
reconciler := newReconciler(dd)
|
||||
|
||||
// assert nomad container is tracked, and we ignore stopped one
|
||||
tf := reconciler.trackedContainers()
|
||||
require.NotContains(t, tf, container.ID)
|
||||
|
||||
untracked, err := reconciler.untrackedContainers(map[string]bool{}, time.Now())
|
||||
require.NoError(t, err)
|
||||
require.NotContains(t, untracked, container.ID)
|
||||
|
||||
// if we start container again, it'll be marked as untracked
|
||||
require.NoError(t, client.StartContainer(container.ID, nil))
|
||||
|
||||
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now())
|
||||
require.NoError(t, err)
|
||||
require.Contains(t, untracked, container.ID)
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
[
|
||||
{
|
||||
"Id": "eb23be71498c2dc0254c029f32b360a000caf33157d1c93e226f4c1a4c9d2218",
|
||||
"Names": [
|
||||
"/redis-72bfa388-024e-a903-45b8-2bc28b74ed69"
|
||||
],
|
||||
"Image": "redis:3.2",
|
||||
"ImageID": "sha256:87856cc39862cec77541d68382e4867d7ccb29a85a17221446c857ddaebca916",
|
||||
"Command": "docker-entrypoint.sh redis-server",
|
||||
"Created": 1568383081,
|
||||
"Ports": [
|
||||
{
|
||||
"PrivatePort": 6379,
|
||||
"Type": "tcp"
|
||||
}
|
||||
],
|
||||
"Labels": {},
|
||||
"State": "running",
|
||||
"Status": "Up 9 seconds",
|
||||
"HostConfig": {
|
||||
"NetworkMode": "default"
|
||||
},
|
||||
"NetworkSettings": {
|
||||
"Networks": {
|
||||
"bridge": {
|
||||
"IPAMConfig": null,
|
||||
"Links": null,
|
||||
"Aliases": null,
|
||||
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
|
||||
"EndpointID": "ed830b4f2f33ab4134aea941611b00b9e576b35a4325d52bacfedd1e2e1ba213",
|
||||
"Gateway": "172.17.0.1",
|
||||
"IPAddress": "172.17.0.3",
|
||||
"IPPrefixLen": 16,
|
||||
"IPv6Gateway": "",
|
||||
"GlobalIPv6Address": "",
|
||||
"GlobalIPv6PrefixLen": 0,
|
||||
"MacAddress": "02:42:ac:11:00:03",
|
||||
"DriverOpts": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"Mounts": [
|
||||
{
|
||||
"Type": "bind",
|
||||
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/alloc",
|
||||
"Destination": "/alloc",
|
||||
"Mode": "",
|
||||
"RW": true,
|
||||
"Propagation": "rprivate"
|
||||
},
|
||||
{
|
||||
"Type": "volume",
|
||||
"Name": "d5d7f0f9a3326414257c57cfca01db96c53a424b43e251516511694554309681",
|
||||
"Source": "",
|
||||
"Destination": "/data",
|
||||
"Driver": "local",
|
||||
"Mode": "",
|
||||
"RW": true,
|
||||
"Propagation": ""
|
||||
},
|
||||
{
|
||||
"Type": "bind",
|
||||
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/local",
|
||||
"Destination": "/local",
|
||||
"Mode": "",
|
||||
"RW": true,
|
||||
"Propagation": "rprivate"
|
||||
},
|
||||
{
|
||||
"Type": "bind",
|
||||
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/secrets",
|
||||
"Destination": "/secrets",
|
||||
"Mode": "",
|
||||
"RW": true,
|
||||
"Propagation": "rprivate"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Id": "99c49fbe999f6df7b7d6a891d69fe57d7b771a30d5d2899a922b44698084e5c9",
|
||||
"Names": [
|
||||
"/serene_keller"
|
||||
],
|
||||
"Image": "ubuntu:16.04",
|
||||
"ImageID": "sha256:9361ce633ff193349d54bed380a5afe86043b09fd6ea8da7549dbbedfc2a7077",
|
||||
"Command": "/bin/bash",
|
||||
"Created": 1567795217,
|
||||
"Ports": [],
|
||||
"Labels": {},
|
||||
"State": "running",
|
||||
"Status": "Up 6 days",
|
||||
"HostConfig": {
|
||||
"NetworkMode": "default"
|
||||
},
|
||||
"NetworkSettings": {
|
||||
"Networks": {
|
||||
"bridge": {
|
||||
"IPAMConfig": null,
|
||||
"Links": null,
|
||||
"Aliases": null,
|
||||
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
|
||||
"EndpointID": "fab83a0d4089ca9944ca53c882bdf40ad310c6fda30dda0092731feb9bc9fab6",
|
||||
"Gateway": "172.17.0.1",
|
||||
"IPAddress": "172.17.0.2",
|
||||
"IPPrefixLen": 16,
|
||||
"IPv6Gateway": "",
|
||||
"GlobalIPv6Address": "",
|
||||
"GlobalIPv6PrefixLen": 0,
|
||||
"MacAddress": "02:42:ac:11:00:02",
|
||||
"DriverOpts": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"Mounts": []
|
||||
}
|
||||
]
|
||||
@@ -813,6 +813,15 @@ func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
var userMountToUnixMount = map[string]int{
|
||||
// Empty string maps to `rprivate` for backwards compatibility in restored
|
||||
// older tasks, where mount propagation will not be present.
|
||||
"": unix.MS_PRIVATE | unix.MS_REC, // rprivate
|
||||
structs.VolumeMountPropagationPrivate: unix.MS_PRIVATE | unix.MS_REC, // rprivate
|
||||
structs.VolumeMountPropagationHostToTask: unix.MS_SLAVE | unix.MS_REC, // rslave
|
||||
structs.VolumeMountPropagationBidirectional: unix.MS_SHARED | unix.MS_REC, // rshared
|
||||
}
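
// As a sketch of how this table is consumed by cmdMounts below: a MountConfig
// whose PropagationMode is structs.VolumeMountPropagationHostToTask ends up
// with PropagationFlags of unix.MS_SLAVE|unix.MS_REC (rslave) on the resulting
// libcontainer mount, and an empty mode falls back to rprivate.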
|
||||
|
||||
// cmdMounts converts a list of driver.MountConfigs into executor.Mounts.
|
||||
func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
|
||||
if len(mounts) == 0 {
|
||||
@@ -826,11 +835,13 @@ func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
|
||||
if m.Readonly {
|
||||
flags |= unix.MS_RDONLY
|
||||
}
|
||||
|
||||
r[i] = &lconfigs.Mount{
|
||||
Source: m.HostPath,
|
||||
Destination: m.TaskPath,
|
||||
Device: "bind",
|
||||
Flags: flags,
|
||||
Source: m.HostPath,
|
||||
Destination: m.TaskPath,
|
||||
Device: "bind",
|
||||
Flags: flags,
|
||||
PropagationFlags: []int{userMountToUnixMount[m.PropagationMode]},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -467,16 +467,18 @@ func TestExecutor_cmdMounts(t *testing.T) {
|
||||
|
||||
expected := []*lconfigs.Mount{
|
||||
{
|
||||
Source: "/host/path-ro",
|
||||
Destination: "/task/path-ro",
|
||||
Flags: unix.MS_BIND | unix.MS_RDONLY,
|
||||
Device: "bind",
|
||||
Source: "/host/path-ro",
|
||||
Destination: "/task/path-ro",
|
||||
Flags: unix.MS_BIND | unix.MS_RDONLY,
|
||||
Device: "bind",
|
||||
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
|
||||
},
|
||||
{
|
||||
Source: "/host/path-rw",
|
||||
Destination: "/task/path-rw",
|
||||
Flags: unix.MS_BIND,
|
||||
Device: "bind",
|
||||
Source: "/host/path-rw",
|
||||
Destination: "/task/path-rw",
|
||||
Flags: unix.MS_BIND,
|
||||
Device: "bind",
|
||||
PropagationFlags: []int{unix.MS_PRIVATE | unix.MS_REC},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -1,20 +1,24 @@
|
||||
job "test1" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "service"
|
||||
type = "service"
|
||||
|
||||
affinity {
|
||||
attribute ="${meta.rack}"
|
||||
operator = "="
|
||||
value = "r1"
|
||||
weight = -50
|
||||
attribute = "${meta.rack}"
|
||||
operator = "="
|
||||
value = "r1"
|
||||
weight = -50
|
||||
}
|
||||
|
||||
group "test1" {
|
||||
count = 4
|
||||
|
||||
affinity {
|
||||
attribute ="${node.datacenter}"
|
||||
operator = "="
|
||||
value = "dc1"
|
||||
weight = -50
|
||||
attribute = "${node.datacenter}"
|
||||
operator = "="
|
||||
value = "dc1"
|
||||
weight = -50
|
||||
}
|
||||
|
||||
task "test" {
|
||||
driver = "raw_exec"
|
||||
|
||||
@@ -24,4 +28,4 @@ job "test1" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,20 +1,24 @@
|
||||
job "test1" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "service"
|
||||
type = "service"
|
||||
|
||||
affinity {
|
||||
attribute ="${meta.rack}"
|
||||
operator = "="
|
||||
value = "r1"
|
||||
weight = 100
|
||||
attribute = "${meta.rack}"
|
||||
operator = "="
|
||||
value = "r1"
|
||||
weight = 100
|
||||
}
|
||||
|
||||
group "test1" {
|
||||
count = 4
|
||||
|
||||
affinity {
|
||||
attribute ="${node.datacenter}"
|
||||
operator = "="
|
||||
value = "dc1"
|
||||
weight = 100
|
||||
attribute = "${node.datacenter}"
|
||||
operator = "="
|
||||
value = "dc1"
|
||||
weight = 100
|
||||
}
|
||||
|
||||
task "test" {
|
||||
driver = "raw_exec"
|
||||
|
||||
@@ -24,4 +28,4 @@ job "test1" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
job "test1" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "service"
|
||||
type = "service"
|
||||
|
||||
group "test1" {
|
||||
count = 5
|
||||
|
||||
affinity {
|
||||
attribute ="${node.datacenter}"
|
||||
operator = "="
|
||||
value = "dc1"
|
||||
weight = 100
|
||||
attribute = "${node.datacenter}"
|
||||
operator = "="
|
||||
value = "dc1"
|
||||
weight = 100
|
||||
}
|
||||
|
||||
task "test" {
|
||||
driver = "raw_exec"
|
||||
|
||||
@@ -19,4 +21,4 @@ job "test1" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,15 +3,15 @@ job "test_raw" {
|
||||
type = "service"
|
||||
|
||||
group "test" {
|
||||
count = 1
|
||||
count = 1
|
||||
|
||||
task "test1" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "bash"
|
||||
args = ["-c", "var=10000;while true; do a=$(awk -v x=$var 'BEGIN{print sqrt(x)}'); ((var++)); done" ]
|
||||
}
|
||||
args = ["-c", "var=10000;while true; do a=$(awk -v x=$var 'BEGIN{print sqrt(x)}'); ((var++)); done"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,13 +4,16 @@
|
||||
|
||||
job "restarter" {
|
||||
datacenters = ["dc1"]
|
||||
|
||||
group "restarter" {
|
||||
restart {
|
||||
attempts = 100
|
||||
delay = "3s"
|
||||
}
|
||||
|
||||
task "restarter" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "/bin/bash"
|
||||
args = ["-c", "echo $$ >> pid && sleep 1 && exit 99"]
|
||||
|
||||
@@ -3,8 +3,10 @@
|
||||
|
||||
job "sleeper" {
|
||||
datacenters = ["dc1"]
|
||||
|
||||
task "sleeper" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "/bin/bash"
|
||||
args = ["-c", "echo $$ >> pid && sleep 999999"]
|
||||
|
||||
@@ -13,8 +13,8 @@ job "consul_canary_test" {
|
||||
}
|
||||
|
||||
service {
|
||||
name = "canarytest"
|
||||
tags = ["foo", "bar"]
|
||||
name = "canarytest"
|
||||
tags = ["foo", "bar"]
|
||||
canary_tags = ["foo", "canary"]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,30 +1,31 @@
|
||||
job "consul-example" {
|
||||
datacenters = ["dc1"]
|
||||
type = "service"
|
||||
type = "service"
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "3m"
|
||||
max_parallel = 1
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "3m"
|
||||
progress_deadline = "10m"
|
||||
auto_revert = false
|
||||
canary = 0
|
||||
auto_revert = false
|
||||
canary = 0
|
||||
}
|
||||
|
||||
migrate {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
}
|
||||
|
||||
group "cache" {
|
||||
count = 3
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
|
||||
ephemeral_disk {
|
||||
@@ -33,18 +34,22 @@ job "consul-example" {
|
||||
|
||||
task "redis" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "redis:3.2"
|
||||
|
||||
port_map {
|
||||
db = 6379
|
||||
}
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 500 # 500 MHz
|
||||
memory = 256 # 256MB
|
||||
|
||||
network {
|
||||
mbits = 10
|
||||
port "db" {}
|
||||
port "db" {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,6 +57,7 @@ job "consul-example" {
|
||||
name = "redis-cache"
|
||||
tags = ["global", "cache"]
|
||||
port = "db"
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "tcp"
|
||||
@@ -61,4 +67,4 @@ job "consul-example" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,21 +7,22 @@ job "deployment_auto.nomad" {
|
||||
update {
|
||||
max_parallel = 3
|
||||
auto_promote = true
|
||||
canary = 2
|
||||
canary = 2
|
||||
}
|
||||
|
||||
task "one" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "/bin/sleep"
|
||||
# change args to update the job, the only changes
|
||||
args = ["1000000"]
|
||||
command = "/bin/sleep"
|
||||
|
||||
# change args to update the job, the only changes
|
||||
args = ["1000000"]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 20
|
||||
memory = 20
|
||||
cpu = 20
|
||||
memory = 20
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -30,9 +31,9 @@ job "deployment_auto.nomad" {
|
||||
count = 3
|
||||
|
||||
update {
|
||||
max_parallel = 2
|
||||
auto_promote = true
|
||||
canary = 2
|
||||
max_parallel = 2
|
||||
auto_promote = true
|
||||
canary = 2
|
||||
min_healthy_time = "2s"
|
||||
}
|
||||
|
||||
@@ -40,14 +41,15 @@ job "deployment_auto.nomad" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "/bin/sleep"
|
||||
# change args to update the job, the only changes
|
||||
args = ["2000000"]
|
||||
command = "/bin/sleep"
|
||||
|
||||
# change args to update the job, the only changes
|
||||
args = ["2000000"]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 20
|
||||
memory = 20
|
||||
cpu = 20
|
||||
memory = 20
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,21 +7,22 @@ job "deployment_auto.nomad" {
|
||||
update {
|
||||
max_parallel = 3
|
||||
auto_promote = true
|
||||
canary = 2
|
||||
canary = 2
|
||||
}
|
||||
|
||||
task "one" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "/bin/sleep"
|
||||
# change args to update the job, the only changes
|
||||
args = ["1000001"]
|
||||
command = "/bin/sleep"
|
||||
|
||||
# change args to update the job, the only changes
|
||||
args = ["1000001"]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 20
|
||||
memory = 20
|
||||
cpu = 20
|
||||
memory = 20
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -30,9 +31,9 @@ job "deployment_auto.nomad" {
|
||||
count = 3
|
||||
|
||||
update {
|
||||
max_parallel = 2
|
||||
auto_promote = true
|
||||
canary = 2
|
||||
max_parallel = 2
|
||||
auto_promote = true
|
||||
canary = 2
|
||||
min_healthy_time = "2s"
|
||||
}
|
||||
|
||||
@@ -40,14 +41,15 @@ job "deployment_auto.nomad" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "/bin/sleep"
|
||||
# change args to update the job, the only changes
|
||||
args = ["2000001"]
|
||||
command = "/bin/sleep"
|
||||
|
||||
# change args to update the job, the only changes
|
||||
args = ["2000001"]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 20
|
||||
memory = 20
|
||||
cpu = 20
|
||||
memory = 20
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,23 +1,27 @@
|
||||
job "fabio" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "system"
|
||||
type = "system"
|
||||
|
||||
group "fabio" {
|
||||
task "fabio" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "fabiolb/fabio"
|
||||
image = "fabiolb/fabio"
|
||||
network_mode = "host"
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 100
|
||||
memory = 64
|
||||
|
||||
network {
|
||||
mbits = 20
|
||||
|
||||
port "lb" {
|
||||
static = 9999
|
||||
}
|
||||
|
||||
port "ui" {
|
||||
static = 9998
|
||||
}
|
||||
|
||||
@@ -1,27 +1,33 @@
|
||||
job "cpustress" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "batch"
|
||||
type = "batch"
|
||||
|
||||
group "cpustress" {
|
||||
count = 1
|
||||
|
||||
restart {
|
||||
mode = "fail"
|
||||
mode = "fail"
|
||||
attempts = 0
|
||||
}
|
||||
|
||||
reschedule {
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
unlimited = false
|
||||
}
|
||||
|
||||
task "cpustress" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "progrium/stress"
|
||||
args = [
|
||||
"-c", "4",
|
||||
"-t", "600"
|
||||
]
|
||||
|
||||
args = [
|
||||
"-c",
|
||||
"4",
|
||||
"-t",
|
||||
"600",
|
||||
]
|
||||
}
|
||||
|
||||
resources {
|
||||
@@ -30,4 +36,4 @@ job "cpustress" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,32 +1,39 @@
|
||||
job "diskstress" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "batch"
|
||||
type = "batch"
|
||||
|
||||
group "diskstress" {
|
||||
count = 1
|
||||
|
||||
restart {
|
||||
mode = "fail"
|
||||
mode = "fail"
|
||||
attempts = 0
|
||||
}
|
||||
|
||||
reschedule {
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
unlimited = false
|
||||
}
|
||||
|
||||
task "diskstress" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "progrium/stress"
|
||||
args = [
|
||||
"-d", "2",
|
||||
"-t", "30"
|
||||
]
|
||||
|
||||
args = [
|
||||
"-d",
|
||||
"2",
|
||||
"-t",
|
||||
"30",
|
||||
]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 4096
|
||||
memory = 256
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,13 +2,12 @@ job "hello" {
|
||||
datacenters = ["dc1"]
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
max_parallel = 1
|
||||
min_healthy_time = "15s"
|
||||
auto_revert = true
|
||||
auto_revert = true
|
||||
}
|
||||
|
||||
group "hello" {
|
||||
|
||||
count = 3
|
||||
|
||||
task "hello" {
|
||||
@@ -19,17 +18,18 @@ job "hello" {
|
||||
}
|
||||
|
||||
artifact {
|
||||
source = "https://nomad-community-demo.s3.amazonaws.com/hellov1"
|
||||
source = "https://nomad-community-demo.s3.amazonaws.com/hellov1"
|
||||
destination = "local/hello"
|
||||
mode = "file"
|
||||
mode = "file"
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 256
|
||||
|
||||
network {
|
||||
mbits = 10
|
||||
port "web" {}
|
||||
port "web" {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ job "hello" {
|
||||
name = "hello"
|
||||
tags = ["urlprefix-hello/"]
|
||||
port = "web"
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
|
||||
@@ -1,32 +1,39 @@
|
||||
job "memstress" {
|
||||
datacenters = ["dc1", "dc2"]
|
||||
type = "batch"
|
||||
type = "batch"
|
||||
|
||||
group "memstress" {
|
||||
count = 1
|
||||
|
||||
restart {
|
||||
mode = "fail"
|
||||
mode = "fail"
|
||||
attempts = 0
|
||||
}
|
||||
|
||||
reschedule {
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
unlimited = false
|
||||
}
|
||||
|
||||
task "memstress" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "progrium/stress"
|
||||
args = [
|
||||
"-m", "2",
|
||||
"-t", "120"
|
||||
]
|
||||
|
||||
args = [
|
||||
"-m",
|
||||
"2",
|
||||
"-t",
|
||||
"120",
|
||||
]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 4096
|
||||
memory = 1024
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,26 +3,31 @@ job "redis" {
|
||||
|
||||
group "cache" {
|
||||
count = 4
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
min_healthy_time = "5s"
|
||||
healthy_deadline = "30s"
|
||||
max_parallel = 1
|
||||
min_healthy_time = "5s"
|
||||
healthy_deadline = "30s"
|
||||
progress_deadline = "1m"
|
||||
}
|
||||
|
||||
restart {
|
||||
mode = "fail"
|
||||
mode = "fail"
|
||||
attempts = 0
|
||||
}
|
||||
|
||||
reschedule {
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
attempts = 3
|
||||
interval = "10m"
|
||||
unlimited = false
|
||||
}
|
||||
|
||||
task "redis" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "redis:4.0"
|
||||
|
||||
port_map {
|
||||
db = 6379
|
||||
}
|
||||
@@ -31,9 +36,10 @@ job "redis" {
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 256
|
||||
|
||||
network {
|
||||
mbits = 10
|
||||
port "db" {}
|
||||
port "db" {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,6 +47,7 @@ job "redis" {
|
||||
name = "redis-cache"
|
||||
tags = ["global", "cache"]
|
||||
port = "db"
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "tcp"
|
||||
@@ -50,4 +57,4 @@ job "redis" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
job "nginx" {
|
||||
datacenters = ["dc1"]
|
||||
type = "system"
|
||||
type = "system"
|
||||
|
||||
group "simpleweb" {
|
||||
|
||||
update {
|
||||
stagger = "5s"
|
||||
max_parallel = 1
|
||||
@@ -29,8 +28,7 @@ job "nginx" {
|
||||
|
||||
network {
|
||||
mbits = 1
|
||||
port "http" {
|
||||
}
|
||||
port "http"{}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,4 +47,3 @@ job "nginx" {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,4 +16,3 @@ job "sleep" {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ job "sleep" {
|
||||
|
||||
config {
|
||||
command = "sleep"
|
||||
args = ["10000"]
|
||||
args = ["10000"]
|
||||
}
|
||||
|
||||
resources {
|
||||
@@ -17,4 +17,3 @@ job "sleep" {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ job "sleep" {
|
||||
|
||||
config {
|
||||
command = "sleep"
|
||||
args = ["10000"]
|
||||
args = ["10000"]
|
||||
}
|
||||
|
||||
resources {
|
||||
@@ -17,4 +17,3 @@ job "sleep" {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
6  e2e/nomadexec/testdata/docker.nomad  vendored
@@ -6,9 +6,9 @@ job "nomadexec-docker" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "busybox:1.29.2"
|
||||
image = "busybox:1.29.2"
|
||||
command = "/bin/sleep"
|
||||
args = ["1000"]
|
||||
args = ["1000"]
|
||||
}
|
||||
|
||||
resources {
|
||||
@@ -17,4 +17,4 @@ job "nomadexec-docker" {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,17 @@
job "prometheus" {
datacenters = ["dc1", "dc2"]
type = "service"
type        = "service"

group "monitoring" {
count = 1

restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
delay    = "15s"
mode     = "fail"
}

ephemeral_disk {
size = 300
}
@@ -18,6 +20,7 @@ job "prometheus" {
template {
change_mode = "noop"
destination = "local/prometheus.yml"

data = <<EOH
---
global:
@@ -43,26 +46,33 @@ scrape_configs:
format: ['prometheus']
EOH
}

driver = "docker"

config {
image = "prom/prometheus:latest"

volumes = [
"local/prometheus.yml:/etc/prometheus/prometheus.yml"
"local/prometheus.yml:/etc/prometheus/prometheus.yml",
]

port_map {
prometheus_ui = 9090
}
}

resources {
network {
mbits = 10
port "prometheus_ui" {}
port "prometheus_ui"{}
}
}

service {
name = "prometheus"
tags = ["urlprefix-/"]
port = "prometheus_ui"

check {
name     = "prometheus_ui port alive"
type     = "http"
@@ -1,13 +1,15 @@
job "r1" {
datacenters = ["dc1", "dc2"]
type = "service"
type        = "service"

group "r1" {
count = 6

spread {
attribute ="${node.datacenter}"
weight = 100
attribute = "${node.datacenter}"
weight    = 100
}

task "test" {
driver = "raw_exec"

@@ -17,4 +19,4 @@ job "r1" {
}
}
}
}
}
@@ -1,22 +1,28 @@
job "r1" {
datacenters = ["dc1", "dc2"]
type = "service"
type        = "service"

spread {
attribute ="${node.datacenter}"
weight = 100
attribute = "${node.datacenter}"
weight    = 100
}

group "test1" {
count = 10

spread {
attribute ="${meta.rack}"
weight = 100
attribute = "${meta.rack}"
weight    = 100

target "r1" {
percent = 70
}

target "r2" {
percent = 30
}
}

task "test" {
driver = "raw_exec"

@@ -26,4 +32,4 @@ job "r1" {
}
}
}
}
}
@@ -1,15 +1,16 @@
job "completed_leader" {
type = "batch"
datacenters = ["dc1"]
type        = "batch"
datacenters = ["dc1"]

group "completed_leader" {
restart {
attempts = 0
}

# Only the task named the same as the job has its events tested.
task "completed_leader" {
driver = "raw_exec"

config {
command = "sleep"
args = ["1000"]
@@ -19,6 +20,7 @@ job "completed_leader" {
task "leader" {
leader = true
driver = "raw_exec"

config {
command = "sleep"
args = ["1"]

@@ -1,14 +1,15 @@
job "failed_batch" {
type = "batch"
datacenters = ["dc1"]
type        = "batch"
datacenters = ["dc1"]

group "failed_batch" {
restart {
attempts = 0
}

task "failed_batch" {
driver = "raw_exec"

config {
command = "SomeInvalidCommand"
}

@@ -1,15 +1,16 @@
job "failed_sibling" {
type = "service"
datacenters = ["dc1"]
type        = "service"
datacenters = ["dc1"]

group "failed_sibling" {
restart {
attempts = 0
}

# Only the task named the same as the job has its events tested.
task "failed_sibling" {
driver = "raw_exec"

config {
command = "sleep"
args = ["1000"]
@@ -18,6 +19,7 @@ job "failed_sibling" {

task "failure" {
driver = "raw_exec"

config {
command = "/bin/sh"
args = ["-c", "sleep 1 && exit 99"]

@@ -1,9 +1,10 @@
job "simple_batch" {
type = "batch"
datacenters = ["dc1"]
type        = "batch"
datacenters = ["dc1"]

task "simple_batch" {
driver = "raw_exec"

config {
command = "sleep"
args = ["1"]
@@ -1,142 +1,97 @@
data "template_file" "user_data_server" {
template = "${file("${path.root}/user-data-server.sh")}"

vars {
server_count = "${var.server_count}"
region = "${var.region}"
retry_join = "${var.retry_join}"
}
}

data "template_file" "user_data_client" {
template = "${file("${path.root}/user-data-client.sh")}"
count = "${var.client_count}"

vars {
region = "${var.region}"
retry_join = "${var.retry_join}"
}
}

data "template_file" "nomad_client_config" {
template = "${file("${path.root}/configs/client.hcl")}"
}

data "template_file" "nomad_server_config" {
template = "}"
}

resource "aws_instance" "server" {
ami = "${data.aws_ami.main.image_id}"
instance_type = "${var.instance_type}"
key_name = "${module.keys.key_name}"
vpc_security_group_ids = ["${aws_security_group.primary.id}"]
count = "${var.server_count}"
ami                    = data.aws_ami.main.image_id
instance_type          = var.instance_type
key_name               = module.keys.key_name
vpc_security_group_ids = [aws_security_group.primary.id]
count                  = var.server_count

# Instance tags
tags {
tags = {
Name           = "${local.random_name}-server-${count.index}"
ConsulAutoJoin = "auto-join"
SHA = "${var.nomad_sha}"
User = "${data.aws_caller_identity.current.arn}"
SHA            = var.nomad_sha
User           = data.aws_caller_identity.current.arn
}

user_data            = "${data.template_file.user_data_server.rendered}"
iam_instance_profile = "${aws_iam_instance_profile.instance_profile.name}"
iam_instance_profile = aws_iam_instance_profile.instance_profile.name

# copy up all provisioning scripts and configs
provisioner "file" {
content = "${file("${path.root}/configs/${var.indexed == false ? "server.hcl" : "indexed/server-${count.index}.hcl"}")}"
destination = "/tmp/server.hcl"
source      = "shared/"
destination = "/ops/shared"

connection {
host = coalesce(self.public_ip, self.private_ip)
type = "ssh"
user = "ubuntu"
private_key = "${module.keys.private_key_pem}"
private_key = module.keys.private_key_pem
}
}

provisioner "remote-exec" {
inline = [
"aws s3 cp s3://nomad-team-test-binary/builds-oss/${var.nomad_sha}.tar.gz nomad.tar.gz",
"sudo cp /ops/shared/config/nomad.service /etc/systemd/system/nomad.service",
"sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/",
"sudo cp /tmp/server.hcl /etc/nomad.d/nomad.hcl",
"sudo chmod 0755 /usr/local/bin/nomad",
"sudo chown root:root /usr/local/bin/nomad",
"sudo systemctl enable nomad.service",
"sudo systemctl start nomad.service",
"chmod +x /ops/shared/config/provision-server.sh",
"/ops/shared/config/provision-server.sh aws ${var.server_count} '${var.nomad_sha}' '${var.indexed == false ? "server.hcl" : "indexed/server-${count.index}.hcl"}'",
]

connection {
host = coalesce(self.public_ip, self.private_ip)
type = "ssh"
user = "ubuntu"
private_key = "${module.keys.private_key_pem}"
private_key = module.keys.private_key_pem
}
}
}
resource "aws_instance" "client" {
|
||||
ami = "${data.aws_ami.main.image_id}"
|
||||
instance_type = "${var.instance_type}"
|
||||
key_name = "${module.keys.key_name}"
|
||||
vpc_security_group_ids = ["${aws_security_group.primary.id}"]
|
||||
count = "${var.client_count}"
|
||||
depends_on = ["aws_instance.server"]
|
||||
ami = data.aws_ami.main.image_id
|
||||
instance_type = var.instance_type
|
||||
key_name = module.keys.key_name
|
||||
vpc_security_group_ids = [aws_security_group.primary.id]
|
||||
count = var.client_count
|
||||
depends_on = [aws_instance.server]
|
||||
|
||||
# Instance tags
|
||||
tags {
|
||||
tags = {
|
||||
Name = "${local.random_name}-client-${count.index}"
|
||||
ConsulAutoJoin = "auto-join"
|
||||
SHA = "${var.nomad_sha}"
|
||||
User = "${data.aws_caller_identity.current.arn}"
|
||||
SHA = var.nomad_sha
|
||||
User = data.aws_caller_identity.current.arn
|
||||
}
|
||||
|
||||
ebs_block_device = {
|
||||
ebs_block_device {
|
||||
device_name = "/dev/xvdd"
|
||||
volume_type = "gp2"
|
||||
volume_size = "50"
|
||||
delete_on_termination = "true"
|
||||
}
|
||||
|
||||
user_data = "${element(data.template_file.user_data_client.*.rendered, count.index)}"
|
||||
iam_instance_profile = "${aws_iam_instance_profile.instance_profile.name}"
|
||||
iam_instance_profile = aws_iam_instance_profile.instance_profile.name
|
||||
|
||||
# copy up all provisioning scripts and configs
|
||||
provisioner "file" {
|
||||
content = "${file("${path.root}/configs/${var.indexed == false ? "client.hcl" : "indexed/client-${count.index}.hcl"}")}"
|
||||
destination = "/tmp/client.hcl"
|
||||
source = "shared/"
|
||||
destination = "/ops/shared"
|
||||
|
||||
connection {
|
||||
host = coalesce(self.public_ip, self.private_ip)
|
||||
type = "ssh"
|
||||
user = "ubuntu"
|
||||
private_key = "${module.keys.private_key_pem}"
|
||||
private_key = module.keys.private_key_pem
|
||||
}
|
||||
}
|
||||
|
||||
provisioner "remote-exec" {
|
||||
inline = [
|
||||
"aws s3 cp s3://nomad-team-test-binary/builds-oss/${var.nomad_sha}.tar.gz nomad.tar.gz",
|
||||
"sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/",
|
||||
"sudo cp /ops/shared/config/nomad.service /etc/systemd/system/nomad.service",
|
||||
"sudo cp /tmp/client.hcl /etc/nomad.d/nomad.hcl",
|
||||
"sudo chmod 0755 /usr/local/bin/nomad",
|
||||
"sudo chown root:root /usr/local/bin/nomad",
|
||||
|
||||
# Setup Host Volumes
|
||||
"sudo mkdir /tmp/data",
|
||||
|
||||
# Run Nomad Service
|
||||
"sudo systemctl enable nomad.service",
|
||||
|
||||
"sudo systemctl start nomad.service",
|
||||
|
||||
# Install CNI plugins
|
||||
"sudo mkdir -p /opt/cni/bin",
|
||||
|
||||
"wget -q -O - https://github.com/containernetworking/plugins/releases/download/v0.8.2/cni-plugins-linux-amd64-v0.8.2.tgz | sudo tar -C /opt/cni/bin -xz",
|
||||
"chmod +x /ops/shared/config/provision-client.sh",
|
||||
"/ops/shared/config/provision-client.sh aws '${var.nomad_sha}' '${var.indexed == false ? "client.hcl" : "indexed/client-${count.index}.hcl"}'",
|
||||
]
|
||||
|
||||
# Setup host volumes
|
||||
|
||||
connection {
|
||||
host = coalesce(self.public_ip, self.private_ip)
|
||||
type = "ssh"
|
||||
user = "ubuntu"
|
||||
private_key = "${module.keys.private_key_pem}"
|
||||
private_key = module.keys.private_key_pem
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,42 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

# Enable the client
client {
enabled = true

options {
"driver.raw_exec.enable" = "1"
"docker.privileged.enabled" = "true"
}

meta {
"rack" = "r1"
}

host_volume "shared_data" {
path = "/tmp/data"
}
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = true
address = "http://active.vault.service.consul:8200"
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,40 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

datacenter = "dc2"

# Enable the client
client {
enabled = true

options {
"driver.raw_exec.enable" = "1"
"docker.privileged.enabled" = "true"
}

meta {
"rack" = "r1"
}
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = true
address = "http://active.vault.service.consul:8200"
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,40 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

datacenter = "dc2"

# Enable the client
client {
enabled = true

options {
"driver.raw_exec.enable" = "1"
"docker.privileged.enabled" = "true"
}

meta {
"rack" = "r2"
}
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = true
address = "http://active.vault.service.consul:8200"
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,33 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

# Enable the server
server {
enabled = true
bootstrap_expect = 3
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,33 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

# Enable the server
server {
enabled = true
bootstrap_expect = 3
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,33 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

# Enable the server
server {
enabled = true
bootstrap_expect = 3
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,33 +0,0 @@
enable_debug = true

log_level = "debug"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

# Enable the server
server {
enabled = true
bootstrap_expect = 3
}

consul {
address = "127.0.0.1:8500"
}

vault {
enabled = false
address = "http://active.vault.service.consul:8200"
task_token_ttl = "1h"
create_from_role = "nomad-cluster"
token = ""
}

telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
@@ -1,11 +1,11 @@
resource "aws_iam_instance_profile" "instance_profile" {
name_prefix = "${local.random_name}"
role = "${aws_iam_role.instance_role.name}"
name_prefix = local.random_name
role        = aws_iam_role.instance_role.name
}

resource "aws_iam_role" "instance_role" {
name_prefix = "${local.random_name}"
assume_role_policy = "${data.aws_iam_policy_document.instance_role.json}"
name_prefix        = local.random_name
assume_role_policy = data.aws_iam_policy_document.instance_role.json
}

data "aws_iam_policy_document" "instance_role" {
@@ -22,8 +22,8 @@ data "aws_iam_policy_document" "instance_role" {

resource "aws_iam_role_policy" "auto_discover_cluster" {
name = "auto-discover-cluster"
role = "${aws_iam_role.instance_role.id}"
policy = "${data.aws_iam_policy_document.auto_discover_cluster.json}"
role   = aws_iam_role.instance_role.id
policy = data.aws_iam_policy_document.auto_discover_cluster.json
}

# Note: Overloading this instance profile to access
@@ -55,10 +55,11 @@ data "aws_iam_policy_document" "auto_discover_cluster" {
effect = "Allow"

actions = [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject",
]
resources = ["arn:aws:s3:::nomad-team-test-binary/*"]
}
}
@@ -28,20 +28,16 @@ variable "client_count" {
default = "4"
}

variable "retry_join" {
description = "Used by Consul to automatically form a cluster."
default     = "provider=aws tag_key=ConsulAutoJoin tag_value=auto-join"
}

variable "nomad_sha" {
description = "The sha of Nomad to run"
}

provider "aws" {
region = "${var.region}"
region = var.region
}

resource "random_pet" "e2e" {}
resource "random_pet" "e2e" {
}

locals {
random_name = "${var.name}-${random_pet.e2e.id}"
@@ -49,10 +45,10 @@ locals {

# Generates keys to use for provisioning and access
module "keys" {
name = "${local.random_name}"
path = "${path.root}/keys"
source = "mitchellh/dynamic-keys/aws"
version = "v1.0.0"
name    = local.random_name
path    = "${path.root}/keys"
source  = "mitchellh/dynamic-keys/aws"
version = "v2.0.0"
}

data "aws_ami" "main" {
@@ -68,17 +64,17 @@ data "aws_ami" "main" {
name   = "tag:OS"
values = ["Ubuntu"]
}

}

data "aws_caller_identity" "current" {}
data "aws_caller_identity" "current" {
}

output "servers" {
value = "${aws_instance.server.*.public_ip}"
value = aws_instance.server.*.public_ip
}

output "clients" {
value = "${aws_instance.client.*.public_ip}"
value = aws_instance.client.*.public_ip
}

output "message" {
@@ -87,8 +83,8 @@ Your cluster has been provisioned! - To prepare your environment, run the
following:

```
export NOMAD_ADDR=http://${aws_instance.client.0.public_ip}:4646
export CONSUL_HTTP_ADDR=http://${aws_instance.client.0.public_ip}:8500
export NOMAD_ADDR=http://${aws_instance.client[0].public_ip}:4646
export CONSUL_HTTP_ADDR=http://${aws_instance.client[0].public_ip}:8500
export NOMAD_E2E=1
```

@@ -100,7 +96,8 @@ go test -v ./e2e

ssh into nodes with:
```
ssh -i keys/${local.random_name}.pem ubuntu@${aws_instance.client.0.public_ip}
ssh -i keys/${local.random_name}.pem ubuntu@${aws_instance.client[0].public_ip}
```
EOM

}
@@ -3,8 +3,8 @@ data "aws_vpc" "default" {
}

resource "aws_security_group" "primary" {
name = "${local.random_name}"
vpc_id = "${data.aws_vpc.default.id}"
name   = local.random_name
vpc_id = data.aws_vpc.default.id

ingress {
from_port = 22
@@ -1,31 +0,0 @@
{
"builders": [{
"type": "amazon-ebs",
"region": "us-east-1",
"source_ami": "ami-80861296",
"instance_type": "t2.medium",
"ssh_username": "ubuntu",
"ami_name": "nomad-e2e-{{timestamp}}",
"ami_groups": ["all"],
"tags": {
"OS": "Ubuntu"
}
}],
"provisioners": [
{
"type": "shell",
"inline": [
"sudo mkdir /ops",
"sudo chmod 777 /ops"
]
},
{
"type": "file",
"source": "shared",
"destination": "/ops"
},
{
"type": "shell",
"script": "shared/scripts/setup.sh"
}]
}
21
e2e/terraform/packer/README.md
Normal file
@@ -0,0 +1,21 @@
# Packer Builds

These builds are run as-needed to update the AMIs used by the end-to-end test infrastructure.

## What goes here?

* steps that aren't specific to a given Nomad build: ex. all Linux instances need `jq` and `awscli`.
* steps that aren't specific to a given EC2 instance: nothing that includes an IP address.
* steps that infrequently change: the version of Consul or Vault we ship.

## Running Packer builds

```sh
$ packer --version
1.4.4

# build linux AMI
$ packer build packer.json
```
24
e2e/terraform/shared/scripts/setup.sh → e2e/terraform/packer/linux/setup.sh
Normal file → Executable file
@@ -5,9 +5,10 @@ set -e
# Disable interactive apt prompts
export DEBIAN_FRONTEND=noninteractive

cd /ops
sudo mkdir -p /ops/shared
sudo chown -R ubuntu:ubuntu /ops/shared

CONFIGDIR=/ops/shared/config
cd /ops

CONSULVERSION=1.6.0
CONSULDOWNLOAD=https://releases.hashicorp.com/consul/${CONSULVERSION}/consul_${CONSULVERSION}_linux_amd64.zip
@@ -37,13 +38,18 @@ sudo apt-get install -y python-setuptools
sudo easy_install pip
sudo pip install numpy

# Disable the firewall
# Install sockaddr
aws s3 cp "s3://nomad-team-test-binary/tools/sockaddr_linux_amd64" /tmp/sockaddr
sudo mv /tmp/sockaddr /usr/local/bin
sudo chmod +x /usr/local/bin/sockaddr
sudo chown root:root /usr/local/bin/sockaddr

# Disable the firewall
sudo ufw disable || echo "ufw not installed"

echo "Install Consul"
curl -L $CONSULDOWNLOAD > consul.zip
sudo unzip consul.zip -d /usr/local/bin
curl -L -o /tmp/consul.zip $CONSULDOWNLOAD
sudo unzip /tmp/consul.zip -d /usr/local/bin
sudo chmod 0755 /usr/local/bin/consul
sudo chown root:root /usr/local/bin/consul

@@ -54,8 +60,8 @@ sudo mkdir -p $CONSULDIR
sudo chmod 755 $CONSULDIR

echo "Install Vault"
curl -L $VAULTDOWNLOAD > vault.zip
sudo unzip vault.zip -d /usr/local/bin
curl -L -o /tmp/vault.zip $VAULTDOWNLOAD
sudo unzip /tmp/vault.zip -d /usr/local/bin
sudo chmod 0755 /usr/local/bin/vault
sudo chown root:root /usr/local/bin/vault

@@ -66,8 +72,8 @@ sudo mkdir -p $VAULTDIR
sudo chmod 755 $VAULTDIR

echo "Install Nomad"
curl -L $NOMADDOWNLOAD > nomad.zip
sudo unzip nomad.zip -d /usr/local/bin
curl -L -o /tmp/nomad.zip $NOMADDOWNLOAD
sudo unzip /tmp/nomad.zip -d /usr/local/bin
sudo chmod 0755 /usr/local/bin/nomad
sudo chown root:root /usr/local/bin/nomad
25
e2e/terraform/packer/packer.json
Normal file
@@ -0,0 +1,25 @@
{
"builders": [
{
"type": "amazon-ebs",
"region": "us-east-1",
"source_ami": "ami-80861296",
"instance_type": "t2.medium",
"ssh_username": "ubuntu",
"iam_instance_profile": "packer-builder",
"ami_name": "nomad-e2e-{{timestamp}}",
"ami_groups": [
"all"
],
"tags": {
"OS": "Ubuntu"
}
}
],
"provisioners": [
{
"type": "shell",
"script": "./linux/setup.sh"
}
]
}
10
e2e/terraform/shared/README.md
Normal file
@@ -0,0 +1,10 @@
# Terraform Provisioning

These scripts are copied up to instances via Terraform provisioning and executed after launch. This allows us to update the Nomad configurations for features that land on master without having to re-bake AMIs.

## What goes here?

* steps that are specific to a given Nomad build: ex. all Nomad configuration files.
* steps that are specific to a given EC2 instance: configuring IP addresses.

These scripts *should* be idempotent: copy configurations from `/ops/shared` to their destinations where the services expect them to be, rather than moving them.
83
e2e/terraform/shared/config/provision-client.sh
Normal file
@@ -0,0 +1,83 @@
#!/bin/bash
# installs and configures the desired build of Nomad as a server
set -o errexit
set -o nounset

CONFIGDIR=/ops/shared/config

HADOOP_VERSION=hadoop-2.7.7
HADOOPCONFIGDIR=/usr/local/$HADOOP_VERSION/etc/hadoop
HOME_DIR=ubuntu

IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetPrivateIP')
DOCKER_BRIDGE_IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetInterfaceIP "docker0"')

CLOUD="$1"
NOMAD_SHA="$2"
NOMAD_CONFIG="$3"

# Consul
CONSUL_SRC=/ops/shared/consul
CONSUL_DEST=/etc/consul.d

sudo cp "$CONSUL_SRC/base.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/retry_$CLOUD.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/consul_$CLOUD.service" /etc/systemd/system/consul.service

sudo systemctl enable consul.service
sudo systemctl start consul.service
sleep 10

# Add hostname to /etc/hosts
echo "127.0.0.1 $(hostname)" | sudo tee --append /etc/hosts

# Add Docker bridge network IP to /etc/resolv.conf (at the top)
echo "nameserver $DOCKER_BRIDGE_IP_ADDRESS" | sudo tee /etc/resolv.conf.new
cat /etc/resolv.conf | sudo tee --append /etc/resolv.conf.new
sudo mv /etc/resolv.conf.new /etc/resolv.conf

# Hadoop config file to enable HDFS CLI
sudo cp $CONFIGDIR/core-site.xml $HADOOPCONFIGDIR

# Move examples directory to $HOME
sudo mv /ops/examples /home/$HOME_DIR
sudo chown -R $HOME_DIR:$HOME_DIR /home/$HOME_DIR/examples
sudo chmod -R 775 /home/$HOME_DIR/examples

# Set env vars for tool CLIs
echo "export NOMAD_ADDR=http://$IP_ADDRESS:4646" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre" | sudo tee --append /home/$HOME_DIR/.bashrc

# Update PATH
echo "export PATH=$PATH:/usr/local/bin/spark/bin:/usr/local/$HADOOP_VERSION/bin" | sudo tee --append /home/$HOME_DIR/.bashrc

# Nomad

NOMAD_SRC=/ops/shared/nomad
NOMAD_DEST=/etc/nomad.d
NOMAD_CONFIG_FILENAME=$(basename "$NOMAD_CONFIG")

# download
aws s3 cp "s3://nomad-team-test-binary/builds-oss/nomad_linux_amd64_${NOMAD_SHA}.tar.gz" nomad.tar.gz

# unpack and install
sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/
sudo chmod 0755 /usr/local/bin/nomad
sudo chown root:root /usr/local/bin/nomad

sudo cp "$NOMAD_SRC/base.hcl" "$NOMAD_DEST/"
sudo cp "$NOMAD_SRC/$NOMAD_CONFIG" "$NOMAD_DEST/$NOMAD_CONFIG_FILENAME"

# Setup Host Volumes
sudo mkdir /tmp/data

# Install CNI plugins
sudo mkdir -p /opt/cni/bin
wget -q -O - \
https://github.com/containernetworking/plugins/releases/download/v0.8.2/cni-plugins-linux-amd64-v0.8.2.tgz \
| sudo tar -C /opt/cni/bin -xz

# enable as a systemd service
sudo cp "$NOMAD_SRC/nomad.service" /etc/systemd/system/nomad.service
sudo systemctl enable nomad.service
sudo systemctl start nomad.service
96
e2e/terraform/shared/config/provision-server.sh
Normal file
@@ -0,0 +1,96 @@
#!/bin/bash
# installs and configures the desired build of Nomad as a server
set -o errexit
set -o nounset

CONFIGDIR=/ops/shared/config

HADOOP_VERSION=hadoop-2.7.7
HADOOPCONFIGDIR=/usr/local/$HADOOP_VERSION/etc/hadoop
HOME_DIR=ubuntu

IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetPrivateIP')
DOCKER_BRIDGE_IP_ADDRESS=$(/usr/local/bin/sockaddr eval 'GetInterfaceIP "docker0"')

CLOUD="$1"
SERVER_COUNT="$2"
NOMAD_SHA="$3"
NOMAD_CONFIG="$4"

# Consul
CONSUL_SRC=/ops/shared/consul
CONSUL_DEST=/etc/consul.d

sed "s/SERVER_COUNT/$SERVER_COUNT/g" "$CONSUL_SRC/server.json" > /tmp/server.json
sudo mv /tmp/server.json "$CONSUL_DEST/server.json"
sudo cp "$CONSUL_SRC/base.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/retry_$CLOUD.json" "$CONSUL_DEST/"
sudo cp "$CONSUL_SRC/consul_$CLOUD.service" /etc/systemd/system/consul.service

sudo systemctl enable consul.service
sudo systemctl start consul.service
sleep 10
export CONSUL_HTTP_ADDR=$IP_ADDRESS:8500
export CONSUL_RPC_ADDR=$IP_ADDRESS:8400

# Vault
VAULT_SRC=/ops/shared/vault
VAULT_DEST=/etc/vault.d

sudo cp "$VAULT_SRC/vault.hcl" "$VAULT_DEST"
sudo cp "$VAULT_SRC/vault.service" /etc/systemd/system/vault.service

sudo systemctl enable vault.service
sudo systemctl start vault.service

# Add hostname to /etc/hosts
echo "127.0.0.1 $(hostname)" | sudo tee --append /etc/hosts

# Add Docker bridge network IP to /etc/resolv.conf (at the top)

echo "nameserver $DOCKER_BRIDGE_IP_ADDRESS" | sudo tee /etc/resolv.conf.new
cat /etc/resolv.conf | sudo tee --append /etc/resolv.conf.new
sudo mv /etc/resolv.conf.new /etc/resolv.conf

# Hadoop
sudo cp $CONFIGDIR/core-site.xml $HADOOPCONFIGDIR

# Move examples directory to $HOME
sudo mv /ops/examples /home/$HOME_DIR
sudo chown -R $HOME_DIR:$HOME_DIR /home/$HOME_DIR/examples
sudo chmod -R 775 /home/$HOME_DIR/examples

# Set env vars for tool CLIs
echo "export CONSUL_RPC_ADDR=$IP_ADDRESS:8400" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export CONSUL_HTTP_ADDR=$IP_ADDRESS:8500" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export VAULT_ADDR=http://$IP_ADDRESS:8200" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export NOMAD_ADDR=http://$IP_ADDRESS:4646" | sudo tee --append /home/$HOME_DIR/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre" | sudo tee --append /home/$HOME_DIR/.bashrc

# Update PATH
echo "export PATH=$PATH:/usr/local/bin/spark/bin:/usr/local/$HADOOP_VERSION/bin" | sudo tee --append /home/$HOME_DIR/.bashrc

# Nomad

NOMAD_SRC=/ops/shared/nomad
NOMAD_DEST=/etc/nomad.d
NOMAD_CONFIG_FILENAME=$(basename "$NOMAD_CONFIG")

# download
aws s3 cp "s3://nomad-team-test-binary/builds-oss/nomad_linux_amd64_${NOMAD_SHA}.tar.gz" nomad.tar.gz

# unpack and install
sudo tar -zxvf nomad.tar.gz -C /usr/local/bin/
sudo chmod 0755 /usr/local/bin/nomad
sudo chown root:root /usr/local/bin/nomad

sudo cp "$NOMAD_SRC/base.hcl" "$NOMAD_DEST/"

sed "s/3 # SERVER_COUNT/$SERVER_COUNT/g" "$NOMAD_SRC/$NOMAD_CONFIG" \
> "/tmp/$NOMAD_CONFIG_FILENAME"
sudo mv "/tmp/$NOMAD_CONFIG_FILENAME" "$NOMAD_DEST/$NOMAD_CONFIG_FILENAME"

# enable as a systemd service
sudo cp "$NOMAD_SRC/nomad.service" /etc/systemd/system/nomad.service
sudo systemctl enable nomad.service
sudo systemctl start nomad.service
@@ -1,12 +0,0 @@
backend "consul" {
path = "vault/"
address = "IP_ADDRESS:8500"
cluster_addr = "https://IP_ADDRESS:8201"
redirect_addr = "http://IP_ADDRESS:8200"
}

listener "tcp" {
address = "IP_ADDRESS:8200"
cluster_address = "IP_ADDRESS:8201"
tls_disable = 1
}
13
e2e/terraform/shared/consul/base.json
Normal file
@@ -0,0 +1,13 @@
{
"log_level": "INFO",
"data_dir": "/opt/consul/data",
"bind_addr": "0.0.0.0",
"client_addr": "0.0.0.0",
"advertise_addr": "{{ GetPrivateIP }}",
"connect": {
"enabled": true
},
"ports": {
"grpc": 8502
}
}