From 98b295d61713be89a78f43b8263f2ef799b2363c Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Tue, 6 Nov 2018 00:39:48 -0500 Subject: [PATCH] docker: started work on porting docker driver to new plugin framework --- client/allocrunner/taskrunner/task_runner.go | 6 +- drivers/docker/coordinator.go | 416 +++ drivers/docker/coordinator_test.go | 239 ++ drivers/docker/driver.go | 1556 +++++++++++ drivers/docker/driver_default.go | 36 + drivers/docker/driver_linux_test.go | 99 + drivers/docker/driver_test.go | 2595 ++++++++++++++++++ drivers/docker/driver_unix_test.go | 105 + drivers/docker/driver_windows.go | 17 + drivers/docker/handle.go | 278 ++ drivers/docker/progress.go | 289 ++ drivers/docker/progress_test.go | 52 + drivers/docker/state.go | 33 + drivers/docker/utils.go | 190 ++ plugins/drivers/driver.go | 18 + plugins/drivers/proto/driver.proto | 6 + plugins/drivers/utils.go | 4 + plugins/shared/catalog/register.go | 2 + 18 files changed, 5940 insertions(+), 1 deletion(-) create mode 100644 drivers/docker/coordinator.go create mode 100644 drivers/docker/coordinator_test.go create mode 100644 drivers/docker/driver.go create mode 100644 drivers/docker/driver_default.go create mode 100644 drivers/docker/driver_linux_test.go create mode 100644 drivers/docker/driver_test.go create mode 100644 drivers/docker/driver_unix_test.go create mode 100644 drivers/docker/driver_windows.go create mode 100644 drivers/docker/handle.go create mode 100644 drivers/docker/progress.go create mode 100644 drivers/docker/progress_test.go create mode 100644 drivers/docker/state.go create mode 100644 drivers/docker/utils.go diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index a801b15fa..eed1cd3b6 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -650,12 +650,16 @@ func (tr *TaskRunner) persistLocalState() error { // buildTaskConfig builds a drivers.TaskConfig with an unique ID for the task. // The ID is consistently built from the alloc ID, task name and restart attempt. func (tr *TaskRunner) buildTaskConfig() *drivers.TaskConfig { + return &drivers.TaskConfig{ ID: fmt.Sprintf("%s/%s/%d", tr.allocID, tr.taskName, tr.restartTracker.GetCount()), Name: tr.task.Name, Resources: &drivers.Resources{ NomadResources: tr.task.Resources, - //TODO Calculate the LinuxResources + LinuxResources: &drivers.LinuxResources{ + MemoryLimitBytes: int64(tr.Task().Resources.MemoryMB) * 1024 * 1024, + CPUShares: int64(tr.Task().Resources.CPU), + }, }, Env: tr.envBuilder.Build().Map(), User: tr.task.User, diff --git a/drivers/docker/coordinator.go b/drivers/docker/coordinator.go new file mode 100644 index 000000000..f5ed2298f --- /dev/null +++ b/drivers/docker/coordinator.go @@ -0,0 +1,416 @@ +package docker + +import ( + "context" + "fmt" + "regexp" + "sync" + "time" + + docker "github.com/fsouza/go-dockerclient" + hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + // createCoordinator allows us to only create a single coordinator + createCoordinator sync.Once + + // globalCoordinator is the shared coordinator and should only be retrieved + // using the GetDockerCoordinator() method. + globalCoordinator *dockerCoordinator + + // imageNotFoundMatcher is a regex expression that matches the image not + // found error Docker returns. 
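+	// Pull errors that do not match this pattern are treated as recoverable by
+	// recoverablePullError below.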
+ imageNotFoundMatcher = regexp.MustCompile(`Error: image .+ not found`) +) + +// pullFuture is a sharable future for retrieving a pulled images ID and any +// error that may have occurred during the pull. +type pullFuture struct { + waitCh chan struct{} + + err error + imageID string +} + +// newPullFuture returns a new pull future +func newPullFuture() *pullFuture { + return &pullFuture{ + waitCh: make(chan struct{}), + } +} + +// wait waits till the future has a result +func (p *pullFuture) wait() *pullFuture { + <-p.waitCh + return p +} + +// result returns the results of the future and should only ever be called after +// wait returns. +func (p *pullFuture) result() (imageID string, err error) { + return p.imageID, p.err +} + +// set is used to set the results and unblock any waiter. This may only be +// called once. +func (p *pullFuture) set(imageID string, err error) { + p.imageID = imageID + p.err = err + close(p.waitCh) +} + +// DockerImageClient provides the methods required to do CRUD operations on the +// Docker images +type DockerImageClient interface { + PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error + InspectImage(id string) (*docker.Image, error) + RemoveImage(id string) error +} + +// LogEventFn is a callback which allows Drivers to emit task events. +type LogEventFn func(message string, annotations map[string]string) + +// dockerCoordinatorConfig is used to configure the Docker coordinator. +type dockerCoordinatorConfig struct { + // logger is the logger the coordinator should use + logger hclog.Logger + + // cleanup marks whether images should be deleting when the reference count + // is zero + cleanup bool + + // client is the Docker client to use for communicating with Docker + client DockerImageClient + + // removeDelay is the delay between an image's reference count going to + // zero and the image actually being deleted. + removeDelay time.Duration +} + +// dockerCoordinator is used to coordinate actions against images to prevent +// racy deletions. It can be thought of as a reference counter on images. +type dockerCoordinator struct { + *dockerCoordinatorConfig + + // imageLock is used to lock access to all images + imageLock sync.Mutex + + // pullFutures is used to allow multiple callers to pull the same image but + // only have one request be sent to Docker + pullFutures map[string]*pullFuture + + // pullLoggers is used to track the LogEventFn for each alloc pulling an image. + // If multiple alloc's are attempting to pull the same image, each will need + // to register its own LogEventFn with the coordinator. 
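+	// Entries are keyed by image name and cleared once the pull for that image
+	// completes (see clearPullLogger).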
+ pullLoggers map[string][]LogEventFn + + // pullLoggerLock is used to sync access to the pullLoggers map + pullLoggerLock sync.RWMutex + + // imageRefCount is the reference count of image IDs + imageRefCount map[string]map[string]struct{} + + // deleteFuture is indexed by image ID and has a cancelable delete future + deleteFuture map[string]context.CancelFunc +} + +// NewDockerCoordinator returns a new Docker coordinator +func NewDockerCoordinator(config *dockerCoordinatorConfig) *dockerCoordinator { + if config.client == nil { + return nil + } + + return &dockerCoordinator{ + dockerCoordinatorConfig: config, + pullFutures: make(map[string]*pullFuture), + pullLoggers: make(map[string][]LogEventFn), + imageRefCount: make(map[string]map[string]struct{}), + deleteFuture: make(map[string]context.CancelFunc), + } +} + +// GetDockerCoordinator returns the shared dockerCoordinator instance +func GetDockerCoordinator(config *dockerCoordinatorConfig) *dockerCoordinator { + createCoordinator.Do(func() { + globalCoordinator = NewDockerCoordinator(config) + }) + + return globalCoordinator +} + +// PullImage is used to pull an image. It returns the pulled imaged ID or an +// error that occurred during the pull +func (d *dockerCoordinator) PullImage(image string, authOptions *docker.AuthConfiguration, callerID string, emitFn LogEventFn) (imageID string, err error) { + // Get the future + d.imageLock.Lock() + future, ok := d.pullFutures[image] + d.registerPullLogger(image, emitFn) + if !ok { + // Make the future + future = newPullFuture() + d.pullFutures[image] = future + go d.pullImageImpl(image, authOptions, future) + } + d.imageLock.Unlock() + + // We unlock while we wait since this can take a while + id, err := future.wait().result() + + d.imageLock.Lock() + defer d.imageLock.Unlock() + + // Delete the future since we don't need it and we don't want to cache an + // image being there if it has possibly been manually deleted (outside of + // Nomad). + if _, ok := d.pullFutures[image]; ok { + delete(d.pullFutures, image) + } + + // If we are cleaning up, we increment the reference count on the image + if err == nil && d.cleanup { + d.incrementImageReferenceImpl(id, image, callerID) + } + + return id, err +} + +// pullImageImpl is the implementation of pulling an image. 
The results are +// returned via the passed future +func (d *dockerCoordinator) pullImageImpl(image string, authOptions *docker.AuthConfiguration, future *pullFuture) { + defer d.clearPullLogger(image) + // Parse the repo and tag + repo, tag := parseDockerImage(image) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + pm := newImageProgressManager(image, cancel, d.handlePullInactivity, + d.handlePullProgressReport, d.handleSlowPullProgressReport) + defer pm.stop() + + pullOptions := docker.PullImageOptions{ + Repository: repo, + Tag: tag, + OutputStream: pm, + RawJSONStream: true, + Context: ctx, + } + + // Attempt to pull the image + var auth docker.AuthConfiguration + if authOptions != nil { + auth = *authOptions + } + + err := d.client.PullImage(pullOptions, auth) + + if ctxErr := ctx.Err(); ctxErr == context.DeadlineExceeded { + d.logger.Error("timeout pulling container", "image_ref", dockerImageRef(repo, tag)) + future.set("", recoverablePullError(ctxErr, image)) + return + } + + if err != nil { + d.logger.Error("failed pulling container", "image_ref", dockerImageRef(repo, tag), + "error", err) + future.set("", recoverablePullError(err, image)) + return + } + + d.logger.Debug("docker pull succeeded", "image_ref", dockerImageRef(repo, tag)) + + dockerImage, err := d.client.InspectImage(image) + if err != nil { + d.logger.Error("failed getting image id", "image_name", image, "error", err) + future.set("", recoverableErrTimeouts(err)) + return + } + + future.set(dockerImage.ID, nil) + return +} + +// IncrementImageReference is used to increment an image reference count +func (d *dockerCoordinator) IncrementImageReference(imageID, imageName, callerID string) { + d.imageLock.Lock() + defer d.imageLock.Unlock() + if d.cleanup { + d.incrementImageReferenceImpl(imageID, imageName, callerID) + } +} + +// incrementImageReferenceImpl assumes the lock is held +func (d *dockerCoordinator) incrementImageReferenceImpl(imageID, imageName, callerID string) { + // Cancel any pending delete + if cancel, ok := d.deleteFuture[imageID]; ok { + d.logger.Debug("cancelling removal of container image", "image_name", imageName) + cancel() + delete(d.deleteFuture, imageID) + } + + // Increment the reference + references, ok := d.imageRefCount[imageID] + if !ok { + references = make(map[string]struct{}) + d.imageRefCount[imageID] = references + } + + if _, ok := references[callerID]; !ok { + references[callerID] = struct{}{} + d.logger.Debug("image reference count incremented", "image_name", imageName, "image_id", imageID, "references", len(references)) + } +} + +// RemoveImage removes the given image. If there are any errors removing the +// image, the remove is retried internally. +func (d *dockerCoordinator) RemoveImage(imageID, callerID string) { + d.imageLock.Lock() + defer d.imageLock.Unlock() + + if !d.cleanup { + return + } + + references, ok := d.imageRefCount[imageID] + if !ok { + d.logger.Warn("RemoveImage on non-referenced counted image id", "image_id", imageID) + return + } + + // Decrement the reference count + delete(references, callerID) + count := len(references) + d.logger.Debug("image id reference count decremented", "image_id", imageID, "references", count) + + // Nothing to do + if count != 0 { + return + } + + // This should never be the case but we safety guard so we don't leak a + // cancel. 
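+	// Calling the stale cancel function releases its context before the map
+	// entry is overwritten below.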
+ if cancel, ok := d.deleteFuture[imageID]; ok { + d.logger.Error("image id has lingering delete future", "image_id", imageID) + cancel() + } + + // Setup a future to delete the image + ctx, cancel := context.WithCancel(context.Background()) + d.deleteFuture[imageID] = cancel + go d.removeImageImpl(imageID, ctx) + + // Delete the key from the reference count + delete(d.imageRefCount, imageID) +} + +// removeImageImpl is used to remove an image. It wil wait the specified remove +// delay to remove the image. If the context is cancelled before that the image +// removal will be cancelled. +func (d *dockerCoordinator) removeImageImpl(id string, ctx context.Context) { + // Wait for the delay or a cancellation event + select { + case <-ctx.Done(): + // We have been cancelled + return + case <-time.After(d.removeDelay): + } + + // Ensure we are suppose to delete. Do a short check while holding the lock + // so there can't be interleaving. There is still the smallest chance that + // the delete occurs after the image has been pulled but before it has been + // incremented. For handling that we just treat it as a recoverable error in + // the docker driver. + d.imageLock.Lock() + select { + case <-ctx.Done(): + d.imageLock.Unlock() + return + default: + } + d.imageLock.Unlock() + + for i := 0; i < 3; i++ { + err := d.client.RemoveImage(id) + if err == nil { + break + } + + if err == docker.ErrNoSuchImage { + d.logger.Debug("unable to cleanup image, does not exist", "image_id", id) + return + } + if derr, ok := err.(*docker.Error); ok && derr.Status == 409 { + d.logger.Debug("unable to cleanup image, still in use", "image_id", id) + return + } + + // Retry on unknown errors + d.logger.Debug("failed to remove image", "image_id", id, "attempt", i+1, "error", err) + + select { + case <-ctx.Done(): + // We have been cancelled + return + case <-time.After(3 * time.Second): + } + } + + d.logger.Debug("cleanup removed downloaded image", "image_id", id) + + // Cleanup the future from the map and free the context by cancelling it + d.imageLock.Lock() + if cancel, ok := d.deleteFuture[id]; ok { + delete(d.deleteFuture, id) + cancel() + } + d.imageLock.Unlock() +} + +func (d *dockerCoordinator) registerPullLogger(image string, logger LogEventFn) { + d.pullLoggerLock.Lock() + defer d.pullLoggerLock.Unlock() + if _, ok := d.pullLoggers[image]; !ok { + d.pullLoggers[image] = []LogEventFn{} + } + d.pullLoggers[image] = append(d.pullLoggers[image], logger) +} + +func (d *dockerCoordinator) clearPullLogger(image string) { + d.pullLoggerLock.Lock() + defer d.pullLoggerLock.Unlock() + delete(d.pullLoggers, image) +} + +func (d *dockerCoordinator) emitEvent(image, message string, annotations map[string]string) { + d.pullLoggerLock.RLock() + defer d.pullLoggerLock.RUnlock() + for i := range d.pullLoggers[image] { + go d.pullLoggers[image][i](message, annotations) + } +} + +func (d *dockerCoordinator) handlePullInactivity(image, msg string, timestamp time.Time) { + d.logger.Error("image pull aborted due to inactivity", "image_name", image, + "last_event_timestamp", timestamp.String(), "last_event", msg) +} + +func (d *dockerCoordinator) handlePullProgressReport(image, msg string, _ time.Time) { + d.logger.Debug("image pull progress", "image_name", image, "message", msg) +} + +func (d *dockerCoordinator) handleSlowPullProgressReport(image, msg string, _ time.Time) { + d.emitEvent(image, fmt.Sprintf("Docker image pull progress: %s", msg), map[string]string{ + "image": image, + }) +} + +// recoverablePullError wraps the 
error gotten when trying to pull and image if +// the error is recoverable. +func recoverablePullError(err error, image string) error { + recoverable := true + if imageNotFoundMatcher.MatchString(err.Error()) { + recoverable = false + } + return structs.NewRecoverableError(fmt.Errorf("Failed to pull `%s`: %s", image, err), recoverable) +} diff --git a/drivers/docker/coordinator_test.go b/drivers/docker/coordinator_test.go new file mode 100644 index 000000000..1f645b732 --- /dev/null +++ b/drivers/docker/coordinator_test.go @@ -0,0 +1,239 @@ +package docker + +import ( + "fmt" + "testing" + "time" + + docker "github.com/fsouza/go-dockerclient" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/testutil" +) + +type mockImageClient struct { + pulled map[string]int + idToName map[string]string + removed map[string]int + pullDelay time.Duration +} + +func newMockImageClient(idToName map[string]string, pullDelay time.Duration) *mockImageClient { + return &mockImageClient{ + pulled: make(map[string]int), + removed: make(map[string]int), + idToName: idToName, + pullDelay: pullDelay, + } +} + +func (m *mockImageClient) PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error { + time.Sleep(m.pullDelay) + m.pulled[opts.Repository]++ + return nil +} + +func (m *mockImageClient) InspectImage(id string) (*docker.Image, error) { + return &docker.Image{ + ID: m.idToName[id], + }, nil +} + +func (m *mockImageClient) RemoveImage(id string) error { + m.removed[id]++ + return nil +} + +func TestDockerCoordinator_ConcurrentPulls(t *testing.T) { + t.Parallel() + image := "foo" + imageID := uuid.Generate() + mapping := map[string]string{imageID: image} + + // Add a delay so we can get multiple queued up + mock := newMockImageClient(mapping, 10*time.Millisecond) + config := &dockerCoordinatorConfig{ + logger: testlog.HCLogger(t), + cleanup: true, + client: mock, + removeDelay: 100 * time.Millisecond, + } + + // Create a coordinator + coordinator := NewDockerCoordinator(config) + + id := "" + for i := 0; i < 10; i++ { + go func() { + id, _ = coordinator.PullImage(image, nil, uuid.Generate(), nil) + }() + } + + testutil.WaitForResult(func() (bool, error) { + p := mock.pulled[image] + if p >= 10 { + return false, fmt.Errorf("Wrong number of pulls: %d", p) + } + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 10 { + return false, fmt.Errorf("Got reference count %d; want %d", len(references), 10) + } + + // Ensure there is no pull future + if len(coordinator.pullFutures) != 0 { + return false, fmt.Errorf("Pull future exists after pull finished") + } + + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} + +func TestDockerCoordinator_Pull_Remove(t *testing.T) { + t.Parallel() + image := "foo" + imageID := uuid.Generate() + mapping := map[string]string{imageID: image} + + // Add a delay so we can get multiple queued up + mock := newMockImageClient(mapping, 10*time.Millisecond) + config := &dockerCoordinatorConfig{ + logger: testlog.HCLogger(t), + cleanup: true, + client: mock, + removeDelay: 1 * time.Millisecond, + } + + // Create a coordinator + coordinator := NewDockerCoordinator(config) + + id := "" + callerIDs := make([]string, 10, 10) + for i := 0; i < 10; i++ { + callerIDs[i] = uuid.Generate() + id, _ = coordinator.PullImage(image, nil, callerIDs[i], nil) + } + + // Check the reference count + if references := coordinator.imageRefCount[id]; 
len(references) != 10 { + t.Fatalf("Got reference count %d; want %d", len(references), 10) + } + + // Remove some + for i := 0; i < 8; i++ { + coordinator.RemoveImage(id, callerIDs[i]) + } + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 2 { + t.Fatalf("Got reference count %d; want %d", len(references), 2) + } + + // Remove all + for i := 8; i < 10; i++ { + coordinator.RemoveImage(id, callerIDs[i]) + } + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 0 { + t.Fatalf("Got reference count %d; want %d", len(references), 0) + } + + // Check that only one delete happened + testutil.WaitForResult(func() (bool, error) { + removes := mock.removed[id] + return removes == 1, fmt.Errorf("Wrong number of removes: %d", removes) + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Make sure there is no future still + if _, ok := coordinator.deleteFuture[id]; ok { + t.Fatal("Got delete future") + } +} + +func TestDockerCoordinator_Remove_Cancel(t *testing.T) { + t.Parallel() + image := "foo" + imageID := uuid.Generate() + mapping := map[string]string{imageID: image} + + mock := newMockImageClient(mapping, 1*time.Millisecond) + config := &dockerCoordinatorConfig{ + logger: testlog.HCLogger(t), + cleanup: true, + client: mock, + removeDelay: 100 * time.Millisecond, + } + + // Create a coordinator + coordinator := NewDockerCoordinator(config) + callerID := uuid.Generate() + + // Pull image + id, _ := coordinator.PullImage(image, nil, callerID, nil) + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 1 { + t.Fatalf("Got reference count %d; want %d", len(references), 1) + } + + // Remove image + coordinator.RemoveImage(id, callerID) + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 0 { + t.Fatalf("Got reference count %d; want %d", len(references), 0) + } + + // Pull image again within delay + id, _ = coordinator.PullImage(image, nil, callerID, nil) + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 1 { + t.Fatalf("Got reference count %d; want %d", len(references), 1) + } + + // Check that only no delete happened + if removes := mock.removed[id]; removes != 0 { + t.Fatalf("Image deleted when it shouldn't have") + } +} + +func TestDockerCoordinator_No_Cleanup(t *testing.T) { + t.Parallel() + image := "foo" + imageID := uuid.Generate() + mapping := map[string]string{imageID: image} + + mock := newMockImageClient(mapping, 1*time.Millisecond) + config := &dockerCoordinatorConfig{ + logger: testlog.HCLogger(t), + cleanup: false, + client: mock, + removeDelay: 1 * time.Millisecond, + } + + // Create a coordinator + coordinator := NewDockerCoordinator(config) + callerID := uuid.Generate() + + // Pull image + id, _ := coordinator.PullImage(image, nil, callerID, nil) + + // Check the reference count + if references := coordinator.imageRefCount[id]; len(references) != 0 { + t.Fatalf("Got reference count %d; want %d", len(references), 0) + } + + // Remove image + coordinator.RemoveImage(id, callerID) + + // Check that only no delete happened + if removes := mock.removed[id]; removes != 0 { + t.Fatalf("Image deleted when it shouldn't have") + } +} diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go new file mode 100644 index 000000000..beb309893 --- /dev/null +++ b/drivers/docker/driver.go @@ -0,0 +1,1556 @@ +package docker + +import ( + "fmt" + 
"net" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "time" + + "golang.org/x/net/context" + + "github.com/davecgh/go-spew/spew" + docker "github.com/fsouza/go-dockerclient" + "github.com/hashicorp/consul-template/signals" + hclog "github.com/hashicorp/go-hclog" + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/client/driver/env" + "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/drivers/docker/docklog" + "github.com/hashicorp/nomad/drivers/shared/eventer" + nstructs "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/base" + "github.com/hashicorp/nomad/plugins/drivers" + "github.com/hashicorp/nomad/plugins/shared/hclspec" + "github.com/hashicorp/nomad/plugins/shared/loader" +) + +const ( + // NoSuchContainerError is returned by the docker daemon if the container + // does not exist. + NoSuchContainerError = "No such container" + + // pluginName is the name of the plugin + pluginName = "docker" + + // fingerprintPeriod is the interval at which the driver will send fingerprint responses + fingerprintPeriod = 30 * time.Second + + // dockerTimeout is the length of time a request can be outstanding before + // it is timed out. + dockerTimeout = 5 * time.Minute + + // dockerBasicCaps is comma-separated list of Linux capabilities that are + // allowed by docker by default, as documented in + // https://docs.docker.com/engine/reference/run/#block-io-bandwidth-blkio-constraint + dockerBasicCaps = "CHOWN,DAC_OVERRIDE,FSETID,FOWNER,MKNOD,NET_RAW,SETGID," + + "SETUID,SETFCAP,SETPCAP,NET_BIND_SERVICE,SYS_CHROOT,KILL,AUDIT_WRITE" + + // dockerAuthHelperPrefix is the prefix to attach to the credential helper + // and should be found in the $PATH. Example: ${prefix-}${helper-name} + dockerAuthHelperPrefix = "docker-credential-" +) + +var ( + // PluginID is the rawexec plugin metadata registered in the plugin + // catalog. + PluginID = loader.PluginID{ + Name: pluginName, + PluginType: base.PluginTypeDriver, + } + + // PluginConfig is the rawexec factory function registered in the + // plugin catalog. 
+ PluginConfig = &loader.InternalPluginConfig{ + Config: map[string]interface{}{}, + Factory: func(l hclog.Logger) interface{} { return NewDockerDriver(l) }, + } +) + +func PluginLoader(opts map[string]string) (map[string]interface{}, error) { + conf := map[string]interface{}{} + if v, ok := opts["docker.endpoint"]; ok { + conf["endpoint"] = v + } + if v, ok := opts["docker.auth.config"]; ok { + conf["auth_config"] = v + } + if v, ok := opts["docker.auth.helper"]; ok { + conf["auth_helper"] = v + } + if _, ok := opts["docker.tls.cert"]; ok { + conf["tls"] = map[string]interface{}{ + "cert": opts["docker.tls.cert"], + "key": opts["docker.tls.key"], + "ca": opts["docker.tls.ca"], + } + } + if v, ok := opts["docker.cleanup.image.delay"]; ok { + conf["image_gc_delay"] = v + } + if v, ok := opts["docker.volumes.selinuxlabel"]; ok { + conf["volumes_selinuxlabel"] = v + } + if v, ok := opts["docker.caps.whitelist"]; ok { + conf["allow_caps"] = strings.Split(v, ",") + } + if v, err := strconv.ParseBool(opts["docker.cleanup.image"]); err == nil { + conf["image_gc"] = v + } + if v, err := strconv.ParseBool(opts["docker.volumes.enabled"]); err == nil { + conf["volumes_enabled"] = v + } + if v, err := strconv.ParseBool(opts["docker.privileged.enabled"]); err == nil { + conf["allow_privileged"] = v + } + if v, err := strconv.ParseBool(opts["docker.cleanup.container"]); err == nil { + conf["container_gc"] = v + } + return conf, nil +} + +var ( + // pluginInfo is the response returned for the PluginInfo RPC + pluginInfo = &base.PluginInfoResponse{ + Type: base.PluginTypeDriver, + PluginApiVersion: "0.0.1", + PluginVersion: "0.1.0", + Name: pluginName, + } + + // configSpec is the hcl specification returned by the ConfigSchema RPC + configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ + "endpoint": hclspec.NewAttr("endpoint", "string", false), + "auth_config": hclspec.NewAttr("auth_config", "string", false), + "auth_helper": hclspec.NewAttr("auth_helper", "string", false), + "tls": hclspec.NewBlock("tls", false, hclspec.NewObject(map[string]*hclspec.Spec{ + "cert": hclspec.NewAttr("cert", "string", false), + "key": hclspec.NewAttr("key", "string", false), + "ca": hclspec.NewAttr("ca", "string", false), + })), + "image_gc": hclspec.NewDefault( + hclspec.NewAttr("image_gc", "bool", false), + hclspec.NewLiteral("true"), + ), + "image_gc_delay": hclspec.NewAttr("image_gc_delay", "string", false), + "volumes_enabled": hclspec.NewDefault( + hclspec.NewAttr("volumes_enabled", "bool", false), + hclspec.NewLiteral("true"), + ), + "volumes_selinuxlabel": hclspec.NewAttr("volumes_selinuxlabel", "string", false), + "allow_privileged": hclspec.NewAttr("allow_privileged", "bool", false), + "allow_caps": hclspec.NewDefault( + hclspec.NewAttr("allow_caps", "list(string)", false), + hclspec.NewLiteral(`["CHOWN","DAC_OVERRIDE","FSETID","FOWNER","MKNOD","NET_RAW","SETGID","SETUID","SETFCAP","SETPCAP","NET_BIND_SERVICE","SYS_CHROOT","KILL","AUDIT_WRITE"]`), + ), + "container_gc": hclspec.NewDefault( + hclspec.NewAttr("container_gc", "bool", false), + hclspec.NewLiteral("true"), + ), + }) + + // taskConfigSpec is the hcl specification for the driver config section of + // a task within a job. 
It is returned in the TaskConfigSchema RPC + taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ + "image": hclspec.NewAttr("image", "string", true), + "advertise_ipv6_address": hclspec.NewAttr("advertise_ipv6_address", "bool", false), + "args": hclspec.NewAttr("args", "list(string)", false), + "auth": hclspec.NewBlock("auth", false, hclspec.NewObject(map[string]*hclspec.Spec{ + "username": hclspec.NewAttr("username", "string", false), + "password": hclspec.NewAttr("password", "string", false), + "email": hclspec.NewAttr("email", "string", false), + "server_address": hclspec.NewAttr("server_address", "string", false), + })), + "auth_soft_fail": hclspec.NewAttr("auth_soft_fail", "bool", false), + "cap_add": hclspec.NewAttr("cap_add", "list(string)", false), + "cap_drop": hclspec.NewAttr("cap_drop", "list(string)", false), + "command": hclspec.NewAttr("command", "string", false), + "cpu_hard_limit": hclspec.NewAttr("cpu_hard_limit", "bool", false), + "cpu_cfs_period": hclspec.NewAttr("cpu_cfs_period", "number", false), + "devices": hclspec.NewBlockSet("devices", hclspec.NewObject(map[string]*hclspec.Spec{ + "host_path": hclspec.NewAttr("host_path", "string", false), + "container_path": hclspec.NewAttr("container_path", "string", false), + "cgroup_permissions": hclspec.NewAttr("cgroup_permissions", "string", false), + })), + "dns_search_domains": hclspec.NewAttr("dns_search_domains", "list(string)", false), + "dns_options": hclspec.NewAttr("dns_options", "list(string)", false), + "dns_servers": hclspec.NewAttr("dns_servers", "list(string)", false), + "entrypoint": hclspec.NewAttr("entrypoint", "list(string)", false), + "extra_hosts": hclspec.NewAttr("extra_hosts", "list(string)", false), + "force_pull": hclspec.NewAttr("force_pull", "bool", false), + "hostname": hclspec.NewAttr("hostname", "string", false), + "interactive": hclspec.NewAttr("interactive", "bool", false), + "ipc_mode": hclspec.NewAttr("ipc_mode", "string", false), + "ipv4_address": hclspec.NewAttr("ipv4_address", "string", false), + "ipv6_address": hclspec.NewAttr("ipv6_address", "string", false), + "labels": hclspec.NewAttr("labels", "map(string)", false), + "load": hclspec.NewAttr("load", "string", false), + "logging": hclspec.NewAttr("logging", "map(string)", false), + "mac_address": hclspec.NewAttr("mac_address", "map(string)", false), + "mounts": hclspec.NewBlockSet("mounts", hclspec.NewObject(map[string]*hclspec.Spec{ + "target": hclspec.NewAttr("target", "string", false), + "source": hclspec.NewAttr("source", "string", false), + "readonly": hclspec.NewAttr("readonly", "bool", false), + "volume_options": hclspec.NewBlockSet("volume_options", hclspec.NewObject(map[string]*hclspec.Spec{ + "no_copy": hclspec.NewAttr("no_copy", "bool", false), + "labels": hclspec.NewAttr("labels", "map(string)", false), + "driver_config": hclspec.NewBlockSet("driver_config", hclspec.NewObject(map[string]*hclspec.Spec{ + "name": hclspec.NewAttr("name", "string", false), + "options": hclspec.NewAttr("name", "map(string)", false), + })), + })), + })), + "network_aliases": hclspec.NewAttr("network_aliases", "list(string)", false), + "network_mode": hclspec.NewAttr("network_mode", "string", false), + "pids_limit": hclspec.NewAttr("pids_limit", "number", false), + "pid_mode": hclspec.NewAttr("pid_mode", "string", false), + "port_map": hclspec.NewAttr("port_map", "map(number)", false), + "privileged": hclspec.NewAttr("privileged", "bool", false), + "readonly_rootfs": hclspec.NewAttr("readonly_rootfs", "bool", false), + "security_opt": 
hclspec.NewAttr("security_opt", "list(string)", false), + "shm_size": hclspec.NewAttr("shm_size", "number", false), + "sysctl": hclspec.NewAttr("sysctl", "map(string)", false), + "tty": hclspec.NewAttr("tty", "bool", false), + "ulimit": hclspec.NewAttr("ulimit", "map(string)", false), + "uts_mode": hclspec.NewAttr("uts_mode", "string", false), + "userns_mode": hclspec.NewAttr("userns_mode", "string", false), + "volumes": hclspec.NewAttr("volumes", "list(string)", false), + "volume_driver": hclspec.NewAttr("volume_driver", "string", false), + "work_dir": hclspec.NewAttr("work_dir", "string", false), + }) + + // capabilities is returned by the Capabilities RPC and indicates what + // optional features this driver supports + capabilities = &drivers.Capabilities{ + SendSignals: true, + Exec: true, + FSIsolation: structs.FSIsolationImage, + } + + // createClientsLock is a lock that protects reading/writing global client + // variables + createClientsLock sync.Mutex + + // client is a docker client with a timeout of 5 minutes. This is for doing + // all operations with the docker daemon besides which are not long running + // such as creating, killing containers, etc. + client *docker.Client + + // waitClient is a docker client with no timeouts. This is used for long + // running operations such as waiting on containers and collect stats + waitClient *docker.Client + + // healthCheckClient is a docker client with a timeout of 1 minute. This is + // necessary to have a shorter timeout than other API or fingerprint calls + healthCheckClient *docker.Client + + // The statistics the Docker driver exposes + DockerMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage"} + DockerMeasuredCpuStats = []string{"Throttled Periods", "Throttled Time", "Percent"} + + // recoverableErrTimeouts returns a recoverable error if the error was due + // to timeouts + recoverableErrTimeouts = func(err error) error { + r := false + if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") || + strings.Contains(err.Error(), "EOF") { + r = true + } + return nstructs.NewRecoverableError(err, r) + } +) + +type TaskConfig struct { + Image string `codec:"image"` + AdvertiseIPv6Addr bool `codec:"advertise_ipv6_address"` + Args []string `codec:"args"` + Auth DockerAuth `codec:"auth"` + AuthSoftFail bool `codec:"auth_soft_fail"` + CapAdd []string `codec:"cap_add"` + CapDrop []string `codec:"cap_drop"` + Command string `codec:"command"` + CPUCFSPeriod int64 `codec:"cpu_cfs_period"` + CPUHardLimit bool `codec:"cpu_hard_limit"` + Devices []DockerDevice `codec:"devices"` + DNSSearchDomains []string `codec:"dns_search_domains"` + DNSOptions []string `codec:"dns_options"` + DNSServers []string `codec:"dns_servers"` + Entrypoint []string `codec:"entrypoint"` + ExtraHosts []string `codec:"extra_hosts"` + ForcePull bool `codec:"force_pull"` + Hostname string `codec:"hostname"` + Interactive bool `codec:"interactive"` + IPCMode string `codec:"ipc_mode"` + IPv4Address string `codec:"ipv4_address"` + IPv6Address string `codec:"ipv6_address"` + Labels map[string]string `codec:"labels"` + LoadImage string `codec:"load"` + Logging DockerLogging `codec:"logging"` + MacAddress string `codec:"mac_address"` + Mounts []DockerMount `codec:"mounts"` + NetworkAliases []string `codec:"network_aliases"` + NetworkMode string `codec:"network_mode"` + PidsLimit int64 `codec:"pids_limit"` + PidMode string `codec:"pid_mode"` + PortMap map[string]int `codec:"port_map"` + Privileged bool `codec:"privileged"` + ReadonlyRootfs 
bool `codec:"readonly_rootfs"` + SecurityOpt []string `codec:"security_opt"` + ShmSize int64 `codec:"shm_size"` + Sysctl map[string]string `codec:"sysctl"` + TTY bool `codec:"tty"` + Ulimit map[string]string `codec:"ulimit"` + UTSMode string `codec:"uts_mode"` + UsernsMode string `codec:"userns_mode"` + Volumes []string `codec:"volumes"` + VolumeDriver string `codec:"volume_driver"` + WorkDir string `codec:"work_dir"` +} + +type DockerAuth struct { + Username string `codec:"username"` + Password string `codec:"password"` + Email string `codec:"email"` + ServerAddr string `codec:"server_address"` +} + +type DockerDevice struct { + HostPath string `codec:"host_path"` + ContainerPath string `codec:"container_path"` + CgroupPermissions string `codec:"cgroup_permissions"` +} + +type DockerLogging struct { + Type string `codec:"type"` + Config map[string]string `codec:"config"` +} + +type DockerMount struct { + Target string `codec:"target"` + Source string `codec:"source"` + ReadOnly bool `codec:"readonly"` + VolumeOptions DockerVolumeOptions `codec:"volume_options"` +} + +type DockerVolumeOptions struct { + NoCopy bool `codec:"no_copy"` + Labels map[string]string `codec:"labels"` + DriverConfig DockerVolumeDriverConfig `codec:"driver_config"` +} + +// VolumeDriverConfig holds a map of volume driver specific options +type DockerVolumeDriverConfig struct { + Name string `codec:"name"` + Options map[string]string `codec:"options"` +} + +type DriverConfig struct { + Endpoint string `codec:"endpoint"` + AuthConfig string `codec:"auth_config"` + AuthHelper string `codec:"auth_helper"` + TLS TLSConfig `codec:"tls"` + ImageGC bool `codec:"image_gc"` + ImageGCDelay string `codec:"image_gc_delay"` + imageGCDelayDuration time.Duration `codec:"-"` + VolumesEnabled bool `codec:"volumes_enabled"` + VolumesSelinuxLabel string `codec:"volumes_selinuxlabel"` + AllowPrivileged bool `codec:"allow_privileged"` + AllowCaps []string `codec:"allow_caps"` + ContainerGC bool `codec:"container_gc"` +} + +type TLSConfig struct { + Cert string `codec:"cert"` + Key string `codec:"key"` + CA string `codec:"ca"` +} + +type Driver struct { + // eventer is used to handle multiplexing of TaskEvents calls such that an + // event can be broadcast to all callers + eventer *eventer.Eventer + + config *DriverConfig + clientConfig *base.ClientDriverConfig + // ctx is the context for the driver. 
It is passed to other subsystems to + // coordinate shutdown + ctx context.Context + + // signalShutdown is called when the driver is shutting down and cancels the + // ctx passed to any subsystems + signalShutdown context.CancelFunc + + // tasks is the in memory datastore mapping taskIDs to taskHandles + tasks *taskStore + + // logger will log to the plugin output which is usually an 'executor.out' + // file located in the root of the TaskDir + logger hclog.Logger +} + +// NewDockerDriver returns a docker implementation of a driver plugin +func NewDockerDriver(logger hclog.Logger) drivers.DriverPlugin { + ctx, cancel := context.WithCancel(context.Background()) + logger = logger.Named(pluginName) + return &Driver{ + eventer: eventer.NewEventer(ctx, logger), + config: &DriverConfig{}, + tasks: newTaskStore(), + ctx: ctx, + signalShutdown: cancel, + logger: logger, + } +} + +func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { + return pluginInfo, nil +} + +func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { + return configSpec, nil +} + +func (d *Driver) SetConfig(data []byte, cfg *base.ClientAgentConfig) error { + var config DriverConfig + if err := base.MsgPackDecode(data, &config); err != nil { + return err + } + + d.config = &config + if len(d.config.ImageGCDelay) > 0 { + dur, err := time.ParseDuration(d.config.ImageGCDelay) + if err != nil { + return fmt.Errorf("failed to parse 'image_gc_delay' duration: %v", err) + } + d.config.imageGCDelayDuration = dur + } + + if cfg != nil { + d.clientConfig = cfg.Driver + } + return nil +} + +func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { + return taskConfigSpec, nil +} + +func (d *Driver) Capabilities() (*drivers.Capabilities, error) { + return capabilities, nil +} + +func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { + ch := make(chan *drivers.Fingerprint) + go d.handleFingerprint(ctx, ch) + return ch, nil +} + +func (d *Driver) handleFingerprint(ctx context.Context, ch chan *drivers.Fingerprint) { + defer close(ch) + ticker := time.NewTimer(0) + for { + select { + case <-ctx.Done(): + return + case <-d.ctx.Done(): + return + case <-ticker.C: + ticker.Reset(fingerprintPeriod) + ch <- d.buildFingerprint() + } + } +} + +func (d *Driver) buildFingerprint() *drivers.Fingerprint { + fp := &drivers.Fingerprint{ + Attributes: map[string]string{}, + Health: drivers.HealthStateHealthy, + HealthDescription: "healthy", + } + client, _, err := d.dockerClients() + if err != nil { + d.logger.Info("failed to initialize client", "error", err) + return &drivers.Fingerprint{ + Health: drivers.HealthStateUndetected, + HealthDescription: "ready", + } + } + + env, err := client.Version() + if err != nil { + d.logger.Debug("could not connect to docker daemon", "endpoint", client.Endpoint(), "error", err) + return &drivers.Fingerprint{ + Health: drivers.HealthStateUnhealthy, + HealthDescription: "failed to connect to docker daemon", + } + } + + fp.Attributes["driver.docker"] = "1" + fp.Attributes["driver.docker.version"] = env.Get("Version") + if d.config.AllowPrivileged { + fp.Attributes["driver.docker.privileged.enabled"] = "1" + } + + if d.config.VolumesEnabled { + fp.Attributes["driver.docker.volumes.enabled"] = "1" + } + + if nets, err := client.ListNetworks(); err != nil { + d.logger.Warn("error discovering bridge IP", "error", err) + } else { + for _, n := range nets { + if n.Name != "bridge" { + continue + } + + if len(n.IPAM.Config) == 0 { + d.logger.Warn("no IPAM config for bridge 
network") + break + } + + if n.IPAM.Config[0].Gateway != "" { + fp.Attributes["driver.docker.bridge_ip"] = n.IPAM.Config[0].Gateway + } else { + // Docker 17.09.0-ce dropped the Gateway IP from the bridge network + // See https://github.com/moby/moby/issues/32648 + d.logger.Debug("bridge_ip could not be discovered") + } + break + } + } + + return fp +} + +func (d *Driver) RecoverTask(*drivers.TaskHandle) error { + panic("not implemented") +} + +func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *structs.DriverNetwork, error) { + if _, ok := d.tasks.Get(cfg.ID); ok { + return nil, nil, fmt.Errorf("taskConfig with ID '%s' already started", cfg.ID) + } + + var driverConfig TaskConfig + + if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { + return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) + } + + handle := drivers.NewTaskHandle(pluginName) + handle.Config = cfg + + // Initialize docker API clients + client, _, err := d.dockerClients() + if err != nil { + return nil, nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) + } + + id, err := d.createImage(cfg, &driverConfig, client) + if err != nil { + return nil, nil, err + } + + containerCfg, err := d.createContainerConfig(cfg, &driverConfig, id) + if err != nil { + d.logger.Error("failed to create container configuration", "image_name", driverConfig.Image, + "image_id", id, "error", err) + return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err) + } + + container, err := d.createContainer(client, containerCfg, &driverConfig) + if err != nil { + d.logger.Error("failed to create container", "error", err) + } + + d.logger.Info("created container", "container_id", container.ID) + + // We don't need to start the container if the container is already running + // since we don't create containers which are already present on the host + // and are running + if !container.State.Running { + // Start the container + if err := d.startContainer(container); err != nil { + d.logger.Error("failed to start container", "container_id", container.ID, "error", err) + return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("Failed to start container %s: %s", container.ID, err), nstructs.IsRecoverable(err)) + } + + // InspectContainer to get all of the container metadata as + // much of the metadata (eg networking) isn't populated until + // the container is started + runningContainer, err := client.InspectContainer(container.ID) + if err != nil { + msg := "failed to inspect started container" + d.logger.Error(msg, "error", err) + return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("%s %s: %s", msg, container.ID, err), true) + } + container = runningContainer + d.logger.Info("started container", "container_id", container.ID) + } else { + d.logger.Debug("re-attaching to container", "container_id", + container.ID, "container_state", container.State.String()) + } + + dlogger, pluginClient, err := docklog.LaunchDockerLogger(d.logger) + if err != nil { + return nil, nil, fmt.Errorf("failed to launch docker logger plugin: %v", err) + } + + if err := dlogger.Start(&docklog.StartOpts{ + Endpoint: d.config.Endpoint, + ContainerID: container.ID, + Stdout: cfg.StdoutPath, + Stderr: cfg.StderrPath, + TLSCert: d.config.TLS.Cert, + TLSKey: d.config.TLS.Key, + TLSCA: d.config.TLS.CA, + }); err != nil { + pluginClient.Kill() + return nil, nil, fmt.Errorf("failed to launch docker logger process %s: %v", container.ID, err) + } + + // Detect container 
address + ip, autoUse := d.detectIP(container, &driverConfig) + + net := &structs.DriverNetwork{ + PortMap: driverConfig.PortMap, + IP: ip, + AutoAdvertise: autoUse, + } + + // Return a driver handle + h := &taskHandle{ + client: client, + waitClient: waitClient, + dlogger: dlogger, + dloggerPluginClient: pluginClient, + logger: d.logger.With("container_id", container.ID), + task: cfg, + container: container, + doneCh: make(chan bool), + waitCh: make(chan struct{}), + removeContainerOnExit: d.config.ContainerGC, + net: net, + } + d.tasks.Set(cfg.ID, h) + go h.collectStats() + go h.run() + + return handle, net, nil +} + +// createContainerClient is the subset of Docker Client methods used by the +// createContainer method to ease testing subtle error conditions. +type createContainerClient interface { + CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) + InspectContainer(id string) (*docker.Container, error) + ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error) + RemoveContainer(opts docker.RemoveContainerOptions) error +} + +// createContainer creates the container given the passed configuration. It +// attempts to handle any transient Docker errors. +func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions, + driverConfig *TaskConfig) (*docker.Container, error) { + // Create a container + attempted := 0 +CREATE: + container, createErr := client.CreateContainer(config) + if createErr == nil { + return container, nil + } + + d.logger.Debug("failed to create container", "container_name", + config.Name, "image_name", driverConfig.Image, "image_id", config.Config.Image, + "attempt", attempted+1, "error", createErr) + + // Volume management tools like Portworx may not have detached a volume + // from a previous node before Nomad started a task replacement task. + // Treat these errors as recoverable so we retry. + if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") { + return nil, nstructs.NewRecoverableError(createErr, true) + } + + // If the container already exists determine whether it's already + // running or if it's dead and needs to be recreated. 
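+	// Docker keys containers by name, so a failed earlier attempt can leave a
+	// same-named container behind: a running one is adopted as-is, anything
+	// else is purged and the create is retried.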
+ if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") { + containers, err := client.ListContainers(docker.ListContainersOptions{ + All: true, + }) + if err != nil { + d.logger.Error("failed to query list of containers matching name", "container_name", config.Name) + return nil, recoverableErrTimeouts(fmt.Errorf("Failed to query list of containers: %s", err)) + } + + // Delete matching containers + // Adding a / infront of the container name since Docker returns the + // container names with a / pre-pended to the Nomad generated container names + containerName := "/" + config.Name + d.logger.Debug("searching for container to purge", "container_name", containerName) + for _, shimContainer := range containers { + d.logger.Debug("listed container", "names", hclog.Fmt("%+v", shimContainer.Names)) + found := false + for _, name := range shimContainer.Names { + if name == containerName { + d.logger.Debug("Found container", "containter_name", containerName, "container_id", shimContainer.ID) + found = true + break + } + } + + if !found { + continue + } + + // Inspect the container and if the container isn't dead then return + // the container + container, err := client.InspectContainer(shimContainer.ID) + if err != nil { + err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err) + + // This error is always recoverable as it could + // be caused by races between listing + // containers and this container being removed. + // See #2802 + return nil, nstructs.NewRecoverableError(err, true) + } + if container != nil && container.State.Running { + return container, nil + } + + err = client.RemoveContainer(docker.RemoveContainerOptions{ + ID: container.ID, + Force: true, + }) + if err != nil { + d.logger.Error("failed to purge container", "container_id", container.ID) + return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err)) + } else if err == nil { + d.logger.Info("purged container", "container_id", container.ID) + } + } + + if attempted < 5 { + attempted++ + time.Sleep(1 * time.Second) + goto CREATE + } + } else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") { + // There is still a very small chance this is possible even with the + // coordinator so retry. + return nil, nstructs.NewRecoverableError(createErr, true) + } + + return nil, recoverableErrTimeouts(createErr) +} + +// startContainer starts the passed container. It attempts to handle any +// transient Docker errors. 
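+// Transient HTTP 500 responses from the daemon are retried up to five times,
+// one second apart, before being returned as a recoverable error.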
+func (d *Driver) startContainer(c *docker.Container) error { + // Start a container + attempted := 0 +START: + startErr := client.StartContainer(c.ID, c.HostConfig) + if startErr == nil { + return nil + } + + d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr) + + // If it is a 500 error it is likely we can retry and be successful + if strings.Contains(startErr.Error(), "API error (500)") { + if attempted < 5 { + attempted++ + time.Sleep(1 * time.Second) + goto START + } + return nstructs.NewRecoverableError(startErr, true) + } + + return recoverableErrTimeouts(startErr) +} + +// getDockerCoordinator returns the docker coordinator and the caller ID to use when +// interacting with the coordinator +func (d *Driver) getDockerCoordinator(client *docker.Client, task *drivers.TaskConfig) (*dockerCoordinator, string) { + config := &dockerCoordinatorConfig{ + client: client, + cleanup: d.config.ImageGC, + logger: d.logger, + removeDelay: d.config.imageGCDelayDuration, + } + + return GetDockerCoordinator(config), fmt.Sprintf("%s-%s", task.ID, task.Name) +} + +// createImage creates a docker image either by pulling it from a registry or by +// loading it from the file system +func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) { + image := driverConfig.Image + repo, tag := parseDockerImage(image) + + coordinator, callerID := d.getDockerCoordinator(client, task) + + // We're going to check whether the image is already downloaded. If the tag + // is "latest", or ForcePull is set, we have to check for a new version every time so we don't + // bother to check and cache the id here. We'll download first, then cache. + if driverConfig.ForcePull { + d.logger.Debug("force pulling image instead of inspecting local", "image_ref", dockerImageRef(repo, tag)) + } else if tag != "latest" { + if dockerImage, _ := client.InspectImage(image); dockerImage != nil { + // Image exists so just increment its reference count + coordinator.IncrementImageReference(dockerImage.ID, image, callerID) + return dockerImage.ID, nil + } + } + + // Load the image if specified + if driverConfig.LoadImage != "" { + return d.loadImage(task, driverConfig, client) + } + + // Download the image + return d.pullImage(task, driverConfig, client, repo, tag) +} + +// pullImage creates an image by pulling it from a docker registry +func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client, repo, tag string) (id string, err error) { + authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo) + if err != nil { + if driverConfig.AuthSoftFail { + d.logger.Warn("Failed to find docker repo auth", "repo", repo, "error", err) + } else { + return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err) + } + } + + if authIsEmpty(authOptions) { + d.logger.Debug("did not find docker auth for repo", "repo", repo) + } + + d.eventer.EmitEvent(&drivers.TaskEvent{ + TaskID: task.ID, + Timestamp: time.Now(), + Message: "Downloading image", + Annotations: map[string]string{ + "image": dockerImageRef(repo, tag), + }, + }) + coordinator, callerID := d.getDockerCoordinator(client, task) + + return coordinator.PullImage(driverConfig.Image, authOptions, callerID, d.emitEventFunc(task)) +} + +func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn { + return func(msg string, annotations map[string]string) { + d.eventer.EmitEvent(&drivers.TaskEvent{ + TaskID: 
task.ID, + Timestamp: time.Now(), + Message: msg, + Annotations: annotations, + }) + } +} + +// authBackend encapsulates a function that resolves registry credentials. +type authBackend func(string) (*docker.AuthConfiguration, error) + +// resolveRegistryAuthentication attempts to retrieve auth credentials for the +// repo, trying all authentication-backends possible. +func (d *Driver) resolveRegistryAuthentication(driverConfig *TaskConfig, repo string) (*docker.AuthConfiguration, error) { + return firstValidAuth(repo, []authBackend{ + authFromTaskConfig(driverConfig), + authFromDockerConfig(d.config.AuthConfig), + authFromHelper(d.config.AuthHelper), + }) +} + +// loadImage creates an image by loading it from the file system +func (d *Driver) loadImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (id string, err error) { + + archive := filepath.Join(task.TaskDir().LocalDir, driverConfig.LoadImage) + d.logger.Debug("loading image from disk", "archive", archive) + + f, err := os.Open(archive) + if err != nil { + return "", fmt.Errorf("unable to open image archive: %v", err) + } + + if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil { + return "", err + } + f.Close() + + dockerImage, err := client.InspectImage(driverConfig.Image) + if err != nil { + return "", recoverableErrTimeouts(err) + } + + coordinator, callerID := d.getDockerCoordinator(client, task) + coordinator.IncrementImageReference(dockerImage.ID, driverConfig.Image, callerID) + return dockerImage.ID, nil +} + +func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConfig) ([]string, error) { + + allocDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SharedAllocDir, task.Env[env.AllocDir]) + taskLocalBind := fmt.Sprintf("%s:%s", task.TaskDir().LocalDir, task.Env[env.TaskLocalDir]) + secretDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SecretsDir, task.Env[env.SecretsDir]) + binds := []string{allocDirBind, taskLocalBind, secretDirBind} + + if !d.config.VolumesEnabled && driverConfig.VolumeDriver != "" { + return nil, fmt.Errorf("'volumes_enabled' is false; cannot use volume driver %q", driverConfig.VolumeDriver) + } + + for _, userbind := range driverConfig.Volumes { + parts := strings.Split(userbind, ":") + if len(parts) < 2 { + return nil, fmt.Errorf("invalid docker volume: %q", userbind) + } + + // Resolve dotted path segments + parts[0] = filepath.Clean(parts[0]) + + // Absolute paths aren't always supported + if filepath.IsAbs(parts[0]) { + if !d.config.VolumesEnabled { + // Disallow mounting arbitrary absolute paths + return nil, fmt.Errorf("'volumes_enabled' is false; cannot mount host paths: %+q", userbind) + } + binds = append(binds, userbind) + continue + } + + // Relative paths are always allowed as they mount within a container + // When a VolumeDriver is set, we assume we receive a binding in the format volume-name:container-dest + // Otherwise, we assume we receive a relative path binding in the format relative/to/task:/also/in/container + if driverConfig.VolumeDriver == "" { + // Expand path relative to alloc dir + parts[0] = filepath.Join(task.TaskDir().Dir, parts[0]) + } + + binds = append(binds, strings.Join(parts, ":")) + } + + if selinuxLabel := d.config.VolumesSelinuxLabel; selinuxLabel != "" { + // Apply SELinux Label to each volume + for i := range binds { + binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel) + } + } + + return binds, nil +} + +func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig 
*TaskConfig, + imageID string) (docker.CreateContainerOptions, error) { + + logger := d.logger.With("task_name", task.Name) + var c docker.CreateContainerOptions + if task.Resources == nil { + // Guard against missing resources. We should never have been able to + // schedule a job without specifying this. + logger.Error("task.Resources is empty") + return c, fmt.Errorf("task.Resources is empty") + } + + binds, err := d.containerBinds(task, driverConfig) + if err != nil { + return c, err + } + + // create the config block that will later be consumed by go-dockerclient + config := &docker.Config{ + Image: imageID, + Entrypoint: driverConfig.Entrypoint, + Hostname: driverConfig.Hostname, + User: task.User, + Tty: driverConfig.TTY, + OpenStdin: driverConfig.Interactive, + } + + if driverConfig.WorkDir != "" { + config.WorkingDir = driverConfig.WorkDir + } + + hostConfig := &docker.HostConfig{ + // Convert MB to bytes. This is an absolute value. + Memory: task.Resources.LinuxResources.MemoryLimitBytes, + // Convert Mhz to shares. This is a relative value. + CPUShares: task.Resources.LinuxResources.CPUShares, + + // Binds are used to mount a host volume into the container. We mount a + // local directory for storage and a shared alloc directory that can be + // used to share data between different tasks in the same task group. + Binds: binds, + + VolumeDriver: driverConfig.VolumeDriver, + + PidsLimit: driverConfig.PidsLimit, + } + + // Calculate CPU Quota + // cfs_quota_us is the time per core, so we must + // multiply the time by the number of cores available + // See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu + if driverConfig.CPUHardLimit { + if driverConfig.CPUCFSPeriod < 0 || driverConfig.CPUCFSPeriod > 1000000 { + return c, fmt.Errorf("invalid value for cpu_cfs_period") + } + if driverConfig.CPUCFSPeriod == 0 { + driverConfig.CPUCFSPeriod = task.Resources.LinuxResources.CPUPeriod + } + hostConfig.CPUPeriod = driverConfig.CPUCFSPeriod + hostConfig.CPUQuota = task.Resources.LinuxResources.CPUQuota + } + + // Windows does not support MemorySwap/MemorySwappiness #2193 + if runtime.GOOS == "windows" { + hostConfig.MemorySwap = 0 + hostConfig.MemorySwappiness = -1 + } else { + hostConfig.MemorySwap = task.Resources.LinuxResources.MemoryLimitBytes // MemorySwap is memory + swap. 
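+		// Setting MemorySwap equal to the memory limit leaves no room for swap
+		// (allowed swap = MemorySwap - Memory = 0).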
+ } + + hostConfig.LogConfig = docker.LogConfig{ + Type: driverConfig.Logging.Type, + Config: driverConfig.Logging.Config, + } + + logger.Debug("configured resources", "memory", hostConfig.Memory, + "cpu_shares", hostConfig.CPUShares, "cpu_quota", hostConfig.CPUQuota, + "cpu_period", hostConfig.CPUPeriod) + logger.Debug("binding directories", "binds", hclog.Fmt("%#v", hostConfig.Binds)) + + // set privileged mode + if driverConfig.Privileged && !d.config.AllowPrivileged { + return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) + } + hostConfig.Privileged = driverConfig.Privileged + + // set capabilities + hostCapsWhitelistConfig := d.config.AllowCaps + hostCapsWhitelist := make(map[string]struct{}) + for _, cap := range hostCapsWhitelistConfig { + cap = strings.ToLower(strings.TrimSpace(cap)) + hostCapsWhitelist[cap] = struct{}{} + } + + if _, ok := hostCapsWhitelist["all"]; !ok { + effectiveCaps, err := tweakCapabilities( + strings.Split(dockerBasicCaps, ","), + driverConfig.CapAdd, + driverConfig.CapDrop, + ) + if err != nil { + return c, err + } + var missingCaps []string + for _, cap := range effectiveCaps { + cap = strings.ToLower(cap) + if _, ok := hostCapsWhitelist[cap]; !ok { + missingCaps = append(missingCaps, cap) + } + } + if len(missingCaps) > 0 { + return c, fmt.Errorf("Docker driver doesn't have the following caps whitelisted on this Nomad agent: %s", missingCaps) + } + } + + hostConfig.CapAdd = driverConfig.CapAdd + hostConfig.CapDrop = driverConfig.CapDrop + + // set SHM size + if driverConfig.ShmSize != 0 { + hostConfig.ShmSize = driverConfig.ShmSize + } + + // set DNS servers + for _, ip := range driverConfig.DNSServers { + if net.ParseIP(ip) != nil { + hostConfig.DNS = append(hostConfig.DNS, ip) + } else { + logger.Error("invalid ip address for container dns server", "ip", ip) + } + } + + if len(driverConfig.Devices) > 0 { + var devices []docker.Device + for _, device := range driverConfig.Devices { + dev := docker.Device{ + PathOnHost: device.HostPath, + PathInContainer: device.ContainerPath, + CgroupPermissions: device.CgroupPermissions} + devices = append(devices, dev) + } + hostConfig.Devices = devices + } + + // Setup mounts + for _, m := range driverConfig.Mounts { + hm := docker.HostMount{ + Target: m.Target, + Source: m.Source, + Type: "volume", // Only type supported + ReadOnly: m.ReadOnly, + } + vo := m.VolumeOptions + hm.VolumeOptions = &docker.VolumeOptions{ + NoCopy: vo.NoCopy, + } + + dc := vo.DriverConfig + hm.VolumeOptions.DriverConfig = docker.VolumeDriverConfig{ + Name: dc.Name, + } + hm.VolumeOptions.DriverConfig.Options = dc.Options + hm.VolumeOptions.Labels = vo.Labels + hostConfig.Mounts = append(hostConfig.Mounts, hm) + } + + // set DNS search domains and extra hosts + hostConfig.DNSSearch = driverConfig.DNSSearchDomains + hostConfig.DNSOptions = driverConfig.DNSOptions + hostConfig.ExtraHosts = driverConfig.ExtraHosts + + hostConfig.IpcMode = driverConfig.IPCMode + hostConfig.PidMode = driverConfig.PidMode + hostConfig.UTSMode = driverConfig.UTSMode + hostConfig.UsernsMode = driverConfig.UsernsMode + hostConfig.SecurityOpt = driverConfig.SecurityOpt + hostConfig.Sysctls = driverConfig.Sysctl + + ulimits, err := sliceMergeUlimit(driverConfig.Ulimit) + if err != nil { + return c, fmt.Errorf("failed to parse ulimit configuration: %v", err) + } + hostConfig.Ulimits = ulimits + + hostConfig.ReadonlyRootfs = driverConfig.ReadonlyRootfs + + hostConfig.NetworkMode = driverConfig.NetworkMode + if hostConfig.NetworkMode == "" { + // 
docker default
+		logger.Debug("networking mode not specified; using default", "network_mode", defaultNetworkMode)
+		hostConfig.NetworkMode = defaultNetworkMode
+	}
+
+	// Setup port mapping and exposed ports
+	if len(task.Resources.NomadResources.Networks) == 0 {
+		logger.Debug("no network interfaces are available")
+		if len(driverConfig.PortMap) > 0 {
+			return c, fmt.Errorf("Trying to map ports but no network interface is available")
+		}
+	} else {
+		// TODO add support for more than one network
+		network := task.Resources.NomadResources.Networks[0]
+		publishedPorts := map[docker.Port][]docker.PortBinding{}
+		exposedPorts := map[docker.Port]struct{}{}
+
+		for _, port := range network.ReservedPorts {
+			// By default we will map the allocated port 1:1 to the container
+			containerPortInt := port.Value
+
+			// If the user has mapped a port using port_map we'll change it here
+			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
+				containerPortInt = mapped
+			}
+
+			hostPortStr := strconv.Itoa(port.Value)
+			containerPort := docker.Port(strconv.Itoa(containerPortInt))
+
+			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
+			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
+			logger.Debug("allocated static port", "ip", network.IP, "port", port.Value)
+
+			exposedPorts[containerPort+"/tcp"] = struct{}{}
+			exposedPorts[containerPort+"/udp"] = struct{}{}
+			logger.Debug("exposed port", "port", port.Value)
+		}
+
+		for _, port := range network.DynamicPorts {
+			// By default we will map the allocated port 1:1 to the container
+			containerPortInt := port.Value
+
+			// If the user has mapped a port using port_map we'll change it here
+			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
+				containerPortInt = mapped
+			}
+
+			hostPortStr := strconv.Itoa(port.Value)
+			containerPort := docker.Port(strconv.Itoa(containerPortInt))
+
+			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
+			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
+			logger.Debug("allocated mapped port", "ip", network.IP, "port", port.Value)
+
+			exposedPorts[containerPort+"/tcp"] = struct{}{}
+			exposedPorts[containerPort+"/udp"] = struct{}{}
+			logger.Debug("exposed port", "port", containerPort)
+		}
+
+		hostConfig.PortBindings = publishedPorts
+		config.ExposedPorts = exposedPorts
+	}
+
+	// If the user specified a custom command to run, we'll inject it here.
+	if driverConfig.Command != "" {
+		// Validate command
+		if err := validateCommand(driverConfig.Command, "args"); err != nil {
+			return c, err
+		}
+
+		cmd := []string{driverConfig.Command}
+		if len(driverConfig.Args) != 0 {
+			cmd = append(cmd, driverConfig.Args...)
+ } + logger.Debug("setting container startup command", "command", strings.Join(cmd, " ")) + config.Cmd = cmd + } else if len(driverConfig.Args) != 0 { + config.Cmd = driverConfig.Args + } + + if len(driverConfig.Labels) > 0 { + config.Labels = driverConfig.Labels + logger.Debug("applied labels on the container", "labels", config.Labels) + } + + config.Env = task.EnvList() + + containerName := strings.Replace(task.ID, "/", "_", -1) + logger.Debug("setting container name", "container_name", containerName) + + var networkingConfig *docker.NetworkingConfig + if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { + networkingConfig = &docker.NetworkingConfig{ + EndpointsConfig: map[string]*docker.EndpointConfig{ + hostConfig.NetworkMode: {}, + }, + } + } + + if len(driverConfig.NetworkAliases) > 0 { + networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases + logger.Debug("setting container network aliases", "network_mode", hostConfig.NetworkMode, + "network_aliases", strings.Join(driverConfig.NetworkAliases, ", ")) + } + + if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { + networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{ + IPv4Address: driverConfig.IPv4Address, + IPv6Address: driverConfig.IPv6Address, + } + logger.Debug("setting container network configuration", "network_mode", hostConfig.NetworkMode, + "ipv4_address", driverConfig.IPv4Address, "ipv6_address", driverConfig.IPv6Address) + } + + if driverConfig.MacAddress != "" { + config.MacAddress = driverConfig.MacAddress + logger.Debug("setting container mac address", "mac_address", config.MacAddress) + } + + return docker.CreateContainerOptions{ + Name: containerName, + Config: config, + HostConfig: hostConfig, + NetworkingConfig: networkingConfig, + }, nil +} + +// detectIP of Docker container. Returns the first IP found as well as true if +// the IP should be advertised (bridge network IPs return false). Returns an +// empty string and false if no IP could be found. +func (d *Driver) detectIP(c *docker.Container, driverConfig *TaskConfig) (string, bool) { + if c.NetworkSettings == nil { + // This should only happen if there's been a coding error (such + // as not calling InspectContainer after CreateContainer). Code + // defensively in case the Docker API changes subtly. + d.logger.Error("no network settings for container", "container_id", c.ID) + return "", false + } + + ip, ipName := "", "" + auto := false + for name, net := range c.NetworkSettings.Networks { + if net.IPAddress == "" { + // Ignore networks without an IP address + continue + } + + ip = net.IPAddress + if driverConfig.AdvertiseIPv6Addr { + ip = net.GlobalIPv6Address + auto = true + } + ipName = name + + // Don't auto-advertise IPs for default networks (bridge on + // Linux, nat on Windows) + if name != "bridge" && name != "nat" { + auto = true + } + + break + } + + if n := len(c.NetworkSettings.Networks); n > 1 { + d.logger.Warn("multiple Docker networks for container found but Nomad only supports 1", + "total_networks", n, + "container_id", c.ID, + "container_network", ipName) + } + + return ip, auto +} + +// validateCommand validates that the command only has a single value and +// returns a user friendly error message telling them to use the passed +// argField. 
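+// For example (illustrative, not part of this changeset): an empty command or one
+// padded with whitespace such as " /bin/echo" is rejected here; additional
+// arguments belong in the args field rather than the command string.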
+func validateCommand(command, argField string) error { + trimmed := strings.TrimSpace(command) + if len(trimmed) == 0 { + return fmt.Errorf("command empty: %q", command) + } + + if len(trimmed) != len(command) { + return fmt.Errorf("command contains extra white space: %q", command) + } + + return nil +} + +func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { + h, ok := d.tasks.Get(taskID) + if !ok { + return nil, drivers.ErrTaskNotFound + } + ch := make(chan *drivers.ExitResult) + go d.handleWait(ctx, ch, h) + return ch, nil +} + +func (d *Driver) handleWait(ctx context.Context, ch chan *drivers.ExitResult, h *taskHandle) { + defer close(ch) + select { + case <-h.waitCh: + ch <- h.exitResult + case <-ctx.Done(): + ch <- &drivers.ExitResult{ + Err: ctx.Err(), + } + } +} + +func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { + h, ok := d.tasks.Get(taskID) + if !ok { + return drivers.ErrTaskNotFound + } + + if signal == "" { + signal = "SIGINT" + } + + sig, err := signals.Parse(signal) + if err != nil { + return fmt.Errorf("failed to parse signal: %v", err) + } + + return h.Kill(timeout, sig) +} + +func (d *Driver) DestroyTask(taskID string, force bool) error { + h, ok := d.tasks.Get(taskID) + if !ok { + return drivers.ErrTaskNotFound + } + + defer h.dloggerPluginClient.Kill() + + c, err := h.client.InspectContainer(h.container.ID) + if err != nil { + return fmt.Errorf("failed to inspect container state: %v", err) + } + if c.State.Running && !force { + return fmt.Errorf("must call StopTask for the given task before Destroy or set force to true") + } + + if err := h.client.StopContainer(h.container.ID, 0); err != nil { + h.logger.Warn("failed to stop container during destroy", "error", err) + } + + if err := h.dlogger.Stop(); err != nil { + h.logger.Error("failed to stop docker logger process during destroy", + "error", err, "logger_pid", h.dloggerPluginClient.ReattachConfig().Pid) + } + + return nil +} + +func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { + h, ok := d.tasks.Get(taskID) + if !ok { + return nil, drivers.ErrTaskNotFound + } + + return &drivers.TaskStatus{ID: h.container.ID}, nil +} + +func (d *Driver) TaskStats(taskID string) (*structs.TaskResourceUsage, error) { + h, ok := d.tasks.Get(taskID) + if !ok { + return nil, drivers.ErrTaskNotFound + } + + return h.Stats() +} + +func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { + return d.eventer.TaskEvents(ctx) +} + +func (d *Driver) SignalTask(taskID string, signal string) error { + h, ok := d.tasks.Get(taskID) + if !ok { + return drivers.ErrTaskNotFound + } + + sig, err := signals.Parse(signal) + if err != nil { + return fmt.Errorf("failed to parse signal: %v", err) + } + + return h.Signal(sig) +} + +func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { + h, ok := d.tasks.Get(taskID) + if !ok { + return nil, drivers.ErrTaskNotFound + } + + if len(cmd) == 0 { + return nil, fmt.Errorf("cmd is required, but was empty") + } + + ctx, _ := context.WithTimeout(context.Background(), timeout) + + return h.Exec(ctx, cmd[0], cmd[1:]) +} + +// dockerClients creates two *docker.Client, one for long running operations and +// the other for shorter operations. In test / dev mode we can use ENV vars to +// connect to the docker daemon. In production mode we will read docker.endpoint +// from the config file. 
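+// For example (assumed usage, not part of this patch): exporting DOCKER_HOST,
+// DOCKER_TLS_VERIFY and DOCKER_CERT_PATH is enough for dev/test via
+// NewClientFromEnv, while production agents would pin docker.endpoint in the
+// plugin configuration instead.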
+func (d *Driver) dockerClients() (*docker.Client, *docker.Client, error) { + createClientsLock.Lock() + defer createClientsLock.Unlock() + + if client != nil && waitClient != nil { + return client, waitClient, nil + } + + var err error + + // Onlt initialize the client if it hasn't yet been done + if client == nil { + client, err = d.newDockerClient(dockerTimeout) + if err != nil { + return nil, nil, err + } + } + + // Only initialize the waitClient if it hasn't yet been done + if waitClient == nil { + waitClient, err = d.newDockerClient(0 * time.Minute) + if err != nil { + return nil, nil, err + } + } + + return client, waitClient, nil +} + +// newDockerClient creates a new *docker.Client with a configurable timeout +func (d *Driver) newDockerClient(timeout time.Duration) (*docker.Client, error) { + var err error + var merr multierror.Error + var newClient *docker.Client + + // Default to using whatever is configured in docker.endpoint. If this is + // not specified we'll fall back on NewClientFromEnv which reads config from + // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and + // DOCKER_CERT_PATH. This allows us to lock down the config in production + // but also accept the standard ENV configs for dev and test. + dockerEndpoint := d.config.Endpoint + if dockerEndpoint != "" { + cert := d.config.TLS.Cert + key := d.config.TLS.Key + ca := d.config.TLS.CA + + if cert+key+ca != "" { + d.logger.Debug("using TLS client connection", "endpoint", dockerEndpoint) + newClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) + if err != nil { + merr.Errors = append(merr.Errors, err) + } + } else { + d.logger.Debug("using standard client connection", "endpoint", dockerEndpoint) + newClient, err = docker.NewClient(dockerEndpoint) + if err != nil { + merr.Errors = append(merr.Errors, err) + } + } + } else { + d.logger.Debug("using client connection initialized from environment") + newClient, err = docker.NewClientFromEnv() + if err != nil { + merr.Errors = append(merr.Errors, err) + } + } + + if timeout != 0 && newClient != nil { + newClient.SetTimeout(timeout) + } + return newClient, merr.ErrorOrNil() +} + +func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) { + var ulimits []docker.ULimit + + for name, ulimitRaw := range ulimitsRaw { + if len(ulimitRaw) == 0 { + return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %q, cannot be empty", name, ulimitRaw) + } + // hard limit is optional + if strings.Contains(ulimitRaw, ":") == false { + ulimitRaw = ulimitRaw + ":" + ulimitRaw + } + + splitted := strings.SplitN(ulimitRaw, ":", 2) + if len(splitted) < 2 { + return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %v", name, ulimitRaw) + } + soft, err := strconv.Atoi(splitted[0]) + if err != nil { + return []docker.ULimit{}, fmt.Errorf("Malformed soft ulimit %v: %v", name, ulimitRaw) + } + hard, err := strconv.Atoi(splitted[1]) + if err != nil { + return []docker.ULimit{}, fmt.Errorf("Malformed hard ulimit %v: %v", name, ulimitRaw) + } + + ulimit := docker.ULimit{ + Name: name, + Soft: int64(soft), + Hard: int64(hard), + } + ulimits = append(ulimits, ulimit) + } + return ulimits, nil +} diff --git a/drivers/docker/driver_default.go b/drivers/docker/driver_default.go new file mode 100644 index 000000000..8cb644d16 --- /dev/null +++ b/drivers/docker/driver_default.go @@ -0,0 +1,36 @@ +//+build !windows + +package docker + +import ( + docker "github.com/fsouza/go-dockerclient" + "github.com/moby/moby/daemon/caps" 
+) + +const ( + // Setting default network mode for non-windows OS as bridge + defaultNetworkMode = "bridge" +) + +func getPortBinding(ip string, port string) []docker.PortBinding { + return []docker.PortBinding{{HostIP: ip, HostPort: port}} +} + +func tweakCapabilities(basics, adds, drops []string) ([]string, error) { + // Moby mixes 2 different capabilities formats: prefixed with "CAP_" + // and not. We do the conversion here to have a consistent, + // non-prefixed format on the Nomad side. + for i, cap := range basics { + basics[i] = "CAP_" + cap + } + + effectiveCaps, err := caps.TweakCapabilities(basics, adds, drops) + if err != nil { + return effectiveCaps, err + } + + for i, cap := range effectiveCaps { + effectiveCaps[i] = cap[len("CAP_"):] + } + return effectiveCaps, nil +} diff --git a/drivers/docker/driver_linux_test.go b/drivers/docker/driver_linux_test.go new file mode 100644 index 000000000..9cae67ce8 --- /dev/null +++ b/drivers/docker/driver_linux_test.go @@ -0,0 +1,99 @@ +package docker + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/hashicorp/nomad/client/testutil" + tu "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/require" +) + +func TestDockerDriver_authFromHelper(t *testing.T) { + dir, err := ioutil.TempDir("", "test-docker-driver_authfromhelper") + require.NoError(t, err) + defer os.RemoveAll(dir) + helperPayload := "{\"Username\":\"hashi\",\"Secret\":\"nomad\"}" + helperContent := []byte(fmt.Sprintf("#!/bin/sh\ncat > %s/helper-$1.out;echo '%s'", dir, helperPayload)) + + helperFile := filepath.Join(dir, "docker-credential-testnomad") + err = ioutil.WriteFile(helperFile, helperContent, 0777) + require.NoError(t, err) + + path := os.Getenv("PATH") + os.Setenv("PATH", fmt.Sprintf("%s:%s", path, dir)) + defer os.Setenv("PATH", path) + + helper := authFromHelper("testnomad") + creds, err := helper("registry.local:5000/repo/image") + require.NoError(t, err) + require.NotNil(t, creds) + require.Equal(t, "hashi", creds.Username) + require.Equal(t, "nomad", creds.Password) + + if _, err := os.Stat(filepath.Join(dir, "helper-get.out")); os.IsNotExist(err) { + t.Fatalf("Expected helper-get.out to exist") + } + content, err := ioutil.ReadFile(filepath.Join(dir, "helper-get.out")) + require.NoError(t, err) + require.Equal(t, []byte("https://registry.local:5000"), content) +} + +func TestDockerDriver_PidsLimit(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["pids_limit"] = "1" + task.Config["command"] = "/bin/sh" + task.Config["args"] = []string{"-c", "sleep 2 & sleep 2"} + + ctx := testDockerDriverContexts(t, task) + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + + // Copy the image into the task's directory + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + + _, err := d.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + resp, err := d.Start(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("err: %v", err) + } + defer resp.Handle.Kill() + + select { + case res := <-resp.Handle.WaitCh(): + if res.Successful() { + t.Fatalf("expected error, but container exited successful") + } + case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second): + t.Fatalf("timeout") + } + + // XXX Logging doesn't work on OSX so just test on Linux + // Check that data was written to the directory. 
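+	// With pids_limit set to 1 the shell cannot fork the second sleep, so the
+	// container is expected to write a "can't fork" error to stderr.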
+ outputFile := filepath.Join(ctx.ExecCtx.TaskDir.LogDir, "redis-demo.stderr.0") + act, err := ioutil.ReadFile(outputFile) + if err != nil { + t.Fatalf("Couldn't read expected output: %v", err) + } + + exp := "can't fork" + if !strings.Contains(string(act), exp) { + t.Fatalf("Expected failed fork: %q", act) + } + +} diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go new file mode 100644 index 000000000..8dfb23298 --- /dev/null +++ b/drivers/docker/driver_test.go @@ -0,0 +1,2595 @@ +package docker + +import ( + "context" + "fmt" + "io/ioutil" + "math/rand" + "os" + "path/filepath" + "reflect" + "runtime" + "runtime/debug" + "sort" + "strconv" + "strings" + "testing" + "time" + + docker "github.com/fsouza/go-dockerclient" + "github.com/hashicorp/consul/lib/freeport" + hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/allocdir" + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/driver/env" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/client/testutil" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/base" + "github.com/hashicorp/nomad/plugins/drivers" + "github.com/hashicorp/nomad/plugins/shared/loader" + tu "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + basicResources = &drivers.Resources{ + NomadResources: &structs.Resources{ + CPU: 250, + MemoryMB: 256, + DiskMB: 20, + }, + LinuxResources: &drivers.LinuxResources{ + CPUShares: 250, + MemoryLimitBytes: 256 * 1024 * 1024, + }, + } +) + +func dockerIsRemote(t *testing.T) bool { + client, err := docker.NewClientFromEnv() + if err != nil { + return false + } + + // Technically this could be a local tcp socket but for testing purposes + // we'll just assume that tcp is only used for remote connections. + if client.Endpoint()[0:3] == "tcp" { + return true + } + return false +} + +// Returns a task with a reserved and dynamic port. The ports are returned +// respectively. +func dockerTask(t *testing.T) (*drivers.TaskConfig, int, int) { + ports := freeport.GetT(t, 2) + dockerReserved := ports[0] + dockerDynamic := ports[1] + + cfg := TaskConfig{ + Image: "busybox", + LoadImage: "busybox.tar", + Command: "/bin/nc", + Args: []string{"-l", "127.0.0.1", "-p", "0"}, + } + task := &drivers.TaskConfig{ + ID: uuid.Generate(), + Name: "redis-demo", + Resources: &drivers.Resources{ + NomadResources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + Networks: []*structs.NetworkResource{ + { + IP: "127.0.0.1", + ReservedPorts: []structs.Port{{Label: "main", Value: dockerReserved}}, + DynamicPorts: []structs.Port{{Label: "REDIS", Value: dockerDynamic}}, + }, + }, + }, + LinuxResources: &drivers.LinuxResources{ + CPUShares: 512, + MemoryLimitBytes: 256 * 1024 * 1024, + }, + }, + } + + require.NoError(t, task.EncodeConcreteDriverConfig(&cfg)) + + return task, dockerReserved, dockerDynamic +} + +// dockerSetup does all of the basic setup you need to get a running docker +// process up and running for testing. Use like: +// +// task := taskTemplate() +// // do custom task configuration +// client, handle, cleanup := dockerSetup(t, task) +// defer cleanup() +// // do test stuff +// +// If there is a problem during setup this function will abort or skip the test +// and indicate the reason. 
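+// With the plugin-based harness the helper now also takes an enableLogs flag and
+// returns a drivers.DriverPlugin, e.g.:
+//
+//	client, driver, cleanup := dockerSetup(t, task, false)
+//
+// (sketch based on the signature below).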
+func dockerSetup(t *testing.T, task *drivers.TaskConfig, enableLogs bool) (*docker.Client, drivers.DriverPlugin, func()) { + client := newTestDockerClient(t) + driver, cleanup := dockerDriverHarness(t, task, enableLogs) + + return client, driver, cleanup +} + +// dockerDriverHarness wires up everything needed to launch a task with a docker driver. +// A driver plugin interface and cleanup function is returned +func dockerDriverHarness(t *testing.T, task *drivers.TaskConfig, enableLogs bool) (drivers.DriverPlugin, func()) { + logger := testlog.HCLogger(t) + harness := drivers.NewDriverHarness(t, NewDockerDriver(logger)) + plugLoader, err := loader.NewPluginLoader(&loader.PluginLoaderConfig{ + Logger: logger, + PluginDir: "./plugins", + InternalPlugins: map[loader.PluginID]*loader.InternalPluginConfig{ + PluginID: &loader.InternalPluginConfig{ + Config: map[string]interface{}{ + "image_gc_delay": "1s", + }, + Factory: func(hclog.Logger) interface{} { + return harness + }, + }, + }, + }) + + require.NoError(t, err) + instance, err := plugLoader.Dispense(pluginName, base.PluginTypeDriver, nil, logger) + require.NoError(t, err) + driver, ok := instance.Plugin().(drivers.DriverPlugin) + if !ok { + t.Fatal("plugin instance is not a driver... wat?") + } + + cleanup := harness.MkAllocDir(task, enableLogs) + + return driver, cleanup +} + +func newTestDockerClient(t *testing.T) *docker.Client { + t.Helper() + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + client, err := docker.NewClientFromEnv() + if err != nil { + t.Fatalf("Failed to initialize client: %s\nStack\n%s", err, debug.Stack()) + } + return client +} + +/* +// This test should always pass, even if docker daemon is not available +func TestDockerDriver_Fingerprint(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + + ctx := testDockerDriverContexts(t, &structs.Task{Name: "foo", Driver: "docker", Resources: basicResources}) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + node := &structs.Node{ + Attributes: make(map[string]string), + } + + request := &cstructs.FingerprintRequest{Config: &config.Config{}, Node: node} + var response cstructs.FingerprintResponse + err := d.Fingerprint(request, &response) + if err != nil { + t.Fatalf("err: %v", err) + } + + attributes := response.Attributes + if testutil.DockerIsConnected(t) && attributes["driver.docker"] == "" { + t.Fatalf("Fingerprinter should detect when docker is available") + } + + if attributes["driver.docker"] != "1" { + t.Log("Docker daemon not available. The remainder of the docker tests will be skipped.") + } else { + + // if docker is available, make sure that the response is tagged as + // applicable + if !response.Detected { + t.Fatalf("expected response to be applicable") + } + } + + t.Logf("Found docker version %s", attributes["driver.docker.version"]) +} + +// TestDockerDriver_Fingerprint_Bridge asserts that if Docker is running we set +// the bridge network's IP as a node attribute. 
See #2785 +func TestDockerDriver_Fingerprint_Bridge(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("requires Docker") + } + if runtime.GOOS != "linux" { + t.Skip("expect only on linux") + } + + // This seems fragile, so we might need to reconsider this test if it + // proves flaky + expectedAddr, err := sockaddr.GetInterfaceIP("docker0") + if err != nil { + t.Fatalf("unable to get ip for docker0: %v", err) + } + if expectedAddr == "" { + t.Fatalf("unable to get ip for docker bridge") + } + + conf := testConfig(t) + conf.Node = mock.Node() + dd := NewDockerDriver(NewDriverContext("", "", "", "", conf, conf.Node, testlog.Logger(t), nil)) + + request := &cstructs.FingerprintRequest{Config: conf, Node: conf.Node} + var response cstructs.FingerprintResponse + + err = dd.Fingerprint(request, &response) + if err != nil { + t.Fatalf("error fingerprinting docker: %v", err) + } + + if !response.Detected { + t.Fatalf("expected response to be applicable") + } + + attributes := response.Attributes + if attributes == nil { + t.Fatalf("expected attributes to be set") + } + + if attributes["driver.docker"] == "" { + t.Fatalf("expected Docker to be enabled but false was returned") + } + + if found := attributes["driver.docker.bridge_ip"]; found != expectedAddr { + t.Fatalf("expected bridge ip %q but found: %q", expectedAddr, found) + } + t.Logf("docker bridge ip: %q", attributes["driver.docker.bridge_ip"]) +} + +func TestDockerDriver_Check_DockerHealthStatus(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("requires Docker") + } + if runtime.GOOS != "linux" { + t.Skip("expect only on linux") + } + + require := require.New(t) + + expectedAddr, err := sockaddr.GetInterfaceIP("docker0") + if err != nil { + t.Fatalf("unable to get ip for docker0: %v", err) + } + if expectedAddr == "" { + t.Fatalf("unable to get ip for docker bridge") + } + + conf := testConfig(t) + conf.Node = mock.Node() + dd := NewDockerDriver(NewDriverContext("", "", "", "", conf, conf.Node, testlog.Logger(t), nil)) + + request := &cstructs.HealthCheckRequest{} + var response cstructs.HealthCheckResponse + + dc, ok := dd.(fingerprint.HealthCheck) + require.True(ok) + err = dc.HealthCheck(request, &response) + require.Nil(err) + + driverInfo := response.Drivers["docker"] + require.NotNil(driverInfo) + require.True(driverInfo.Healthy) +}*/ + +func TestDockerDriver_Start_Wait(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + taskCfg := TaskConfig{ + Image: "busybox", + LoadImage: "busybox.tar", + Command: "/bin/nc", + Args: []string{"-l", "127.0.0.1", "-p", "0"}, + } + task := &drivers.TaskConfig{ + ID: uuid.Generate(), + Name: "nc-demo", + Resources: basicResources, + } + require.NoError(t, task.EncodeConcreteDriverConfig(&taskCfg)) + + d, cleanup := dockerDriverHarness(t, task, false) + defer cleanup() + copyImage(t, task.TaskDir(), "busybox.tar") + + handle, net, err := d.StartTask(task) + require.NoError(t, err) + + defer d.DestroyTask(task.ID, true) + + // Attempt to wait + waitCh, err := d.WaitTask(context.Background(), task.ID) + require.NoError(t, err) + + select { + case res := <-waitCh: + t.Fatalf("wait channel should not have recieved an exit result") + case <-time.After(time.Duration(tu.TestMultiplier()*1) * time.Second): + } +} + +func TestDockerDriver_Start_Wait(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if 
!testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "load": "busybox.tar", + "image": "busybox", + "command": "/bin/echo", + "args": []string{"hello"}, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + _, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + // Update should be a no-op + err := handle.Update(task) + if err != nil { + t.Fatalf("err: %v", err) + } + + select { + case res := <-handle.WaitCh(): + if !res.Successful() { + t.Fatalf("err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second): + t.Fatalf("timeout") + } +} + +// TestDockerDriver_Start_StoppedContainer asserts that Nomad will detect a +// stopped task container, remove it, and start a new container. +// +// See https://github.com/hashicorp/nomad/issues/3419 +func TestDockerDriver_Start_StoppedContainer(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "load": "busybox.tar", + "image": "busybox", + "command": "sleep", + "args": []string{"9000"}, + }, + Resources: &structs.Resources{ + MemoryMB: 100, + CPU: 100, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 1, + MaxFileSizeMB: 10, + }, + } + + tctx := testDockerDriverContexts(t, task) + defer tctx.Destroy() + + copyImage(t, tctx.ExecCtx.TaskDir, "busybox.tar") + client := newTestDockerClient(t) + driver := NewDockerDriver(tctx.DriverCtx).(*DockerDriver) + driverConfig := &DockerDriverConfig{ImageName: "busybox", LoadImage: "busybox.tar"} + if _, err := driver.loadImage(driverConfig, client, tctx.ExecCtx.TaskDir); err != nil { + t.Fatalf("error loading image: %v", err) + } + + // Create a container of the same name but don't start it. This mimics + // the case of dockerd getting restarted and stopping containers while + // Nomad is watching them. 
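+	// The container name below is assumed to match the name the driver derives
+	// from the task, so that the driver detects the stale container and replaces
+	// it when the task is started again.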
+ opts := docker.CreateContainerOptions{ + Name: fmt.Sprintf("%s-%s", task.Name, tctx.DriverCtx.allocID), + Config: &docker.Config{ + Image: "busybox", + Cmd: []string{"sleep", "9000"}, + }, + } + if _, err := client.CreateContainer(opts); err != nil { + t.Fatalf("error creating initial container: %v", err) + } + + // Now assert that the driver can still start normally + presp, err := driver.Prestart(tctx.ExecCtx, task) + if err != nil { + driver.Cleanup(tctx.ExecCtx, presp.CreatedResources) + t.Fatalf("error in prestart: %v", err) + } + defer driver.Cleanup(tctx.ExecCtx, presp.CreatedResources) + + sresp, err := driver.Start(tctx.ExecCtx, task) + if err != nil { + t.Fatalf("failed to start driver: %s", err) + } + handle := sresp.Handle.(*DockerHandle) + waitForExist(t, client, handle) + handle.Kill() +} + +func TestDockerDriver_Start_LoadImage(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + task := &structs.Task{ + Name: "busybox-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sh", + "args": []string{ + "-c", + "echo hello > $NOMAD_TASK_DIR/output", + }, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + } + + ctx := testDockerDriverContexts(t, task) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + + // Copy the image into the task's directory + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + + _, err := d.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + resp, err := d.Start(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("err: %v", err) + } + defer resp.Handle.Kill() + + select { + case res := <-resp.Handle.WaitCh(): + if !res.Successful() { + t.Fatalf("err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second): + t.Fatalf("timeout") + } + + // Check that data was written to the shared alloc directory. 
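+	// (The command writes to $NOMAD_TASK_DIR, so the file lands in the task's
+	// local directory rather than the shared alloc directory.)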
+ outputFile := filepath.Join(ctx.ExecCtx.TaskDir.LocalDir, "output") + act, err := ioutil.ReadFile(outputFile) + if err != nil { + t.Fatalf("Couldn't read expected output: %v", err) + } + + exp := "hello" + if strings.TrimSpace(string(act)) != exp { + t.Fatalf("Command outputted %v; want %v", act, exp) + } + +} + +func TestDockerDriver_Start_BadPull_Recoverable(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + task := &structs.Task{ + Name: "busybox-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "127.0.1.1:32121/foo", // bad path + "command": "/bin/echo", + "args": []string{ + "hello", + }, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + } + + ctx := testDockerDriverContexts(t, task) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + + _, err := d.Prestart(ctx.ExecCtx, task) + if err == nil { + t.Fatalf("want error in prestart: %v", err) + } + + if rerr, ok := err.(*structs.RecoverableError); !ok { + t.Fatalf("want recoverable error: %+v", err) + } else if !rerr.IsRecoverable() { + t.Fatalf("error not recoverable: %+v", err) + } +} + +func TestDockerDriver_Start_Wait_AllocDir(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + // This test requires that the alloc dir be mounted into docker as a volume. + // Because this cannot happen when docker is run remotely, e.g. when running + // docker in a VM, we skip this when we detect Docker is being run remotely. + if !testutil.DockerIsConnected(t) || dockerIsRemote(t) { + t.Skip("Docker not connected") + } + + exp := []byte{'w', 'i', 'n'} + file := "output.txt" + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sh", + "args": []string{ + "-c", + fmt.Sprintf(`sleep 1; echo -n %s > $%s/%s`, + string(exp), env.AllocDir, file), + }, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + } + + ctx := testDockerDriverContexts(t, task) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + + _, err := d.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + resp, err := d.Start(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("err: %v", err) + } + defer resp.Handle.Kill() + + select { + case res := <-resp.Handle.WaitCh(): + if !res.Successful() { + t.Fatalf("err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second): + t.Fatalf("timeout") + } + + // Check that data was written to the shared alloc directory. 
+ outputFile := filepath.Join(ctx.AllocDir.SharedDir, file) + act, err := ioutil.ReadFile(outputFile) + if err != nil { + t.Fatalf("Couldn't read expected output: %v", err) + } + + if !reflect.DeepEqual(act, exp) { + t.Fatalf("Command outputted %v; want %v", act, exp) + } +} + +func TestDockerDriver_Start_Kill_Wait(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sleep", + "args": []string{"10"}, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: basicResources, + } + + _, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + go func() { + time.Sleep(100 * time.Millisecond) + err := handle.Kill() + if err != nil { + t.Fatalf("err: %v", err) + } + }() + + select { + case res := <-handle.WaitCh(): + if res.Successful() { + t.Fatalf("should err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*10) * time.Second): + t.Fatalf("timeout") + } +} + +func TestDockerDriver_Start_KillTimeout(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + timeout := 2 * time.Second + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sleep", + "args": []string{"10"}, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: basicResources, + KillTimeout: timeout, + KillSignal: "SIGUSR1", // Pick something that doesn't actually kill it + } + + _, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + // Reduce the timeout for the docker client. + handle.client.SetTimeout(1 * time.Second) + + // Kill the task + var killSent, killed time.Time + go func() { + killSent = time.Now() + if err := handle.Kill(); err != nil { + t.Fatalf("err: %v", err) + } + }() + + select { + case <-handle.WaitCh(): + killed = time.Now() + case <-time.After(10 * time.Second): + t.Fatalf("timeout") + } + + if killed.Sub(killSent) < timeout { + t.Fatalf("kill timeout not respected") + } +} + +func TestDockerDriver_StartN(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task1, _, _ := dockerTask(t) + task2, _, _ := dockerTask(t) + task3, _, _ := dockerTask(t) + taskList := []*structs.Task{task1, task2, task3} + + handles := make([]DriverHandle, len(taskList)) + + t.Logf("Starting %d tasks", len(taskList)) + + // Let's spin up a bunch of things + for idx, task := range taskList { + ctx := testDockerDriverContexts(t, task) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + + _, err := d.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart #%d: %v", idx+1, err) + } + resp, err := d.Start(ctx.ExecCtx, task) + if err != nil { + t.Errorf("Failed starting task #%d: %s", idx+1, err) + continue + } + handles[idx] = resp.Handle + } + + t.Log("All tasks are started. 
Terminating...") + + for idx, handle := range handles { + if handle == nil { + t.Errorf("Bad handle for task #%d", idx+1) + continue + } + + err := handle.Kill() + if err != nil { + t.Errorf("Failed stopping task #%d: %s", idx+1, err) + } + } + + t.Log("Test complete!") +} + +func TestDockerDriver_StartNVersions(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task1, _, _ := dockerTask(t) + task1.Config["image"] = "busybox" + task1.Config["load"] = "busybox.tar" + + task2, _, _ := dockerTask(t) + task2.Config["image"] = "busybox:musl" + task2.Config["load"] = "busybox_musl.tar" + task2.Config["args"] = []string{"-l", "-p", "0"} + + task3, _, _ := dockerTask(t) + task3.Config["image"] = "busybox:glibc" + task3.Config["load"] = "busybox_glibc.tar" + + taskList := []*structs.Task{task1, task2, task3} + + handles := make([]DriverHandle, len(taskList)) + + t.Logf("Starting %d tasks", len(taskList)) + client := newTestDockerClient(t) + + // Let's spin up a bunch of things + for idx, task := range taskList { + ctx := testDockerDriverContexts(t, task) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + copyImage(t, ctx.ExecCtx.TaskDir, "busybox_musl.tar") + copyImage(t, ctx.ExecCtx.TaskDir, "busybox_glibc.tar") + + _, err := d.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart #%d: %v", idx+1, err) + } + resp, err := d.Start(ctx.ExecCtx, task) + if err != nil { + t.Errorf("Failed starting task #%d: %s", idx+1, err) + continue + } + handles[idx] = resp.Handle + waitForExist(t, client, resp.Handle.(*DockerHandle)) + } + + t.Log("All tasks are started. 
Terminating...") + + for idx, handle := range handles { + if handle == nil { + t.Errorf("Bad handle for task #%d", idx+1) + continue + } + + err := handle.Kill() + if err != nil { + t.Errorf("Failed stopping task #%d: %s", idx+1, err) + } + } + + t.Log("Test complete!") +} + +func waitForExist(t *testing.T, client *docker.Client, handle *DockerHandle) { + handle.logger.Printf("[DEBUG] docker.test: waiting for container %s to exist...", handle.ContainerID()) + tu.WaitForResult(func() (bool, error) { + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + if _, ok := err.(*docker.NoSuchContainer); !ok { + return false, err + } + } + + return container != nil, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + handle.logger.Printf("[DEBUG] docker.test: ...container %s exists!", handle.ContainerID()) +} + +func TestDockerDriver_NetworkMode_Host(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + expected := "host" + + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/nc", + "args": []string{"-l", "127.0.0.1", "-p", "0"}, + "network_mode": expected, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + actual := container.HostConfig.NetworkMode + if actual != expected { + t.Fatalf("Got network mode %q; want %q", expected, actual) + } +} + +func TestDockerDriver_NetworkAliases_Bridge(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + // Because go-dockerclient doesn't provide api for query network aliases, just check that + // a container can be created with a 'network_aliases' property + + // Create network, network-scoped alias is supported only for containers in user defined networks + client := newTestDockerClient(t) + networkOpts := docker.CreateNetworkOptions{Name: "foobar", Driver: "bridge"} + network, err := client.CreateNetwork(networkOpts) + if err != nil { + t.Fatalf("err: %v", err) + } + defer client.RemoveNetwork(network.ID) + + expected := []string{"foobar"} + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/nc", + "args": []string{"-l", "127.0.0.1", "-p", "0"}, + "network_mode": network.Name, + "network_aliases": expected, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + client, handle, cleanup := dockerSetupWithClient(t, task, client) + defer cleanup() + + waitForExist(t, client, handle) + + _, err = client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } +} + +func TestDockerDriver_Sysctl_Ulimit(t *testing.T) { + task, _, _ := dockerTask(t) + expectedUlimits := map[string]string{ + "nproc": "4242", + "nofile": "2048:4096", + } + task.Config["sysctl"] = []map[string]string{ + { + "net.core.somaxconn": "16384", + }, + } + task.Config["ulimit"] = []map[string]string{ + expectedUlimits, + } + + 
client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + assert.Nil(t, err, "unexpected error: %v", err) + + want := "16384" + got := container.HostConfig.Sysctls["net.core.somaxconn"] + assert.Equal(t, want, got, "Wrong net.core.somaxconn config for docker job. Expect: %s, got: %s", want, got) + + expectedUlimitLen := 2 + actualUlimitLen := len(container.HostConfig.Ulimits) + assert.Equal(t, want, got, "Wrong number of ulimit configs for docker job. Expect: %d, got: %d", expectedUlimitLen, actualUlimitLen) + + for _, got := range container.HostConfig.Ulimits { + if expectedStr, ok := expectedUlimits[got.Name]; !ok { + t.Errorf("%s config unexpected for docker job.", got.Name) + } else { + if !strings.Contains(expectedStr, ":") { + expectedStr = expectedStr + ":" + expectedStr + } + + splitted := strings.SplitN(expectedStr, ":", 2) + soft, _ := strconv.Atoi(splitted[0]) + hard, _ := strconv.Atoi(splitted[1]) + assert.Equal(t, int64(soft), got.Soft, "Wrong soft %s ulimit for docker job. Expect: %d, got: %d", got.Name, soft, got.Soft) + assert.Equal(t, int64(hard), got.Hard, "Wrong hard %s ulimit for docker job. Expect: %d, got: %d", got.Name, hard, got.Hard) + + } + } +} + +func TestDockerDriver_Sysctl_Ulimit_Errors(t *testing.T) { + brokenConfigs := []interface{}{ + map[string]interface{}{ + "nofile": "", + }, + map[string]interface{}{ + "nofile": "abc:1234", + }, + map[string]interface{}{ + "nofile": "1234:abc", + }, + } + + test_cases := []struct { + ulimitConfig interface{} + err error + }{ + {[]interface{}{brokenConfigs[0]}, fmt.Errorf("Malformed ulimit specification nofile: \"\", cannot be empty")}, + {[]interface{}{brokenConfigs[1]}, fmt.Errorf("Malformed soft ulimit nofile: abc:1234")}, + {[]interface{}{brokenConfigs[2]}, fmt.Errorf("Malformed hard ulimit nofile: 1234:abc")}, + } + + for _, tc := range test_cases { + task, _, _ := dockerTask(t) + task.Config["ulimit"] = tc.ulimitConfig + + ctx := testDockerDriverContexts(t, task) + driver := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + defer ctx.Destroy() + + _, err := driver.Prestart(ctx.ExecCtx, task) + assert.NotNil(t, err, "Expected non nil error") + assert.Equal(t, err.Error(), tc.err.Error(), "unexpected error in prestart, got %v, expected %v", err, tc.err) + } +} + +func TestDockerDriver_Labels(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["labels"] = []map[string]string{ + { + "label1": "value1", + "label2": "value2", + }, + } + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if want, got := 2, len(container.Config.Labels); want != got { + t.Errorf("Wrong labels count for docker job. Expect: %d, got: %d", want, got) + } + + if want, got := "value1", container.Config.Labels["label1"]; want != got { + t.Errorf("Wrong label value docker job. 
Expect: %s, got: %s", want, got) + } +} + +func TestDockerDriver_ForcePull_IsInvalidConfig(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["force_pull"] = "nothing" + + ctx := testDockerDriverContexts(t, task) + defer ctx.Destroy() + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + driver := NewDockerDriver(ctx.DriverCtx) + + if _, err := driver.Prestart(ctx.ExecCtx, task); err == nil { + t.Fatalf("error expected in prestart") + } +} + +func TestDockerDriver_ForcePull(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["force_pull"] = "true" + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + _, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } +} + +func TestDockerDriver_ForcePull_RepoDigest(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["load"] = "" + task.Config["image"] = "library/busybox@sha256:58ac43b2cc92c687a32c8be6278e50a063579655fe3090125dcb2af0ff9e1a64" + localDigest := "sha256:8ac48589692a53a9b8c2d1ceaa6b402665aa7fe667ba51ccc03002300856d8c7" + task.Config["force_pull"] = "true" + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + require.NoError(t, err) + require.Equal(t, localDigest, container.Image) +} + +func TestDockerDriver_SecurityOpt(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["security_opt"] = []string{"seccomp=unconfined"} + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if !reflect.DeepEqual(task.Config["security_opt"], container.HostConfig.SecurityOpt) { + t.Errorf("Security Opts don't match.\nExpected:\n%s\nGot:\n%s\n", task.Config["security_opt"], container.HostConfig.SecurityOpt) + } +} + +func TestDockerDriver_Capabilities(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + if runtime.GOOS == "windows" { + t.Skip("Capabilities not supported on windows") + } + + testCases := []struct { + Name string + CapAdd []string + CapDrop []string + Whitelist string + StartError string + }{ + { + Name: "default-whitelist-add-allowed", + CapAdd: []string{"fowner", "mknod"}, + CapDrop: []string{"all"}, + }, + { + Name: "default-whitelist-add-forbidden", + CapAdd: []string{"net_admin"}, + StartError: "net_admin", + }, + { + Name: "default-whitelist-drop-existing", + CapDrop: []string{"fowner", "mknod"}, + }, + { + Name: "restrictive-whitelist-drop-all", + CapDrop: []string{"all"}, + Whitelist: "fowner,mknod", + }, + { + Name: "restrictive-whitelist-add-allowed", + CapAdd: []string{"fowner", "mknod"}, + CapDrop: []string{"all"}, + Whitelist: "fowner,mknod", + }, + { + Name: "restrictive-whitelist-add-forbidden", + CapAdd: []string{"net_admin", 
"mknod"}, + CapDrop: []string{"all"}, + Whitelist: "fowner,mknod", + StartError: "net_admin", + }, + { + Name: "permissive-whitelist", + CapAdd: []string{"net_admin", "mknod"}, + Whitelist: "all", + }, + { + Name: "permissive-whitelist-add-all", + CapAdd: []string{"all"}, + Whitelist: "all", + }, + } + + for _, tc := range testCases { + t.Run(tc.Name, func(t *testing.T) { + client := newTestDockerClient(t) + task, _, _ := dockerTask(t) + if len(tc.CapAdd) > 0 { + task.Config["cap_add"] = tc.CapAdd + } + if len(tc.CapDrop) > 0 { + task.Config["cap_drop"] = tc.CapDrop + } + + tctx := testDockerDriverContexts(t, task) + if tc.Whitelist != "" { + tctx.DriverCtx.config.Options[dockerCapsWhitelistConfigOption] = tc.Whitelist + } + + driver := NewDockerDriver(tctx.DriverCtx) + copyImage(t, tctx.ExecCtx.TaskDir, "busybox.tar") + defer tctx.Destroy() + + presp, err := driver.Prestart(tctx.ExecCtx, task) + defer driver.Cleanup(tctx.ExecCtx, presp.CreatedResources) + if err != nil { + t.Fatalf("Error in prestart: %v", err) + } + + sresp, err := driver.Start(tctx.ExecCtx, task) + if err == nil && tc.StartError != "" { + t.Fatalf("Expected error in start: %v", tc.StartError) + } else if err != nil { + if tc.StartError == "" { + t.Fatalf("Failed to start driver: %s\nStack\n%s", err, debug.Stack()) + } else if !strings.Contains(err.Error(), tc.StartError) { + t.Fatalf("Expect error containing \"%s\", got %v", tc.StartError, err) + } + return + } + + if sresp.Handle == nil { + t.Fatalf("handle is nil\nStack\n%s", debug.Stack()) + } + defer sresp.Handle.Kill() + handle := sresp.Handle.(*DockerHandle) + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("Error inspecting container: %v", err) + } + + if !reflect.DeepEqual(tc.CapAdd, container.HostConfig.CapAdd) { + t.Errorf("CapAdd doesn't match.\nExpected:\n%s\nGot:\n%s\n", tc.CapAdd, container.HostConfig.CapAdd) + } + + if !reflect.DeepEqual(tc.CapDrop, container.HostConfig.CapDrop) { + t.Errorf("CapDrop doesn't match.\nExpected:\n%s\nGot:\n%s\n", tc.CapDrop, container.HostConfig.CapDrop) + } + }) + } +} + +func TestDockerDriver_DNS(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["dns_servers"] = []string{"8.8.8.8", "8.8.4.4"} + task.Config["dns_search_domains"] = []string{"example.com", "example.org", "example.net"} + task.Config["dns_options"] = []string{"ndots:1"} + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if !reflect.DeepEqual(task.Config["dns_servers"], container.HostConfig.DNS) { + t.Errorf("DNS Servers don't match.\nExpected:\n%s\nGot:\n%s\n", task.Config["dns_servers"], container.HostConfig.DNS) + } + + if !reflect.DeepEqual(task.Config["dns_search_domains"], container.HostConfig.DNSSearch) { + t.Errorf("DNS Search Domains don't match.\nExpected:\n%s\nGot:\n%s\n", task.Config["dns_search_domains"], container.HostConfig.DNSSearch) + } + + if !reflect.DeepEqual(task.Config["dns_options"], container.HostConfig.DNSOptions) { + t.Errorf("DNS Options don't match.\nExpected:\n%s\nGot:\n%s\n", task.Config["dns_options"], container.HostConfig.DNSOptions) + } +} + +func TestDockerDriver_MACAddress(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if 
!testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["mac_address"] = "00:16:3e:00:00:00" + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if container.NetworkSettings.MacAddress != task.Config["mac_address"] { + t.Errorf("expected mac_address=%q but found %q", task.Config["mac_address"], container.NetworkSettings.MacAddress) + } +} + +func TestDockerWorkDir(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["work_dir"] = "/some/path" + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if want, got := "/some/path", container.Config.WorkingDir; want != got { + t.Errorf("Wrong working directory for docker job. Expect: %s, got: %s", want, got) + } +} + +func inSlice(needle string, haystack []string) bool { + for _, h := range haystack { + if h == needle { + return true + } + } + return false +} + +func TestDockerDriver_PortsNoMap(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, res, dyn := dockerTask(t) + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Verify that the correct ports are EXPOSED + expectedExposedPorts := map[docker.Port]struct{}{ + docker.Port(fmt.Sprintf("%d/tcp", res)): {}, + docker.Port(fmt.Sprintf("%d/udp", res)): {}, + docker.Port(fmt.Sprintf("%d/tcp", dyn)): {}, + docker.Port(fmt.Sprintf("%d/udp", dyn)): {}, + } + + if !reflect.DeepEqual(container.Config.ExposedPorts, expectedExposedPorts) { + t.Errorf("Exposed ports don't match.\nExpected:\n%s\nGot:\n%s\n", expectedExposedPorts, container.Config.ExposedPorts) + } + + // Verify that the correct ports are FORWARDED + expectedPortBindings := map[docker.Port][]docker.PortBinding{ + docker.Port(fmt.Sprintf("%d/tcp", res)): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", res)}}, + docker.Port(fmt.Sprintf("%d/udp", res)): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", res)}}, + docker.Port(fmt.Sprintf("%d/tcp", dyn)): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", dyn)}}, + docker.Port(fmt.Sprintf("%d/udp", dyn)): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", dyn)}}, + } + + if !reflect.DeepEqual(container.HostConfig.PortBindings, expectedPortBindings) { + t.Errorf("Forwarded ports don't match.\nExpected:\n%s\nGot:\n%s\n", expectedPortBindings, container.HostConfig.PortBindings) + } + + expectedEnvironment := map[string]string{ + "NOMAD_ADDR_main": fmt.Sprintf("127.0.0.1:%d", res), + "NOMAD_ADDR_REDIS": fmt.Sprintf("127.0.0.1:%d", dyn), + } + + for key, val := range expectedEnvironment { + search := fmt.Sprintf("%s=%s", key, val) + if !inSlice(search, container.Config.Env) { + t.Errorf("Expected to find %s in container environment: %+v", search, container.Config.Env) + } + } +} + +func TestDockerDriver_PortsMapping(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + 
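+	// Remap the reserved "main" port to container port 8080 and the dynamic
+	// "REDIS" port to 6379 via port_map, then verify the resulting bindings.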
+ task, res, dyn := dockerTask(t) + task.Config["port_map"] = []map[string]string{ + { + "main": "8080", + "REDIS": "6379", + }, + } + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Verify that the correct ports are EXPOSED + expectedExposedPorts := map[docker.Port]struct{}{ + docker.Port("8080/tcp"): {}, + docker.Port("8080/udp"): {}, + docker.Port("6379/tcp"): {}, + docker.Port("6379/udp"): {}, + } + + if !reflect.DeepEqual(container.Config.ExposedPorts, expectedExposedPorts) { + t.Errorf("Exposed ports don't match.\nExpected:\n%s\nGot:\n%s\n", expectedExposedPorts, container.Config.ExposedPorts) + } + + // Verify that the correct ports are FORWARDED + expectedPortBindings := map[docker.Port][]docker.PortBinding{ + docker.Port("8080/tcp"): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", res)}}, + docker.Port("8080/udp"): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", res)}}, + docker.Port("6379/tcp"): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", dyn)}}, + docker.Port("6379/udp"): {{HostIP: "127.0.0.1", HostPort: fmt.Sprintf("%d", dyn)}}, + } + + if !reflect.DeepEqual(container.HostConfig.PortBindings, expectedPortBindings) { + t.Errorf("Forwarded ports don't match.\nExpected:\n%s\nGot:\n%s\n", expectedPortBindings, container.HostConfig.PortBindings) + } + + expectedEnvironment := map[string]string{ + "NOMAD_PORT_main": "8080", + "NOMAD_PORT_REDIS": "6379", + "NOMAD_HOST_PORT_main": strconv.Itoa(res), + } + + sort.Strings(container.Config.Env) + for key, val := range expectedEnvironment { + search := fmt.Sprintf("%s=%s", key, val) + if !inSlice(search, container.Config.Env) { + t.Errorf("Expected to find %s in container environment:\n%s\n\n", search, strings.Join(container.Config.Env, "\n")) + } + } +} + +func TestDockerDriver_User(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task := &structs.Task{ + Name: "redis-demo", + User: "alice", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sleep", + "args": []string{"10000"}, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + ctx := testDockerDriverContexts(t, task) + //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} + driver := NewDockerDriver(ctx.DriverCtx) + defer ctx.Destroy() + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + + _, err := driver.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + + // It should fail because the user "alice" does not exist on the given + // image. 
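+	// (The stock busybox image ships no "alice" user, so Start should fail
+	// and the error is expected to mention the missing user.)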
+ resp, err := driver.Start(ctx.ExecCtx, task) + if err == nil { + resp.Handle.Kill() + t.Fatalf("Should've failed") + } + + if !strings.Contains(err.Error(), "alice") { + t.Fatalf("Expected failure string not found, found %q instead", err.Error()) + } +} + +func TestDockerDriver_CleanupContainer(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task := &structs.Task{ + Name: "redis-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/echo", + "args": []string{"hello"}, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + _, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + // Update should be a no-op + err := handle.Update(task) + if err != nil { + t.Fatalf("err: %v", err) + } + + select { + case res := <-handle.WaitCh(): + if !res.Successful() { + t.Fatalf("err: %v", res) + } + + time.Sleep(3 * time.Second) + + // Ensure that the container isn't present + _, err := client.InspectContainer(handle.containerID) + if err == nil { + t.Fatalf("expected to not get container") + } + + case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second): + t.Fatalf("timeout") + } +} + +func TestDockerDriver_Stats(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task := &structs.Task{ + Name: "sleep", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sleep", + "args": []string{"100"}, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: basicResources, + } + + _, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + go func() { + time.Sleep(3 * time.Second) + ru, err := handle.Stats() + if err != nil { + t.Fatalf("err: %v", err) + } + if ru.ResourceUsage == nil { + handle.Kill() + t.Fatalf("expected resource usage") + } + err = handle.Kill() + if err != nil { + t.Fatalf("err: %v", err) + } + }() + + select { + case res := <-handle.WaitCh(): + if res.Successful() { + t.Fatalf("should err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*10) * time.Second): + t.Fatalf("timeout") + } +} + +func setupDockerVolumes(t *testing.T, cfg *config.Config, hostpath string) (*structs.Task, Driver, *ExecContext, string, func()) { + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + randfn := fmt.Sprintf("test-%d", rand.Int()) + hostfile := filepath.Join(hostpath, randfn) + containerPath := "/mnt/vol" + containerFile := filepath.Join(containerPath, randfn) + + task := &structs.Task{ + Name: "ls", + Env: map[string]string{"VOL_PATH": containerPath}, + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "touch", + "args": []string{containerFile}, + "volumes": []string{fmt.Sprintf("%s:${VOL_PATH}", hostpath)}, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: basicResources, + } + + // Build alloc and task directory structure + allocDir := allocdir.NewAllocDir(testlog.HCLogger(t), filepath.Join(cfg.AllocDir, uuid.Generate())) + if err := allocDir.Build(); err != nil { + t.Fatalf("failed to build alloc dir: %v", err) + } + taskDir := allocDir.NewTaskDir(task.Name) + 
if err := taskDir.Build(false, nil, cstructs.FSIsolationImage); err != nil { + allocDir.Destroy() + t.Fatalf("failed to build task dir: %v", err) + } + copyImage(t, taskDir, "busybox.tar") + + // Setup driver + alloc := mock.Alloc() + logger := testlog.Logger(t) + emitter := func(m string, args ...interface{}) { + logger.Printf("[EVENT] "+m, args...) + } + driverCtx := NewDriverContext(alloc.Job.Name, alloc.TaskGroup, task.Name, alloc.ID, cfg, cfg.Node, testlog.Logger(t), emitter) + driver := NewDockerDriver(driverCtx) + + // Setup execCtx + envBuilder := env.NewBuilder(cfg.Node, alloc, task, cfg.Region) + SetEnvvars(envBuilder, driver.FSIsolation(), taskDir, cfg) + execCtx := NewExecContext(taskDir, envBuilder.Build()) + + // Setup cleanup function + cleanup := func() { + allocDir.Destroy() + if filepath.IsAbs(hostpath) { + os.RemoveAll(hostpath) + } + } + return task, driver, execCtx, hostfile, cleanup +} + +func TestDockerDriver_VolumesDisabled(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + cfg := testConfig(t) + cfg.Options = map[string]string{ + dockerVolumesConfigOption: "false", + "docker.cleanup.image": "false", + } + + { + tmpvol, err := ioutil.TempDir("", "nomadtest_docker_volumesdisabled") + if err != nil { + t.Fatalf("error creating temporary dir: %v", err) + } + + task, driver, execCtx, _, cleanup := setupDockerVolumes(t, cfg, tmpvol) + defer cleanup() + + _, err = driver.Prestart(execCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + if _, err := driver.Start(execCtx, task); err == nil { + t.Fatalf("Started driver successfully when volumes should have been disabled.") + } + } + + // Relative paths should still be allowed + { + task, driver, execCtx, fn, cleanup := setupDockerVolumes(t, cfg, ".") + defer cleanup() + + _, err := driver.Prestart(execCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + resp, err := driver.Start(execCtx, task) + if err != nil { + t.Fatalf("err: %v", err) + } + defer resp.Handle.Kill() + + select { + case res := <-resp.Handle.WaitCh(): + if !res.Successful() { + t.Fatalf("unexpected err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*10) * time.Second): + t.Fatalf("timeout") + } + + if _, err := ioutil.ReadFile(filepath.Join(execCtx.TaskDir.Dir, fn)); err != nil { + t.Fatalf("unexpected error reading %s: %v", fn, err) + } + } + + // Volume Drivers should be rejected (error) + { + task, driver, execCtx, _, cleanup := setupDockerVolumes(t, cfg, "fake_flocker_vol") + defer cleanup() + task.Config["volume_driver"] = "flocker" + + if _, err := driver.Prestart(execCtx, task); err != nil { + t.Fatalf("error in prestart: %v", err) + } + if _, err := driver.Start(execCtx, task); err == nil { + t.Fatalf("Started driver successfully when volume drivers should have been disabled.") + } + } + +} + +func TestDockerDriver_VolumesEnabled(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + cfg := testConfig(t) + + tmpvol, err := ioutil.TempDir("", "nomadtest_docker_volumesenabled") + if err != nil { + t.Fatalf("error creating temporary dir: %v", err) + } + + // Evaluate symlinks so it works on MacOS + tmpvol, err = filepath.EvalSymlinks(tmpvol) + if err != nil { + t.Fatalf("error evaluating symlinks: %v", err) + } + + task, driver, execCtx, hostpath, cleanup := setupDockerVolumes(t, cfg, tmpvol) + defer cleanup() + + _, err = 
driver.Prestart(execCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + resp, err := driver.Start(execCtx, task) + if err != nil { + t.Fatalf("Failed to start docker driver: %v", err) + } + defer resp.Handle.Kill() + + select { + case res := <-resp.Handle.WaitCh(): + if !res.Successful() { + t.Fatalf("unexpected err: %v", res) + } + case <-time.After(time.Duration(tu.TestMultiplier()*10) * time.Second): + t.Fatalf("timeout") + } + + if _, err := ioutil.ReadFile(hostpath); err != nil { + t.Fatalf("unexpected error reading %s: %v", hostpath, err) + } +} + +func TestDockerDriver_Mounts(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + goodMount := map[string]interface{}{ + "target": "/nomad", + "volume_options": []interface{}{ + map[string]interface{}{ + "labels": []interface{}{ + map[string]string{"foo": "bar"}, + }, + "driver_config": []interface{}{ + map[string]interface{}{ + "name": "local", + "options": []interface{}{ + map[string]interface{}{ + "foo": "bar", + }, + }, + }, + }, + }, + }, + "readonly": true, + "source": "test", + } + + cases := []struct { + Name string + Mounts []interface{} + Error string + }{ + { + Name: "good-one", + Error: "", + Mounts: []interface{}{goodMount}, + }, + { + Name: "good-many", + Error: "", + Mounts: []interface{}{goodMount, goodMount, goodMount}, + }, + { + Name: "multiple volume options", + Error: "Only one volume_options stanza allowed", + Mounts: []interface{}{ + map[string]interface{}{ + "target": "/nomad", + "volume_options": []interface{}{ + map[string]interface{}{ + "driver_config": []interface{}{ + map[string]interface{}{ + "name": "local", + }, + }, + }, + map[string]interface{}{ + "driver_config": []interface{}{ + map[string]interface{}{ + "name": "local", + }, + }, + }, + }, + }, + }, + }, + { + Name: "multiple driver configs", + Error: "volume driver config may only be specified once", + Mounts: []interface{}{ + map[string]interface{}{ + "target": "/nomad", + "volume_options": []interface{}{ + map[string]interface{}{ + "driver_config": []interface{}{ + map[string]interface{}{ + "name": "local", + }, + map[string]interface{}{ + "name": "local", + }, + }, + }, + }, + }, + }, + }, + { + Name: "multiple volume labels", + Error: "labels may only be", + Mounts: []interface{}{ + map[string]interface{}{ + "target": "/nomad", + "volume_options": []interface{}{ + map[string]interface{}{ + "labels": []interface{}{ + map[string]string{"foo": "bar"}, + map[string]string{"baz": "bam"}, + }, + }, + }, + }, + }, + }, + { + Name: "multiple driver options", + Error: "driver options may only", + Mounts: []interface{}{ + map[string]interface{}{ + "target": "/nomad", + "volume_options": []interface{}{ + map[string]interface{}{ + "driver_config": []interface{}{ + map[string]interface{}{ + "name": "local", + "options": []interface{}{ + map[string]interface{}{ + "foo": "bar", + }, + map[string]interface{}{ + "bam": "bar", + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + + task := &structs.Task{ + Name: "redis-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/sleep", + "args": []string{"10000"}, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + for _, c := range cases { + t.Run(c.Name, func(t *testing.T) { + // Build the task + task.Config["mounts"] = c.Mounts + + ctx := 
testDockerDriverContexts(t, task) + driver := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + defer ctx.Destroy() + + _, err := driver.Prestart(ctx.ExecCtx, task) + if err == nil && c.Error != "" { + t.Fatalf("expected error: %v", c.Error) + } else if err != nil { + if c.Error == "" { + t.Fatalf("unexpected error in prestart: %v", err) + } else if !strings.Contains(err.Error(), c.Error) { + t.Fatalf("expected error %q; got %v", c.Error, err) + } + } + }) + } +} + +// TestDockerDriver_Cleanup ensures Cleanup removes only downloaded images. +func TestDockerDriver_Cleanup(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + imageName := "hello-world:latest" + task := &structs.Task{ + Name: "cleanup_test", + Driver: "docker", + Config: map[string]interface{}{ + "image": imageName, + }, + } + tctx := testDockerDriverContexts(t, task) + defer tctx.Destroy() + + // Run Prestart + driver := NewDockerDriver(tctx.DriverCtx).(*DockerDriver) + resp, err := driver.Prestart(tctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + res := resp.CreatedResources + if len(res.Resources) == 0 || len(res.Resources[dockerImageResKey]) == 0 { + t.Fatalf("no created resources: %#v", res) + } + + // Cleanup + rescopy := res.Copy() + if err := driver.Cleanup(tctx.ExecCtx, rescopy); err != nil { + t.Fatalf("Cleanup failed: %v", err) + } + + // Make sure rescopy is updated + if len(rescopy.Resources) > 0 { + t.Errorf("Cleanup should have cleared resource map: %#v", rescopy.Resources) + } + + // Ensure image was removed + tu.WaitForResult(func() (bool, error) { + if _, err := client.InspectImage(driver.driverConfig.ImageName); err == nil { + return false, fmt.Errorf("image exists but should have been removed. Does another %v container exist?", imageName) + } + + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // The image doesn't exist which shouldn't be an error when calling + // Cleanup, so call it again to make sure. 
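+	// In other words, Cleanup should be idempotent: a second call with the
+	// same, already-removed resources is expected to return nil.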
+ if err := driver.Cleanup(tctx.ExecCtx, res.Copy()); err != nil { + t.Fatalf("Cleanup failed: %v", err) + } +} + +func copyImage(t *testing.T, taskDir *allocdir.TaskDir, image string) { + dst := filepath.Join(taskDir.LocalDir, image) + copyFile(filepath.Join("./test-resources/docker", image), dst, t) +} + +func TestDockerDriver_AuthConfiguration(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + path := "./test-resources/docker/auth.json" + cases := []struct { + Repo string + AuthConfig *docker.AuthConfiguration + }{ + { + Repo: "lolwhat.com/what:1337", + AuthConfig: nil, + }, + { + Repo: "redis:3.2", + AuthConfig: &docker.AuthConfiguration{ + Username: "test", + Password: "1234", + Email: "", + ServerAddress: "https://index.docker.io/v1/", + }, + }, + { + Repo: "quay.io/redis:3.2", + AuthConfig: &docker.AuthConfiguration{ + Username: "test", + Password: "5678", + Email: "", + ServerAddress: "quay.io", + }, + }, + { + Repo: "other.io/redis:3.2", + AuthConfig: &docker.AuthConfiguration{ + Username: "test", + Password: "abcd", + Email: "", + ServerAddress: "https://other.io/v1/", + }, + }, + } + + for i, c := range cases { + act, err := authFromDockerConfig(path)(c.Repo) + if err != nil { + t.Fatalf("Test %d failed: %v", i+1, err) + } + + if !reflect.DeepEqual(act, c.AuthConfig) { + t.Fatalf("Test %d failed: Unexpected auth config: got %+v; want %+v", i+1, act, c.AuthConfig) + } + } +} + +func TestDockerDriver_OOMKilled(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task := &structs.Task{ + Name: "oom-killed", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "sh", + // Incrementally creates a bigger and bigger variable. 
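+			// With MemoryMB limited to 10 below, the loop should exhaust the
+			// memory limit quickly; the container is expected to be OOM-killed
+			// and the driver to surface that as an "OOM Killed" error.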
+ "args": []string{"-c", "x=a; while true; do eval x='$x$x'; done"}, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: &structs.Resources{ + CPU: 250, + MemoryMB: 10, + DiskMB: 20, + Networks: []*structs.NetworkResource{}, + }, + } + + _, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + select { + case res := <-handle.WaitCh(): + if res.Successful() { + t.Fatalf("expected error, but container exited successful") + } + + if res.Err.Error() != "OOM Killed" { + t.Fatalf("not killed by OOM killer: %s", res.Err) + } + + t.Logf("Successfully killed by OOM killer") + + case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second): + t.Fatalf("timeout") + } +} + +func TestDockerDriver_Devices_IsInvalidConfig(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + brokenConfigs := []interface{}{ + map[string]interface{}{ + "host_path": "", + }, + map[string]interface{}{ + "host_path": "/dev/sda1", + "cgroup_permissions": "rxb", + }, + } + + test_cases := []struct { + deviceConfig interface{} + err error + }{ + {[]interface{}{brokenConfigs[0]}, fmt.Errorf("host path must be set in configuration for devices")}, + {[]interface{}{brokenConfigs[1]}, fmt.Errorf("invalid cgroup permission string: \"rxb\"")}, + } + + for _, tc := range test_cases { + task, _, _ := dockerTask(t) + task.Config["devices"] = tc.deviceConfig + + ctx := testDockerDriverContexts(t, task) + driver := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + defer ctx.Destroy() + + if _, err := driver.Prestart(ctx.ExecCtx, task); err == nil || err.Error() != tc.err.Error() { + t.Fatalf("error expected in prestart, got %v, expected %v", err, tc.err) + } + } +} + +func TestDockerDriver_Device_Success(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + if runtime.GOOS != "linux" { + t.Skip("test device mounts only on linux") + } + + hostPath := "/dev/random" + containerPath := "/dev/myrandom" + perms := "rwm" + + expectedDevice := docker.Device{ + PathOnHost: hostPath, + PathInContainer: containerPath, + CgroupPermissions: perms, + } + config := map[string]interface{}{ + "host_path": hostPath, + "container_path": containerPath, + } + + task, _, _ := dockerTask(t) + task.Config["devices"] = []interface{}{config} + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + assert.NotEmpty(t, container.HostConfig.Devices, "Expected one device") + assert.Equal(t, expectedDevice, container.HostConfig.Devices[0], "Incorrect device ") +} + +func TestDockerDriver_Entrypoint(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + entrypoint := []string{"/bin/sh", "-c"} + task, _, _ := dockerTask(t) + task.Config["entrypoint"] = entrypoint + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + if err != nil { + t.Fatalf("err: %v", err) + } + + require.Len(t, container.Config.Entrypoint, 2, "Expected one entrypoint") + require.Equal(t, entrypoint, container.Config.Entrypoint, "Incorrect entrypoint ") +} + +func TestDockerDriver_Kill(t 
*testing.T) { + assert := assert.New(t) + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + // Tasks started with a signal that is not supported should not error + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + KillSignal: "SIGKILL", + Config: map[string]interface{}{ + "load": "busybox.tar", + "image": "busybox", + "command": "/bin/nc", + "args": []string{"-l", "127.0.0.1", "-p", "0"}, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + Resources: basicResources, + } + + ctx := testDockerDriverContexts(t, task) + defer ctx.Destroy() + d := NewDockerDriver(ctx.DriverCtx) + copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar") + + _, err := d.Prestart(ctx.ExecCtx, task) + if err != nil { + t.Fatalf("error in prestart: %v", err) + } + + resp, err := d.Start(ctx.ExecCtx, task) + assert.Nil(err) + assert.NotNil(resp.Handle) + + handle := resp.Handle.(*DockerHandle) + waitForExist(t, client, handle) + err = handle.Kill() + assert.Nil(err) +} + +func TestDockerDriver_ReadonlyRootfs(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + task, _, _ := dockerTask(t) + task.Config["readonly_rootfs"] = true + + client, handle, cleanup := dockerSetup(t, task) + defer cleanup() + + waitForExist(t, client, handle) + + container, err := client.InspectContainer(handle.ContainerID()) + assert.Nil(t, err, "Error inspecting container: %v", err) + + assert.True(t, container.HostConfig.ReadonlyRootfs, "ReadonlyRootfs option not set") +} + +// fakeDockerClient can be used in places that accept an interface for the +// docker client such as createContainer. +type fakeDockerClient struct{} + +func (fakeDockerClient) CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) { + return nil, fmt.Errorf("volume is attached on another node") +} +func (fakeDockerClient) InspectContainer(id string) (*docker.Container, error) { + panic("not implemented") +} +func (fakeDockerClient) ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error) { + panic("not implemented") +} +func (fakeDockerClient) RemoveContainer(opts docker.RemoveContainerOptions) error { + panic("not implemented") +} + +// TestDockerDriver_VolumeError asserts volume related errors when creating a +// container are recoverable. 
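+// The fakeDockerClient above always fails CreateContainer with a "volume is
+// attached on another node" message; createContainer is expected to wrap that
+// as a recoverable error so the task can be retried instead of failing hard.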
+func TestDockerDriver_VolumeError(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + + // setup + task, _, _ := dockerTask(t) + tctx := testDockerDriverContexts(t, task) + driver := NewDockerDriver(tctx.DriverCtx).(*DockerDriver) + driver.driverConfig = &DockerDriverConfig{ImageName: "test"} + + // assert volume error is recoverable + _, err := driver.createContainer(fakeDockerClient{}, docker.CreateContainerOptions{}) + require.True(t, structs.IsRecoverable(err)) +} + +func TestDockerDriver_AdvertiseIPv6Address(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("Docker not connected") + } + + expectedPrefix := "2001:db8:1::242:ac11" + expectedAdvertise := true + task := &structs.Task{ + Name: "nc-demo", + Driver: "docker", + Config: map[string]interface{}{ + "image": "busybox", + "load": "busybox.tar", + "command": "/bin/nc", + "args": []string{"-l", "127.0.0.1", "-p", "0"}, + "advertise_ipv6_address": expectedAdvertise, + }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, + LogConfig: &structs.LogConfig{ + MaxFiles: 10, + MaxFileSizeMB: 10, + }, + } + + client := newTestDockerClient(t) + + // Make sure IPv6 is enabled + net, err := client.NetworkInfo("bridge") + if err != nil { + t.Skip("error retrieving bridge network information, skipping") + } + if net == nil || !net.EnableIPv6 { + t.Skip("IPv6 not enabled on bridge network, skipping") + } + + tctx := testDockerDriverContexts(t, task) + driver := NewDockerDriver(tctx.DriverCtx) + copyImage(t, tctx.ExecCtx.TaskDir, "busybox.tar") + defer tctx.Destroy() + + presp, err := driver.Prestart(tctx.ExecCtx, task) + defer driver.Cleanup(tctx.ExecCtx, presp.CreatedResources) + if err != nil { + t.Fatalf("Error in prestart: %v", err) + } + + sresp, err := driver.Start(tctx.ExecCtx, task) + if err != nil { + t.Fatalf("Error in start: %v", err) + } + + if sresp.Handle == nil { + t.Fatalf("handle is nil\nStack\n%s", debug.Stack()) + } + + assert.Equal(t, expectedAdvertise, sresp.Network.AutoAdvertise, "Wrong autoadvertise. 
Expect: %v, got: %v", expectedAdvertise, sresp.Network.AutoAdvertise)
+
+	if !strings.HasPrefix(sresp.Network.IP, expectedPrefix) {
+		t.Fatalf("Got IP address %q want IP address with prefix %q", sresp.Network.IP, expectedPrefix)
+	}
+
+	defer sresp.Handle.Kill()
+	handle := sresp.Handle.(*DockerHandle)
+
+	waitForExist(t, client, handle)
+
+	container, err := client.InspectContainer(handle.ContainerID())
+	if err != nil {
+		t.Fatalf("Error inspecting container: %v", err)
+	}
+
+	if !strings.HasPrefix(container.NetworkSettings.GlobalIPv6Address, expectedPrefix) {
+		t.Fatalf("Got GlobalIPv6Address %s want GlobalIPv6Address with prefix %s", container.NetworkSettings.GlobalIPv6Address, expectedPrefix)
+	}
+}
+
+func TestParseDockerImage(t *testing.T) {
+	tests := []struct {
+		Image string
+		Repo  string
+		Tag   string
+	}{
+		{"library/hello-world:1.0", "library/hello-world", "1.0"},
+		{"library/hello-world", "library/hello-world", "latest"},
+		{"library/hello-world:latest", "library/hello-world", "latest"},
+		{"library/hello-world@sha256:f5233545e43561214ca4891fd1157e1c3c563316ed8e237750d59bde73361e77", "library/hello-world@sha256:f5233545e43561214ca4891fd1157e1c3c563316ed8e237750d59bde73361e77", ""},
+	}
+	for _, test := range tests {
+		t.Run(test.Image, func(t *testing.T) {
+			repo, tag := parseDockerImage(test.Image)
+			require.Equal(t, test.Repo, repo)
+			require.Equal(t, test.Tag, tag)
+		})
+	}
+}
+
+func TestDockerImageRef(t *testing.T) {
+	tests := []struct {
+		Image string
+		Repo  string
+		Tag   string
+	}{
+		{"library/hello-world:1.0", "library/hello-world", "1.0"},
+		{"library/hello-world:latest", "library/hello-world", "latest"},
+		{"library/hello-world@sha256:f5233545e43561214ca4891fd1157e1c3c563316ed8e237750d59bde73361e77", "library/hello-world@sha256:f5233545e43561214ca4891fd1157e1c3c563316ed8e237750d59bde73361e77", ""},
+	}
+	for _, test := range tests {
+		t.Run(test.Image, func(t *testing.T) {
+			image := dockerImageRef(test.Repo, test.Tag)
+			require.Equal(t, test.Image, image)
+		})
+	}
+}
+
+func TestDockerDriver_CPUCFSPeriod(t *testing.T) {
+	if !tu.IsTravis() {
+		t.Parallel()
+	}
+	if !testutil.DockerIsConnected(t) {
+		t.Skip("Docker not connected")
+	}
+
+	task, _, _ := dockerTask(t)
+	task.Config["cpu_hard_limit"] = true
+	task.Config["cpu_cfs_period"] = 1000000
+
+	client, handle, cleanup := dockerSetup(t, task)
+	defer cleanup()
+
+	waitForExist(t, client, handle)
+
+	container, err := client.InspectContainer(handle.ContainerID())
+	assert.Nil(t, err, "Error inspecting container: %v", err)
+	assert.Equal(t, int64(1000000), container.HostConfig.CPUPeriod, "cpu_cfs_period not set on container")
+}
diff --git a/drivers/docker/driver_unix_test.go b/drivers/docker/driver_unix_test.go
new file mode 100644
index 000000000..96004a1e9
--- /dev/null
+++ b/drivers/docker/driver_unix_test.go
@@ -0,0 +1,105 @@
+// +build !windows
+
+package docker
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"testing"
+	"time"
+
+	"github.com/hashicorp/nomad/client/testutil"
+	"github.com/hashicorp/nomad/nomad/structs"
+	tu "github.com/hashicorp/nomad/testutil"
+)
+
+func TestDockerDriver_Signal(t *testing.T) {
+	if !tu.IsTravis() {
+		t.Parallel()
+	}
+	if !testutil.DockerIsConnected(t) {
+		t.Skip("Docker not connected")
+	}
+
+	task := &structs.Task{
+		Name:   "redis-demo",
+		Driver: "docker",
+		Config: map[string]interface{}{
+			"image":   "busybox",
+			"load":    "busybox.tar",
+			"command": "/bin/sh",
+			"args":    []string{"local/test.sh"},
+		},
+		Resources: &structs.Resources{
+			MemoryMB: 256,
+			CPU:      512,
+		},
+		LogConfig: &structs.LogConfig{
+			MaxFiles:      10,
+			MaxFileSizeMB: 10,
+		},
+	}
+
+	ctx := testDockerDriverContexts(t, task)
+	defer ctx.Destroy()
+	d := NewDockerDriver(ctx.DriverCtx)
+
+	// Copy the image into the task's directory
+	copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar")
+
+	testFile := filepath.Join(ctx.ExecCtx.TaskDir.LocalDir, "test.sh")
+	testData := []byte(`
+at_term() {
+	echo 'Terminated.' > $NOMAD_TASK_DIR/output
+	exit 3
+}
+trap at_term INT
+while true; do
+	echo 'sleeping'
+	sleep 0.2
+done
+	`)
+	if err := ioutil.WriteFile(testFile, testData, 0777); err != nil {
+		t.Fatalf("Failed to write data: %v", err)
+	}
+
+	_, err := d.Prestart(ctx.ExecCtx, task)
+	if err != nil {
+		t.Fatalf("error in prestart: %v", err)
+	}
+	resp, err := d.Start(ctx.ExecCtx, task)
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	defer resp.Handle.Kill()
+
+	waitForExist(t, resp.Handle.(*DockerHandle).client, resp.Handle.(*DockerHandle))
+
+	time.Sleep(1 * time.Second)
+	if err := resp.Handle.Signal(syscall.SIGINT); err != nil {
+		t.Fatalf("Signal returned an error: %v", err)
+	}
+
+	select {
+	case res := <-resp.Handle.WaitCh():
+		if res.Successful() {
+			t.Fatalf("should err: %v", res)
+		}
+	case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second):
+		t.Fatalf("timeout")
+	}
+
+	// Check the log file to see it exited because of the signal
+	outputFile := filepath.Join(ctx.ExecCtx.TaskDir.LocalDir, "output")
+	act, err := ioutil.ReadFile(outputFile)
+	if err != nil {
+		t.Fatalf("Couldn't read expected output: %v", err)
+	}
+
+	exp := "Terminated."
+	if strings.TrimSpace(string(act)) != exp {
+		t.Fatalf("Command output %q; want %q", string(act), exp)
+	}
+}
diff --git a/drivers/docker/driver_windows.go b/drivers/docker/driver_windows.go
new file mode 100644
index 000000000..832171a6e
--- /dev/null
+++ b/drivers/docker/driver_windows.go
@@ -0,0 +1,17 @@
+package docker
+
+import docker "github.com/fsouza/go-dockerclient"
+
+const (
+	// Default network mode for windows containers is nat
+	defaultNetworkMode = "nat"
+)
+
+// Currently Windows containers don't support host IP in port binding.
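+// getPortBinding therefore ignores the ip argument and always binds with an
+// empty HostIP; for example, getPortBinding("10.0.0.1", "8080") yields a
+// single PortBinding with HostIP "" and HostPort "8080".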
+func getPortBinding(ip string, port string) []docker.PortBinding { + return []docker.PortBinding{{HostIP: "", HostPort: port}} +} + +func tweakCapabilities(basics, adds, drops []string) ([]string, error) { + return nil, nil +} diff --git a/drivers/docker/handle.go b/drivers/docker/handle.go new file mode 100644 index 000000000..cb61a34da --- /dev/null +++ b/drivers/docker/handle.go @@ -0,0 +1,278 @@ +package docker + +import ( + "fmt" + "os" + "runtime" + "strings" + "sync" + "syscall" + "time" + + "github.com/armon/circbuf" + metrics "github.com/armon/go-metrics" + docker "github.com/fsouza/go-dockerclient" + hclog "github.com/hashicorp/go-hclog" + plugin "github.com/hashicorp/go-plugin" + "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/drivers/docker/docklog" + "github.com/hashicorp/nomad/helper/stats" + "github.com/hashicorp/nomad/plugins/drivers" + "golang.org/x/net/context" +) + +type taskHandle struct { + client *docker.Client + waitClient *docker.Client + logger hclog.Logger + dlogger docklog.DockerLogger + dloggerPluginClient *plugin.Client + task *drivers.TaskConfig + container *docker.Container + resourceUsageLock sync.RWMutex + resourceUsage *structs.TaskResourceUsage + doneCh chan bool + waitCh chan struct{} + removeContainerOnExit bool + net *structs.DriverNetwork + + startedAt time.Time + completedAt time.Time + exitResult *drivers.ExitResult +} + +func (h *taskHandle) Exec(ctx context.Context, cmd string, args []string) (*drivers.ExecTaskResult, error) { + fullCmd := make([]string, len(args)+1) + fullCmd[0] = cmd + copy(fullCmd[1:], args) + createExecOpts := docker.CreateExecOptions{ + AttachStdin: false, + AttachStdout: true, + AttachStderr: true, + Tty: false, + Cmd: fullCmd, + Container: h.container.ID, + Context: ctx, + } + exec, err := h.client.CreateExec(createExecOpts) + if err != nil { + return nil, err + } + + execResult := &drivers.ExecTaskResult{ExitResult: &drivers.ExitResult{}} + stdout, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) + stderr, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) + startOpts := docker.StartExecOptions{ + Detach: false, + Tty: false, + OutputStream: stdout, + ErrorStream: stderr, + Context: ctx, + } + if err := client.StartExec(exec.ID, startOpts); err != nil { + return nil, err + } + execResult.Stdout = stdout.Bytes() + execResult.Stderr = stderr.Bytes() + res, err := client.InspectExec(exec.ID) + if err != nil { + return execResult, err + } + + execResult.ExitResult.ExitCode = res.ExitCode + return execResult, nil +} + +func (h *taskHandle) Signal(s os.Signal) error { + // Convert types + sysSig, ok := s.(syscall.Signal) + if !ok { + return fmt.Errorf("Failed to determine signal number") + } + + // TODO When we expose signals we will need a mapping layer that converts + // MacOS signals to the correct signal number for docker. Or we change the + // interface to take a signal string and leave it up to driver to map? + + dockerSignal := docker.Signal(sysSig) + opts := docker.KillContainerOptions{ + ID: h.container.ID, + Signal: dockerSignal, + } + return h.client.KillContainer(opts) + +} + +// Kill is used to terminate the task. 
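+// If killTimeout is positive the configured signal is sent first and the task
+// is given killTimeout to exit on its own; if it does, Kill returns early.
+// Otherwise (or when no timeout is set) the container is stopped directly.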
+func (h *taskHandle) Kill(killTimeout time.Duration, signal os.Signal) error { + // Only send signal if killTimeout is set, otherwise stop container + if killTimeout > 0 { + if err := h.Signal(signal); err != nil { + return err + } + select { + case <-h.waitCh: + return nil + case <-time.After(killTimeout): + } + } + + // Stop the container + err := h.client.StopContainer(h.container.ID, 0) + if err != nil { + + // Container has already been removed. + if strings.Contains(err.Error(), NoSuchContainerError) { + h.logger.Debug("attempted to stop nonexistent container") + return nil + } + h.logger.Error("failed to stop container", "error", err) + return fmt.Errorf("Failed to stop container %s: %s", h.container.ID, err) + } + h.logger.Info("stopped container") + return nil +} + +func (h *taskHandle) Stats() (*structs.TaskResourceUsage, error) { + h.resourceUsageLock.RLock() + defer h.resourceUsageLock.RUnlock() + var err error + if h.resourceUsage == nil { + err = fmt.Errorf("stats collection hasn't started yet") + } + return h.resourceUsage, err +} + +func (h *taskHandle) run() { + exitCode, werr := h.waitClient.WaitContainer(h.container.ID) + if werr != nil { + h.logger.Error("failed to wait for container; already terminated") + } + + if exitCode != 0 { + werr = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) + } + + container, ierr := h.waitClient.InspectContainer(h.container.ID) + oom := false + if ierr != nil { + h.logger.Error("failed to inspect container", "error", ierr) + } else if container.State.OOMKilled { + oom = true + werr = fmt.Errorf("OOM Killed") + labels := []metrics.Label{ + { + Name: "job", + Value: h.task.JobName, + }, + { + Name: "task_group", + Value: h.task.TaskGroupName, + }, + { + Name: "task", + Value: h.task.Name, + }, + } + metrics.IncrCounterWithLabels([]string{"driver", "docker", "oom"}, 1, labels) + } + + close(h.doneCh) + + // Shutdown the syslog collector + + // Stop the container just incase the docker daemon's wait returned + // incorrectly + if err := h.client.StopContainer(h.container.ID, 0); err != nil { + _, noSuchContainer := err.(*docker.NoSuchContainer) + _, containerNotRunning := err.(*docker.ContainerNotRunning) + if !containerNotRunning && !noSuchContainer { + h.logger.Error("error stopping container", "error", err) + } + } + + // Remove the container + if h.removeContainerOnExit == true { + if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.container.ID, RemoveVolumes: true, Force: true}); err != nil { + h.logger.Error("error removing container", "error", err) + } + } else { + h.logger.Debug("not removing container due to config") + } + + // Set the result + h.exitResult = &drivers.ExitResult{ + ExitCode: exitCode, + Signal: 0, + OOMKilled: oom, + } + close(h.waitCh) +} + +// collectStats starts collecting resource usage stats of a docker container +func (h *taskHandle) collectStats() { + + statsCh := make(chan *docker.Stats) + statsOpts := docker.StatsOptions{ID: h.container.ID, Done: h.doneCh, Stats: statsCh, Stream: true} + go func() { + //TODO handle Stats error + if err := h.waitClient.Stats(statsOpts); err != nil { + h.logger.Debug("error collecting stats from container", "error", err) + } + }() + numCores := runtime.NumCPU() + for { + select { + case s := <-statsCh: + if s != nil { + ms := &structs.MemoryStats{ + RSS: s.MemoryStats.Stats.Rss, + Cache: s.MemoryStats.Stats.Cache, + Swap: s.MemoryStats.Stats.Swap, + MaxUsage: s.MemoryStats.MaxUsage, + Measured: DockerMeasuredMemStats, + } + + cs 
:= &structs.CpuStats{ + ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods, + ThrottledTime: s.CPUStats.ThrottlingData.ThrottledTime, + Measured: DockerMeasuredCpuStats, + } + + // Calculate percentage + cs.Percent = calculatePercent( + s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, + s.CPUStats.SystemCPUUsage, s.PreCPUStats.SystemCPUUsage, numCores) + cs.SystemMode = calculatePercent( + s.CPUStats.CPUUsage.UsageInKernelmode, s.PreCPUStats.CPUUsage.UsageInKernelmode, + s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, numCores) + cs.UserMode = calculatePercent( + s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode, + s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, numCores) + cs.TotalTicks = (cs.Percent / 100) * stats.TotalTicksAvailable() / float64(numCores) + + h.resourceUsageLock.Lock() + h.resourceUsage = &structs.TaskResourceUsage{ + ResourceUsage: &structs.ResourceUsage{ + MemoryStats: ms, + CpuStats: cs, + }, + Timestamp: s.Read.UTC().UnixNano(), + } + h.resourceUsageLock.Unlock() + } + case <-h.doneCh: + return + } + } +} + +func calculatePercent(newSample, oldSample, newTotal, oldTotal uint64, cores int) float64 { + numerator := newSample - oldSample + denom := newTotal - oldTotal + if numerator <= 0 || denom <= 0 { + return 0.0 + } + + return (float64(numerator) / float64(denom)) * float64(cores) * 100.0 +} diff --git a/drivers/docker/progress.go b/drivers/docker/progress.go new file mode 100644 index 000000000..bde1478ba --- /dev/null +++ b/drivers/docker/progress.go @@ -0,0 +1,289 @@ +package docker + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "strings" + "sync" + "time" + + "github.com/docker/docker/pkg/jsonmessage" + units "github.com/docker/go-units" +) + +const ( + // dockerPullActivityDeadline is the default value set in the imageProgressManager + // when newImageProgressManager is called + dockerPullActivityDeadline = 2 * time.Minute + + // dockerImageProgressReportInterval is the default value set in the + // imageProgressManager when newImageProgressManager is called + dockerImageProgressReportInterval = 10 * time.Second + + // dockerImageSlowProgressReportInterval is the default value set in the + // imageProgressManager when newImageProgressManager is called + dockerImageSlowProgressReportInterval = 2 * time.Minute +) + +// layerProgress tracks the state and downloaded bytes of a single layer within +// a docker image +type layerProgress struct { + id string + status layerProgressStatus + currentBytes int64 + totalBytes int64 +} + +type layerProgressStatus int + +const ( + layerProgressStatusUnknown layerProgressStatus = iota + layerProgressStatusStarting + layerProgressStatusWaiting + layerProgressStatusDownloading + layerProgressStatusVerifying + layerProgressStatusDownloaded + layerProgressStatusExtracting + layerProgressStatusComplete + layerProgressStatusExists +) + +func lpsFromString(status string) layerProgressStatus { + switch status { + case "Pulling fs layer": + return layerProgressStatusStarting + case "Waiting": + return layerProgressStatusWaiting + case "Downloading": + return layerProgressStatusDownloading + case "Verifying Checksum": + return layerProgressStatusVerifying + case "Download complete": + return layerProgressStatusDownloaded + case "Extracting": + return layerProgressStatusExtracting + case "Pull complete": + return layerProgressStatusComplete + case "Already exists": + return layerProgressStatusExists + default: + return 
layerProgressStatusUnknown + } +} + +// imageProgress tracks the status of each child layer as its pulled from a +// docker image repo +type imageProgress struct { + sync.RWMutex + lastMessage *jsonmessage.JSONMessage + timestamp time.Time + layers map[string]*layerProgress + pullStart time.Time +} + +// get returns a status message and the timestamp of the last status update +func (p *imageProgress) get() (string, time.Time) { + p.RLock() + defer p.RUnlock() + + if p.lastMessage == nil { + return "No progress", p.timestamp + } + + var pulled, pulling, waiting int + for _, l := range p.layers { + switch { + case l.status == layerProgressStatusStarting || + l.status == layerProgressStatusWaiting: + waiting++ + case l.status == layerProgressStatusDownloading || + l.status == layerProgressStatusVerifying: + pulling++ + case l.status >= layerProgressStatusDownloaded: + pulled++ + } + } + + elapsed := time.Now().Sub(p.pullStart) + cur := p.currentBytes() + total := p.totalBytes() + var est int64 + if cur != 0 { + est = (elapsed.Nanoseconds() / cur * total) - elapsed.Nanoseconds() + } + + var msg strings.Builder + fmt.Fprintf(&msg, "Pulled %d/%d (%s/%s) layers: %d waiting/%d pulling", + pulled, len(p.layers), units.BytesSize(float64(cur)), units.BytesSize(float64(total)), + waiting, pulling) + + if est > 0 { + fmt.Fprintf(&msg, " - est %.1fs remaining", time.Duration(est).Seconds()) + } + return msg.String(), p.timestamp +} + +// set takes a status message received from the docker engine api during an image +// pull and updates the status of the corresponding layer +func (p *imageProgress) set(msg *jsonmessage.JSONMessage) { + p.Lock() + defer p.Unlock() + + p.lastMessage = msg + p.timestamp = time.Now() + + lps := lpsFromString(msg.Status) + if lps == layerProgressStatusUnknown { + return + } + + layer, ok := p.layers[msg.ID] + if !ok { + layer = &layerProgress{id: msg.ID} + p.layers[msg.ID] = layer + } + layer.status = lps + if msg.Progress != nil && lps == layerProgressStatusDownloading { + layer.currentBytes = msg.Progress.Current + layer.totalBytes = msg.Progress.Total + } else if lps == layerProgressStatusDownloaded { + layer.currentBytes = layer.totalBytes + } +} + +// currentBytes iterates through all image layers and sums the total of +// current bytes. The caller is responsible for acquiring a read lock on the +// imageProgress struct +func (p *imageProgress) currentBytes() int64 { + var b int64 + for _, l := range p.layers { + b += l.currentBytes + } + return b +} + +// totalBytes iterates through all image layers and sums the total of +// total bytes. The caller is responsible for acquiring a read lock on the +// imageProgress struct +func (p *imageProgress) totalBytes() int64 { + var b int64 + for _, l := range p.layers { + b += l.totalBytes + } + return b +} + +// progressReporterFunc defines the method for handling inactivity and report +// events from the imageProgressManager. The image name, current status message +// and timestamp of last received status update are passed in. +type progressReporterFunc func(image string, msg string, timestamp time.Time) + +// imageProgressManager tracks the progress of pulling a docker image from an +// image repository. +// It also implemented the io.Writer interface so as to be passed to the docker +// client pull image method in order to receive status updates from the docker +// engine api. 
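+// Each Write buffers the raw bytes and decodes complete newline-delimited
+// JSON messages from the engine, feeding them into imageProgress; a partial
+// trailing line is pushed back onto the buffer until the rest arrives.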
+type imageProgressManager struct { + imageProgress *imageProgress + image string + activityDeadline time.Duration + inactivityFunc progressReporterFunc + reportInterval time.Duration + reporter progressReporterFunc + slowReportInterval time.Duration + slowReporter progressReporterFunc + lastSlowReport time.Time + cancel context.CancelFunc + stopCh chan struct{} + buf bytes.Buffer +} + +func newImageProgressManager( + image string, cancel context.CancelFunc, + inactivityFunc, reporter, slowReporter progressReporterFunc) *imageProgressManager { + + pm := &imageProgressManager{ + image: image, + activityDeadline: dockerPullActivityDeadline, + inactivityFunc: inactivityFunc, + reportInterval: dockerImageProgressReportInterval, + reporter: reporter, + slowReportInterval: dockerImageSlowProgressReportInterval, + slowReporter: slowReporter, + imageProgress: &imageProgress{ + timestamp: time.Now(), + layers: make(map[string]*layerProgress), + }, + cancel: cancel, + stopCh: make(chan struct{}), + } + + pm.start() + return pm +} + +// start intiates the ticker to trigger the inactivity and reporter handlers +func (pm *imageProgressManager) start() { + now := time.Now() + pm.imageProgress.pullStart = now + pm.lastSlowReport = now + go func() { + ticker := time.NewTicker(dockerImageProgressReportInterval) + for { + select { + case <-ticker.C: + msg, lastStatusTime := pm.imageProgress.get() + t := time.Now() + if t.Sub(lastStatusTime) > pm.activityDeadline { + pm.inactivityFunc(pm.image, msg, lastStatusTime) + pm.cancel() + return + } + if t.Sub(pm.lastSlowReport) > pm.slowReportInterval { + pm.slowReporter(pm.image, msg, lastStatusTime) + pm.lastSlowReport = t + } + pm.reporter(pm.image, msg, lastStatusTime) + case <-pm.stopCh: + return + } + } + }() +} + +func (pm *imageProgressManager) stop() { + close(pm.stopCh) +} + +func (pm *imageProgressManager) Write(p []byte) (n int, err error) { + n, err = pm.buf.Write(p) + var msg jsonmessage.JSONMessage + + for { + line, err := pm.buf.ReadBytes('\n') + if err == io.EOF { + // Partial write of line; push back onto buffer and break until full line + pm.buf.Write(line) + break + } + if err != nil { + return n, err + } + err = json.Unmarshal(line, &msg) + if err != nil { + return n, err + } + + if msg.Error != nil { + // error received from the docker engine api + return n, msg.Error + } + + pm.imageProgress.set(&msg) + } + + return +} diff --git a/drivers/docker/progress_test.go b/drivers/docker/progress_test.go new file mode 100644 index 000000000..7f5b5dc46 --- /dev/null +++ b/drivers/docker/progress_test.go @@ -0,0 +1,52 @@ +package docker + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func Test_DockerImageProgressManager(t *testing.T) { + + pm := &imageProgressManager{ + imageProgress: &imageProgress{ + timestamp: time.Now(), + layers: make(map[string]*layerProgress), + }, + } + + _, err := pm.Write([]byte(`{"status":"Pulling from library/golang","id":"1.9.5"} +{"status":"Pulling fs layer","progressDetail":{},"id":"c73ab1c6897b"} +{"status":"Pulling fs layer","progressDetail":{},"id":"1ab373b3deae"} +`)) + require.NoError(t, err) + require.Equal(t, 2, len(pm.imageProgress.layers), "number of layers should be 2") + + cur := pm.imageProgress.currentBytes() + require.Zero(t, cur) + tot := pm.imageProgress.totalBytes() + require.Zero(t, tot) + + _, err = pm.Write([]byte(`{"status":"Pulling fs layer","progress`)) + require.NoError(t, err) + require.Equal(t, 2, len(pm.imageProgress.layers), "number of layers should be 2") + + 
_, err = pm.Write([]byte(`Detail":{},"id":"b542772b4177"}` + "\n")) + require.NoError(t, err) + require.Equal(t, 3, len(pm.imageProgress.layers), "number of layers should be 3") + + _, err = pm.Write([]byte(`{"status":"Downloading","progressDetail":{"current":45800,"total":4335495},"progress":"[\u003e ] 45.8kB/4.335MB","id":"b542772b4177"} +{"status":"Downloading","progressDetail":{"current":113576,"total":11108010},"progress":"[\u003e ] 113.6kB/11.11MB","id":"1ab373b3deae"} +{"status":"Downloading","progressDetail":{"current":694257,"total":4335495},"progress":"[========\u003e ] 694.3kB/4.335MB","id":"b542772b4177"}` + "\n")) + require.NoError(t, err) + require.Equal(t, 3, len(pm.imageProgress.layers), "number of layers should be 3") + require.Equal(t, int64(807833), pm.imageProgress.currentBytes()) + require.Equal(t, int64(15443505), pm.imageProgress.totalBytes()) + + _, err = pm.Write([]byte(`{"status":"Download complete","progressDetail":{},"id":"b542772b4177"}` + "\n")) + require.NoError(t, err) + require.Equal(t, 3, len(pm.imageProgress.layers), "number of layers should be 3") + require.Equal(t, int64(4449071), pm.imageProgress.currentBytes()) + require.Equal(t, int64(15443505), pm.imageProgress.totalBytes()) +} diff --git a/drivers/docker/state.go b/drivers/docker/state.go new file mode 100644 index 000000000..309eba957 --- /dev/null +++ b/drivers/docker/state.go @@ -0,0 +1,33 @@ +package docker + +import ( + "sync" +) + +type taskStore struct { + store map[string]*taskHandle + lock sync.RWMutex +} + +func newTaskStore() *taskStore { + return &taskStore{store: map[string]*taskHandle{}} +} + +func (ts *taskStore) Set(id string, handle *taskHandle) { + ts.lock.Lock() + defer ts.lock.Unlock() + ts.store[id] = handle +} + +func (ts *taskStore) Get(id string) (*taskHandle, bool) { + ts.lock.RLock() + defer ts.lock.RUnlock() + t, ok := ts.store[id] + return t, ok +} + +func (ts *taskStore) Delete(id string) { + ts.lock.Lock() + defer ts.lock.Unlock() + delete(ts.store, id) +} diff --git a/drivers/docker/utils.go b/drivers/docker/utils.go new file mode 100644 index 000000000..d5180d19f --- /dev/null +++ b/drivers/docker/utils.go @@ -0,0 +1,190 @@ +package docker + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "strings" + + "github.com/docker/cli/cli/config/configfile" + "github.com/docker/distribution/reference" + "github.com/docker/docker/registry" + docker "github.com/fsouza/go-dockerclient" +) + +func parseDockerImage(image string) (repo, tag string) { + repo, tag = docker.ParseRepositoryTag(image) + if tag != "" { + return repo, tag + } + if i := strings.IndexRune(image, '@'); i > -1 { // Has digest (@sha256:...) + // when pulling images with a digest, the repository contains the sha hash, and the tag is empty + // see: https://github.com/fsouza/go-dockerclient/blob/master/image_test.go#L471 + repo = image + } else { + tag = "latest" + } + return repo, tag +} + +func dockerImageRef(repo string, tag string) string { + if tag == "" { + return repo + } + return fmt.Sprintf("%s:%s", repo, tag) +} + +// loadDockerConfig loads the docker config at the specified path, returning an +// error if it couldn't be read. 
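+// The file is expected to follow the standard ~/.docker/config.json layout,
+// roughly (values here are purely illustrative):
+//
+//	{
+//	  "auths": {"https://index.docker.io/v1/": {"auth": "<base64 user:pass>"}},
+//	  "credHelpers": {"gcr.io": "gcloud"},
+//	  "credsStore": "osxkeychain"
+//	}
+//
+// Only the auths, credHelpers and credsStore sections are consulted by the
+// auth backends below.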
+func loadDockerConfig(file string) (*configfile.ConfigFile, error) { + f, err := os.Open(file) + if err != nil { + return nil, fmt.Errorf("Failed to open auth config file: %v, error: %v", file, err) + } + defer f.Close() + + cfile := new(configfile.ConfigFile) + if err = cfile.LoadFromReader(f); err != nil { + return nil, fmt.Errorf("Failed to parse auth config file: %v", err) + } + return cfile, nil +} + +// parseRepositoryInfo takes a repo and returns the Docker RepositoryInfo. This +// is useful for interacting with a Docker config object. +func parseRepositoryInfo(repo string) (*registry.RepositoryInfo, error) { + name, err := reference.ParseNamed(repo) + if err != nil { + return nil, fmt.Errorf("Failed to parse named repo %q: %v", repo, err) + } + + repoInfo, err := registry.ParseRepositoryInfo(name) + if err != nil { + return nil, fmt.Errorf("Failed to parse repository: %v", err) + } + + return repoInfo, nil +} + +// firstValidAuth tries a list of auth backends, returning first error or AuthConfiguration +func firstValidAuth(repo string, backends []authBackend) (*docker.AuthConfiguration, error) { + for _, backend := range backends { + auth, err := backend(repo) + if auth != nil || err != nil { + return auth, err + } + } + return nil, nil +} + +// authFromTaskConfig generates an authBackend for any auth given in the task-configuration +func authFromTaskConfig(driverConfig *TaskConfig) authBackend { + return func(string) (*docker.AuthConfiguration, error) { + if len(driverConfig.Auth.Email) == 0 { + return nil, nil + } + return &docker.AuthConfiguration{ + Username: driverConfig.Auth.Username, + Password: driverConfig.Auth.Password, + Email: driverConfig.Auth.Email, + ServerAddress: driverConfig.Auth.ServerAddr, + }, nil + } +} + +// authFromDockerConfig generate an authBackend for a dockercfg-compatible file. 
+// The authBacken can either be from explicit auth definitions or via credential +// helpers +func authFromDockerConfig(file string) authBackend { + return func(repo string) (*docker.AuthConfiguration, error) { + if file == "" { + return nil, nil + } + repoInfo, err := parseRepositoryInfo(repo) + if err != nil { + return nil, err + } + + cfile, err := loadDockerConfig(file) + if err != nil { + return nil, err + } + + return firstValidAuth(repo, []authBackend{ + func(string) (*docker.AuthConfiguration, error) { + dockerAuthConfig := registry.ResolveAuthConfig(cfile.AuthConfigs, repoInfo.Index) + auth := &docker.AuthConfiguration{ + Username: dockerAuthConfig.Username, + Password: dockerAuthConfig.Password, + Email: dockerAuthConfig.Email, + ServerAddress: dockerAuthConfig.ServerAddress, + } + if authIsEmpty(auth) { + return nil, nil + } + return auth, nil + }, + authFromHelper(cfile.CredentialHelpers[registry.GetAuthConfigKey(repoInfo.Index)]), + authFromHelper(cfile.CredentialsStore), + }) + } +} + +// authFromHelper generates an authBackend for a docker-credentials-helper; +// A script taking the requested domain on input, outputting JSON with +// "Username" and "Secret" +func authFromHelper(helperName string) authBackend { + return func(repo string) (*docker.AuthConfiguration, error) { + if helperName == "" { + return nil, nil + } + helper := dockerAuthHelperPrefix + helperName + cmd := exec.Command(helper, "get") + + repoInfo, err := parseRepositoryInfo(repo) + if err != nil { + return nil, err + } + + // Ensure that the HTTPs prefix exists + repoAddr := fmt.Sprintf("https://%s", repoInfo.Index.Name) + + cmd.Stdin = strings.NewReader(repoAddr) + output, err := cmd.Output() + if err != nil { + switch err.(type) { + default: + return nil, err + case *exec.ExitError: + return nil, fmt.Errorf("%s with input %q failed with stderr: %s", helper, repo, output) + } + } + + var response map[string]string + if err := json.Unmarshal(output, &response); err != nil { + return nil, err + } + + auth := &docker.AuthConfiguration{ + Username: response["Username"], + Password: response["Secret"], + } + + if authIsEmpty(auth) { + return nil, nil + } + return auth, nil + } +} + +// authIsEmpty returns if auth is nil or an empty structure +func authIsEmpty(auth *docker.AuthConfiguration) bool { + if auth == nil { + return false + } + return auth.Username == "" && + auth.Password == "" && + auth.Email == "" && + auth.ServerAddress == "" +} diff --git a/plugins/drivers/driver.go b/plugins/drivers/driver.go index 458635f6d..d4478db10 100644 --- a/plugins/drivers/driver.go +++ b/plugins/drivers/driver.go @@ -17,6 +17,11 @@ import ( "github.com/zclconf/go-cty/cty/msgpack" ) +const ( + // CheckBufSize is the size of the check output result + CheckBufSize = 4 * 1024 +) + // DriverPlugin is the interface with drivers will implement. It is also // implemented by a plugin client which proxies the calls to go-plugin. 
See // the proto/driver.proto file for detailed information about each RPC and @@ -98,6 +103,8 @@ type Capabilities struct { type TaskConfig struct { ID string + JobName string + TaskGroupName string Name string Env map[string]string Resources *Resources @@ -157,6 +164,17 @@ func (tc *TaskConfig) EncodeDriverConfig(val cty.Value) error { return nil } +func (tc *TaskConfig) EncodeConcreteDriverConfig(t interface{}) error { + data := []byte{} + err := base.MsgPackEncode(&data, t) + if err != nil { + return err + } + + tc.rawDriverConfig = data + return nil +} + type Resources struct { NomadResources *structs.Resources LinuxResources *LinuxResources diff --git a/plugins/drivers/proto/driver.proto b/plugins/drivers/proto/driver.proto index 527419274..da0b75db4 100644 --- a/plugins/drivers/proto/driver.proto +++ b/plugins/drivers/proto/driver.proto @@ -331,6 +331,12 @@ message TaskConfig { // StderrPath is the path to the file to open and write task stderr to string stderr_path = 11; + + // TaskGroupName is the name of the task group which this task is a member of + string task_group_name = 12; + + // JobName is the name of the job of which this task is part of + string job_name = 13; } message Resources { diff --git a/plugins/drivers/utils.go b/plugins/drivers/utils.go index 4caf3da7f..9e213243a 100644 --- a/plugins/drivers/utils.go +++ b/plugins/drivers/utils.go @@ -51,6 +51,8 @@ func taskConfigFromProto(pb *proto.TaskConfig) *TaskConfig { } return &TaskConfig{ ID: pb.Id, + JobName: pb.JobName, + TaskGroupName: pb.TaskGroupName, Name: pb.Name, Env: pb.Env, rawDriverConfig: pb.MsgpackDriverConfig, @@ -70,6 +72,8 @@ func taskConfigToProto(cfg *TaskConfig) *proto.TaskConfig { } pb := &proto.TaskConfig{ Id: cfg.ID, + JobName: cfg.JobName, + TaskGroupName: cfg.TaskGroupName, Name: cfg.Name, Env: cfg.Env, Resources: resourcesToProto(cfg.Resources), diff --git a/plugins/shared/catalog/register.go b/plugins/shared/catalog/register.go index f1e2d253e..bd238e88f 100644 --- a/plugins/shared/catalog/register.go +++ b/plugins/shared/catalog/register.go @@ -1,6 +1,7 @@ package catalog import ( + "github.com/hashicorp/nomad/drivers/docker" "github.com/hashicorp/nomad/drivers/exec" "github.com/hashicorp/nomad/drivers/java" "github.com/hashicorp/nomad/drivers/qemu" @@ -15,4 +16,5 @@ func init() { Register(exec.PluginID, exec.PluginConfig) Register(qemu.PluginID, qemu.PluginConfig) Register(java.PluginID, java.PluginConfig) + RegisterDeferredConfig(docker.PluginID, docker.PluginConfig, docker.PluginLoader) }