Merge pull request #6325 from hashicorp/b-docker-reconcile-periodically

docker: periodically reconcile containers
Mahmood Ali, 2019-10-18 15:53:30 -04:00 (committed by GitHub)
7 changed files with 673 additions and 2 deletions

View File

@@ -134,6 +134,25 @@ var (
Name: pluginName,
}
danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewDefault(
hclspec.NewAttr("enabled", "bool", false),
hclspec.NewLiteral(`true`),
),
"period": hclspec.NewDefault(
hclspec.NewAttr("period", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"creation_grace": hclspec.NewDefault(
hclspec.NewAttr("creation_grace", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"dry_run": hclspec.NewDefault(
hclspec.NewAttr("dry_run", "bool", false),
hclspec.NewLiteral(`false`),
),
})
// configSpec is the hcl specification returned by the ConfigSchema RPC
// and is used to parse the contents of the 'plugin "docker" {...}' block.
// Example:
@@ -195,6 +214,10 @@ var (
hclspec.NewAttr("container", "bool", false),
hclspec.NewLiteral("true"),
),
"dangling_containers": hclspec.NewDefault(
hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
hclspec.NewLiteral("{}"),
),
})), hclspec.NewLiteral(`{
image = true
container = true
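
For reference, a client agent plugin stanza exercising the new block might look like the following (a sketch based on the schema above, with every value shown at its default):

plugin "docker" {
  config {
    gc {
      image     = true
      container = true

      dangling_containers {
        enabled        = true
        dry_run        = false
        period         = "5m"
        creation_grace = "5m"
      }
    }
  }
}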
@@ -491,6 +514,28 @@ type DockerVolumeDriverConfig struct {
Options hclutils.MapStrStr `codec:"options"`
}
// ContainerGCConfig controls the behavior of the GC reconciler that detects
// dangling nomad containers which aren't tracked due to docker/nomad bugs
type ContainerGCConfig struct {
// Enabled controls whether container reconciler is enabled
Enabled bool `codec:"enabled"`
// DryRun indicates that the reconciler should log any unexpectedly
// running containers it finds without actually killing them
DryRun bool `codec:"dry_run"`
// PeriodStr controls the frequency of scanning containers
PeriodStr string `codec:"period"`
period time.Duration `codec:"-"`
// CreationGraceStr is the duration a newly created container is allowed
// to live without being registered as a running task in nomad.
// A container is treated as leaked if it has lived longer than the grace
// duration without being registered in tasks.
CreationGraceStr string `codec:"creation_grace"`
CreationGrace time.Duration `codec:"-"`
}
type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
@@ -519,6 +564,8 @@ type GCConfig struct {
ImageDelay string `codec:"image_delay"`
imageDelayDuration time.Duration `codec:"-"`
Container bool `codec:"container"`
DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
}
type VolumeConfig struct {
@@ -534,6 +581,8 @@ func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
return configSpec, nil
}
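// danglingContainersCreationGraceMinimum is the lowest creation_grace
// value accepted by SetConfig, guarding against configurations that
// would reap containers while they are still starting up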
const danglingContainersCreationGraceMinimum = 1 * time.Minute
func (d *Driver) SetConfig(c *base.Config) error {
var config DriverConfig
if len(c.PluginConfig) != 0 {
@@ -551,6 +600,25 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.config.GC.imageDelayDuration = dur
}
if len(d.config.GC.DanglingContainers.PeriodStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
if err != nil {
return fmt.Errorf("failed to parse 'period' duration: %v", err)
}
d.config.GC.DanglingContainers.period = dur
}
if len(d.config.GC.DanglingContainers.CreationGraceStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationGraceStr)
if err != nil {
return fmt.Errorf("failed to parse 'creation_grace' duration: %v", err)
}
if dur < danglingContainersCreationGraceMinimum {
return fmt.Errorf("creation_grace is less than minimum, %v", danglingContainersCreationGraceMinimum)
}
d.config.GC.DanglingContainers.CreationGrace = dur
}
if c.AgentConfig != nil {
d.clientConfig = c.AgentConfig.Driver
}
@@ -568,6 +636,8 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.coordinator = newDockerCoordinator(coordinatorConfig)
d.reconciler = newReconciler(d)
return nil
}
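
To make the validation above concrete, here is a minimal standalone sketch of the two 'creation_grace' error paths (validateCreationGrace is a hypothetical helper; the driver performs this work inline in SetConfig):

package main

import (
	"fmt"
	"time"
)

const danglingContainersCreationGraceMinimum = 1 * time.Minute

// validateCreationGrace mirrors SetConfig's handling of 'creation_grace':
// parse the duration, then enforce the one-minute floor.
func validateCreationGrace(s string) (time.Duration, error) {
	dur, err := time.ParseDuration(s)
	if err != nil {
		return 0, fmt.Errorf("failed to parse 'creation_grace' duration: %v", err)
	}
	if dur < danglingContainersCreationGraceMinimum {
		return 0, fmt.Errorf("creation_grace is less than minimum, %v",
			danglingContainersCreationGraceMinimum)
	}
	return dur, nil
}

func main() {
	// "5m" parses and passes the floor; "30s" parses but is rejected;
	// "bogus" fails to parse at all
	for _, s := range []string{"5m", "30s", "bogus"} {
		d, err := validateCreationGrace(s)
		fmt.Printf("%-6s => %v, %v\n", s, d, err)
	}
}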

View File

@@ -66,6 +66,10 @@ var (
nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)
const (
dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
)
type Driver struct {
// eventer is used to handle multiplexing of TaskEvents calls such that an
// event can be broadcast to all callers
@@ -108,6 +112,8 @@ type Driver struct {
// for use during fingerprinting.
detected bool
detectedLock sync.RWMutex
reconciler *containerReconciler
}
// NewDockerDriver returns a docker implementation of a driver plugin
@@ -309,6 +315,10 @@ CREATE:
// the container is started
runningContainer, err := client.InspectContainer(container.ID)
if err != nil {
client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
Force: true,
})
msg := "failed to inspect started container"
d.logger.Error(msg, "error", err)
client.RemoveContainer(docker.RemoveContainerOptions{
@@ -977,9 +987,16 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
if len(driverConfig.Labels) > 0 {
config.Labels = driverConfig.Labels
logger.Debug("applied labels on the container", "labels", config.Labels)
}
labels := make(map[string]string, len(driverConfig.Labels)+1)
for k, v := range driverConfig.Labels {
labels[k] = v
}
labels[dockerLabelAllocID] = task.AllocID
config.Labels = labels
logger.Debug("applied labels on the container", "labels", config.Labels)
config.Env = task.EnvList()
containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
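
As an aside, this naming scheme is what the reconciler's name heuristic later in this PR falls back on for pre-0.10 containers. A tiny sketch, reusing the task name and alloc ID that appear in the PR's test fixture:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// mirrors the containerName construction above; docker itself
	// prepends a "/" when listing container names
	taskName := "redis"
	allocID := "72bfa388-024e-a903-45b8-2bc28b74ed69"
	name := fmt.Sprintf("%s-%s", strings.Replace(taskName, "/", "_", -1), allocID)
	fmt.Println(name) // redis-72bfa388-024e-a903-45b8-2bc28b74ed69
}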

View File

@@ -905,7 +905,8 @@ func TestDockerDriver_Labels(t *testing.T) {
t.Fatalf("err: %v", err)
}
require.Equal(t, 2, len(container.Config.Labels))
// expect to see 1 additional standard label
require.Equal(t, len(cfg.Labels)+1, len(container.Config.Labels))
for k, v := range cfg.Labels {
require.Equal(t, v, container.Config.Labels[k])
}
@@ -1008,6 +1009,39 @@ func TestDockerDriver_CreateContainerConfig(t *testing.T) {
require.Equal(t, containerName, c.Name)
}
func TestDockerDriver_CreateContainerConfig_Labels(t *testing.T) {
t.Parallel()
task, cfg, _ := dockerTask(t)
task.AllocID = uuid.Generate()
task.JobName = "redis-demo-job"
cfg.Labels = map[string]string{
"user_label": "user_value",
// com.hashicorp.nomad. labels are reserved and
// cannot be overridden
"com.hashicorp.nomad.alloc_id": "bad_value",
}
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
dh := dockerDriverHarness(t, nil)
driver := dh.Impl().(*Driver)
c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)
expectedLabels := map[string]string{
// user provided labels
"user_label": "user_value",
// default labels
"com.hashicorp.nomad.alloc_id": task.AllocID,
}
require.Equal(t, expectedLabels, c.Config.Labels)
}
func TestDockerDriver_CreateContainerConfig_Logging(t *testing.T) {
t.Parallel()

View File

@@ -13,6 +13,10 @@ import (
)
func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
// start the reconciler when we start fingerprinting,
// as this is the only method called when the driver is launched properly
d.reconciler.Start()
ch := make(chan *drivers.Fingerprint)
go d.handleFingerprint(ctx, ch)
return ch, nil

View File

@@ -0,0 +1,228 @@
package docker
import (
"context"
"fmt"
"regexp"
"sync"
"time"
docker "github.com/fsouza/go-dockerclient"
hclog "github.com/hashicorp/go-hclog"
)
// containerReconciler detects and kills unexpectedly running containers.
//
// Due to Docker's architecture and network-based communication, it is
// possible for Docker to start a container successfully, but have the
// creation API call fail with a network error. containerReconciler
// scans for these untracked containers and kills them.
type containerReconciler struct {
ctx context.Context
config *ContainerGCConfig
client *docker.Client
logger hclog.Logger
isDriverHealthy func() bool
trackedContainers func() map[string]bool
isNomadContainer func(c docker.APIContainers) bool
once sync.Once
}
func newReconciler(d *Driver) *containerReconciler {
return &containerReconciler{
ctx: d.ctx,
config: &d.config.GC.DanglingContainers,
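// client is the package-level docker client, shared with the rest of
// the driver (it is also referenced bare in the list and remove calls below)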
client: client,
logger: d.logger,
isDriverHealthy: func() bool { return d.previouslyDetected() && d.fingerprintSuccessful() },
trackedContainers: d.trackedContainers,
isNomadContainer: isNomadContainer,
}
}
func (r *containerReconciler) Start() {
if !r.config.Enabled {
r.logger.Debug("skipping dangling containers handling; is disabled")
return
}
r.once.Do(func() {
go r.removeDanglingContainersGoroutine()
})
}
func (r *containerReconciler) removeDanglingContainersGoroutine() {
period := r.config.period
lastIterSucceeded := true
// ensure that we wait at least a full period or the creation grace
// duration before the first container GC iteration.
// The initial delay is a grace period for restoring allocations
// before the driver may kill containers launched by an earlier nomad
// process.
initialDelay := period
if r.config.CreationGrace > initialDelay {
initialDelay = r.config.CreationGrace
}
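// e.g. with the default configuration (period = "5m",
// creation_grace = "5m"), the first iteration runs five minutes
// after the driver starts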
timer := time.NewTimer(initialDelay)
for {
select {
case <-timer.C:
if r.isDriverHealthy() {
err := r.removeDanglingContainersIteration()
if err != nil && lastIterSucceeded {
r.logger.Warn("failed to remove dangling containers", "error", err)
}
lastIterSucceeded = (err == nil)
}
timer.Reset(period)
case <-r.ctx.Done():
return
}
}
}
func (r *containerReconciler) removeDanglingContainersIteration() error {
cutoff := time.Now().Add(-r.config.CreationGrace)
tracked := r.trackedContainers()
untracked, err := r.untrackedContainers(tracked, cutoff)
if err != nil {
return fmt.Errorf("failed to find untracked containers: %v", err)
}
if len(untracked) == 0 {
return nil
}
if r.config.DryRun {
r.logger.Info("detected untracked containers", "container_ids", untracked)
return nil
}
for _, id := range untracked {
ctx, cancel := r.dockerAPIQueryContext()
err := client.RemoveContainer(docker.RemoveContainerOptions{
Context: ctx,
ID: id,
Force: true,
})
cancel()
if err != nil {
r.logger.Warn("failed to remove untracked container", "container_id", id, "error", err)
} else {
r.logger.Info("removed untracked container", "container_id", id)
}
}
return nil
}
// untrackedContainers returns the ids of containers that are suspected
// to have been started by Nomad but aren't tracked by this driver
func (r *containerReconciler) untrackedContainers(tracked map[string]bool, cutoffTime time.Time) ([]string, error) {
result := []string{}
ctx, cancel := r.dockerAPIQueryContext()
defer cancel()
cc, err := client.ListContainers(docker.ListContainersOptions{
Context: ctx,
All: false, // only reconcile running containers
})
if err != nil {
return nil, fmt.Errorf("failed to list containers: %v", err)
}
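// go-dockerclient reports APIContainers.Created as a Unix timestamp
// in seconds, hence the conversion of cutoffTime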
cutoff := cutoffTime.Unix()
for _, c := range cc {
if tracked[c.ID] {
continue
}
if c.Created > cutoff {
continue
}
if !r.isNomadContainer(c) {
continue
}
result = append(result, c.ID)
}
return result, nil
}
// dockerAPIQueryContext returns a context for a docker API call with an
// appropriate timeout to protect against a wedged, locked-up API call.
//
// We'll retry the Docker API call on a subsequent iteration.
func (r *containerReconciler) dockerAPIQueryContext() (context.Context, context.CancelFunc) {
// use a reasonable floor to avoid a very small timeout
timeout := 30 * time.Second
if timeout < r.config.period {
timeout = r.config.period
}
return context.WithTimeout(context.Background(), timeout)
}
func isNomadContainer(c docker.APIContainers) bool {
if _, ok := c.Labels[dockerLabelAllocID]; ok {
return true
}
// pre-0.10 containers aren't tagged or labeled in any way,
// so use a cheap heuristic based on mount paths
// before inspecting container details
if !hasMount(c, "/alloc") ||
!hasMount(c, "/local") ||
!hasMount(c, "/secrets") ||
!hasNomadName(c) {
return false
}
return true
}
func hasMount(c docker.APIContainers, p string) bool {
for _, m := range c.Mounts {
if m.Destination == p {
return true
}
}
return false
}
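// nomadContainerNamePattern matches nomad-style container names such as
// "/redis-72bfa388-024e-a903-45b8-2bc28b74ed69": a task name followed by
// an alloc UUID (docker prefixes listed names with "/")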
var nomadContainerNamePattern = regexp.MustCompile(`\/.*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)
func hasNomadName(c docker.APIContainers) bool {
for _, n := range c.Names {
if nomadContainerNamePattern.MatchString(n) {
return true
}
}
return false
}
func (d *Driver) trackedContainers() map[string]bool {
d.tasks.lock.RLock()
defer d.tasks.lock.RUnlock()
r := make(map[string]bool, len(d.tasks.store))
for _, h := range d.tasks.store {
r[h.containerID] = true
}
return r
}

View File

@@ -0,0 +1,202 @@
package docker
import (
"encoding/json"
"os"
"testing"
"time"
docker "github.com/fsouza/go-dockerclient"
"github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/stretchr/testify/require"
)
func fakeContainerList(t *testing.T) (nomadContainer, nonNomadContainer docker.APIContainers) {
path := "./test-resources/docker/reconciler_containers_list.json"
f, err := os.Open(path)
if err != nil {
t.Fatalf("failed to open file: %v", err)
}
var sampleContainerList []docker.APIContainers
err = json.NewDecoder(f).Decode(&sampleContainerList)
if err != nil {
t.Fatalf("failed to decode container list: %v", err)
}
return sampleContainerList[0], sampleContainerList[1]
}
func Test_HasMount(t *testing.T) {
nomadContainer, nonNomadContainer := fakeContainerList(t)
require.True(t, hasMount(nomadContainer, "/alloc"))
require.True(t, hasMount(nomadContainer, "/data"))
require.True(t, hasMount(nomadContainer, "/secrets"))
require.False(t, hasMount(nomadContainer, "/random"))
require.False(t, hasMount(nonNomadContainer, "/alloc"))
require.False(t, hasMount(nonNomadContainer, "/data"))
require.False(t, hasMount(nonNomadContainer, "/secrets"))
require.False(t, hasMount(nonNomadContainer, "/random"))
}
func Test_HasNomadName(t *testing.T) {
nomadContainer, nonNomadContainer := fakeContainerList(t)
require.True(t, hasNomadName(nomadContainer))
require.False(t, hasNomadName(nonNomadContainer))
}
// TestDanglingContainerRemoval asserts containers without corresponding tasks
// are removed after the creation grace period.
func TestDanglingContainerRemoval(t *testing.T) {
testutil.DockerCompatible(t)
// start three containers: one tracked nomad container, one untracked
// container with a nomad alloc_id label, and one unrelated container
task, cfg, _ := dockerTask(t)
require.NoError(t, task.EncodeConcreteDriverConfig(cfg))
client, d, handle, cleanup := dockerSetup(t, task)
defer cleanup()
require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))
nonNomadContainer, err := client.CreateContainer(docker.CreateContainerOptions{
Name: "mytest-image-" + uuid.Generate(),
Config: &docker.Config{
Image: cfg.Image,
Cmd: append([]string{cfg.Command}, cfg.Args...),
},
})
require.NoError(t, err)
defer client.RemoveContainer(docker.RemoveContainerOptions{
ID: nonNomadContainer.ID,
Force: true,
})
err = client.StartContainer(nonNomadContainer.ID, nil)
require.NoError(t, err)
untrackedNomadContainer, err := client.CreateContainer(docker.CreateContainerOptions{
Name: "mytest-image-" + uuid.Generate(),
Config: &docker.Config{
Image: cfg.Image,
Cmd: append([]string{cfg.Command}, cfg.Args...),
Labels: map[string]string{
dockerLabelAllocID: uuid.Generate(),
},
},
})
require.NoError(t, err)
defer client.RemoveContainer(docker.RemoveContainerOptions{
ID: untrackedNomadContainer.ID,
Force: true,
})
err = client.StartContainer(untrackedNomadContainer.ID, nil)
require.NoError(t, err)
dd := d.Impl().(*Driver)
reconciler := newReconciler(dd)
trackedContainers := map[string]bool{handle.containerID: true}
tf := reconciler.trackedContainers()
require.Contains(t, tf, handle.containerID)
require.NotContains(t, tf, untrackedNomadContainer.ID)
require.NotContains(t, tf, nonNomadContainer.ID)
// assert that tracked containers are never reported as untracked
untracked, err := reconciler.untrackedContainers(trackedContainers, time.Now())
require.NoError(t, err)
require.NotContains(t, untracked, handle.containerID)
require.NotContains(t, untracked, nonNomadContainer.ID)
require.Contains(t, untracked, untrackedNomadContainer.ID)
// assert we recognize nomad containers with appropriate cutoff
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now())
require.NoError(t, err)
require.Contains(t, untracked, handle.containerID)
require.Contains(t, untracked, untrackedNomadContainer.ID)
require.NotContains(t, untracked, nonNomadContainer.ID)
// but ignore containers created after the cutoff
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now().Add(-1*time.Minute))
require.NoError(t, err)
require.NotContains(t, untracked, handle.containerID)
require.NotContains(t, untracked, untrackedNomadContainer.ID)
require.NotContains(t, untracked, nonNomadContainer.ID)
// a full integration test to assert that containers are removed
pristineDriver := dockerDriverHarness(t, nil).Impl().(*Driver)
pristineDriver.config.GC.DanglingContainers = ContainerGCConfig{
Enabled: true,
period: 1 * time.Second,
CreationGrace: 0 * time.Second,
}
nReconciler := newReconciler(pristineDriver)
require.NoError(t, nReconciler.removeDanglingContainersIteration())
_, err = client.InspectContainer(nonNomadContainer.ID)
require.NoError(t, err)
_, err = client.InspectContainer(handle.containerID)
require.Error(t, err)
require.Contains(t, err.Error(), NoSuchContainerError)
_, err = client.InspectContainer(untrackedNomadContainer.ID)
require.Error(t, err)
require.Contains(t, err.Error(), NoSuchContainerError)
}
// TestDanglingContainerRemoval_Stopped asserts that stopped containers
// without corresponding tasks are not removed, even after the creation
// grace period.
func TestDanglingContainerRemoval_Stopped(t *testing.T) {
testutil.DockerCompatible(t)
_, cfg, _ := dockerTask(t)
client := newTestDockerClient(t)
container, err := client.CreateContainer(docker.CreateContainerOptions{
Name: "mytest-image-" + uuid.Generate(),
Config: &docker.Config{
Image: cfg.Image,
Cmd: append([]string{cfg.Command}, cfg.Args...),
Labels: map[string]string{
dockerLabelAllocID: uuid.Generate(),
},
},
})
require.NoError(t, err)
defer client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
Force: true,
})
err = client.StartContainer(container.ID, nil)
require.NoError(t, err)
err = client.StopContainer(container.ID, 60)
require.NoError(t, err)
dd := dockerDriverHarness(t, nil).Impl().(*Driver)
reconciler := newReconciler(dd)
// assert the stopped container is neither tracked nor flagged as untracked
tf := reconciler.trackedContainers()
require.NotContains(t, tf, container.ID)
untracked, err := reconciler.untrackedContainers(map[string]bool{}, time.Now())
require.NoError(t, err)
require.NotContains(t, untracked, container.ID)
// if we start the container again, it will be flagged as untracked
require.NoError(t, client.StartContainer(container.ID, nil))
untracked, err = reconciler.untrackedContainers(map[string]bool{}, time.Now())
require.NoError(t, err)
require.Contains(t, untracked, container.ID)
}

View File

@@ -0,0 +1,116 @@
[
{
"Id": "eb23be71498c2dc0254c029f32b360a000caf33157d1c93e226f4c1a4c9d2218",
"Names": [
"/redis-72bfa388-024e-a903-45b8-2bc28b74ed69"
],
"Image": "redis:3.2",
"ImageID": "sha256:87856cc39862cec77541d68382e4867d7ccb29a85a17221446c857ddaebca916",
"Command": "docker-entrypoint.sh redis-server",
"Created": 1568383081,
"Ports": [
{
"PrivatePort": 6379,
"Type": "tcp"
}
],
"Labels": {},
"State": "running",
"Status": "Up 9 seconds",
"HostConfig": {
"NetworkMode": "default"
},
"NetworkSettings": {
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
"EndpointID": "ed830b4f2f33ab4134aea941611b00b9e576b35a4325d52bacfedd1e2e1ba213",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.3",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:03",
"DriverOpts": null
}
}
},
"Mounts": [
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/alloc",
"Destination": "/alloc",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "volume",
"Name": "d5d7f0f9a3326414257c57cfca01db96c53a424b43e251516511694554309681",
"Source": "",
"Destination": "/data",
"Driver": "local",
"Mode": "",
"RW": true,
"Propagation": ""
},
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/local",
"Destination": "/local",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/secrets",
"Destination": "/secrets",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
}
]
},
{
"Id": "99c49fbe999f6df7b7d6a891d69fe57d7b771a30d5d2899a922b44698084e5c9",
"Names": [
"/serene_keller"
],
"Image": "ubuntu:16.04",
"ImageID": "sha256:9361ce633ff193349d54bed380a5afe86043b09fd6ea8da7549dbbedfc2a7077",
"Command": "/bin/bash",
"Created": 1567795217,
"Ports": [],
"Labels": {},
"State": "running",
"Status": "Up 6 days",
"HostConfig": {
"NetworkMode": "default"
},
"NetworkSettings": {
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
"EndpointID": "fab83a0d4089ca9944ca53c882bdf40ad310c6fda30dda0092731feb9bc9fab6",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:02",
"DriverOpts": null
}
}
},
"Mounts": []
}
]