docker: periodically reconcile containers

When running at scale, it's possible that Docker Engine starts
containers successfully but gets wedged in a way where API calls fail.
The Docker Engine may remain unavailable for an arbitrarily long time.

Here, we introduce a periodic reconciliation process that ensures that any
container started by Nomad is tracked, and killed if it is running
unexpectedly.

Basically, the periodic job inspects any container that isn't tracked in
its handlers.  A creation grace period is used to prevent killing newly
created containers that aren't registered yet.

Also, we aim to avoid killing unrelated containers started by the host or
through raw_exec drivers.  The logic is to pattern-match against a
container's environment variables and mounts to infer whether it is a
Nomad alloc docker container.

Lastly, the periodic job can be disabled to avoid any interference if
need be.
This commit is contained in:
Mahmood Ali
2019-09-13 11:24:58 -04:00
parent 0078b79199
commit 911d17e3ee
4 changed files with 479 additions and 0 deletions

View File

@@ -134,6 +134,21 @@ var (
Name: pluginName,
}
danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewDefault(
hclspec.NewAttr("enabled", "bool", false),
hclspec.NewLiteral(`true`),
),
"period": hclspec.NewDefault(
hclspec.NewAttr("period", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"creation_timeout": hclspec.NewDefault(
hclspec.NewAttr("creation_timeout", "string", false),
hclspec.NewLiteral(`"5m"`),
),
})
// configSpec is the hcl specification returned by the ConfigSchema RPC
// and is used to parse the contents of the 'plugin "docker" {...}' block.
// Example:
@@ -195,6 +210,10 @@ var (
hclspec.NewAttr("container", "bool", false),
hclspec.NewLiteral("true"),
),
"dangling_containers": hclspec.NewDefault(
hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
hclspec.NewLiteral("{}"),
),
})), hclspec.NewLiteral(`{
image = true
container = true
@@ -491,6 +510,16 @@ type DockerVolumeDriverConfig struct {
Options hclutils.MapStrStr `codec:"options"`
}
// ContainerGCConfig controls the dangling-container reconciliation
// configured through the `dangling_containers` plugin config block.
type ContainerGCConfig struct {
	// Enabled controls whether the periodic reconciler runs at all.
	Enabled bool `codec:"enabled"`

	// PeriodStr is the reconciliation interval as an unparsed
	// duration string (e.g. "5m").
	PeriodStr string `codec:"period"`
	// period is the parsed form of PeriodStr, populated in SetConfig.
	period time.Duration `codec:"-"`

	// CreationTimeoutStr is the grace period granted to newly created
	// containers before they may be reaped, as an unparsed duration
	// string (e.g. "5m").
	CreationTimeoutStr string `codec:"creation_timeout"`
	// creationTimeout is the parsed form of CreationTimeoutStr,
	// populated in SetConfig.
	creationTimeout time.Duration `codec:"-"`
}
type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
@@ -519,6 +548,8 @@ type GCConfig struct {
ImageDelay string `codec:"image_delay"`
imageDelayDuration time.Duration `codec:"-"`
Container bool `codec:"container"`
DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
}
type VolumeConfig struct {
@@ -551,6 +582,22 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.config.GC.imageDelayDuration = dur
}
if len(d.config.GC.DanglingContainers.PeriodStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
if err != nil {
return fmt.Errorf("failed to parse 'period' duration: %v", err)
}
d.config.GC.DanglingContainers.period = dur
}
if len(d.config.GC.DanglingContainers.CreationTimeoutStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationTimeoutStr)
if err != nil {
return fmt.Errorf("failed to parse 'container_delay' duration: %v", err)
}
d.config.GC.DanglingContainers.creationTimeout = dur
}
if c.AgentConfig != nil {
d.clientConfig = c.AgentConfig.Driver
}
@@ -568,6 +615,8 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.coordinator = newDockerCoordinator(coordinatorConfig)
go d.removeDanglingContainersGoroutine()
return nil
}

View File

@@ -0,0 +1,164 @@
package docker
import (
"context"
"fmt"
"regexp"
"strings"
"time"
docker "github.com/fsouza/go-dockerclient"
)
// removeDanglingContainersGoroutine periodically reconciles running
// containers against the driver's task handles and removes containers
// that were started by Nomad but are no longer tracked.  It is a no-op
// when dangling-container GC is disabled, and runs until the driver
// context is cancelled.
func (d *Driver) removeDanglingContainersGoroutine() {
	if !d.config.GC.DanglingContainers.Enabled {
		d.logger.Debug("skipping dangling containers handling; is disabled")
		return
	}

	period := d.config.GC.DanglingContainers.period

	// lastOK suppresses repeated warnings: only the first failure
	// after a success is logged at warn level.
	lastOK := true

	timer := time.NewTimer(period)
	for {
		select {
		case <-d.ctx.Done():
			return
		case <-timer.C:
			// only reconcile when docker has been detected and the
			// fingerprint succeeded, i.e. the engine is reachable
			if d.previouslyDetected() && d.fingerprintSuccessful() {
				err := d.removeDanglingContainersIteration()
				if err != nil && lastOK {
					d.logger.Warn("failed to remove dangling containers", "error", err)
				}
				lastOK = err == nil
			}
			timer.Reset(period)
		}
	}
}
// removeDanglingContainersIteration performs a single reconciliation
// pass: it force-removes every container that looks Nomad-managed but
// is not associated with any task handle.  Individual removal failures
// are logged and do not abort the pass.
func (d *Driver) removeDanglingContainersIteration() error {
	tracked := d.trackedContainers()

	untracked, err := d.untrackedContainers(tracked, d.config.GC.DanglingContainers.creationTimeout)
	if err != nil {
		return fmt.Errorf("failed to find untracked containers: %v", err)
	}

	for _, containerID := range untracked {
		d.logger.Info("removing untracked container", "container_id", containerID)
		removeErr := client.RemoveContainer(docker.RemoveContainerOptions{
			ID:    containerID,
			Force: true,
		})
		if removeErr != nil {
			d.logger.Warn("failed to remove untracked container", "container_id", containerID, "error", removeErr)
		}
	}

	return nil
}
// untrackedContainers returns the ids of containers that look like they
// were started by Nomad but are not present in the tracked set, and
// whose creation time is older than creationTimeout.  The grace period
// prevents reaping newly created containers that have not been
// registered with a task handle yet.
func (d *Driver) untrackedContainers(tracked map[string]bool, creationTimeout time.Duration) ([]string, error) {
	result := []string{}

	// NOTE(review): ListContainers with zero options presumably lists
	// running containers only (docker API default) — stopped containers
	// are not reconciled here.
	cc, err := client.ListContainers(docker.ListContainersOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to list containers: %v", err)
	}

	// containers created after this instant are inside the grace period
	cutoff := time.Now().Add(-creationTimeout).Unix()

	for _, c := range cc {
		if tracked[c.ID] {
			continue
		}

		// skip containers still inside the creation grace period
		if c.Created > cutoff {
			continue
		}

		if !d.isNomadContainer(c) {
			continue
		}

		result = append(result, c.ID)
	}

	return result, nil
}
// isNomadContainer reports whether the container appears to have been
// started by the Nomad docker driver: it trusts the alloc_id label when
// present, and otherwise falls back to mount-path/name heuristics plus
// an env-var check on a full container inspect.
func (d *Driver) isNomadContainer(c docker.APIContainers) bool {
	if _, ok := c.Labels["com.hashicorp.nomad.alloc_id"]; ok {
		return true
	}

	// pre-0.10 containers aren't tagged or labeled in any way,
	// so use cheap heuristic based on mount paths
	// before inspecting container details
	if !hasMount(c, "/alloc") ||
		!hasMount(c, "/local") ||
		!hasMount(c, "/secrets") ||
		!hasNomadName(c) {
		return false
	}

	// double check before killing process
	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
	defer cancel()
	ci, err := client.InspectContainerWithContext(c.ID, ctx)
	if err != nil {
		// inspect failure is treated as "not ours": avoid killing a
		// container we cannot positively identify
		return false
	}

	env := ci.Config.Env
	return hasEnvVar(env, "NOMAD_ALLOC_ID") &&
		hasEnvVar(env, "NOMAD_GROUP_NAME")
}
// hasMount reports whether container c has a mount whose destination
// path inside the container equals p.
func hasMount(c docker.APIContainers, p string) bool {
	for i := range c.Mounts {
		if c.Mounts[i].Destination == p {
			return true
		}
	}
	return false
}
// nomadContainerNamePattern matches container names of the form
// "/<task>-<uuid>", the naming scheme the nomad docker driver uses.
// Compiled once at package scope.
var nomadContainerNamePattern = regexp.MustCompile(`\/.*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)

// hasNomadName reports whether any of the container's names matches
// the nomad container naming pattern.
func hasNomadName(c docker.APIContainers) bool {
	for _, name := range c.Names {
		if nomadContainerNamePattern.MatchString(name) {
			return true
		}
	}
	return false
}
// hasEnvVar reports whether vars, a list of "KEY=value" entries,
// contains an environment variable named exactly key.
func hasEnvVar(vars []string, key string) bool {
	prefix := key + "="
	for _, kv := range vars {
		if strings.HasPrefix(kv, prefix) {
			return true
		}
	}
	return false
}
// trackedContainers returns the set of container ids currently
// associated with a task handle managed by this driver.
func (d *Driver) trackedContainers() map[string]bool {
	d.tasks.lock.RLock()
	defer d.tasks.lock.RUnlock()

	ids := make(map[string]bool, len(d.tasks.store))
	for _, handle := range d.tasks.store {
		ids[handle.containerID] = true
	}
	return ids
}

View File

@@ -0,0 +1,150 @@
package docker
import (
"encoding/json"
"os"
"testing"
"time"
docker "github.com/fsouza/go-dockerclient"
"github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/uuid"
tu "github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)
var sampleContainerList []docker.APIContainers
var sampleNomadContainerListItem docker.APIContainers
var sampleNonNomadContainerListItem docker.APIContainers
// init loads a sample `docker ps` API response fixture used by the
// reconciler unit tests.  Failures are tolerated: dependent tests will
// then operate on zero-valued samples rather than panicking here.
func init() {
	path := "./test-resources/docker/reconciler_containers_list.json"

	f, err := os.Open(path)
	if err != nil {
		return
	}
	// close the fixture file; the original leaked the handle
	defer f.Close()

	if err := json.NewDecoder(f).Decode(&sampleContainerList); err != nil {
		return
	}

	// guard against a malformed fixture with fewer than two entries,
	// which would previously panic with an index-out-of-range
	if len(sampleContainerList) < 2 {
		return
	}

	sampleNomadContainerListItem = sampleContainerList[0]
	sampleNonNomadContainerListItem = sampleContainerList[1]
}
// Test_HasMount verifies mount-destination detection against the
// sample nomad and non-nomad container fixtures.
func Test_HasMount(t *testing.T) {
	paths := []string{"/alloc", "/data", "/secrets", "/random"}

	// the nomad sample mounts every path except /random
	for _, p := range paths {
		require.Equal(t, p != "/random", hasMount(sampleNomadContainerListItem, p))
	}

	// the non-nomad sample has none of these mounts
	for _, p := range paths {
		require.False(t, hasMount(sampleNonNomadContainerListItem, p))
	}
}
// Test_HasNomadName checks that the nomad container name pattern
// matches the nomad sample fixture but not the manually started
// non-nomad one.
func Test_HasNomadName(t *testing.T) {
	require.True(t, hasNomadName(sampleNomadContainerListItem))
	require.False(t, hasNomadName(sampleNonNomadContainerListItem))
}
// TestHasEnv exercises hasEnvVar against a realistic container env
// list: exact key matches succeed, key prefixes and unknown keys fail.
func TestHasEnv(t *testing.T) {
	envvars := []string{
		"NOMAD_ALLOC_DIR=/alloc",
		"NOMAD_ALLOC_ID=72bfa388-024e-a903-45b8-2bc28b74ed69",
		"NOMAD_ALLOC_INDEX=0",
		"NOMAD_ALLOC_NAME=example.cache[0]",
		"NOMAD_CPU_LIMIT=500",
		"NOMAD_DC=dc1",
		"NOMAD_GROUP_NAME=cache",
		"NOMAD_JOB_NAME=example",
		"NOMAD_MEMORY_LIMIT=256",
		"NOMAD_NAMESPACE=default",
		"NOMAD_REGION=global",
		"NOMAD_SECRETS_DIR=/secrets",
		"NOMAD_TASK_DIR=/local",
		"NOMAD_TASK_NAME=redis",
		"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
		"GOSU_VERSION=1.10",
		"REDIS_VERSION=3.2.12",
		"REDIS_DOWNLOAD_URL=http://download.redis.io/releases/redis-3.2.12.tar.gz",
		"REDIS_DOWNLOAD_SHA=98c4254ae1be4e452aa7884245471501c9aa657993e0318d88f048093e7f88fd",
	}

	for _, key := range []string{"NOMAD_ALLOC_ID", "NOMAD_ALLOC_DIR", "GOSU_VERSION"} {
		require.True(t, hasEnvVar(envvars, key))
	}

	// a key prefix or an absent key must not match
	for _, key := range []string{"NOMAD_ALLOC_", "OTHER_VARIABLE"} {
		require.False(t, hasEnvVar(envvars, key))
	}
}
// TestDanglingContainerRemoval is an integration test that exercises
// the reconciler end-to-end against a real Docker daemon: it starts one
// driver-tracked container and one out-of-band container, asserts that
// only the untracked nomad-like container is flagged, and then runs a
// removal iteration to verify it is actually reaped.
func TestDanglingContainerRemoval(t *testing.T) {
	if !tu.IsCI() {
		t.Parallel()
	}
	testutil.DockerCompatible(t)

	// start a container through the driver harness; this one is tracked
	task, cfg, _ := dockerTask(t)
	require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

	client, d, handle, cleanup := dockerSetup(t, task)
	defer cleanup()
	require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second))

	// start an out-of-band container that the driver does not manage
	c, err := client.CreateContainer(docker.CreateContainerOptions{
		Name: "mytest-image-" + uuid.Generate(),
		Config: &docker.Config{
			Image: cfg.Image,
			Cmd:   append([]string{cfg.Command}, cfg.Args...),
		},
	})
	require.NoError(t, err)
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    c.ID,
		Force: true,
	})

	err = client.StartContainer(c.ID, nil)
	require.NoError(t, err)

	// give docker a moment to report the container as running
	time.Sleep(1 * time.Second)

	dd := d.Impl().(*Driver)

	trackedContainers := map[string]bool{handle.containerID: true}
	{
		tf := dd.trackedContainers()
		require.Contains(t, tf, handle.containerID)
		require.NotContains(t, tf, c.ID)
	}

	// with a generous grace period nothing should be flagged
	untracked, err := dd.untrackedContainers(trackedContainers, 1*time.Minute)
	require.NoError(t, err)
	require.NotContains(t, untracked, handle.containerID)
	require.NotContains(t, untracked, c.ID)

	// with no tracking and no grace period only the nomad-launched
	// container is flagged; the out-of-band one fails the heuristics
	untracked, err = dd.untrackedContainers(map[string]bool{}, 0)
	require.NoError(t, err)
	require.Contains(t, untracked, handle.containerID)
	require.NotContains(t, untracked, c.ID)

	// Actually try to kill hosts
	// (renamed from "prestineDriver": spelling fix only)
	pristineDriver := dockerDriverHarness(t, nil).Impl().(*Driver)
	pristineDriver.config.GC.DanglingContainers = ContainerGCConfig{
		Enabled:         true,
		period:          1 * time.Second,
		creationTimeout: 1 * time.Second,
	}
	require.NoError(t, pristineDriver.removeDanglingContainersIteration())

	// the out-of-band container survives the reconciliation pass
	_, err = client.InspectContainer(c.ID)
	require.NoError(t, err)

	// the nomad-launched, untracked container is gone
	_, err = client.InspectContainer(handle.containerID)
	require.Error(t, err)
	require.Contains(t, err.Error(), NoSuchContainerError)
}

View File

@@ -0,0 +1,116 @@
[
{
"Id": "eb23be71498c2dc0254c029f32b360a000caf33157d1c93e226f4c1a4c9d2218",
"Names": [
"/redis-72bfa388-024e-a903-45b8-2bc28b74ed69"
],
"Image": "redis:3.2",
"ImageID": "sha256:87856cc39862cec77541d68382e4867d7ccb29a85a17221446c857ddaebca916",
"Command": "docker-entrypoint.sh redis-server",
"Created": 1568383081,
"Ports": [
{
"PrivatePort": 6379,
"Type": "tcp"
}
],
"Labels": {},
"State": "running",
"Status": "Up 9 seconds",
"HostConfig": {
"NetworkMode": "default"
},
"NetworkSettings": {
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
"EndpointID": "ed830b4f2f33ab4134aea941611b00b9e576b35a4325d52bacfedd1e2e1ba213",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.3",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:03",
"DriverOpts": null
}
}
},
"Mounts": [
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/alloc",
"Destination": "/alloc",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "volume",
"Name": "d5d7f0f9a3326414257c57cfca01db96c53a424b43e251516511694554309681",
"Source": "",
"Destination": "/data",
"Driver": "local",
"Mode": "",
"RW": true,
"Propagation": ""
},
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/local",
"Destination": "/local",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "bind",
"Source": "/private/var/folders/r6/346cfqyn76b_lx1nrcl5278c0000gp/T/NomadClient831122597/72bfa388-024e-a903-45b8-2bc28b74ed69/redis/secrets",
"Destination": "/secrets",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
}
]
},
{
"Id": "99c49fbe999f6df7b7d6a891d69fe57d7b771a30d5d2899a922b44698084e5c9",
"Names": [
"/serene_keller"
],
"Image": "ubuntu:16.04",
"ImageID": "sha256:9361ce633ff193349d54bed380a5afe86043b09fd6ea8da7549dbbedfc2a7077",
"Command": "/bin/bash",
"Created": 1567795217,
"Ports": [],
"Labels": {},
"State": "running",
"Status": "Up 6 days",
"HostConfig": {
"NetworkMode": "default"
},
"NetworkSettings": {
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "6715ed501c1cef14545cd6680f54b4971373ee4441aec2300fff1031c8dbf3a4",
"EndpointID": "fab83a0d4089ca9944ca53c882bdf40ad310c6fda30dda0092731feb9bc9fab6",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:02",
"DriverOpts": null
}
}
},
"Mounts": []
}
]