cni: use check command when restoring from restart (#24658)

When the Nomad client restarts and restores allocations, the network namespace
for an allocation may exist but no longer be correctly configured. For example,
if the host is rebooted and the task was a Docker task using a pause container,
the network namespace may be recreated by the docker daemon.

When we restore an allocation, use the CNI "check" command to verify that any
existing network namespace matches the expected configuration. This requires CNI
plugins of at least version 1.3.0 to avoid a bug in older plugin versions that
would cause the check to fail.

If the check fails, destroy the network namespace and try to recreate it from
scratch once. If that fails in the second pass, fail the restore so that the
allocation can be recreated (rather than silently having networking fail).
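
Conceptually, the restore path now behaves like the following sketch. All
helper names here are hypothetical stand-ins for the network manager and CNI
configurator calls; the real logic is in the network hook's Prerun, shown in
the diff below.

```go
package main

import "errors"

// Hypothetical stand-ins for the real manager/configurator calls.
type netns struct{ id string }

var errCheckFailed = errors.New("netns exists but is misconfigured")

func createNetns(allocID string) (*netns, bool, error) { return &netns{allocID}, false, nil }
func setupNetns(*netns) error   { return nil }            // CNI ADD on a fresh netns
func checkNetns(*netns) error   { return errCheckFailed } // CNI CHECK on an existing netns
func destroyNetns(*netns) error { return nil }

// restoreNetwork sketches the recover-once flow described above.
func restoreNetwork(allocID string) error {
	retriedOnce := false
	for {
		ns, created, err := createNetns(allocID) // no-op when the netns already exists
		if err != nil {
			return err
		}
		if created {
			return setupNetns(ns) // fresh netns: run the normal ADD flow
		}
		// Existing netns: ADD is not idempotent, so verify it with CHECK.
		if err := checkNetns(ns); err != nil {
			if retriedOnce {
				return err // second failure: fail the restore so the alloc is replaced
			}
			retriedOnce = true
			if derr := destroyNetns(ns); derr != nil {
				return derr
			}
			continue // recreate the netns from scratch, once
		}
		return nil // netns exists and passed the check
	}
}

func main() { _ = restoreNetwork("example-alloc") }
```

The actual hook expresses the single retry with a `goto CREATE` label rather
than a loop, but the control flow is the same.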

This should fix the gap left by #24650 for the Docker task driver and any other
drivers with the `MustInitiateNetwork` capability.

Fixes: https://github.com/hashicorp/nomad/issues/24292
Ref: https://github.com/hashicorp/nomad/pull/24650
Tim Gross committed 2025-01-07 09:38:39 -05:00 (committed via GitHub)
parent 0906f788f0, commit 08a6f870ad
10 changed files with 247 additions and 15 deletions

.changelog/24658.txt (new file)

@@ -0,0 +1,3 @@
```release-note:bug
networking: check network namespaces on Linux during client restarts and fail the allocation if an existing namespace is invalid
```


@@ -5,6 +5,7 @@ package allocrunner
import (
"context"
"errors"
"fmt"
hclog "github.com/hashicorp/go-hclog"
@@ -28,6 +29,8 @@ const (
dockerNetSpecHostnameKey = "docker_sandbox_hostname"
)
var ErrCNICheckFailed = errors.New("network namespace already exists but was misconfigured")
type networkIsolationSetter interface {
SetNetworkIsolation(*drivers.NetworkIsolationSpec)
}
@@ -132,6 +135,9 @@ func (h *networkHook) Prerun() error {
Hostname: interpolatedNetworks[0].Hostname,
}
var checkedOnce bool
CREATE:
spec, created, err := h.manager.CreateNetwork(h.alloc.ID, &networkCreateReq)
if err != nil {
return fmt.Errorf("failed to create network for alloc: %v", err)
@@ -142,11 +148,26 @@ func (h *networkHook) Prerun() error {
h.isolationSetter.SetNetworkIsolation(spec)
}
if created {
status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec)
if spec != nil {
status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec, created)
if err != nil {
// if the netns already existed but is invalid, we get
// ErrCNICheckFailed. We'll try to recover from this one time by
// recreating the netns from scratch before giving up
if errors.Is(err, ErrCNICheckFailed) && !checkedOnce {
checkedOnce = true
destroyErr := h.manager.DestroyNetwork(h.alloc.ID, spec)
if destroyErr != nil {
return fmt.Errorf("%w: destroying network to retry failed: %v", err, destroyErr)
}
goto CREATE
}
return fmt.Errorf("failed to configure networking for alloc: %v", err)
}
if status == nil {
return nil // netns already existed and was correctly configured
}
// If the driver set the sandbox hostname label, then we will use that
// to set the HostsConfig.Hostname. Otherwise, identify the sandbox


@@ -4,6 +4,7 @@
package allocrunner
import (
"fmt"
"testing"
"github.com/hashicorp/nomad/ci"
@@ -14,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/drivers/testutils"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
)
@@ -65,7 +67,7 @@ func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
MockNetworkManager: testutils.MockNetworkManager{
CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
test.Eq(t, alloc.ID, allocID)
return spec, false, nil
return spec, true, nil
},
DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
@@ -137,3 +139,145 @@ func TestNetworkHook_Prerun_Postrun_host(t *testing.T) {
must.NoError(t, hook.Postrun())
must.True(t, destroyCalled)
}
// TestNetworkHook_Prerun_Postrun_ExistingNetNS tests that the prerun and
// postrun hooks call Setup and Destroy with the expected behaviors when the
// network namespace already exists (typical of agent restarts and host reboots).
func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
{Mode: "bridge"},
}
spec := &drivers.NetworkIsolationSpec{
Mode: drivers.NetIsolationModeGroup,
Path: "test",
Labels: map[string]string{"abc": "123"},
}
isolationSetter := &mockNetworkIsolationSetter{t: t, expectedSpec: spec}
statusSetter := &mockNetworkStatusSetter{t: t, expectedStatus: nil}
callCounts := testutil.NewCallCounter()
nm := &testutils.MockDriver{
MockNetworkManager: testutils.MockNetworkManager{
CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
test.Eq(t, alloc.ID, allocID)
callCounts.Inc("CreateNetwork")
return spec, false, nil
},
DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
test.Eq(t, alloc.ID, allocID)
test.Eq(t, spec, netSpec)
callCounts.Inc("DestroyNetwork")
return nil
},
},
}
fakePlugin := newMockCNIPlugin()
configurator := &cniNetworkConfigurator{
nodeAttrs: map[string]string{
"plugins.cni.version.bridge": "1.6.1",
},
nodeMeta: map[string]string{},
logger: testlog.HCLogger(t),
cni: fakePlugin,
nsOpts: &nsOpts{},
}
envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
testCases := []struct {
name string
cniVersion string
checkErrs []error
setupErrs []string
expectPrerunCreateNetworkCalls int
expectPrerunDestroyNetworkCalls int
expectCheckCalls int
expectSetupCalls int
expectPostrunDestroyNetworkCalls int
expectPrerunError string
}{
{
name: "good check",
cniVersion: "1.6.1",
expectPrerunCreateNetworkCalls: 1,
expectPrerunDestroyNetworkCalls: 0,
expectCheckCalls: 1,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 1,
},
{
name: "initial check fails",
cniVersion: "1.6.1",
checkErrs: []error{fmt.Errorf("whatever")},
expectPrerunCreateNetworkCalls: 2,
expectPrerunDestroyNetworkCalls: 1,
expectCheckCalls: 2,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 2,
},
{
name: "check fails twice",
cniVersion: "1.6.1",
checkErrs: []error{
fmt.Errorf("whatever"),
fmt.Errorf("whatever"),
},
expectPrerunCreateNetworkCalls: 2,
expectPrerunDestroyNetworkCalls: 1,
expectCheckCalls: 2,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 2,
expectPrerunError: "failed to configure networking for alloc: network namespace already exists but was misconfigured: whatever",
},
{
name: "old CNI version skips check",
cniVersion: "1.2.0",
expectPrerunCreateNetworkCalls: 1,
expectPrerunDestroyNetworkCalls: 0,
expectCheckCalls: 0,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 1,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
callCounts.Reset()
fakePlugin.counter.Reset()
fakePlugin.checkErrors = tc.checkErrs
configurator.nodeAttrs["plugins.cni.version.bridge"] = tc.cniVersion
hook := newNetworkHook(testlog.HCLogger(t), isolationSetter,
alloc, nm, configurator, statusSetter, envBuilder.Build())
err := hook.Prerun()
if tc.expectPrerunError == "" {
must.NoError(t, err)
} else {
must.EqError(t, err, tc.expectPrerunError)
}
test.Eq(t, tc.expectPrerunDestroyNetworkCalls,
callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after prerun"))
test.Eq(t, tc.expectPrerunCreateNetworkCalls,
callCounts.Get()["CreateNetwork"], test.Sprint("CreateNetwork calls after prerun"))
test.Eq(t, tc.expectCheckCalls, fakePlugin.counter.Get()["Check"], test.Sprint("Check calls"))
test.Eq(t, tc.expectSetupCalls, fakePlugin.counter.Get()["Setup"], test.Sprint("Setup calls"))
must.NoError(t, hook.Postrun())
test.Eq(t, tc.expectPostrunDestroyNetworkCalls,
callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after postrun"))
})
}
}


@@ -14,7 +14,7 @@ import (
// NetworkConfigurator sets up and tears down the interfaces, routes, firewall
// rules, etc for the configured networking mode of the allocation.
type NetworkConfigurator interface {
Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error)
Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error)
Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error
}
@@ -23,7 +23,7 @@ type NetworkConfigurator interface {
// require further configuration
type hostNetworkConfigurator struct{}
func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error) {
return nil, nil
}
func (h *hostNetworkConfigurator) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error {
@@ -41,10 +41,10 @@ type synchronizedNetworkConfigurator struct {
nc NetworkConfigurator
}
func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {
networkingGlobalMutex.Lock()
defer networkingGlobalMutex.Unlock()
return s.nc.Setup(ctx, allocation, spec)
return s.nc.Setup(ctx, allocation, spec, created)
}
func (s *synchronizedNetworkConfigurator) Teardown(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) error {


@@ -120,12 +120,12 @@ func (b *bridgeNetworkConfigurator) ensureForwardingRules() error {
}
// Setup calls the CNI plugins with the add action
func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {
if err := b.ensureForwardingRules(); err != nil {
return nil, fmt.Errorf("failed to initialize table forwarding rules: %v", err)
}
return b.cni.Setup(ctx, alloc, spec)
return b.cni.Setup(ctx, alloc, spec, created)
}
// Teardown calls the CNI plugins with the delete action


@@ -28,6 +28,7 @@ import (
consulIPTables "github.com/hashicorp/consul/sdk/iptables"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-set/v3"
"github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/envoy"
@@ -138,8 +139,19 @@ func addNomadWorkloadCNIArgs(logger log.Logger, alloc *structs.Allocation, cniAr
}
}
var supportsCNICheck = mustCNICheckConstraint()
func mustCNICheckConstraint() version.Constraints {
v, err := version.NewConstraint(">= 1.3.0")
if err != nil {
panic(err)
}
return v
}
// Setup calls the CNI plugins with the add action
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {
if err := c.ensureCNIInitialized(); err != nil {
return nil, fmt.Errorf("cni not initialized: %w", err)
}
@@ -171,6 +183,31 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc
cniArgs[ConsulIPTablesConfigEnvVar] = string(iptablesCfg)
}
if !created {
// The netns will not be created if it already exists, typically on
// agent restart. If the configuration of a preexisting netns is wrong
// (e.g. after a host reboot, for a Docker-created netns), networking
// will be broken. CNI's ADD command is not idempotent, so we can't
// simply run it again. Instead, run CHECK to verify the network is
// still valid. Older plugins have a broken CHECK, so for those we have
// to allow the previous (possibly broken) behavior.
cniVersion, err := version.NewSemver(c.nodeAttrs["plugins.cni.version.bridge"])
if err == nil && supportsCNICheck.Check(cniVersion) {
err := c.cni.Check(ctx, alloc.ID, spec.Path,
c.nsOpts.withCapabilityPortMap(portMaps.ports),
c.nsOpts.withArgs(cniArgs),
)
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrCNICheckFailed, err)
}
} else {
c.logger.Debug("network namespace exists but could not check if networking is valid because bridge plugin version was <1.3.0: continuing anyways")
return nil, nil
}
c.logger.Trace("network namespace exists and passed check: skipping setup")
return nil, nil
}
// Depending on the version of the bridge CNI plugin used, a known race
// could occur where two allocs attempt to create the nomad bridge at the
// same time, resulting in one of them failing. This retry attempts to
// overcome those erroneous failures.
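
For illustration, here is how that version gate evaluates the plugin versions
exercised in the tests above, as a standalone sketch using the same
hashicorp/go-version API (in the real code the version string comes from the
client's fingerprinted `plugins.cni.version.bridge` node attribute):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/go-version"
)

func main() {
	// The same constraint built by mustCNICheckConstraint.
	supportsCheck := version.MustConstraints(version.NewConstraint(">= 1.3.0"))

	for _, s := range []string{"1.2.0", "1.6.1"} {
		v := version.Must(version.NewVersion(s))
		// Prints "1.2.0 false" (check skipped) and "1.6.1 true" (check runs).
		fmt.Println(s, supportsCheck.Check(v))
	}
}
```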


@@ -18,6 +18,7 @@ var _ cni.CNI = &mockCNIPlugin{}
type mockCNIPlugin struct {
counter *testutil.CallCounter
setupErrors []string
checkErrors []error
}
func newMockCNIPlugin() *mockCNIPlugin {
@@ -26,6 +27,8 @@ func newMockCNIPlugin() *mockCNIPlugin {
callCounts.Reset()
return &mockCNIPlugin{
counter: callCounts,
setupErrors: []string{},
checkErrors: []error{},
}
}
@@ -65,6 +68,13 @@ func (f *mockCNIPlugin) Remove(ctx context.Context, id, path string, opts ...cni
}
func (f *mockCNIPlugin) Check(ctx context.Context, id, path string, opts ...cni.NamespaceOpts) error {
if f.counter != nil {
f.counter.Inc("Check")
}
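// Return the scripted error for the n-th Check call; once the scripted
// errors are exhausted, further calls succeed.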
numOfCalls := f.counter.Get()["Check"]
if numOfCalls <= len(f.checkErrors) {
return f.checkErrors[numOfCalls-1]
}
return nil
}


@@ -296,7 +296,7 @@ func TestSetup(t *testing.T) {
}
// method under test
result, err := c.Setup(context.Background(), alloc, spec)
result, err := c.Setup(context.Background(), alloc, spec, true)
if tc.expectErr == "" {
must.NoError(t, err)
must.Eq(t, tc.expectResult, result)


@@ -13,6 +13,19 @@ upgrade. However, specific versions of Nomad may have more details provided for
their upgrades as a result of new features or changed behavior. This page is
used to document those details separately from the standard upgrade flow.
## Nomad 1.9.5
#### CNI plugins
Nomad 1.9.5 includes a bug fix for restoring allocation networking after a
client host reboot. This fix requires recent versions of the CNI reference
plugins (minimum 1.3.0) and will fall back to the existing behavior if the CNI
reference plugins cannot support the fix.
We recommend installing the CNI reference plugins from the [CNI project release
page](https://github.com/containernetworking/plugins/releases) rather than your
Linux distribution's package manager.
## Nomad 1.9.4
#### Security updates to default deny lists


@@ -4,19 +4,23 @@ that use network namespaces. Refer to the [CNI Plugins external
guide](https://www.cni.dev/plugins/current/) for details on individual plugins.
The following series of commands determines your operating system architecture,
downloads the [CNI 1.5.1
release](https://github.com/containernetworking/plugins/releases/tag/v1.5.1),
downloads the [CNI 1.6.1
release](https://github.com/containernetworking/plugins/releases/tag/v1.6.1),
and then extracts the CNI plugin binaries into the `/opt/cni/bin` directory.
Update the `CNI_PLUGIN_VERSION` value to use a different release version.
```shell-session
$ export ARCH_CNI=$( [ $(uname -m) = aarch64 ] && echo arm64 || echo amd64)
$ export CNI_PLUGIN_VERSION=v1.5.1
$ export CNI_PLUGIN_VERSION=v1.6.1
$ curl -L -o cni-plugins.tgz "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-linux-${ARCH_CNI}-${CNI_PLUGIN_VERSION}".tgz && \
sudo mkdir -p /opt/cni/bin && \
sudo tar -C /opt/cni/bin -xzf cni-plugins.tgz
```
Your Linux distribution's package manager may provide the CNI reference
plugins, but we recommend installing the most recent stable release to ensure
you have fixes for bugs known to affect the versions those packages ship.
Nomad looks for CNI plugin binaries by default in the `/opt/cni/bin` directory.
However, you may install the binaries in a different directory and then
configure using the [`cni_path`](/nomad/docs/configuration/client#cni_path)