Files
nomad/client/allocrunner/networking.go
Tim Gross 08a6f870ad cni: use check command when restoring from restart (#24658)
When the Nomad client restarts and restores allocations, the network namespace
for an allocation may exist but no longer be correctly configured. For example,
if the host is rebooted and the task was a Docker task using a pause container,
the network namespace may be recreated by the docker daemon.

When we restore an allocation, use the CNI "check" command to verify that any
existing network namespace matches the expected configuration. This requires CNI
plugins of at least version 1.2.0 to avoid a bug in older plugin versions that
would cause the check to fail.

If the check fails, destroy the network namespace and try to recreate it from
scratch once. If that fails in the second pass, fail the restore so that the
allocation can be recreated (rather than silently having networking fail).

This should fix the gap left by #24650 for Docker task drivers and any other
drivers with the `MustInitiateNetwork` capability.

Fixes: https://github.com/hashicorp/nomad/issues/24292
Ref: https://github.com/hashicorp/nomad/pull/24650
2025-01-07 09:38:39 -05:00

55 lines
2.1 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package allocrunner
import (
"context"
"sync"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
)
// NetworkConfigurator is implemented by anything that can prepare and later
// remove the network plumbing (interfaces, routes, firewall rules, and so on)
// required by the configured networking mode of an allocation.
type NetworkConfigurator interface {
	// Setup configures networking for the allocation using the given
	// isolation spec and returns the resulting network status.
	// NOTE(review): created presumably reports whether the network namespace
	// was freshly created rather than reused -- confirm against implementations.
	Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error)

	// Teardown removes the networking configuration of the allocation.
	Teardown(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error
}
// hostNetworkConfigurator is a no-op implementation of a NetworkConfigurator,
// used when the alloc joins the client host's network namespace and therefore
// does not require any further network configuration.
type hostNetworkConfigurator struct{}
// Setup is a no-op: every argument is ignored and it always returns a nil
// network status together with a nil error.
func (*hostNetworkConfigurator) Setup(_ context.Context, _ *structs.Allocation, _ *drivers.NetworkIsolationSpec, _ bool) (*structs.AllocNetworkStatus, error) {
	var status *structs.AllocNetworkStatus
	return status, nil
}
// Teardown is a no-op: every argument is ignored and it always returns nil.
func (*hostNetworkConfigurator) Teardown(_ context.Context, _ *structs.Allocation, _ *drivers.NetworkIsolationSpec) error {
	return nil
}
// networkingGlobalMutex is a single package-level lock shared by every
// synchronizedNetworkConfigurator. It serializes the network operations done
// by the client to prevent race conditions when manipulating iptables rules.
var networkingGlobalMutex sync.Mutex
// synchronizedNetworkConfigurator wraps a NetworkConfigurator to provide
// serialized access to the network operations performed by the client. Its
// Setup and Teardown methods hold networkingGlobalMutex while delegating to
// the wrapped configurator.
type synchronizedNetworkConfigurator struct {
// nc is the wrapped configurator that Setup and Teardown delegate to.
nc NetworkConfigurator
}
// Setup acquires networkingGlobalMutex, forwards the call unchanged to the
// wrapped configurator, and returns whatever that configurator returns. The
// lock is released when the method returns.
func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) {
	networkingGlobalMutex.Lock()
	defer networkingGlobalMutex.Unlock()

	status, err := s.nc.Setup(ctx, allocation, spec, created)
	return status, err
}
// Teardown acquires networkingGlobalMutex, forwards the call unchanged to the
// wrapped configurator, and returns the error that configurator reports. The
// lock is released when the method returns.
func (s *synchronizedNetworkConfigurator) Teardown(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) error {
	networkingGlobalMutex.Lock()
	defer networkingGlobalMutex.Unlock()

	err := s.nc.Teardown(ctx, allocation, spec)
	return err
}