client: set network status on tasks when restoring allocations (#26699)

The allocation network hook was not properly restoring network status from state when the network had previously been setup.  This led to missing environment variables, misconfigured hosts file, and resolv.conf when a task was restarted after the nomad agent has restarted.
---------

Co-authored-by: Daniel Bennett <dbennett@hashicorp.com>
This commit is contained in:
Michael Smithhisler
2025-09-11 13:10:21 -04:00
committed by GitHub
parent 8b51acf259
commit c20f854d16
4 changed files with 47 additions and 16 deletions

3
.changelog/26699.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
client: restore task network status on client restart so restarted tasks receive proper networking environment variables, hosts file, and resolv.conf.
```

View File

@@ -49,8 +49,9 @@ func (a *allocNetworkIsolationSetter) SetNetworkIsolation(n *drivers.NetworkIsol
}
}
type networkStatusSetter interface {
type networkStatus interface {
SetNetworkStatus(*structs.AllocNetworkStatus)
NetworkStatus() *structs.AllocNetworkStatus
}
// networkHook is an alloc lifecycle hook that manages the network namespace
@@ -60,9 +61,9 @@ type networkHook struct {
// network is created
isolationSetter networkIsolationSetter
// statusSetter is a callback to the alloc runner to set the network status once
// network setup is complete
networkStatusSetter networkStatusSetter
// statusSetter is a callback to the alloc runner to get or set the network
// status once network setup is complete
networkStatus networkStatus
// manager is used when creating the network namespace. This defaults to
// bind mounting a network namespace descritor under /var/run/netns but
@@ -87,11 +88,11 @@ func newNetworkHook(logger hclog.Logger,
alloc *structs.Allocation,
netManager drivers.DriverNetworkManager,
netConfigurator NetworkConfigurator,
networkStatusSetter networkStatusSetter,
networkStatus networkStatus,
) *networkHook {
return &networkHook{
isolationSetter: ns,
networkStatusSetter: networkStatusSetter,
networkStatus: networkStatus,
alloc: alloc,
manager: netManager,
networkConfigurator: netConfigurator,
@@ -168,8 +169,14 @@ CREATE:
return fmt.Errorf("failed to configure networking for alloc: %v", err)
}
// A nil status indicates a netns already exists and is configured correctly.
// It should have been saved to the local state store.
if status == nil {
return nil // netns already existed and was correctly configured
stateStatus := h.networkStatus.NetworkStatus()
if stateStatus == nil {
return errors.New("network already configured but not found in state")
}
status = stateStatus
}
// If the driver set the sandbox hostname label, then we will use that
@@ -196,7 +203,8 @@ CREATE:
}
}
h.networkStatusSetter.SetNetworkStatus(status)
// Saves the network status state, and also propagates status to task runners.
h.networkStatus.SetNetworkStatus(status)
}
return nil
}

View File

@@ -39,7 +39,7 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
Labels: map[string]string{"abc": "123"},
}
isolationSetter := &mockNetworkIsolationSetter{t: t, expectedSpec: spec}
statusSetter := &mockNetworkStatusSetter{t: t, expectedStatus: nil}
statusSetter := &mockNetworkStatus{t: t, expectedStatus: mock.AllocNetworkStatus()}
callCounts := testutil.NewCallCounter()
@@ -84,6 +84,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
expectSetupCalls int
expectPostrunDestroyNetworkCalls int
expectPrerunError string
expectGetStatusCalls int
expectSetStatusCalls int
}{
{
name: "good check",
@@ -92,6 +94,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
expectPrerunDestroyNetworkCalls: 0,
expectCheckCalls: 1,
expectSetupCalls: 0,
expectGetStatusCalls: 1,
expectSetStatusCalls: 1,
expectPostrunDestroyNetworkCalls: 1,
},
{
@@ -102,6 +106,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
expectPrerunDestroyNetworkCalls: 1,
expectCheckCalls: 2,
expectSetupCalls: 0,
expectGetStatusCalls: 1,
expectSetStatusCalls: 1,
expectPostrunDestroyNetworkCalls: 2,
},
{
@@ -116,6 +122,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
expectCheckCalls: 2,
expectSetupCalls: 0,
expectPostrunDestroyNetworkCalls: 2,
expectGetStatusCalls: 0,
expectSetStatusCalls: 0,
expectPrerunError: "failed to configure networking for alloc: network namespace already exists but was misconfigured: whatever",
},
{
@@ -125,6 +133,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
expectPrerunDestroyNetworkCalls: 0,
expectCheckCalls: 0,
expectSetupCalls: 0,
expectGetStatusCalls: 1,
expectSetStatusCalls: 1,
expectPostrunDestroyNetworkCalls: 1,
},
}
@@ -135,6 +145,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
callCounts.Reset()
fakePlugin.counter.Reset()
fakePlugin.checkErrors = tc.checkErrs
statusSetter.getCalls = 0
statusSetter.setCalls = 0
configurator.nodeAttrs["plugins.cni.version.bridge"] = tc.cniVersion
hook := newNetworkHook(testlog.HCLogger(t), isolationSetter,
alloc, nm, configurator, statusSetter)
@@ -158,6 +170,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) {
test.Eq(t, tc.expectPostrunDestroyNetworkCalls,
callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after postrun"))
test.Eq(t, tc.expectGetStatusCalls, statusSetter.getCalls, test.Sprint("NetworkStatus Calls"))
test.Eq(t, tc.expectSetStatusCalls, statusSetter.setCalls, test.Sprint("SetNetworkStatus Calls"))
})
}
}

View File

@@ -28,17 +28,23 @@ func (m *mockNetworkIsolationSetter) SetNetworkIsolation(spec *drivers.NetworkIs
test.Eq(m.t, m.expectedSpec, spec)
}
type mockNetworkStatusSetter struct {
type mockNetworkStatus struct {
t *testing.T
expectedStatus *structs.AllocNetworkStatus
called bool
getCalls int
setCalls int
}
func (m *mockNetworkStatusSetter) SetNetworkStatus(status *structs.AllocNetworkStatus) {
m.called = true
func (m *mockNetworkStatus) SetNetworkStatus(status *structs.AllocNetworkStatus) {
m.setCalls++
test.Eq(m.t, m.expectedStatus, status)
}
func (m *mockNetworkStatus) NetworkStatus() *structs.AllocNetworkStatus {
m.getCalls++
return m.expectedStatus
}
// Test that the prerun and postrun hooks call the setter with the expected
// NetworkIsolationSpec for group bridge network.
func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
@@ -75,9 +81,9 @@ func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
t: t,
expectedSpec: spec,
}
statusSetter := &mockNetworkStatusSetter{
statusSetter := &mockNetworkStatus{
t: t,
expectedStatus: nil,
expectedStatus: mock.AllocNetworkStatus(),
}
logger := testlog.HCLogger(t)
@@ -119,7 +125,7 @@ func TestNetworkHook_Prerun_Postrun_host(t *testing.T) {
t: t,
expectedSpec: nil,
}
statusSetter := &mockNetworkStatusSetter{
statusSetter := &mockNetworkStatus{
t: t,
expectedStatus: nil,
}