diff --git a/.changelog/26699.txt b/.changelog/26699.txt new file mode 100644 index 000000000..12ebcf9ec --- /dev/null +++ b/.changelog/26699.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: restore task network status on client restart so restarted tasks receive proper networking environment variables, hosts file, and resolv.conf. +``` diff --git a/client/allocrunner/network_hook.go b/client/allocrunner/network_hook.go index 1ac34d678..e3e34af96 100644 --- a/client/allocrunner/network_hook.go +++ b/client/allocrunner/network_hook.go @@ -49,8 +49,9 @@ func (a *allocNetworkIsolationSetter) SetNetworkIsolation(n *drivers.NetworkIsol } } -type networkStatusSetter interface { +type networkStatus interface { SetNetworkStatus(*structs.AllocNetworkStatus) + NetworkStatus() *structs.AllocNetworkStatus } // networkHook is an alloc lifecycle hook that manages the network namespace @@ -60,9 +61,9 @@ type networkHook struct { // network is created isolationSetter networkIsolationSetter - // statusSetter is a callback to the alloc runner to set the network status once - // network setup is complete - networkStatusSetter networkStatusSetter + // statusSetter is a callback to the alloc runner to get or set the network + // status once network setup is complete + networkStatus networkStatus // manager is used when creating the network namespace. This defaults to // bind mounting a network namespace descritor under /var/run/netns but @@ -87,11 +88,11 @@ func newNetworkHook(logger hclog.Logger, alloc *structs.Allocation, netManager drivers.DriverNetworkManager, netConfigurator NetworkConfigurator, - networkStatusSetter networkStatusSetter, + networkStatus networkStatus, ) *networkHook { return &networkHook{ isolationSetter: ns, - networkStatusSetter: networkStatusSetter, + networkStatus: networkStatus, alloc: alloc, manager: netManager, networkConfigurator: netConfigurator, @@ -168,8 +169,14 @@ CREATE: return fmt.Errorf("failed to configure networking for alloc: %v", err) } + // A nil status indicates a netns already exists and is configured correctly. + // It should have been saved to the local state store. if status == nil { - return nil // netns already existed and was correctly configured + stateStatus := h.networkStatus.NetworkStatus() + if stateStatus == nil { + return errors.New("network already configured but not found in state") + } + status = stateStatus } // If the driver set the sandbox hostname label, then we will use that @@ -196,7 +203,8 @@ CREATE: } } - h.networkStatusSetter.SetNetworkStatus(status) + // Saves the network status state, and also propagates status to task runners. + h.networkStatus.SetNetworkStatus(status) } return nil } diff --git a/client/allocrunner/network_hook_linux_test.go b/client/allocrunner/network_hook_linux_test.go index 0f9dc98e6..284796d2b 100644 --- a/client/allocrunner/network_hook_linux_test.go +++ b/client/allocrunner/network_hook_linux_test.go @@ -39,7 +39,7 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { Labels: map[string]string{"abc": "123"}, } isolationSetter := &mockNetworkIsolationSetter{t: t, expectedSpec: spec} - statusSetter := &mockNetworkStatusSetter{t: t, expectedStatus: nil} + statusSetter := &mockNetworkStatus{t: t, expectedStatus: mock.AllocNetworkStatus()} callCounts := testutil.NewCallCounter() @@ -84,6 +84,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { expectSetupCalls int expectPostrunDestroyNetworkCalls int expectPrerunError string + expectGetStatusCalls int + expectSetStatusCalls int }{ { name: "good check", @@ -92,6 +94,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { expectPrerunDestroyNetworkCalls: 0, expectCheckCalls: 1, expectSetupCalls: 0, + expectGetStatusCalls: 1, + expectSetStatusCalls: 1, expectPostrunDestroyNetworkCalls: 1, }, { @@ -102,6 +106,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { expectPrerunDestroyNetworkCalls: 1, expectCheckCalls: 2, expectSetupCalls: 0, + expectGetStatusCalls: 1, + expectSetStatusCalls: 1, expectPostrunDestroyNetworkCalls: 2, }, { @@ -116,6 +122,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { expectCheckCalls: 2, expectSetupCalls: 0, expectPostrunDestroyNetworkCalls: 2, + expectGetStatusCalls: 0, + expectSetStatusCalls: 0, expectPrerunError: "failed to configure networking for alloc: network namespace already exists but was misconfigured: whatever", }, { @@ -125,6 +133,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { expectPrerunDestroyNetworkCalls: 0, expectCheckCalls: 0, expectSetupCalls: 0, + expectGetStatusCalls: 1, + expectSetStatusCalls: 1, expectPostrunDestroyNetworkCalls: 1, }, } @@ -135,6 +145,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { callCounts.Reset() fakePlugin.counter.Reset() fakePlugin.checkErrors = tc.checkErrs + statusSetter.getCalls = 0 + statusSetter.setCalls = 0 configurator.nodeAttrs["plugins.cni.version.bridge"] = tc.cniVersion hook := newNetworkHook(testlog.HCLogger(t), isolationSetter, alloc, nm, configurator, statusSetter) @@ -158,6 +170,8 @@ func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { test.Eq(t, tc.expectPostrunDestroyNetworkCalls, callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after postrun")) + test.Eq(t, tc.expectGetStatusCalls, statusSetter.getCalls, test.Sprint("NetworkStatus Calls")) + test.Eq(t, tc.expectSetStatusCalls, statusSetter.setCalls, test.Sprint("SetNetworkStatus Calls")) }) } } diff --git a/client/allocrunner/network_hook_test.go b/client/allocrunner/network_hook_test.go index ae13977fe..182dcbe3e 100644 --- a/client/allocrunner/network_hook_test.go +++ b/client/allocrunner/network_hook_test.go @@ -28,17 +28,23 @@ func (m *mockNetworkIsolationSetter) SetNetworkIsolation(spec *drivers.NetworkIs test.Eq(m.t, m.expectedSpec, spec) } -type mockNetworkStatusSetter struct { +type mockNetworkStatus struct { t *testing.T expectedStatus *structs.AllocNetworkStatus - called bool + getCalls int + setCalls int } -func (m *mockNetworkStatusSetter) SetNetworkStatus(status *structs.AllocNetworkStatus) { - m.called = true +func (m *mockNetworkStatus) SetNetworkStatus(status *structs.AllocNetworkStatus) { + m.setCalls++ test.Eq(m.t, m.expectedStatus, status) } +func (m *mockNetworkStatus) NetworkStatus() *structs.AllocNetworkStatus { + m.getCalls++ + return m.expectedStatus +} + // Test that the prerun and postrun hooks call the setter with the expected // NetworkIsolationSpec for group bridge network. func TestNetworkHook_Prerun_Postrun_group(t *testing.T) { @@ -75,9 +81,9 @@ func TestNetworkHook_Prerun_Postrun_group(t *testing.T) { t: t, expectedSpec: spec, } - statusSetter := &mockNetworkStatusSetter{ + statusSetter := &mockNetworkStatus{ t: t, - expectedStatus: nil, + expectedStatus: mock.AllocNetworkStatus(), } logger := testlog.HCLogger(t) @@ -119,7 +125,7 @@ func TestNetworkHook_Prerun_Postrun_host(t *testing.T) { t: t, expectedSpec: nil, } - statusSetter := &mockNetworkStatusSetter{ + statusSetter := &mockNetworkStatus{ t: t, expectedStatus: nil, }