docker: stop network pause container of lost alloc after node restart (#17455)

This PR fixes a bug where the docker network pause container would not be
stopped and removed in the case where a node is restarted, the alloc is
moved to another node, the node comes back up. See the issue below for
full repro conditions.

Basically in the DestroyNetwork PostRun hook we would depend on the
NetworkIsolationSpec field not being nil - which is only the case
if the Client stays alive all the way from network creation to network
teardown. If the node is rebooted we lose that state and previously
would not be able to find the pause container to remove. Now, we manually
find the pause container by scanning them and looking for the associated
allocID.

Fixes #17299
This commit is contained in:
Seth Hoenig
2023-06-09 08:46:29 -05:00
committed by GitHub
parent 408ab828f7
commit 89ce092b20
7 changed files with 142 additions and 38 deletions

View File

@@ -14,7 +14,8 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/drivers/testutils"
"github.com/stretchr/testify/require"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
)
// statically assert network hook implements the expected interfaces
@@ -29,7 +30,7 @@ type mockNetworkIsolationSetter struct {
func (m *mockNetworkIsolationSetter) SetNetworkIsolation(spec *drivers.NetworkIsolationSpec) {
m.called = true
require.Exactly(m.t, m.expectedSpec, spec)
test.Eq(m.t, m.expectedSpec, spec)
}
type mockNetworkStatusSetter struct {
@@ -40,20 +41,19 @@ type mockNetworkStatusSetter struct {
func (m *mockNetworkStatusSetter) SetNetworkStatus(status *structs.AllocNetworkStatus) {
m.called = true
require.Exactly(m.t, m.expectedStatus, status)
test.Eq(m.t, m.expectedStatus, status)
}
// Test that the prerun and postrun hooks call the setter with the expected spec when
// the network mode is not host
func TestNetworkHook_Prerun_Postrun(t *testing.T) {
// Test that the prerun and postrun hooks call the setter with the expected
// NetworkIsolationSpec for group bridge network.
func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
{
Mode: "bridge",
},
{Mode: "bridge"},
}
spec := &drivers.NetworkIsolationSpec{
Mode: drivers.NetIsolationModeGroup,
Path: "test",
@@ -64,14 +64,14 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
nm := &testutils.MockDriver{
MockNetworkManager: testutils.MockNetworkManager{
CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
require.Equal(t, alloc.ID, allocID)
test.Eq(t, alloc.ID, allocID)
return spec, false, nil
},
DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
destroyCalled = true
require.Equal(t, alloc.ID, allocID)
require.Exactly(t, spec, netSpec)
test.Eq(t, alloc.ID, allocID)
test.Eq(t, spec, netSpec)
return nil
},
},
@@ -84,26 +84,56 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
t: t,
expectedStatus: nil,
}
require := require.New(t)
envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
logger := testlog.HCLogger(t)
hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
require.NoError(hook.Prerun())
require.True(setter.called)
require.False(destroyCalled)
require.NoError(hook.Postrun())
require.True(destroyCalled)
// reset and use host network mode
setter.called = false
destroyCalled = false
alloc.Job.TaskGroups[0].Networks[0].Mode = "host"
hook = newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
require.NoError(hook.Prerun())
require.False(setter.called)
require.False(destroyCalled)
require.NoError(hook.Postrun())
require.False(destroyCalled)
must.NoError(t, hook.Prerun())
must.True(t, setter.called)
must.False(t, destroyCalled)
must.NoError(t, hook.Postrun())
must.True(t, destroyCalled)
}
// Test that prerun and postrun hooks do not expect a NetworkIsolationSpec
func TestNetworkHook_Prerun_Postrun_host(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
{Mode: "host"},
}
destroyCalled := false
nm := &testutils.MockDriver{
MockNetworkManager: testutils.MockNetworkManager{
CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
test.Unreachable(t, test.Sprintf("should not call CreateNetwork for host network"))
return nil, false, nil
},
DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
destroyCalled = true
test.Nil(t, netSpec)
return nil
},
},
}
setter := &mockNetworkIsolationSetter{
t: t,
expectedSpec: nil,
}
statusSetter := &mockNetworkStatusSetter{
t: t,
expectedStatus: nil,
}
envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
logger := testlog.HCLogger(t)
hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
must.NoError(t, hook.Prerun())
must.False(t, setter.called)
must.False(t, destroyCalled)
must.NoError(t, hook.Postrun())
must.True(t, destroyCalled)
}