Merge pull request #10857 from hashicorp/b-rm-canarys

consul: avoid triggering unnecessary sync when removing workload
This commit is contained in:
Seth Hoenig
2021-07-07 09:47:15 -05:00
committed by GitHub
7 changed files with 42 additions and 58 deletions

3
.changelog/10842.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
consul: remove ineffective edge case handling on service deregistration
```

View File

@@ -120,12 +120,12 @@ func TestAllocRunner_Restore_RunningTerminal(t *testing.T) {
require.Error(t, logmonProc.Signal(syscall.Signal(0)))
// Assert consul was cleaned up:
// 2 removals (canary+noncanary) during prekill
// 2 removals (canary+noncanary) during exited
// 2 removals (canary+noncanary) during stop
// 2 removals (canary+noncanary) group during stop
// 1 removal during prekill
// 1 removal during exited
// 1 removal during stop
// 1 removal group during stop
consulOps := conf2.Consul.(*consul.MockConsulServiceClient).GetOps()
require.Len(t, consulOps, 8)
require.Len(t, consulOps, 4)
for _, op := range consulOps {
require.Equal(t, "remove", op.Op)
}

View File

@@ -208,11 +208,6 @@ func (h *groupServiceHook) deregister() {
if len(h.services) > 0 {
workloadServices := h.getWorkloadServices()
h.consulClient.RemoveWorkload(workloadServices)
// Canary flag may be getting flipped when the alloc is being
// destroyed, so remove both variations of the service
workloadServices.Canary = !workloadServices.Canary
h.consulClient.RemoveWorkload(workloadServices)
}
}

View File

@@ -54,14 +54,12 @@ func TestGroupServiceHook_NoGroupServices(t *testing.T) {
require.NoError(t, h.PreTaskRestart())
ops := consulClient.GetOps()
require.Len(t, ops, 7)
require.Len(t, ops, 5)
require.Equal(t, "add", ops[0].Op) // Prerun
require.Equal(t, "update", ops[1].Op) // Update
require.Equal(t, "remove", ops[2].Op) // Postrun (1st)
require.Equal(t, "remove", ops[3].Op) // Postrun (2nd)
require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st)
require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd)
require.Equal(t, "add", ops[6].Op) // Restart -> preRun
require.Equal(t, "remove", ops[2].Op) // Postrun
require.Equal(t, "remove", ops[3].Op) // Restart -> preKill
require.Equal(t, "add", ops[4].Op) // Restart -> preRun
}
// TestGroupServiceHook_ShutdownDelayUpdate asserts calling group service hooks
@@ -127,14 +125,12 @@ func TestGroupServiceHook_GroupServices(t *testing.T) {
require.NoError(t, h.PreTaskRestart())
ops := consulClient.GetOps()
require.Len(t, ops, 7)
require.Len(t, ops, 5)
require.Equal(t, "add", ops[0].Op) // Prerun
require.Equal(t, "update", ops[1].Op) // Update
require.Equal(t, "remove", ops[2].Op) // Postrun (1st)
require.Equal(t, "remove", ops[3].Op) // Postrun (2nd)
require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st)
require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd)
require.Equal(t, "add", ops[6].Op) // Restart -> preRun
require.Equal(t, "remove", ops[2].Op) // Postrun
require.Equal(t, "remove", ops[3].Op) // Restart -> preKill
require.Equal(t, "add", ops[4].Op) // Restart -> preRun
}
// TestGroupServiceHook_Error asserts group service hooks with group
@@ -175,14 +171,12 @@ func TestGroupServiceHook_NoNetwork(t *testing.T) {
require.NoError(t, h.PreTaskRestart())
ops := consulClient.GetOps()
require.Len(t, ops, 7)
require.Len(t, ops, 5)
require.Equal(t, "add", ops[0].Op) // Prerun
require.Equal(t, "update", ops[1].Op) // Update
require.Equal(t, "remove", ops[2].Op) // Postrun (1st)
require.Equal(t, "remove", ops[3].Op) // Postrun (2nd)
require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st)
require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd)
require.Equal(t, "add", ops[6].Op) // Restart -> preRun
require.Equal(t, "remove", ops[2].Op) // Postrun
require.Equal(t, "remove", ops[3].Op) // Restart -> preKill
require.Equal(t, "add", ops[4].Op) // Restart -> preRun
}
func TestGroupServiceHook_getWorkloadServices(t *testing.T) {

View File

@@ -171,13 +171,10 @@ func (h *serviceHook) Exited(context.Context, *interfaces.TaskExitedRequest, *in
// deregister services from Consul.
func (h *serviceHook) deregister() {
workloadServices := h.getWorkloadServices()
h.consulServices.RemoveWorkload(workloadServices)
// Canary flag may be getting flipped when the alloc is being
// destroyed, so remove both variations of the service
workloadServices.Canary = !workloadServices.Canary
h.consulServices.RemoveWorkload(workloadServices)
if len(h.services) > 0 {
workloadServices := h.getWorkloadServices()
h.consulServices.RemoveWorkload(workloadServices)
}
h.initialRegistration = false
}

View File

@@ -39,16 +39,15 @@ func TestUpdate_beforePoststart(t *testing.T) {
// so Update should again wait on Poststart.
require.NoError(t, hook.Exited(context.Background(), &interfaces.TaskExitedRequest{}, &interfaces.TaskExitedResponse{}))
require.Len(t, c.GetOps(), 3)
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
require.Len(t, c.GetOps(), 3)
require.NoError(t, hook.Poststart(context.Background(), &interfaces.TaskPoststartRequest{}, &interfaces.TaskPoststartResponse{}))
require.Len(t, c.GetOps(), 4)
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
require.Len(t, c.GetOps(), 4)
require.NoError(t, hook.Poststart(context.Background(), &interfaces.TaskPoststartRequest{}, &interfaces.TaskPoststartResponse{}))
require.Len(t, c.GetOps(), 5)
require.NoError(t, hook.PreKilling(context.Background(), &interfaces.TaskPreKillRequest{}, &interfaces.TaskPreKillResponse{}))
require.Len(t, c.GetOps(), 6)
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
require.Len(t, c.GetOps(), 6)
require.NoError(t, hook.PreKilling(context.Background(), &interfaces.TaskPreKillRequest{}, &interfaces.TaskPreKillResponse{}))
require.Len(t, c.GetOps(), 8)
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
require.Len(t, c.GetOps(), 8)
}

View File

@@ -957,17 +957,16 @@ func TestTaskRunner_ShutdownDelay(t *testing.T) {
assert.NoError(t, tr.Kill(context.Background(), structs.NewTaskEvent("test")))
}()
// Wait for *2* deregistration calls (due to needing to remove both
// canary tag variants)
// Wait for *1* de-registration calls (all [non-]canary variants removed).
WAIT:
for {
ops := mockConsul.GetOps()
switch n := len(ops); n {
case 1, 2:
// Waiting for both deregistration calls
case 3:
case 1:
// Waiting for single de-registration call.
case 2:
require.Equalf(t, "remove", ops[1].Op, "expected deregistration but found: %#v", ops[1])
require.Equalf(t, "remove", ops[2].Op, "expected deregistration but found: %#v", ops[2])
break WAIT
default:
// ?!
@@ -2401,25 +2400,22 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) {
consul := conf.Consul.(*consulapi.MockConsulServiceClient)
consulOps := consul.GetOps()
require.Len(t, consulOps, 8)
require.Len(t, consulOps, 5)
// Initial add
require.Equal(t, "add", consulOps[0].Op)
// Removing canary and non-canary entries on first exit
// Removing entries on first exit
require.Equal(t, "remove", consulOps[1].Op)
require.Equal(t, "remove", consulOps[2].Op)
// Second add on retry
require.Equal(t, "add", consulOps[3].Op)
require.Equal(t, "add", consulOps[2].Op)
// Removing canary and non-canary entries on retry
// Removing entries on retry
require.Equal(t, "remove", consulOps[3].Op)
// Removing entries on stop
require.Equal(t, "remove", consulOps[4].Op)
require.Equal(t, "remove", consulOps[5].Op)
// Removing canary and non-canary entries on stop
require.Equal(t, "remove", consulOps[6].Op)
require.Equal(t, "remove", consulOps[7].Op)
}
// testWaitForTaskToStart waits for the task to be running or fails the test