diff --git a/nomad/consul.go b/nomad/consul.go index 14511ad4b..b83ecbbc9 100644 --- a/nomad/consul.go +++ b/nomad/consul.go @@ -95,6 +95,9 @@ type ConsulACLsAPI interface { // RevokeTokens instructs Consul to revoke the given token accessors. RevokeTokens(context.Context, []*structs.SITokenAccessor, bool) bool + // MarkForRevocation marks the tokens for background revocation + MarkForRevocation([]*structs.SITokenAccessor) + // Stop is used to stop background token revocations. Intended to be used // on Nomad Server shutdown. Stop() @@ -285,6 +288,10 @@ func (c *consulACLsAPI) RevokeTokens(ctx context.Context, accessors []*structs.S return false } +func (c *consulACLsAPI) MarkForRevocation(accessors []*structs.SITokenAccessor) { + c.storeForRevocation(accessors) +} + func (c *consulACLsAPI) storeForRevocation(accessors []*structs.SITokenAccessor) { c.bgRevokeLock.Lock() defer c.bgRevokeLock.Unlock() @@ -369,6 +376,10 @@ func (c *consulACLsAPI) bgRetryRevokeDaemon() { } } +// maxConsulRevocationBatchSize is the maximum tokens a bgRetryRevoke should revoke +// at any given time. +const maxConsulRevocationBatchSize = 1000 + func (c *consulACLsAPI) bgRetryRevoke() { c.bgRevokeLock.Lock() defer c.bgRevokeLock.Unlock() @@ -380,7 +391,11 @@ func (c *consulACLsAPI) bgRetryRevoke() { // unlike vault tokens, SI tokens do not have a TTL, and so we must try to // remove all SI token accessors, every time, until they're gone - toPurge := make([]*structs.SITokenAccessor, len(c.bgRetryRevocation), len(c.bgRetryRevocation)) + toRevoke := len(c.bgRetryRevocation) + if toRevoke > maxConsulRevocationBatchSize { + toRevoke = maxConsulRevocationBatchSize + } + toPurge := make([]*structs.SITokenAccessor, toRevoke) copy(toPurge, c.bgRetryRevocation) if err := c.parallelRevoke(context.Background(), toPurge); err != nil { diff --git a/nomad/consul_test.go b/nomad/consul_test.go index 051c45e3e..333cd2e38 100644 --- a/nomad/consul_test.go +++ b/nomad/consul_test.go @@ -65,6 +65,14 @@ func (mps *mockPurgingServer) purgeFunc(accessors []*structs.SITokenAccessor) er } func (m *mockConsulACLsAPI) RevokeTokens(_ context.Context, accessors []*structs.SITokenAccessor, committed bool) bool { + return m.storeForRevocation(accessors, committed) +} + +func (m *mockConsulACLsAPI) MarkForRevocation(accessors []*structs.SITokenAccessor) { + m.storeForRevocation(accessors, true) +} + +func (m *mockConsulACLsAPI) storeForRevocation(accessors []*structs.SITokenAccessor, committed bool) bool { m.lock.Lock() defer m.lock.Unlock() @@ -168,6 +176,31 @@ func TestConsulACLsAPI_RevokeTokens(t *testing.T) { }) } +func TestConsulACLsAPI_MarkForRevocation(t *testing.T) { + t.Parallel() + + logger := testlog.HCLogger(t) + aclAPI := consul.NewMockACLsAPI(logger) + + c := NewConsulACLsAPI(aclAPI, logger, nil) + + generated, err := c.CreateToken(context.Background(), ServiceIdentityRequest{ + ClusterID: uuid.Generate(), + AllocID: uuid.Generate(), + TaskName: "task1-sidecar-proxy", + TaskKind: structs.NewTaskKind(structs.ConnectProxyPrefix, "service1"), + }) + require.NoError(t, err) + + // set the mock error after calling CreateToken for setting up + aclAPI.SetError(nil) + + accessors := []*structs.SITokenAccessor{{AccessorID: generated.AccessorID}} + c.MarkForRevocation(accessors) + require.Len(t, c.bgRetryRevocation, 1) + require.Contains(t, c.bgRetryRevocation, accessors[0]) +} + func TestConsulACLsAPI_bgRetryRevoke(t *testing.T) { t.Parallel() diff --git a/nomad/leader.go b/nomad/leader.go index a246d4f56..e849dd635 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -438,8 +438,8 @@ func (s *Server) revokeSITokenAccessorsOnRestore() error { } if len(toRevoke) > 0 { - ctx := context.Background() - s.consulACLs.RevokeTokens(ctx, toRevoke, true) + s.logger.Info("revoking consul accessors on restore", "accessors", len(toRevoke)) + s.consulACLs.MarkForRevocation(toRevoke) } return nil