From 506e441a4dab1314e43f09bbdecd07c12c23b3c5 Mon Sep 17 00:00:00 2001
From: Mahmood Ali <mahmood@hashicorp.com>
Date: Thu, 21 May 2020 07:55:36 -0400
Subject: [PATCH] rate limit revokeDaemon

---
 nomad/vault.go | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/nomad/vault.go b/nomad/vault.go
index 71263cbbe..a008885ad 100644
--- a/nomad/vault.go
+++ b/nomad/vault.go
@@ -1223,6 +1223,16 @@ func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.V
 	return g.Wait()
 }
 
+// maxVaultRevokeBatchSize is the maximum tokens a revokeDaemon should revoke
+// at any given time.
+//
+// Limiting the revocation batch size is beneficial for few reasons:
+// * A single revocation failure of any entry in batch result into retrying the whole batch;
+//   the larger the batch is the higher likelihood of such failure
+// * Smaller batch sizes result into more co-operativeness: provides hooks for
+//   reconsidering token TTL and leadership steps down.
+const maxVaultRevokeBatchSize = 1000
+
 // revokeDaemon should be called in a goroutine and is used to periodically
 // revoke Vault accessors that failed the original revocation
 func (v *vaultClient) revokeDaemon() {
@@ -1247,13 +1257,21 @@ func (v *vaultClient) revokeDaemon() {
 			}
 
 			// Build the list of accessors that need to be revoked while pruning any TTL'd checks
-			revoking := make([]*structs.VaultAccessor, 0, len(v.revoking))
+			toRevoke := len(v.revoking)
+			if toRevoke > maxVaultRevokeBatchSize {
+				toRevoke = maxVaultRevokeBatchSize
+			}
+			revoking := make([]*structs.VaultAccessor, 0, toRevoke)
 			for va, ttl := range v.revoking {
 				if now.After(ttl) {
 					delete(v.revoking, va)
 				} else {
 					revoking = append(revoking, va)
 				}
+
+				if len(revoking) >= toRevoke {
+					break
+				}
 			}
 
 			if err := v.parallelRevoke(context.Background(), revoking); err != nil {