vault: catch expired lease as fatal error (#24409)

When a Vault lease expires, it's revoked on the server and cannot be renewed, so
this error should be treated as fatal.

The errors we get aren't wrapped by the Vault SDK, so unfortunately we have to
read the error messages and can't easily enumerate non-fatal error
messages (which might be bubbling up from the stdlib). I've audited the errors
currently used and have documented their source.

Ref 52ba156d47/vault/expiration.go (L1327)
Fixes: https://github.com/hashicorp/nomad/issues/23859
This commit is contained in:
Tim Gross
2024-11-18 09:12:35 -05:00
committed by GitHub
parent 270b4f97a6
commit 6be9a50626
2 changed files with 22 additions and 8 deletions

3
.changelog/24409.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
vault: Fixed a bug where expired secret leases were treated as non-fatal and retried
```

View File

@@ -399,6 +399,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
var renewalErr error
leaseDuration := req.increment
if req.isToken {
// Set the token in the API client to the one that needs renewal
c.client.SetToken(req.id)
@@ -434,14 +435,24 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
next := time.Now().Add(renewalDuration)
fatal := false
if renewalErr != nil &&
(strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") ||
strings.Contains(renewalErr.Error(), "invalid lease ID") ||
strings.Contains(renewalErr.Error(), "lease is not renewable") ||
strings.Contains(renewalErr.Error(), "token not found") ||
strings.Contains(renewalErr.Error(), "permission denied")) {
fatal = true
} else if renewalErr != nil {
if renewalErr != nil {
// These errors aren't wrapped by the Vault SDK, so we have to read the
// error messages. Unfortunately we can't easily enumerate non-fatal
// errors so we have a large set here. These can be found in
// vault/expiration.go.
// Current as of vault commit 52ba156d47da170bf40471fe57d72522030bdc7e
errMsg := renewalErr.Error()
if strings.Contains(errMsg, "no namespace") ||
strings.Contains(errMsg, "cannot renew a token across namespaces") ||
strings.Contains(errMsg, "invalid lease ID") ||
strings.Contains(errMsg, "lease expired") ||
strings.Contains(errMsg, "lease is not renewable") ||
strings.Contains(errMsg, "lease not found") ||
strings.Contains(errMsg, "permission denied") ||
strings.Contains(errMsg, "token not found") {
fatal = true
}
} else {
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration)
c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying",
"error", renewalErr, "period", next)