mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 10:25:42 +03:00
keyring: reduce locking and replication overhead (#23975)
While working on #23655 I found there were a few places in the encrypter/keyring where we could make modest improvements to performance and reliability of the existing code. This changeset allows keyring replication to skip trying to replicate from itself, switches some of the read-only keyring accesses to use the read lock instead of a r/w lock, fixes the logging configuration to drop spurious "extra value" warnings in the logs, drops an unused type, and makes a minor refactoring to eliminate shadowing of the `keyset` type. Pulling this out to its own PR lets us backport these changes to the LTS and reduces the size of the PR that implements #23665. Ref https://github.com/hashicorp/nomad/issues/23665
This commit is contained in:
@@ -34,6 +34,7 @@ import (
|
||||
"github.com/hashicorp/nomad/helper/joseutil"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs/config"
|
||||
"github.com/hashicorp/raft"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
@@ -78,7 +79,7 @@ func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) {
|
||||
|
||||
encrypter := &Encrypter{
|
||||
srv: srv,
|
||||
log: srv.logger.With("keyring"),
|
||||
log: srv.logger.Named("keyring"),
|
||||
keystorePath: keystorePath,
|
||||
keyring: make(map[string]*keyset),
|
||||
issuer: srv.GetConfig().OIDCIssuer,
|
||||
@@ -218,16 +219,16 @@ func (e *Encrypter) Decrypt(ciphertext []byte, keyID string) ([]byte, error) {
|
||||
e.lock.RLock()
|
||||
defer e.lock.RUnlock()
|
||||
|
||||
keyset, err := e.keysetByIDLocked(keyID)
|
||||
ks, err := e.keysetByIDLocked(keyID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nonceSize := keyset.cipher.NonceSize()
|
||||
nonceSize := ks.cipher.NonceSize()
|
||||
nonce := ciphertext[:nonceSize] // nonce was stored alongside ciphertext
|
||||
additional := []byte(keyID) // keyID was included in the signature inputs
|
||||
|
||||
return keyset.cipher.Open(nil, nonce, ciphertext[nonceSize:], additional)
|
||||
return ks.cipher.Open(nil, nonce, ciphertext[nonceSize:], additional)
|
||||
}
|
||||
|
||||
// keyIDHeader is the JWT header for the Nomad Key ID used to sign the
|
||||
@@ -249,7 +250,7 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string,
|
||||
// If a key is rotated immediately following a leader election, plans that
|
||||
// are in-flight may get signed before the new leader has the key. Allow for
|
||||
// a short timeout-and-retry to avoid rejecting plans
|
||||
keyset, err := e.activeKeySet()
|
||||
ks, err := e.activeKeySet()
|
||||
if err != nil {
|
||||
ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, 5*time.Second)
|
||||
defer cancel()
|
||||
@@ -259,8 +260,8 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string,
|
||||
return "", "", err
|
||||
default:
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
keyset, err = e.activeKeySet()
|
||||
if keyset != nil {
|
||||
ks, err = e.activeKeySet()
|
||||
if ks != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -272,18 +273,18 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string,
|
||||
claims.Issuer = e.issuer
|
||||
}
|
||||
|
||||
opts := (&jose.SignerOptions{}).WithHeader("kid", keyset.rootKey.Meta.KeyID).WithType("JWT")
|
||||
opts := (&jose.SignerOptions{}).WithHeader("kid", ks.rootKey.Meta.KeyID).WithType("JWT")
|
||||
|
||||
var sig jose.Signer
|
||||
if keyset.rsaPrivateKey != nil {
|
||||
if ks.rsaPrivateKey != nil {
|
||||
// If an RSA key has been created prefer it as it is more widely compatible
|
||||
sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.RS256, Key: keyset.rsaPrivateKey}, opts)
|
||||
sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.RS256, Key: ks.rsaPrivateKey}, opts)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
} else {
|
||||
// No RSA key has been created, fallback to ed25519 which always exists
|
||||
sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.EdDSA, Key: keyset.eddsaPrivateKey}, opts)
|
||||
sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.EdDSA, Key: ks.eddsaPrivateKey}, opts)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
@@ -294,7 +295,7 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string,
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return raw, keyset.rootKey.Meta.KeyID, nil
|
||||
return raw, ks.rootKey.Meta.KeyID, nil
|
||||
}
|
||||
|
||||
// VerifyClaim accepts a previously-signed encoded claim and validates
|
||||
@@ -312,7 +313,7 @@ func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, er
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Find the Key
|
||||
// Find the key material
|
||||
pubKey, err := e.GetPublicKey(keyID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -594,8 +595,8 @@ var ErrDecryptFailed = errors.New("unable to decrypt wrapped key")
|
||||
// GetPublicKey returns the public signing key for the requested key id or an
|
||||
// error if the key could not be found.
|
||||
func (e *Encrypter) GetPublicKey(keyID string) (*structs.KeyringPublicKey, error) {
|
||||
e.lock.Lock()
|
||||
defer e.lock.Unlock()
|
||||
e.lock.RLock()
|
||||
defer e.lock.RUnlock()
|
||||
|
||||
ks, err := e.keysetByIDLocked(keyID)
|
||||
if err != nil {
|
||||
@@ -746,6 +747,7 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs
|
||||
keyID := keyMeta.KeyID
|
||||
krr.logger.Debug("replicating new key", "id", keyID)
|
||||
|
||||
var err error
|
||||
getReq := &structs.KeyringGetRootKeyRequest{
|
||||
KeyID: keyID,
|
||||
QueryOptions: structs.QueryOptions{
|
||||
@@ -754,7 +756,12 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs
|
||||
},
|
||||
}
|
||||
getResp := &structs.KeyringGetRootKeyResponse{}
|
||||
err := krr.srv.RPC("Keyring.Get", getReq, getResp)
|
||||
|
||||
// Servers should avoid querying themselves
|
||||
_, leaderID := krr.srv.raft.LeaderWithID()
|
||||
if leaderID != raft.ServerID(krr.srv.GetConfig().NodeID) {
|
||||
err = krr.srv.RPC("Keyring.Get", getReq, getResp)
|
||||
}
|
||||
|
||||
if err != nil || getResp.Key == nil {
|
||||
// Key replication needs to tolerate leadership flapping. If a key is
|
||||
@@ -764,17 +771,28 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs
|
||||
krr.logger.Warn("failed to fetch key from current leader, trying peers",
|
||||
"key", keyID, "error", err)
|
||||
getReq.AllowStale = true
|
||||
|
||||
cfg := krr.srv.GetConfig()
|
||||
self := fmt.Sprintf("%s.%s", cfg.NodeName, cfg.Region)
|
||||
|
||||
for _, peer := range krr.getAllPeers() {
|
||||
if peer.Name == self {
|
||||
continue
|
||||
}
|
||||
|
||||
krr.logger.Trace("attempting to replicate key from peer",
|
||||
"id", keyID, "peer", peer.Name)
|
||||
err = krr.srv.forwardServer(peer, "Keyring.Get", getReq, getResp)
|
||||
if err == nil && getResp.Key != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
if getResp.Key == nil {
|
||||
krr.logger.Error("failed to fetch key from any peer",
|
||||
"key", keyID, "error", err)
|
||||
return fmt.Errorf("failed to fetch key from any peer: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if getResp.Key == nil {
|
||||
krr.logger.Error("failed to fetch key from any peer",
|
||||
"key", keyID, "error", err)
|
||||
return fmt.Errorf("failed to fetch key from any peer: %v", err)
|
||||
}
|
||||
|
||||
err = krr.encrypter.AddKey(getResp.Key)
|
||||
@@ -786,7 +804,6 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: move this method into Server?
|
||||
func (krr *KeyringReplicator) getAllPeers() []*serverParts {
|
||||
krr.srv.peerLock.RLock()
|
||||
defer krr.srv.peerLock.RUnlock()
|
||||
|
||||
@@ -196,17 +196,6 @@ func NewRootKeyMeta() *RootKeyMeta {
|
||||
}
|
||||
}
|
||||
|
||||
// RootKeyMetaStub is for serializing root key metadata to the
|
||||
// keystore, not for the List API. It excludes frequently-changing
|
||||
// fields such as ModifyIndex so we don't have to sync them to the
|
||||
// on-disk keystore when the fields are already in raft.
|
||||
type RootKeyMetaStub struct {
|
||||
KeyID string
|
||||
Algorithm EncryptionAlgorithm
|
||||
CreateTime int64
|
||||
State RootKeyState
|
||||
}
|
||||
|
||||
// IsActive indicates this key is the one currently being used for crypto
|
||||
// operations (at most one key can be Active)
|
||||
func (rkm *RootKeyMeta) IsActive() bool {
|
||||
@@ -270,18 +259,6 @@ func (rkm *RootKeyMeta) IsInactive() bool {
|
||||
return rkm.State == RootKeyStateInactive || rkm.State == RootKeyStateDeprecated
|
||||
}
|
||||
|
||||
func (rkm *RootKeyMeta) Stub() *RootKeyMetaStub {
|
||||
if rkm == nil {
|
||||
return nil
|
||||
}
|
||||
return &RootKeyMetaStub{
|
||||
KeyID: rkm.KeyID,
|
||||
Algorithm: rkm.Algorithm,
|
||||
CreateTime: rkm.CreateTime,
|
||||
State: rkm.State,
|
||||
}
|
||||
|
||||
}
|
||||
func (rkm *RootKeyMeta) Copy() *RootKeyMeta {
|
||||
if rkm == nil {
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user