keyring RPC handlers (#13075)

Implement the upsert, list, delete, and rotate RPC handlers for the
secure variables keyring. Operations on the keyring itself are still
stubbed out.
This commit is contained in:
Tim Gross
2022-05-19 16:27:59 -04:00
parent 233fc63168
commit 1348a76e4b
5 changed files with 529 additions and 26 deletions

View File

@@ -1,6 +1,10 @@
package nomad
import "crypto/cipher"
import (
"crypto/cipher"
"github.com/hashicorp/nomad/nomad/structs"
)
type Encrypter struct {
ciphers map[string]cipher.AEAD // map of key IDs to ciphers
@@ -27,3 +31,13 @@ func (e *Encrypter) Decrypt(encryptedData []byte, keyID string) ([]byte, error)
// TODO: actually decrypt!
return encryptedData, nil
}
// GenerateNewRootKey builds a root key for the requested algorithm. The
// metadata comes from structs.NewRootKeyMeta with its Algorithm field set
// to the argument; the key material is currently an empty placeholder.
func (e *Encrypter) GenerateNewRootKey(algorithm structs.EncryptionAlgorithm) *structs.RootKey {
	rootKey := &structs.RootKey{
		Meta: structs.NewRootKeyMeta(),
		Key:  []byte{}, // TODO: generate based on algorithm
	}
	rootKey.Meta.Algorithm = algorithm
	return rootKey
}

View File

@@ -1,95 +1,290 @@
package nomad
import (
"fmt"
"time"
metrics "github.com/armon/go-metrics"
"github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
// KeyRing endpoint serves RPCs for secure variables key management
type KeyRing struct {
// Keyring endpoint serves RPCs for secure variables key management
type Keyring struct {
// srv is the server the handlers use for RPC forwarding, ACL token
// resolution, Raft applies, and state store snapshots
srv *Server
// logger is the logger used by the handlers
logger hclog.Logger
// encrypter holds the ciphers, keyed by key ID
encrypter *Encrypter
ctx *RPCContext // context for connection, to check TLS role
}
func (k *KeyRing) Rotate(args *structs.KeyringRotateRootKeyRequest, reply *structs.KeyringRotateRootKeyResponse) error {
if done, err := k.srv.forward("KeyRing.Rotate", args, args, reply); done {
// Rotate creates new root key metadata marked as active and writes it
// through Raft, returning the new metadata and the Raft index in the
// reply. Generating and persisting the key material itself is not
// implemented yet. The caller's token must resolve to a management ACL
// (a nil ACL object is let through).
func (k *Keyring) Rotate(args *structs.KeyringRotateRootKeyRequest, reply *structs.KeyringRotateRootKeyResponse) error {
// Hand the request to k.srv.forward first; if it reports the request
// as done, return its result.
if done, err := k.srv.forward("Keyring.Rotate", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "keyring", "rotate"}, time.Now())
// TODO: allow for servers to force rotation as well
if aclObj, err := k.srv.ResolveToken(args.AuthToken); err != nil {
return err
} else if aclObj != nil && !aclObj.IsManagement() {
return structs.ErrPermissionDenied
}
// TODO: implementation; this just silences the structcheck lint
if args.Full {
// TODO: implement full key rotation via a core job
}
// Fall back to XChaCha20 when the request does not name an algorithm.
if args.Algorithm == "" {
// TODO: set this default value from server config
args.Algorithm = structs.EncryptionAlgorithmXChaCha20
}
meta := structs.NewRootKeyMeta()
meta.Algorithm = args.Algorithm
meta.Active = true
// TODO: have the Encrypter generate and persist the actual key
// material. this is just here to silence the structcheck lint
for keyID := range k.encrypter.ciphers {
k.logger.Trace("TODO", "key", keyID)
}
// Update metadata via Raft so followers can retrieve this key
req := structs.KeyringUpdateRootKeyMetaRequest{
RootKeyMeta: meta,
WriteRequest: args.WriteRequest,
}
out, index, err := k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, req)
if err != nil {
return err
}
// The apply result may itself carry an error; surface it to the caller.
if err, ok := out.(error); ok && err != nil {
return err
}
reply.Key = meta
reply.Index = index
return nil
}
func (k *KeyRing) List(args *structs.KeyringListRootKeyMetaRequest, reply *structs.KeyringListRootKeyMetaResponse) error {
if done, err := k.srv.forward("KeyRing.List", args, args, reply); done {
func (k *Keyring) List(args *structs.KeyringListRootKeyMetaRequest, reply *structs.KeyringListRootKeyMetaResponse) error {
if done, err := k.srv.forward("Keyring.List", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "keyring", "list"}, time.Now())
// TODO: probably need to allow for servers to list keys as well, to support replication?
if aclObj, err := k.srv.ResolveToken(args.AuthToken); err != nil {
return err
} else if aclObj != nil && !aclObj.IsManagement() {
return structs.ErrPermissionDenied
// we need to allow both humans with management tokens and
// non-leader servers to list keys, in order to support
// replication
err := validateTLSCertificateLevel(k.srv, k.ctx, tlsCertificateLevelServer)
if err != nil {
if aclObj, err := k.srv.ResolveToken(args.AuthToken); err != nil {
return err
} else if aclObj != nil && !aclObj.IsManagement() {
return structs.ErrPermissionDenied
}
}
// TODO: implementation
// Setup the blocking query
opts := blockingOptions{
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
run: func(ws memdb.WatchSet, s *state.StateStore) error {
return nil
// retrieve all the key metadata
snap, err := k.srv.fsm.State().Snapshot()
if err != nil {
return err
}
iter, err := snap.RootKeyMetas(ws)
if err != nil {
return err
}
for {
raw := iter.Next()
if raw == nil {
break
}
keyMeta := raw.(*structs.RootKeyMeta)
reply.Keys = append(reply.Keys, keyMeta)
}
return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta)
},
}
return k.srv.blockingRPC(&opts)
}
func (k *KeyRing) Update(args *structs.KeyringUpdateRootKeyRequest, reply *structs.KeyringUpdateRootKeyResponse) error {
if done, err := k.srv.forward("KeyRing.Update", args, args, reply); done {
// Update upserts a root key in the keyring. The request is validated,
// and then only the key's metadata is written through Raft; the reply
// carries the resulting Raft index. The caller's token must resolve to a
// management ACL (a nil ACL object is let through).
func (k *Keyring) Update(args *structs.KeyringUpdateRootKeyRequest, reply *structs.KeyringUpdateRootKeyResponse) error {
	if forwarded, fwdErr := k.srv.forward("Keyring.Update", args, args, reply); forwarded {
		return fwdErr
	}
	defer metrics.MeasureSince([]string{"nomad", "keyring", "update"}, time.Now())

	// TODO: need to allow for servers to update keys as well, to support replication
	aclObj, err := k.srv.ResolveToken(args.AuthToken)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.IsManagement() {
		return structs.ErrPermissionDenied
	}

	if err := k.validateUpdate(args); err != nil {
		return err
	}

	// Strip the request down to a metadata-only update and apply it via
	// Raft.
	out, index, err := k.srv.raftApply(
		structs.RootKeyMetaUpsertRequestType,
		&structs.KeyringUpdateRootKeyMetaRequest{
			RootKeyMeta:  args.RootKey.Meta,
			WriteRequest: args.WriteRequest,
		},
	)
	if err != nil {
		return err
	}
	// The apply result may itself be an error value.
	if applyErr, ok := out.(error); ok && applyErr != nil {
		return applyErr
	}

	reply.Index = index
	return nil
}
// validateUpdate validates both the request and that any change to an
// existing key is valid.
//
// It returns an error when the request has no root key, no metadata, a
// missing or malformed key UUID, or no algorithm, and when the key
// already exists in the state store with a different algorithm.
func (k *Keyring) validateUpdate(args *structs.KeyringUpdateRootKeyRequest) error {
	// RootKey is a pointer; guard against a request that omits it so the
	// handler returns an error instead of dereferencing nil.
	if args.RootKey == nil {
		return fmt.Errorf("root key is required")
	}
	if args.RootKey.Meta == nil {
		return fmt.Errorf("root key metadata is required")
	}
	if args.RootKey.Meta.KeyID == "" || !helper.IsUUID(args.RootKey.Meta.KeyID) {
		return fmt.Errorf("root key UUID is required")
	}
	if args.RootKey.Meta.Algorithm == "" {
		return fmt.Errorf("algorithm is required")
	}

	// TODO: once the encrypter is implemented
	// if len(args.RootKey.Key) == 0 {
	// 	return fmt.Errorf("root key material is required")
	// }

	// lookup any existing key and validate the update
	snap, err := k.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	keyMeta, err := snap.RootKeyMetaByID(ws, args.RootKey.Meta.KeyID)
	if err != nil {
		return err
	}
	if keyMeta != nil && keyMeta.Algorithm != args.RootKey.Meta.Algorithm {
		return fmt.Errorf("root key algorithm cannot be changed after a key is created")
	}
	return nil
}
func (k *KeyRing) Delete(args *structs.KeyringDeleteRootKeyRequest, reply *structs.KeyringDeleteRootKeyResponse) error {
if done, err := k.srv.forward("KeyRing.Delete", args, args, reply); done {
// Get retrieves an existing key from the keyring, including both the
// key material and metadata. It is used only for replication.
//
// The connection must pass the server-level TLS certificate check. The
// lookup runs as a blocking query; when no key with the requested ID
// exists, reply.Key is nil and the reply index is taken from the root
// key metadata table.
func (k *Keyring) Get(args *structs.KeyringGetRootKeyRequest, reply *structs.KeyringGetRootKeyResponse) error {
	// ensure that only another server can make this request
	err := validateTLSCertificateLevel(k.srv, k.ctx, tlsCertificateLevelServer)
	if err != nil {
		return err
	}
	if done, err := k.srv.forward("Keyring.Get", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "keyring", "get"}, time.Now())

	if args.KeyID == "" {
		return fmt.Errorf("root key ID is required")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, s *state.StateStore) error {
			// Start every run from an empty result so that a key found by
			// an earlier run of this function is not returned after it
			// has been deleted (NOTE(review): blockingRPC presumably
			// re-runs this after a watch fires — confirm).
			reply.Key = nil

			// retrieve the key metadata
			snap, err := k.srv.fsm.State().Snapshot()
			if err != nil {
				return err
			}
			keyMeta, err := snap.RootKeyMetaByID(ws, args.KeyID)
			if err != nil {
				return err
			}
			if keyMeta == nil {
				return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta)
			}

			// TODO: retrieve the key material from the keyring
			key := &structs.RootKey{
				Meta: keyMeta,
				Key:  []byte{},
			}
			reply.Key = key
			reply.Index = keyMeta.ModifyIndex
			return nil
		},
	}
	return k.srv.blockingRPC(&opts)
}
// Delete removes a root key's metadata through Raft. Deleting a key ID
// that is not in the state store is a no-op that returns nil; deleting
// the active key is rejected. The caller's token must resolve to a
// management ACL (a nil ACL object is let through).
func (k *Keyring) Delete(args *structs.KeyringDeleteRootKeyRequest, reply *structs.KeyringDeleteRootKeyResponse) error {
	if forwarded, fwdErr := k.srv.forward("Keyring.Delete", args, args, reply); forwarded {
		return fwdErr
	}
	defer metrics.MeasureSince([]string{"nomad", "keyring", "delete"}, time.Now())

	// TODO: need to allow for servers to delete keys as well, to support replication
	aclObj, err := k.srv.ResolveToken(args.AuthToken)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.IsManagement() {
		return structs.ErrPermissionDenied
	}

	if args.KeyID == "" {
		return fmt.Errorf("root key ID is required")
	}

	// Look the key up in a snapshot to decide whether the delete is
	// allowed.
	snap, err := k.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	existing, err := snap.RootKeyMetaByID(memdb.NewWatchSet(), args.KeyID)
	if err != nil {
		return err
	}
	switch {
	case existing == nil:
		return nil // nothing to delete, safe to bail out early
	case existing.Active:
		return fmt.Errorf("active root key cannot be deleted - call rotate first")
	}

	// Apply the delete via Raft.
	out, index, err := k.srv.raftApply(structs.RootKeyMetaDeleteRequestType, args)
	if err != nil {
		return err
	}
	// The apply result may itself be an error value.
	if applyErr, ok := out.(error); ok && applyErr != nil {
		return applyErr
	}

	reply.Index = index
	return nil
}

View File

@@ -0,0 +1,275 @@
package nomad
import (
"sync"
"testing"
msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/stretchr/testify/require"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
)
// TestKeyringEndpoint_CRUD exercises the basic keyring operations:
// upserting a key, reading it back with Get, observing an update through
// a blocking List query, and deleting the key once it is inactive.
func TestKeyringEndpoint_CRUD(t *testing.T) {
	ci.Parallel(t)
	srv, rootToken, shutdown := TestACLServer(t, func(c *Config) {
		c.NumSchedulers = 0 // Prevent automatic dequeue
	})
	defer shutdown()
	testutil.WaitForLeader(t, srv.RPC)
	codec := rpcClient(t, srv)

	id := uuid.Generate()

	// Upsert a new key
	updateReq := &structs.KeyringUpdateRootKeyRequest{
		RootKey: &structs.RootKey{
			Meta: &structs.RootKeyMeta{
				KeyID:     id,
				Algorithm: structs.EncryptionAlgorithmXChaCha20,
				Active:    true,
			},
			Key: []byte{},
		},
		WriteRequest: structs.WriteRequest{Region: "global"},
	}
	var updateResp structs.KeyringUpdateRootKeyResponse
	var err error

	err = msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
	require.EqualError(t, err, structs.ErrPermissionDenied.Error())

	updateReq.AuthToken = rootToken.SecretID
	err = msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
	require.NoError(t, err)
	require.NotEqual(t, uint64(0), updateResp.Index)

	// Get and List don't need a token here because they rely on mTLS role verification
	getReq := &structs.KeyringGetRootKeyRequest{
		KeyID:        id,
		QueryOptions: structs.QueryOptions{Region: "global"},
	}
	var getResp structs.KeyringGetRootKeyResponse

	err = msgpackrpc.CallWithCodec(codec, "Keyring.Get", getReq, &getResp)
	require.NoError(t, err)
	require.Equal(t, updateResp.Index, getResp.Index)
	require.Equal(t, structs.EncryptionAlgorithmXChaCha20, getResp.Key.Meta.Algorithm)

	// Make a blocking query for List and wait for an Update. Note
	// that List/Get queries don't need ACL tokens in the test server
	// because they always pass the mTLS check
	var wg sync.WaitGroup
	wg.Add(1)
	var listResp structs.KeyringListRootKeyMetaResponse

	// The goroutine reports its result through its own error variable
	// and the assertion happens on the test goroutine after wg.Wait():
	// require calls t.FailNow, which must not be called from another
	// goroutine, and sharing err with the calls below would be a data
	// race.
	var listErr error
	listCodec := rpcClient(t, srv) // not safe to share across goroutines

	go func() {
		defer wg.Done()
		listReq := &structs.KeyringListRootKeyMetaRequest{
			QueryOptions: structs.QueryOptions{
				Region:        "global",
				MinQueryIndex: getResp.Index,
			},
		}
		listErr = msgpackrpc.CallWithCodec(listCodec, "Keyring.List", listReq, &listResp)
	}()

	updateReq.RootKey.Meta.EncryptionsCount++
	err = msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
	require.NoError(t, err)
	require.NotEqual(t, uint64(0), updateResp.Index)

	// wait for the blocking query to complete and check the response
	wg.Wait()
	require.NoError(t, listErr)
	require.Greater(t, listResp.Index, getResp.Index)
	require.Len(t, listResp.Keys, 1)

	// Delete the key and verify that it's gone
	delReq := &structs.KeyringDeleteRootKeyRequest{
		KeyID:        id,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}
	var delResp structs.KeyringDeleteRootKeyResponse

	err = msgpackrpc.CallWithCodec(codec, "Keyring.Delete", delReq, &delResp)
	require.EqualError(t, err, structs.ErrPermissionDenied.Error())

	delReq.AuthToken = rootToken.SecretID
	err = msgpackrpc.CallWithCodec(codec, "Keyring.Delete", delReq, &delResp)
	require.EqualError(t, err, "active root key cannot be deleted - call rotate first")

	// set inactive
	updateReq.RootKey.Meta.Active = false
	err = msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
	require.NoError(t, err)

	err = msgpackrpc.CallWithCodec(codec, "Keyring.Delete", delReq, &delResp)
	require.NoError(t, err)
	require.Greater(t, delResp.Index, getResp.Index)

	listReq := &structs.KeyringListRootKeyMetaRequest{
		QueryOptions: structs.QueryOptions{Region: "global"},
	}
	err = msgpackrpc.CallWithCodec(codec, "Keyring.List", listReq, &listResp)
	require.NoError(t, err)
	require.Greater(t, listResp.Index, getResp.Index)
	require.Len(t, listResp.Keys, 0)
}
// TestKeyringEndpoint_InvalidUpdates exercises the validations made for
// the update RPC: each case sends a malformed or conflicting root key
// and expects the matching error message.
func TestKeyringEndpoint_InvalidUpdates(t *testing.T) {
	ci.Parallel(t)
	srv, rootToken, shutdown := TestACLServer(t, func(c *Config) {
		c.NumSchedulers = 0 // Prevent automatic dequeue
	})
	defer shutdown()
	testutil.WaitForLeader(t, srv.RPC)
	codec := rpcClient(t, srv)

	id := uuid.Generate()

	// Setup an existing key
	updateReq := &structs.KeyringUpdateRootKeyRequest{
		RootKey: &structs.RootKey{
			Meta: &structs.RootKeyMeta{
				KeyID:     id,
				Algorithm: structs.EncryptionAlgorithmXChaCha20,
				Active:    true,
			},
			Key: []byte{},
		},
		WriteRequest: structs.WriteRequest{
			Region:    "global",
			AuthToken: rootToken.SecretID,
		},
	}
	var updateResp structs.KeyringUpdateRootKeyResponse
	err := msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
	require.NoError(t, err)

	testCases := []struct {
		key            *structs.RootKey
		expectedErrMsg string
	}{
		{
			key:            &structs.RootKey{},
			expectedErrMsg: "root key metadata is required",
		},
		{
			key:            &structs.RootKey{Meta: &structs.RootKeyMeta{}},
			expectedErrMsg: "root key UUID is required",
		},
		{
			key:            &structs.RootKey{Meta: &structs.RootKeyMeta{KeyID: "invalid"}},
			expectedErrMsg: "root key UUID is required",
		},
		{
			// valid key ID but no algorithm set
			key:            &structs.RootKey{Meta: &structs.RootKeyMeta{KeyID: id}},
			expectedErrMsg: "algorithm is required",
		},
		{
			// same key ID as the existing key, different algorithm
			key: &structs.RootKey{Meta: &structs.RootKeyMeta{
				KeyID:     id,
				Algorithm: structs.EncryptionAlgorithmAES256GCM,
			}},
			expectedErrMsg: "root key algorithm cannot be changed after a key is created",
		},
	}

	for _, tc := range testCases {
		tc := tc
		t.Run(tc.expectedErrMsg, func(t *testing.T) {
			updateReq := &structs.KeyringUpdateRootKeyRequest{
				RootKey: tc.key,
				WriteRequest: structs.WriteRequest{
					Region:    "global",
					AuthToken: rootToken.SecretID,
				},
			}
			var updateResp structs.KeyringUpdateRootKeyResponse
			err := msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
			require.EqualError(t, err, tc.expectedErrMsg)
		})
	}
}
// TestKeyringEndpoint_Rotate exercises the key rotation logic: after a
// rotation the keyring should list two keys, with the pre-existing key
// inactive and the newly created key active.
func TestKeyringEndpoint_Rotate(t *testing.T) {
	ci.Parallel(t)
	srv, rootToken, shutdown := TestACLServer(t, func(c *Config) {
		c.NumSchedulers = 0 // Prevent automatic dequeue
	})
	defer shutdown()
	testutil.WaitForLeader(t, srv.RPC)
	codec := rpcClient(t, srv)

	id := uuid.Generate()

	// Setup an existing key
	updateReq := &structs.KeyringUpdateRootKeyRequest{
		RootKey: &structs.RootKey{
			Meta: &structs.RootKeyMeta{
				KeyID:     id,
				Algorithm: structs.EncryptionAlgorithmXChaCha20,
				Active:    true,
			},
			Key: []byte{},
		},
		WriteRequest: structs.WriteRequest{
			Region:    "global",
			AuthToken: rootToken.SecretID,
		},
	}
	var updateResp structs.KeyringUpdateRootKeyResponse
	err := msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp)
	require.NoError(t, err)

	// Rotate the key
	rotateReq := &structs.KeyringRotateRootKeyRequest{
		WriteRequest: structs.WriteRequest{
			Region: "global",
		},
	}
	var rotateResp structs.KeyringRotateRootKeyResponse
	err = msgpackrpc.CallWithCodec(codec, "Keyring.Rotate", rotateReq, &rotateResp)
	require.EqualError(t, err, structs.ErrPermissionDenied.Error())

	rotateReq.AuthToken = rootToken.SecretID
	err = msgpackrpc.CallWithCodec(codec, "Keyring.Rotate", rotateReq, &rotateResp)
	require.NoError(t, err)
	require.NotEqual(t, updateResp.Index, rotateResp.Index)

	// Verify we have a new key and the old one is inactive
	listReq := &structs.KeyringListRootKeyMetaRequest{
		QueryOptions: structs.QueryOptions{
			Region: "global",
		},
	}
	var listResp structs.KeyringListRootKeyMetaResponse
	err = msgpackrpc.CallWithCodec(codec, "Keyring.List", listReq, &listResp)
	require.NoError(t, err)
	require.Greater(t, listResp.Index, updateResp.Index)
	require.Len(t, listResp.Keys, 2)
	for _, keyMeta := range listResp.Keys {
		if keyMeta.KeyID == id {
			require.False(t, keyMeta.Active, "expected old key to be inactive")
		} else {
			require.True(t, keyMeta.Active, "expected new key to be active")
		}
	}

	// TODO: verify that Encrypter has been updated
}

View File

@@ -288,6 +288,7 @@ type endpoints struct {
Event *Event
Namespace *Namespace
SecureVariables *SecureVariables
Keyring *Keyring
ServiceRegistration *ServiceRegistration
// Client endpoints
@@ -1160,6 +1161,9 @@ func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
// setupRpcServer is used to populate an RPC server with endpoints
func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
// Add the static endpoints to the RPC server.
encrypter := NewEncrypter()
if s.staticEndpoints.Status == nil {
// Initialize the list just once
s.staticEndpoints.ACL = &ACL{srv: s, logger: s.logger.Named("acl")}
@@ -1176,7 +1180,8 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
s.staticEndpoints.System = &System{srv: s, logger: s.logger.Named("system")}
s.staticEndpoints.Search = &Search{srv: s, logger: s.logger.Named("search")}
s.staticEndpoints.Namespace = &Namespace{srv: s}
s.staticEndpoints.SecureVariables = &SecureVariables{srv: s, logger: s.logger.Named("secure_variables"), encrypter: NewEncrypter()}
s.staticEndpoints.SecureVariables = &SecureVariables{srv: s, logger: s.logger.Named("secure_variables"), encrypter: encrypter}
s.staticEndpoints.Enterprise = NewEnterpriseEndpoints(s)
// These endpoints are dynamic because they need access to the
@@ -1233,6 +1238,7 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
node := &Node{srv: s, ctx: ctx, logger: s.logger.Named("client")}
plan := &Plan{srv: s, ctx: ctx, logger: s.logger.Named("plan")}
serviceReg := &ServiceRegistration{srv: s, ctx: ctx}
// The Keyring endpoint needs the connection's RPC context: its List and
// Get handlers pass k.ctx to the TLS certificate level check.
keyringReg := &Keyring{srv: s, ctx: ctx, logger: s.logger.Named("keyring"), encrypter: encrypter}
// Register the dynamic endpoints
server.Register(alloc)
@@ -1241,6 +1247,7 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
server.Register(node)
server.Register(plan)
_ = server.Register(serviceReg)
_ = server.Register(keyringReg)
}
// setupRaft is used to setup and initialize Raft

View File

@@ -139,7 +139,7 @@ type SecureVariablesDeleteResponse struct {
// RootKey is used to encrypt and decrypt secure variables. It is
// never stored in raft.
type RootKey struct {
Meta RootKeyMeta
// Meta is the metadata describing this key.
Meta *RootKeyMeta
Key []byte // serialized to keystore as base64 blob
}
@@ -216,6 +216,18 @@ type KeyringUpdateRootKeyResponse struct {
WriteMeta
}
// KeyringGetRootKeyRequest is used internally for key replication
// only and for keyring restores.
type KeyringGetRootKeyRequest struct {
// KeyID is the ID of the root key to retrieve.
KeyID string
QueryOptions
}
// KeyringGetRootKeyResponse is the reply to a KeyringGetRootKeyRequest.
type KeyringGetRootKeyResponse struct {
// Key is the requested root key, including its metadata.
Key *RootKey
QueryMeta
}
// KeyringUpdateRootKeyMetaRequest is used internally for key
// replication so that we have a request wrapper for writing the
// metadata to the FSM without including the key material