diff --git a/.semgrep/rpc_endpoint.yml b/.semgrep/rpc_endpoint.yml index af94a45ec..3100fa6fb 100644 --- a/.semgrep/rpc_endpoint.yml +++ b/.semgrep/rpc_endpoint.yml @@ -36,6 +36,15 @@ rules: if done, err := $A.$B.forward($METHOD, ...); done { return err } + # Pattern used by endpoints that support both normal ACLs and + # workload identity + - pattern-not-inside: | + if done, err := $A.$B.forward($METHOD, ...); done { + return err + } + ... + ... := $T.handleMixedAuthEndpoint(...) + ... # Pattern used by some Node endpoints. - pattern-not-inside: | if done, err := $A.$B.forward($METHOD, ...); done { diff --git a/client/allocrunner/interfaces/task_lifecycle.go b/client/allocrunner/interfaces/task_lifecycle.go index e02b1366f..9ab7eb5fc 100644 --- a/client/allocrunner/interfaces/task_lifecycle.go +++ b/client/allocrunner/interfaces/task_lifecycle.go @@ -56,6 +56,9 @@ type TaskPrestartRequest struct { // Vault token may optionally be set if a Vault token is available VaultToken string + // NomadToken token may optionally be set if a Nomad token is available + NomadToken string + // TaskDir contains the task's directory tree on the host TaskDir *allocdir.TaskDir @@ -153,6 +156,8 @@ type TaskExitedHook interface { type TaskUpdateRequest struct { VaultToken string + NomadToken string + // Alloc is the current version of the allocation (may have been // updated since the hook was created) Alloc *structs.Allocation diff --git a/client/allocrunner/taskrunner/identity_hook.go b/client/allocrunner/taskrunner/identity_hook.go new file mode 100644 index 000000000..f318b89b2 --- /dev/null +++ b/client/allocrunner/taskrunner/identity_hook.go @@ -0,0 +1,50 @@ +package taskrunner + +import ( + "context" + "sync" + + log "github.com/hashicorp/go-hclog" + + "github.com/hashicorp/nomad/client/allocrunner/interfaces" +) + +// identityHook sets the task runner's Nomad workload identity token +// based on the signed identity stored on the Allocation +type identityHook struct { + tr *TaskRunner + logger log.Logger + taskName string + lock sync.Mutex +} + +func newIdentityHook(tr *TaskRunner, logger log.Logger) *identityHook { + h := &identityHook{ + tr: tr, + taskName: tr.taskName, + } + h.logger = logger.Named(h.Name()) + return h +} + +func (*identityHook) Name() string { + return "identity" +} + +func (h *identityHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { + h.lock.Lock() + defer h.lock.Unlock() + + token := h.tr.alloc.SignedIdentities[h.taskName] + h.tr.setNomadToken(token) + return nil +} + +func (h *identityHook) Update(_ context.Context, req *interfaces.TaskUpdateRequest, _ *interfaces.TaskUpdateResponse) error { + h.lock.Lock() + defer h.lock.Unlock() + + token := h.tr.alloc.SignedIdentities[h.taskName] + h.tr.setNomadToken(token) + return nil +} diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 95c87440e..981998b4f 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -187,6 +187,11 @@ type TaskRunner struct { vaultToken string vaultTokenLock sync.Mutex + // nomadToken is the current Nomad workload identity token. It + // should be accessed with the getter. + nomadToken string + nomadTokenLock sync.Mutex + // baseLabels are used when emitting tagged metrics. All task runner metrics // will have these tags, and optionally more. baseLabels []metrics.Label diff --git a/client/allocrunner/taskrunner/task_runner_getters.go b/client/allocrunner/taskrunner/task_runner_getters.go index 4d9c35e6e..4207bfb36 100644 --- a/client/allocrunner/taskrunner/task_runner_getters.go +++ b/client/allocrunner/taskrunner/task_runner_getters.go @@ -76,6 +76,18 @@ func (tr *TaskRunner) setVaultToken(token string) { tr.envBuilder.SetVaultToken(token, ns, tr.task.Vault.Env) } +func (tr *TaskRunner) getNomadToken() string { + tr.nomadTokenLock.Lock() + defer tr.nomadTokenLock.Unlock() + return tr.nomadToken +} + +func (tr *TaskRunner) setNomadToken(token string) { + tr.nomadTokenLock.Lock() + defer tr.nomadTokenLock.Unlock() + tr.nomadToken = token +} + // getDriverHandle returns a driver handle. func (tr *TaskRunner) getDriverHandle() *DriverHandle { tr.handleLock.Lock() diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 0789dd184..5c15e3a79 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -61,6 +61,7 @@ func (tr *TaskRunner) initHooks() { tr.runnerHooks = []interfaces.TaskHook{ newValidateHook(tr.clientConfig, hookLogger), newTaskDirHook(tr, hookLogger), + newIdentityHook(tr, hookLogger), newLogMonHook(tr, hookLogger), newDispatchHook(alloc, hookLogger), newVolumeHook(tr, hookLogger), @@ -243,6 +244,7 @@ func (tr *TaskRunner) prestart() error { } req.VaultToken = tr.getVaultToken() + req.NomadToken = tr.getNomadToken() // Time the prestart hook var start time.Time diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index d6c718196..37c3f1da6 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -105,6 +105,9 @@ type TaskTemplateManagerConfig struct { // NomadNamespace is the Nomad namespace for the task NomadNamespace string + + // NomadToken is the Nomad token or identity claim for the task + NomadToken string } // Validate validates the configuration. @@ -813,9 +816,7 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, // Set up Nomad conf.Nomad.Namespace = &config.NomadNamespace conf.Nomad.Transport.CustomDialer = cc.TemplateDialer - - // Use the Node's SecretID to authenticate Nomad template function calls. - conf.Nomad.Token = &cc.Node.SecretID + conf.Nomad.Token = &config.NomadToken conf.Finalize() return conf, nil diff --git a/client/allocrunner/taskrunner/template_hook.go b/client/allocrunner/taskrunner/template_hook.go index a5ad9f8fd..cdc6ee16f 100644 --- a/client/allocrunner/taskrunner/template_hook.go +++ b/client/allocrunner/taskrunner/template_hook.go @@ -63,6 +63,9 @@ type templateHook struct { // vaultNamespace is the current Vault namespace vaultNamespace string + // nomadToken is the current Nomad token + nomadToken string + // taskDir is the task directory taskDir string } @@ -91,6 +94,7 @@ func (h *templateHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar // Store the current Vault token and the task directory h.taskDir = req.TaskDir.Dir h.vaultToken = req.VaultToken + h.nomadToken = req.NomadToken // Set vault namespace if specified if req.Task.Vault != nil { @@ -126,6 +130,7 @@ func (h *templateHook) newManager() (unblock chan struct{}, err error) { EnvBuilder: h.config.envBuilder, MaxTemplateEventRate: template.DefaultMaxTemplateEventRate, NomadNamespace: h.config.nomadNamespace, + NomadToken: h.nomadToken, }) if err != nil { h.logger.Error("failed to create template manager", "error", err) @@ -158,11 +163,12 @@ func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateReq return nil } - // Check if the Vault token has changed - if req.VaultToken == h.vaultToken { + // Check if either the Nomad or Vault tokens have changed + if req.VaultToken == h.vaultToken && req.NomadToken == h.nomadToken { return nil } else { h.vaultToken = req.VaultToken + h.nomadToken = req.NomadToken } // Shutdown the old template diff --git a/go.mod b/go.mod index 6b9ddb56e..223839c2a 100644 --- a/go.mod +++ b/go.mod @@ -193,7 +193,7 @@ require ( github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/gojuno/minimock/v3 v3.0.6 // indirect - github.com/golang-jwt/jwt/v4 v4.0.0 // indirect + github.com/golang-jwt/jwt/v4 v4.4.1 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/btree v1.0.0 // indirect github.com/google/go-querystring v0.0.0-20170111101155-53e6ce116135 // indirect diff --git a/go.sum b/go.sum index 7e8d1b3cd..954231827 100644 --- a/go.sum +++ b/go.sum @@ -527,8 +527,9 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69 github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI= github.com/gojuno/minimock/v3 v3.0.6 h1:YqHcVR10x2ZvswPK8Ix5yk+hMpspdQ3ckSpkOzyF85I= github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY= -github.com/golang-jwt/jwt/v4 v4.0.0 h1:RAqyYixv1p7uEnocuy8P1nru5wprCh/MH2BIlW5z5/o= github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= +github.com/golang-jwt/jwt/v4 v4.4.1 h1:pC5DB52sCeK48Wlb9oPcdhnjkz1TKt1D/P7WKJ0kUcQ= +github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= diff --git a/nomad/acl.go b/nomad/acl.go index 90ecb417b..d40e37c49 100644 --- a/nomad/acl.go +++ b/nomad/acl.go @@ -35,6 +35,10 @@ func (s *Server) ResolveToken(secretID string) (*acl.ACL, error) { return resolveTokenFromSnapshotCache(snap, s.aclCache, secretID) } +func (s *Server) ResolveClaim(token string) (*structs.IdentityClaims, error) { + return s.encrypter.VerifyClaim(token) +} + // resolveTokenFromSnapshotCache is used to resolve an ACL object from a snapshot of state, // using a cache to avoid parsing and ACL construction when possible. It is split from resolveToken // to simplify testing. diff --git a/nomad/encrypter.go b/nomad/encrypter.go index 298418543..16ea17c53 100644 --- a/nomad/encrypter.go +++ b/nomad/encrypter.go @@ -5,6 +5,7 @@ import ( "context" "crypto/aes" "crypto/cipher" + "crypto/ed25519" "encoding/base64" "encoding/json" "fmt" @@ -15,6 +16,7 @@ import ( "sync" "time" + jwt "github.com/golang-jwt/jwt/v4" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-msgpack/codec" "golang.org/x/time/rate" @@ -27,15 +29,22 @@ const nomadKeystoreExtension = ".nks.json" // Encrypter is the keyring for secure variables. type Encrypter struct { - lock sync.RWMutex - keys map[string]*structs.RootKey // map of key IDs to key material - ciphers map[string]cipher.AEAD // map of key IDs to ciphers + srv *Server keystorePath string + + keyring map[string]*keyset + lock sync.RWMutex +} + +type keyset struct { + rootKey *structs.RootKey + cipher cipher.AEAD + privateKey ed25519.PrivateKey } // NewEncrypter loads or creates a new local keystore and returns an // encryption keyring with the keys it finds. -func NewEncrypter(keystorePath string) (*Encrypter, error) { +func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) { err := os.MkdirAll(keystorePath, 0700) if err != nil { return nil, err @@ -44,14 +53,14 @@ func NewEncrypter(keystorePath string) (*Encrypter, error) { if err != nil { return nil, err } + encrypter.srv = srv return encrypter, nil } func encrypterFromKeystore(keystoreDirectory string) (*Encrypter, error) { encrypter := &Encrypter{ - ciphers: make(map[string]cipher.AEAD), - keys: make(map[string]*structs.RootKey), + keyring: make(map[string]*keyset), keystorePath: keystoreDirectory, } @@ -107,6 +116,62 @@ func (e *Encrypter) Encrypt(unencryptedData []byte, keyID string) []byte { return unencryptedData } +// keyIDHeader is the JWT header for the Nomad Key ID used to sign the +// claim. This name matches the common industry practice for this +// header name. +const keyIDHeader = "kid" + +// SignClaims signs the identity claim for the task and returns an +// encoded JWT with both the claim and its signature +func (e *Encrypter) SignClaims(claim *structs.IdentityClaims) (string, error) { + + keyset, err := e.activeKeySet() + if err != nil { + return "", err + } + + token := jwt.NewWithClaims(&jwt.SigningMethodEd25519{}, claim) + token.Header[keyIDHeader] = keyset.rootKey.Meta.KeyID + + tokenString, err := token.SignedString(keyset.privateKey) + if err != nil { + return "", err + } + + return tokenString, nil +} + +// VerifyClaim accepts a previously-signed encoded claim and validates +// it before returning the claim +func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, error) { + + token, err := jwt.ParseWithClaims(tokenString, &structs.IdentityClaims{}, func(token *jwt.Token) (interface{}, error) { + if _, ok := token.Method.(*jwt.SigningMethodEd25519); !ok { + return nil, fmt.Errorf("unexpected signing method: %v", token.Method.Alg()) + } + raw := token.Header[keyIDHeader] + if raw == nil { + return nil, fmt.Errorf("missing key ID header") + } + keyID := raw.(string) + keyset, err := e.keysetByID(keyID) + if err != nil { + return nil, err + } + return keyset.privateKey.Public(), nil + }) + + if err != nil { + return nil, fmt.Errorf("failed to verify token: %v", err) + } + + claims, ok := token.Claims.(*structs.IdentityClaims) + if !ok || !token.Valid { + return nil, fmt.Errorf("failed to verify token: invalid token") + } + return claims, nil +} + // Decrypt takes an encrypted buffer and then root key ID. It extracts // the nonce, decrypts the content, and returns the cleartext data. func (e *Encrypter) Decrypt(encryptedData []byte, keyID string) ([]byte, error) { @@ -150,22 +215,46 @@ func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { return fmt.Errorf("invalid algorithm %s", rootKey.Meta.Algorithm) } + privateKey := ed25519.NewKeyFromSeed(rootKey.Key) + e.lock.Lock() defer e.lock.Unlock() - e.ciphers[rootKey.Meta.KeyID] = aead - e.keys[rootKey.Meta.KeyID] = rootKey + e.keyring[rootKey.Meta.KeyID] = &keyset{ + rootKey: rootKey, + cipher: aead, + privateKey: privateKey, + } return nil } // GetKey retrieves the key material by ID from the keyring func (e *Encrypter) GetKey(keyID string) ([]byte, error) { + keyset, err := e.keysetByID(keyID) + if err != nil { + return nil, err + } + return keyset.rootKey.Key, nil +} + +func (e *Encrypter) activeKeySet() (*keyset, error) { + store := e.srv.fsm.State() + keyMeta, err := store.GetActiveRootKeyMeta(nil) + if err != nil { + return nil, err + } + + return e.keysetByID(keyMeta.KeyID) +} + +func (e *Encrypter) keysetByID(keyID string) (*keyset, error) { e.lock.RLock() defer e.lock.RUnlock() - key, ok := e.keys[keyID] + + keyset, ok := e.keyring[keyID] if !ok { - return []byte{}, fmt.Errorf("no such key %s in keyring", keyID) + return nil, fmt.Errorf("no such key %s in keyring", keyID) } - return key.Key, nil + return keyset, nil } // RemoveKey removes a key by ID from the keyring @@ -175,8 +264,7 @@ func (e *Encrypter) RemoveKey(keyID string) error { // remove the serialized file? e.lock.Lock() defer e.lock.Unlock() - delete(e.ciphers, keyID) - delete(e.keys, keyID) + delete(e.keyring, keyID) return nil } @@ -212,6 +300,7 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { if err := json.Unmarshal(raw, storedKey); err != nil { return nil, err } + meta := &structs.RootKeyMeta{ Active: storedKey.Meta.Active, KeyID: storedKey.Meta.KeyID, @@ -231,7 +320,6 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { Meta: meta, Key: key, }, nil - } type KeyringReplicator struct { diff --git a/nomad/encrypter_test.go b/nomad/encrypter_test.go index 677bd06e4..895fda38d 100644 --- a/nomad/encrypter_test.go +++ b/nomad/encrypter_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" ) @@ -20,7 +21,7 @@ func TestEncrypter_LoadSave(t *testing.T) { ci.Parallel(t) tmpDir := t.TempDir() - encrypter, err := NewEncrypter(tmpDir) + encrypter, err := NewEncrypter(nil, tmpDir) require.NoError(t, err) algos := []structs.EncryptionAlgorithm{ @@ -270,5 +271,29 @@ func TestKeyringReplicator(t *testing.T) { require.Eventually(t, checkReplicationFn(keyID3), time.Second*5, time.Second, "expected keys to be replicated to followers after election") - +} + +func TestEncrypter_SignVerify(t *testing.T) { + + ci.Parallel(t) + srv, shutdown := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer shutdown() + testutil.WaitForLeader(t, srv.RPC) + + alloc := mock.Alloc() + claim := alloc.ToTaskIdentityClaims("web") + e := srv.encrypter + + out, err := e.SignClaims(claim) + require.NoError(t, err) + + got, err := e.VerifyClaim(out) + require.NoError(t, err) + require.NotNil(t, got) + require.NoError(t, got.Valid()) + require.Equal(t, alloc.ID, got.AllocationID) + require.Equal(t, alloc.JobID, got.JobID) + require.Equal(t, "web", got.TaskName) } diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 58002b225..ced8198ef 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -242,6 +242,11 @@ func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap // to approximate the scheduling time. updateAllocTimestamps(req.AllocsUpdated, now) + err := p.signAllocIdentities(plan.Job, req.AllocsUpdated) + if err != nil { + return nil, err + } + for _, preemptions := range result.NodePreemptions { for _, preemptedAlloc := range preemptions { req.AllocsPreempted = append(req.AllocsPreempted, normalizePreemptedAlloc(preemptedAlloc, now)) @@ -367,6 +372,25 @@ func updateAllocTimestamps(allocations []*structs.Allocation, timestamp int64) { } } +func (p *planner) signAllocIdentities(job *structs.Job, allocations []*structs.Allocation) error { + + encrypter := p.Server.encrypter + + for _, alloc := range allocations { + alloc.SignedIdentities = map[string]string{} + tg := job.LookupTaskGroup(alloc.TaskGroup) + for _, task := range tg.Tasks { + claims := alloc.ToTaskIdentityClaims(task.Name) + token, err := encrypter.SignClaims(claims) + if err != nil { + return err + } + alloc.SignedIdentities[task.Name] = token + } + } + return nil +} + // asyncPlanWait is used to apply and respond to a plan async. On successful // commit the plan's index will be sent on the chan. On error the chan will be // closed. diff --git a/nomad/secure_variables_endpoint.go b/nomad/secure_variables_endpoint.go index c1d04f025..09f871425 100644 --- a/nomad/secure_variables_endpoint.go +++ b/nomad/secure_variables_endpoint.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "net/http" + "strings" "time" metrics "github.com/armon/go-metrics" @@ -11,6 +12,7 @@ import ( memdb "github.com/hashicorp/go-memdb" multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/acl" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/state/paginator" "github.com/hashicorp/nomad/nomad/structs" @@ -128,13 +130,11 @@ func (sv *SecureVariables) Read(args *structs.SecureVariablesReadRequest, reply } defer metrics.MeasureSince([]string{"nomad", "secure_variables", "read"}, time.Now()) - if aclObj, err := sv.srv.ResolveToken(args.AuthToken); err != nil { + // FIXME: Temporary ACL Test policy. Update once implementation complete + err := sv.handleMixedAuthEndpoint(args.QueryOptions, + acl.NamespaceCapabilitySubmitJob, args.Path) + if err != nil { return err - } else if aclObj != nil { - // FIXME: Temporary ACL Test policy. Update once implementation complete - if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { - return structs.ErrPermissionDenied - } } // Setup the blocking query @@ -180,13 +180,14 @@ func (sv *SecureVariables) List( return sv.listAllSecureVariables(args, reply) } - if aclObj, err := sv.srv.ResolveToken(args.AuthToken); err != nil { + // FIXME: Temporary ACL Test policy. Update once implementation complete + err := sv.handleMixedAuthEndpoint(args.QueryOptions, + acl.NamespaceCapabilitySubmitJob, args.Prefix) + if err != nil { + return err + } + if err != nil { return err - } else if aclObj != nil { - // FIXME: Temporary ACL Test policy. Update once implementation complete - if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { - return structs.ErrPermissionDenied - } } // Set up and return the blocking query. @@ -367,3 +368,95 @@ func (sv *SecureVariables) decrypt(v *structs.SecureVariable) error { v.EncryptedData = nil return nil } + +// handleMixedAuthEndpoint is a helper to handle auth on RPC endpoints that can +// either be called by external clients or by workload identity +func (sv *SecureVariables) handleMixedAuthEndpoint(args structs.QueryOptions, cap, pathOrPrefix string) error { + + // Perform the initial token resolution. + aclObj, err := sv.srv.ResolveToken(args.AuthToken) + if err == nil { + // Perform our ACL validation. If the object is nil, this means ACLs + // are not enabled, otherwise trigger the allowed namespace function. + if aclObj != nil { + if !aclObj.AllowNsOp(args.RequestNamespace(), cap) { + return structs.ErrPermissionDenied + } + } + return nil + } + if helper.IsUUID(args.AuthToken) { + // early return for ErrNotFound or other errors if it's formed + // like an ACLToken.SecretID + return err + } + + // Attempt to verify the token as a JWT with a workload + // identity claim + claim, err := sv.srv.ResolveClaim(args.AuthToken) + if err != nil { + return structs.ErrPermissionDenied + } + + alloc, err := sv.authValidateAllocationIdentity(claim.AllocationID, args.RequestNamespace()) + if err != nil { + metrics.IncrCounter([]string{ + "nomad", "secure_variables", "invalid_allocation_identity"}, 1) + sv.logger.Trace("allocation identity was not valid", "error", err) + return structs.ErrPermissionDenied + } + + err = sv.authValidatePrefix(alloc, claim.TaskName, pathOrPrefix) + if err != nil { + sv.logger.Trace("allocation identity did not have permission for path", "error", err) + return structs.ErrPermissionDenied + } + + return nil +} + +// authValidateAllocationIdentity asserts that the allocation ID +// belongs to a non-terminal Allocation in the requested namespace +func (sv *SecureVariables) authValidateAllocationIdentity(allocID, ns string) (*structs.Allocation, error) { + + store, err := sv.srv.fsm.State().Snapshot() + if err != nil { + return nil, err + } + alloc, err := store.AllocByID(nil, allocID) + if err != nil { + return nil, err + } + if alloc == nil || alloc.Job == nil { + return nil, fmt.Errorf("allocation does not exist") + } + + // the claims for terminal allocs are always treated as expired + if alloc.TerminalStatus() { + return nil, fmt.Errorf("allocation is terminal") + } + + if alloc.Job.Namespace != ns { + return nil, fmt.Errorf("allocation is in another namespace") + } + + return alloc, nil +} + +// authValidatePrefix asserts that the requested path is valid for +// this allocation +func (sv *SecureVariables) authValidatePrefix(alloc *structs.Allocation, taskName, pathOrPrefix string) error { + + parts := strings.Split(pathOrPrefix, "/") + expect := []string{"jobs", alloc.Job.ID, alloc.TaskGroup, taskName} + if len(parts) > len(expect) { + return structs.ErrPermissionDenied + } + + for idx, part := range parts { + if part != expect[idx] { + return structs.ErrPermissionDenied + } + } + return nil +} diff --git a/nomad/secure_variables_endpoint_test.go b/nomad/secure_variables_endpoint_test.go new file mode 100644 index 000000000..79beb39b1 --- /dev/null +++ b/nomad/secure_variables_endpoint_test.go @@ -0,0 +1,148 @@ +package nomad + +import ( + "fmt" + "math/rand" + "strings" + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/require" +) + +func TestSecureVariablesEndpoint_auth(t *testing.T) { + + ci.Parallel(t) + srv, _, shutdown := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer shutdown() + testutil.WaitForLeader(t, srv.RPC) + + const ns = "nondefault-namespace" + + alloc := mock.Alloc() + alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.Job.Namespace = ns + jobID := alloc.JobID + + store := srv.fsm.State() + require.NoError(t, store.UpsertAllocs( + structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) + + claim := alloc.ToTaskIdentityClaims("web") + e := srv.encrypter + + idToken, err := e.SignClaims(claim) + require.NoError(t, err) + + // corrupt the signature of the token + idTokenParts := strings.Split(idToken, ".") + require.Len(t, idTokenParts, 3) + sig := []string(strings.Split(idTokenParts[2], "")) + rand.Shuffle(len(sig), func(i, j int) { + sig[i], sig[j] = sig[j], sig[i] + }) + idTokenParts[2] = strings.Join(sig, "") + invalidIDToken := strings.Join(idTokenParts, ".") + + t.Run("terminal alloc should be denied", func(t *testing.T) { + err = srv.staticEndpoints.SecureVariables.handleMixedAuthEndpoint( + structs.QueryOptions{AuthToken: idToken, Namespace: ns}, "n/a", + fmt.Sprintf("jobs/%s/web/web", jobID)) + require.EqualError(t, err, structs.ErrPermissionDenied.Error()) + }) + + // make alloc non-terminal + alloc.ClientStatus = structs.AllocClientStatusRunning + require.NoError(t, store.UpsertAllocs( + structs.MsgTypeTestSetup, 1200, []*structs.Allocation{alloc})) + + t.Run("wrong namespace should be denied", func(t *testing.T) { + err = srv.staticEndpoints.SecureVariables.handleMixedAuthEndpoint( + structs.QueryOptions{AuthToken: idToken, Namespace: structs.DefaultNamespace}, "n/a", + fmt.Sprintf("jobs/%s/web/web", jobID)) + require.EqualError(t, err, structs.ErrPermissionDenied.Error()) + }) + + testCases := []struct { + name string + token string + cap string + path string + expectedErr error + }{ + { + name: "valid claim for path with task secret", + token: idToken, + cap: "n/a", + path: fmt.Sprintf("jobs/%s/web/web", jobID), + expectedErr: nil, + }, + { + name: "valid claim for path with group secret", + token: idToken, + cap: "n/a", + path: fmt.Sprintf("jobs/%s/web", jobID), + expectedErr: nil, + }, + { + name: "valid claim for path with job secret", + token: idToken, + cap: "n/a", + path: fmt.Sprintf("jobs/%s", jobID), + expectedErr: nil, + }, + { + name: "valid claim for path with namespace secret", + token: idToken, + cap: "n/a", + path: "jobs", + expectedErr: nil, + }, + { + name: "extra trailing slash is denied", + token: idToken, + cap: "n/a", + path: fmt.Sprintf("jobs/%s/web/", jobID), + expectedErr: structs.ErrPermissionDenied, + }, + { + name: "invalid prefix is denied", + token: idToken, + cap: "n/a", + path: fmt.Sprintf("jobs/%s/w", jobID), + expectedErr: structs.ErrPermissionDenied, + }, + { + name: "missing auth token is denied", + cap: "n/a", + path: fmt.Sprintf("jobs/%s/web/web", jobID), + expectedErr: structs.ErrPermissionDenied, + }, + { + name: "invalid signature is denied", + token: invalidIDToken, + cap: "n/a", + path: fmt.Sprintf("jobs/%s/web/web", jobID), + expectedErr: structs.ErrPermissionDenied, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + err := srv.staticEndpoints.SecureVariables.handleMixedAuthEndpoint( + structs.QueryOptions{AuthToken: tc.token, Namespace: ns}, tc.cap, tc.path) + if tc.expectedErr == nil { + require.NoError(t, err) + } else { + require.EqualError(t, err, tc.expectedErr.Error()) + } + }) + } + +} diff --git a/nomad/server.go b/nomad/server.go index ad6db00c9..6e9687955 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -394,7 +394,7 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigEntr } // Set up the keyring - encrypter, err := NewEncrypter(filepath.Join(s.config.DataDir, "keystore")) + encrypter, err := NewEncrypter(s, filepath.Join(s.config.DataDir, "keystore")) if err != nil { return nil, err } diff --git a/nomad/service_registration_endpoint.go b/nomad/service_registration_endpoint.go index f417a8c85..9428707df 100644 --- a/nomad/service_registration_endpoint.go +++ b/nomad/service_registration_endpoint.go @@ -11,6 +11,7 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/acl" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/state/paginator" "github.com/hashicorp/nomad/nomad/structs" @@ -528,27 +529,46 @@ func (s *ServiceRegistration) handleMixedAuthEndpoint(args structs.QueryOptions, } } default: + // Attempt to verify the token as a JWT with a workload + // identity claim if it's not a secret ID. + // COMPAT(1.4.0): we can remove this conditional in 1.5.0 + if !helper.IsUUID(args.AuthToken) { + claims, err := s.srv.ResolveClaim(args.AuthToken) + if err != nil { + return err + } + if claims == nil { + return structs.ErrPermissionDenied + } + return nil + } + + // COMPAT(1.4.0): Nomad 1.3.0 shipped with authentication by + // node secret but that's been replaced with workload identity + // in 1.4.0. Leave this here for backwards compatibility + // between clients and servers during cluster upgrades, but + // remove for 1.5.0 + // In the event we got any error other than ErrTokenNotFound, consider this // terminal. if err != structs.ErrTokenNotFound { return err } - // Attempt to lookup AuthToken as a Node.SecretID and return any error - // wrapped along with the original. + // Attempt to lookup AuthToken as a Node.SecretID and + // return any error wrapped along with the original. node, stateErr := s.srv.fsm.State().NodeBySecretID(nil, args.AuthToken) if stateErr != nil { var mErr multierror.Error mErr.Errors = append(mErr.Errors, err, stateErr) return mErr.ErrorOrNil() } - // At this point, we do not have a valid ACL token, nor are we being // called, or able to confirm via the state store, by a node. if node == nil { return structs.ErrTokenNotFound } - } + } return nil } diff --git a/nomad/service_registration_endpoint_test.go b/nomad/service_registration_endpoint_test.go index 34c76eed6..1f047508b 100644 --- a/nomad/service_registration_endpoint_test.go +++ b/nomad/service_registration_endpoint_test.go @@ -858,7 +858,66 @@ func TestServiceRegistration_List(t *testing.T) { }}, }, serviceRegResp.Services) }, - name: "ACLs enabled with node secret toekn", + name: "ACLs enabled with node secret token", + }, + { + serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) { + return TestACLServer(t, nil) + }, + testFn: func(t *testing.T, s *Server, token *structs.ACLToken) { + codec := rpcClient(t, s) + testutil.WaitForLeader(t, s.RPC) + + // Create a namespace as this is needed when using an ACL like + // we do in this test. + ns := &structs.Namespace{ + Name: "platform", + Description: "test namespace", + CreateIndex: 5, + ModifyIndex: 5, + } + ns.SetHash() + require.NoError(t, s.State().UpsertNamespaces(5, []*structs.Namespace{ns})) + + // Generate an allocation with a signed identity + allocs := []*structs.Allocation{mock.Alloc()} + job := allocs[0].Job + require.NoError(t, s.State().UpsertJob(structs.MsgTypeTestSetup, 10, job)) + s.signAllocIdentities(job, allocs) + require.NoError(t, s.State().UpsertAllocs(structs.MsgTypeTestSetup, 15, allocs)) + + signedToken := allocs[0].SignedIdentities["web"] + + // Generate and upsert some service registrations. + services := mock.ServiceRegistrations() + require.NoError(t, s.State().UpsertServiceRegistrations( + structs.MsgTypeTestSetup, 20, services)) + + // Test a request while setting the auth token to the signed token + serviceRegReq := &structs.ServiceRegistrationListRequest{ + QueryOptions: structs.QueryOptions{ + Namespace: "platform", + Region: DefaultRegion, + AuthToken: signedToken, + }, + } + var serviceRegResp structs.ServiceRegistrationListResponse + err := msgpackrpc.CallWithCodec( + codec, structs.ServiceRegistrationListRPCMethod, + serviceRegReq, &serviceRegResp) + require.NoError(t, err) + require.ElementsMatch(t, []*structs.ServiceRegistrationListStub{ + { + Namespace: "platform", + Services: []*structs.ServiceRegistrationStub{ + { + ServiceName: "countdash-api", + Tags: []string{"bar"}, + }, + }}, + }, serviceRegResp.Services) + }, + name: "ACLs enabled with valid signed identity", }, } @@ -1023,6 +1082,7 @@ func TestServiceRegistration_GetService(t *testing.T) { }, name: "ACLs enabled", }, + { serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) { return TestACLServer(t, nil) @@ -1075,6 +1135,50 @@ func TestServiceRegistration_GetService(t *testing.T) { }, name: "ACLs enabled using node secret", }, + { + serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) { + return TestACLServer(t, nil) + }, + testFn: func(t *testing.T, s *Server, token *structs.ACLToken) { + codec := rpcClient(t, s) + testutil.WaitForLeader(t, s.RPC) + + // Generate mock services then upsert them individually using different indexes. + services := mock.ServiceRegistrations() + + require.NoError(t, s.fsm.State().UpsertServiceRegistrations( + structs.MsgTypeTestSetup, 10, []*structs.ServiceRegistration{services[0]})) + + require.NoError(t, s.fsm.State().UpsertServiceRegistrations( + structs.MsgTypeTestSetup, 20, []*structs.ServiceRegistration{services[1]})) + + // Generate an allocation with a signed identity + allocs := []*structs.Allocation{mock.Alloc()} + job := allocs[0].Job + require.NoError(t, s.State().UpsertJob(structs.MsgTypeTestSetup, 10, job)) + s.signAllocIdentities(job, allocs) + require.NoError(t, s.State().UpsertAllocs(structs.MsgTypeTestSetup, 15, allocs)) + + signedToken := allocs[0].SignedIdentities["web"] + + // Lookup the first registration. + serviceRegReq := &structs.ServiceRegistrationByNameRequest{ + ServiceName: services[0].ServiceName, + QueryOptions: structs.QueryOptions{ + Namespace: services[0].Namespace, + Region: s.Region(), + AuthToken: signedToken, + }, + } + var serviceRegResp structs.ServiceRegistrationByNameResponse + err := msgpackrpc.CallWithCodec(codec, structs.ServiceRegistrationGetServiceRPCMethod, serviceRegReq, &serviceRegResp) + require.NoError(t, err) + require.Equal(t, uint64(10), serviceRegResp.Services[0].CreateIndex) + require.Equal(t, uint64(20), serviceRegResp.Index) + require.Len(t, serviceRegResp.Services, 1) + }, + name: "ACLs enabled using valid signed identity", + }, { serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) { server, cleanup := TestServer(t, nil) diff --git a/nomad/structs/generate.sh b/nomad/structs/generate.sh index c30a6fe32..024e55a75 100755 --- a/nomad/structs/generate.sh +++ b/nomad/structs/generate.sh @@ -8,4 +8,5 @@ codecgen \ -d 100 \ -t codegen_generated \ -o structs.generated.go \ + -nr="^IdentityClaims$" \ ${FILES} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index fc9b39417..209baab7f 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -24,6 +24,7 @@ import ( "strings" "time" + jwt "github.com/golang-jwt/jwt/v4" "github.com/hashicorp/nomad/helper/escapingfs" "golang.org/x/crypto/blake2b" @@ -9593,6 +9594,11 @@ type Allocation struct { // to stop running because it got preempted PreemptedByAllocation string + // SignedIdentities is a map of task names to signed + // identity/capability claim tokens for those tasks. If needed, it + // is populated in the plan applier + SignedIdentities map[string]string `json:"-"` + // Raft Indexes CreateIndex uint64 ModifyIndex uint64 @@ -10287,6 +10293,42 @@ func (a *Allocation) Reconnected() (bool, bool) { return true, a.Expired(lastReconnect) } +func (a *Allocation) ToIdentityClaims() *IdentityClaims { + now := jwt.NewNumericDate(time.Now().UTC()) + return &IdentityClaims{ + Namespace: a.Namespace, + JobID: a.JobID, + AllocationID: a.ID, + RegisteredClaims: jwt.RegisteredClaims{ + // TODO: in Nomad 1.5.0 we'll have a refresh loop to + // prevent allocation identities from expiring before the + // allocation is terminal. Once that's implemented, add an + // ExpiresAt here ExpiresAt: &jwt.NumericDate{}, + NotBefore: now, + IssuedAt: now, + }, + } +} + +func (a *Allocation) ToTaskIdentityClaims(taskName string) *IdentityClaims { + claims := a.ToIdentityClaims() + if claims != nil { + claims.TaskName = taskName + } + return claims +} + +// IdentityClaims are the input to a JWT identifying a workload. It +// should never be serialized to msgpack unsigned. +type IdentityClaims struct { + Namespace string `json:"nomad_namespace"` + JobID string `json:"nomad_job_id"` + AllocationID string `json:"nomad_allocation_id"` + TaskName string `json:"nomad_task"` + + jwt.RegisteredClaims +} + // AllocationDiff is another named type for Allocation (to use the same fields), // which is used to represent the delta for an Allocation. If you need a method // defined on the al