From 484f91b893c6f054e9339f8db6bd157429c2ef20 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 12 Oct 2023 16:43:11 -0400 Subject: [PATCH] auth: remove "mixed auth" special casing for Variables endpoint (#18744) The RPC handlers expect to see `nil` ACL objects whenever ACLs are disabled. By using `nil` as a sentinel value, we have the risk of nil pointer exceptions and improper handling of `nil` when returned from our various auth methods that can lead to privilege escalation bugs. This is the third in a series to eliminate the use of `nil` ACLs as a sentinel value for when ACLs are disabled. This patch involves leveraging the refactored `auth` package to remove the weird "mixed auth" helper functions that only support the Variables read/list RPC handlers. Instead, pass the ACL object and claim together into the `AllowVariableOperations` method in the usual `acl` package. Ref: https://github.com/hashicorp/nomad-enterprise/pull/1218 Ref: https://github.com/hashicorp/nomad/pull/18703 Ref: https://github.com/hashicorp/nomad/pull/18715 Ref: https://github.com/hashicorp/nomad/pull/16799 Ref: https://github.com/hashicorp/nomad/pull/18730 Fixes: https://github.com/hashicorp/nomad/issues/15875 --- acl/acl.go | 10 +++ nomad/acl.go | 8 -- nomad/acl_test.go | 117 --------------------------- nomad/auth/auth.go | 70 +++++++++++----- nomad/auth/auth_test.go | 62 ++++++++++++-- nomad/variables_endpoint.go | 135 +++++-------------------------- nomad/variables_endpoint_test.go | 16 ++-- 7 files changed, 147 insertions(+), 271 deletions(-) diff --git a/acl/acl.go b/acl/acl.go index 44c807b1c..c4c9a2370 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -493,6 +493,11 @@ func (a *ACL) AllowHostVolume(ns string) bool { } func (a *ACL) AllowVariableOperation(ns, path, op string, claim *ACLClaim) bool { + if a == nil { + // ACL is nil only if ACLs are disabled + // TODO(tgross): return false when there are no nil ACLs + return true + } if a.management { return true } @@ -517,6 +522,11 @@ type ACLClaim struct { // a variables path for the namespace, with an expectation that the actual // search result will be filtered by specific paths func (a *ACL) AllowVariableSearch(ns string) bool { + if a == nil { + // ACL is nil only if ACLs are disabled + // TODO(tgross): return false when there are no nil ACLs + return true + } if a.management { return true } diff --git a/nomad/acl.go b/nomad/acl.go index 39157d3e3..215df5d98 100644 --- a/nomad/acl.go +++ b/nomad/acl.go @@ -40,14 +40,6 @@ func (srv *Server) ResolvePoliciesForClaims(claims *structs.IdentityClaims) ([]* return srv.auth.ResolvePoliciesForClaims(claims) } -func (srv *Server) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) { - return srv.auth.ResolveACLForToken(aclToken) -} - func (srv *Server) ResolveSecretToken(secretID string) (*structs.ACLToken, error) { return srv.auth.ResolveSecretToken(secretID) } - -func (srv *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) { - return srv.auth.ResolveClaims(claims) -} diff --git a/nomad/acl_test.go b/nomad/acl_test.go index 5e1e1de7f..d4886643b 100644 --- a/nomad/acl_test.go +++ b/nomad/acl_test.go @@ -686,120 +686,3 @@ func TestResolveSecretToken(t *testing.T) { }) } } - -func TestResolveClaims(t *testing.T) { - ci.Parallel(t) - - srv, _, cleanup := TestACLServer(t, nil) - defer cleanup() - - store := srv.fsm.State() - index := uint64(100) - - alloc := mock.Alloc() - - claims := &structs.IdentityClaims{ - Namespace: alloc.Namespace, - JobID: alloc.Job.ID, - AllocationID: alloc.ID, - TaskName: alloc.Job.TaskGroups[0].Tasks[0].Name, - } - - // unrelated policy - policy0 := mock.ACLPolicy() - - // policy for job - policy1 := mock.ACLPolicy() - policy1.JobACL = &structs.JobACL{ - Namespace: claims.Namespace, - JobID: claims.JobID, - } - - // policy for job and group - policy2 := mock.ACLPolicy() - policy2.JobACL = &structs.JobACL{ - Namespace: claims.Namespace, - JobID: claims.JobID, - Group: alloc.Job.TaskGroups[0].Name, - } - - // policy for job and group and task - policy3 := mock.ACLPolicy() - policy3.JobACL = &structs.JobACL{ - Namespace: claims.Namespace, - JobID: claims.JobID, - Group: alloc.Job.TaskGroups[0].Name, - Task: claims.TaskName, - } - - // policy for job and group but different task - policy4 := mock.ACLPolicy() - policy4.JobACL = &structs.JobACL{ - Namespace: claims.Namespace, - JobID: claims.JobID, - Group: alloc.Job.TaskGroups[0].Name, - Task: "another", - } - - // policy for job but different group - policy5 := mock.ACLPolicy() - policy5.JobACL = &structs.JobACL{ - Namespace: claims.Namespace, - JobID: claims.JobID, - Group: "another", - } - - // policy for same namespace but different job - policy6 := mock.ACLPolicy() - policy6.JobACL = &structs.JobACL{ - Namespace: claims.Namespace, - JobID: "another", - } - - // policy for same job in different namespace - policy7 := mock.ACLPolicy() - policy7.JobACL = &structs.JobACL{ - Namespace: "another", - JobID: claims.JobID, - } - - aclObj, err := srv.ResolveClaims(claims) - must.Nil(t, aclObj) - must.EqError(t, err, "allocation does not exist") - - // upsert the allocation - index++ - err = store.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc}) - must.NoError(t, err) - - // Resolve claims and check we that the ACL object without policies provides no access - aclObj, err = srv.ResolveClaims(claims) - must.NoError(t, err) - must.NotNil(t, aclObj) - must.False(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)) - - // Add the policies - index++ - err = store.UpsertACLPolicies(structs.MsgTypeTestSetup, index, []*structs.ACLPolicy{ - policy0, policy1, policy2, policy3, policy4, policy5, policy6, policy7}) - must.NoError(t, err) - - // Re-resolve and check that the resulting ACL looks reasonable - aclObj, err = srv.ResolveClaims(claims) - must.NoError(t, err) - must.NotNil(t, aclObj) - must.False(t, aclObj.IsManagement()) - must.True(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)) - must.False(t, aclObj.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs)) - - // Resolve the same claim again, should get cache value - aclObj2, err := srv.ResolveClaims(claims) - must.NoError(t, err) - must.NotNil(t, aclObj) - must.Eq(t, aclObj, aclObj2, must.Sprintf("expected cached value")) - - policies, err := srv.ResolvePoliciesForClaims(claims) - must.NoError(t, err) - must.Len(t, 3, policies) - must.SliceContainsAll(t, policies, []*structs.ACLPolicy{policy1, policy2, policy3}) -} diff --git a/nomad/auth/auth.go b/nomad/auth/auth.go index 696a23fe7..c7d5fd4eb 100644 --- a/nomad/auth/auth.go +++ b/nomad/auth/auth.go @@ -194,24 +194,37 @@ func (s *Authenticator) Authenticate(ctx RPCContext, args structs.RequestWithIde return nil } -// ResolveACL is an authentication wrapper which handles resolving both ACL -// tokens and Workload Identities. If both are provided the ACL token is -// preferred, but it is best for the RPC caller to only include the credentials -// for the identity they intend the operation to be performed with. +// ResolveACL is an authentication wrapper which handles resolving ACL tokens, +// Workload Identities, or client secrets into acl.ACL objects. Exclusively +// server-to-server or client-to-server requests should be using +// AuthenticateServerOnly or AuthenticateClientOnly and never use this method. func (s *Authenticator) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) { identity := args.GetIdentity() - if !s.aclsEnabled || identity == nil { + if identity == nil { + // should never happen + return nil, structs.ErrPermissionDenied + } + + if !s.aclsEnabled { return nil, nil } - aclToken := identity.GetACLToken() - if aclToken != nil { - return s.ResolveACLForToken(aclToken) + + if identity.ClientID != "" { + return acl.ClientACL, nil } claims := identity.GetClaims() if claims != nil { - return s.ResolveClaims(claims) + return s.resolveClaims(claims) } - return nil, nil + + // this will include any anonymous token, so this is the last chance to + // avoid an error + aclToken := identity.GetACLToken() + if aclToken != nil { + return s.resolveACLForToken(aclToken) + } + + return nil, structs.ErrPermissionDenied } // AuthenticateServerOnly returns an ACL object for use *only* with internal @@ -363,16 +376,35 @@ func validateCertificateForNames(cert *x509.Certificate, expectedNames []string) } -// ResolveACLForToken resolves an ACL from a token only. It should be used only +// IdentityToACLClaim returns an ACLClaim suitable for checking permissions +func IdentityToACLClaim(ai *structs.AuthenticatedIdentity, store *state.StateStore) *acl.ACLClaim { + if ai == nil || ai.Claims == nil { + return nil + } + + var group string + alloc, err := store.AllocByID(nil, ai.Claims.AllocationID) + if err != nil { + // we should never hit this error, but if we did the caller would get a + // nil claim and auth will fail + return nil + } + if alloc != nil { + group = alloc.TaskGroup + } + + return &acl.ACLClaim{ + Namespace: ai.Claims.Namespace, + Job: ai.Claims.JobID, + Group: group, + Task: ai.Claims.TaskName, + } +} + +// resolveACLForToken resolves an ACL from a token only. It should be used only // by Variables endpoints, which have additional implicit policies for their // claims so we can't wrap them up in ResolveACL. -// -// TODO: figure out a way to the Variables endpoint implicit policies baked into -// their acl.ACL object so that we can avoid using this method. -func (s *Authenticator) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) { - if !s.aclsEnabled { - return nil, nil - } +func (s *Authenticator) resolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) { snap, err := s.getState().Snapshot() if err != nil { return nil, err @@ -433,7 +465,7 @@ func (s *Authenticator) VerifyClaim(token string) (*structs.IdentityClaims, erro return claims, nil } -func (s *Authenticator) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) { +func (s *Authenticator) resolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) { policies, err := s.ResolvePoliciesForClaims(claims) if err != nil { diff --git a/nomad/auth/auth_test.go b/nomad/auth/auth_test.go index 376d2f39a..6bd73073b 100644 --- a/nomad/auth/auth_test.go +++ b/nomad/auth/auth_test.go @@ -124,7 +124,8 @@ func TestAuthenticateDefault(t *testing.T) { aclObj, err := auth.ResolveACL(args) must.NoError(t, err) - must.Nil(t, aclObj) + must.NotNil(t, aclObj) + must.True(t, aclObj.AllowClientOp()) }, }, { @@ -840,6 +841,55 @@ func TestResolveACLToken(t *testing.T) { } } +func TestIdentityToACLClaim(t *testing.T) { + + alloc := mock.Alloc() + alloc.ClientStatus = structs.AllocClientStatusRunning + tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) + task := tg.Tasks[0] + + defaultWI := &structs.WorkloadIdentity{Name: "default"} + claims := structs.NewIdentityClaims(alloc.Job, alloc, + task.IdentityHandle(defaultWI), task.Identity, time.Now()) + + store := testStateStore(t) + + leaderACL := uuid.Generate() + + auth := NewAuthenticator(&AuthenticatorConfig{ + StateFn: func() *state.StateStore { return store }, + Logger: testlog.HCLogger(t), + GetLeaderACLFn: func() string { return leaderACL }, + AclsEnabled: true, + TLSEnabled: true, + Region: "global", + Encrypter: newTestEncrypter(), + }) + + store.UpsertAllocs(structs.MsgTypeTestSetup, 100, + []*structs.Allocation{alloc}) + + token, err := auth.encrypter.(*testEncrypter).signClaim(claims) + must.NoError(t, err) + + ctx := newTestContext(t, "client.nomad.global", "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = token + + err = auth.Authenticate(ctx, args) + must.NoError(t, err) + + claim := IdentityToACLClaim(args.GetIdentity(), auth.getState()) + must.Eq(t, &acl.ACLClaim{ + Namespace: alloc.Job.Namespace, + Job: alloc.Job.ID, + Group: alloc.TaskGroup, + Task: alloc.Job.TaskGroups[0].Tasks[0].Name, + }, claim) + + must.Nil(t, IdentityToACLClaim(nil, auth.getState())) +} + func TestResolveSecretToken(t *testing.T) { ci.Parallel(t) auth := testDefaultAuthenticator(t) @@ -1001,7 +1051,7 @@ func TestResolveClaims(t *testing.T) { JobID: claims.JobID, } - aclObj, err := auth.ResolveClaims(claims) + aclObj, err := auth.resolveClaims(claims) must.Nil(t, aclObj) must.EqError(t, err, "allocation does not exist") @@ -1011,7 +1061,7 @@ func TestResolveClaims(t *testing.T) { must.NoError(t, err) // Resolve claims and check we that the ACL object without policies provides no access - aclObj, err = auth.ResolveClaims(claims) + aclObj, err = auth.resolveClaims(claims) must.NoError(t, err) must.NotNil(t, aclObj) must.False(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)) @@ -1023,7 +1073,7 @@ func TestResolveClaims(t *testing.T) { must.NoError(t, err) // Re-resolve and check that the resulting ACL looks reasonable - aclObj, err = auth.ResolveClaims(claims) + aclObj, err = auth.resolveClaims(claims) must.NoError(t, err) must.NotNil(t, aclObj) must.False(t, aclObj.IsManagement()) @@ -1031,7 +1081,7 @@ func TestResolveClaims(t *testing.T) { must.False(t, aclObj.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs)) // Resolve the same claim again, should get cache value - aclObj2, err := auth.ResolveClaims(claims) + aclObj2, err := auth.resolveClaims(claims) must.NoError(t, err) must.NotNil(t, aclObj) must.Eq(t, aclObj, aclObj2, must.Sprintf("expected cached value")) @@ -1042,7 +1092,7 @@ func TestResolveClaims(t *testing.T) { must.SliceContainsAll(t, policies, []*structs.ACLPolicy{policy1, policy2, policy3}) // Check the dispatch claims - aclObj3, err := auth.ResolveClaims(dispatchClaims) + aclObj3, err := auth.resolveClaims(dispatchClaims) must.NoError(t, err) must.NotNil(t, aclObj) must.Eq(t, aclObj, aclObj3, must.Sprintf("expected cached value")) diff --git a/nomad/variables_endpoint.go b/nomad/variables_endpoint.go index cf7fbcf65..158c88714 100644 --- a/nomad/variables_endpoint.go +++ b/nomad/variables_endpoint.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/acl" + "github.com/hashicorp/nomad/nomad/auth" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/state/paginator" "github.com/hashicorp/nomad/nomad/structs" @@ -346,11 +347,14 @@ func (sv *Variables) Read(args *structs.VariablesReadRequest, reply *structs.Var defer metrics.MeasureSince([]string{"nomad", "variables", "read"}, time.Now()) - aclObj, err := sv.handleMixedAuthEndpoint(args.QueryOptions, - acl.PolicyRead, args.Path) + aclObj, err := sv.srv.ResolveACL(args) if err != nil { return err } + if !aclObj.AllowVariableOperation(args.RequestNamespace(), args.Path, acl.PolicyRead, + auth.IdentityToACLClaim(args.GetIdentity(), sv.srv.State())) { + return structs.ErrPermissionDenied + } // Setup the blocking query opts := blockingOptions{ @@ -409,16 +413,10 @@ func (sv *Variables) List( return sv.listAllVariables(args, reply) } - var aclObj *acl.ACL - var err error - aclToken := args.GetIdentity().GetACLToken() - if aclToken != nil { - aclObj, err = sv.srv.ResolveACLForToken(aclToken) - if err != nil { - return err - } + aclObj, err := sv.srv.ResolveACL(args) + if err != nil { + return err } - claims := args.GetIdentity().GetClaims() // Set up and return the blocking query. return sv.srv.blockingRPC(&blockingOptions{ @@ -448,9 +446,10 @@ func (sv *Variables) List( if !strings.HasPrefix(v.Path, args.Prefix) { return false, nil } - // Note: the authorize method modifies the aclObj parameter. - err := sv.authorize(aclObj, claims, v.Namespace, acl.PolicyList, v.Path) - return err == nil, nil + + return aclObj.AllowVariableOperation(args.Namespace, v.Path, + acl.PolicyList, + auth.IdentityToACLClaim(args.GetIdentity(), sv.srv.State())), nil }, }, } @@ -503,18 +502,10 @@ func (sv *Variables) listAllVariables( args *structs.VariablesListRequest, reply *structs.VariablesListResponse) error { - // Perform token resolution. The request already goes through forwarding - // and metrics setup before being called. - var aclObj *acl.ACL - var err error - aclToken := args.GetIdentity().GetACLToken() - if aclToken != nil { - aclObj, err = sv.srv.ResolveACLForToken(aclToken) - if err != nil { - return err - } + aclObj, err := sv.srv.ResolveACL(args) + if err != nil { + return err } - claims := args.GetIdentity().GetClaims() // Set up and return the blocking query. return sv.srv.blockingRPC(&blockingOptions{ @@ -546,9 +537,9 @@ func (sv *Variables) listAllVariables( return false, nil } - // Note: the authorize method modifies the aclObj parameter. - err := sv.authorize(aclObj, claims, v.Namespace, acl.PolicyList, v.Path) - return err == nil, nil + return aclObj.AllowVariableOperation(v.Namespace, v.Path, + acl.PolicyList, + auth.IdentityToACLClaim(args.GetIdentity(), sv.srv.State())), nil }, }, } @@ -623,94 +614,6 @@ func (sv *Variables) decrypt(v *structs.VariableEncrypted) (*structs.VariableDec return &dv, nil } -// handleMixedAuthEndpoint is a helper to handle auth on RPC endpoints that can -// either be called by external clients or by workload identity -func (sv *Variables) handleMixedAuthEndpoint(args structs.QueryOptions, policy, pathOrPrefix string) (*acl.ACL, error) { - - var aclObj *acl.ACL - var err error - aclToken := args.GetIdentity().GetACLToken() - if aclToken != nil { - aclObj, err = sv.srv.ResolveACLForToken(aclToken) - if err != nil { - return nil, err - } - } - claims := args.GetIdentity().GetClaims() - - // Note: the authorize method modifies the aclObj parameter. - err = sv.authorize(aclObj, claims, args.RequestNamespace(), policy, pathOrPrefix) - if err != nil { - return aclObj, err - } - - return aclObj, nil -} - -// The authorize method modifies the aclObj parameter. In case the incoming request -// uses identity workload claims, the aclObj is populated. This logic will be -// updated when the work to eliminate nil ACLs is merged. -func (sv *Variables) authorize(aclObj *acl.ACL, claims *structs.IdentityClaims, ns, policy, pathOrPrefix string) error { - - if aclObj == nil && claims == nil { - return nil // ACLs aren't enabled - } - - // Perform normal ACL validation. If the ACL object is nil, that means we're - // working with an identity claim. - if aclObj != nil { - allowed := aclObj.AllowVariableOperation(ns, pathOrPrefix, policy, nil) - if !allowed { - return structs.ErrPermissionDenied - } - return nil - } - - // Check the workload-associated policies and automatic task access to - // variables. - var err error - if claims != nil { - aclObj, err = sv.srv.ResolveClaims(claims) - if err != nil { - return err // returns internal errors only - } - if aclObj != nil { - group, err := sv.groupForAlloc(claims) - if err != nil { - // returns ErrPermissionDenied for claims from terminal - // allocations, otherwise only internal errors - return err - } - allowed := aclObj.AllowVariableOperation( - ns, pathOrPrefix, policy, &acl.ACLClaim{ - Namespace: claims.Namespace, - Job: claims.JobID, - Group: group, - Task: claims.TaskName, - }) - if allowed { - return nil - } - } - } - return structs.ErrPermissionDenied -} - -func (sv *Variables) groupForAlloc(claims *structs.IdentityClaims) (string, error) { - store, err := sv.srv.fsm.State().Snapshot() - if err != nil { - return "", err - } - alloc, err := store.AllocByID(nil, claims.AllocationID) - if err != nil { - return "", err - } - if alloc == nil || alloc.Job == nil { - return "", structs.ErrPermissionDenied - } - return alloc.TaskGroup, nil -} - // RenewLock is used to apply a SV renew lock operation on a variable to maintain the lease. func (sv *Variables) RenewLock(args *structs.VariablesRenewLockRequest, reply *structs.VariablesRenewLockResponse) error { authErr := sv.srv.Authenticate(sv.ctx, args) diff --git a/nomad/variables_endpoint_test.go b/nomad/variables_endpoint_test.go index 8f6b016d6..435329eb3 100644 --- a/nomad/variables_endpoint_test.go +++ b/nomad/variables_endpoint_test.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/auth" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" @@ -542,16 +543,21 @@ func TestVariablesEndpoint_auth(t *testing.T) { err = store.UpsertACLTokens(structs.MsgTypeTestSetup, 1150, []*structs.ACLToken{aclToken}) must.NoError(t, err) - variablesRPC := NewVariablesEndpoint(srv, nil, srv.encrypter) - testFn := func(args *structs.QueryOptions, cap, path string) error { err := srv.Authenticate(nil, args) if err != nil { return structs.ErrPermissionDenied } - _, err = variablesRPC.handleMixedAuthEndpoint( - *args, cap, path) - return err + aclObj, err := srv.ResolveACL(args) + if err != nil { + return err + } + if !aclObj.AllowVariableOperation(args.Namespace, path, cap, + auth.IdentityToACLClaim(args.GetIdentity(), srv.State())) { + return structs.ErrPermissionDenied + } + + return nil } t.Run("terminal alloc should be denied", func(t *testing.T) {