From ce04fe4a4eef2fefbfce399d7d6958f190b2efe9 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 12 Jun 2024 11:32:22 -0400 Subject: [PATCH] acls: reduce permissions of client agent virtual policy (#23304) Nomad client agents run as privileged processes and require access to much of the cluster state, secrets, etc. to operate. But we can improve upon this by tightening up the virtual policy that use for RPC requests authenticated by the node secret ID. This changeset removes the `node:read`, `plugin:read`, and `plugin:list` policy, as well as namespace operations. In return, we add a `AllowClientOp` check to the RPCs the client uses that would otherwise need those policies. Where possible, the update RPCs have also been changed to match on node ID so that a client can only make the RPC that impacts itself. In future work, we may be able to downscope further by adding node pool filtering to `AllowClientOp`. Ref: https://github.com/hashicorp/nomad-enterprise/issues/1528 Ref: https://github.com/hashicorp/nomad-enterprise/pull/1529 Ref: https://hashicorp.atlassian.net/browse/NET-9925 --- .semgrep/rpc_endpoint.yml | 5 +++-- acl/acl.go | 27 +++++++------------------- nomad/alloc_endpoint.go | 2 +- nomad/csi_endpoint.go | 9 ++++++--- nomad/csi_endpoint_test.go | 14 ++++++------- nomad/node_endpoint.go | 5 +++-- nomad/service_registration_endpoint.go | 3 ++- 7 files changed, 29 insertions(+), 36 deletions(-) diff --git a/.semgrep/rpc_endpoint.yml b/.semgrep/rpc_endpoint.yml index 5080dade7..51e78f616 100644 --- a/.semgrep/rpc_endpoint.yml +++ b/.semgrep/rpc_endpoint.yml @@ -40,7 +40,8 @@ rules: # Pattern used by endpoints that are used only for client-to-server. # Authorization can be done after forwarding, but must check the - # AllowClientOp policy + # AllowClientOp policy; the AllowClientOp condition is left open so that + # additional ACL checks can be made (ex. to scope to a given node/pool). - pattern-not-inside: | aclObj, err := $A.$B.AuthenticateClientOnly($A.ctx, args) ... @@ -48,7 +49,7 @@ rules: return err } ... - if !aclObj.AllowClientOp() { + if !aclObj.AllowClientOp() return structs.ErrPermissionDenied } ... diff --git a/acl/acl.go b/acl/acl.go index 8ed59b66d..32393b417 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -336,11 +336,6 @@ func (a *ACL) AllowNamespaceOperation(ns string, op string) bool { return true } - // Clients need to be able to read their namespaced objects - if a.client != PolicyDeny { - return true - } - // If using the all namespaces wildcard, allow if any namespace allows the // operation. if ns == AllNamespacesSentinel && a.anyNamespaceAllowsOp(op) { @@ -793,9 +788,6 @@ func (a *ACL) AllowNodeRead() bool { return true case a.node == PolicyRead: return true - case a.client == PolicyRead, - a.client == PolicyWrite: - return true case a.server == PolicyRead, a.server == PolicyWrite: return true @@ -887,9 +879,6 @@ func (a *ACL) AllowPluginRead() bool { return false case a.aclsDisabled, a.management: return true - case a.client == PolicyRead, - a.client == PolicyWrite: - return true case a.plugin == PolicyRead: return true default: @@ -904,9 +893,6 @@ func (a *ACL) AllowPluginList() bool { return false case a.aclsDisabled, a.management: return true - case a.client == PolicyRead, - a.client == PolicyWrite: - return true case a.plugin == PolicyList: return true case a.plugin == PolicyRead: @@ -920,6 +906,10 @@ func (a *ACL) AllowServiceRegistrationReadList(ns string, isWorkload bool) bool switch { case a == nil: return false + case a.client == PolicyRead, + a.client == PolicyWrite: + // COMPAT: older clients won't send WI tokens for these requests + return true case a.aclsDisabled, a.management: return true } @@ -934,11 +924,13 @@ func (a *ACL) AllowServerOp() bool { return a.server != PolicyDeny || a.isLeader } +// AllowClientOp checks if client-only operations are allowed, or ACLs are +// disabled func (a *ACL) AllowClientOp() bool { if a == nil { return false } - return a.client != PolicyDeny + return a.client != PolicyDeny || a.aclsDisabled } // IsManagement checks if this represents a management token @@ -962,11 +954,6 @@ func NamespaceValidator(ops ...string) func(*ACL, string) bool { return true } - // Clients need to be able to read namespaced objects - if a.client != PolicyDeny { - return true - } - for _, op := range ops { if a.AllowNamespaceOperation(ns, op) { // An operation is allowed, return true diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 83aee5219..2b3423b8a 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -175,7 +175,7 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, reply.Alloc = out if out != nil { // Re-check namespace in case it differs from request. - if !allowNsOp(aclObj, out.Namespace) { + if !aclObj.AllowClientOp() && !allowNsOp(aclObj, out.Namespace) { return structs.NewErrUnknownAllocation(args.AllocID) } diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go index 2bf6aa68c..aa0ac393d 100644 --- a/nomad/csi_endpoint.go +++ b/nomad/csi_endpoint.go @@ -454,12 +454,11 @@ func (v *CSIVolume) Claim(args *structs.CSIVolumeClaimRequest, reply *structs.CS defer metrics.MeasureSince([]string{"nomad", "volume", "claim"}, time.Now()) - allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityCSIMountVolume) aclObj, err := v.srv.ResolveACL(args) if err != nil { return err } - if !allowVolume(aclObj, args.RequestNamespace()) || !aclObj.AllowPluginRead() { + if !aclObj.AllowClientOp() { return structs.ErrPermissionDenied } @@ -697,7 +696,11 @@ func (v *CSIVolume) Unpublish(args *structs.CSIVolumeUnpublishRequest, reply *st if err != nil { return err } - if !allowVolume(aclObj, args.RequestNamespace()) || !aclObj.AllowPluginRead() { + // this RPC is called by both clients and by `nomad volume detach`. we can't + // safely match the node ID for client RPCs because we may not have the node + // ID anymore + if !aclObj.AllowClientOp() && + !(allowVolume(aclObj, args.RequestNamespace()) && aclObj.AllowPluginRead()) { return structs.ErrPermissionDenied } diff --git a/nomad/csi_endpoint_test.go b/nomad/csi_endpoint_test.go index c3b608620..ecc46bc13 100644 --- a/nomad/csi_endpoint_test.go +++ b/nomad/csi_endpoint_test.go @@ -331,18 +331,22 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) { require.NoError(t, state.UpsertJobSummary(index, summary)) index++ require.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc})) + index++ + must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, index, node)) // Create an initial volume claim request; we expect it to fail // because there's no such volume yet. claimReq := &structs.CSIVolumeClaimRequest{ VolumeID: id0, AllocationID: alloc.ID, + NodeID: node.ID, Claim: structs.CSIVolumeClaimWrite, AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter, AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, WriteRequest: structs.WriteRequest{ Region: "global", Namespace: structs.DefaultNamespace, + AuthToken: node.SecretID, }, } claimResp := &structs.CSIVolumeClaimResponse{} @@ -475,10 +479,6 @@ func TestCSIVolumeEndpoint_ClaimWithController(t *testing.T) { ns := structs.DefaultNamespace state := srv.fsm.State() - policy := mock.NamespacePolicy(ns, "", []string{acl.NamespaceCapabilityCSIMountVolume}) + - mock.PluginPolicy("read") - accessToken := mock.CreatePolicyAndToken(t, state, 1001, "claim", policy) - codec := rpcClient(t, srv) id0 := uuid.Generate() @@ -528,17 +528,17 @@ func TestCSIVolumeEndpoint_ClaimWithController(t *testing.T) { claimReq := &structs.CSIVolumeClaimRequest{ VolumeID: id0, AllocationID: alloc.ID, + NodeID: node.ID, Claim: structs.CSIVolumeClaimWrite, WriteRequest: structs.WriteRequest{ Region: "global", Namespace: ns, - AuthToken: accessToken.SecretID, }, } claimResp := &structs.CSIVolumeClaimResponse{} err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Claim", claimReq, claimResp) - // Because the node is not registered - require.EqualError(t, err, "controller publish: controller attach volume: No path to node") + // Because we passed no auth token + must.EqError(t, err, structs.ErrPermissionDenied.Error()) // The node SecretID is authorized for all policies claimReq.AuthToken = node.SecretID diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index c2f2914d1..582d1ce40 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -740,7 +740,8 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, // Check node write permissions if aclObj, err := n.srv.ResolveACL(args); err != nil { return err - } else if !aclObj.AllowNodeWrite() { + } else if !aclObj.AllowNodeWrite() && + !(aclObj.AllowClientOp() && args.GetIdentity().ClientID == args.NodeID) { return structs.ErrPermissionDenied } @@ -1008,7 +1009,7 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, reply *structs.SingleN if err != nil { return err } - if !aclObj.AllowNodeRead() { + if !aclObj.AllowClientOp() && !aclObj.AllowNodeRead() { return structs.ErrPermissionDenied } diff --git a/nomad/service_registration_endpoint.go b/nomad/service_registration_endpoint.go index 57402cb4a..c82aab634 100644 --- a/nomad/service_registration_endpoint.go +++ b/nomad/service_registration_endpoint.go @@ -115,7 +115,8 @@ func (s *ServiceRegistration) DeleteByID( if aclObj, err := s.srv.ResolveACL(args); err != nil { return structs.ErrPermissionDenied - } else if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { + } else if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) && + !aclObj.AllowClientOp() { return structs.ErrPermissionDenied }