acl: Check for duplicate or invalid keys when writing new policies (#26836)

ACL policies are parsed when creating, updating, or compiling the resulting ACL object when used. This parsing was silently ignoring duplicate singleton keys, or invalid keys which does not grant any additional access, but is a poor UX and can be unexpected. This change parses all new policy writes and updates, so that duplicate or invalid keys return an error to the caller. This is called strict parsing. In order to correctly handle upgrades of clusters which have existing policies that would fall foul of the change, a lenient parsing mode is also available. This allows the policy to continue to be parsed and compiled after an upgrade without the need for an operator to correct the policy document prior to further use. Co-authored-by: Tim Gross <tgross@hashicorp.com>
2026-01-01 16:05:42 +03:00 · 2025-09-30 08:16:59 +01:00
parent 250b8f9d07
commit e6a04e06d1
9 changed files with 220 additions and 28 deletions
--- a/.changelog/26836.txt
+++ b/.changelog/26836.txt
@@ -0,0 +1,3 @@
 ```release-note:bug
 acl: Fixed a bug where ACL policies would silently accept invalid or duplicate blocks
 ```
--- a/acl/acl_test.go
+++ b/acl/acl_test.go
@@ -107,9 +107,9 @@ func TestACLMerge(t *testing.T) {
 	ci.Parallel(t)
 	// Merge read + write policy
-	p1, err := Parse(readAll)
+	p1, err := Parse(readAll, PolicyParseStrict)
 	must.NoError(t, err)
-	p2, err := Parse(writeAll)
+	p2, err := Parse(writeAll, PolicyParseStrict)
 	must.NoError(t, err)
 	acl, err := NewACL(false, []*Policy{p1, p2})
 	must.NoError(t, err)
@@ -147,7 +147,7 @@ func TestACLMerge(t *testing.T) {
 	must.False(t, acl.AllowClientOp())
 	// Merge read + blank
-	p3, err := Parse("")
+	p3, err := Parse("", PolicyParseStrict)
 	must.NoError(t, err)
 	acl, err = NewACL(false, []*Policy{p1, p3})
 	must.NoError(t, err)
@@ -185,7 +185,7 @@ func TestACLMerge(t *testing.T) {
 	must.False(t, acl.AllowClientOp())
 	// Merge read + deny
-	p4, err := Parse(denyAll)
+	p4, err := Parse(denyAll, PolicyParseStrict)
 	must.NoError(t, err)
 	acl, err = NewACL(false, []*Policy{p1, p4})
 	must.NoError(t, err)
@@ -362,7 +362,7 @@ func TestAllowNamespace(t *testing.T) {
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			acl, err := NewACL(false, []*Policy{policy})
@@ -434,7 +434,7 @@ func TestWildcardNamespaceMatching(t *testing.T) {
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces)
@@ -629,7 +629,7 @@ node_pool "*" {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.NodePools)
@@ -694,7 +694,7 @@ func TestWildcardHostVolumeMatching(t *testing.T) {
 	for _, tc := range tests {
 		t.Run(tc.Policy, func(t *testing.T) {
-			policy, err := Parse(tc.Policy)
+			policy, err := Parse(tc.Policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.HostVolumes)
@@ -905,7 +905,7 @@ func TestVariablesMatching(t *testing.T) {
 	for _, tc := range tests {
 		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces[0].Variables)
@@ -918,7 +918,7 @@ func TestVariablesMatching(t *testing.T) {
 	t.Run("search over namespace", func(t *testing.T) {
 		policy, err := Parse(`namespace "ns" {
-					variables { path "foo/bar" { capabilities = ["read"] }}}`)
+					variables { path "foo/bar" { capabilities = ["read"] }}}`, PolicyParseStrict)
 		must.NoError(t, err)
 		must.NotNil(t, policy.Namespaces[0].Variables)
@@ -958,7 +958,7 @@ func TestACL_matchingCapabilitySet_returnsAllMatches(t *testing.T) {
 	for _, tc := range tests {
 		t.Run(tc.Policy, func(t *testing.T) {
-			policy, err := Parse(tc.Policy)
+			policy, err := Parse(tc.Policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces)
@@ -1012,7 +1012,7 @@ func TestACL_matchingCapabilitySet_difference(t *testing.T) {
 	for _, tc := range tests {
 		t.Run(tc.Policy, func(t *testing.T) {
-			policy, err := Parse(tc.Policy)
+			policy, err := Parse(tc.Policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces)
@@ -1079,7 +1079,7 @@ func TestAgentDebug(t *testing.T) {
 			acl := ACLsDisabledACL
 			if !tc.aclsDisabled {
-				policy, err := Parse(tc.policy)
+				policy, err := Parse(tc.policy, PolicyParseStrict)
 				must.NoError(t, err)
 				acl, err = NewACL(false, []*Policy{policy})
--- a/acl/policy.go
+++ b/acl/policy.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"regexp"
 	"slices"
 	"strings"
 	"github.com/hashicorp/hcl"
@@ -119,6 +120,17 @@ type Policy struct {
 	Quota       *QuotaPolicy        `hcl:"quota"`
 	Plugin      *PluginPolicy       `hcl:"plugin"`
 	Raw         string              `hcl:"-"`
 	// ExtraKeysHCL is used to capture any extra keys in the HCL input, so we
 	// can return an error if the user specified something unknown.
 	//
 	// Unfortunately, due to our current HCL use, keys from known blocks
 	// (namespace, node pools, and host volumes) will appear here, so we need to
 	// remove those as we process them. If the policy contains multiple blocks
 	// of the same type (e.g. multiple namespace blocks), the extra keys will
 	// also include "namespace" for all but the first block, so we need to
 	// remove those as we process them too.
 	ExtraKeysHCL []string `hcl:",unusedKeys"`
 }
 // IsEmpty checks to make sure that at least one policy has been set and is not
@@ -134,6 +146,14 @@ func (p *Policy) IsEmpty() bool {
 		p.Plugin == nil
 }
 // removeExtraKey removes a single occurrence of the passed key from the
 // ExtraKeysHCL slice. If the key is not found, this is a no-op.
 func (p *Policy) removeExtraKey(key string) {
 	if idx := slices.Index(p.ExtraKeysHCL, key); idx > -1 {
 		p.ExtraKeysHCL = append(p.ExtraKeysHCL[:idx], p.ExtraKeysHCL[idx+1:]...)
 	}
 }
 // NamespacePolicy is the policy for a specific namespace
 type NamespacePolicy struct {
 	Name         string `hcl:",key"`
@@ -378,10 +398,30 @@ func expandVariablesCapabilities(caps []string) []string {
 	return caps
 }
-// Parse is used to parse the specified ACL rules into an
+const (
-// intermediary set of policies, before being compiled into
+	// PolicyParseStrict can be used to indicate that the policy should be
-// the ACL
+	// parsed in strict mode, returning an error if there are any unknown keys.
-func Parse(rules string) (*Policy, error) {
+	// This should be used when creating or updating policies.
 	PolicyParseStrict = true
 	// PolicyParseLenient can be used to indicate that the policy should be
 	// parsed in lenient mode, ignoring any unknown keys. This should be used
 	// when evaluating policies, so we gracefully handle policies that were
 	// created before we added stricter validation.
 	PolicyParseLenient = false
 )
 // Parse is used to parse the specified ACL rules into an intermediary set of
 // policies, before being compiled into the ACL.
 //
 // The "strict" parameter should be set to true if the policy is being created
 // or updated, and false if it is being used for evaluation. This allowed us to
 // tighten restrictions around unknown keys when writing policies, while not
 // breaking existing policies that may have unknown keys when evaluating them,
 // since they may have been written before the restrictions were added. The
 // constants PolicyParseStrict and PolicyParseLenient can be used to make the
 // intent clear at the call site.
 func Parse(rules string, strict bool) (*Policy, error) {
 	// Decode the rules
 	p := &Policy{Raw: rules}
 	if rules == "" {
@@ -446,9 +486,10 @@ func Parse(rules string) (*Policy, error) {
 				pathPolicy.Capabilities = expandVariablesCapabilities(pathPolicy.Capabilities)
 			}
 		}
 		// Remove the namespace name from the extra key list.
 		p.removeExtraKey(ns.Name)
 	}
 	for _, np := range p.NodePools {
@@ -468,6 +509,9 @@ func Parse(rules string) (*Policy, error) {
 			extraCap := expandNodePoolPolicy(np.Policy)
 			np.Capabilities = append(np.Capabilities, extraCap...)
 		}
 		// Remove the node-pool name from the extra key list.
 		p.removeExtraKey(np.Name)
 	}
 	for _, hv := range p.HostVolumes {
@@ -489,8 +533,23 @@ func Parse(rules string) (*Policy, error) {
 			extraCap := expandHostVolumePolicy(hv.Policy)
 			hv.Capabilities = append(hv.Capabilities, extraCap...)
 		}
 		// Remove the host-volume name from the extra key list.
 		p.removeExtraKey(hv.Name)
 	}
 	// Now that we have processed all known keys, return an error if the
 	// operator wrote a policy with unknown keys if we are being strict. While
 	// these do not grant any extra privileges, it can be misleaing to allow
 	// these and cause problems later if we add new capabilities that collide
 	// with the unknown keys.
 	if len(p.ExtraKeysHCL) > 0 && strict {
 		return nil, fmt.Errorf("Invalid or duplicate policy keys: %v",
 			strings.Join(p.ExtraKeysHCL, ", "))
 	}
 	p.ExtraKeysHCL = nil
 	if p.Agent != nil && !isPolicyValid(p.Agent.Policy) {
 		return nil, fmt.Errorf("Invalid agent policy: %#v", p.Agent)
 	}
@@ -550,6 +609,9 @@ func hclDecode(p *Policy, rules string) (err error) {
 		if len(nsObj.Keys) == 0 {
 			p.Namespaces[i].Name = ""
 		}
 		if i > 0 {
 			p.removeExtraKey("namespace")
 		}
 		// Fix missing variable paths.
 		nsOT, ok := nsObj.Val.(*ast.ObjectType)
@@ -579,6 +641,9 @@ func hclDecode(p *Policy, rules string) (err error) {
 		if len(npObj.Keys) == 0 {
 			p.NodePools[i].Name = ""
 		}
 		if i > 0 {
 			p.removeExtraKey("node_pool")
 		}
 	}
 	hvList := list.Filter("host_volume")
@@ -587,6 +652,9 @@ func hclDecode(p *Policy, rules string) (err error) {
 		if len(hvObj.Keys) == 0 {
 			p.HostVolumes[i].Name = ""
 		}
 		if i > 0 {
 			p.removeExtraKey("host_volume")
 		}
 	}
 	return nil
--- a/acl/policy_test.go
+++ b/acl/policy_test.go
@@ -934,7 +934,7 @@ func TestParse(t *testing.T) {
 	for idx, tc := range tcases {
 		t.Run(fmt.Sprintf("%02d", idx), func(t *testing.T) {
-			p, err := Parse(tc.Raw)
+			p, err := Parse(tc.Raw, PolicyParseStrict)
 			if tc.ExpectErr == "" {
 				must.NoError(t, err)
 			} else {
@@ -958,12 +958,105 @@ func TestParse_BadInput(t *testing.T) {
 	for i, c := range inputs {
 		t.Run(fmt.Sprintf("%d: %v", i, c), func(t *testing.T) {
-			_, err := Parse(c)
+			_, err := Parse(c, PolicyParseStrict)
 			must.Error(t, err)
 		})
 	}
 }
 func TestParse_ExtraKeys(t *testing.T) {
 	ci.Parallel(t)
 	inputPolicy := `
 namespace "foo" {
  policy = "write"
  variables {
    path "project/*" {
      capabilities = ["read", "write"]
    }
  }
 }
 namespace "bar" {
  policy = "write"
  variables {
    path "system/*" {
      capabilities = ["read", "write"]
    }
  }
 }
 bogus {
  policy = "read"
 }
 anotherbogus {
  policy = "write"
 }
 node {
  policy = "read"
 }
 node {
  policy = "write"
 }
 agent {
  policy = "read"
 }
 agent {
  policy = "write"
 }
 operator {
  policy = "read"
 }
 operator {
  policy = "write"
 }
 quota {
  policy = "read"
 }
 quota {
  policy = "write"
 }
 host_volume "volume-read-only" {
  policy = "read"
 }
 host_volume "volume-read-write" {
  policy = "write"
 }
 plugin {
  policy = "read"
 }
 plugin {
  policy = "write"
 }
 node_pool "pool-read-only" {
  policy = "read"
 }
 node_pool "pool-read-write" {
  policy = "write"
 }
 `
 	// Expect an error mentioning all the bad keys when indicating we are
 	// parsing the policy in strict mode.
 	_, err := Parse(inputPolicy, PolicyParseStrict)
 	must.ErrorContains(
 		t,
 		err,
 		"Invalid or duplicate policy keys: agent, anotherbogus, bogus, node, operator, plugin, quota",
 	)
 	// Expect no error when parsing in non-strict mode.
 	_, err = Parse(inputPolicy, PolicyParseLenient)
 	must.NoError(t, err)
 }
 func TestPluginPolicy_isValid(t *testing.T) {
 	ci.Parallel(t)
--- a/nomad/acl_endpoint.go
+++ b/nomad/acl_endpoint.go
@@ -294,7 +294,7 @@ func (a *ACL) GetPolicy(args *structs.ACLPolicySpecificRequest, reply *structs.S
 			reply.Policy = out
 			if out != nil {
 				reply.Index = out.ModifyIndex
-				rules, err := policy.Parse(out.Rules)
+				rules, err := policy.Parse(out.Rules, acl.PolicyParseLenient)
 				if err != nil {
 					return err
--- a/nomad/event_endpoint_test.go
+++ b/nomad/event_endpoint_test.go
@@ -474,7 +474,7 @@ func TestEventStream_validateNsOp(t *testing.T) {
 	for _, tc := range cases {
 		t.Run(tc.Name, func(t *testing.T) {
-			p, err := acl.Parse(tc.Policy)
+			p, err := acl.Parse(tc.Policy, acl.PolicyParseStrict)
 			require.NoError(err)
 			testACL, err := acl.NewACL(tc.Management, []*acl.Policy{p})
@@ -501,7 +501,7 @@ func TestEventStream_validateACL(t *testing.T) {
 	testEvent := &Event{srv: s1}
 	t.Run("single namespace ACL errors on wildcard", func(t *testing.T) {
-		policy, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}))
+		policy, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
 		// does not contain policy for default NS
@@ -516,9 +516,9 @@ func TestEventStream_validateACL(t *testing.T) {
 	})
 	t.Run("all namespace ACL succeeds on wildcard", func(t *testing.T) {
-		policy1, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}))
+		policy1, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
-		policy2, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}))
+		policy2, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
 		testAcl, err := acl.NewACL(false, []*acl.Policy{policy1, policy2})
@@ -533,7 +533,7 @@ func TestEventStream_validateACL(t *testing.T) {
 	})
 	t.Run("single namespace ACL succeeds with correct NS", func(t *testing.T) {
-		policy, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}))
+		policy, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
 		testAcl, err := acl.NewACL(false, []*acl.Policy{policy})
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -433,7 +433,7 @@ func CompileACLObject(cache *ACLCache[*acl.ACL], policies []*ACLPolicy) (*acl.AC
 	// Parse the policies
 	parsed := make([]*acl.Policy, 0, len(policies))
 	for _, policy := range policies {
-		p, err := acl.Parse(policy.Rules)
+		p, err := acl.Parse(policy.Rules, acl.PolicyParseLenient)
 		if err != nil {
 			return nil, fmt.Errorf("failed to parse %q: %v", policy.Name, err)
 		}
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -13434,7 +13434,7 @@ func (a *ACLPolicy) Validate() error {
 		err := fmt.Errorf("invalid name '%s'", a.Name)
 		mErr.Errors = append(mErr.Errors, err)
 	}
-	if _, err := acl.Parse(a.Rules); err != nil {
+	if _, err := acl.Parse(a.Rules, acl.PolicyParseStrict); err != nil {
 		err = fmt.Errorf("failed to parse rules: %v", err)
 		mErr.Errors = append(mErr.Errors, err)
 	}
--- a/website/content/docs/upgrade/upgrade-specific.mdx
+++ b/website/content/docs/upgrade/upgrade-specific.mdx
@@ -30,6 +30,24 @@ metrics refers to the parent job ID for dispatch and periodic jobs. The
 running high volume dispatch workloads, this change significantly reduces
 metrics cardinality and memory usage on the leader.
 #### ACL policies no longer silently ignore duplicate or invalid keys
 Nomad 1.11.0 introduces stricter validation for ACL policies. Policy writes that
 include duplicate or invalid keys will be rejected with an error instead of
 being silently ignored. Any existing policies with duplicate or invalid keys
 will continue to work, but the source policy document will need to be updated
 to be valid before it can be written to Nomad.
 ## Nomad 1.10.6
 #### ACL policies no longer silently ignore duplicate or invalid keys
 Nomad 1.10.6 introduces stricter validation for ACL policies. Policy writes that
 include duplicate or invalid keys will be rejected with an error instead of
 being silently ignored. Any existing policies with duplicate or invalid keys
 will continue to work, but the source policy document will need to be updated
 to be valid before it can be written to Nomad.
 ## Nomad 1.10.2
 #### Clients respect `telemetry.publish_allocation_metrics`
@@ -188,6 +206,16 @@ labels = [
 ]
 ```
 ## Nomad 1.8.18
 #### ACL policies no longer silently ignore duplicate or invalid keys
 Nomad 1.8.18 introduces stricter validation for ACL policies. Policy writes that
 include duplicate or invalid keys will be rejected with an error instead of
 being silently ignored. Any existing policies with duplicate or invalid keys
 will continue to work, but the source policy document will need to be updated
 to be valid before it can be written to Nomad.
 ## Nomad 1.8.4
 #### Default Docker `infra_image` changed