From e6a04e06d1588bf7d4785a6c357758ad983d749e Mon Sep 17 00:00:00 2001
From: James Rasell <jrasell@users.noreply.github.com>
Date: Tue, 30 Sep 2025 08:16:59 +0100
Subject: [PATCH] acl: Check for duplicate or invalid keys when writing new
 policies (#26836)

ACL policies are parsed when creating, updating, or compiling the
resulting ACL object when used. This parsing was silently ignoring
duplicate singleton keys, or invalid keys which does not grant any
additional access, but is a poor UX and can be unexpected.

This change parses all new policy writes and updates, so that
duplicate or invalid keys return an error to the caller. This is
called strict parsing. In order to correctly handle upgrades of
clusters which have existing policies that would fall foul of the
change, a lenient parsing mode is also available. This allows
the policy to continue to be parsed and compiled after an upgrade
without the need for an operator to correct the policy document
prior to further use.

Co-authored-by: Tim Gross <tgross@hashicorp.com>
---
 .changelog/26836.txt                          |  3 +
 acl/acl_test.go                               | 26 ++---
 acl/policy.go                                 | 78 ++++++++++++++-
 acl/policy_test.go                            | 97 ++++++++++++++++++-
 nomad/acl_endpoint.go                         |  2 +-
 nomad/event_endpoint_test.go                  | 10 +-
 nomad/structs/funcs.go                        |  2 +-
 nomad/structs/structs.go                      |  2 +-
 .../content/docs/upgrade/upgrade-specific.mdx | 28 ++++++
 9 files changed, 220 insertions(+), 28 deletions(-)
 create mode 100644 .changelog/26836.txt

diff --git a/.changelog/26836.txt b/.changelog/26836.txt
new file mode 100644
index 000000000..07912d441
--- /dev/null
+++ b/.changelog/26836.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+acl: Fixed a bug where ACL policies would silently accept invalid or duplicate blocks
+```
diff --git a/acl/acl_test.go b/acl/acl_test.go
index 2a88c9d2a..c52ae1cf5 100644
--- a/acl/acl_test.go
+++ b/acl/acl_test.go
@@ -107,9 +107,9 @@ func TestACLMerge(t *testing.T) {
 	ci.Parallel(t)
 
 	// Merge read + write policy
-	p1, err := Parse(readAll)
+	p1, err := Parse(readAll, PolicyParseStrict)
 	must.NoError(t, err)
-	p2, err := Parse(writeAll)
+	p2, err := Parse(writeAll, PolicyParseStrict)
 	must.NoError(t, err)
 	acl, err := NewACL(false, []*Policy{p1, p2})
 	must.NoError(t, err)
@@ -147,7 +147,7 @@ func TestACLMerge(t *testing.T) {
 	must.False(t, acl.AllowClientOp())
 
 	// Merge read + blank
-	p3, err := Parse("")
+	p3, err := Parse("", PolicyParseStrict)
 	must.NoError(t, err)
 	acl, err = NewACL(false, []*Policy{p1, p3})
 	must.NoError(t, err)
@@ -185,7 +185,7 @@ func TestACLMerge(t *testing.T) {
 	must.False(t, acl.AllowClientOp())
 
 	// Merge read + deny
-	p4, err := Parse(denyAll)
+	p4, err := Parse(denyAll, PolicyParseStrict)
 	must.NoError(t, err)
 	acl, err = NewACL(false, []*Policy{p1, p4})
 	must.NoError(t, err)
@@ -362,7 +362,7 @@ func TestAllowNamespace(t *testing.T) {
 
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 
 			acl, err := NewACL(false, []*Policy{policy})
@@ -434,7 +434,7 @@ func TestWildcardNamespaceMatching(t *testing.T) {
 
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces)
 
@@ -629,7 +629,7 @@ node_pool "*" {
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.NodePools)
 
@@ -694,7 +694,7 @@ func TestWildcardHostVolumeMatching(t *testing.T) {
 
 	for _, tc := range tests {
 		t.Run(tc.Policy, func(t *testing.T) {
-			policy, err := Parse(tc.Policy)
+			policy, err := Parse(tc.Policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.HostVolumes)
 
@@ -905,7 +905,7 @@ func TestVariablesMatching(t *testing.T) {
 	for _, tc := range tests {
 		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
-			policy, err := Parse(tc.policy)
+			policy, err := Parse(tc.policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces[0].Variables)
 
@@ -918,7 +918,7 @@ func TestVariablesMatching(t *testing.T) {
 
 	t.Run("search over namespace", func(t *testing.T) {
 		policy, err := Parse(`namespace "ns" {
-					variables { path "foo/bar" { capabilities = ["read"] }}}`)
+					variables { path "foo/bar" { capabilities = ["read"] }}}`, PolicyParseStrict)
 		must.NoError(t, err)
 		must.NotNil(t, policy.Namespaces[0].Variables)
 
@@ -958,7 +958,7 @@ func TestACL_matchingCapabilitySet_returnsAllMatches(t *testing.T) {
 
 	for _, tc := range tests {
 		t.Run(tc.Policy, func(t *testing.T) {
-			policy, err := Parse(tc.Policy)
+			policy, err := Parse(tc.Policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces)
 
@@ -1012,7 +1012,7 @@ func TestACL_matchingCapabilitySet_difference(t *testing.T) {
 
 	for _, tc := range tests {
 		t.Run(tc.Policy, func(t *testing.T) {
-			policy, err := Parse(tc.Policy)
+			policy, err := Parse(tc.Policy, PolicyParseStrict)
 			must.NoError(t, err)
 			must.NotNil(t, policy.Namespaces)
 
@@ -1079,7 +1079,7 @@ func TestAgentDebug(t *testing.T) {
 
 			acl := ACLsDisabledACL
 			if !tc.aclsDisabled {
-				policy, err := Parse(tc.policy)
+				policy, err := Parse(tc.policy, PolicyParseStrict)
 				must.NoError(t, err)
 
 				acl, err = NewACL(false, []*Policy{policy})
diff --git a/acl/policy.go b/acl/policy.go
index 17a7aed21..4e48874d7 100644
--- a/acl/policy.go
+++ b/acl/policy.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"regexp"
+	"slices"
 	"strings"
 
 	"github.com/hashicorp/hcl"
@@ -119,6 +120,17 @@ type Policy struct {
 	Quota       *QuotaPolicy        `hcl:"quota"`
 	Plugin      *PluginPolicy       `hcl:"plugin"`
 	Raw         string              `hcl:"-"`
+
+	// ExtraKeysHCL is used to capture any extra keys in the HCL input, so we
+	// can return an error if the user specified something unknown.
+	//
+	// Unfortunately, due to our current HCL use, keys from known blocks
+	// (namespace, node pools, and host volumes) will appear here, so we need to
+	// remove those as we process them. If the policy contains multiple blocks
+	// of the same type (e.g. multiple namespace blocks), the extra keys will
+	// also include "namespace" for all but the first block, so we need to
+	// remove those as we process them too.
+	ExtraKeysHCL []string `hcl:",unusedKeys"`
 }
 
 // IsEmpty checks to make sure that at least one policy has been set and is not
@@ -134,6 +146,14 @@ func (p *Policy) IsEmpty() bool {
 		p.Plugin == nil
 }
 
+// removeExtraKey removes a single occurrence of the passed key from the
+// ExtraKeysHCL slice. If the key is not found, this is a no-op.
+func (p *Policy) removeExtraKey(key string) {
+	if idx := slices.Index(p.ExtraKeysHCL, key); idx > -1 {
+		p.ExtraKeysHCL = append(p.ExtraKeysHCL[:idx], p.ExtraKeysHCL[idx+1:]...)
+	}
+}
+
 // NamespacePolicy is the policy for a specific namespace
 type NamespacePolicy struct {
 	Name         string `hcl:",key"`
@@ -378,10 +398,30 @@ func expandVariablesCapabilities(caps []string) []string {
 	return caps
 }
 
-// Parse is used to parse the specified ACL rules into an
-// intermediary set of policies, before being compiled into
-// the ACL
-func Parse(rules string) (*Policy, error) {
+const (
+	// PolicyParseStrict can be used to indicate that the policy should be
+	// parsed in strict mode, returning an error if there are any unknown keys.
+	// This should be used when creating or updating policies.
+	PolicyParseStrict = true
+
+	// PolicyParseLenient can be used to indicate that the policy should be
+	// parsed in lenient mode, ignoring any unknown keys. This should be used
+	// when evaluating policies, so we gracefully handle policies that were
+	// created before we added stricter validation.
+	PolicyParseLenient = false
+)
+
+// Parse is used to parse the specified ACL rules into an intermediary set of
+// policies, before being compiled into the ACL.
+//
+// The "strict" parameter should be set to true if the policy is being created
+// or updated, and false if it is being used for evaluation. This allowed us to
+// tighten restrictions around unknown keys when writing policies, while not
+// breaking existing policies that may have unknown keys when evaluating them,
+// since they may have been written before the restrictions were added. The
+// constants PolicyParseStrict and PolicyParseLenient can be used to make the
+// intent clear at the call site.
+func Parse(rules string, strict bool) (*Policy, error) {
 	// Decode the rules
 	p := &Policy{Raw: rules}
 	if rules == "" {
@@ -446,9 +486,10 @@ func Parse(rules string) (*Policy, error) {
 				pathPolicy.Capabilities = expandVariablesCapabilities(pathPolicy.Capabilities)
 
 			}
-
 		}
 
+		// Remove the namespace name from the extra key list.
+		p.removeExtraKey(ns.Name)
 	}
 
 	for _, np := range p.NodePools {
@@ -468,6 +509,9 @@ func Parse(rules string) (*Policy, error) {
 			extraCap := expandNodePoolPolicy(np.Policy)
 			np.Capabilities = append(np.Capabilities, extraCap...)
 		}
+
+		// Remove the node-pool name from the extra key list.
+		p.removeExtraKey(np.Name)
 	}
 
 	for _, hv := range p.HostVolumes {
@@ -489,8 +533,23 @@ func Parse(rules string) (*Policy, error) {
 			extraCap := expandHostVolumePolicy(hv.Policy)
 			hv.Capabilities = append(hv.Capabilities, extraCap...)
 		}
+
+		// Remove the host-volume name from the extra key list.
+		p.removeExtraKey(hv.Name)
 	}
 
+	// Now that we have processed all known keys, return an error if the
+	// operator wrote a policy with unknown keys if we are being strict. While
+	// these do not grant any extra privileges, it can be misleaing to allow
+	// these and cause problems later if we add new capabilities that collide
+	// with the unknown keys.
+	if len(p.ExtraKeysHCL) > 0 && strict {
+		return nil, fmt.Errorf("Invalid or duplicate policy keys: %v",
+			strings.Join(p.ExtraKeysHCL, ", "))
+	}
+
+	p.ExtraKeysHCL = nil
+
 	if p.Agent != nil && !isPolicyValid(p.Agent.Policy) {
 		return nil, fmt.Errorf("Invalid agent policy: %#v", p.Agent)
 	}
@@ -550,6 +609,9 @@ func hclDecode(p *Policy, rules string) (err error) {
 		if len(nsObj.Keys) == 0 {
 			p.Namespaces[i].Name = ""
 		}
+		if i > 0 {
+			p.removeExtraKey("namespace")
+		}
 
 		// Fix missing variable paths.
 		nsOT, ok := nsObj.Val.(*ast.ObjectType)
@@ -579,6 +641,9 @@ func hclDecode(p *Policy, rules string) (err error) {
 		if len(npObj.Keys) == 0 {
 			p.NodePools[i].Name = ""
 		}
+		if i > 0 {
+			p.removeExtraKey("node_pool")
+		}
 	}
 
 	hvList := list.Filter("host_volume")
@@ -587,6 +652,9 @@ func hclDecode(p *Policy, rules string) (err error) {
 		if len(hvObj.Keys) == 0 {
 			p.HostVolumes[i].Name = ""
 		}
+		if i > 0 {
+			p.removeExtraKey("host_volume")
+		}
 	}
 
 	return nil
diff --git a/acl/policy_test.go b/acl/policy_test.go
index 88d776635..202f8840d 100644
--- a/acl/policy_test.go
+++ b/acl/policy_test.go
@@ -934,7 +934,7 @@ func TestParse(t *testing.T) {
 
 	for idx, tc := range tcases {
 		t.Run(fmt.Sprintf("%02d", idx), func(t *testing.T) {
-			p, err := Parse(tc.Raw)
+			p, err := Parse(tc.Raw, PolicyParseStrict)
 			if tc.ExpectErr == "" {
 				must.NoError(t, err)
 			} else {
@@ -958,12 +958,105 @@ func TestParse_BadInput(t *testing.T) {
 
 	for i, c := range inputs {
 		t.Run(fmt.Sprintf("%d: %v", i, c), func(t *testing.T) {
-			_, err := Parse(c)
+			_, err := Parse(c, PolicyParseStrict)
 			must.Error(t, err)
 		})
 	}
 }
 
+func TestParse_ExtraKeys(t *testing.T) {
+	ci.Parallel(t)
+
+	inputPolicy := `
+
+namespace "foo" {
+  policy = "write"
+  variables {
+    path "project/*" {
+      capabilities = ["read", "write"]
+    }
+  }
+}
+namespace "bar" {
+  policy = "write"
+  variables {
+    path "system/*" {
+      capabilities = ["read", "write"]
+    }
+  }
+}
+
+bogus {
+  policy = "read"
+}
+anotherbogus {
+  policy = "write"
+}
+
+node {
+  policy = "read"
+}
+node {
+  policy = "write"
+}
+
+agent {
+  policy = "read"
+}
+agent {
+  policy = "write"
+}
+
+operator {
+  policy = "read"
+}
+operator {
+  policy = "write"
+}
+
+quota {
+  policy = "read"
+}
+quota {
+  policy = "write"
+}
+
+host_volume "volume-read-only" {
+  policy = "read"
+}
+host_volume "volume-read-write" {
+  policy = "write"
+}
+
+plugin {
+  policy = "read"
+}
+plugin {
+  policy = "write"
+}
+
+node_pool "pool-read-only" {
+  policy = "read"
+}
+node_pool "pool-read-write" {
+  policy = "write"
+}
+`
+
+	// Expect an error mentioning all the bad keys when indicating we are
+	// parsing the policy in strict mode.
+	_, err := Parse(inputPolicy, PolicyParseStrict)
+	must.ErrorContains(
+		t,
+		err,
+		"Invalid or duplicate policy keys: agent, anotherbogus, bogus, node, operator, plugin, quota",
+	)
+
+	// Expect no error when parsing in non-strict mode.
+	_, err = Parse(inputPolicy, PolicyParseLenient)
+	must.NoError(t, err)
+}
+
 func TestPluginPolicy_isValid(t *testing.T) {
 	ci.Parallel(t)
 
diff --git a/nomad/acl_endpoint.go b/nomad/acl_endpoint.go
index 0bcb0ea83..104267018 100644
--- a/nomad/acl_endpoint.go
+++ b/nomad/acl_endpoint.go
@@ -294,7 +294,7 @@ func (a *ACL) GetPolicy(args *structs.ACLPolicySpecificRequest, reply *structs.S
 			reply.Policy = out
 			if out != nil {
 				reply.Index = out.ModifyIndex
-				rules, err := policy.Parse(out.Rules)
+				rules, err := policy.Parse(out.Rules, acl.PolicyParseLenient)
 
 				if err != nil {
 					return err
diff --git a/nomad/event_endpoint_test.go b/nomad/event_endpoint_test.go
index b75941445..c3bac5f0e 100644
--- a/nomad/event_endpoint_test.go
+++ b/nomad/event_endpoint_test.go
@@ -474,7 +474,7 @@ func TestEventStream_validateNsOp(t *testing.T) {
 	for _, tc := range cases {
 		t.Run(tc.Name, func(t *testing.T) {
 
-			p, err := acl.Parse(tc.Policy)
+			p, err := acl.Parse(tc.Policy, acl.PolicyParseStrict)
 			require.NoError(err)
 
 			testACL, err := acl.NewACL(tc.Management, []*acl.Policy{p})
@@ -501,7 +501,7 @@ func TestEventStream_validateACL(t *testing.T) {
 	testEvent := &Event{srv: s1}
 
 	t.Run("single namespace ACL errors on wildcard", func(t *testing.T) {
-		policy, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}))
+		policy, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
 
 		// does not contain policy for default NS
@@ -516,9 +516,9 @@ func TestEventStream_validateACL(t *testing.T) {
 	})
 
 	t.Run("all namespace ACL succeeds on wildcard", func(t *testing.T) {
-		policy1, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}))
+		policy1, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
-		policy2, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}))
+		policy2, err := acl.Parse(mock.NamespacePolicy(ns1.Name, "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
 
 		testAcl, err := acl.NewACL(false, []*acl.Policy{policy1, policy2})
@@ -533,7 +533,7 @@ func TestEventStream_validateACL(t *testing.T) {
 	})
 
 	t.Run("single namespace ACL succeeds with correct NS", func(t *testing.T) {
-		policy, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}))
+		policy, err := acl.Parse(mock.NamespacePolicy("default", "", []string{acl.NamespaceCapabilityReadJob}), acl.PolicyParseStrict)
 		must.NoError(t, err)
 
 		testAcl, err := acl.NewACL(false, []*acl.Policy{policy})
diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go
index 459be3910..59b6b3d38 100644
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -433,7 +433,7 @@ func CompileACLObject(cache *ACLCache[*acl.ACL], policies []*ACLPolicy) (*acl.AC
 	// Parse the policies
 	parsed := make([]*acl.Policy, 0, len(policies))
 	for _, policy := range policies {
-		p, err := acl.Parse(policy.Rules)
+		p, err := acl.Parse(policy.Rules, acl.PolicyParseLenient)
 		if err != nil {
 			return nil, fmt.Errorf("failed to parse %q: %v", policy.Name, err)
 		}
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 0b19f3708..99bdb2bfd 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -13434,7 +13434,7 @@ func (a *ACLPolicy) Validate() error {
 		err := fmt.Errorf("invalid name '%s'", a.Name)
 		mErr.Errors = append(mErr.Errors, err)
 	}
-	if _, err := acl.Parse(a.Rules); err != nil {
+	if _, err := acl.Parse(a.Rules, acl.PolicyParseStrict); err != nil {
 		err = fmt.Errorf("failed to parse rules: %v", err)
 		mErr.Errors = append(mErr.Errors, err)
 	}
diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx
index ae6308fa4..da815a4e5 100644
--- a/website/content/docs/upgrade/upgrade-specific.mdx
+++ b/website/content/docs/upgrade/upgrade-specific.mdx
@@ -30,6 +30,24 @@ metrics refers to the parent job ID for dispatch and periodic jobs. The
 running high volume dispatch workloads, this change significantly reduces
 metrics cardinality and memory usage on the leader.
 
+#### ACL policies no longer silently ignore duplicate or invalid keys
+
+Nomad 1.11.0 introduces stricter validation for ACL policies. Policy writes that
+include duplicate or invalid keys will be rejected with an error instead of
+being silently ignored. Any existing policies with duplicate or invalid keys
+will continue to work, but the source policy document will need to be updated
+to be valid before it can be written to Nomad.
+
+## Nomad 1.10.6
+
+#### ACL policies no longer silently ignore duplicate or invalid keys
+
+Nomad 1.10.6 introduces stricter validation for ACL policies. Policy writes that
+include duplicate or invalid keys will be rejected with an error instead of
+being silently ignored. Any existing policies with duplicate or invalid keys
+will continue to work, but the source policy document will need to be updated
+to be valid before it can be written to Nomad.
+
 ## Nomad 1.10.2
 
 #### Clients respect `telemetry.publish_allocation_metrics`
@@ -188,6 +206,16 @@ labels = [
 ]
 ```
 
+## Nomad 1.8.18
+
+#### ACL policies no longer silently ignore duplicate or invalid keys
+
+Nomad 1.8.18 introduces stricter validation for ACL policies. Policy writes that
+include duplicate or invalid keys will be rejected with an error instead of
+being silently ignored. Any existing policies with duplicate or invalid keys
+will continue to work, but the source policy document will need to be updated
+to be valid before it can be written to Nomad.
+
 ## Nomad 1.8.4
 
 #### Default Docker `infra_image` changed