From f5c563155e5ceab6382579971090cf172dce45e6 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Fri, 26 Sep 2025 14:38:39 +0100 Subject: [PATCH] server: Only generate identities for nodes that meet min version. (#26842) In a cluster where the Nomad servers have been upgraded before the clients, the cluster leader will generate an identity for each client at every heartbeat. The clients will not have the code path to handle this response, so the object is thrown away. This wastes server resources. This change introduces a minimum version check to the logic which decides whether an identity should be generated. In the situation above, the leader will now decline to generate identities for Nomad clients running pre-1.11 versions. --- nomad/node_endpoint.go | 2 +- nomad/node_endpoint_test.go | 13 ++ nomad/structs/node.go | 47 +++++++ nomad/structs/node_test.go | 242 +++++++++++++++++++++++++++++------- 4 files changed, 260 insertions(+), 44 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index b99dc6ffb..a5742b28f 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -743,7 +743,7 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct } // Check and generate a node identity if needed. - if args.ShouldGenerateNodeIdentity(timeNow.UTC(), identityTTL) { + if args.ShouldGenerateNodeIdentity(node, timeNow.UTC(), identityTTL) { claims := structs.GenerateNodeIdentityClaims(node, n.srv.Region(), identityTTL) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 9f1f42fed..c245514d0 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -145,6 +145,7 @@ func TestNode_Register_Identity(t *testing.T) { testFn: func(t *testing.T, srv *Server, codec rpc.ClientCodec) { node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" req := structs.NodeRegisterRequest{ Node: node, @@ -173,6 +174,7 @@ func TestNode_Register_Identity(t *testing.T) { testFn: func(t *testing.T, srv *Server, codec rpc.ClientCodec) { node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" node.NodePool = "custom-pool" req := structs.NodeRegisterRequest{ @@ -199,6 +201,7 @@ func TestNode_Register_Identity(t *testing.T) { must.NoError(t, srv.State().UpsertNodePools(structs.MsgTypeTestSetup, 1000, []*structs.NodePool{nodePool})) node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" node.NodePool = nodePool.Name req := structs.NodeRegisterRequest{ @@ -224,6 +227,7 @@ func TestNode_Register_Identity(t *testing.T) { timeJWTNow := jwt.NewNumericDate(timeNow) node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" must.NoError(t, srv.State().UpsertNode(structs.MsgTypeTestSetup, 1000, node)) claims := structs.GenerateNodeIdentityClaims( @@ -260,6 +264,7 @@ func TestNode_Register_Identity(t *testing.T) { testFn: func(t *testing.T, srv *Server, codec rpc.ClientCodec) { node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" must.NoError(t, srv.State().UpsertNode(structs.MsgTypeTestSetup, 1000, node)) claims := structs.GenerateNodeIdentityClaims( @@ -295,6 +300,7 @@ func TestNode_Register_Identity(t *testing.T) { testFn: func(t *testing.T, srv *Server, codec rpc.ClientCodec) { node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" must.NoError(t, srv.State().UpsertNode(structs.MsgTypeTestSetup, 1000, node.Copy())) claims := structs.GenerateNodeIdentityClaims( @@ -1575,6 +1581,7 @@ func TestNode_UpdateStatus_Identity(t *testing.T) { testFn: func(t *testing.T, srv *Server, codec rpc.ClientCodec) { node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" must.Eq(t, "", node.IdentitySigningKeyID) must.NoError(t, srv.State().UpsertNode(structs.MsgTypeTestSetup, srv.raft.LastIndex(), node)) @@ -1609,6 +1616,7 @@ func TestNode_UpdateStatus_Identity(t *testing.T) { )) node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" node.NodePool = nodePool.Name must.Eq(t, "", node.IdentitySigningKeyID) @@ -1669,6 +1677,7 @@ func TestNode_UpdateStatus_Identity(t *testing.T) { timeJWTNow := jwt.NewNumericDate(timeNow) node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" must.NoError(t, srv.State().UpsertNode(structs.MsgTypeTestSetup, srv.raft.LastIndex(), node)) claims := structs.GenerateNodeIdentityClaims( @@ -1717,6 +1726,7 @@ func TestNode_UpdateStatus_Identity(t *testing.T) { timeJWTNow := jwt.NewNumericDate(timeNow) node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" node.NodePool = nodePool.Name must.NoError(t, srv.State().UpsertNode(structs.MsgTypeTestSetup, srv.raft.LastIndex(), node)) @@ -2413,6 +2423,9 @@ func TestClientEndpoint_GetNode(t *testing.T) { // Create the register request node := mock.Node() + node.Attributes["nomad.version"] = "1.11.0" + must.NoError(t, node.ComputeClass()) + reg := &structs.NodeRegisterRequest{ Node: node, WriteRequest: structs.WriteRequest{Region: "global"}, diff --git a/nomad/structs/node.go b/nomad/structs/node.go index e491c9867..33ab2441b 100644 --- a/nomad/structs/node.go +++ b/nomad/structs/node.go @@ -8,15 +8,49 @@ import ( "fmt" "maps" "reflect" + "slices" "strings" "time" "github.com/go-jose/go-jose/v3/jwt" "github.com/hashicorp/go-multierror" + "github.com/hashicorp/go-version" "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/nomad/helper/uuid" ) +// minNodeIdentityNomadNodeVersion is the minimum Nomad version that supports +// node identities. This is used to determine whether we should generate a new +// identity for a node during registration and status updates. +// +// TODO(jrasell): Update this when we have a stable release with node identity +// support. +var minNodeIdentityNomadNodeVersion = version.Must(version.NewVersion("1.10.6-dev")) + +// meetsMinimumVersion identifies whether the node meets the minimum version as +// specified by vrsn. The node version is determined by the nomad.version +// attribute. If the attribute is not set or we cannot parse the version, we +// assume the node does not meet the minimum version. +func (n *Node) meetsMinimumVersion(vrsn *version.Version) bool { + + nomadVersionAttr, ok := n.Attributes["nomad.version"] + if !ok { + return false + } + + nomadBuildVer, err := version.NewVersion(nomadVersionAttr) + if err != nil { + return false + } + + versionsMatch := slices.Equal( + vrsn.Segments(), + nomadBuildVer.Segments(), + ) + + return !(nomadBuildVer.LessThan(vrsn) && !versionsMatch) +} + // CSITopology is a map of topological domains to topological segments. // A topological domain is a sub-division of a cluster, like "region", // "zone", "rack", etc. @@ -614,6 +648,12 @@ func (n *NodeRegisterRequest) ShouldGenerateNodeIdentity( return errors.Is(authErr, jwt.ErrExpired) } + // If the node does not meet the minimum version, we do not want to generate + // a new identity, as it will not be able to use it. + if !n.Node.meetsMinimumVersion(minNodeIdentityNomadNodeVersion) { + return false + } + // If an ACL token or client ID is set, a node is attempting to register for // the first time, or is re-registering using its secret ID. In either case, // we should generate a new identity. @@ -667,10 +707,17 @@ type NodeUpdateStatusRequest struct { // ShouldGenerateNodeIdentity determines whether the handler should generate a // new node identity based on the caller identity information. func (n *NodeUpdateStatusRequest) ShouldGenerateNodeIdentity( + node *Node, now time.Time, ttl time.Duration, ) bool { + // If the node does not meet the minimum version, we do not want to generate + // a new identity, as it will not be able to use it. + if !node.meetsMinimumVersion(minNodeIdentityNomadNodeVersion) { + return false + } + identity := n.GetIdentity() // If the client ID is set, we should generate a new identity as the node diff --git a/nomad/structs/node_test.go b/nomad/structs/node_test.go index 8d4ae9bc3..d6f1ca033 100644 --- a/nomad/structs/node_test.go +++ b/nomad/structs/node_test.go @@ -9,11 +9,98 @@ import ( "time" "github.com/go-jose/go-jose/v3/jwt" + "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/ci" "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) +func TestNode_meetsMinimumVersion(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + name string + nodeVersion string + minVersion *version.Version + expectedOutput bool + }{ + { + name: "missing version attribute", + nodeVersion: "", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: false, + }, + { + name: "invalid version attribute", + nodeVersion: "not-a-semver", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: false, + }, + { + name: "node version less than minimum", + nodeVersion: "1.10.5", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: false, + }, + { + name: "node version equal to minimum", + nodeVersion: "1.10.6-dev", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: true, + }, + { + name: "node version greater than minimum", + nodeVersion: "1.10.7", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: true, + }, + { + name: "prerelease", + nodeVersion: "1.10.6-dev", + minVersion: version.Must(version.NewVersion("1.10.6")), + expectedOutput: true, + }, + { + name: "enterprise node version less than minimum", + nodeVersion: "1.10.5+ent", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: false, + }, + { + name: "enterprise node version equal to minimum", + nodeVersion: "1.10.6-dev+ent", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: true, + }, + { + name: "enterprise node version greater than minimum", + nodeVersion: "1.10.7+ent", + minVersion: version.Must(version.NewVersion("1.10.6-dev")), + expectedOutput: true, + }, + { + name: "enterprise prerelease", + nodeVersion: "1.10.6-dev+ent", + minVersion: version.Must(version.NewVersion("1.10.6")), + expectedOutput: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + + testNode := &Node{ + Attributes: map[string]string{ + "nomad.version": tc.nodeVersion, + }, + } + + actualOutput := testNode.meetsMinimumVersion(tc.minVersion) + must.Eq(t, tc.expectedOutput, actualOutput) + }) + } +} + func TestDriverInfoEquals(t *testing.T) { ci.Parallel(t) @@ -468,6 +555,7 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { inputAuthErr error inputTime time.Time inputTTL time.Duration + inputNomadVersionAttr string expectedOutput bool }{ { @@ -476,35 +564,40 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { inputAuthErr: jwt.ErrExpired, inputTime: time.Now(), inputTTL: 10 * time.Minute, + inputNomadVersionAttr: "1.11.0", expectedOutput: true, }, { name: "first time node registration", inputNodeRegisterRequest: &NodeRegisterRequest{ + Node: mockNode, WriteRequest: WriteRequest{ identity: &AuthenticatedIdentity{ ACLToken: AnonymousACLToken, }, }, }, - inputAuthErr: nil, - inputTime: time.Now(), - inputTTL: 10 * time.Minute, - expectedOutput: true, + inputAuthErr: nil, + inputTime: time.Now(), + inputTTL: 10 * time.Minute, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "registration using node secret ID", inputNodeRegisterRequest: &NodeRegisterRequest{ + Node: mockNode, WriteRequest: WriteRequest{ identity: &AuthenticatedIdentity{ ClientID: "client-id-1", }, }, }, - inputAuthErr: nil, - inputTime: time.Now(), - inputTTL: 10 * time.Minute, - expectedOutput: true, + inputAuthErr: nil, + inputTime: time.Now(), + inputTTL: 10 * time.Minute, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "modified node node pool configuration", @@ -526,10 +619,11 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputAuthErr: nil, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputAuthErr: nil, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "modified node class configuration", @@ -551,10 +645,11 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputAuthErr: nil, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputAuthErr: nil, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "modified node datacenter configuration", @@ -576,10 +671,11 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputAuthErr: nil, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputAuthErr: nil, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "expiring node identity", @@ -601,10 +697,37 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputAuthErr: nil, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputAuthErr: nil, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, + }, + { + name: "old client version", + inputNodeRegisterRequest: &NodeRegisterRequest{ + Node: mockNode, + WriteRequest: WriteRequest{ + identity: &AuthenticatedIdentity{ + Claims: &IdentityClaims{ + NodeIdentityClaims: &NodeIdentityClaims{ + NodeID: mockNode.ID, + NodePool: mockNode.NodePool, + NodeClass: mockNode.NodeClass, + NodeDatacenter: mockNode.Datacenter, + }, + Claims: jwt.Claims{ + Expiry: jwt.NewNumericDate(time.Now().UTC().Add(24 * time.Hour)), + }, + }, + }, + }, + }, + inputAuthErr: nil, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.8.17", + expectedOutput: false, }, { name: "no generation", @@ -626,15 +749,20 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputAuthErr: nil, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: false, + inputAuthErr: nil, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: false, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { + + // Set the nomad.version attribute on the mock node. + mockNode.Attributes["nomad.version"] = tc.inputNomadVersionAttr + actualOutput := tc.inputNodeRegisterRequest.ShouldGenerateNodeIdentity( tc.inputAuthErr, tc.inputTime, @@ -648,11 +776,15 @@ func TestNodeRegisterRequest_ShouldGenerateNodeIdentity(t *testing.T) { func TestNodeUpdateStatusRequest_ShouldGenerateNodeIdentity(t *testing.T) { ci.Parallel(t) + // Generate a stable mock node for testing. + mockNode := MockNode() + testCases := []struct { name string inputNodeRegisterRequest *NodeUpdateStatusRequest inputTime time.Time inputTTL time.Duration + inputNomadVersionAttr string expectedOutput bool }{ { @@ -664,9 +796,10 @@ func TestNodeUpdateStatusRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputTime: time.Now(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputTime: time.Now(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "expiring node identity", @@ -682,9 +815,10 @@ func TestNodeUpdateStatusRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "not expiring node identity", @@ -700,9 +834,10 @@ func TestNodeUpdateStatusRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: false, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: false, }, { name: "not expiring forced renewal node identity", @@ -719,9 +854,10 @@ func TestNodeUpdateStatusRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: true, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: true, }, { name: "server authenticated request", @@ -732,15 +868,35 @@ func TestNodeUpdateStatusRequest_ShouldGenerateNodeIdentity(t *testing.T) { }, }, }, - inputTime: time.Now().UTC(), - inputTTL: 24 * time.Hour, - expectedOutput: false, + inputTime: time.Now().UTC(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.11.0", + expectedOutput: false, + }, + { + name: "old client version", + inputNodeRegisterRequest: &NodeUpdateStatusRequest{ + WriteRequest: WriteRequest{ + identity: &AuthenticatedIdentity{ + ClientID: "client-id-1", + }, + }, + }, + inputTime: time.Now(), + inputTTL: 24 * time.Hour, + inputNomadVersionAttr: "1.8.17", + expectedOutput: false, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { + + // Set the nomad.version attribute on the mock node. + mockNode.Attributes["nomad.version"] = tc.inputNomadVersionAttr + actualOutput := tc.inputNodeRegisterRequest.ShouldGenerateNodeIdentity( + mockNode, tc.inputTime, tc.inputTTL, )