From a3e096b0c98e3e76bcf153e0eaae4ef7d5184e42 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Tue, 24 Jun 2025 08:30:15 +0100 Subject: [PATCH] tls: Reset server TLS authenticator when TLS config reloaded. (#26107) The Nomad server uses an authenticator backend for RPC handling which includes TLS verification. This verification setting is configured based on the servers TLS configuration object and is built when a new server is constructed. The bug occurs when a servers TLS configuration is reloaded which can change the desired TLS verification handling. In this case, the authenticator is not updated, meaning the RPC mTLS verification is not modified, even if the configuration indicates it should. This change adds a new function on the authenticator to allow updating its TLS verification rule. This new function is called when a servers TLS configuration is reloaded. --- .changelog/26107.txt | 3 ++ nomad/auth/auth.go | 25 +++++++--- nomad/server.go | 3 ++ nomad/server_test.go | 112 ++++++++++++++++++++++++++++++------------- 4 files changed, 105 insertions(+), 38 deletions(-) create mode 100644 .changelog/26107.txt diff --git a/.changelog/26107.txt b/.changelog/26107.txt new file mode 100644 index 000000000..65f50366d --- /dev/null +++ b/.changelog/26107.txt @@ -0,0 +1,3 @@ +```release-note:bug +tls: Fixed a bug where reloading the Nomad server process with an updated `tls.verify_server_hostname` configuration parameter would not apply an update to internal RPC handler verification and require a full server restart +``` diff --git a/nomad/auth/auth.go b/nomad/auth/auth.go index 95a4fed7d..9435a2e61 100644 --- a/nomad/auth/auth.go +++ b/nomad/auth/auth.go @@ -10,6 +10,7 @@ import ( "net" "slices" "strings" + "sync/atomic" "time" "github.com/hashicorp/go-hclog" @@ -40,8 +41,13 @@ type Encrypter interface { } type Authenticator struct { - aclsEnabled bool - verifyTLS bool + aclsEnabled bool + + // verifyTLS is used to determine whether the server should verify TLS and + // is an atomic bool, so that the server TLS reload can update it at runtime + // with a race condition. + verifyTLS *atomic.Bool + logger hclog.Logger getState StateGetter getLeaderACL LeaderACLGetter @@ -69,9 +75,9 @@ type AuthenticatorConfig struct { } func NewAuthenticator(cfg *AuthenticatorConfig) *Authenticator { - return &Authenticator{ + a := Authenticator{ aclsEnabled: cfg.AclsEnabled, - verifyTLS: cfg.VerifyTLS, + verifyTLS: &atomic.Bool{}, logger: cfg.Logger.With("auth"), getState: cfg.StateFn, getLeaderACL: cfg.GetLeaderACLFn, @@ -84,8 +90,15 @@ func NewAuthenticator(cfg *AuthenticatorConfig) *Authenticator { "server." + cfg.Region + ".nomad", }, } + + a.verifyTLS.Store(cfg.VerifyTLS) + return &a } +// SetVerifyTLS is a helper method to set the verifyTLS field. This is used in +// when the server TLS configuration is updated. +func (s *Authenticator) SetVerifyTLS(verifyTLS bool) { s.verifyTLS.Store(verifyTLS) } + // Authenticate extracts an AuthenticatedIdentity from the request context or // provided token and sets the identity on the request. The caller can extract // an acl.ACL, WorkloadIdentity, or other identifying tokens to use for @@ -255,7 +268,7 @@ func (s *Authenticator) AuthenticateServerOnly(ctx RPCContext, args structs.Requ identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} defer args.SetIdentity(identity) // always set the identity, even on errors - if s.verifyTLS && !ctx.IsStatic() { + if s.verifyTLS.Load() && !ctx.IsStatic() { tlsCert := ctx.Certificate() if tlsCert == nil { return nil, errors.New("missing certificate information") @@ -298,7 +311,7 @@ func (s *Authenticator) AuthenticateClientOnly(ctx RPCContext, args structs.Requ identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} defer args.SetIdentity(identity) // always set the identity, even on errors - if s.verifyTLS && !ctx.IsStatic() { + if s.verifyTLS.Load() && !ctx.IsStatic() { tlsCert := ctx.Certificate() if tlsCert == nil { return nil, errors.New("missing certificate information") diff --git a/nomad/server.go b/nomad/server.go index ca255783a..c6f7b0611 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -692,6 +692,9 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { // Kill any old listeners s.rpcCancel() + // Update the authenticator, so any changes in TLS verification are applied. + s.auth.SetVerifyTLS(s.config.TLSConfig != nil && s.config.TLSConfig.EnableRPC && s.config.TLSConfig.VerifyServerHostname) + s.rpcTLS = incomingTLS s.connPool.ReloadTLS(tlsWrap) diff --git a/nomad/server_test.go b/nomad/server_test.go index a4422df38..0777fd24a 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -210,7 +210,6 @@ func connectionReset(msg string) bool { // upgrading from plaintext to TLS if the server's TLS configuration changes. func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { ci.Parallel(t) - assert := assert.New(t) const ( cafile = "../helper/tlsutil/testdata/nomad-agent-ca.pem" @@ -224,8 +223,15 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { }) defer cleanupS1() + originalRPCCodec := rpcClient(t, s1) + + // Upsert a node into state, so we can use the Node.GetClientAllocs RPC + // to test the TLS connection. + mockNode := mock.Node() + must.NoError(t, s1.State().UpsertNode(structs.MsgTypeTestSetup, 10, mockNode)) + // assert that the server started in plaintext mode - assert.Equal(s1.config.TLSConfig.CertFile, "") + must.Eq(t, s1.config.TLSConfig.CertFile, "") newTLSConfig := &config.TLSConfig{ EnableHTTP: true, @@ -236,29 +242,48 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { KeyFile: fookey, } - err := s1.reloadTLSConnections(newTLSConfig) - assert.Nil(err) - assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig)) + must.NoError(t, s1.reloadTLSConnections(newTLSConfig)) + + certEq, err := s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig) + must.NoError(t, err) + must.True(t, certEq) codec := rpcClient(t, s1) + tlsCodec := rpcClientWithTLS(t, s1, newTLSConfig) - node := mock.Node() - req := &structs.NodeRegisterRequest{ - Node: node, - WriteRequest: structs.WriteRequest{Region: "global"}, + req := &structs.NodeSpecificRequest{ + NodeID: mockNode.ID, + SecretID: mockNode.SecretID, + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: mockNode.SecretID, + }, } - var resp structs.GenericResponse - err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) - assert.NotNil(err) - assert.True(connectionReset(err.Error())) + var resp structs.NodeClientAllocsResponse + + // Perform a request using the original codec. This should fail with a + // permission denied error, as the server has now switched to TLS and is + // performing TLS verification. + err = msgpackrpc.CallWithCodec(originalRPCCodec, "Node.GetClientAllocs", req, &resp) + must.ErrorContains(t, err, "Permission denied") + + // Perform a request using a non-TLS codec. This should fail with a + // connection reset error, as the server has now switched to TLS. + err = msgpackrpc.CallWithCodec(codec, "Node.GetClientAllocs", req, &resp) + must.Error(t, err) + must.True(t, connectionReset(err.Error())) + + // Perform a request using the new TLS codec. This should succeed, as the + // server is now configured to accept and verify TLS connections. + err = msgpackrpc.CallWithCodec(tlsCodec, "Node.GetClientAllocs", req, &resp) + must.NoError(t, err) } // Tests that the server will successfully reload its network connections, // downgrading from TLS to plaintext if the server's TLS configuration changes. func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { ci.Parallel(t) - assert := assert.New(t) const ( cafile = "../helper/tlsutil/testdata/nomad-agent-ca.pem" @@ -268,36 +293,59 @@ func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { dir := t.TempDir() + tlsConfig := config.TLSConfig{ + EnableHTTP: true, + EnableRPC: true, + VerifyServerHostname: true, + CAFile: cafile, + CertFile: foocert, + KeyFile: fookey, + } + s1, cleanupS1 := TestServer(t, func(c *Config) { c.DataDir = path.Join(dir, "nodeB") - c.TLSConfig = &config.TLSConfig{ - EnableHTTP: true, - EnableRPC: true, - VerifyServerHostname: true, - CAFile: cafile, - CertFile: foocert, - KeyFile: fookey, - } + c.TLSConfig = &tlsConfig }) defer cleanupS1() + originalRPCTLSCodec := rpcClientWithTLS(t, s1, &tlsConfig) + + // Upsert a node into state, so we can use the Node.GetClientAllocs RPC + // to test the TLS connection. + mockNode := mock.Node() + must.NoError(t, s1.State().UpsertNode(structs.MsgTypeTestSetup, 10, mockNode)) + newTLSConfig := &config.TLSConfig{} - err := s1.reloadTLSConnections(newTLSConfig) - assert.Nil(err) - assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig)) + must.NoError(t, s1.reloadTLSConnections(newTLSConfig)) + + certEq, err := s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig) + must.NoError(t, err) + must.True(t, certEq) codec := rpcClient(t, s1) - node := mock.Node() - req := &structs.NodeRegisterRequest{ - Node: node, - WriteRequest: structs.WriteRequest{Region: "global"}, + req := &structs.NodeSpecificRequest{ + NodeID: mockNode.ID, + SecretID: mockNode.SecretID, + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: mockNode.SecretID, + }, } - var resp structs.GenericResponse - err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) - assert.Nil(err) + var resp structs.NodeClientAllocsResponse + + // Perform a request using the original TLS codec. This should fail with a + // connection reset error, as the server has now switched to plaintext. + err = msgpackrpc.CallWithCodec(originalRPCTLSCodec, "Node.GetClientAllocs", req, &resp) + must.Error(t, err) + must.True(t, connectionReset(err.Error())) + + // Perform a request using a non-TLS codec. This should succeed, as the + // server is now configured to accept plaintext connections. + err = msgpackrpc.CallWithCodec(codec, "Node.GetClientAllocs", req, &resp) + must.NoError(t, err) } // Tests that the server will successfully reload its network connections,