From 739e5ed6ee302fd11158e9c224f7934a73edc2ca Mon Sep 17 00:00:00 2001 From: James Rasell Date: Fri, 24 Jan 2025 09:00:07 +0100 Subject: [PATCH] reporting: Update server to accommodate new enterprise reporting. (#24919) Nomad Enterprise will utilise new reporting metrics and the changes here allow this work to be conducted. The server specific GetClientNodesCount function has been removed from CE as this is only called within enterprise code. A new heartbeater function allows us to get the number of active timers, which can be used by the heartbeater metrics and any other callers that want this data. --- nomad/heartbeat.go | 17 ++++++++++----- nomad/heartbeat_test.go | 38 +++++++++++++++++++++++++++++++++ nomad/reporting/reporting_ce.go | 3 +-- nomad/server.go | 15 ------------- 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/nomad/heartbeat.go b/nomad/heartbeat.go index be282b70d..1d8596a83 100644 --- a/nomad/heartbeat.go +++ b/nomad/heartbeat.go @@ -251,15 +251,13 @@ func (h *nodeHeartbeater) clearAllHeartbeatTimers() error { return nil } -// heartbeatStats is a long running routine used to capture -// the number of active heartbeats being tracked +// heartbeatStats is a long-running routine used to capture the number of +// active heartbeats being tracked. func (h *nodeHeartbeater) heartbeatStats() { for { select { case <-time.After(5 * time.Second): - h.heartbeatTimersLock.Lock() - num := len(h.heartbeatTimers) - h.heartbeatTimersLock.Unlock() + num := h.getHeartbeatTimerNum() metrics.SetGauge([]string{"nomad", "heartbeat", "active"}, float32(num)) case <-h.srv.shutdownCh: @@ -267,3 +265,12 @@ func (h *nodeHeartbeater) heartbeatStats() { } } } + +// getHeartbeatTimerNum is a helper function that returns the current number of +// active heartbeat timers. The caller should not hold the lock; this function +// handles that. 
+func (h *nodeHeartbeater) getHeartbeatTimerNum() int { + h.heartbeatTimersLock.Lock() + defer h.heartbeatTimersLock.Unlock() + return len(h.heartbeatTimers) +} diff --git a/nomad/heartbeat_test.go b/nomad/heartbeat_test.go index 52d234e6d..6c7a10131 100644 --- a/nomad/heartbeat_test.go +++ b/nomad/heartbeat_test.go @@ -8,10 +8,12 @@ import ( "testing" "time" + "github.com/hashicorp/go-hclog" memdb "github.com/hashicorp/go-memdb" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc/v2" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" @@ -423,3 +425,39 @@ func TestHeartbeat_InvalidateHeartbeatDisconnectedClient(t *testing.T) { }) } } + +func Test_nodeHeartbeater_getHeartbeatTimerNum(t *testing.T) { + ci.Parallel(t) + + nodeHeartbeat := &nodeHeartbeater{logger: hclog.NewNullLogger()} + + // Generate 5 initial node IDs that will be added to the heartbeater as + // active. + nodeIDs := []string{ + uuid.Generate(), + uuid.Generate(), + uuid.Generate(), + uuid.Generate(), + uuid.Generate(), + } + + // Use the locked insert function, so we can avoid setting up an entire + // server for this small test. We don't need the lock as there is no + // concurrency. + for _, nodeID := range nodeIDs { + nodeHeartbeat.resetHeartbeatTimerLocked(nodeID, 10*time.Minute) + } + + must.Eq(t, 5, nodeHeartbeat.getHeartbeatTimerNum()) + + // Remove a couple of nodes from the heartbeater and check that the number + // reports correctly. + must.NoError(t, nodeHeartbeat.clearHeartbeatTimer(nodeIDs[0])) + must.NoError(t, nodeHeartbeat.clearHeartbeatTimer(nodeIDs[2])) + + must.Eq(t, 3, nodeHeartbeat.getHeartbeatTimerNum()) + + // Clear all the timers and test. 
+ must.NoError(t, nodeHeartbeat.clearAllHeartbeatTimers()) + must.Eq(t, 0, nodeHeartbeat.getHeartbeatTimerNum()) +} diff --git a/nomad/reporting/reporting_ce.go b/nomad/reporting/reporting_ce.go index cef27a67e..ffe600398 100644 --- a/nomad/reporting/reporting_ce.go +++ b/nomad/reporting/reporting_ce.go @@ -5,5 +5,4 @@ package reporting -type Manager struct { -} +type Manager struct{} diff --git a/nomad/server.go b/nomad/server.go index ba4d9ad59..655ce8758 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -36,7 +36,6 @@ import ( "github.com/hashicorp/nomad/helper/codec" "github.com/hashicorp/nomad/helper/goruntime" "github.com/hashicorp/nomad/helper/group" - "github.com/hashicorp/nomad/helper/iterator" "github.com/hashicorp/nomad/helper/pool" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/lib/auth/oidc" @@ -2188,20 +2187,6 @@ func (s *Server) isSingleServerCluster() bool { return s.config.BootstrapExpect == 1 } -func (s *Server) GetClientNodesCount() (int, error) { - stateSnapshot, err := s.State().Snapshot() - if err != nil { - return 0, err - } - - iter, err := stateSnapshot.Nodes(nil) - if err != nil { - return 0, err - } - - return iterator.Len(iter), nil -} - // peersInfoContent is used to help operators understand what happened to the // peers.json file. This is written to a file called peers.info in the same // location.