reporting: Update server to accommodate new enterprise reporting. (#24919)

Nomad Enterprise will utilise new reporting metrics and the
changes here allow this work to be conducted.

The server-specific GetClientNodesCount function has been removed
from CE as this is only called within enterprise code. A new
heartbeater function allows us to get the number of active timers,
which can be used by the heartbeater metrics and any other callers
that want this data.
This commit is contained in:
James Rasell
2025-01-24 09:00:07 +01:00
committed by GitHub
parent c1dc9ed75d
commit 739e5ed6ee
4 changed files with 51 additions and 22 deletions

View File

@@ -251,15 +251,13 @@ func (h *nodeHeartbeater) clearAllHeartbeatTimers() error {
return nil
}
// heartbeatStats is a long running routine used to capture
// the number of active heartbeats being tracked
// heartbeatStats is a long-running routine used to capture the number of
// active heartbeats being tracked.
func (h *nodeHeartbeater) heartbeatStats() {
for {
select {
case <-time.After(5 * time.Second):
h.heartbeatTimersLock.Lock()
num := len(h.heartbeatTimers)
h.heartbeatTimersLock.Unlock()
num := h.getHeartbeatTimerNum()
metrics.SetGauge([]string{"nomad", "heartbeat", "active"}, float32(num))
case <-h.srv.shutdownCh:
@@ -267,3 +265,12 @@ func (h *nodeHeartbeater) heartbeatStats() {
}
}
}
// getHeartbeatTimerNum reports how many entries are currently present in the
// heartbeatTimers collection. It acquires heartbeatTimersLock for the duration
// of the read, so callers must not already hold that lock when calling it.
func (h *nodeHeartbeater) getHeartbeatTimerNum() int {
	h.heartbeatTimersLock.Lock()
	count := len(h.heartbeatTimers)
	h.heartbeatTimersLock.Unlock()

	return count
}

View File

@@ -8,10 +8,12 @@ import (
"testing"
"time"
"github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc/v2"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
@@ -423,3 +425,39 @@ func TestHeartbeat_InvalidateHeartbeatDisconnectedClient(t *testing.T) {
})
}
}
// Test_nodeHeartbeater_getHeartbeatTimerNum checks that getHeartbeatTimerNum
// tracks the number of heartbeat timers as they are added, removed
// individually, and cleared in bulk.
func Test_nodeHeartbeater_getHeartbeatTimerNum(t *testing.T) {
	ci.Parallel(t)

	heartbeater := &nodeHeartbeater{logger: hclog.NewNullLogger()}

	// Create the initial set of node IDs which will be registered with the
	// heartbeater as active.
	const initialNodes = 5

	nodeIDs := make([]string, 0, initialNodes)
	for i := 0; i < initialNodes; i++ {
		nodeIDs = append(nodeIDs, uuid.Generate())
	}

	// Insert via the "Locked" variant so that a full server does not need to
	// be set up for this small test. Taking the lock is unnecessary here as
	// nothing else is accessing the heartbeater concurrently.
	for _, id := range nodeIDs {
		heartbeater.resetHeartbeatTimerLocked(id, 10*time.Minute)
	}
	must.Eq(t, initialNodes, heartbeater.getHeartbeatTimerNum())

	// Drop two of the nodes and confirm the reported number falls to match.
	must.NoError(t, heartbeater.clearHeartbeatTimer(nodeIDs[0]))
	must.NoError(t, heartbeater.clearHeartbeatTimer(nodeIDs[2]))
	must.Eq(t, 3, heartbeater.getHeartbeatTimerNum())

	// Remove every remaining timer and confirm nothing is reported.
	must.NoError(t, heartbeater.clearAllHeartbeatTimers())
	must.Eq(t, 0, heartbeater.getHeartbeatTimerNum())
}

View File

@@ -5,5 +5,4 @@
package reporting
type Manager struct {
}
// Manager is declared in this package as an empty struct; it has no fields.
type Manager struct{}

View File

@@ -36,7 +36,6 @@ import (
"github.com/hashicorp/nomad/helper/codec"
"github.com/hashicorp/nomad/helper/goruntime"
"github.com/hashicorp/nomad/helper/group"
"github.com/hashicorp/nomad/helper/iterator"
"github.com/hashicorp/nomad/helper/pool"
"github.com/hashicorp/nomad/helper/tlsutil"
"github.com/hashicorp/nomad/lib/auth/oidc"
@@ -2188,20 +2187,6 @@ func (s *Server) isSingleServerCluster() bool {
return s.config.BootstrapExpect == 1
}
// GetClientNodesCount returns the number of nodes found in a snapshot of the
// server's state store. If either the snapshot or the node iterator cannot be
// obtained, it returns zero along with the encountered error.
func (s *Server) GetClientNodesCount() (int, error) {
	snap, err := s.State().Snapshot()
	if err != nil {
		return 0, err
	}

	nodes, err := snap.Nodes(nil)
	if err != nil {
		return 0, err
	}

	count := iterator.Len(nodes)
	return count, nil
}
// peersInfoContent is used to help operators understand what happened to the
// peers.json file. This is written to a file called peers.info in the same
// location.