Merge pull request #5841 from hashicorp/f-raft-snapshot-metrics

Raft and state store indexes as metrics
This commit is contained in:
Preetha
2019-06-19 12:01:03 -05:00
committed by GitHub
2 changed files with 43 additions and 0 deletions

View File

@@ -15,6 +15,7 @@ import (
"sync/atomic"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/consul/autopilot"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
@@ -410,6 +411,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI) (*Server, error)
// Emit metrics
go s.heartbeatStats()
// Emit raft and state store metrics
go s.EmitRaftStats(10*time.Second, s.shutdownCh)
// Start enterprise background workers
s.startEnterpriseBackground()
@@ -1450,6 +1454,27 @@ func (s *Server) Stats() map[string]map[string]string {
return stats
}
// EmitRaftStats periodically exports gauges for the raft last index, the raft
// applied index, and the latest index reported by the state store. It emits
// one set of gauges each time period elapses and blocks until stopCh is
// closed or receives a value, so callers are expected to run it in its own
// goroutine.
//
// Index values are converted to float32 because that is what SetGauge
// accepts; float32 cannot represent every large integer exactly, so very
// large indexes are reported rounded.
func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) {
	// Use one reusable timer instead of calling time.After on every
	// iteration: time.After allocates a new timer per loop and leaves the
	// pending one alive until it fires when we return via stopCh.
	timer := time.NewTimer(period)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			lastIndex := s.raft.LastIndex()
			metrics.SetGauge([]string{"raft", "lastIndex"}, float32(lastIndex))
			appliedIndex := s.raft.AppliedIndex()
			metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(appliedIndex))
			stateStoreSnapshotIndex, err := s.State().LatestIndex()
			if err != nil {
				// Best effort: skip only this gauge and keep the loop running.
				s.logger.Warn("Unable to read snapshot index from statestore, metric will not be emitted", "error", err)
			} else {
				metrics.SetGauge([]string{"state", "snapshotIndex"}, float32(stateStoreSnapshotIndex))
			}
			// The timer has fired and its channel has been drained above, so
			// Reset is safe here. Re-arming after the work keeps the original
			// cadence of "period after the previous emission finished".
			timer.Reset(period)
		case <-stopCh:
			return
		}
	}
}
// Region returns the region of the server
func (s *Server) Region() string {
return s.config.Region

View File

@@ -109,6 +109,18 @@ when retrieving metrics using the above described signals.
<td>Raft transactions / `interval`</td>
<td>Counter</td>
</tr>
<tr>
<td>`nomad.raft.lastIndex`</td>
<td>Index of the <a href="https://godoc.org/github.com/hashicorp/raft#Raft.LastIndex">last log in stable storage</a></td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.raft.appliedIndex`</td>
<td>Index of the <a href="https://godoc.org/github.com/hashicorp/raft#Raft.AppliedIndex">last applied log</a></td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.raft.replication.appendEntries`</td>
<td>Raft transaction commit time</td>
@@ -167,6 +179,12 @@ when retrieving metrics using the above described signals.
<td>ms / Plan Evaluation</td>
<td>Timer</td>
</tr>
<tr>
<td>`nomad.state.snapshotIndex`</td>
<td>Latest index in the server's in-memory state store</td>
<td>Sequence number</td>
<td>Gauge</td>
</tr>
<tr>
<td>`nomad.worker.invoke_scheduler.<type>`</td>
<td>Time to run the scheduler of the given type</td>