Add raft snapshot configuration options (#15522)

* Add config elements
* Wire in snapshot configuration to raft
* Add hot reload of raft config
* Add documentation for new raft settings
* Add changelog
Charlie Voiselle
2023-01-20 14:21:51 -05:00
committed by GitHub
parent 1ded25fbf1
commit 85f67d4a83
7 changed files with 214 additions and 8 deletions

.changelog/15522.txt (new file)

@@ -0,0 +1,4 @@
```release-note:improvement
server: Added raft snapshot arguments to server config
server: Certain raft configuration elements can now be reloaded without restarting the server
```


@@ -212,6 +212,32 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
return nil, fmt.Errorf("raft_multiplier cannot be %d. Must be between 1 and %d", *agentConfig.Server.RaftMultiplier, MaxRaftMultiplier)
}
}
if vPtr := agentConfig.Server.RaftTrailingLogs; vPtr != nil {
if *vPtr < 1 {
return nil, fmt.Errorf("raft_trailing_logs must be non-negative, got %d", *vPtr)
}
conf.RaftConfig.TrailingLogs = uint64(*vPtr)
}
if vPtr := agentConfig.Server.RaftSnapshotInterval; vPtr != nil {
dur, err := time.ParseDuration(*vPtr)
if err != nil {
return nil, err
}
if dur < 5*time.Millisecond {
return nil, fmt.Errorf("raft_snapshot_interval must be greater than 5ms, got %q", *vPtr)
}
conf.RaftConfig.SnapshotInterval = dur
}
if vPtr := agentConfig.Server.RaftSnapshotThreshold; vPtr != nil {
if *vPtr < 1 {
return nil, fmt.Errorf("raft_snapshot_threshold must be non-negative, got %d", *vPtr)
}
conf.RaftConfig.SnapshotThreshold = uint64(*vPtr)
}
conf.RaftConfig.ElectionTimeout *= time.Duration(raftMultiplier)
conf.RaftConfig.HeartbeatTimeout *= time.Duration(raftMultiplier)
conf.RaftConfig.LeaderLeaseTimeout *= time.Duration(raftMultiplier)
@@ -525,7 +551,7 @@ func (a *Agent) serverConfig() (*nomad.Config, error) {
}
// finalizeServerConfig sets configuration fields on the server config that are
// not statically convertible and are from the agent.
func (a *Agent) finalizeServerConfig(c *nomad.Config) {
// Setup the logging
c.Logger = a.logger
@@ -553,7 +579,7 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
}
// finalizeClientConfig sets configuration fields on the client config that are
// not statically convertible and are from the agent.
func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error {
// Setup the logging
c.Logger = a.logger
@@ -973,7 +999,7 @@ func (a *Agent) setupClient() error {
}
// Set up a custom listener and dialer. This is used by Nomad clients when
// running consul-template functions that utilize the Nomad API. We lazy
// load this into the client config, therefore this needs to happen before
// we call NewClient.
a.builtinListener, a.builtinDialer = bufconndialer.New()
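
For reference, the snapshot-interval handling above is a parse-then-bound check on a user-supplied duration string. A minimal standalone sketch of the same pattern (the helper and the driver are hypothetical, not part of this change):

```go
package main

import (
	"fmt"
	"time"
)

// parseSnapshotInterval mirrors the validation in convertServerConfig:
// parse the duration string, then enforce the 5ms floor.
// (Hypothetical standalone helper, for illustration only.)
func parseSnapshotInterval(raw string) (time.Duration, error) {
	dur, err := time.ParseDuration(raw)
	if err != nil {
		return 0, err
	}
	if dur < 5*time.Millisecond {
		return 0, fmt.Errorf("raft_snapshot_interval must be at least 5ms, got %q", raw)
	}
	return dur, nil
}

func main() {
	for _, raw := range []string{"120s", "4ms", "not-a-duration"} {
		if dur, err := parseSnapshotInterval(raw); err != nil {
			fmt.Printf("%s -> error: %v\n", raw, err)
		} else {
			fmt.Printf("%s -> %v\n", raw, dur)
		}
	}
}
```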


@@ -20,6 +20,7 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/raft"
)
func TestAgent_RPC_Ping(t *testing.T) {
@@ -551,6 +552,104 @@ func TestAgent_ServerConfig_RaftMultiplier_Bad(t *testing.T) {
}
}
func TestAgent_ServerConfig_RaftTrailingLogs(t *testing.T) {
ci.Parallel(t)
cases := []struct {
name string
value *int
expect interface{}
isErr bool
}{
{
name: "bad",
value: pointer.Of(int(-1)),
isErr: true,
expect: "raft_trailing_logs must be non-negative",
},
{
name: "good",
value: pointer.Of(int(10)),
expect: uint64(10),
},
{
name: "empty",
value: nil,
expect: raft.DefaultConfig().TrailingLogs,
},
}
for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
ci.Parallel(t)
conf := DevConfig(nil)
require.NoError(t, conf.normalizeAddrs())
conf.Server.RaftTrailingLogs = tc.value
nc, err := convertServerConfig(conf)
if !tc.isErr {
must.NoError(t, err)
val := tc.expect.(uint64)
must.Eq(t, val, nc.RaftConfig.TrailingLogs)
return
}
must.Error(t, err)
must.StrContains(t, err.Error(), tc.expect.(string))
})
}
}
func TestAgent_ServerConfig_RaftSnapshotThreshold(t *testing.T) {
ci.Parallel(t)
cases := []struct {
name string
value *int
expect interface{}
isErr bool
}{
{
name: "bad",
value: pointer.Of(int(-1)),
isErr: true,
expect: "raft_snapshot_threshold must be non-negative",
},
{
name: "good",
value: pointer.Of(int(10)),
expect: uint64(10),
},
{
name: "empty",
value: nil,
expect: raft.DefaultConfig().SnapshotThreshold,
},
}
for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
ci.Parallel(t)
conf := DevConfig(nil)
require.NoError(t, conf.normalizeAddrs())
conf.Server.RaftSnapshotThreshold = tc.value
nc, err := convertServerConfig(conf)
if !tc.isErr {
must.NoError(t, err)
val := tc.expect.(uint64)
must.Eq(t, val, nc.RaftConfig.SnapshotThreshold)
return
}
must.Error(t, err)
must.StrContains(t, err.Error(), tc.expect.(string))
})
}
}
func TestAgent_ServerConfig_RaftProtocol_3(t *testing.T) {
ci.Parallel(t)


@@ -610,6 +610,25 @@ type ServerConfig struct {
// RaftBoltConfig configures boltdb as used by raft.
RaftBoltConfig *RaftBoltConfig `hcl:"raft_boltdb"`
// RaftSnapshotThreshold controls how many outstanding logs there must be
// before we perform a snapshot. This prevents excessive snapshotting when we
// could instead just replay a small set of logs. The value passed here is the initial
// setting used. This can be tuned during operation with a hot reload.
RaftSnapshotThreshold *int `hcl:"raft_snapshot_threshold"`
// RaftSnapshotInterval controls how often we check if we should perform a
// snapshot. We randomly stagger between this value and 2x this value to
// prevent the entire cluster from performing a snapshot at once. The value passed
// here is the initial setting used. This can be tuned during operation with a
// hot reload.
RaftSnapshotInterval *string `hcl:"raft_snapshot_interval"`
// RaftTrailingLogs controls how many logs are left after a snapshot. This is
// used so that we can quickly replay logs on a follower instead of being
// forced to send an entire snapshot. The value passed here is the initial
// setting used. This can be tuned during operation using a hot reload.
RaftTrailingLogs *int `hcl:"raft_trailing_logs"`
}
func (s *ServerConfig) Copy() *ServerConfig {
@@ -632,6 +651,9 @@ func (s *ServerConfig) Copy() *ServerConfig {
ns.ExtraKeysHCL = slices.Clone(s.ExtraKeysHCL)
ns.Search = s.Search.Copy()
ns.RaftBoltConfig = s.RaftBoltConfig.Copy()
ns.RaftSnapshotInterval = pointer.Copy(s.RaftSnapshotInterval)
ns.RaftSnapshotThreshold = pointer.Copy(s.RaftSnapshotThreshold)
ns.RaftTrailingLogs = pointer.Copy(s.RaftTrailingLogs)
return &ns
}
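
The Copy method uses pointer helpers so the duplicated config shares no backing storage with the original. A self-contained sketch of those semantics, assuming generic helpers equivalent to Nomad's helper/pointer package (the local struct is illustrative, not the real ServerConfig):

```go
package main

import "fmt"

// Of and Copy approximate Nomad's helper/pointer functions,
// shown here as a self-contained sketch.
func Of[T any](v T) *T { return &v }

func Copy[T any](p *T) *T {
	if p == nil {
		return nil
	}
	v := *p
	return &v
}

// serverConfig is an illustrative stand-in for the real ServerConfig.
type serverConfig struct {
	RaftSnapshotInterval  *string
	RaftSnapshotThreshold *int
	RaftTrailingLogs      *int
}

func main() {
	src := serverConfig{
		RaftSnapshotInterval:  Of("120s"),
		RaftSnapshotThreshold: Of(8192),
		RaftTrailingLogs:      nil, // unset: the raft library default applies
	}
	dst := serverConfig{
		RaftSnapshotInterval:  Copy(src.RaftSnapshotInterval),
		RaftSnapshotThreshold: Copy(src.RaftSnapshotThreshold),
		RaftTrailingLogs:      Copy(src.RaftTrailingLogs),
	}
	// The copy is independent: mutating dst leaves src untouched.
	*dst.RaftSnapshotThreshold = 10000
	fmt.Println(*src.RaftSnapshotThreshold, *dst.RaftSnapshotThreshold) // 8192 10000
}
```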


@@ -885,6 +885,18 @@ func (s *Server) Reload(newConfig *Config) error {
reloadSchedulers(s, newVals)
}
raftRC := raft.ReloadableConfig{
TrailingLogs: newConfig.RaftConfig.TrailingLogs,
SnapshotInterval: newConfig.RaftConfig.SnapshotInterval,
SnapshotThreshold: newConfig.RaftConfig.SnapshotThreshold,
HeartbeatTimeout: newConfig.RaftConfig.HeartbeatTimeout,
ElectionTimeout: newConfig.RaftConfig.ElectionTimeout,
}
if err := s.raft.ReloadConfig(raftRC); err != nil {
multierror.Append(&mErr, err)
}
return mErr.ErrorOrNil()
}
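
Because these fields are part of raft's hot-reloadable surface, the server can push new values into a live node without a restart. A minimal sketch of that round trip, using the same hashicorp/raft calls as above (the helper itself is hypothetical):

```go
package raftutil

import "github.com/hashicorp/raft"

// BumpTrailingLogs reads the node's current hot-reloadable settings,
// adjusts a single field, and reapplies the rest unchanged, matching
// how Server.Reload passes the full struct each time.
// (Hypothetical helper, for illustration only.)
func BumpTrailingLogs(r *raft.Raft, n uint64) error {
	rc := r.ReloadableConfig() // current TrailingLogs, snapshot, and timeout values
	rc.TrailingLogs = n
	return r.ReloadConfig(rc) // applies the new values; errors if they are invalid
}
```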


@@ -16,6 +16,7 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -516,6 +517,29 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) {
testutil.WaitForLeader(t, s2.RPC)
}
func TestServer_ReloadRaftConfig(t *testing.T) {
ci.Parallel(t)
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.RaftConfig.TrailingLogs = 10
})
defer cleanupS1()
testutil.WaitForLeader(t, s1.RPC)
rc := s1.raft.ReloadableConfig()
must.Eq(t, rc.TrailingLogs, uint64(10))
cfg := s1.GetConfig()
cfg.RaftConfig.TrailingLogs = 100
// Hot-reload the configuration
must.NoError(t, s1.Reload(cfg))
// Check it from the raft library
rc = s1.raft.ReloadableConfig()
must.Eq(t, rc.TrailingLogs, uint64(100))
}
func TestServer_InvalidSchedulers(t *testing.T) {
ci.Parallel(t)
require := require.New(t)


@@ -135,8 +135,8 @@ server {
- `max_heartbeats_per_second` `(float: 50.0)` - Specifies the maximum target
rate of heartbeats being processed per second. This allows the TTL to be
increased to meet the target rate. See [Client Heartbeats](#client-heartbeats)
below for details.
- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether
this server will act as a non-voting member of the cluster to help provide
@@ -182,12 +182,31 @@ server {
to setting this to a value of 1. Increasing the timings makes leader election
less likely during periods of networking issues or resource starvation. Since
leader elections pause Nomad's normal work, it may be beneficial for slow or
unreliable networks to wait longer before electing a new leader. The trade-off
when raising this value is that during network partitions or other events
(server crash) where a leader is lost, Nomad will not elect a new leader for
a longer period of time than the default. The [`nomad.nomad.leader.barrier` and
`nomad.raft.leader.lastContact` metrics](/docs/operations/metrics-reference) are good
indicators of how often leader elections occur and of Raft latency.
- `raft_snapshot_threshold` `(int: "8192")` - Specifies the minimum number of
Raft logs to be written to disk before the node is allowed to take a snapshot.
This reduces the frequency and impact of creating snapshots. During node
startup, Raft restores the latest snapshot and then applies the individual
logs to catch the node up to the last known state. This can be tuned during
operation by a hot configuration reload.
- `raft_snapshot_interval` `(string: "120s")` - Specifies the minimum time between
checks if Raft should perform a snapshot. The Raft library randomly staggers
between this value and twice this value to avoid the entire cluster performing
a snapshot at once. Nodes are eligible to snapshot once they have exceeded the
`raft_snapshot_threshold`. This value can be tuned during operation by a hot
configuration reload.
- `raft_trailing_logs` `(int: "10240")` - Specifies how many logs are retained
after a snapshot. These logs are used so that Raft can quickly replay logs on
a follower instead of being forced to send an entire snapshot. This value can
be tuned during operation by a hot configuration reload.
- `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy
zone that this server will be a part of for Autopilot management. For more
@@ -441,7 +460,7 @@ told to heartbeat after the maximum interval.
The actual value used should take into consideration how much tolerance your
system has for a delay in noticing crashed Clients. For example a
`failover_heartbeat_ttl` of 30 minutes may give even the slowest clients in the
largest clusters ample time to heartbeat after an election. However, if the
election was due to a datacenter-wide failure affecting Clients, it will be 30
minutes before Nomad recognizes that they are `down` and reschedules their
work.