mirror of https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
Add raft snapshot configuration options (#15522)
* Add config elements
* Wire in snapshot configuration to raft
* Add hot reload of raft config
* Add documentation for new raft settings
* Add changelog
.changelog/15522.txt (new file, +4)
@@ -0,0 +1,4 @@
+```release-note:improvement
+server: Added raft snapshot arguments to server config
+server: Certain raft configuration elements can now be reloaded without restarting the server
+```

@@ -212,6 +212,32 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
             return nil, fmt.Errorf("raft_multiplier cannot be %d. Must be between 1 and %d", *agentConfig.Server.RaftMultiplier, MaxRaftMultiplier)
         }
     }
+
+    if vPtr := agentConfig.Server.RaftTrailingLogs; vPtr != nil {
+        if *vPtr < 1 {
+            return nil, fmt.Errorf("raft_trailing_logs must be positive, got %d", *vPtr)
+        }
+        conf.RaftConfig.TrailingLogs = uint64(*vPtr)
+    }
+
+    if vPtr := agentConfig.Server.RaftSnapshotInterval; vPtr != nil {
+        dur, err := time.ParseDuration(*vPtr)
+        if err != nil {
+            return nil, err
+        }
+        if dur < 5*time.Millisecond {
+            return nil, fmt.Errorf("raft_snapshot_interval must be at least 5ms, got %q", *vPtr)
+        }
+        conf.RaftConfig.SnapshotInterval = dur
+    }
+
+    if vPtr := agentConfig.Server.RaftSnapshotThreshold; vPtr != nil {
+        if *vPtr < 1 {
+            return nil, fmt.Errorf("raft_snapshot_threshold must be positive, got %d", *vPtr)
+        }
+        conf.RaftConfig.SnapshotThreshold = uint64(*vPtr)
+    }
+
     conf.RaftConfig.ElectionTimeout *= time.Duration(raftMultiplier)
     conf.RaftConfig.HeartbeatTimeout *= time.Duration(raftMultiplier)
     conf.RaftConfig.LeaderLeaseTimeout *= time.Duration(raftMultiplier)

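To see the guard pattern above in isolation, here is a minimal, self-contained sketch of the snapshot-interval validation, using only the standard library and `hashicorp/raft`. The helper name `applySnapshotInterval` is illustrative rather than Nomad's; the 5ms floor mirrors the minimum the raft library itself enforces for `SnapshotInterval`.

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/raft"
)

// applySnapshotInterval parses a user-supplied duration string, rejects
// anything under raft's 5ms floor, and applies it to the raft config.
// (Illustrative helper, not part of Nomad.)
func applySnapshotInterval(conf *raft.Config, raw string) error {
	dur, err := time.ParseDuration(raw)
	if err != nil {
		return err
	}
	if dur < 5*time.Millisecond {
		return fmt.Errorf("raft_snapshot_interval must be at least 5ms, got %q", raw)
	}
	conf.SnapshotInterval = dur
	return nil
}

func main() {
	conf := raft.DefaultConfig()
	if err := applySnapshotInterval(conf, "120s"); err != nil {
		fmt.Println("invalid interval:", err)
		return
	}
	fmt.Println("snapshot interval:", conf.SnapshotInterval)
}
```
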
@@ -525,7 +551,7 @@ func (a *Agent) serverConfig() (*nomad.Config, error) {
 }
 
 // finalizeServerConfig sets configuration fields on the server config that are
-// not staticly convertable and are from the agent.
+// not statically convertible and are from the agent.
 func (a *Agent) finalizeServerConfig(c *nomad.Config) {
     // Setup the logging
     c.Logger = a.logger

@@ -553,7 +579,7 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
 }
 
 // finalizeClientConfig sets configuration fields on the client config that are
-// not staticly convertable and are from the agent.
+// not statically convertible and are from the agent.
 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error {
     // Setup the logging
     c.Logger = a.logger

@@ -973,7 +999,7 @@ func (a *Agent) setupClient() error {
     }
 
     // Set up a custom listener and dialer. This is used by Nomad clients when
-    // running consul-template functions that utilise the Nomad API. We lazy
+    // running consul-template functions that utilize the Nomad API. We lazy
     // load this into the client config, therefore this needs to happen before
     // we call NewClient.
     a.builtinListener, a.builtinDialer = bufconndialer.New()

@@ -20,6 +20,7 @@ import (
     "github.com/hashicorp/nomad/nomad/structs"
     "github.com/hashicorp/nomad/nomad/structs/config"
     "github.com/hashicorp/nomad/testutil"
+    "github.com/hashicorp/raft"
 )
 
 func TestAgent_RPC_Ping(t *testing.T) {

@@ -551,6 +552,104 @@ func TestAgent_ServerConfig_RaftMultiplier_Bad(t *testing.T) {
     }
 }
 
+func TestAgent_ServerConfig_RaftTrailingLogs(t *testing.T) {
+    ci.Parallel(t)
+
+    cases := []struct {
+        name   string
+        value  *int
+        expect interface{}
+        isErr  bool
+    }{
+        {
+            name:   "bad",
+            value:  pointer.Of(int(-1)),
+            isErr:  true,
+            expect: "raft_trailing_logs must be positive",
+        },
+        {
+            name:   "good",
+            value:  pointer.Of(int(10)),
+            expect: uint64(10),
+        },
+        {
+            name:   "empty",
+            value:  nil,
+            expect: raft.DefaultConfig().TrailingLogs,
+        },
+    }
+
+    for _, tc := range cases {
+        tc := tc
+        t.Run(tc.name, func(t *testing.T) {
+            ci.Parallel(t)
+            conf := DevConfig(nil)
+            require.NoError(t, conf.normalizeAddrs())
+
+            conf.Server.RaftTrailingLogs = tc.value
+            nc, err := convertServerConfig(conf)
+
+            if !tc.isErr {
+                must.NoError(t, err)
+                val := tc.expect.(uint64)
+                must.Eq(t, val, nc.RaftConfig.TrailingLogs)
+                return
+            }
+            must.Error(t, err)
+            must.StrContains(t, err.Error(), tc.expect.(string))
+        })
+    }
+}
+
+func TestAgent_ServerConfig_RaftSnapshotThreshold(t *testing.T) {
+    ci.Parallel(t)
+
+    cases := []struct {
+        name   string
+        value  *int
+        expect interface{}
+        isErr  bool
+    }{
+        {
+            name:   "bad",
+            value:  pointer.Of(int(-1)),
+            isErr:  true,
+            expect: "raft_snapshot_threshold must be positive",
+        },
+        {
+            name:   "good",
+            value:  pointer.Of(int(10)),
+            expect: uint64(10),
+        },
+        {
+            name:   "empty",
+            value:  nil,
+            expect: raft.DefaultConfig().SnapshotThreshold,
+        },
+    }
+
+    for _, tc := range cases {
+        tc := tc
+        t.Run(tc.name, func(t *testing.T) {
+            ci.Parallel(t)
+            conf := DevConfig(nil)
+            require.NoError(t, conf.normalizeAddrs())
+
+            conf.Server.RaftSnapshotThreshold = tc.value
+            nc, err := convertServerConfig(conf)
+
+            if !tc.isErr {
+                must.NoError(t, err)
+                val := tc.expect.(uint64)
+                must.Eq(t, val, nc.RaftConfig.SnapshotThreshold)
+                return
+            }
+            must.Error(t, err)
+            must.StrContains(t, err.Error(), tc.expect.(string))
+        })
+    }
+}
+
 func TestAgent_ServerConfig_RaftProtocol_3(t *testing.T) {
     ci.Parallel(t)
 

@@ -610,6 +610,25 @@ type ServerConfig struct {
 
     // RaftBoltConfig configures boltdb as used by raft.
     RaftBoltConfig *RaftBoltConfig `hcl:"raft_boltdb"`
+
+    // RaftSnapshotThreshold controls how many outstanding logs there must be
+    // before we perform a snapshot. This is to prevent excessive snapshotting by
+    // replaying a small set of logs instead. The value passed here is the initial
+    // setting used. This can be tuned during operation with a hot reload.
+    RaftSnapshotThreshold *int `hcl:"raft_snapshot_threshold"`
+
+    // RaftSnapshotInterval controls how often we check if we should perform a
+    // snapshot. We randomly stagger between this value and 2x this value to
+    // prevent the entire cluster from performing a snapshot at once. The value
+    // passed here is the initial setting used. This can be tuned during operation
+    // with a hot reload.
+    RaftSnapshotInterval *string `hcl:"raft_snapshot_interval"`
+
+    // RaftTrailingLogs controls how many logs are left after a snapshot. This is
+    // used so that we can quickly replay logs on a follower instead of being
+    // forced to send an entire snapshot. The value passed here is the initial
+    // setting used. This can be tuned during operation using a hot reload.
+    RaftTrailingLogs *int `hcl:"raft_trailing_logs"`
 }
 
 func (s *ServerConfig) Copy() *ServerConfig {

@@ -632,6 +651,9 @@ func (s *ServerConfig) Copy() *ServerConfig {
     ns.ExtraKeysHCL = slices.Clone(s.ExtraKeysHCL)
     ns.Search = s.Search.Copy()
     ns.RaftBoltConfig = s.RaftBoltConfig.Copy()
+    ns.RaftSnapshotInterval = pointer.Copy(s.RaftSnapshotInterval)
+    ns.RaftSnapshotThreshold = pointer.Copy(s.RaftSnapshotThreshold)
+    ns.RaftTrailingLogs = pointer.Copy(s.RaftTrailingLogs)
     return &ns
 }

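The three `pointer.Copy` calls above (and the `pointer.Of` calls in the tests) assume nil-safe scalar-pointer helpers. A sketch of the assumed semantics, not necessarily Nomad's exact implementation:

```go
package pointer

// Of returns a pointer to a copy of v, e.g. pointer.Of(int(10)) builds a
// *int literal inline.
func Of[A any](v A) *A {
	return &v
}

// Copy returns a pointer to a fresh copy of *a, or nil when a is nil, so a
// cloned ServerConfig never aliases the original's raft tuning fields.
func Copy[A any](a *A) *A {
	if a == nil {
		return nil
	}
	na := *a
	return &na
}
```
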
@@ -885,6 +885,18 @@ func (s *Server) Reload(newConfig *Config) error {
         reloadSchedulers(s, newVals)
     }
 
+    raftRC := raft.ReloadableConfig{
+        TrailingLogs:      newConfig.RaftConfig.TrailingLogs,
+        SnapshotInterval:  newConfig.RaftConfig.SnapshotInterval,
+        SnapshotThreshold: newConfig.RaftConfig.SnapshotThreshold,
+        HeartbeatTimeout:  newConfig.RaftConfig.HeartbeatTimeout,
+        ElectionTimeout:   newConfig.RaftConfig.ElectionTimeout,
+    }
+
+    if err := s.raft.ReloadConfig(raftRC); err != nil {
+        multierror.Append(&mErr, err)
+    }
+
     return mErr.ErrorOrNil()
 }

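For context on the reload path: `hashicorp/raft` models the runtime-tunable subset of settings as `ReloadableConfig`, applied to a live node with `(*raft.Raft).ReloadConfig`. A small sketch of deriving that subset from a full config; the `buildReloadable` helper is illustrative, not part of the library or of Nomad:

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/raft"
)

// buildReloadable copies the fields raft allows to change at runtime,
// mirroring the wiring in Server.Reload above.
func buildReloadable(c *raft.Config) raft.ReloadableConfig {
	return raft.ReloadableConfig{
		TrailingLogs:      c.TrailingLogs,
		SnapshotInterval:  c.SnapshotInterval,
		SnapshotThreshold: c.SnapshotThreshold,
		HeartbeatTimeout:  c.HeartbeatTimeout,
		ElectionTimeout:   c.ElectionTimeout,
	}
}

func main() {
	c := raft.DefaultConfig()
	c.SnapshotInterval = 90 * time.Second
	fmt.Printf("%+v\n", buildReloadable(c))
	// On a live node this struct would be passed to (*raft.Raft).ReloadConfig.
}
```
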
@@ -16,6 +16,7 @@ import (
     "github.com/hashicorp/nomad/nomad/structs"
     "github.com/hashicorp/nomad/nomad/structs/config"
     "github.com/hashicorp/nomad/testutil"
+    "github.com/shoenig/test/must"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
 )

@@ -516,6 +517,29 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) {
     testutil.WaitForLeader(t, s2.RPC)
 }
 
+func TestServer_ReloadRaftConfig(t *testing.T) {
+    ci.Parallel(t)
+
+    s1, cleanupS1 := TestServer(t, func(c *Config) {
+        c.NumSchedulers = 0
+        c.RaftConfig.TrailingLogs = 10
+    })
+    defer cleanupS1()
+
+    testutil.WaitForLeader(t, s1.RPC)
+    rc := s1.raft.ReloadableConfig()
+    must.Eq(t, uint64(10), rc.TrailingLogs)
+    cfg := s1.GetConfig()
+    cfg.RaftConfig.TrailingLogs = 100
+
+    // Hot-reload the configuration
+    must.NoError(t, s1.Reload(cfg))
+
+    // Check it from the raft library
+    rc = s1.raft.ReloadableConfig()
+    must.Eq(t, uint64(100), rc.TrailingLogs)
+}
+
 func TestServer_InvalidSchedulers(t *testing.T) {
     ci.Parallel(t)
     require := require.New(t)

@@ -135,8 +135,8 @@ server {
 
 - `max_heartbeats_per_second` `(float: 50.0)` - Specifies the maximum target
   rate of heartbeats being processed per second. This allows the TTL to be
-  increased to meet the target rate. See [Client
-  Heartbeats](#client-heartbeats) below for details.
+  increased to meet the target rate. See [Client Heartbeats](#client-heartbeats)
+  below for details.
 
 - `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether
   this server will act as a non-voting member of the cluster to help provide

@@ -182,12 +182,31 @@ server {
   to setting this to a value of 1. Increasing the timings makes leader election
   less likely during periods of networking issues or resource starvation. Since
   leader elections pause Nomad's normal work, it may be beneficial for slow or
-  unreliable networks to wait longer before electing a new leader. The tradeoff
+  unreliable networks to wait longer before electing a new leader. The trade-off
   when raising this value is that during network partitions or other events
   (server crash) where a leader is lost, Nomad will not elect a new leader for
   a longer period of time than the default. The [`nomad.nomad.leader.barrier` and
   `nomad.raft.leader.lastContact` metrics](/docs/operations/metrics-reference) are a good
-  indicator of how often leader elections occur and raft latency.
+  indicator of how often leader elections occur and Raft latency.
+
+- `raft_snapshot_threshold` `(int: "8192")` - Specifies the minimum number of
+  Raft logs to be written to disk before the node is allowed to take a snapshot.
+  This reduces the frequency and impact of creating snapshots. During node
+  startup, Raft restores the latest snapshot and then applies the individual
+  logs to catch the node up to the last known state. This can be tuned during
+  operation by a hot configuration reload.
+
+- `raft_snapshot_interval` `(string: "120s")` - Specifies the minimum time between
+  checks of whether Raft should perform a snapshot. The Raft library randomly
+  staggers between this value and twice this value to avoid the entire cluster
+  performing a snapshot at once. Nodes are eligible to snapshot once they have
+  exceeded the `raft_snapshot_threshold`. This value can be tuned during
+  operation by a hot configuration reload.
+
+- `raft_trailing_logs` `(int: "10240")` - Specifies how many logs are retained
+  after a snapshot. These logs are used so that Raft can quickly replay logs on
+  a follower instead of being forced to send an entire snapshot. This value can
+  be tuned during operation by a hot configuration reload.
 
 - `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy
   zone that this server will be a part of for Autopilot management. For more

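The defaults quoted in these three entries come from the raft library itself; a quick sanity check against `raft.DefaultConfig()`:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/raft"
)

func main() {
	def := raft.DefaultConfig()
	fmt.Println(def.SnapshotThreshold) // 8192
	fmt.Println(def.SnapshotInterval)  // 2m0s, the documented "120s"
	fmt.Println(def.TrailingLogs)      // 10240
}
```
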
@@ -441,7 +460,7 @@ told to heartbeat after the maximum interval.
 The actual value used should take into consideration how much tolerance your
 system has for a delay in noticing crashed Clients. For example a
 `failover_heartbeat_ttl` of 30 minutes may give even the slowest clients in the
-largest clusters ample time to heartbeat after an election. However if the
+largest clusters ample time to heartbeat after an election. However, if the
 election was due to a datacenter-wide failure affecting Clients, it will be 30
 minutes before Nomad recognizes that they are `down` and reschedules their
 work.