From 1d9ffa640bb876bb03c0c4f7d0449d282bb54ffb Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Sun, 16 Feb 2020 15:23:20 -0600 Subject: [PATCH] implement MinQuorum --- api/operator_autopilot.go | 4 ++++ command/agent/agent.go | 3 +++ command/agent/config_parse_test.go | 1 + command/agent/config_test.go | 2 ++ command/agent/operator_endpoint.go | 2 ++ command/agent/testdata/basic.hcl | 1 + command/agent/testdata/basic.json | 1 + command/operator_autopilot_get.go | 1 + command/operator_autopilot_set.go | 7 +++++++ command/operator_autopilot_set_test.go | 2 ++ nomad/autopilot.go | 1 + nomad/fsm_test.go | 4 ++++ nomad/state/autopilot_test.go | 1 + nomad/structs/config/autopilot.go | 7 +++++++ nomad/structs/config/autopilot_test.go | 3 +++ nomad/structs/operator.go | 4 ++++ 16 files changed, 44 insertions(+) diff --git a/api/operator_autopilot.go b/api/operator_autopilot.go index fd06bd4c7..a44befa8b 100644 --- a/api/operator_autopilot.go +++ b/api/operator_autopilot.go @@ -22,6 +22,10 @@ type AutopilotConfiguration struct { // be behind before being considered unhealthy. MaxTrailingLogs uint64 + // MinQuorum sets the minimum number of servers required in a cluster before + // autopilot can prune dead servers. + MinQuorum uint + // ServerStabilizationTime is the minimum amount of time a server must be // in a stable, healthy state before it can be added to the cluster. Only // applicable with Raft protocol version 3 or higher. 
diff --git a/command/agent/agent.go b/command/agent/agent.go index fff26e469..c918b03e7 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -225,6 +225,9 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { if agentConfig.Autopilot.MaxTrailingLogs != 0 { conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) } + if agentConfig.Autopilot.MinQuorum != 0 { + conf.AutopilotConfig.MinQuorum = uint(agentConfig.Autopilot.MinQuorum) + } if agentConfig.Autopilot.EnableRedundancyZones != nil { conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 62d187698..3ce173324 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -224,6 +224,7 @@ var basicConfig = &Config{ LastContactThreshold: 12705 * time.Second, LastContactThresholdHCL: "12705s", MaxTrailingLogs: 17849, + MinQuorum: 3, EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &trueValue, EnableCustomUpgrades: &trueValue, diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 24126fd2c..c09c6c402 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -181,6 +181,7 @@ func TestConfig_Merge(t *testing.T) { ServerStabilizationTime: 1 * time.Second, LastContactThreshold: 1 * time.Second, MaxTrailingLogs: 1, + MinQuorum: 1, EnableRedundancyZones: &falseValue, DisableUpgradeMigration: &falseValue, EnableCustomUpgrades: &falseValue, @@ -365,6 +366,7 @@ func TestConfig_Merge(t *testing.T) { ServerStabilizationTime: 2 * time.Second, LastContactThreshold: 2 * time.Second, MaxTrailingLogs: 2, + MinQuorum: 2, EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &trueValue, EnableCustomUpgrades: &trueValue, diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go index 31e86d21a..7ac48c0fd 100644 --- 
a/command/agent/operator_endpoint.go +++ b/command/agent/operator_endpoint.go @@ -108,6 +108,7 @@ func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, re CleanupDeadServers: reply.CleanupDeadServers, LastContactThreshold: reply.LastContactThreshold, MaxTrailingLogs: reply.MaxTrailingLogs, + MinQuorum: reply.MinQuorum, ServerStabilizationTime: reply.ServerStabilizationTime, EnableRedundancyZones: reply.EnableRedundancyZones, DisableUpgradeMigration: reply.DisableUpgradeMigration, @@ -131,6 +132,7 @@ func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, re CleanupDeadServers: conf.CleanupDeadServers, LastContactThreshold: conf.LastContactThreshold, MaxTrailingLogs: conf.MaxTrailingLogs, + MinQuorum: conf.MinQuorum, ServerStabilizationTime: conf.ServerStabilizationTime, EnableRedundancyZones: conf.EnableRedundancyZones, DisableUpgradeMigration: conf.DisableUpgradeMigration, diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index 16db9d398..43b481ee9 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -234,6 +234,7 @@ autopilot { disable_upgrade_migration = true last_contact_threshold = "12705s" max_trailing_logs = 17849 + min_quorum = 3 enable_redundancy_zones = true server_stabilization_time = "23057s" enable_custom_upgrades = true diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index 7adf2079b..b7575f02e 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -28,6 +28,7 @@ "enable_redundancy_zones": true, "last_contact_threshold": "12705s", "max_trailing_logs": 17849, + "min_quorum": 3, "server_stabilization_time": "23057s" } ], diff --git a/command/operator_autopilot_get.go b/command/operator_autopilot_get.go index b6e00b53a..6650949bb 100644 --- a/command/operator_autopilot_get.go +++ b/command/operator_autopilot_get.go @@ -45,6 +45,7 @@ func (c *OperatorAutopilotGetCommand) 
Run(args []string) int { c.Ui.Output(fmt.Sprintf("CleanupDeadServers = %v", config.CleanupDeadServers)) c.Ui.Output(fmt.Sprintf("LastContactThreshold = %v", config.LastContactThreshold.String())) c.Ui.Output(fmt.Sprintf("MaxTrailingLogs = %v", config.MaxTrailingLogs)) + c.Ui.Output(fmt.Sprintf("MinQuorum = %v", config.MinQuorum)) c.Ui.Output(fmt.Sprintf("ServerStabilizationTime = %v", config.ServerStabilizationTime.String())) c.Ui.Output(fmt.Sprintf("EnableRedundancyZones = %v", config.EnableRedundancyZones)) c.Ui.Output(fmt.Sprintf("DisableUpgradeMigration = %v", config.DisableUpgradeMigration)) diff --git a/command/operator_autopilot_set.go b/command/operator_autopilot_set.go index 45fe89499..c481079c5 100644 --- a/command/operator_autopilot_set.go +++ b/command/operator_autopilot_set.go @@ -34,6 +34,7 @@ func (c *OperatorAutopilotSetCommand) Name() string { return "operator autopilot func (c *OperatorAutopilotSetCommand) Run(args []string) int { var cleanupDeadServers flags.BoolValue var maxTrailingLogs flags.UintValue + var minQuorum flags.UintValue var lastContactThreshold flags.DurationValue var serverStabilizationTime flags.DurationValue var enableRedundancyZones flags.BoolValue @@ -50,6 +51,7 @@ func (c *OperatorAutopilotSetCommand) Run(args []string) int { f.Var(&enableRedundancyZones, "enable-redundancy-zones", "") f.Var(&disableUpgradeMigration, "disable-upgrade-migration", "") f.Var(&enableCustomUpgrades, "enable-custom-upgrades", "") + f.Var(&minQuorum, "min-quorum", "") if err := f.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err)) @@ -80,6 +82,7 @@ func (c *OperatorAutopilotSetCommand) Run(args []string) int { trailing := uint(conf.MaxTrailingLogs) maxTrailingLogs.Merge(&trailing) conf.MaxTrailingLogs = uint64(trailing) + minQuorum.Merge(&conf.MinQuorum) lastContactThreshold.Merge(&conf.LastContactThreshold) serverStabilizationTime.Merge(&conf.ServerStabilizationTime) @@ -131,6 +134,10 @@ Set Config Options: Controls 
the maximum number of log entries that a server can trail the leader by before being considered unhealthy. + -min-quorum=<value> + Controls the minimum number of servers required in a cluster + before autopilot can prune dead servers. + -redundancy-zone-tag=<value> (Enterprise-only) Controls the node_meta tag name used for separating servers into different redundancy zones. diff --git a/command/operator_autopilot_set_test.go b/command/operator_autopilot_set_test.go index 3f156cfa2..515b5f7d8 100644 --- a/command/operator_autopilot_set_test.go +++ b/command/operator_autopilot_set_test.go @@ -26,6 +26,7 @@ func TestOperatorAutopilotSetConfigCommand(t *testing.T) { "-address=" + addr, "-cleanup-dead-servers=false", "-max-trailing-logs=99", + "-min-quorum=3", "-last-contact-threshold=123ms", "-server-stabilization-time=123ms", "-enable-redundancy-zones=true", @@ -46,6 +47,7 @@ func TestOperatorAutopilotSetConfigCommand(t *testing.T) { require.False(conf.CleanupDeadServers) require.EqualValues(99, conf.MaxTrailingLogs) + require.EqualValues(3, conf.MinQuorum) require.EqualValues(123*time.Millisecond, conf.LastContactThreshold) require.EqualValues(123*time.Millisecond, conf.ServerStabilizationTime) require.True(conf.EnableRedundancyZones) diff --git a/nomad/autopilot.go b/nomad/autopilot.go index 5ddcdbfbc..5e42fff54 100644 --- a/nomad/autopilot.go +++ b/nomad/autopilot.go @@ -35,6 +35,7 @@ func (d *AutopilotDelegate) AutopilotConfig() *autopilot.Config { CleanupDeadServers: c.CleanupDeadServers, LastContactThreshold: c.LastContactThreshold, MaxTrailingLogs: c.MaxTrailingLogs, + MinQuorum: c.MinQuorum, ServerStabilizationTime: c.ServerStabilizationTime, DisableUpgradeMigration: c.DisableUpgradeMigration, ModifyIndex: c.ModifyIndex, diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 0fe91256f..f8bbcd752 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -3033,6 +3033,7 @@ func TestFSM_Autopilot(t *testing.T) { CleanupDeadServers: true, LastContactThreshold: 10 * 
time.Second, MaxTrailingLogs: 300, + MinQuorum: 3, }, } buf, err := structs.Encode(structs.AutopilotRequestType, req) @@ -3058,6 +3059,9 @@ func TestFSM_Autopilot(t *testing.T) { if config.MaxTrailingLogs != req.Config.MaxTrailingLogs { t.Fatalf("bad: %v", config.MaxTrailingLogs) } + if config.MinQuorum != req.Config.MinQuorum { + t.Fatalf("bad: %v", config.MinQuorum) + } // Now use CAS and provide an old index req.CAS = true diff --git a/nomad/state/autopilot_test.go b/nomad/state/autopilot_test.go index a43ddebe8..f1805e0f3 100644 --- a/nomad/state/autopilot_test.go +++ b/nomad/state/autopilot_test.go @@ -15,6 +15,7 @@ func TestStateStore_Autopilot(t *testing.T) { CleanupDeadServers: true, LastContactThreshold: 5 * time.Second, MaxTrailingLogs: 500, + MinQuorum: 3, ServerStabilizationTime: 100 * time.Second, EnableRedundancyZones: true, DisableUpgradeMigration: true, diff --git a/nomad/structs/config/autopilot.go b/nomad/structs/config/autopilot.go index 83f65b0f2..947aa35e5 100644 --- a/nomad/structs/config/autopilot.go +++ b/nomad/structs/config/autopilot.go @@ -26,6 +26,10 @@ type AutopilotConfig struct { // be behind before being considered unhealthy. MaxTrailingLogs int `hcl:"max_trailing_logs"` + // MinQuorum sets the minimum number of servers required in a cluster + // before autopilot can prune dead servers. + MinQuorum int `hcl:"min_quorum"` + // (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones. 
EnableRedundancyZones *bool `hcl:"enable_redundancy_zones"` @@ -73,6 +77,9 @@ func (a *AutopilotConfig) Merge(b *AutopilotConfig) *AutopilotConfig { if b.MaxTrailingLogs != 0 { result.MaxTrailingLogs = b.MaxTrailingLogs } + if b.MinQuorum != 0 { + result.MinQuorum = b.MinQuorum + } if b.EnableRedundancyZones != nil { result.EnableRedundancyZones = b.EnableRedundancyZones } diff --git a/nomad/structs/config/autopilot_test.go b/nomad/structs/config/autopilot_test.go index 644541c0a..e379ff8de 100644 --- a/nomad/structs/config/autopilot_test.go +++ b/nomad/structs/config/autopilot_test.go @@ -14,6 +14,7 @@ func TestAutopilotConfig_Merge(t *testing.T) { ServerStabilizationTime: 1 * time.Second, LastContactThreshold: 1 * time.Second, MaxTrailingLogs: 1, + MinQuorum: 1, EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &falseValue, EnableCustomUpgrades: &trueValue, @@ -24,6 +25,7 @@ func TestAutopilotConfig_Merge(t *testing.T) { ServerStabilizationTime: 2 * time.Second, LastContactThreshold: 2 * time.Second, MaxTrailingLogs: 2, + MinQuorum: 2, EnableRedundancyZones: nil, DisableUpgradeMigration: nil, EnableCustomUpgrades: nil, @@ -34,6 +36,7 @@ func TestAutopilotConfig_Merge(t *testing.T) { ServerStabilizationTime: 2 * time.Second, LastContactThreshold: 2 * time.Second, MaxTrailingLogs: 2, + MinQuorum: 2, EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &falseValue, EnableCustomUpgrades: &trueValue, diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go index 978d5304f..01bea07bf 100644 --- a/nomad/structs/operator.go +++ b/nomad/structs/operator.go @@ -103,6 +103,10 @@ type AutopilotConfig struct { // be behind before being considered unhealthy. MaxTrailingLogs uint64 + // MinQuorum sets the minimum number of servers required in a cluster + // before autopilot can prune dead servers. + MinQuorum uint + // (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones. EnableRedundancyZones bool