Merge pull request #3670 from hashicorp/autopilot
Add Autopilot feature from Consul
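For orientation, a minimal sketch of how the client-side methods this change introduces might be exercised. The agent address (api.DefaultConfig's default of http://127.0.0.1:4646), the error handling, and the printed fields are illustrative assumptions; only the Operator methods themselves come from this diff.

package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Assumes a local agent reachable at the default address.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}
	operator := client.Operator()

	// Read the current Autopilot configuration.
	conf, err := operator.AutopilotGetConfiguration(nil)
	if err != nil {
		log.Fatal(err)
	}

	// Flip a setting and write it back with a check-and-set on ModifyIndex.
	conf.CleanupDeadServers = false
	updated, err := operator.AutopilotCASConfiguration(conf, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("updated:", updated)

	// Ask for the leader's view of server health.
	health, err := operator.AutopilotServerHealth(nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("healthy:", health.Healthy, "failure tolerance:", health.FailureTolerance)
}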
@@ -76,8 +76,6 @@ func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) err
	}
	r.setWriteOptions(q)

	// TODO (alexdadgar) Currently we made address a query parameter. Once
	// IDs are in place this will be DELETE /v1/operator/raft/peer/<id>.
	r.params.Set("address", address)

	_, resp, err := requireOK(op.c.doRequest(r))
@@ -88,3 +86,23 @@ func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) err
	resp.Body.Close()
	return nil
}

// RaftRemovePeerByID is used to kick a stale peer (one that is in the Raft
// quorum but no longer known to Serf or the catalog) by ID.
func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
	r, err := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
	if err != nil {
		return err
	}
	r.setWriteOptions(q)

	r.params.Set("id", id)

	_, resp, err := requireOK(op.c.doRequest(r))
	if err != nil {
		return err
	}

	resp.Body.Close()
	return nil
}
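As a usage note, the new ID-based call mirrors the address-based one. A fragment reusing the operator handle from the sketch near the top of this page; the server ID shown is a placeholder, not a value from the diff:

// Remove a stale peer by its raft ID instead of its IP:port address.
if err := operator.RaftRemovePeerByID("example-raft-id", nil); err != nil {
	log.Fatalf("failed to remove peer: %v", err)
}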
api/operator_autopilot.go (new file, 232 lines)
@@ -0,0 +1,232 @@
package api

import (
	"bytes"
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"
)

// AutopilotConfiguration is used for querying/setting the Autopilot configuration.
// Autopilot helps manage operator tasks related to Nomad servers like removing
// failed servers from the Raft quorum.
type AutopilotConfiguration struct {
	// CleanupDeadServers controls whether to remove dead servers from the Raft
	// peer list when a new server joins
	CleanupDeadServers bool

	// LastContactThreshold is the limit on the amount of time a server can go
	// without leader contact before being considered unhealthy.
	LastContactThreshold *ReadableDuration

	// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
	// be behind before being considered unhealthy.
	MaxTrailingLogs uint64

	// ServerStabilizationTime is the minimum amount of time a server must be
	// in a stable, healthy state before it can be added to the cluster. Only
	// applicable with Raft protocol version 3 or higher.
	ServerStabilizationTime *ReadableDuration

	// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
	// servers into zones for redundancy. If left blank, this feature will be disabled.
	RedundancyZoneTag string

	// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
	// strategy of waiting until enough newer-versioned servers have been added to the
	// cluster before promoting them to voters.
	DisableUpgradeMigration bool

	// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
	// performing upgrade migrations. If left blank, the Nomad version will be used.
	UpgradeVersionTag string

	// CreateIndex holds the index corresponding the creation of this configuration.
	// This is a read-only field.
	CreateIndex uint64

	// ModifyIndex will be set to the index of the last update when retrieving the
	// Autopilot configuration. Resubmitting a configuration with
	// AutopilotCASConfiguration will perform a check-and-set operation which ensures
	// there hasn't been a subsequent update since the configuration was retrieved.
	ModifyIndex uint64
}

// ServerHealth is the health (from the leader's point of view) of a server.
type ServerHealth struct {
	// ID is the raft ID of the server.
	ID string

	// Name is the node name of the server.
	Name string

	// Address is the address of the server.
	Address string

	// The status of the SerfHealth check for the server.
	SerfStatus string

	// Version is the Nomad version of the server.
	Version string

	// Leader is whether this server is currently the leader.
	Leader bool

	// LastContact is the time since this node's last contact with the leader.
	LastContact *ReadableDuration

	// LastTerm is the highest leader term this server has a record of in its Raft log.
	LastTerm uint64

	// LastIndex is the last log index this server has a record of in its Raft log.
	LastIndex uint64

	// Healthy is whether or not the server is healthy according to the current
	// Autopilot config.
	Healthy bool

	// Voter is whether this is a voting server.
	Voter bool

	// StableSince is the last time this server's Healthy value changed.
	StableSince time.Time
}

// OperatorHealthReply is a representation of the overall health of the cluster
type OperatorHealthReply struct {
	// Healthy is true if all the servers in the cluster are healthy.
	Healthy bool

	// FailureTolerance is the number of healthy servers that could be lost without
	// an outage occurring.
	FailureTolerance int

	// Servers holds the health of each server.
	Servers []ServerHealth
}

// ReadableDuration is a duration type that is serialized to JSON in human readable format.
type ReadableDuration time.Duration

func NewReadableDuration(dur time.Duration) *ReadableDuration {
	d := ReadableDuration(dur)
	return &d
}

func (d *ReadableDuration) String() string {
	return d.Duration().String()
}

func (d *ReadableDuration) Duration() time.Duration {
	if d == nil {
		return time.Duration(0)
	}
	return time.Duration(*d)
}

func (d *ReadableDuration) MarshalJSON() ([]byte, error) {
	return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil
}

func (d *ReadableDuration) UnmarshalJSON(raw []byte) error {
	if d == nil {
		return fmt.Errorf("cannot unmarshal to nil pointer")
	}

	str := string(raw)
	if len(str) < 2 || str[0] != '"' || str[len(str)-1] != '"' {
		return fmt.Errorf("must be enclosed with quotes: %s", str)
	}
	dur, err := time.ParseDuration(str[1 : len(str)-1])
	if err != nil {
		return err
	}
	*d = ReadableDuration(dur)
	return nil
}

// AutopilotGetConfiguration is used to query the current Autopilot configuration.
func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, error) {
	r, err := op.c.newRequest("GET", "/v1/operator/autopilot/configuration")
	if err != nil {
		return nil, err
	}
	r.setQueryOptions(q)
	_, resp, err := requireOK(op.c.doRequest(r))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var out AutopilotConfiguration
	if err := decodeBody(resp, &out); err != nil {
		return nil, err
	}

	return &out, nil
}

// AutopilotSetConfiguration is used to set the current Autopilot configuration.
func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) error {
	r, err := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration")
	if err != nil {
		return err
	}
	r.setWriteOptions(q)
	r.obj = conf
	_, resp, err := requireOK(op.c.doRequest(r))
	if err != nil {
		return err
	}
	resp.Body.Close()
	return nil
}

// AutopilotCASConfiguration is used to perform a Check-And-Set update on the
// Autopilot configuration. The ModifyIndex value will be respected. Returns
// true on success or false on failures.
func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, error) {
	r, err := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration")
	if err != nil {
		return false, err
	}
	r.setWriteOptions(q)
	r.params.Set("cas", strconv.FormatUint(conf.ModifyIndex, 10))
	r.obj = conf
	_, resp, err := requireOK(op.c.doRequest(r))
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	var buf bytes.Buffer
	if _, err := io.Copy(&buf, resp.Body); err != nil {
		return false, fmt.Errorf("Failed to read response: %v", err)
	}
	res := strings.Contains(buf.String(), "true")

	return res, nil
}

// AutopilotServerHealth is used to query Autopilot's top-level view of the health
// of each Nomad server.
func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, error) {
	r, err := op.c.newRequest("GET", "/v1/operator/autopilot/health")
	if err != nil {
		return nil, err
	}
	r.setQueryOptions(q)
	_, resp, err := requireOK(op.c.doRequest(r))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var out OperatorHealthReply
	if err := decodeBody(resp, &out); err != nil {
		return nil, err
	}
	return &out, nil
}
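Because LastContactThreshold and ServerStabilizationTime are ReadableDurations, the configuration marshals its durations as quoted strings such as "200ms". A small self-contained sketch; the field values are illustrative, not defaults taken from this diff:

package main

import (
	"encoding/json"
	"fmt"
	"time"

	"github.com/hashicorp/nomad/api"
)

func main() {
	conf := api.AutopilotConfiguration{
		CleanupDeadServers:      true,
		LastContactThreshold:    api.NewReadableDuration(200 * time.Millisecond),
		MaxTrailingLogs:         250,
		ServerStabilizationTime: api.NewReadableDuration(10 * time.Second),
	}
	out, _ := json.Marshal(conf)
	fmt.Println(string(out))
	// Prints, in field order:
	// {"CleanupDeadServers":true,"LastContactThreshold":"200ms","MaxTrailingLogs":250,
	//  "ServerStabilizationTime":"10s","RedundancyZoneTag":"","DisableUpgradeMigration":false,
	//  "UpgradeVersionTag":"","CreateIndex":0,"ModifyIndex":0}
}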
api/operator_autopilot_test.go (new file, 89 lines)
@@ -0,0 +1,89 @@
package api

import (
	"testing"

	"fmt"

	"github.com/hashicorp/consul/testutil/retry"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/assert"
)

func TestAPI_OperatorAutopilotGetSetConfiguration(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)
	c, s := makeClient(t, nil, nil)
	defer s.Stop()

	operator := c.Operator()
	config, err := operator.AutopilotGetConfiguration(nil)
	assert.Nil(err)
	assert.True(config.CleanupDeadServers)

	// Change a config setting
	newConf := &AutopilotConfiguration{CleanupDeadServers: false}
	err = operator.AutopilotSetConfiguration(newConf, nil)
	assert.Nil(err)

	config, err = operator.AutopilotGetConfiguration(nil)
	assert.Nil(err)
	assert.False(config.CleanupDeadServers)
}

func TestAPI_OperatorAutopilotCASConfiguration(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)
	c, s := makeClient(t, nil, nil)
	defer s.Stop()

	operator := c.Operator()
	config, err := operator.AutopilotGetConfiguration(nil)
	assert.Nil(err)
	assert.True(config.CleanupDeadServers)

	// Pass an invalid ModifyIndex
	{
		newConf := &AutopilotConfiguration{
			CleanupDeadServers: false,
			ModifyIndex:        config.ModifyIndex - 1,
		}
		resp, err := operator.AutopilotCASConfiguration(newConf, nil)
		assert.Nil(err)
		assert.False(resp)
	}

	// Pass a valid ModifyIndex
	{
		newConf := &AutopilotConfiguration{
			CleanupDeadServers: false,
			ModifyIndex:        config.ModifyIndex,
		}
		resp, err := operator.AutopilotCASConfiguration(newConf, nil)
		assert.Nil(err)
		assert.True(resp)
	}
}

func TestAPI_OperatorAutopilotServerHealth(t *testing.T) {
	t.Parallel()
	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.AdvertiseAddrs.RPC = "127.0.0.1"
		c.Server.RaftProtocol = 3
	})
	defer s.Stop()

	operator := c.Operator()
	retry.Run(t, func(r *retry.R) {
		out, err := operator.AutopilotServerHealth(nil)
		if err != nil {
			r.Fatalf("err: %v", err)
		}

		if len(out.Servers) != 1 ||
			!out.Servers[0].Healthy ||
			out.Servers[0].Name != fmt.Sprintf("%s.global", s.Config.NodeName) {
			r.Fatalf("bad: %v", out)
		}
	})
}
@@ -36,3 +36,18 @@ func TestOperator_RaftRemovePeerByAddress(t *testing.T) {
		t.Fatalf("err: %v", err)
	}
}

func TestOperator_RaftRemovePeerByID(t *testing.T) {
	t.Parallel()
	c, s := makeClient(t, nil, nil)
	defer s.Stop()

	// If we get this error, it proves we sent the address all the way
	// through.
	operator := c.Operator()
	err := operator.RaftRemovePeerByID("nope", nil)
	if err == nil || !strings.Contains(err.Error(),
		"id \"nope\" was not found in the Raft configuration") {
		t.Fatalf("err: %v", err)
	}
}
@@ -160,6 +160,32 @@ func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Confi
	if agentConfig.Sentinel != nil {
		conf.SentinelConfig = agentConfig.Sentinel
	}
	if agentConfig.Server.NonVotingServer {
		conf.NonVoter = true
	}
	if agentConfig.Autopilot != nil {
		if agentConfig.Autopilot.CleanupDeadServers != nil {
			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
		}
		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
		}
		if agentConfig.Autopilot.LastContactThreshold != 0 {
			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
		}
		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
		}
		if agentConfig.Autopilot.RedundancyZoneTag != "" {
			conf.AutopilotConfig.RedundancyZoneTag = agentConfig.Autopilot.RedundancyZoneTag
		}
		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
		}
		if agentConfig.Autopilot.UpgradeVersionTag != "" {
			conf.AutopilotConfig.UpgradeVersionTag = agentConfig.Autopilot.UpgradeVersionTag
		}
	}

	// Set up the bind addresses
	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
@@ -67,6 +67,7 @@ server {
	bootstrap_expect = 5
	data_dir = "/tmp/data"
	protocol_version = 3
	raft_protocol = 3
	num_schedulers = 2
	enabled_schedulers = ["test"]
	node_gc_threshold = "12h"
@@ -81,6 +82,7 @@ server {
	retry_max = 3
	retry_interval = "15s"
	rejoin_after_leave = true
	non_voting_server = true
	encrypt = "abc"
}
acl {
@@ -159,3 +161,12 @@ sentinel {
		args = ["x", "y", "z"]
	}
}
autopilot {
	cleanup_dead_servers = true
	disable_upgrade_migration = true
	last_contact_threshold = "12705s"
	max_trailing_logs = 17849
	redundancy_zone_tag = "foo"
	server_stabilization_time = "23057s"
	upgrade_version_tag = "bar"
}
@@ -130,6 +130,9 @@ type Config struct {

	// Sentinel holds sentinel related settings
	Sentinel *config.SentinelConfig `mapstructure:"sentinel"`

	// Autopilot contains the configuration for Autopilot behavior.
	Autopilot *config.AutopilotConfig `mapstructure:"autopilot"`
}

// ClientConfig is configuration specific to the client mode
@@ -327,6 +330,10 @@ type ServerConfig struct {
	// true, we ignore the leave, and rejoin the cluster on start.
	RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"`

	// NonVotingServer is whether this server will act as a non-voting member
	// of the cluster to help provide read scalability. (Enterprise-only)
	NonVotingServer bool `mapstructure:"non_voting_server"`

	// Encryption key to use for the Serf communication
	EncryptKey string `mapstructure:"encrypt" json:"-"`
}
@@ -604,6 +611,7 @@ func DefaultConfig() *Config {
		TLSConfig: &config.TLSConfig{},
		Sentinel:  &config.SentinelConfig{},
		Version:   version.GetVersion(),
		Autopilot: config.DefaultAutopilotConfig(),
	}
}

@@ -762,6 +770,13 @@ func (c *Config) Merge(b *Config) *Config {
		result.Sentinel = result.Sentinel.Merge(b.Sentinel)
	}

	if result.Autopilot == nil && b.Autopilot != nil {
		autopilot := *b.Autopilot
		result.Autopilot = &autopilot
	} else if b.Autopilot != nil {
		result.Autopilot = result.Autopilot.Merge(b.Autopilot)
	}

	// Merge config files lists
	result.Files = append(result.Files, b.Files...)

@@ -1016,6 +1031,9 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
	if b.RejoinAfterLeave {
		result.RejoinAfterLeave = true
	}
	if b.NonVotingServer {
		result.NonVotingServer = true
	}
	if b.EncryptKey != "" {
		result.EncryptKey = b.EncryptKey
	}
@@ -98,6 +98,7 @@ func parseConfig(result *Config, list *ast.ObjectList) error {
		"http_api_response_headers",
		"acl",
		"sentinel",
		"autopilot",
	}
	if err := helper.CheckHCLKeys(list, valid); err != nil {
		return multierror.Prefix(err, "config:")
@@ -121,6 +122,7 @@ func parseConfig(result *Config, list *ast.ObjectList) error {
	delete(m, "http_api_response_headers")
	delete(m, "acl")
	delete(m, "sentinel")
	delete(m, "autopilot")

	// Decode the rest
	if err := mapstructure.WeakDecode(m, result); err != nil {
@@ -204,6 +206,13 @@ func parseConfig(result *Config, list *ast.ObjectList) error {
		}
	}

	// Parse Autopilot config
	if o := list.Filter("autopilot"); len(o.Items) > 0 {
		if err := parseAutopilot(&result.Autopilot, o); err != nil {
			return multierror.Prefix(err, "autopilot->")
		}
	}

	// Parse out http_api_response_headers fields. These are in HCL as a list so
	// we need to iterate over them and merge them.
	if headersO := list.Filter("http_api_response_headers"); len(headersO.Items) > 0 {
@@ -509,6 +518,7 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error {
		"bootstrap_expect",
		"data_dir",
		"protocol_version",
		"raft_protocol",
		"num_schedulers",
		"enabled_schedulers",
		"node_gc_threshold",
@@ -525,6 +535,7 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error {
		"rejoin_after_leave",
		"encrypt",
		"authoritative_region",
		"non_voting_server",
	}
	if err := helper.CheckHCLKeys(listVal, valid); err != nil {
		return err
@@ -838,3 +849,49 @@ func parseSentinel(result **config.SentinelConfig, list *ast.ObjectList) error {
	*result = &config
	return nil
}

func parseAutopilot(result **config.AutopilotConfig, list *ast.ObjectList) error {
	list = list.Elem()
	if len(list.Items) > 1 {
		return fmt.Errorf("only one 'autopilot' block allowed")
	}

	// Get our Autopilot object
	listVal := list.Items[0].Val

	// Check for invalid keys
	valid := []string{
		"cleanup_dead_servers",
		"server_stabilization_time",
		"last_contact_threshold",
		"max_trailing_logs",
		"redundancy_zone_tag",
		"disable_upgrade_migration",
		"upgrade_version_tag",
	}

	if err := helper.CheckHCLKeys(listVal, valid); err != nil {
		return err
	}

	var m map[string]interface{}
	if err := hcl.DecodeObject(&m, listVal); err != nil {
		return err
	}

	autopilotConfig := config.DefaultAutopilotConfig()
	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
		DecodeHook:       mapstructure.StringToTimeDurationHookFunc(),
		WeaklyTypedInput: true,
		Result:           &autopilotConfig,
	})
	if err != nil {
		return err
	}
	if err := dec.Decode(m); err != nil {
		return err
	}

	*result = autopilotConfig
	return nil
}
@@ -88,6 +88,7 @@ func TestConfig_Parse(t *testing.T) {
				BootstrapExpect:   5,
				DataDir:           "/tmp/data",
				ProtocolVersion:   3,
				RaftProtocol:      3,
				NumSchedulers:     2,
				EnabledSchedulers: []string{"test"},
				NodeGCThreshold:   "12h",
@@ -102,6 +103,7 @@ func TestConfig_Parse(t *testing.T) {
				RetryInterval:    "15s",
				RejoinAfterLeave: true,
				RetryMaxAttempts: 3,
				NonVotingServer:  true,
				EncryptKey:       "abc",
			},
			ACL: &ACLConfig{
@@ -186,6 +188,15 @@ func TestConfig_Parse(t *testing.T) {
					},
				},
			},
			Autopilot: &config.AutopilotConfig{
				CleanupDeadServers:      &trueValue,
				ServerStabilizationTime: 23057 * time.Second,
				LastContactThreshold:    12705 * time.Second,
				MaxTrailingLogs:         17849,
				RedundancyZoneTag:       "foo",
				DisableUpgradeMigration: &trueValue,
				UpgradeVersionTag:       "bar",
			},
		},
		false,
	},

@@ -35,6 +35,7 @@ func TestConfig_Merge(t *testing.T) {
		Vault:     &config.VaultConfig{},
		Consul:    &config.ConsulConfig{},
		Sentinel:  &config.SentinelConfig{},
		Autopilot: &config.AutopilotConfig{},
	}

	c2 := &Config{
@@ -100,6 +101,7 @@ func TestConfig_Merge(t *testing.T) {
			BootstrapExpect: 1,
			DataDir:         "/tmp/data1",
			ProtocolVersion: 1,
			RaftProtocol:    1,
			NumSchedulers:   1,
			NodeGCThreshold: "1h",
			HeartbeatGrace:  30 * time.Second,
@@ -158,6 +160,15 @@ func TestConfig_Merge(t *testing.T) {
			ClientAutoJoin:     &falseValue,
			ChecksUseAdvertise: &falseValue,
		},
		Autopilot: &config.AutopilotConfig{
			CleanupDeadServers:      &falseValue,
			ServerStabilizationTime: 1 * time.Second,
			LastContactThreshold:    1 * time.Second,
			MaxTrailingLogs:         1,
			RedundancyZoneTag:       "1",
			DisableUpgradeMigration: &falseValue,
			UpgradeVersionTag:       "1",
		},
	}

	c3 := &Config{
@@ -248,6 +259,7 @@ func TestConfig_Merge(t *testing.T) {
			RetryJoin:       []string{"1.1.1.1"},
			RetryInterval:   "10s",
			retryInterval:   time.Second * 10,
			NonVotingServer: true,
		},
		ACL: &ACLConfig{
			Enabled: true,
@@ -311,6 +323,15 @@ func TestConfig_Merge(t *testing.T) {
				},
			},
		},
		Autopilot: &config.AutopilotConfig{
			CleanupDeadServers:      &trueValue,
			ServerStabilizationTime: 2 * time.Second,
			LastContactThreshold:    2 * time.Second,
			MaxTrailingLogs:         2,
			RedundancyZoneTag:       "2",
			DisableUpgradeMigration: &trueValue,
			UpgradeVersionTag:       "2",
		},
	}

	result := c0.Merge(c1)
@@ -18,6 +18,7 @@ import (
	assetfs "github.com/elazarl/go-bindata-assetfs"
	"github.com/hashicorp/nomad/helper/tlsutil"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/mitchellh/mapstructure"
	"github.com/rs/cors"
	"github.com/ugorji/go/codec"
)

@@ -183,7 +184,9 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {

	s.mux.HandleFunc("/v1/search", s.wrap(s.SearchRequest))

	s.mux.HandleFunc("/v1/operator/", s.wrap(s.OperatorRequest))
	s.mux.HandleFunc("/v1/operator/raft/", s.wrap(s.OperatorRequest))
	s.mux.HandleFunc("/v1/operator/autopilot/configuration", s.wrap(s.OperatorAutopilotConfiguration))
	s.mux.HandleFunc("/v1/operator/autopilot/health", s.wrap(s.OperatorServerHealth))

	s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest))
	s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries))
@@ -337,6 +340,24 @@ func decodeBody(req *http.Request, out interface{}) error {
	return dec.Decode(&out)
}

// decodeBodyFunc is used to decode a JSON request body invoking
// a given callback function
func decodeBodyFunc(req *http.Request, out interface{}, cb func(interface{}) error) error {
	var raw interface{}
	dec := json.NewDecoder(req.Body)
	if err := dec.Decode(&raw); err != nil {
		return err
	}

	// Invoke the callback prior to decode
	if cb != nil {
		if err := cb(raw); err != nil {
			return err
		}
	}
	return mapstructure.Decode(raw, out)
}

// setIndex is used to set the index response header
func setIndex(resp http.ResponseWriter, index uint64) {
	resp.Header().Set("X-Nomad-Index", strconv.FormatUint(index, 10))
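The routes registered above can also be driven directly over HTTP. A rough sketch against a local agent; the address, the payload values, and the absence of ACL-token handling are illustrative assumptions rather than part of the diff:

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
)

func main() {
	base := "http://127.0.0.1:4646" // assumed local agent address

	// Read the Autopilot configuration.
	resp, err := http.Get(base + "/v1/operator/autopilot/configuration")
	if err != nil {
		panic(err)
	}
	body, _ := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println(string(body))

	// Update it; duration fields may be sent as strings such as "200ms"
	// because the handler fixes them up before decoding.
	payload := `{"CleanupDeadServers": true, "LastContactThreshold": "200ms"}`
	req, _ := http.NewRequest("PUT", base+"/v1/operator/autopilot/configuration", strings.NewReader(payload))
	if resp, err = http.DefaultClient.Do(req); err == nil {
		resp.Body.Close()
	}

	// Leader's view of server health; the handler replies with HTTP 429
	// when anything is unhealthy.
	resp, err = http.Get(base + "/v1/operator/autopilot/health")
	if err != nil {
		panic(err)
	}
	body, _ = ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println(resp.StatusCode, string(body))
}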
@@ -4,6 +4,12 @@ import (
	"net/http"
	"strings"

	"fmt"
	"strconv"
	"time"

	"github.com/hashicorp/consul/agent/consul/autopilot"
	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
)

@@ -49,21 +55,222 @@ func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Reques
		return nil, nil
	}

	var args structs.RaftPeerByAddressRequest
	s.parseWriteRequest(req, &args.WriteRequest)

	params := req.URL.Query()
	if _, ok := params["address"]; ok {
		args.Address = raft.ServerAddress(params.Get("address"))
	} else {
	_, hasID := params["id"]
	_, hasAddress := params["address"]

	if !hasID && !hasAddress {
		resp.WriteHeader(http.StatusBadRequest)
		resp.Write([]byte("Must specify ?address with IP:port of peer to remove"))
		fmt.Fprint(resp, "Must specify either ?id with the server's ID or ?address with IP:port of peer to remove")
		return nil, nil
	}
	if hasID && hasAddress {
		resp.WriteHeader(http.StatusBadRequest)
		fmt.Fprint(resp, "Must specify only one of ?id or ?address")
		return nil, nil
	}

	var reply struct{}
	if err := s.agent.RPC("Operator.RaftRemovePeerByAddress", &args, &reply); err != nil {
		return nil, err
	if hasID {
		var args structs.RaftPeerByIDRequest
		s.parseWriteRequest(req, &args.WriteRequest)

		var reply struct{}
		args.ID = raft.ServerID(params.Get("id"))
		if err := s.agent.RPC("Operator.RaftRemovePeerByID", &args, &reply); err != nil {
			return nil, err
		}
	} else {
		var args structs.RaftPeerByAddressRequest
		s.parseWriteRequest(req, &args.WriteRequest)

		var reply struct{}
		args.Address = raft.ServerAddress(params.Get("address"))
		if err := s.agent.RPC("Operator.RaftRemovePeerByAddress", &args, &reply); err != nil {
			return nil, err
		}
	}

	return nil, nil
}

// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	// Switch on the method
	switch req.Method {
	case "GET":
		var args structs.GenericRequest
		if done := s.parse(resp, req, &args.Region, &args.QueryOptions); done {
			return nil, nil
		}

		var reply autopilot.Config
		if err := s.agent.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
			return nil, err
		}

		out := api.AutopilotConfiguration{
			CleanupDeadServers:      reply.CleanupDeadServers,
			LastContactThreshold:    api.NewReadableDuration(reply.LastContactThreshold),
			MaxTrailingLogs:         reply.MaxTrailingLogs,
			ServerStabilizationTime: api.NewReadableDuration(reply.ServerStabilizationTime),
			RedundancyZoneTag:       reply.RedundancyZoneTag,
			DisableUpgradeMigration: reply.DisableUpgradeMigration,
			UpgradeVersionTag:       reply.UpgradeVersionTag,
			CreateIndex:             reply.CreateIndex,
			ModifyIndex:             reply.ModifyIndex,
		}

		return out, nil

	case "PUT":
		var args structs.AutopilotSetConfigRequest
		s.parseRegion(req, &args.Region)
		s.parseToken(req, &args.AuthToken)

		var conf api.AutopilotConfiguration
		durations := NewDurationFixer("lastcontactthreshold", "serverstabilizationtime")
		if err := decodeBodyFunc(req, &conf, durations.FixupDurations); err != nil {
			resp.WriteHeader(http.StatusBadRequest)
			fmt.Fprintf(resp, "Error parsing autopilot config: %v", err)
			return nil, nil
		}

		args.Config = autopilot.Config{
			CleanupDeadServers:      conf.CleanupDeadServers,
			LastContactThreshold:    conf.LastContactThreshold.Duration(),
			MaxTrailingLogs:         conf.MaxTrailingLogs,
			ServerStabilizationTime: conf.ServerStabilizationTime.Duration(),
			RedundancyZoneTag:       conf.RedundancyZoneTag,
			DisableUpgradeMigration: conf.DisableUpgradeMigration,
			UpgradeVersionTag:       conf.UpgradeVersionTag,
		}

		// Check for cas value
		params := req.URL.Query()
		if _, ok := params["cas"]; ok {
			casVal, err := strconv.ParseUint(params.Get("cas"), 10, 64)
			if err != nil {
				resp.WriteHeader(http.StatusBadRequest)
				fmt.Fprintf(resp, "Error parsing cas value: %v", err)
				return nil, nil
			}
			args.Config.ModifyIndex = casVal
			args.CAS = true
		}

		var reply bool
		if err := s.agent.RPC("Operator.AutopilotSetConfiguration", &args, &reply); err != nil {
			return nil, err
		}

		// Only use the out value if this was a CAS
		if !args.CAS {
			return true, nil
		}
		return reply, nil

	default:
		resp.WriteHeader(http.StatusMethodNotAllowed)
		return nil, nil
	}
}

// OperatorServerHealth is used to get the health of the servers in the given Region.
func (s *HTTPServer) OperatorServerHealth(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	if req.Method != "GET" {
		resp.WriteHeader(http.StatusMethodNotAllowed)
		return nil, nil
	}

	var args structs.GenericRequest
	if done := s.parse(resp, req, &args.Region, &args.QueryOptions); done {
		return nil, nil
	}

	var reply autopilot.OperatorHealthReply
	if err := s.agent.RPC("Operator.ServerHealth", &args, &reply); err != nil {
		return nil, err
	}

	// Reply with status 429 if something is unhealthy
	if !reply.Healthy {
		resp.WriteHeader(http.StatusTooManyRequests)
	}

	out := &api.OperatorHealthReply{
		Healthy:          reply.Healthy,
		FailureTolerance: reply.FailureTolerance,
	}
	for _, server := range reply.Servers {
		out.Servers = append(out.Servers, api.ServerHealth{
			ID:          server.ID,
			Name:        server.Name,
			Address:     server.Address,
			Version:     server.Version,
			Leader:      server.Leader,
			SerfStatus:  server.SerfStatus.String(),
			LastContact: api.NewReadableDuration(server.LastContact),
			LastTerm:    server.LastTerm,
			LastIndex:   server.LastIndex,
			Healthy:     server.Healthy,
			Voter:       server.Voter,
			StableSince: server.StableSince.Round(time.Second).UTC(),
		})
	}

	return out, nil
}

type durationFixer map[string]bool

func NewDurationFixer(fields ...string) durationFixer {
	d := make(map[string]bool)
	for _, field := range fields {
		d[field] = true
	}
	return d
}

// FixupDurations is used to handle parsing any field names in the map to time.Durations
func (d durationFixer) FixupDurations(raw interface{}) error {
	rawMap, ok := raw.(map[string]interface{})
	if !ok {
		return nil
	}
	for key, val := range rawMap {
		switch val.(type) {
		case map[string]interface{}:
			if err := d.FixupDurations(val); err != nil {
				return err
			}

		case []interface{}:
			for _, v := range val.([]interface{}) {
				if err := d.FixupDurations(v); err != nil {
					return err
				}
			}

		case []map[string]interface{}:
			for _, v := range val.([]map[string]interface{}) {
				if err := d.FixupDurations(v); err != nil {
					return err
				}
			}

		default:
			if d[strings.ToLower(key)] {
				// Convert a string value into an integer
				if vStr, ok := val.(string); ok {
					dur, err := time.ParseDuration(vStr)
					if err != nil {
						return err
					}
					rawMap[key] = dur
				}
			}
		}
	}
	return nil
}
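A sketch of what the duration fixer defined above does to a decoded request body before mapstructure runs. This would live in a _test.go file in the same agent package; the keys and values are illustrative.

// In package agent, alongside the handler above (assumes "fmt" is imported).
func ExampleDurationFixer_fixup() {
	fixer := NewDurationFixer("lastcontactthreshold", "serverstabilizationtime")
	raw := map[string]interface{}{
		"CleanupDeadServers":   true,
		"LastContactThreshold": "200ms", // arrives as a string from JSON
	}
	if err := fixer.FixupDurations(raw); err != nil {
		panic(err)
	}
	// The string has been replaced by a time.Duration, so the subsequent
	// mapstructure decode into autopilot.Config succeeds.
	fmt.Printf("%T\n", raw["LastContactThreshold"])
	// Output: time.Duration
}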
@@ -2,12 +2,18 @@ package agent

import (
	"bytes"
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/agent/consul/autopilot"
	"github.com/hashicorp/consul/testutil/retry"
	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/assert"
)

func TestHTTP_OperatorRaftConfiguration(t *testing.T) {
@@ -40,13 +46,12 @@ func TestHTTP_OperatorRaftConfiguration(t *testing.T) {
}

func TestHTTP_OperatorRaftPeer(t *testing.T) {
	assert := assert.New(t)
	t.Parallel()
	httpTest(t, nil, func(s *TestAgent) {
		body := bytes.NewBuffer(nil)
		req, err := http.NewRequest("DELETE", "/v1/operator/raft/peer?address=nope", body)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
		assert.Nil(err)

		// If we get this error, it proves we sent the address all the
		// way through.
@@ -57,4 +62,244 @@ func TestHTTP_OperatorRaftPeer(t *testing.T) {
			t.Fatalf("err: %v", err)
		}
	})

	httpTest(t, nil, func(s *TestAgent) {
		body := bytes.NewBuffer(nil)
		req, err := http.NewRequest("DELETE", "/v1/operator/raft/peer?id=nope", body)
		assert.Nil(err)

		// If we get this error, it proves we sent the address all the
		// way through.
		resp := httptest.NewRecorder()
		_, err = s.Server.OperatorRaftPeer(resp, req)
		if err == nil || !strings.Contains(err.Error(),
			"id \"nope\" was not found in the Raft configuration") {
			t.Fatalf("err: %v", err)
		}
	})
}

func TestOperator_AutopilotGetConfiguration(t *testing.T) {
	t.Parallel()
	httpTest(t, nil, func(s *TestAgent) {
		body := bytes.NewBuffer(nil)
		req, _ := http.NewRequest("GET", "/v1/operator/autopilot/configuration", body)
		resp := httptest.NewRecorder()
		obj, err := s.Server.OperatorAutopilotConfiguration(resp, req)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
		if resp.Code != 200 {
			t.Fatalf("bad code: %d", resp.Code)
		}
		out, ok := obj.(api.AutopilotConfiguration)
		if !ok {
			t.Fatalf("unexpected: %T", obj)
		}
		if !out.CleanupDeadServers {
			t.Fatalf("bad: %#v", out)
		}
	})
}

func TestOperator_AutopilotSetConfiguration(t *testing.T) {
	t.Parallel()
	httpTest(t, nil, func(s *TestAgent) {
		body := bytes.NewBuffer([]byte(`{"CleanupDeadServers": false}`))
		req, _ := http.NewRequest("PUT", "/v1/operator/autopilot/configuration", body)
		resp := httptest.NewRecorder()
		if _, err := s.Server.OperatorAutopilotConfiguration(resp, req); err != nil {
			t.Fatalf("err: %v", err)
		}
		if resp.Code != 200 {
			t.Fatalf("bad code: %d", resp.Code)
		}

		args := structs.GenericRequest{
			QueryOptions: structs.QueryOptions{
				Region: s.Config.Region,
			},
		}

		var reply autopilot.Config
		if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}
		if reply.CleanupDeadServers {
			t.Fatalf("bad: %#v", reply)
		}
	})
}

func TestOperator_AutopilotCASConfiguration(t *testing.T) {
	t.Parallel()
	httpTest(t, nil, func(s *TestAgent) {
		body := bytes.NewBuffer([]byte(`{"CleanupDeadServers": false}`))
		req, _ := http.NewRequest("PUT", "/v1/operator/autopilot/configuration", body)
		resp := httptest.NewRecorder()
		if _, err := s.Server.OperatorAutopilotConfiguration(resp, req); err != nil {
			t.Fatalf("err: %v", err)
		}
		if resp.Code != 200 {
			t.Fatalf("bad code: %d", resp.Code)
		}

		args := structs.GenericRequest{
			QueryOptions: structs.QueryOptions{
				Region: s.Config.Region,
			},
		}

		var reply autopilot.Config
		if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}

		if reply.CleanupDeadServers {
			t.Fatalf("bad: %#v", reply)
		}

		// Create a CAS request, bad index
		{
			buf := bytes.NewBuffer([]byte(`{"CleanupDeadServers": true}`))
			req, _ := http.NewRequest("PUT", fmt.Sprintf("/v1/operator/autopilot/configuration?cas=%d", reply.ModifyIndex-1), buf)
			resp := httptest.NewRecorder()
			obj, err := s.Server.OperatorAutopilotConfiguration(resp, req)
			if err != nil {
				t.Fatalf("err: %v", err)
			}

			if res := obj.(bool); res {
				t.Fatalf("should NOT work")
			}
		}

		// Create a CAS request, good index
		{
			buf := bytes.NewBuffer([]byte(`{"CleanupDeadServers": true}`))
			req, _ := http.NewRequest("PUT", fmt.Sprintf("/v1/operator/autopilot/configuration?cas=%d", reply.ModifyIndex), buf)
			resp := httptest.NewRecorder()
			obj, err := s.Server.OperatorAutopilotConfiguration(resp, req)
			if err != nil {
				t.Fatalf("err: %v", err)
			}

			if res := obj.(bool); !res {
				t.Fatalf("should work")
			}
		}

		// Verify the update
		if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}
		if !reply.CleanupDeadServers {
			t.Fatalf("bad: %#v", reply)
		}
	})
}

func TestOperator_ServerHealth(t *testing.T) {
	t.Parallel()
	httpTest(t, func(c *Config) {
		c.Server.RaftProtocol = 3
	}, func(s *TestAgent) {
		body := bytes.NewBuffer(nil)
		req, _ := http.NewRequest("GET", "/v1/operator/autopilot/health", body)
		retry.Run(t, func(r *retry.R) {
			resp := httptest.NewRecorder()
			obj, err := s.Server.OperatorServerHealth(resp, req)
			if err != nil {
				r.Fatalf("err: %v", err)
			}
			if resp.Code != 200 {
				r.Fatalf("bad code: %d", resp.Code)
			}
			out, ok := obj.(*api.OperatorHealthReply)
			if !ok {
				r.Fatalf("unexpected: %T", obj)
			}
			if len(out.Servers) != 1 ||
				!out.Servers[0].Healthy ||
				out.Servers[0].Name != s.server.LocalMember().Name ||
				out.Servers[0].SerfStatus != "alive" ||
				out.FailureTolerance != 0 {
				r.Fatalf("bad: %v, %q", out, s.server.LocalMember().Name)
			}
		})
	})
}

func TestOperator_ServerHealth_Unhealthy(t *testing.T) {
	t.Parallel()
	httpTest(t, func(c *Config) {
		c.Server.RaftProtocol = 3
		c.Autopilot.LastContactThreshold = -1 * time.Second
	}, func(s *TestAgent) {
		body := bytes.NewBuffer(nil)
		req, _ := http.NewRequest("GET", "/v1/operator/autopilot/health", body)
		retry.Run(t, func(r *retry.R) {
			resp := httptest.NewRecorder()
			obj, err := s.Server.OperatorServerHealth(resp, req)
			if err != nil {
				r.Fatalf("err: %v", err)
			}
			if resp.Code != 429 {
				r.Fatalf("bad code: %d, %v", resp.Code, obj.(*api.OperatorHealthReply))
			}
			out, ok := obj.(*api.OperatorHealthReply)
			if !ok {
				r.Fatalf("unexpected: %T", obj)
			}
			if len(out.Servers) != 1 ||
				out.Healthy ||
				out.Servers[0].Name != s.server.LocalMember().Name {
				r.Fatalf("bad: %#v", out.Servers)
			}
		})
	})
}

func TestDurationFixer(t *testing.T) {
	assert := assert.New(t)
	obj := map[string]interface{}{
		"key1": []map[string]interface{}{
			{
				"subkey1": "10s",
			},
			{
				"subkey2": "5d",
			},
		},
		"key2": map[string]interface{}{
			"subkey3": "30s",
			"subkey4": "20m",
		},
		"key3": "11s",
		"key4": "49h",
	}
	expected := map[string]interface{}{
		"key1": []map[string]interface{}{
			{
				"subkey1": 10 * time.Second,
			},
			{
				"subkey2": "5d",
			},
		},
		"key2": map[string]interface{}{
			"subkey3": "30s",
			"subkey4": 20 * time.Minute,
		},
		"key3": "11s",
		"key4": 49 * time.Hour,
	}

	fixer := NewDurationFixer("key4", "subkey1", "subkey4")
	if err := fixer.FixupDurations(obj); err != nil {
		t.Fatal(err)
	}

	// Ensure we only processed the intended fieldnames
	assert.Equal(obj, expected)
}
@@ -301,6 +301,11 @@ func (a *TestAgent) config() *Config {
	config.RaftConfig.StartAsLeader = true
	config.RaftTimeout = 500 * time.Millisecond

	// Tighten the autopilot timing
	config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
	config.ServerHealthInterval = 50 * time.Millisecond
	config.AutopilotInterval = 100 * time.Millisecond

	// Bootstrap ourselves
	config.Bootstrap = true
	config.BootstrapExpect = 1
command/operator_autopilot.go (new file, 29 lines)
@@ -0,0 +1,29 @@
package command

import (
	"strings"

	"github.com/mitchellh/cli"
)

type OperatorAutopilotCommand struct {
	Meta
}

func (c *OperatorAutopilotCommand) Run(args []string) int {
	return cli.RunResultHelp
}

func (c *OperatorAutopilotCommand) Synopsis() string {
	return "Provides tools for modifying Autopilot configuration"
}

func (c *OperatorAutopilotCommand) Help() string {
	helpText := `
Usage: nomad operator autopilot <subcommand> [options]

The Autopilot operator command is used to interact with Nomad's Autopilot
subsystem. The command can be used to view or modify the current configuration.
`
	return strings.TrimSpace(helpText)
}
command/operator_autopilot_get.go (new file, 70 lines)
@@ -0,0 +1,70 @@
package command

import (
	"fmt"
	"strings"

	"github.com/posener/complete"
)

type OperatorAutopilotGetCommand struct {
	Meta
}

func (c *OperatorAutopilotGetCommand) AutocompleteFlags() complete.Flags {
	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient))
}

func (c *OperatorAutopilotGetCommand) AutocompleteArgs() complete.Predictor {
	return complete.PredictNothing
}

func (c *OperatorAutopilotGetCommand) Run(args []string) int {
	flags := c.Meta.FlagSet("autopilot", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }

	if err := flags.Parse(args); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
		return 1
	}

	// Set up a client.
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Fetch the current configuration.
	config, err := client.Operator().AutopilotGetConfiguration(nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying Autopilot configuration: %s", err))
		return 1
	}
	c.Ui.Output(fmt.Sprintf("CleanupDeadServers = %v", config.CleanupDeadServers))
	c.Ui.Output(fmt.Sprintf("LastContactThreshold = %v", config.LastContactThreshold.String()))
	c.Ui.Output(fmt.Sprintf("MaxTrailingLogs = %v", config.MaxTrailingLogs))
	c.Ui.Output(fmt.Sprintf("ServerStabilizationTime = %v", config.ServerStabilizationTime.String()))
	c.Ui.Output(fmt.Sprintf("RedundancyZoneTag = %q", config.RedundancyZoneTag))
	c.Ui.Output(fmt.Sprintf("DisableUpgradeMigration = %v", config.DisableUpgradeMigration))
	c.Ui.Output(fmt.Sprintf("UpgradeVersionTag = %q", config.UpgradeVersionTag))

	return 0
}

func (c *OperatorAutopilotGetCommand) Synopsis() string {
	return "Display the current Autopilot configuration"
}

func (c *OperatorAutopilotGetCommand) Help() string {
	helpText := `
Usage: nomad operator autopilot get-config [options]

Displays the current Autopilot configuration.

General Options:

` + generalOptionsUsage()

	return strings.TrimSpace(helpText)
}
command/operator_autopilot_get_test.go (new file, 32 lines)
@@ -0,0 +1,32 @@
package command

import (
	"strings"
	"testing"

	"github.com/mitchellh/cli"
)

func TestOperator_Autopilot_GetConfig_Implements(t *testing.T) {
	t.Parallel()
	var _ cli.Command = &OperatorRaftListCommand{}
}

func TestOperatorAutopilotGetConfigCommand(t *testing.T) {
	t.Parallel()
	s, _, addr := testServer(t, false, nil)
	defer s.Shutdown()

	ui := new(cli.MockUi)
	c := &OperatorAutopilotGetCommand{Meta: Meta{Ui: ui}}
	args := []string{"-address=" + addr}

	code := c.Run(args)
	if code != 0 {
		t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
	}
	output := strings.TrimSpace(ui.OutputWriter.String())
	if !strings.Contains(output, "CleanupDeadServers = true") {
		t.Fatalf("bad: %s", output)
	}
}
command/operator_autopilot_set.go (new file, 156 lines)
@@ -0,0 +1,156 @@
package command

import (
	"fmt"
	"strings"
	"time"

	"github.com/hashicorp/consul/command/flags"
	"github.com/hashicorp/nomad/api"
	"github.com/posener/complete"
)

type OperatorAutopilotSetCommand struct {
	Meta
}

func (c *OperatorAutopilotSetCommand) AutocompleteFlags() complete.Flags {
	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
		complete.Flags{
			"-cleanup-dead-servers":      complete.PredictAnything,
			"-max-trailing-logs":         complete.PredictAnything,
			"-last-contact-threshold":    complete.PredictAnything,
			"-server-stabilization-time": complete.PredictAnything,
			"-redundancy-zone-tag":       complete.PredictAnything,
			"-disable-upgrade-migration": complete.PredictAnything,
			"-upgrade-version-tag":       complete.PredictAnything,
		})
}

func (c *OperatorAutopilotSetCommand) AutocompleteArgs() complete.Predictor {
	return complete.PredictNothing
}

func (c *OperatorAutopilotSetCommand) Run(args []string) int {
	var cleanupDeadServers flags.BoolValue
	var maxTrailingLogs flags.UintValue
	var lastContactThreshold flags.DurationValue
	var serverStabilizationTime flags.DurationValue
	var redundancyZoneTag flags.StringValue
	var disableUpgradeMigration flags.BoolValue
	var upgradeVersionTag flags.StringValue

	f := c.Meta.FlagSet("autopilot", FlagSetClient)
	f.Usage = func() { c.Ui.Output(c.Help()) }

	f.Var(&cleanupDeadServers, "cleanup-dead-servers", "")
	f.Var(&maxTrailingLogs, "max-trailing-logs", "")
	f.Var(&lastContactThreshold, "last-contact-threshold", "")
	f.Var(&serverStabilizationTime, "server-stabilization-time", "")
	f.Var(&redundancyZoneTag, "redundancy-zone-tag", "")
	f.Var(&disableUpgradeMigration, "disable-upgrade-migration", "")
	f.Var(&upgradeVersionTag, "upgrade-version-tag", "")

	if err := f.Parse(args); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
		return 1
	}

	// Set up a client.
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Fetch the current configuration.
	operator := client.Operator()
	conf, err := operator.AutopilotGetConfiguration(nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying for Autopilot configuration: %s", err))
		return 1
	}

	// Update the config values based on the set flags.
	cleanupDeadServers.Merge(&conf.CleanupDeadServers)
	redundancyZoneTag.Merge(&conf.RedundancyZoneTag)
	disableUpgradeMigration.Merge(&conf.DisableUpgradeMigration)
	upgradeVersionTag.Merge(&conf.UpgradeVersionTag)

	trailing := uint(conf.MaxTrailingLogs)
	maxTrailingLogs.Merge(&trailing)
	conf.MaxTrailingLogs = uint64(trailing)

	last := time.Duration(*conf.LastContactThreshold)
	lastContactThreshold.Merge(&last)
	conf.LastContactThreshold = api.NewReadableDuration(last)

	stablization := time.Duration(*conf.ServerStabilizationTime)
	serverStabilizationTime.Merge(&stablization)
	conf.ServerStabilizationTime = api.NewReadableDuration(stablization)

	// Check-and-set the new configuration.
	result, err := operator.AutopilotCASConfiguration(conf, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error setting Autopilot configuration: %s", err))
		return 1
	}
	if result {
		c.Ui.Output("Configuration updated!")
		return 0
	}
	c.Ui.Output("Configuration could not be atomically updated, please try again")
	return 1
}

func (c *OperatorAutopilotSetCommand) Synopsis() string {
	return "Modify the current Autopilot configuration"
}

func (c *OperatorAutopilotSetCommand) Help() string {
	helpText := `
Usage: nomad operator autopilot set-config [options]

Modifies the current Autopilot configuration.

General Options:

` + generalOptionsUsage() + `

Set Config Options:

  -cleanup-dead-servers=[true|false]
     Controls whether Nomad will automatically remove dead servers when
     new ones are successfully added. Must be one of [true|false].

  -disable-upgrade-migration=[true|false]
     (Enterprise-only) Controls whether Nomad will avoid promoting
     new servers until it can perform a migration. Must be one of
     "true|false".

  -last-contact-threshold=200ms
     Controls the maximum amount of time a server can go without contact
     from the leader before being considered unhealthy. Must be a
     duration value such as "200ms".

  -max-trailing-logs=<value>
     Controls the maximum number of log entries that a server can trail
     the leader by before being considered unhealthy.

  -redundancy-zone-tag=<value>
     (Enterprise-only) Controls the node_meta tag name used for
     separating servers into different redundancy zones.

  -server-stabilization-time=<10s>
     Controls the minimum amount of time a server must be stable in
     the 'healthy' state before being added to the cluster. Only takes
     effect if all servers are running Raft protocol version 3 or
     higher. Must be a duration value such as "10s".

  -upgrade-version-tag=<value>
     (Enterprise-only) The node_meta tag to use for version info when
     performing upgrade migrations. If left blank, the Nomad version
     will be used.
`
	return strings.TrimSpace(helpText)
}
command/operator_autopilot_set_test.go (new file, 62 lines)
@@ -0,0 +1,62 @@
package command

import (
	"strings"
	"testing"
	"time"

	"github.com/mitchellh/cli"
)

func TestOperator_Autopilot_SetConfig_Implements(t *testing.T) {
	t.Parallel()
	var _ cli.Command = &OperatorRaftListCommand{}
}

func TestOperatorAutopilotSetConfigCommmand(t *testing.T) {
	t.Parallel()
	s, _, addr := testServer(t, false, nil)
	defer s.Shutdown()

	ui := new(cli.MockUi)
	c := &OperatorAutopilotSetCommand{Meta: Meta{Ui: ui}}
	args := []string{
		"-address=" + addr,
		"-cleanup-dead-servers=false",
		"-max-trailing-logs=99",
		"-last-contact-threshold=123ms",
		"-server-stabilization-time=123ms",
	}

	code := c.Run(args)
	if code != 0 {
		t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
	}
	output := strings.TrimSpace(ui.OutputWriter.String())
	if !strings.Contains(output, "Configuration updated") {
		t.Fatalf("bad: %s", output)
	}

	client, err := c.Client()
	if err != nil {
		t.Fatal(err)
	}

	conf, err := client.Operator().AutopilotGetConfiguration(nil)
	if err != nil {
		t.Fatal(err)
	}

	if conf.CleanupDeadServers {
		t.Fatalf("bad: %#v", conf)
	}
	if conf.MaxTrailingLogs != 99 {
		t.Fatalf("bad: %#v", conf)
	}
	if conf.LastContactThreshold.Duration() != 123*time.Millisecond {
		t.Fatalf("bad: %#v", conf)
	}
	if conf.ServerStabilizationTime.Duration() != 123*time.Millisecond {
		t.Fatalf("bad: %#v", conf)
	}
}
command/operator_autopilot_test.go (new file, 12 lines)
@@ -0,0 +1,12 @@
package command

import (
	"testing"

	"github.com/mitchellh/cli"
)

func TestOperator_Autopilot_Implements(t *testing.T) {
	t.Parallel()
	var _ cli.Command = &OperatorAutopilotCommand{}
}
@@ -32,7 +32,10 @@ General Options:
|
||||
Remove Peer Options:
|
||||
|
||||
-peer-address="IP:port"
|
||||
Remove a Nomad server with given address from the Raft configuration.
|
||||
Remove a Nomad server with given address from the Raft configuration.
|
||||
|
||||
-peer-id="id"
|
||||
Remove a Nomad server with the given ID from the Raft configuration.
|
||||
`
|
||||
return strings.TrimSpace(helpText)
|
||||
}
|
||||
@@ -41,6 +44,7 @@ func (c *OperatorRaftRemoveCommand) AutocompleteFlags() complete.Flags {
|
||||
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
|
||||
complete.Flags{
|
||||
"-peer-address": complete.PredictAnything,
|
||||
"-peer-id": complete.PredictAnything,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -54,11 +58,13 @@ func (c *OperatorRaftRemoveCommand) Synopsis() string {
|
||||
|
||||
func (c *OperatorRaftRemoveCommand) Run(args []string) int {
|
||||
var peerAddress string
|
||||
var peerID string
|
||||
|
||||
flags := c.Meta.FlagSet("raft", FlagSetClient)
|
||||
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||
|
||||
flags.StringVar(&peerAddress, "peer-address", "", "")
|
||||
flags.StringVar(&peerID, "peer-id", "", "")
|
||||
if err := flags.Parse(args); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
|
||||
return 1
|
||||
@@ -72,20 +78,37 @@ func (c *OperatorRaftRemoveCommand) Run(args []string) int {
|
||||
}
|
||||
operator := client.Operator()
|
||||
|
||||
	// TODO (alexdadgar) Once we expose IDs, add support for removing a peer by ID.
|
||||
if len(peerAddress) == 0 {
|
||||
c.Ui.Error(fmt.Sprintf("an address is required for the peer to remove"))
|
||||
if err := raftRemovePeers(peerAddress, peerID, operator); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error removing peer: %v", err))
|
||||
return 1
|
||||
}
|
||||
|
||||
// Try to kick the peer.
|
||||
w := &api.WriteOptions{}
|
||||
if err := operator.RaftRemovePeerByAddress(peerAddress, w); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Failed to remove raft peer: %v", err))
|
||||
return 1
|
||||
if peerAddress != "" {
|
||||
c.Ui.Output(fmt.Sprintf("Removed peer with address %q", peerAddress))
|
||||
} else {
|
||||
c.Ui.Output(fmt.Sprintf("Removed peer with id %q", peerID))
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func raftRemovePeers(address, id string, operator *api.Operator) error {
|
||||
if len(address) == 0 && len(id) == 0 {
|
||||
return fmt.Errorf("an address or id is required for the peer to remove")
|
||||
}
|
||||
if len(address) > 0 && len(id) > 0 {
|
||||
return fmt.Errorf("cannot give both an address and id")
|
||||
}
|
||||
|
||||
// Try to kick the peer.
|
||||
if len(address) > 0 {
|
||||
if err := operator.RaftRemovePeerByAddress(address, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := operator.RaftRemovePeerByID(id, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
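A hedged sketch of exercising the same removal path outside the CLI, using the api client's RaftRemovePeerByID that raftRemovePeers calls above; the UUID is purely illustrative.

package main

import (
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}
	// The server rejects IDs that are not in the Raft configuration, so a
	// typo fails loudly instead of silently doing nothing.
	id := "e35bde83-4e9c-434f-a6ef-453f44ee21ea"
	if err := client.Operator().RaftRemovePeerByID(id, nil); err != nil {
		log.Fatal(err)
	}
}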
@@ -1,10 +1,10 @@
|
||||
package command
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mitchellh/cli"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestOperator_Raft_RemovePeers_Implements(t *testing.T) {
|
||||
@@ -14,6 +14,35 @@ func TestOperator_Raft_RemovePeers_Implements(t *testing.T) {
|
||||
|
||||
func TestOperator_Raft_RemovePeer(t *testing.T) {
|
||||
t.Parallel()
|
||||
assert := assert.New(t)
|
||||
s, _, addr := testServer(t, false, nil)
|
||||
defer s.Shutdown()
|
||||
|
||||
ui := new(cli.MockUi)
|
||||
c := &OperatorRaftRemoveCommand{Meta: Meta{Ui: ui}}
|
||||
args := []string{"-address=" + addr, "-peer-address=nope", "-peer-id=nope"}
|
||||
|
||||
// Give both an address and ID
|
||||
code := c.Run(args)
|
||||
if code != 1 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
|
||||
assert.Contains(ui.ErrorWriter.String(), "cannot give both an address and id")
|
||||
|
||||
// Neither address nor ID present
|
||||
args = args[:1]
|
||||
code = c.Run(args)
|
||||
if code != 1 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
|
||||
assert.Contains(ui.ErrorWriter.String(), "an address or id is required for the peer to remove")
|
||||
}
|
||||
|
||||
func TestOperator_Raft_RemovePeerAddress(t *testing.T) {
|
||||
t.Parallel()
|
||||
assert := assert.New(t)
|
||||
s, _, addr := testServer(t, false, nil)
|
||||
defer s.Shutdown()
|
||||
|
||||
@@ -27,8 +56,24 @@ func TestOperator_Raft_RemovePeer(t *testing.T) {
|
||||
}
|
||||
|
||||
// If we get this error, it proves we sent the address all the way through.
|
||||
output := strings.TrimSpace(ui.ErrorWriter.String())
|
||||
if !strings.Contains(output, "address \"nope\" was not found in the Raft configuration") {
|
||||
t.Fatalf("bad: %s", output)
|
||||
}
|
||||
assert.Contains(ui.ErrorWriter.String(), "address \"nope\" was not found in the Raft configuration")
|
||||
}
|
||||
|
||||
func TestOperator_Raft_RemovePeerID(t *testing.T) {
|
||||
t.Parallel()
|
||||
assert := assert.New(t)
|
||||
s, _, addr := testServer(t, false, nil)
|
||||
defer s.Shutdown()
|
||||
|
||||
ui := new(cli.MockUi)
|
||||
c := &OperatorRaftRemoveCommand{Meta: Meta{Ui: ui}}
|
||||
args := []string{"-address=" + addr, "-peer-id=nope"}
|
||||
|
||||
code := c.Run(args)
|
||||
if code != 1 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
|
||||
// If we get this error, it proves we sent the ID all the way through.
|
||||
assert.Contains(ui.ErrorWriter.String(), "id \"nope\" was not found in the Raft configuration")
|
||||
}
|
||||
|
||||
18
commands.go
@@ -275,6 +275,24 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
|
||||
}, nil
|
||||
},
|
||||
|
||||
"operator autopilot": func() (cli.Command, error) {
|
||||
return &command.OperatorAutopilotCommand{
|
||||
Meta: meta,
|
||||
}, nil
|
||||
},
|
||||
|
||||
"operator autopilot get-config": func() (cli.Command, error) {
|
||||
return &command.OperatorAutopilotGetCommand{
|
||||
Meta: meta,
|
||||
}, nil
|
||||
},
|
||||
|
||||
"operator autopilot set-config": func() (cli.Command, error) {
|
||||
return &command.OperatorAutopilotSetCommand{
|
||||
Meta: meta,
|
||||
}, nil
|
||||
},
|
||||
|
||||
"operator raft": func() (cli.Command, error) {
|
||||
return &command.OperatorRaftCommand{
|
||||
Meta: meta,
|
||||
|
||||
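For context, a trimmed-down sketch of how a factory map like the one above is consumed by github.com/mitchellh/cli; it only loosely mirrors Nomad's real entry point, and the empty Meta value is a placeholder.

package main

import (
	"log"
	"os"

	"github.com/hashicorp/nomad/command"
	"github.com/mitchellh/cli"
)

func main() {
	c := &cli.CLI{
		Name:     "nomad",
		Args:     os.Args[1:],
		Commands: Commands(&command.Meta{}),
	}
	exitCode, err := c.Run()
	if err != nil {
		log.Fatal(err)
	}
	os.Exit(exitCode)
}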
69
nomad/autopilot.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package nomad
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/raft"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
// AutopilotDelegate is a Nomad delegate for autopilot operations.
|
||||
type AutopilotDelegate struct {
|
||||
server *Server
|
||||
}
|
||||
|
||||
func (d *AutopilotDelegate) AutopilotConfig() *autopilot.Config {
|
||||
return d.server.getOrCreateAutopilotConfig()
|
||||
}
|
||||
|
||||
func (d *AutopilotDelegate) FetchStats(ctx context.Context, servers []serf.Member) map[string]*autopilot.ServerStats {
|
||||
return d.server.statsFetcher.Fetch(ctx, servers)
|
||||
}
|
||||
|
||||
func (d *AutopilotDelegate) IsServer(m serf.Member) (*autopilot.ServerInfo, error) {
|
||||
ok, parts := isNomadServer(m)
|
||||
if !ok || parts.Region != d.server.Region() {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
server := &autopilot.ServerInfo{
|
||||
Name: m.Name,
|
||||
ID: parts.ID,
|
||||
Addr: parts.Addr,
|
||||
Build: parts.Build,
|
||||
Status: m.Status,
|
||||
}
|
||||
return server, nil
|
||||
}
|
||||
|
||||
// NotifyHealth heartbeats a metric for monitoring if we're the leader.
|
||||
func (d *AutopilotDelegate) NotifyHealth(health autopilot.OperatorHealthReply) {
|
||||
if d.server.raft.State() == raft.Leader {
|
||||
metrics.SetGauge([]string{"nomad", "autopilot", "failure_tolerance"}, float32(health.FailureTolerance))
|
||||
if health.Healthy {
|
||||
metrics.SetGauge([]string{"nomad", "autopilot", "healthy"}, 1)
|
||||
} else {
|
||||
metrics.SetGauge([]string{"nomad", "autopilot", "healthy"}, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *AutopilotDelegate) PromoteNonVoters(conf *autopilot.Config, health autopilot.OperatorHealthReply) ([]raft.Server, error) {
|
||||
future := d.server.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
return nil, fmt.Errorf("failed to get raft configuration: %v", err)
|
||||
}
|
||||
|
||||
return autopilot.PromoteStableServers(conf, health, future.Configuration().Servers), nil
|
||||
}
|
||||
|
||||
func (d *AutopilotDelegate) Raft() *raft.Raft {
|
||||
return d.server.raft
|
||||
}
|
||||
|
||||
func (d *AutopilotDelegate) Serf() *serf.Serf {
|
||||
return d.server.serf
|
||||
}
|
||||
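The gauges emitted by NotifyHealth above can be observed with any go-metrics sink. Here is a hedged, standalone sketch using an in-memory sink; the interval values are arbitrary, and the layout of the gauge map varies across go-metrics versions.

package main

import (
	"fmt"
	"time"

	metrics "github.com/armon/go-metrics"
)

func main() {
	sink := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.NewGlobal(metrics.DefaultConfig("nomad"), sink)

	// Simulate what the delegate reports while the local server is the leader.
	metrics.SetGauge([]string{"nomad", "autopilot", "healthy"}, 1)
	metrics.SetGauge([]string{"nomad", "autopilot", "failure_tolerance"}, 1)

	for _, interval := range sink.Data() {
		fmt.Printf("%v\n", interval.Gauges)
	}
}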
350
nomad/autopilot_test.go
Normal file
@@ -0,0 +1,350 @@
|
||||
package nomad
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/consul/testutil/retry"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/hashicorp/raft"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
// wantPeers determines whether the server has the given
|
||||
// number of voting raft peers.
|
||||
func wantPeers(s *Server, peers int) error {
|
||||
future := s.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n := autopilot.NumPeers(future.Configuration())
|
||||
if got, want := n, peers; got != want {
|
||||
return fmt.Errorf("got %d peers want %d", got, want)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// wantRaft determines if the servers have all of each other in their
|
||||
// Raft configurations.
|
||||
func wantRaft(servers []*Server) error {
|
||||
// Make sure all the servers are represented in the Raft config,
|
||||
// and that there are no extras.
|
||||
verifyRaft := func(c raft.Configuration) error {
|
||||
want := make(map[raft.ServerID]bool)
|
||||
for _, s := range servers {
|
||||
want[s.config.RaftConfig.LocalID] = true
|
||||
}
|
||||
|
||||
for _, s := range c.Servers {
|
||||
if !want[s.ID] {
|
||||
return fmt.Errorf("don't want %q", s.ID)
|
||||
}
|
||||
delete(want, s.ID)
|
||||
}
|
||||
|
||||
if len(want) > 0 {
|
||||
return fmt.Errorf("didn't find %v", want)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, s := range servers {
|
||||
future := s.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := verifyRaft(future.Configuration()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestAutopilot_CleanupDeadServer(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i := 1; i <= 3; i++ {
|
||||
testCleanupDeadServer(t, i)
|
||||
}
|
||||
}
|
||||
|
||||
func testCleanupDeadServer(t *testing.T, raftVersion int) {
|
||||
conf := func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.BootstrapExpect = 3
|
||||
c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(raftVersion)
|
||||
}
|
||||
s1 := testServer(t, conf)
|
||||
defer s1.Shutdown()
|
||||
|
||||
s2 := testServer(t, conf)
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, conf)
|
||||
defer s3.Shutdown()
|
||||
|
||||
servers := []*Server{s1, s2, s3}
|
||||
|
||||
// Try to join
|
||||
testJoin(t, s1, s2, s3)
|
||||
|
||||
for _, s := range servers {
|
||||
retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
|
||||
}
|
||||
|
||||
// Bring up a new server
|
||||
s4 := testServer(t, conf)
|
||||
defer s4.Shutdown()
|
||||
|
||||
// Kill a non-leader server
|
||||
s3.Shutdown()
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
alive := 0
|
||||
for _, m := range s1.Members() {
|
||||
if m.Status == serf.StatusAlive {
|
||||
alive++
|
||||
}
|
||||
}
|
||||
if alive != 2 {
|
||||
r.Fatal(nil)
|
||||
}
|
||||
})
|
||||
|
||||
// Join the new server
|
||||
testJoin(t, s1, s4)
|
||||
servers[2] = s4
|
||||
|
||||
// Make sure the dead server is removed and we're back to 3 total peers
|
||||
for _, s := range servers {
|
||||
retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
|
||||
}
|
||||
}
|
||||
|
||||
func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, nil)
|
||||
defer s1.Shutdown()
|
||||
|
||||
conf := func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
}
|
||||
|
||||
s2 := testServer(t, conf)
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, conf)
|
||||
defer s3.Shutdown()
|
||||
|
||||
s4 := testServer(t, conf)
|
||||
defer s4.Shutdown()
|
||||
|
||||
s5 := testServer(t, conf)
|
||||
defer s5.Shutdown()
|
||||
|
||||
servers := []*Server{s1, s2, s3, s4, s5}
|
||||
|
||||
// Join the servers to s1, and wait until they are all promoted to
|
||||
// voters.
|
||||
testJoin(t, s1, servers[1:]...)
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
r.Check(wantRaft(servers))
|
||||
for _, s := range servers {
|
||||
r.Check(wantPeers(s, 5))
|
||||
}
|
||||
})
|
||||
|
||||
// Kill a non-leader server
|
||||
s4.Shutdown()
|
||||
|
||||
// Should be removed from the peers automatically
|
||||
servers = []*Server{s1, s2, s3, s5}
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
r.Check(wantRaft(servers))
|
||||
for _, s := range servers {
|
||||
r.Check(wantPeers(s, 4))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestAutopilot_RollingUpdate(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
|
||||
conf := func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
}
|
||||
|
||||
s2 := testServer(t, conf)
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, conf)
|
||||
defer s3.Shutdown()
|
||||
|
||||
// Join the servers to s1, and wait until they are all promoted to
|
||||
// voters.
|
||||
servers := []*Server{s1, s2, s3}
|
||||
testJoin(t, s1, s2, s3)
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
r.Check(wantRaft(servers))
|
||||
for _, s := range servers {
|
||||
r.Check(wantPeers(s, 3))
|
||||
}
|
||||
})
|
||||
|
||||
// Add one more server like we are doing a rolling update.
|
||||
s4 := testServer(t, conf)
|
||||
defer s4.Shutdown()
|
||||
testJoin(t, s1, s4)
|
||||
servers = append(servers, s4)
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
r.Check(wantRaft(servers))
|
||||
for _, s := range servers {
|
||||
r.Check(wantPeers(s, 3))
|
||||
}
|
||||
})
|
||||
|
||||
// Now kill one of the "old" nodes like we are doing a rolling update.
|
||||
s3.Shutdown()
|
||||
|
||||
isVoter := func() bool {
|
||||
future := s1.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
for _, s := range future.Configuration().Servers {
|
||||
if string(s.ID) == string(s4.config.NodeID) {
|
||||
return s.Suffrage == raft.Voter
|
||||
}
|
||||
}
|
||||
t.Fatalf("didn't find s4")
|
||||
return false
|
||||
}
|
||||
|
||||
// Wait for s4 to stabilize, get promoted to a voter, and for s3 to be
|
||||
// removed.
|
||||
servers = []*Server{s1, s2, s4}
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
r.Check(wantRaft(servers))
|
||||
for _, s := range servers {
|
||||
r.Check(wantPeers(s, 3))
|
||||
}
|
||||
if !isVoter() {
|
||||
r.Fatalf("should be a voter")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestAutopilot_CleanupStaleRaftServer(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, nil)
|
||||
defer s1.Shutdown()
|
||||
|
||||
conf := func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
}
|
||||
s2 := testServer(t, conf)
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, conf)
|
||||
defer s3.Shutdown()
|
||||
|
||||
s4 := testServer(t, conf)
|
||||
defer s4.Shutdown()
|
||||
|
||||
servers := []*Server{s1, s2, s3}
|
||||
|
||||
// Join the servers to s1
|
||||
testJoin(t, s1, s2, s3)
|
||||
|
||||
for _, s := range servers {
|
||||
retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
|
||||
}
|
||||
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
// Add s4 to peers directly
|
||||
addr := fmt.Sprintf("127.0.0.1:%d", s4.config.SerfConfig.MemberlistConfig.BindPort)
|
||||
s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(addr), 0, 0)
|
||||
|
||||
// Verify we have 4 peers
|
||||
peers, err := s1.numPeers()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if peers != 4 {
|
||||
t.Fatalf("bad: %v", peers)
|
||||
}
|
||||
|
||||
// Wait for s4 to be removed
|
||||
for _, s := range []*Server{s1, s2, s3} {
|
||||
retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
|
||||
}
|
||||
}
|
||||
|
||||
func TestAutopilot_PromoteNonVoter(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
defer codec.Close()
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
s2 := testServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s2.Shutdown()
|
||||
testJoin(t, s1, s2)
|
||||
|
||||
// Make sure we see it as a nonvoter initially. We wait until half
|
||||
// the stabilization period has passed.
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
future := s1.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
r.Fatal(err)
|
||||
}
|
||||
|
||||
servers := future.Configuration().Servers
|
||||
if len(servers) != 2 {
|
||||
r.Fatalf("bad: %v", servers)
|
||||
}
|
||||
if servers[1].Suffrage != raft.Nonvoter {
|
||||
r.Fatalf("bad: %v", servers)
|
||||
}
|
||||
health := s1.autopilot.GetServerHealth(string(servers[1].ID))
|
||||
if health == nil {
|
||||
r.Fatalf("nil health, %v", s1.autopilot.GetClusterHealth())
|
||||
}
|
||||
if !health.Healthy {
|
||||
r.Fatalf("bad: %v", health)
|
||||
}
|
||||
if time.Since(health.StableSince) < s1.config.AutopilotConfig.ServerStabilizationTime/2 {
|
||||
r.Fatal("stable period not elapsed")
|
||||
}
|
||||
})
|
||||
|
||||
// Make sure it ends up as a voter.
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
future := s1.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
r.Fatal(err)
|
||||
}
|
||||
|
||||
servers := future.Configuration().Servers
|
||||
if len(servers) != 2 {
|
||||
r.Fatalf("bad: %v", servers)
|
||||
}
|
||||
if servers[1].Suffrage != raft.Voter {
|
||||
r.Fatalf("bad: %v", servers)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/memberlist"
|
||||
"github.com/hashicorp/nomad/helper/tlsutil"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
@@ -93,6 +94,10 @@ type Config struct {
|
||||
// RaftTimeout is applied to any network traffic for raft. Defaults to 10s.
|
||||
RaftTimeout time.Duration
|
||||
|
||||
// (Enterprise-only) NonVoter is used to prevent this server from being added
|
||||
// as a voting member of the Raft cluster.
|
||||
NonVoter bool
|
||||
|
||||
// SerfConfig is the configuration for the serf cluster
|
||||
SerfConfig *serf.Config
|
||||
|
||||
@@ -261,6 +266,19 @@ type Config struct {
|
||||
// BackwardsCompatibleMetrics determines whether to show methods of
|
||||
// displaying metrics for older versions, or to only show the new format
|
||||
BackwardsCompatibleMetrics bool
|
||||
|
||||
// AutopilotConfig is used to apply the initial autopilot config when
|
||||
// bootstrapping.
|
||||
AutopilotConfig *autopilot.Config
|
||||
|
||||
// ServerHealthInterval is the frequency with which the health of the
|
||||
// servers in the cluster will be updated.
|
||||
ServerHealthInterval time.Duration
|
||||
|
||||
// AutopilotInterval is the frequency with which the leader will perform
|
||||
// autopilot tasks, such as promoting eligible non-voters and removing
|
||||
// dead servers.
|
||||
AutopilotInterval time.Duration
|
||||
}
|
||||
|
||||
// CheckVersion is used to check if the ProtocolVersion is valid
|
||||
@@ -321,6 +339,14 @@ func DefaultConfig() *Config {
|
||||
TLSConfig: &config.TLSConfig{},
|
||||
ReplicationBackoff: 30 * time.Second,
|
||||
SentinelGCInterval: 30 * time.Second,
|
||||
AutopilotConfig: &autopilot.Config{
|
||||
CleanupDeadServers: true,
|
||||
LastContactThreshold: 200 * time.Millisecond,
|
||||
MaxTrailingLogs: 250,
|
||||
ServerStabilizationTime: 10 * time.Second,
|
||||
},
|
||||
ServerHealthInterval: 2 * time.Second,
|
||||
AutopilotInterval: 10 * time.Second,
|
||||
}
|
||||
|
||||
// Enable all known schedulers by default
|
||||
@@ -344,8 +370,8 @@ func DefaultConfig() *Config {
|
||||
// Disable shutdown on removal
|
||||
c.RaftConfig.ShutdownOnRemove = false
|
||||
|
||||
// Enable interoperability with raft protocol version 1, and don't
|
||||
// start using new ID-based features yet.
|
||||
	// Enable interoperability with the new raft APIs; this requires all
	// servers to be on raft protocol version 1 or higher.
|
||||
c.RaftConfig.ProtocolVersion = 2
|
||||
|
||||
return c
|
||||
|
||||
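The new AutopilotConfig, ServerHealthInterval, and AutopilotInterval knobs can be tightened on top of DefaultConfig. The helper below is a hypothetical sketch (not part of this diff, assuming package nomad context) mirroring the values the test harness uses later in this change.

// Hypothetical helper, not part of this diff.
func tightenAutopilotTiming(c *Config) {
	c.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
	c.ServerHealthInterval = 50 * time.Millisecond
	c.AutopilotInterval = 100 * time.Millisecond
}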
19
nomad/fsm.go
@@ -234,6 +234,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} {
|
||||
return n.applyACLTokenDelete(buf[1:], log.Index)
|
||||
case structs.ACLTokenBootstrapRequestType:
|
||||
return n.applyACLTokenBootstrap(buf[1:], log.Index)
|
||||
case structs.AutopilotRequestType:
|
||||
return n.applyAutopilotUpdate(buf[1:], log.Index)
|
||||
}
|
||||
|
||||
// Check enterprise only message types.
|
||||
@@ -833,6 +835,23 @@ func (n *nomadFSM) applyACLTokenBootstrap(buf []byte, index uint64) interface{}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *nomadFSM) applyAutopilotUpdate(buf []byte, index uint64) interface{} {
|
||||
var req structs.AutopilotSetConfigRequest
|
||||
if err := structs.Decode(buf, &req); err != nil {
|
||||
panic(fmt.Errorf("failed to decode request: %v", err))
|
||||
}
|
||||
defer metrics.MeasureSince([]string{"nomad", "fsm", "autopilot"}, time.Now())
|
||||
|
||||
if req.CAS {
|
||||
act, err := n.state.AutopilotCASConfig(index, req.Config.ModifyIndex, &req.Config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return act
|
||||
}
|
||||
return n.state.AutopilotSetConfig(index, &req.Config)
|
||||
}
|
||||
|
||||
func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) {
|
||||
// Create a new snapshot
|
||||
snap, err := n.state.Snapshot()
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
@@ -2310,3 +2311,62 @@ func TestFSM_ReconcileSummaries(t *testing.T) {
|
||||
t.Fatalf("Diff % #v", pretty.Diff(&expected, out2))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFSM_Autopilot(t *testing.T) {
|
||||
t.Parallel()
|
||||
fsm := testFSM(t)
|
||||
|
||||
// Set the autopilot config using a request.
|
||||
req := structs.AutopilotSetConfigRequest{
|
||||
Datacenter: "dc1",
|
||||
Config: autopilot.Config{
|
||||
CleanupDeadServers: true,
|
||||
LastContactThreshold: 10 * time.Second,
|
||||
MaxTrailingLogs: 300,
|
||||
},
|
||||
}
|
||||
buf, err := structs.Encode(structs.AutopilotRequestType, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
resp := fsm.Apply(makeLog(buf))
|
||||
if _, ok := resp.(error); ok {
|
||||
t.Fatalf("bad: %v", resp)
|
||||
}
|
||||
|
||||
// Verify key is set directly in the state store.
|
||||
_, config, err := fsm.state.AutopilotConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if config.CleanupDeadServers != req.Config.CleanupDeadServers {
|
||||
t.Fatalf("bad: %v", config.CleanupDeadServers)
|
||||
}
|
||||
if config.LastContactThreshold != req.Config.LastContactThreshold {
|
||||
t.Fatalf("bad: %v", config.LastContactThreshold)
|
||||
}
|
||||
if config.MaxTrailingLogs != req.Config.MaxTrailingLogs {
|
||||
t.Fatalf("bad: %v", config.MaxTrailingLogs)
|
||||
}
|
||||
|
||||
// Now use CAS and provide an old index
|
||||
req.CAS = true
|
||||
req.Config.CleanupDeadServers = false
|
||||
req.Config.ModifyIndex = config.ModifyIndex - 1
|
||||
buf, err = structs.Encode(structs.AutopilotRequestType, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
resp = fsm.Apply(makeLog(buf))
|
||||
if _, ok := resp.(error); ok {
|
||||
t.Fatalf("bad: %v", resp)
|
||||
}
|
||||
|
||||
_, config, err = fsm.state.AutopilotConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !config.CleanupDeadServers {
|
||||
t.Fatalf("bad: %v", config.CleanupDeadServers)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,9 @@ import (
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
"github.com/hashicorp/go-version"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
@@ -37,6 +39,8 @@ const (
|
||||
barrierWriteTimeout = 2 * time.Minute
|
||||
)
|
||||
|
||||
var minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
|
||||
|
||||
// monitorLeadership is used to monitor if we acquire or lose our role
|
||||
// as the leader in the Raft cluster. There is some work the leader is
|
||||
// expected to do, so we must react to changes
|
||||
@@ -168,6 +172,10 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize and start the autopilot routine
|
||||
s.getOrCreateAutopilotConfig()
|
||||
s.autopilot.Start()
|
||||
|
||||
// Enable the plan queue, since we are now the leader
|
||||
s.planQueue.SetEnabled(true)
|
||||
|
||||
@@ -635,6 +643,9 @@ func (s *Server) revokeLeadership() error {
|
||||
// Clear the leader token since we are no longer the leader.
|
||||
s.setLeaderAcl("")
|
||||
|
||||
// Disable autopilot
|
||||
s.autopilot.Stop()
|
||||
|
||||
// Disable the plan queue, since we are no longer leader
|
||||
s.planQueue.SetEnabled(false)
|
||||
|
||||
@@ -776,7 +787,7 @@ func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
// but we want to avoid doing that if possible to prevent useless Raft
|
||||
// log entries. If the address is the same but the ID changed, remove the
|
||||
// old server before adding the new one.
|
||||
minRaftProtocol, err := MinRaftProtocol(s.config.Region, members)
|
||||
minRaftProtocol, err := s.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -810,8 +821,7 @@ func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
// Attempt to add as a peer
|
||||
switch {
|
||||
case minRaftProtocol >= 3:
|
||||
// todo(kyhavlov): change this to AddNonVoter when adding autopilot
|
||||
addFuture := s.raft.AddVoter(raft.ServerID(parts.ID), raft.ServerAddress(addr), 0, 0)
|
||||
addFuture := s.raft.AddNonvoter(raft.ServerID(parts.ID), raft.ServerAddress(addr), 0, 0)
|
||||
if err := addFuture.Error(); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to add raft peer: %v", err)
|
||||
return err
|
||||
@@ -836,7 +846,6 @@ func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
// removeRaftPeer is used to remove a Raft peer when a Nomad server leaves
|
||||
// or is reaped
|
||||
func (s *Server) removeRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
// TODO (alexdadgar) - This will need to be changed once we support node IDs.
|
||||
addr := (&net.TCPAddr{IP: m.Addr, Port: parts.Port}).String()
|
||||
|
||||
// See if it's already in the configuration. It's harmless to re-remove it
|
||||
@@ -848,7 +857,7 @@ func (s *Server) removeRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
return err
|
||||
}
|
||||
|
||||
minRaftProtocol, err := MinRaftProtocol(s.config.Region, s.serf.Members())
|
||||
minRaftProtocol, err := s.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1163,3 +1172,31 @@ func diffACLTokens(state *state.StateStore, minIndex uint64, remoteList []*struc
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// getOrCreateAutopilotConfig is used to get the autopilot config, initializing it if necessary
|
||||
func (s *Server) getOrCreateAutopilotConfig() *autopilot.Config {
|
||||
state := s.fsm.State()
|
||||
_, config, err := state.AutopilotConfig()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: failed to get config: %v", err)
|
||||
return nil
|
||||
}
|
||||
if config != nil {
|
||||
return config
|
||||
}
|
||||
|
||||
if !ServersMeetMinimumVersion(s.Members(), minAutopilotVersion) {
|
||||
s.logger.Printf("[INFO] autopilot: version %v", s.Members()[0].Tags)
|
||||
s.logger.Printf("[WARN] autopilot: can't initialize until all servers are >= %s", minAutopilotVersion.String())
|
||||
return nil
|
||||
}
|
||||
|
||||
config = s.config.AutopilotConfig
|
||||
req := structs.AutopilotSetConfigRequest{Config: *config}
|
||||
if _, _, err = s.raftApply(structs.AutopilotRequestType, req); err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: failed to initialize config: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
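The version gate above boils down to a go-version comparison against minAutopilotVersion. A self-contained sketch follows; the "0.7.1" build string is illustrative.

package main

import (
	"fmt"
	"log"

	version "github.com/hashicorp/go-version"
)

func main() {
	minAutopilotVersion := version.Must(version.NewVersion("0.8.0"))
	build, err := version.NewVersion("0.7.1")
	if err != nil {
		log.Fatal(err)
	}
	if build.LessThan(minAutopilotVersion) {
		fmt.Println("autopilot: can't initialize until all servers are >=", minAutopilotVersion)
	}
}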
@@ -6,6 +6,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/testutil/retry"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
@@ -815,21 +816,18 @@ func TestLeader_DiffACLTokens(t *testing.T) {
|
||||
func TestLeader_UpgradeRaftVersion(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.Datacenter = "dc1"
|
||||
c.RaftConfig.ProtocolVersion = 2
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
|
||||
s2 := testServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.Datacenter = "dc1"
|
||||
c.RaftConfig.ProtocolVersion = 1
|
||||
})
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.Datacenter = "dc1"
|
||||
c.RaftConfig.ProtocolVersion = 2
|
||||
})
|
||||
defer s3.Shutdown()
|
||||
@@ -854,7 +852,7 @@ func TestLeader_UpgradeRaftVersion(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, s := range []*Server{s1, s3} {
|
||||
minVer, err := MinRaftProtocol(s1.config.Region, s.Members())
|
||||
minVer, err := s.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -902,3 +900,81 @@ func TestLeader_UpgradeRaftVersion(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLeader_RollRaftServer(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.RaftConfig.ProtocolVersion = 2
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
|
||||
s2 := testServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 1
|
||||
})
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 2
|
||||
})
|
||||
defer s3.Shutdown()
|
||||
|
||||
servers := []*Server{s1, s2, s3}
|
||||
|
||||
// Try to join
|
||||
testJoin(t, s1, s2, s3)
|
||||
|
||||
for _, s := range servers {
|
||||
retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
|
||||
}
|
||||
|
||||
// Kill the v1 server
|
||||
s2.Shutdown()
|
||||
|
||||
for _, s := range []*Server{s1, s3} {
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
minVer, err := s.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
r.Fatal(err)
|
||||
}
|
||||
if got, want := minVer, 2; got != want {
|
||||
r.Fatalf("got min raft version %d want %d", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Replace the dead server with one running raft protocol v3
|
||||
s4 := testServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s4.Shutdown()
|
||||
testJoin(t, s4, s1)
|
||||
servers[1] = s4
|
||||
|
||||
// Make sure the dead server is removed and we're back to 3 total peers
|
||||
for _, s := range servers {
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
addrs := 0
|
||||
ids := 0
|
||||
future := s.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
r.Fatal(err)
|
||||
}
|
||||
for _, server := range future.Configuration().Servers {
|
||||
if string(server.ID) == string(server.Address) {
|
||||
addrs++
|
||||
} else {
|
||||
ids++
|
||||
}
|
||||
}
|
||||
if got, want := addrs, 2; got != want {
|
||||
r.Fatalf("got %d server addresses want %d", got, want)
|
||||
}
|
||||
if got, want := ids, 1; got != want {
|
||||
r.Fatalf("got %d server ids want %d", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/raft"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
@@ -124,3 +125,161 @@ REMOVE:
|
||||
op.srv.logger.Printf("[WARN] nomad.operator: Removed Raft peer %q", args.Address)
|
||||
return nil
|
||||
}
|
||||
|
||||
// RaftRemovePeerByID is used to kick a stale peer (one that is in the Raft
|
||||
// quorum but no longer known to Serf or the catalog) by ID. The reply
// argument is not used, but is required to fulfill the RPC
|
||||
// interface.
|
||||
func (op *Operator) RaftRemovePeerByID(args *structs.RaftPeerByIDRequest, reply *struct{}) error {
|
||||
if done, err := op.srv.forward("Operator.RaftRemovePeerByID", args, args, reply); done {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check management permissions
|
||||
if aclObj, err := op.srv.ResolveToken(args.AuthToken); err != nil {
|
||||
return err
|
||||
} else if aclObj != nil && !aclObj.IsManagement() {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// Since this is an operation designed for humans to use, we will return
|
||||
// an error if the supplied id isn't among the peers since it's
|
||||
// likely they screwed up.
|
||||
var address raft.ServerAddress
|
||||
{
|
||||
future := op.srv.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, s := range future.Configuration().Servers {
|
||||
if s.ID == args.ID {
|
||||
address = s.Address
|
||||
goto REMOVE
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("id %q was not found in the Raft configuration",
|
||||
args.ID)
|
||||
}
|
||||
|
||||
REMOVE:
|
||||
// The Raft library itself will prevent various forms of foot-shooting,
|
||||
// like making a configuration with no voters. Some consideration was
|
||||
// given here to adding more checks, but it was decided to make this as
|
||||
// low-level and direct as possible. We've got ACL coverage to lock this
|
||||
// down, and if you are an operator, it's assumed you know what you are
|
||||
// doing if you are calling this. If you remove a peer that's known to
|
||||
// Serf, for example, it will come back when the leader does a reconcile
|
||||
// pass.
|
||||
minRaftProtocol, err := op.srv.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var future raft.Future
|
||||
if minRaftProtocol >= 2 {
|
||||
future = op.srv.raft.RemoveServer(args.ID, 0, 0)
|
||||
} else {
|
||||
future = op.srv.raft.RemovePeer(address)
|
||||
}
|
||||
if err := future.Error(); err != nil {
|
||||
op.srv.logger.Printf("[WARN] nomad.operator: Failed to remove Raft peer with id %q: %v",
|
||||
args.ID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
op.srv.logger.Printf("[WARN] nomad.operator: Removed Raft peer with id %q", args.ID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// AutopilotGetConfiguration is used to retrieve the current Autopilot configuration.
|
||||
func (op *Operator) AutopilotGetConfiguration(args *structs.GenericRequest, reply *autopilot.Config) error {
|
||||
if done, err := op.srv.forward("Operator.AutopilotGetConfiguration", args, args, reply); done {
|
||||
return err
|
||||
}
|
||||
|
||||
// This action requires operator read access.
|
||||
rule, err := op.srv.ResolveToken(args.AuthToken)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rule != nil && !rule.AllowOperatorRead() {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
state := op.srv.fsm.State()
|
||||
_, config, err := state.AutopilotConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if config == nil {
|
||||
return fmt.Errorf("autopilot config not initialized yet")
|
||||
}
|
||||
|
||||
*reply = *config
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AutopilotSetConfiguration is used to set the current Autopilot configuration.
|
||||
func (op *Operator) AutopilotSetConfiguration(args *structs.AutopilotSetConfigRequest, reply *bool) error {
|
||||
if done, err := op.srv.forward("Operator.AutopilotSetConfiguration", args, args, reply); done {
|
||||
return err
|
||||
}
|
||||
|
||||
// This action requires operator write access.
|
||||
rule, err := op.srv.ResolveToken(args.AuthToken)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rule != nil && !rule.AllowOperatorWrite() {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// Apply the update
|
||||
resp, _, err := op.srv.raftApply(structs.AutopilotRequestType, args)
|
||||
if err != nil {
|
||||
op.srv.logger.Printf("[ERR] nomad.operator: Apply failed: %v", err)
|
||||
return err
|
||||
}
|
||||
if respErr, ok := resp.(error); ok {
|
||||
return respErr
|
||||
}
|
||||
|
||||
// Check if the return type is a bool.
|
||||
if respBool, ok := resp.(bool); ok {
|
||||
*reply = respBool
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ServerHealth is used to get the current health of the servers.
|
||||
func (op *Operator) ServerHealth(args *structs.GenericRequest, reply *autopilot.OperatorHealthReply) error {
|
||||
// This must be sent to the leader, so we fix the args since we are
|
||||
// re-using a structure where we don't support all the options.
|
||||
args.AllowStale = false
|
||||
if done, err := op.srv.forward("Operator.ServerHealth", args, args, reply); done {
|
||||
return err
|
||||
}
|
||||
|
||||
// This action requires operator read access.
|
||||
rule, err := op.srv.ResolveToken(args.AuthToken)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rule != nil && !rule.AllowOperatorRead() {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// Exit early if the min Raft version is too low
|
||||
minRaftProtocol, err := op.srv.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting server raft protocol versions: %s", err)
|
||||
}
|
||||
if minRaftProtocol < 3 {
|
||||
return fmt.Errorf("all servers must have raft_protocol set to 3 or higher to use this endpoint")
|
||||
}
|
||||
|
||||
*reply = op.srv.autopilot.GetClusterHealth()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
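A test-style sketch (not part of this diff) of querying the new Operator.ServerHealth RPC, following the same conventions as the endpoint tests below. It assumes Raft protocol 3, since the endpoint rejects lower versions, and a real test would retry because the health loop populates asynchronously.

func TestOperator_ServerHealth_Sketch(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	testutil.WaitForLeader(t, s1.RPC)

	arg := structs.GenericRequest{}
	arg.Region = s1.config.Region

	var reply autopilot.OperatorHealthReply
	// A real test would wrap this call in a retry loop; the health data is
	// refreshed periodically by the server health loop.
	if err := msgpackrpc.CallWithCodec(codec, "Operator.ServerHealth", &arg, &reply); err != nil {
		t.Fatalf("err: %v", err)
	}
	if !reply.Healthy {
		t.Fatalf("expected a healthy single-server cluster: %#v", reply)
	}
}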
@@ -225,3 +225,111 @@ func TestOperator_RaftRemovePeerByAddress_ACL(t *testing.T) {
|
||||
assert.Nil(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOperator_RaftRemovePeerByID(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1 := testServer(t, func(c *Config) {
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
// Try to remove a peer that's not there.
|
||||
arg := structs.RaftPeerByIDRequest{
|
||||
ID: raft.ServerID("e35bde83-4e9c-434f-a6ef-453f44ee21ea"),
|
||||
}
|
||||
arg.Region = s1.config.Region
|
||||
var reply struct{}
|
||||
err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByID", &arg, &reply)
|
||||
if err == nil || !strings.Contains(err.Error(), "not found in the Raft configuration") {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Add it manually to Raft.
|
||||
{
|
||||
future := s1.raft.AddVoter(arg.ID, raft.ServerAddress(fmt.Sprintf("127.0.0.1:%d", freeport.GetT(t, 1)[0])), 0, 0)
|
||||
if err := future.Error(); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure it's there.
|
||||
{
|
||||
future := s1.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
configuration := future.Configuration()
|
||||
if len(configuration.Servers) != 2 {
|
||||
t.Fatalf("bad: %v", configuration)
|
||||
}
|
||||
}
|
||||
|
||||
// Remove it, now it should go through.
|
||||
if err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByID", &arg, &reply); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Make sure it's not there.
|
||||
{
|
||||
future := s1.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
configuration := future.Configuration()
|
||||
if len(configuration.Servers) != 1 {
|
||||
t.Fatalf("bad: %v", configuration)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestOperator_RaftRemovePeerByID_ACL(t *testing.T) {
|
||||
t.Parallel()
|
||||
s1, root := testACLServer(t, func(c *Config) {
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
codec := rpcClient(t, s1)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
assert := assert.New(t)
|
||||
state := s1.fsm.State()
|
||||
|
||||
// Create ACL token
|
||||
invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite))
|
||||
|
||||
arg := structs.RaftPeerByIDRequest{
|
||||
ID: raft.ServerID("e35bde83-4e9c-434f-a6ef-453f44ee21ea"),
|
||||
}
|
||||
arg.Region = s1.config.Region
|
||||
|
||||
// Add peer manually to Raft.
|
||||
{
|
||||
future := s1.raft.AddVoter(arg.ID, raft.ServerAddress(fmt.Sprintf("127.0.0.1:%d", freeport.GetT(t, 1)[0])), 0, 0)
|
||||
assert.Nil(future.Error())
|
||||
}
|
||||
|
||||
var reply struct{}
|
||||
|
||||
// Try with no token and expect permission denied
|
||||
{
|
||||
err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByID", &arg, &reply)
|
||||
assert.NotNil(err)
|
||||
assert.Equal(err.Error(), structs.ErrPermissionDenied.Error())
|
||||
}
|
||||
|
||||
// Try with an invalid token and expect permission denied
|
||||
{
|
||||
arg.AuthToken = invalidToken.SecretID
|
||||
err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByID", &arg, &reply)
|
||||
assert.NotNil(err)
|
||||
assert.Equal(err.Error(), structs.ErrPermissionDenied.Error())
|
||||
}
|
||||
|
||||
// Try with a management token
|
||||
{
|
||||
arg.AuthToken = root.SecretID
|
||||
err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByID", &arg, &reply)
|
||||
assert.Nil(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,7 +184,7 @@ func (s *Server) maybeBootstrap() {
|
||||
// Attempt a live bootstrap!
|
||||
var configuration raft.Configuration
|
||||
var addrs []string
|
||||
minRaftVersion, err := MinRaftProtocol(s.config.Region, members)
|
||||
minRaftVersion, err := s.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] nomad: Failed to read server raft versions: %v", err)
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
consulapi "github.com/hashicorp/consul/api"
|
||||
"github.com/hashicorp/consul/lib"
|
||||
multierror "github.com/hashicorp/go-multierror"
|
||||
@@ -100,6 +101,9 @@ type Server struct {
|
||||
raftInmem *raft.InmemStore
|
||||
raftTransport *raft.NetworkTransport
|
||||
|
||||
// autopilot is the Autopilot instance for this server.
|
||||
autopilot *autopilot.Autopilot
|
||||
|
||||
// fsm is the state machine used with Raft
|
||||
fsm *nomadFSM
|
||||
|
||||
@@ -171,6 +175,10 @@ type Server struct {
|
||||
leaderAcl string
|
||||
leaderAclLock sync.Mutex
|
||||
|
||||
// statsFetcher is used by autopilot to check the status of the other
|
||||
// Nomad servers.
|
||||
statsFetcher *StatsFetcher
|
||||
|
||||
// EnterpriseState is used to fill in state for Pro/Ent builds
|
||||
EnterpriseState
|
||||
|
||||
@@ -271,6 +279,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
|
||||
// Create the periodic dispatcher for launching periodic jobs.
|
||||
s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)
|
||||
|
||||
// Initialize the stats fetcher that autopilot will use.
|
||||
s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Region)
|
||||
|
||||
// Setup Vault
|
||||
if err := s.setupVaultClient(); err != nil {
|
||||
s.Shutdown()
|
||||
@@ -346,6 +357,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
|
||||
// Emit metrics
|
||||
go s.heartbeatStats()
|
||||
|
||||
// Start the server health checking.
|
||||
go s.autopilot.ServerHealthLoop(s.shutdownCh)
|
||||
|
||||
// Start enterprise background workers
|
||||
s.startEnterpriseBackground()
|
||||
|
||||
@@ -425,8 +439,6 @@ func (s *Server) Leave() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO (alexdadgar) - This will need to be updated before 0.8 release to
|
||||
// correctly handle using node IDs instead of address when raftProtocol = 3
|
||||
addr := s.raftTransport.LocalAddr()
|
||||
|
||||
// If we are the current leader, and we have any other peers (cluster has multiple
|
||||
@@ -435,9 +447,21 @@ func (s *Server) Leave() error {
|
||||
// for some sane period of time.
|
||||
isLeader := s.IsLeader()
|
||||
if isLeader && numPeers > 1 {
|
||||
future := s.raft.RemovePeer(addr)
|
||||
if err := future.Error(); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
|
||||
minRaftProtocol, err := s.autopilot.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
|
||||
future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
|
||||
if err := future.Error(); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
|
||||
}
|
||||
} else {
|
||||
future := s.raft.RemovePeer(addr)
|
||||
if err := future.Error(); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -777,6 +801,8 @@ func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
|
||||
}
|
||||
s.rpcListener = list
|
||||
|
||||
s.logger.Printf("[INFO] nomad: RPC listening on %q", s.rpcListener.Addr().String())
|
||||
|
||||
if s.config.RPCAdvertise != nil {
|
||||
s.rpcAdvertise = s.config.RPCAdvertise
|
||||
} else {
|
||||
@@ -935,8 +961,6 @@ func (s *Server) setupRaft() error {
|
||||
return err
|
||||
}
|
||||
if !hasState {
|
||||
// TODO (alexdadgar) - This will need to be updated when
|
||||
// we add support for node IDs.
|
||||
configuration := raft.Configuration{
|
||||
Servers: []raft.Server{
|
||||
{
|
||||
@@ -977,6 +1001,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (
|
||||
conf.Tags["build"] = s.config.Build
|
||||
conf.Tags["raft_vsn"] = fmt.Sprintf("%d", s.config.RaftConfig.ProtocolVersion)
|
||||
conf.Tags["id"] = s.config.NodeID
|
||||
conf.Tags["rpc_addr"] = s.rpcAdvertise.(*net.TCPAddr).IP.String()
|
||||
conf.Tags["port"] = fmt.Sprintf("%d", s.rpcAdvertise.(*net.TCPAddr).Port)
|
||||
if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
|
||||
conf.Tags["bootstrap"] = "1"
|
||||
@@ -985,6 +1010,9 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (
|
||||
if bootstrapExpect != 0 {
|
||||
conf.Tags["expect"] = fmt.Sprintf("%d", bootstrapExpect)
|
||||
}
|
||||
if s.config.NonVoter {
|
||||
conf.Tags["nonvoter"] = "1"
|
||||
}
|
||||
conf.MemberlistConfig.LogOutput = s.config.LogOutput
|
||||
conf.LogOutput = s.config.LogOutput
|
||||
conf.EventCh = ch
|
||||
|
||||
@@ -2,9 +2,15 @@
|
||||
|
||||
package nomad
|
||||
|
||||
import "github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
|
||||
type EnterpriseState struct{}
|
||||
|
||||
func (s *Server) setupEnterprise(config *Config) error {
|
||||
// Set up the OSS version of autopilot
|
||||
apDelegate := &AutopilotDelegate{s}
|
||||
s.autopilot = autopilot.NewAutopilot(s.logger, apDelegate, config.AutopilotInterval, config.ServerHealthInterval)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ func testACLServer(t *testing.T, cb func(*Config)) (*Server, *structs.ACLToken)
|
||||
func testServer(t *testing.T, cb func(*Config)) *Server {
|
||||
// Setup the default settings
|
||||
config := DefaultConfig()
|
||||
config.Build = "0.7.0+unittest"
|
||||
config.Build = "0.8.0+unittest"
|
||||
config.DevMode = true
|
||||
nodeNum := atomic.AddUint32(&nodeNumber, 1)
|
||||
config.NodeName = fmt.Sprintf("nomad-%03d", nodeNum)
|
||||
@@ -74,6 +74,11 @@ func testServer(t *testing.T, cb func(*Config)) *Server {
|
||||
config.RaftConfig.ElectionTimeout = 50 * time.Millisecond
|
||||
config.RaftTimeout = 500 * time.Millisecond
|
||||
|
||||
// Tighten the autopilot timing
|
||||
config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
|
||||
config.ServerHealthInterval = 50 * time.Millisecond
|
||||
config.AutopilotInterval = 100 * time.Millisecond
|
||||
|
||||
// Disable Vault
|
||||
f := false
|
||||
config.VaultConfig.Enabled = &f
|
||||
|
||||
104
nomad/state/autopilot.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/go-memdb"
|
||||
)
|
||||
|
||||
// autopilotConfigTableSchema returns a new table schema used for storing
|
||||
// the autopilot configuration
|
||||
func autopilotConfigTableSchema() *memdb.TableSchema {
|
||||
return &memdb.TableSchema{
|
||||
Name: "autopilot-config",
|
||||
Indexes: map[string]*memdb.IndexSchema{
|
||||
"id": {
|
||||
Name: "id",
|
||||
AllowMissing: true,
|
||||
Unique: true,
|
||||
Indexer: &memdb.ConditionalIndex{
|
||||
Conditional: func(obj interface{}) (bool, error) { return true, nil },
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// AutopilotConfig is used to get the current Autopilot configuration.
|
||||
func (s *StateStore) AutopilotConfig() (uint64, *autopilot.Config, error) {
|
||||
tx := s.db.Txn(false)
|
||||
defer tx.Abort()
|
||||
|
||||
// Get the autopilot config
|
||||
c, err := tx.First("autopilot-config", "id")
|
||||
if err != nil {
|
||||
return 0, nil, fmt.Errorf("failed autopilot config lookup: %s", err)
|
||||
}
|
||||
|
||||
config, ok := c.(*autopilot.Config)
|
||||
if !ok {
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
return config.ModifyIndex, config, nil
|
||||
}
|
||||
|
||||
// AutopilotSetConfig is used to set the current Autopilot configuration.
|
||||
func (s *StateStore) AutopilotSetConfig(idx uint64, config *autopilot.Config) error {
|
||||
tx := s.db.Txn(true)
|
||||
defer tx.Abort()
|
||||
|
||||
s.autopilotSetConfigTxn(idx, tx, config)
|
||||
|
||||
tx.Commit()
|
||||
return nil
|
||||
}
|
||||
|
||||
// AutopilotCASConfig is used to try updating the Autopilot configuration with a
|
||||
// given Raft index. If the CAS index specified is not equal to the last observed index
|
||||
// for the config, then the call is a no-op.
|
||||
func (s *StateStore) AutopilotCASConfig(idx, cidx uint64, config *autopilot.Config) (bool, error) {
|
||||
tx := s.db.Txn(true)
|
||||
defer tx.Abort()
|
||||
|
||||
// Check for an existing config
|
||||
existing, err := tx.First("autopilot-config", "id")
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed autopilot config lookup: %s", err)
|
||||
}
|
||||
|
||||
// If the existing index does not match the provided CAS
|
||||
// index arg, then we shouldn't update anything and can safely
|
||||
// return early here.
|
||||
e, ok := existing.(*autopilot.Config)
|
||||
if !ok || e.ModifyIndex != cidx {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
s.autopilotSetConfigTxn(idx, tx, config)
|
||||
|
||||
tx.Commit()
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (s *StateStore) autopilotSetConfigTxn(idx uint64, tx *memdb.Txn, config *autopilot.Config) error {
|
||||
// Check for an existing config
|
||||
existing, err := tx.First("autopilot-config", "id")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed autopilot config lookup: %s", err)
|
||||
}
|
||||
|
||||
// Set the indexes.
|
||||
if existing != nil {
|
||||
config.CreateIndex = existing.(*autopilot.Config).CreateIndex
|
||||
} else {
|
||||
config.CreateIndex = idx
|
||||
}
|
||||
config.ModifyIndex = idx
|
||||
|
||||
if err := tx.Insert("autopilot-config", config); err != nil {
|
||||
return fmt.Errorf("failed updating autopilot config: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
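A hypothetical wrapper (not in this diff) showing the read-then-CAS pattern these helpers enable; raftIndex would normally come from the applying FSM log entry.

func casDisableCleanup(s *StateStore, raftIndex uint64) (bool, error) {
	_, current, err := s.AutopilotConfig()
	if err != nil {
		return false, err
	}
	if current == nil {
		return false, fmt.Errorf("autopilot config not initialized yet")
	}

	updated := *current
	updated.CleanupDeadServers = false

	// The write only applies if ModifyIndex still matches what we read above.
	return s.AutopilotCASConfig(raftIndex, current.ModifyIndex, &updated)
}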
94
nomad/state/autopilot_test.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
)
|
||||
|
||||
func TestStateStore_Autopilot(t *testing.T) {
|
||||
s := testStateStore(t)
|
||||
|
||||
expected := &autopilot.Config{
|
||||
CleanupDeadServers: true,
|
||||
LastContactThreshold: 5 * time.Second,
|
||||
MaxTrailingLogs: 500,
|
||||
ServerStabilizationTime: 100 * time.Second,
|
||||
RedundancyZoneTag: "az",
|
||||
DisableUpgradeMigration: true,
|
||||
UpgradeVersionTag: "build",
|
||||
}
|
||||
|
||||
if err := s.AutopilotSetConfig(0, expected); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
idx, config, err := s.AutopilotConfig()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if idx != 0 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
if !reflect.DeepEqual(expected, config) {
|
||||
t.Fatalf("bad: %#v, %#v", expected, config)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStateStore_AutopilotCAS(t *testing.T) {
|
||||
s := testStateStore(t)
|
||||
|
||||
expected := &autopilot.Config{
|
||||
CleanupDeadServers: true,
|
||||
}
|
||||
|
||||
if err := s.AutopilotSetConfig(0, expected); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := s.AutopilotSetConfig(1, expected); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Do a CAS with an index lower than the entry
|
||||
ok, err := s.AutopilotCASConfig(2, 0, &autopilot.Config{
|
||||
CleanupDeadServers: false,
|
||||
})
|
||||
if ok || err != nil {
|
||||
t.Fatalf("expected (false, nil), got: (%v, %#v)", ok, err)
|
||||
}
|
||||
|
||||
// Check that the index is untouched and the entry
|
||||
// has not been updated.
|
||||
idx, config, err := s.AutopilotConfig()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if idx != 1 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
if !config.CleanupDeadServers {
|
||||
t.Fatalf("bad: %#v", config)
|
||||
}
|
||||
|
||||
// Do another CAS, this time with the correct index
|
||||
ok, err = s.AutopilotCASConfig(2, 1, &autopilot.Config{
|
||||
CleanupDeadServers: false,
|
||||
})
|
||||
if !ok || err != nil {
|
||||
t.Fatalf("expected (true, nil), got: (%v, %#v)", ok, err)
|
||||
}
|
||||
|
||||
// Make sure the config was updated
|
||||
idx, config, err = s.AutopilotConfig()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if idx != 2 {
|
||||
t.Fatalf("bad: %d", idx)
|
||||
}
|
||||
if config.CleanupDeadServers {
|
||||
t.Fatalf("bad: %#v", config)
|
||||
}
|
||||
}
|
||||
@@ -43,6 +43,7 @@ func init() {
|
||||
vaultAccessorTableSchema,
|
||||
aclPolicyTableSchema,
|
||||
aclTokenTableSchema,
|
||||
autopilotConfigTableSchema,
|
||||
}...)
|
||||
}
|
||||
|
||||
|
||||
103
nomad/stats_fetcher.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package nomad
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
// StatsFetcher has two functions for autopilot. First, it lets us fetch all the
|
||||
// stats in parallel so we are taking a sample as close to the same time as
|
||||
// possible, since we are comparing time-sensitive info for the health check.
|
||||
// Second, it bounds the time so that one slow RPC can't hold up the health
|
||||
// check loop; as a side effect of how it implements this, it also limits to
|
||||
// a single in-flight RPC to any given server, so goroutines don't accumulate
|
||||
// as we run the health check fairly frequently.
|
||||
type StatsFetcher struct {
|
||||
logger *log.Logger
|
||||
pool *ConnPool
|
||||
region string
|
||||
inflight map[string]struct{}
|
||||
inflightLock sync.Mutex
|
||||
}
|
||||
|
||||
// NewStatsFetcher returns a stats fetcher.
|
||||
func NewStatsFetcher(logger *log.Logger, pool *ConnPool, region string) *StatsFetcher {
|
||||
return &StatsFetcher{
|
||||
logger: logger,
|
||||
pool: pool,
|
||||
region: region,
|
||||
inflight: make(map[string]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// fetch does the RPC to fetch the server stats from a single server. We don't
|
||||
// cancel this when the context is canceled because we only want one in-flight
|
||||
// RPC to each server, so we let it finish and then clean up the in-flight
|
||||
// tracking.
|
||||
func (f *StatsFetcher) fetch(server *serverParts, replyCh chan *autopilot.ServerStats) {
|
||||
var args struct{}
|
||||
var reply autopilot.ServerStats
|
||||
err := f.pool.RPC(f.region, server.RPCAddr, server.MajorVersion, "Status.RaftStats", &args, &reply)
|
||||
if err != nil {
|
||||
f.logger.Printf("[WARN] nomad: error getting server health from %q: %v",
|
||||
server.Name, err)
|
||||
} else {
|
||||
replyCh <- &reply
|
||||
}
|
||||
|
||||
f.inflightLock.Lock()
|
||||
delete(f.inflight, server.ID)
|
||||
f.inflightLock.Unlock()
|
||||
}
|
||||
|
||||
// Fetch will attempt to query all the servers in parallel.
|
||||
func (f *StatsFetcher) Fetch(ctx context.Context, members []serf.Member) map[string]*autopilot.ServerStats {
|
||||
type workItem struct {
|
||||
server *serverParts
|
||||
replyCh chan *autopilot.ServerStats
|
||||
}
|
||||
var servers []*serverParts
|
||||
for _, s := range members {
|
||||
if ok, parts := isNomadServer(s); ok {
|
||||
servers = append(servers, parts)
|
||||
}
|
||||
}
|
||||
|
||||
// Skip any servers that have inflight requests.
|
||||
var work []*workItem
|
||||
f.inflightLock.Lock()
|
||||
for _, server := range servers {
|
||||
if _, ok := f.inflight[server.ID]; ok {
|
||||
f.logger.Printf("[WARN] nomad: error getting server health from %q: last request still outstanding",
|
||||
server.Name)
|
||||
} else {
|
||||
workItem := &workItem{
|
||||
server: server,
|
||||
replyCh: make(chan *autopilot.ServerStats, 1),
|
||||
}
|
||||
work = append(work, workItem)
|
||||
f.inflight[server.ID] = struct{}{}
|
||||
go f.fetch(workItem.server, workItem.replyCh)
|
||||
}
|
||||
}
|
||||
f.inflightLock.Unlock()
|
||||
|
||||
// Now wait for the results to come in, or for the context to be
|
||||
// canceled.
|
||||
replies := make(map[string]*autopilot.ServerStats)
|
||||
for _, workItem := range work {
|
||||
select {
|
||||
case reply := <-workItem.replyCh:
|
||||
replies[workItem.server.ID] = reply
|
||||
|
||||
case <-ctx.Done():
|
||||
f.logger.Printf("[WARN] nomad: error getting server health from %q: %v",
|
||||
workItem.server.Name, ctx.Err())
|
||||
}
|
||||
}
|
||||
return replies
|
||||
}
|
||||
95
nomad/stats_fetcher_test.go
Normal file
95
nomad/stats_fetcher_test.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package nomad
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
)
|
||||
|
||||
func TestStatsFetcher(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
conf := func(c *Config) {
|
||||
c.Region = "region-a"
|
||||
c.DevDisableBootstrap = true
|
||||
c.BootstrapExpect = 3
|
||||
}
|
||||
|
||||
s1 := testServer(t, conf)
|
||||
defer s1.Shutdown()
|
||||
|
||||
s2 := testServer(t, conf)
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := testServer(t, conf)
|
||||
defer s3.Shutdown()
|
||||
|
||||
testJoin(t, s1, s2, s3)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
members := s1.serf.Members()
|
||||
if len(members) != 3 {
|
||||
t.Fatalf("bad len: %d", len(members))
|
||||
}
|
||||
|
||||
var servers []*serverParts
|
||||
for _, member := range members {
|
||||
ok, server := isNomadServer(member)
|
||||
if !ok {
|
||||
t.Fatalf("bad: %#v", member)
|
||||
}
|
||||
servers = append(servers, server)
|
||||
}
|
||||
|
||||
// Do a normal fetch and make sure we get three responses.
|
||||
func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
stats := s1.statsFetcher.Fetch(ctx, s1.Members())
|
||||
if len(stats) != 3 {
|
||||
t.Fatalf("bad: %#v", stats)
|
||||
}
|
||||
for id, stat := range stats {
|
||||
switch id {
|
||||
case s1.config.NodeID, s2.config.NodeID, s3.config.NodeID:
|
||||
// OK
|
||||
default:
|
||||
t.Fatalf("bad: %s", id)
|
||||
}
|
||||
|
||||
if stat == nil || stat.LastTerm == 0 {
|
||||
t.Fatalf("bad: %#v", stat)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Fake an in-flight request to server 3 and make sure we don't fetch
|
||||
// from it.
|
||||
func() {
|
||||
s1.statsFetcher.inflight[string(s3.config.NodeID)] = struct{}{}
|
||||
defer delete(s1.statsFetcher.inflight, string(s3.config.NodeID))
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
stats := s1.statsFetcher.Fetch(ctx, s1.Members())
|
||||
if len(stats) != 2 {
|
||||
t.Fatalf("bad: %#v", stats)
|
||||
}
|
||||
for id, stat := range stats {
|
||||
switch id {
|
||||
case s1.config.NodeID, s2.config.NodeID:
|
||||
// OK
|
||||
case s3.config.NodeID:
|
||||
t.Fatalf("bad")
|
||||
default:
|
||||
t.Fatalf("bad: %s", id)
|
||||
}
|
||||
|
||||
if stat == nil || stat.LastTerm == 0 {
|
||||
t.Fatalf("bad: %#v", stat)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -1,6 +1,10 @@
|
||||
package nomad
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
@@ -104,3 +108,21 @@ func (s *Status) Members(args *structs.GenericRequest, reply *structs.ServerMemb
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used by Autopilot to query the raft stats of the local server.
|
||||
func (s *Status) RaftStats(args struct{}, reply *autopilot.ServerStats) error {
|
||||
stats := s.srv.raft.Stats()
|
||||
|
||||
var err error
|
||||
reply.LastContact = stats["last_contact"]
|
||||
reply.LastIndex, err = strconv.ParseUint(stats["last_log_index"], 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing server's last_log_index value: %s", err)
|
||||
}
|
||||
reply.LastTerm, err = strconv.ParseUint(stats["last_log_term"], 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing server's last_log_term value: %s", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
98
nomad/structs/config/autopilot.go
Normal file
98
nomad/structs/config/autopilot.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
)
|
||||
|
||||
type AutopilotConfig struct {
|
||||
// CleanupDeadServers controls whether to remove dead servers when a new
|
||||
// server is added to the Raft peers.
|
||||
CleanupDeadServers *bool `mapstructure:"cleanup_dead_servers"`
|
||||
|
||||
// ServerStabilizationTime is the minimum amount of time a server must be
|
||||
// in a stable, healthy state before it can be added to the cluster. Only
|
||||
// applicable with Raft protocol version 3 or higher.
|
||||
ServerStabilizationTime time.Duration `mapstructure:"server_stabilization_time"`
|
||||
|
||||
// LastContactThreshold is the limit on the amount of time a server can go
|
||||
// without leader contact before being considered unhealthy.
|
||||
LastContactThreshold time.Duration `mapstructure:"last_contact_threshold"`
|
||||
|
||||
// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
|
||||
// be behind before being considered unhealthy.
|
||||
MaxTrailingLogs int `mapstructure:"max_trailing_logs"`
|
||||
|
||||
// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
|
||||
// servers into zones for redundancy. If left blank, this feature will be disabled.
|
||||
RedundancyZoneTag string `mapstructure:"redundancy_zone_tag"`
|
||||
|
||||
// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
|
||||
// strategy of waiting until enough newer-versioned servers have been added to the
|
||||
// cluster before promoting them to voters.
|
||||
DisableUpgradeMigration *bool `mapstructure:"disable_upgrade_migration"`
|
||||
|
||||
// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
|
||||
// performing upgrade migrations. If left blank, the Nomad version will be used.
|
||||
UpgradeVersionTag string `mapstructure:"upgrade_version_tag"`
|
||||
}
|
||||
|
||||
// DefaultAutopilotConfig() returns the canonical defaults for the Nomad
|
||||
// `autopilot` configuration.
|
||||
func DefaultAutopilotConfig() *AutopilotConfig {
|
||||
return &AutopilotConfig{
|
||||
CleanupDeadServers: helper.BoolToPtr(true),
|
||||
LastContactThreshold: 200 * time.Millisecond,
|
||||
MaxTrailingLogs: 250,
|
||||
ServerStabilizationTime: 10 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
func (a *AutopilotConfig) Merge(b *AutopilotConfig) *AutopilotConfig {
|
||||
result := a.Copy()
|
||||
|
||||
if b.CleanupDeadServers != nil {
|
||||
result.CleanupDeadServers = helper.BoolToPtr(*b.CleanupDeadServers)
|
||||
}
|
||||
if b.ServerStabilizationTime != 0 {
|
||||
result.ServerStabilizationTime = b.ServerStabilizationTime
|
||||
}
|
||||
if b.LastContactThreshold != 0 {
|
||||
result.LastContactThreshold = b.LastContactThreshold
|
||||
}
|
||||
if b.MaxTrailingLogs != 0 {
|
||||
result.MaxTrailingLogs = b.MaxTrailingLogs
|
||||
}
|
||||
if b.RedundancyZoneTag != "" {
|
||||
result.RedundancyZoneTag = b.RedundancyZoneTag
|
||||
}
|
||||
if b.DisableUpgradeMigration != nil {
|
||||
result.DisableUpgradeMigration = helper.BoolToPtr(*b.DisableUpgradeMigration)
|
||||
}
|
||||
if b.UpgradeVersionTag != "" {
|
||||
result.UpgradeVersionTag = b.UpgradeVersionTag
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Copy returns a copy of this Autopilot config.
|
||||
func (a *AutopilotConfig) Copy() *AutopilotConfig {
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
nc := new(AutopilotConfig)
|
||||
*nc = *a
|
||||
|
||||
// Copy the bools
|
||||
if a.CleanupDeadServers != nil {
|
||||
nc.CleanupDeadServers = helper.BoolToPtr(*a.CleanupDeadServers)
|
||||
}
|
||||
if a.DisableUpgradeMigration != nil {
|
||||
nc.DisableUpgradeMigration = helper.BoolToPtr(*a.DisableUpgradeMigration)
|
||||
}
|
||||
|
||||
return nc
|
||||
}
|
||||
46
nomad/structs/config/autopilot_test.go
Normal file
46
nomad/structs/config/autopilot_test.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestAutopilotConfig_Merge(t *testing.T) {
|
||||
trueValue, falseValue := true, false
|
||||
|
||||
c1 := &AutopilotConfig{
|
||||
CleanupDeadServers: &falseValue,
|
||||
ServerStabilizationTime: 1 * time.Second,
|
||||
LastContactThreshold: 1 * time.Second,
|
||||
MaxTrailingLogs: 1,
|
||||
RedundancyZoneTag: "1",
|
||||
DisableUpgradeMigration: &falseValue,
|
||||
UpgradeVersionTag: "1",
|
||||
}
|
||||
|
||||
c2 := &AutopilotConfig{
|
||||
CleanupDeadServers: &trueValue,
|
||||
ServerStabilizationTime: 2 * time.Second,
|
||||
LastContactThreshold: 2 * time.Second,
|
||||
MaxTrailingLogs: 2,
|
||||
RedundancyZoneTag: "2",
|
||||
DisableUpgradeMigration: nil,
|
||||
UpgradeVersionTag: "2",
|
||||
}
|
||||
|
||||
e := &AutopilotConfig{
|
||||
CleanupDeadServers: &trueValue,
|
||||
ServerStabilizationTime: 2 * time.Second,
|
||||
LastContactThreshold: 2 * time.Second,
|
||||
MaxTrailingLogs: 2,
|
||||
RedundancyZoneTag: "2",
|
||||
DisableUpgradeMigration: &falseValue,
|
||||
UpgradeVersionTag: "2",
|
||||
}
|
||||
|
||||
result := c1.Merge(c2)
|
||||
if !reflect.DeepEqual(result, e) {
|
||||
t.Fatalf("bad:\n%#v\n%#v", result, e)
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package structs
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/consul/agent/consul/autopilot"
|
||||
"github.com/hashicorp/raft"
|
||||
)
|
||||
|
||||
@@ -50,3 +51,34 @@ type RaftPeerByAddressRequest struct {
|
||||
// WriteRequest holds the Region for this request.
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
// RaftPeerByIDRequest is used by the Operator endpoint to apply a Raft
|
||||
// operation on a specific Raft peer by ID.
|
||||
type RaftPeerByIDRequest struct {
|
||||
// ID is the peer ID to remove.
|
||||
ID raft.ServerID
|
||||
|
||||
// WriteRequest holds the Region for this request.
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
// AutopilotSetConfigRequest is used by the Operator endpoint to update the
|
||||
// current Autopilot configuration of the cluster.
|
||||
type AutopilotSetConfigRequest struct {
|
||||
// Datacenter is the target this request is intended for.
|
||||
Datacenter string
|
||||
|
||||
// Config is the new Autopilot configuration to use.
|
||||
Config autopilot.Config
|
||||
|
||||
// CAS controls whether to use check-and-set semantics for this request.
|
||||
CAS bool
|
||||
|
||||
// WriteRequest holds the ACL token to go along with this request.
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given request.
|
||||
func (op *AutopilotSetConfigRequest) RequestDatacenter() string {
|
||||
return op.Datacenter
|
||||
}
|
||||
|
||||
@@ -78,6 +78,7 @@ const (
|
||||
ACLTokenUpsertRequestType
|
||||
ACLTokenDeleteRequestType
|
||||
ACLTokenBootstrapRequestType
|
||||
AutopilotRequestType
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
@@ -46,7 +46,9 @@ type serverParts struct {
|
||||
MinorVersion int
|
||||
Build version.Version
|
||||
RaftVersion int
|
||||
NonVoter bool
|
||||
Addr net.Addr
|
||||
RPCAddr net.Addr
|
||||
Status serf.MemberStatus
|
||||
}
|
||||
|
||||
@@ -69,24 +71,31 @@ func isNomadServer(m serf.Member) (bool, *serverParts) {
|
||||
region := m.Tags["region"]
|
||||
datacenter := m.Tags["dc"]
|
||||
_, bootstrap := m.Tags["bootstrap"]
|
||||
_, nonVoter := m.Tags["nonvoter"]
|
||||
|
||||
expect := 0
|
||||
expect_str, ok := m.Tags["expect"]
|
||||
expectStr, ok := m.Tags["expect"]
|
||||
var err error
|
||||
if ok {
|
||||
expect, err = strconv.Atoi(expect_str)
|
||||
expect, err = strconv.Atoi(expectStr)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
port_str := m.Tags["port"]
|
||||
port, err := strconv.Atoi(port_str)
|
||||
// If the server is missing the rpc_addr tag, default to the serf advertise addr
|
||||
rpcIP := net.ParseIP(m.Tags["rpc_addr"])
|
||||
if rpcIP == nil {
|
||||
rpcIP = m.Addr
|
||||
}
|
||||
|
||||
portStr := m.Tags["port"]
|
||||
port, err := strconv.Atoi(portStr)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
build_version, err := version.NewVersion(m.Tags["build"])
|
||||
buildVersion, err := version.NewVersion(m.Tags["build"])
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
@@ -106,16 +115,17 @@ func isNomadServer(m serf.Member) (bool, *serverParts) {
|
||||
minorVersion = 0
|
||||
}
|
||||
|
||||
raft_vsn := 0
|
||||
raft_vsn_str, ok := m.Tags["raft_vsn"]
|
||||
raftVsn := 0
|
||||
raftVsnString, ok := m.Tags["raft_vsn"]
|
||||
if ok {
|
||||
raft_vsn, err = strconv.Atoi(raft_vsn_str)
|
||||
raftVsn, err = strconv.Atoi(raftVsnString)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
addr := &net.TCPAddr{IP: m.Addr, Port: port}
|
||||
rpcAddr := &net.TCPAddr{IP: rpcIP, Port: port}
|
||||
parts := &serverParts{
|
||||
Name: m.Name,
|
||||
ID: id,
|
||||
@@ -125,10 +135,12 @@ func isNomadServer(m serf.Member) (bool, *serverParts) {
|
||||
Bootstrap: bootstrap,
|
||||
Expect: expect,
|
||||
Addr: addr,
|
||||
RPCAddr: rpcAddr,
|
||||
MajorVersion: majorVersion,
|
||||
MinorVersion: minorVersion,
|
||||
Build: *build_version,
|
||||
RaftVersion: raft_vsn,
|
||||
Build: *buildVersion,
|
||||
RaftVersion: raftVsn,
|
||||
NonVoter: nonVoter,
|
||||
Status: m.Status,
|
||||
}
|
||||
return true, parts
|
||||
@@ -139,7 +151,10 @@ func isNomadServer(m serf.Member) (bool, *serverParts) {
|
||||
func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Version) bool {
|
||||
for _, member := range members {
|
||||
if valid, parts := isNomadServer(member); valid && parts.Status == serf.StatusAlive {
|
||||
if parts.Build.LessThan(minVersion) {
|
||||
// Check if the versions match - version.LessThan will return true for
|
||||
// 0.8.0-rc1 < 0.8.0, so we want to ignore the metadata
|
||||
versionsMatch := slicesMatch(minVersion.Segments(), parts.Build.Segments())
|
||||
if parts.Build.LessThan(minVersion) && !versionsMatch {
|
||||
return false
|
||||
}
|
||||
}
|
||||
@@ -148,34 +163,26 @@ func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Versio
|
||||
return true
|
||||
}
|
||||
|
||||
// MinRaftProtocol returns the lowest supported Raft protocol among alive servers
|
||||
// in the given region.
|
||||
func MinRaftProtocol(region string, members []serf.Member) (int, error) {
|
||||
minVersion := -1
|
||||
for _, m := range members {
|
||||
if m.Tags["role"] != "nomad" || m.Tags["region"] != region || m.Status != serf.StatusAlive {
|
||||
continue
|
||||
}
|
||||
func slicesMatch(a, b []int) bool {
|
||||
if a == nil && b == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
vsn, ok := m.Tags["raft_vsn"]
|
||||
if !ok {
|
||||
vsn = "1"
|
||||
}
|
||||
raftVsn, err := strconv.Atoi(vsn)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if a == nil || b == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if minVersion == -1 || raftVsn < minVersion {
|
||||
minVersion = raftVsn
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if minVersion == -1 {
|
||||
return minVersion, fmt.Errorf("no servers found")
|
||||
}
|
||||
|
||||
return minVersion, nil
|
||||
return true
|
||||
}
|
||||
|
||||
// shuffleStrings randomly shuffles the list of strings
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package nomad
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"reflect"
|
||||
"testing"
|
||||
@@ -18,12 +17,15 @@ func TestIsNomadServer(t *testing.T) {
|
||||
Addr: net.IP([]byte{127, 0, 0, 1}),
|
||||
Status: serf.StatusAlive,
|
||||
Tags: map[string]string{
|
||||
"role": "nomad",
|
||||
"region": "aws",
|
||||
"dc": "east-aws",
|
||||
"port": "10000",
|
||||
"vsn": "1",
|
||||
"build": "0.7.0+ent",
|
||||
"role": "nomad",
|
||||
"region": "aws",
|
||||
"dc": "east-aws",
|
||||
"rpc_addr": "1.1.1.1",
|
||||
"port": "10000",
|
||||
"vsn": "1",
|
||||
"raft_vsn": "2",
|
||||
"nonvoter": "1",
|
||||
"build": "0.7.0+ent",
|
||||
},
|
||||
}
|
||||
valid, parts := isNomadServer(m)
|
||||
@@ -43,6 +45,15 @@ func TestIsNomadServer(t *testing.T) {
|
||||
if parts.Status != serf.StatusAlive {
|
||||
t.Fatalf("bad: %v", parts.Status)
|
||||
}
|
||||
if parts.RaftVersion != 2 {
|
||||
t.Fatalf("bad: %v", parts.RaftVersion)
|
||||
}
|
||||
if parts.RPCAddr.String() != "1.1.1.1:10000" {
|
||||
t.Fatalf("bad: %v", parts.RPCAddr.String())
|
||||
}
|
||||
if !parts.NonVoter {
|
||||
t.Fatalf("bad: %v", parts.NonVoter)
|
||||
}
|
||||
if seg := parts.Build.Segments(); len(seg) != 3 {
|
||||
t.Fatalf("bad: %v", parts.Build)
|
||||
} else if seg[0] != 0 && seg[1] != 7 && seg[2] != 0 {
|
||||
@@ -152,105 +163,6 @@ func TestServersMeetMinimumVersion(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMinRaftProtocol(t *testing.T) {
|
||||
t.Parallel()
|
||||
makeMember := func(version, region string) serf.Member {
|
||||
return serf.Member{
|
||||
Name: "foo",
|
||||
Addr: net.IP([]byte{127, 0, 0, 1}),
|
||||
Tags: map[string]string{
|
||||
"role": "nomad",
|
||||
"region": region,
|
||||
"dc": "dc1",
|
||||
"port": "10000",
|
||||
"vsn": "1",
|
||||
"raft_vsn": version,
|
||||
},
|
||||
Status: serf.StatusAlive,
|
||||
}
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
members []serf.Member
|
||||
region string
|
||||
expected int
|
||||
err error
|
||||
}{
|
||||
// No servers, error
|
||||
{
|
||||
members: []serf.Member{},
|
||||
expected: -1,
|
||||
err: errors.New("no servers found"),
|
||||
},
|
||||
// One server
|
||||
{
|
||||
members: []serf.Member{
|
||||
makeMember("1", "global"),
|
||||
},
|
||||
region: "global",
|
||||
expected: 1,
|
||||
},
|
||||
// One server, bad version formatting
|
||||
{
|
||||
members: []serf.Member{
|
||||
makeMember("asdf", "global"),
|
||||
},
|
||||
region: "global",
|
||||
expected: -1,
|
||||
err: errors.New(`strconv.Atoi: parsing "asdf": invalid syntax`),
|
||||
},
|
||||
// One server, wrong datacenter
|
||||
{
|
||||
members: []serf.Member{
|
||||
makeMember("1", "global"),
|
||||
},
|
||||
region: "nope",
|
||||
expected: -1,
|
||||
err: errors.New("no servers found"),
|
||||
},
|
||||
// Multiple servers, different versions
|
||||
{
|
||||
members: []serf.Member{
|
||||
makeMember("1", "global"),
|
||||
makeMember("2", "global"),
|
||||
},
|
||||
region: "global",
|
||||
expected: 1,
|
||||
},
|
||||
// Multiple servers, same version
|
||||
{
|
||||
members: []serf.Member{
|
||||
makeMember("2", "global"),
|
||||
makeMember("2", "global"),
|
||||
},
|
||||
region: "global",
|
||||
expected: 2,
|
||||
},
|
||||
// Multiple servers, multiple datacenters
|
||||
{
|
||||
members: []serf.Member{
|
||||
makeMember("3", "r1"),
|
||||
makeMember("2", "r1"),
|
||||
makeMember("1", "r2"),
|
||||
},
|
||||
region: "r1",
|
||||
expected: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
result, err := MinRaftProtocol(tc.region, tc.members)
|
||||
if result != tc.expected {
|
||||
t.Fatalf("bad: %v, %v, %v", result, tc.expected, tc)
|
||||
}
|
||||
if tc.err != nil {
|
||||
if err == nil || tc.err.Error() != err.Error() {
|
||||
t.Fatalf("bad: %v, %v, %v", err, tc.err, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestShuffleStrings(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Generate input
|
||||
|
||||
@@ -62,6 +62,7 @@ type PortsConfig struct {
|
||||
type ServerConfig struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
BootstrapExpect int `json:"bootstrap_expect"`
|
||||
RaftProtocol int `json:"raft_protocol,omitempty"`
|
||||
}
|
||||
|
||||
// ClientConfig is used to configure the client
|
||||
|
||||
491
vendor/github.com/hashicorp/consul/agent/consul/autopilot/autopilot.go
generated
vendored
Normal file
491
vendor/github.com/hashicorp/consul/agent/consul/autopilot/autopilot.go
generated
vendored
Normal file
@@ -0,0 +1,491 @@
|
||||
package autopilot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-version"
|
||||
"github.com/hashicorp/raft"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
// Delegate is the interface for the Autopilot mechanism
|
||||
type Delegate interface {
|
||||
AutopilotConfig() *Config
|
||||
FetchStats(context.Context, []serf.Member) map[string]*ServerStats
|
||||
IsServer(serf.Member) (*ServerInfo, error)
|
||||
NotifyHealth(OperatorHealthReply)
|
||||
PromoteNonVoters(*Config, OperatorHealthReply) ([]raft.Server, error)
|
||||
Raft() *raft.Raft
|
||||
Serf() *serf.Serf
|
||||
}
|
||||
|
||||
// Autopilot is a mechanism for automatically managing the Raft
|
||||
// quorum using server health information along with updates from Serf gossip.
|
||||
// For more information, see https://www.consul.io/docs/guides/autopilot.html
|
||||
type Autopilot struct {
|
||||
logger *log.Logger
|
||||
delegate Delegate
|
||||
|
||||
interval time.Duration
|
||||
healthInterval time.Duration
|
||||
|
||||
clusterHealth OperatorHealthReply
|
||||
clusterHealthLock sync.RWMutex
|
||||
|
||||
removeDeadCh chan struct{}
|
||||
shutdownCh chan struct{}
|
||||
waitGroup sync.WaitGroup
|
||||
}
|
||||
|
||||
type ServerInfo struct {
|
||||
Name string
|
||||
ID string
|
||||
Addr net.Addr
|
||||
Build version.Version
|
||||
Status serf.MemberStatus
|
||||
}
|
||||
|
||||
func NewAutopilot(logger *log.Logger, delegate Delegate, interval, healthInterval time.Duration) *Autopilot {
|
||||
return &Autopilot{
|
||||
logger: logger,
|
||||
delegate: delegate,
|
||||
interval: interval,
|
||||
healthInterval: healthInterval,
|
||||
removeDeadCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Autopilot) Start() {
|
||||
a.shutdownCh = make(chan struct{})
|
||||
a.waitGroup = sync.WaitGroup{}
|
||||
a.waitGroup.Add(1)
|
||||
|
||||
go a.run()
|
||||
}
|
||||
|
||||
func (a *Autopilot) Stop() {
|
||||
close(a.shutdownCh)
|
||||
a.waitGroup.Wait()
|
||||
}
|
||||
|
||||
// run periodically looks for nonvoting servers to promote and dead servers to remove.
|
||||
func (a *Autopilot) run() {
|
||||
defer a.waitGroup.Done()
|
||||
|
||||
// Monitor server health until shutdown
|
||||
ticker := time.NewTicker(a.interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-a.shutdownCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := a.promoteServers(); err != nil {
|
||||
a.logger.Printf("[ERR] autopilot: Error promoting servers: %v", err)
|
||||
}
|
||||
|
||||
if err := a.pruneDeadServers(); err != nil {
|
||||
a.logger.Printf("[ERR] autopilot: Error checking for dead servers to remove: %s", err)
|
||||
}
|
||||
case <-a.removeDeadCh:
|
||||
if err := a.pruneDeadServers(); err != nil {
|
||||
a.logger.Printf("[ERR] autopilot: Error checking for dead servers to remove: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// promoteServers asks the delegate for any promotions and carries them out.
|
||||
func (a *Autopilot) promoteServers() error {
|
||||
conf := a.delegate.AutopilotConfig()
|
||||
if conf == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip the non-voter promotions unless all servers support the new APIs
|
||||
minRaftProtocol, err := a.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting server raft protocol versions: %s", err)
|
||||
}
|
||||
if minRaftProtocol >= 3 {
|
||||
promotions, err := a.delegate.PromoteNonVoters(conf, a.GetClusterHealth())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking for non-voters to promote: %s", err)
|
||||
}
|
||||
if err := a.handlePromotions(promotions); err != nil {
|
||||
return fmt.Errorf("error handling promotions: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// fmtServer prints info about a server in a standard way for logging.
|
||||
func fmtServer(server raft.Server) string {
|
||||
return fmt.Sprintf("Server (ID: %q Address: %q)", server.ID, server.Address)
|
||||
}
|
||||
|
||||
// NumPeers counts the number of voting peers in the given raft config.
|
||||
func NumPeers(raftConfig raft.Configuration) int {
|
||||
var numPeers int
|
||||
for _, server := range raftConfig.Servers {
|
||||
if server.Suffrage == raft.Voter {
|
||||
numPeers++
|
||||
}
|
||||
}
|
||||
return numPeers
|
||||
}
|
||||
|
||||
// RemoveDeadServers triggers a pruning of dead servers in a non-blocking way.
|
||||
func (a *Autopilot) RemoveDeadServers() {
|
||||
select {
|
||||
case a.removeDeadCh <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// pruneDeadServers removes up to numPeers/2 failed servers
|
||||
func (a *Autopilot) pruneDeadServers() error {
|
||||
conf := a.delegate.AutopilotConfig()
|
||||
if conf == nil || !conf.CleanupDeadServers {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Failed servers are known to Serf and marked failed, and stale servers
|
||||
// are known to Raft but not Serf.
|
||||
var failed []string
|
||||
staleRaftServers := make(map[string]raft.Server)
|
||||
raftNode := a.delegate.Raft()
|
||||
future := raftNode.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
raftConfig := future.Configuration()
|
||||
for _, server := range raftConfig.Servers {
|
||||
staleRaftServers[string(server.Address)] = server
|
||||
}
|
||||
|
||||
serfLAN := a.delegate.Serf()
|
||||
for _, member := range serfLAN.Members() {
|
||||
server, err := a.delegate.IsServer(member)
|
||||
if err != nil {
|
||||
a.logger.Printf("[INFO] autopilot: Error parsing server info for %q: %s", member.Name, err)
|
||||
continue
|
||||
}
|
||||
if server != nil {
|
||||
// todo(kyhavlov): change this to index by UUID
|
||||
if _, ok := staleRaftServers[server.Addr.String()]; ok {
|
||||
delete(staleRaftServers, server.Addr.String())
|
||||
}
|
||||
|
||||
if member.Status == serf.StatusFailed {
|
||||
failed = append(failed, member.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We can bail early if there's nothing to do.
|
||||
removalCount := len(failed) + len(staleRaftServers)
|
||||
if removalCount == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Only do removals if a minority of servers will be affected.
|
||||
peers := NumPeers(raftConfig)
|
||||
if removalCount < peers/2 {
|
||||
for _, node := range failed {
|
||||
a.logger.Printf("[INFO] autopilot: Attempting removal of failed server node %q", node)
|
||||
go serfLAN.RemoveFailedNode(node)
|
||||
}
|
||||
|
||||
minRaftProtocol, err := a.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, raftServer := range staleRaftServers {
|
||||
a.logger.Printf("[INFO] autopilot: Attempting removal of stale %s", fmtServer(raftServer))
|
||||
var future raft.Future
|
||||
if minRaftProtocol >= 2 {
|
||||
future = raftNode.RemoveServer(raftServer.ID, 0, 0)
|
||||
} else {
|
||||
future = raftNode.RemovePeer(raftServer.Address)
|
||||
}
|
||||
if err := future.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
a.logger.Printf("[DEBUG] autopilot: Failed to remove dead servers: too many dead servers: %d/%d", removalCount, peers)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MinRaftProtocol returns the lowest supported Raft protocol among alive servers
|
||||
func (a *Autopilot) MinRaftProtocol() (int, error) {
|
||||
return minRaftProtocol(a.delegate.Serf().Members(), a.delegate.IsServer)
|
||||
}
|
||||
|
||||
func minRaftProtocol(members []serf.Member, serverFunc func(serf.Member) (*ServerInfo, error)) (int, error) {
|
||||
minVersion := -1
|
||||
for _, m := range members {
|
||||
if m.Status != serf.StatusAlive {
|
||||
continue
|
||||
}
|
||||
|
||||
server, err := serverFunc(m)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if server == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
vsn, ok := m.Tags["raft_vsn"]
|
||||
if !ok {
|
||||
vsn = "1"
|
||||
}
|
||||
raftVsn, err := strconv.Atoi(vsn)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
if minVersion == -1 || raftVsn < minVersion {
|
||||
minVersion = raftVsn
|
||||
}
|
||||
}
|
||||
|
||||
if minVersion == -1 {
|
||||
return minVersion, fmt.Errorf("No servers found")
|
||||
}
|
||||
|
||||
return minVersion, nil
|
||||
}
|
||||
|
||||
// handlePromotions is a helper shared with Consul Enterprise that attempts to
|
||||
// apply desired server promotions to the Raft configuration.
|
||||
func (a *Autopilot) handlePromotions(promotions []raft.Server) error {
|
||||
// This used to wait to only promote to maintain an odd quorum of
|
||||
// servers, but this was at odds with the dead server cleanup when doing
|
||||
// rolling updates (add one new server, wait, and then kill an old
|
||||
// server). The dead server cleanup would still count the old server as
|
||||
// a peer, which is conservative and the right thing to do, and this
|
||||
// would wait to promote, so you could get into a stalemate. It is safer
|
||||
// to promote early than remove early, so by promoting as soon as
|
||||
// possible we have chosen that as the solution here.
|
||||
for _, server := range promotions {
|
||||
a.logger.Printf("[INFO] autopilot: Promoting %s to voter", fmtServer(server))
|
||||
addFuture := a.delegate.Raft().AddVoter(server.ID, server.Address, 0, 0)
|
||||
if err := addFuture.Error(); err != nil {
|
||||
return fmt.Errorf("failed to add raft peer: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// If we promoted a server, trigger a check to remove dead servers.
|
||||
if len(promotions) > 0 {
|
||||
select {
|
||||
case a.removeDeadCh <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ServerHealthLoop monitors the health of the servers in the cluster
|
||||
func (a *Autopilot) ServerHealthLoop(shutdownCh <-chan struct{}) {
|
||||
// Monitor server health until shutdown
|
||||
ticker := time.NewTicker(a.healthInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-shutdownCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := a.updateClusterHealth(); err != nil {
|
||||
a.logger.Printf("[ERR] autopilot: Error updating cluster health: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// updateClusterHealth fetches the Raft stats of the other servers and updates
|
||||
// s.clusterHealth based on the configured Autopilot thresholds
|
||||
func (a *Autopilot) updateClusterHealth() error {
|
||||
// Don't do anything if the min Raft version is too low
|
||||
minRaftProtocol, err := a.MinRaftProtocol()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting server raft protocol versions: %s", err)
|
||||
}
|
||||
if minRaftProtocol < 3 {
|
||||
return nil
|
||||
}
|
||||
|
||||
autopilotConf := a.delegate.AutopilotConfig()
|
||||
// Bail early if autopilot config hasn't been initialized yet
|
||||
if autopilotConf == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the the serf members which are Consul servers
|
||||
var serverMembers []serf.Member
|
||||
serverMap := make(map[string]*ServerInfo)
|
||||
for _, member := range a.delegate.Serf().Members() {
|
||||
if member.Status == serf.StatusLeft {
|
||||
continue
|
||||
}
|
||||
|
||||
server, err := a.delegate.IsServer(member)
|
||||
if err != nil {
|
||||
a.logger.Printf("[INFO] autopilot: Error parsing server info for %q: %s", member.Name, err)
|
||||
continue
|
||||
}
|
||||
if server != nil {
|
||||
serverMap[server.ID] = server
|
||||
serverMembers = append(serverMembers, member)
|
||||
}
|
||||
}
|
||||
|
||||
raftNode := a.delegate.Raft()
|
||||
future := raftNode.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
return fmt.Errorf("error getting Raft configuration %s", err)
|
||||
}
|
||||
servers := future.Configuration().Servers
|
||||
|
||||
// Fetch the health for each of the servers in parallel so we get as
|
||||
// consistent of a sample as possible. We capture the leader's index
|
||||
// here as well so it roughly lines up with the same point in time.
|
||||
targetLastIndex := raftNode.LastIndex()
|
||||
var fetchList []*ServerInfo
|
||||
for _, server := range servers {
|
||||
if parts, ok := serverMap[string(server.ID)]; ok {
|
||||
fetchList = append(fetchList, parts)
|
||||
}
|
||||
}
|
||||
d := time.Now().Add(a.healthInterval / 2)
|
||||
ctx, cancel := context.WithDeadline(context.Background(), d)
|
||||
defer cancel()
|
||||
fetchedStats := a.delegate.FetchStats(ctx, serverMembers)
|
||||
|
||||
// Build a current list of server healths
|
||||
leader := raftNode.Leader()
|
||||
var clusterHealth OperatorHealthReply
|
||||
voterCount := 0
|
||||
healthyCount := 0
|
||||
healthyVoterCount := 0
|
||||
for _, server := range servers {
|
||||
health := ServerHealth{
|
||||
ID: string(server.ID),
|
||||
Address: string(server.Address),
|
||||
Leader: server.Address == leader,
|
||||
LastContact: -1,
|
||||
Voter: server.Suffrage == raft.Voter,
|
||||
}
|
||||
|
||||
parts, ok := serverMap[string(server.ID)]
|
||||
if ok {
|
||||
health.Name = parts.Name
|
||||
health.SerfStatus = parts.Status
|
||||
health.Version = parts.Build.String()
|
||||
if stats, ok := fetchedStats[string(server.ID)]; ok {
|
||||
if err := a.updateServerHealth(&health, parts, stats, autopilotConf, targetLastIndex); err != nil {
|
||||
a.logger.Printf("[WARN] autopilot: Error updating server %s health: %s", fmtServer(server), err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
health.SerfStatus = serf.StatusNone
|
||||
}
|
||||
|
||||
if health.Voter {
|
||||
voterCount++
|
||||
}
|
||||
if health.Healthy {
|
||||
healthyCount++
|
||||
if health.Voter {
|
||||
healthyVoterCount++
|
||||
}
|
||||
}
|
||||
|
||||
clusterHealth.Servers = append(clusterHealth.Servers, health)
|
||||
}
|
||||
clusterHealth.Healthy = healthyCount == len(servers)
|
||||
|
||||
// If we have extra healthy voters, update FailureTolerance
|
||||
requiredQuorum := voterCount/2 + 1
|
||||
if healthyVoterCount > requiredQuorum {
|
||||
clusterHealth.FailureTolerance = healthyVoterCount - requiredQuorum
|
||||
}
|
||||
|
||||
a.delegate.NotifyHealth(clusterHealth)
|
||||
|
||||
a.clusterHealthLock.Lock()
|
||||
a.clusterHealth = clusterHealth
|
||||
a.clusterHealthLock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// updateServerHealth computes the resulting health of the server based on its
|
||||
// fetched stats and the state of the leader.
|
||||
func (a *Autopilot) updateServerHealth(health *ServerHealth,
|
||||
server *ServerInfo, stats *ServerStats,
|
||||
autopilotConf *Config, targetLastIndex uint64) error {
|
||||
|
||||
health.LastTerm = stats.LastTerm
|
||||
health.LastIndex = stats.LastIndex
|
||||
|
||||
if stats.LastContact != "never" {
|
||||
var err error
|
||||
health.LastContact, err = time.ParseDuration(stats.LastContact)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing last_contact duration: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
raftNode := a.delegate.Raft()
|
||||
lastTerm, err := strconv.ParseUint(raftNode.Stats()["last_log_term"], 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing last_log_term: %s", err)
|
||||
}
|
||||
health.Healthy = health.IsHealthy(lastTerm, targetLastIndex, autopilotConf)
|
||||
|
||||
// If this is a new server or the health changed, reset StableSince
|
||||
lastHealth := a.GetServerHealth(server.ID)
|
||||
if lastHealth == nil || lastHealth.Healthy != health.Healthy {
|
||||
health.StableSince = time.Now()
|
||||
} else {
|
||||
health.StableSince = lastHealth.StableSince
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Autopilot) GetClusterHealth() OperatorHealthReply {
|
||||
a.clusterHealthLock.RLock()
|
||||
defer a.clusterHealthLock.RUnlock()
|
||||
return a.clusterHealth
|
||||
}
|
||||
|
||||
func (a *Autopilot) GetServerHealth(id string) *ServerHealth {
|
||||
a.clusterHealthLock.RLock()
|
||||
defer a.clusterHealthLock.RUnlock()
|
||||
return a.clusterHealth.ServerHealth(id)
|
||||
}
|
||||
|
||||
func IsPotentialVoter(suffrage raft.ServerSuffrage) bool {
|
||||
switch suffrage {
|
||||
case raft.Voter, raft.Staging:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
26
vendor/github.com/hashicorp/consul/agent/consul/autopilot/promotion.go
generated
vendored
Normal file
26
vendor/github.com/hashicorp/consul/agent/consul/autopilot/promotion.go
generated
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
package autopilot
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/raft"
|
||||
)
|
||||
|
||||
// PromoteStableServers is a basic autopilot promotion policy that promotes any
|
||||
// server which has been healthy and stable for the duration specified in the
|
||||
// given Autopilot config.
|
||||
func PromoteStableServers(autopilotConfig *Config, health OperatorHealthReply, servers []raft.Server) []raft.Server {
|
||||
// Find any non-voters eligible for promotion.
|
||||
now := time.Now()
|
||||
var promotions []raft.Server
|
||||
for _, server := range servers {
|
||||
if !IsPotentialVoter(server.Suffrage) {
|
||||
health := health.ServerHealth(string(server.ID))
|
||||
if health.IsStable(now, autopilotConfig) {
|
||||
promotions = append(promotions, server)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return promotions
|
||||
}
|
||||
158
vendor/github.com/hashicorp/consul/agent/consul/autopilot/structs.go
generated
vendored
Normal file
158
vendor/github.com/hashicorp/consul/agent/consul/autopilot/structs.go
generated
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
package autopilot
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
// Config holds the Autopilot configuration for a cluster.
|
||||
type Config struct {
|
||||
// CleanupDeadServers controls whether to remove dead servers when a new
|
||||
// server is added to the Raft peers.
|
||||
CleanupDeadServers bool
|
||||
|
||||
// LastContactThreshold is the limit on the amount of time a server can go
|
||||
// without leader contact before being considered unhealthy.
|
||||
LastContactThreshold time.Duration
|
||||
|
||||
// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
|
||||
// be behind before being considered unhealthy.
|
||||
MaxTrailingLogs uint64
|
||||
|
||||
// ServerStabilizationTime is the minimum amount of time a server must be
|
||||
// in a stable, healthy state before it can be added to the cluster. Only
|
||||
// applicable with Raft protocol version 3 or higher.
|
||||
ServerStabilizationTime time.Duration
|
||||
|
||||
// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
|
||||
// servers into zones for redundancy. If left blank, this feature will be disabled.
|
||||
RedundancyZoneTag string
|
||||
|
||||
// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
|
||||
// strategy of waiting until enough newer-versioned servers have been added to the
|
||||
// cluster before promoting them to voters.
|
||||
DisableUpgradeMigration bool
|
||||
|
||||
// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
|
||||
// performing upgrade migrations. If left blank, the Consul version will be used.
|
||||
UpgradeVersionTag string
|
||||
|
||||
// CreateIndex/ModifyIndex store the create/modify indexes of this configuration.
|
||||
CreateIndex uint64
|
||||
ModifyIndex uint64
|
||||
}
|
||||
|
||||
// ServerHealth is the health (from the leader's point of view) of a server.
|
||||
type ServerHealth struct {
|
||||
// ID is the raft ID of the server.
|
||||
ID string
|
||||
|
||||
// Name is the node name of the server.
|
||||
Name string
|
||||
|
||||
// Address is the address of the server.
|
||||
Address string
|
||||
|
||||
// The status of the SerfHealth check for the server.
|
||||
SerfStatus serf.MemberStatus
|
||||
|
||||
// Version is the version of the server.
|
||||
Version string
|
||||
|
||||
// Leader is whether this server is currently the leader.
|
||||
Leader bool
|
||||
|
||||
// LastContact is the time since this node's last contact with the leader.
|
||||
LastContact time.Duration
|
||||
|
||||
// LastTerm is the highest leader term this server has a record of in its Raft log.
|
||||
LastTerm uint64
|
||||
|
||||
// LastIndex is the last log index this server has a record of in its Raft log.
|
||||
LastIndex uint64
|
||||
|
||||
// Healthy is whether or not the server is healthy according to the current
|
||||
// Autopilot config.
|
||||
Healthy bool
|
||||
|
||||
// Voter is whether this is a voting server.
|
||||
Voter bool
|
||||
|
||||
// StableSince is the last time this server's Healthy value changed.
|
||||
StableSince time.Time
|
||||
}
|
||||
|
||||
// IsHealthy determines whether this ServerHealth is considered healthy
|
||||
// based on the given Autopilot config
|
||||
func (h *ServerHealth) IsHealthy(lastTerm uint64, leaderLastIndex uint64, autopilotConf *Config) bool {
|
||||
if h.SerfStatus != serf.StatusAlive {
|
||||
return false
|
||||
}
|
||||
|
||||
if h.LastContact > autopilotConf.LastContactThreshold || h.LastContact < 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if h.LastTerm != lastTerm {
|
||||
return false
|
||||
}
|
||||
|
||||
if leaderLastIndex > autopilotConf.MaxTrailingLogs && h.LastIndex < leaderLastIndex-autopilotConf.MaxTrailingLogs {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// IsStable returns true if the ServerHealth shows a stable, passing state
|
||||
// according to the given AutopilotConfig
|
||||
func (h *ServerHealth) IsStable(now time.Time, conf *Config) bool {
|
||||
if h == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if !h.Healthy {
|
||||
return false
|
||||
}
|
||||
|
||||
if now.Sub(h.StableSince) < conf.ServerStabilizationTime {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// ServerStats holds miscellaneous Raft metrics for a server
|
||||
type ServerStats struct {
|
||||
// LastContact is the time since this node's last contact with the leader.
|
||||
LastContact string
|
||||
|
||||
// LastTerm is the highest leader term this server has a record of in its Raft log.
|
||||
LastTerm uint64
|
||||
|
||||
// LastIndex is the last log index this server has a record of in its Raft log.
|
||||
LastIndex uint64
|
||||
}
|
||||
|
||||
// OperatorHealthReply is a representation of the overall health of the cluster
|
||||
type OperatorHealthReply struct {
|
||||
// Healthy is true if all the servers in the cluster are healthy.
|
||||
Healthy bool
|
||||
|
||||
// FailureTolerance is the number of healthy servers that could be lost without
|
||||
// an outage occurring.
|
||||
FailureTolerance int
|
||||
|
||||
// Servers holds the health of each server.
|
||||
Servers []ServerHealth
|
||||
}
|
||||
|
||||
func (o *OperatorHealthReply) ServerHealth(id string) *ServerHealth {
|
||||
for _, health := range o.Servers {
|
||||
if health.ID == id {
|
||||
return &health
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
310
vendor/github.com/hashicorp/consul/command/flags/config.go
generated
vendored
Normal file
310
vendor/github.com/hashicorp/consul/command/flags/config.go
generated
vendored
Normal file
@@ -0,0 +1,310 @@
|
||||
package flags
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
// TODO (slackpad) - Trying out a different pattern here for config handling.
|
||||
// These classes support the flag.Value interface but work in a manner where
|
||||
// we can tell if they have been set. This lets us work with an all-pointer
|
||||
// config structure and merge it in a clean-ish way. If this ends up being a
|
||||
// good pattern we should pull this out into a reusable library.
|
||||
|
||||
// ConfigDecodeHook should be passed to mapstructure in order to decode into
|
||||
// the *Value objects here.
|
||||
var ConfigDecodeHook = mapstructure.ComposeDecodeHookFunc(
|
||||
BoolToBoolValueFunc(),
|
||||
StringToDurationValueFunc(),
|
||||
StringToStringValueFunc(),
|
||||
Float64ToUintValueFunc(),
|
||||
)
|
||||
|
||||
// BoolValue provides a flag value that's aware if it has been set.
|
||||
type BoolValue struct {
|
||||
v *bool
|
||||
}
|
||||
|
||||
// IsBoolFlag is an optional method of the flag.Value
|
||||
// interface which marks this value as boolean when
|
||||
// the return value is true. See flag.Value for details.
|
||||
func (b *BoolValue) IsBoolFlag() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Merge will overlay this value if it has been set.
|
||||
func (b *BoolValue) Merge(onto *bool) {
|
||||
if b.v != nil {
|
||||
*onto = *(b.v)
|
||||
}
|
||||
}
|
||||
|
||||
// Set implements the flag.Value interface.
|
||||
func (b *BoolValue) Set(v string) error {
|
||||
if b.v == nil {
|
||||
b.v = new(bool)
|
||||
}
|
||||
var err error
|
||||
*(b.v), err = strconv.ParseBool(v)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements the flag.Value interface.
|
||||
func (b *BoolValue) String() string {
|
||||
var current bool
|
||||
if b.v != nil {
|
||||
current = *(b.v)
|
||||
}
|
||||
return fmt.Sprintf("%v", current)
|
||||
}
|
||||
|
||||
// BoolToBoolValueFunc is a mapstructure hook that looks for an incoming bool
|
||||
// mapped to a BoolValue and does the translation.
|
||||
func BoolToBoolValueFunc() mapstructure.DecodeHookFunc {
|
||||
return func(
|
||||
f reflect.Type,
|
||||
t reflect.Type,
|
||||
data interface{}) (interface{}, error) {
|
||||
if f.Kind() != reflect.Bool {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
val := BoolValue{}
|
||||
if t != reflect.TypeOf(val) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
val.v = new(bool)
|
||||
*(val.v) = data.(bool)
|
||||
return val, nil
|
||||
}
|
||||
}
|
||||
|
||||
// DurationValue provides a flag value that's aware if it has been set.
|
||||
type DurationValue struct {
|
||||
v *time.Duration
|
||||
}
|
||||
|
||||
// Merge will overlay this value if it has been set.
|
||||
func (d *DurationValue) Merge(onto *time.Duration) {
|
||||
if d.v != nil {
|
||||
*onto = *(d.v)
|
||||
}
|
||||
}
|
||||
|
||||
// Set implements the flag.Value interface.
|
||||
func (d *DurationValue) Set(v string) error {
|
||||
if d.v == nil {
|
||||
d.v = new(time.Duration)
|
||||
}
|
||||
var err error
|
||||
*(d.v), err = time.ParseDuration(v)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements the flag.Value interface.
|
||||
func (d *DurationValue) String() string {
|
||||
var current time.Duration
|
||||
if d.v != nil {
|
||||
current = *(d.v)
|
||||
}
|
||||
return current.String()
|
||||
}
|
||||
|
||||
// StringToDurationValueFunc is a mapstructure hook that looks for an incoming
|
||||
// string mapped to a DurationValue and does the translation.
|
||||
func StringToDurationValueFunc() mapstructure.DecodeHookFunc {
|
||||
return func(
|
||||
f reflect.Type,
|
||||
t reflect.Type,
|
||||
data interface{}) (interface{}, error) {
|
||||
if f.Kind() != reflect.String {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
val := DurationValue{}
|
||||
if t != reflect.TypeOf(val) {
|
||||
return data, nil
|
||||
}
|
||||
if err := val.Set(data.(string)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
}
|
||||
|
||||
// StringValue provides a flag value that's aware if it has been set.
|
||||
type StringValue struct {
|
||||
v *string
|
||||
}
|
||||
|
||||
// Merge will overlay this value if it has been set.
|
||||
func (s *StringValue) Merge(onto *string) {
|
||||
if s.v != nil {
|
||||
*onto = *(s.v)
|
||||
}
|
||||
}
|
||||
|
||||
// Set implements the flag.Value interface.
|
||||
func (s *StringValue) Set(v string) error {
|
||||
if s.v == nil {
|
||||
s.v = new(string)
|
||||
}
|
||||
*(s.v) = v
|
||||
return nil
|
||||
}
|
||||
|
||||
// String implements the flag.Value interface.
|
||||
func (s *StringValue) String() string {
|
||||
var current string
|
||||
if s.v != nil {
|
||||
current = *(s.v)
|
||||
}
|
||||
return current
|
||||
}
|
||||
|
||||
// StringToStringValueFunc is a mapstructure hook that looks for an incoming
|
||||
// string mapped to a StringValue and does the translation.
|
||||
func StringToStringValueFunc() mapstructure.DecodeHookFunc {
|
||||
return func(
|
||||
f reflect.Type,
|
||||
t reflect.Type,
|
||||
data interface{}) (interface{}, error) {
|
||||
if f.Kind() != reflect.String {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
val := StringValue{}
|
||||
if t != reflect.TypeOf(val) {
|
||||
return data, nil
|
||||
}
|
||||
val.v = new(string)
|
||||
*(val.v) = data.(string)
|
||||
return val, nil
|
||||
}
|
||||
}
|
||||
|
||||
// UintValue provides a flag value that's aware if it has been set.
|
||||
type UintValue struct {
|
||||
v *uint
|
||||
}
|
||||
|
||||
// Merge will overlay this value if it has been set.
|
||||
func (u *UintValue) Merge(onto *uint) {
|
||||
if u.v != nil {
|
||||
*onto = *(u.v)
|
||||
}
|
||||
}
|
||||
|
||||
// Set implements the flag.Value interface.
|
||||
func (u *UintValue) Set(v string) error {
|
||||
if u.v == nil {
|
||||
u.v = new(uint)
|
||||
}
|
||||
parsed, err := strconv.ParseUint(v, 0, 64)
|
||||
*(u.v) = (uint)(parsed)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements the flag.Value interface.
|
||||
func (u *UintValue) String() string {
|
||||
var current uint
|
||||
if u.v != nil {
|
||||
current = *(u.v)
|
||||
}
|
||||
return fmt.Sprintf("%v", current)
|
||||
}
|
||||
|
||||
// Float64ToUintValueFunc is a mapstructure hook that looks for an incoming
|
||||
// float64 mapped to a UintValue and does the translation.
|
||||
func Float64ToUintValueFunc() mapstructure.DecodeHookFunc {
|
||||
return func(
|
||||
f reflect.Type,
|
||||
t reflect.Type,
|
||||
data interface{}) (interface{}, error) {
|
||||
if f.Kind() != reflect.Float64 {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
val := UintValue{}
|
||||
if t != reflect.TypeOf(val) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
fv := data.(float64)
|
||||
if fv < 0 {
|
||||
return nil, fmt.Errorf("value cannot be negative")
|
||||
}
|
||||
|
||||
// The standard guarantees at least this, and this is fine for
|
||||
// values we expect to use in configs vs. being fancy with the
|
||||
// machine's size for uint.
|
||||
if fv > (1<<32 - 1) {
|
||||
return nil, fmt.Errorf("value is too large")
|
||||
}
|
||||
|
||||
val.v = new(uint)
|
||||
*(val.v) = (uint)(fv)
|
||||
return val, nil
|
||||
}
|
||||
}
|
||||
|
||||
// VisitFn is a callback that gets a chance to visit each file found during a
|
||||
// traversal with visit().
|
||||
type VisitFn func(path string) error
|
||||
|
||||
// Visit will call the visitor function on the path if it's a file, or for each
|
||||
// file in the path if it's a directory. Directories will not be recursed into,
|
||||
// and files in the directory will be visited in alphabetical order.
|
||||
func Visit(path string, visitor VisitFn) error {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading %q: %v", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking %q: %v", path, err)
|
||||
}
|
||||
|
||||
if !fi.IsDir() {
|
||||
if err := visitor(path); err != nil {
|
||||
return fmt.Errorf("error in %q: %v", path, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
contents, err := f.Readdir(-1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error listing %q: %v", path, err)
|
||||
}
|
||||
|
||||
sort.Sort(dirEnts(contents))
|
||||
for _, fi := range contents {
|
||||
if fi.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
fullPath := filepath.Join(path, fi.Name())
|
||||
if err := visitor(fullPath); err != nil {
|
||||
return fmt.Errorf("error in %q: %v", fullPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// dirEnts applies sort.Interface to directory entries for sorting by name.
|
||||
type dirEnts []os.FileInfo
|
||||
|
||||
func (d dirEnts) Len() int { return len(d) }
|
||||
func (d dirEnts) Less(i, j int) bool { return d[i].Name() < d[j].Name() }
|
||||
func (d dirEnts) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
|
||||
37
vendor/github.com/hashicorp/consul/command/flags/flag_map_value.go
generated
vendored
Normal file
37
vendor/github.com/hashicorp/consul/command/flags/flag_map_value.go
generated
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
package flags
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Ensure implements
|
||||
var _ flag.Value = (*FlagMapValue)(nil)
|
||||
|
||||
// FlagMapValue is a flag implementation used to provide key=value semantics
|
||||
// multiple times.
|
||||
type FlagMapValue map[string]string
|
||||
|
||||
func (h *FlagMapValue) String() string {
|
||||
return fmt.Sprintf("%v", *h)
|
||||
}
|
||||
|
||||
func (h *FlagMapValue) Set(value string) error {
|
||||
idx := strings.Index(value, "=")
|
||||
if idx == -1 {
|
||||
return fmt.Errorf("Missing \"=\" value in argument: %s", value)
|
||||
}
|
||||
|
||||
key, value := value[0:idx], value[idx+1:]
|
||||
|
||||
if *h == nil {
|
||||
*h = make(map[string]string)
|
||||
}
|
||||
|
||||
headers := *h
|
||||
headers[key] = value
|
||||
*h = headers
|
||||
|
||||
return nil
|
||||
}
|
||||
20
vendor/github.com/hashicorp/consul/command/flags/flag_slice_value.go
generated
vendored
Normal file
20
vendor/github.com/hashicorp/consul/command/flags/flag_slice_value.go
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
package flags
|
||||
|
||||
import "strings"
|
||||
|
||||
// AppendSliceValue implements the flag.Value interface and allows multiple
|
||||
// calls to the same variable to append a list.
|
||||
type AppendSliceValue []string
|
||||
|
||||
func (s *AppendSliceValue) String() string {
|
||||
return strings.Join(*s, ",")
|
||||
}
|
||||
|
||||
func (s *AppendSliceValue) Set(value string) error {
|
||||
if *s == nil {
|
||||
*s = make([]string, 0, 1)
|
||||
}
|
||||
|
||||
*s = append(*s, value)
|
||||
return nil
|
||||
}
|
||||
100
vendor/github.com/hashicorp/consul/command/flags/http.go
generated
vendored
Normal file
100
vendor/github.com/hashicorp/consul/command/flags/http.go
generated
vendored
Normal file
@@ -0,0 +1,100 @@
package flags

import (
    "flag"

    "github.com/hashicorp/consul/api"
)

type HTTPFlags struct {
    // client api flags
    address       StringValue
    token         StringValue
    caFile        StringValue
    caPath        StringValue
    certFile      StringValue
    keyFile       StringValue
    tlsServerName StringValue

    // server flags
    datacenter StringValue
    stale      BoolValue
}

func (f *HTTPFlags) ClientFlags() *flag.FlagSet {
    fs := flag.NewFlagSet("", flag.ContinueOnError)
    fs.Var(&f.address, "http-addr",
        "The `address` and port of the Consul HTTP agent. The value can be an IP "+
            "address or DNS address, but it must also include the port. This can "+
            "also be specified via the CONSUL_HTTP_ADDR environment variable. The "+
            "default value is http://127.0.0.1:8500. The scheme can also be set to "+
            "HTTPS by setting the environment variable CONSUL_HTTP_SSL=true.")
    fs.Var(&f.token, "token",
        "ACL token to use in the request. This can also be specified via the "+
            "CONSUL_HTTP_TOKEN environment variable. If unspecified, the query will "+
            "default to the token of the Consul agent at the HTTP address.")
    fs.Var(&f.caFile, "ca-file",
        "Path to a CA file to use for TLS when communicating with Consul. This "+
            "can also be specified via the CONSUL_CACERT environment variable.")
    fs.Var(&f.caPath, "ca-path",
        "Path to a directory of CA certificates to use for TLS when communicating "+
            "with Consul. This can also be specified via the CONSUL_CAPATH environment variable.")
    fs.Var(&f.certFile, "client-cert",
        "Path to a client cert file to use for TLS when 'verify_incoming' is enabled. This "+
            "can also be specified via the CONSUL_CLIENT_CERT environment variable.")
    fs.Var(&f.keyFile, "client-key",
        "Path to a client key file to use for TLS when 'verify_incoming' is enabled. This "+
            "can also be specified via the CONSUL_CLIENT_KEY environment variable.")
    fs.Var(&f.tlsServerName, "tls-server-name",
        "The server name to use as the SNI host when connecting via TLS. This "+
            "can also be specified via the CONSUL_TLS_SERVER_NAME environment variable.")

    return fs
}

func (f *HTTPFlags) ServerFlags() *flag.FlagSet {
    fs := flag.NewFlagSet("", flag.ContinueOnError)
    fs.Var(&f.datacenter, "datacenter",
        "Name of the datacenter to query. If unspecified, this will default to "+
            "the datacenter of the queried agent.")
    fs.Var(&f.stale, "stale",
        "Permit any Consul server (non-leader) to respond to this request. This "+
            "allows for lower latency and higher throughput, but can result in "+
            "stale data. This option has no effect on non-read operations. The "+
            "default value is false.")
    return fs
}

func (f *HTTPFlags) Addr() string {
    return f.address.String()
}

func (f *HTTPFlags) Datacenter() string {
    return f.datacenter.String()
}

func (f *HTTPFlags) Stale() bool {
    if f.stale.v == nil {
        return false
    }
    return *f.stale.v
}

func (f *HTTPFlags) Token() string {
    return f.token.String()
}

func (f *HTTPFlags) APIClient() (*api.Client, error) {
    c := api.DefaultConfig()

    f.address.Merge(&c.Address)
    f.token.Merge(&c.Token)
    f.caFile.Merge(&c.TLSConfig.CAFile)
    f.caPath.Merge(&c.TLSConfig.CAPath)
    f.certFile.Merge(&c.TLSConfig.CertFile)
    f.keyFile.Merge(&c.TLSConfig.KeyFile)
    f.tlsServerName.Merge(&c.TLSConfig.Address)
    f.datacenter.Merge(&c.Datacenter)

    return api.NewClient(c)
}
15
vendor/github.com/hashicorp/consul/command/flags/merge.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
package flags

import "flag"

func Merge(dst, src *flag.FlagSet) {
    if dst == nil {
        panic("dst cannot be nil")
    }
    if src == nil {
        return
    }
    src.VisitAll(func(f *flag.Flag) {
        dst.Var(f.Value, f.Name, f.Usage)
    })
}
114
vendor/github.com/hashicorp/consul/command/flags/usage.go
generated
vendored
Normal file
@@ -0,0 +1,114 @@
package flags

import (
    "bytes"
    "flag"
    "fmt"
    "io"
    "strings"

    text "github.com/tonnerre/golang-text"
)

func Usage(txt string, flags *flag.FlagSet) string {
    u := &Usager{
        Usage: txt,
        Flags: flags,
    }
    return u.String()
}

type Usager struct {
    Usage string
    Flags *flag.FlagSet
}

func (u *Usager) String() string {
    out := new(bytes.Buffer)
    out.WriteString(strings.TrimSpace(u.Usage))
    out.WriteString("\n")
    out.WriteString("\n")

    if u.Flags != nil {
        f := &HTTPFlags{}
        clientFlags := f.ClientFlags()
        serverFlags := f.ServerFlags()

        var httpFlags, cmdFlags *flag.FlagSet
        u.Flags.VisitAll(func(f *flag.Flag) {
            if contains(clientFlags, f) || contains(serverFlags, f) {
                if httpFlags == nil {
                    httpFlags = flag.NewFlagSet("", flag.ContinueOnError)
                }
                httpFlags.Var(f.Value, f.Name, f.Usage)
            } else {
                if cmdFlags == nil {
                    cmdFlags = flag.NewFlagSet("", flag.ContinueOnError)
                }
                cmdFlags.Var(f.Value, f.Name, f.Usage)
            }
        })

        if httpFlags != nil {
            printTitle(out, "HTTP API Options")
            httpFlags.VisitAll(func(f *flag.Flag) {
                printFlag(out, f)
            })
        }

        if cmdFlags != nil {
            printTitle(out, "Command Options")
            cmdFlags.VisitAll(func(f *flag.Flag) {
                printFlag(out, f)
            })
        }
    }

    return strings.TrimRight(out.String(), "\n")
}

// printTitle prints a consistently-formatted title to the given writer.
func printTitle(w io.Writer, s string) {
    fmt.Fprintf(w, "%s\n\n", s)
}

// printFlag prints a single flag to the given writer.
func printFlag(w io.Writer, f *flag.Flag) {
    example, _ := flag.UnquoteUsage(f)
    if example != "" {
        fmt.Fprintf(w, "  -%s=<%s>\n", f.Name, example)
    } else {
        fmt.Fprintf(w, "  -%s\n", f.Name)
    }

    indented := wrapAtLength(f.Usage, 5)
    fmt.Fprintf(w, "%s\n\n", indented)
}

// contains returns true if the given flag is contained in the given flag
// set or false otherwise.
func contains(fs *flag.FlagSet, f *flag.Flag) bool {
    if fs == nil {
        return false
    }

    var in bool
    fs.VisitAll(func(hf *flag.Flag) {
        in = in || f.Name == hf.Name
    })
    return in
}

// maxLineLength is the maximum width of any line.
const maxLineLength int = 72

// wrapAtLength wraps the given text at the maxLineLength, taking into account
// any provided left padding.
func wrapAtLength(s string, pad int) string {
    wrapped := text.Wrap(s, maxLineLength-pad)
    lines := strings.Split(wrapped, "\n")
    for i, line := range lines {
        lines[i] = strings.Repeat(" ", pad) + line
    }
    return strings.Join(lines, "\n")
}
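
Taken together, the vendored helpers above are meant to be composed: `Merge` folds the shared HTTP flags into a command's own `flag.FlagSet`, and `Usage` renders the combined help text grouped into "HTTP API Options" and "Command Options". A small sketch (editor's illustration, not part of the vendored code; the command name and `-detailed` flag are invented):

```go
package main

import (
    "flag"
    "fmt"

    "github.com/hashicorp/consul/command/flags"
)

func main() {
    // A command-specific flag set plus the shared HTTP client flags.
    cmdFlags := flag.NewFlagSet("example", flag.ContinueOnError)
    cmdFlags.Bool("detailed", false, "Show verbose output.")

    hf := &flags.HTTPFlags{}
    flags.Merge(cmdFlags, hf.ClientFlags())

    // Usage splits the merged flags back into the two help sections.
    fmt.Println(flags.Usage("Usage: example [options]", cmdFlags))
}
```
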
19
vendor/github.com/tonnerre/golang-text/License
generated
vendored
Normal file
@@ -0,0 +1,19 @@
Copyright 2012 Keith Rarick

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
3
vendor/github.com/tonnerre/golang-text/Readme
generated
vendored
Normal file
@@ -0,0 +1,3 @@
This is a Go package for manipulating paragraphs of text.

See http://go.pkgdoc.org/github.com/kr/text for full documentation.
3
vendor/github.com/tonnerre/golang-text/doc.go
generated
vendored
Normal file
@@ -0,0 +1,3 @@
// Package text provides rudimentary functions for manipulating text in
// paragraphs.
package text
74
vendor/github.com/tonnerre/golang-text/indent.go
generated
vendored
Normal file
@@ -0,0 +1,74 @@
package text

import (
    "io"
)

// Indent inserts prefix at the beginning of each non-empty line of s. The
// end-of-line marker is NL.
func Indent(s, prefix string) string {
    return string(IndentBytes([]byte(s), []byte(prefix)))
}

// IndentBytes inserts prefix at the beginning of each non-empty line of b.
// The end-of-line marker is NL.
func IndentBytes(b, prefix []byte) []byte {
    var res []byte
    bol := true
    for _, c := range b {
        if bol && c != '\n' {
            res = append(res, prefix...)
        }
        res = append(res, c)
        bol = c == '\n'
    }
    return res
}

// Writer indents each line of its input.
type indentWriter struct {
    w   io.Writer
    bol bool
    pre [][]byte
    sel int
    off int
}

// NewIndentWriter makes a new write filter that indents the input
// lines. Each line is prefixed in order with the corresponding
// element of pre. If there are more lines than elements, the last
// element of pre is repeated for each subsequent line.
func NewIndentWriter(w io.Writer, pre ...[]byte) io.Writer {
    return &indentWriter{
        w:   w,
        pre: pre,
        bol: true,
    }
}

// The only errors returned are from the underlying indentWriter.
func (w *indentWriter) Write(p []byte) (n int, err error) {
    for _, c := range p {
        if w.bol {
            var i int
            i, err = w.w.Write(w.pre[w.sel][w.off:])
            w.off += i
            if err != nil {
                return n, err
            }
        }
        _, err = w.w.Write([]byte{c})
        if err != nil {
            return n, err
        }
        n++
        w.bol = c == '\n'
        if w.bol {
            w.off = 0
            if w.sel < len(w.pre)-1 {
                w.sel++
            }
        }
    }
    return n, nil
}
86
vendor/github.com/tonnerre/golang-text/wrap.go
generated
vendored
Executable file
@@ -0,0 +1,86 @@
package text

import (
    "bytes"
    "math"
)

var (
    nl = []byte{'\n'}
    sp = []byte{' '}
)

const defaultPenalty = 1e5

// Wrap wraps s into a paragraph of lines of length lim, with minimal
// raggedness.
func Wrap(s string, lim int) string {
    return string(WrapBytes([]byte(s), lim))
}

// WrapBytes wraps b into a paragraph of lines of length lim, with minimal
// raggedness.
func WrapBytes(b []byte, lim int) []byte {
    words := bytes.Split(bytes.Replace(bytes.TrimSpace(b), nl, sp, -1), sp)
    var lines [][]byte
    for _, line := range WrapWords(words, 1, lim, defaultPenalty) {
        lines = append(lines, bytes.Join(line, sp))
    }
    return bytes.Join(lines, nl)
}

// WrapWords is the low-level line-breaking algorithm, useful if you need more
// control over the details of the text wrapping process. For most uses, either
// Wrap or WrapBytes will be sufficient and more convenient.
//
// WrapWords splits a list of words into lines with minimal "raggedness",
// treating each byte as one unit, accounting for spc units between adjacent
// words on each line, and attempting to limit lines to lim units. Raggedness
// is the total error over all lines, where error is the square of the
// difference of the length of the line and lim. Too-long lines (which only
// happen when a single word is longer than lim units) have pen penalty units
// added to the error.
func WrapWords(words [][]byte, spc, lim, pen int) [][][]byte {
    n := len(words)

    length := make([][]int, n)
    for i := 0; i < n; i++ {
        length[i] = make([]int, n)
        length[i][i] = len(words[i])
        for j := i + 1; j < n; j++ {
            length[i][j] = length[i][j-1] + spc + len(words[j])
        }
    }

    nbrk := make([]int, n)
    cost := make([]int, n)
    for i := range cost {
        cost[i] = math.MaxInt32
    }
    for i := n - 1; i >= 0; i-- {
        if length[i][n-1] <= lim {
            cost[i] = 0
            nbrk[i] = n
        } else {
            for j := i + 1; j < n; j++ {
                d := lim - length[i][j-1]
                c := d*d + cost[j]
                if length[i][j-1] > lim {
                    c += pen // too-long lines get a worse penalty
                }
                if c < cost[i] {
                    cost[i] = c
                    nbrk[i] = j
                }
            }
        }
    }

    var lines [][][]byte
    i := 0
    for i < n {
        lines = append(lines, words[i:nbrk[i]])
        i = nbrk[i]
    }
    return lines
}
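
The `Wrap`/`WrapWords` functions above implement minimal-raggedness line breaking, which `wrapAtLength` in the vendored flags package uses for flag usage strings. A tiny editor's illustration of the behavior (the sample sentence and the width of 30 are arbitrary):

```go
package main

import (
    "fmt"

    text "github.com/tonnerre/golang-text"
)

func main() {
    s := "Autopilot is a set of features for automatic, operator-friendly management of Nomad servers."
    // Wrap reflows the sentence into lines of at most 30 bytes,
    // minimizing the squared per-line slack ("raggedness").
    fmt.Println(text.Wrap(s, 30))
}
```
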
3
vendor/vendor.json
vendored
@@ -123,7 +123,9 @@
{"path":"github.com/hashicorp/consul-template/template","checksumSHA1":"N9qobVzScLbTEnGE7MgFnnTbGBw=","revision":"26d029ad37335b3827a9fde5569b2c5e10dcac8f","revisionTime":"2017-10-31T14:25:17Z"},
{"path":"github.com/hashicorp/consul-template/version","checksumSHA1":"NB5+D4AuCNV9Bsqh3YFdPi4AJ6U=","revision":"26d029ad37335b3827a9fde5569b2c5e10dcac8f","revisionTime":"2017-10-31T14:25:17Z"},
{"path":"github.com/hashicorp/consul-template/watch","checksumSHA1":"b4+Y+02pY2Y5620F9ALzKg8Zmdw=","revision":"26d029ad37335b3827a9fde5569b2c5e10dcac8f","revisionTime":"2017-10-31T14:25:17Z"},
{"path":"github.com/hashicorp/consul/agent/consul/autopilot","checksumSHA1":"B2Y0S6Iq6ADURuXOGB7v79gU9WU=","revision":"d08ab9fd199434e5220276356ecf9617cfec1eb2","revisionTime":"2017-12-18T20:26:35Z"},
{"path":"github.com/hashicorp/consul/api","checksumSHA1":"XLfcIX2qpRr0o26aFMjCOzvw6jo=","revision":"51ea240df8476e02215d53fbfad5838bf0d44d21","revisionTime":"2017-10-16T16:22:40Z"},
{"path":"github.com/hashicorp/consul/command/flags","checksumSHA1":"XTQIYV+DPUVRKpVp0+y/78bWH3I=","revision":"d08ab9fd199434e5220276356ecf9617cfec1eb2","revisionTime":"2017-12-18T20:26:35Z"},
{"path":"github.com/hashicorp/consul/lib","checksumSHA1":"HGljdtVaqi/e3DgIHymLRLfPYhw=","revision":"bcafded4e60982d0b71e730f0b8564d73cb1d715","revisionTime":"2017-10-31T16:39:15Z"},
{"path":"github.com/hashicorp/consul/lib/freeport","checksumSHA1":"hDJiPli3EEGJE4vAezMi05oOC7o=","revision":"bcafded4e60982d0b71e730f0b8564d73cb1d715","revisionTime":"2017-10-31T16:39:15Z"},
{"path":"github.com/hashicorp/consul/test/porter","checksumSHA1":"5XjgqE4UIfwXvkq5VssGNc7uPhQ=","revision":"ad9425ca6353b8afcfebd19130a8cf768f7eac30","revisionTime":"2017-10-21T00:05:25Z"},
@@ -233,6 +235,7 @@
{"path":"github.com/stretchr/objx","checksumSHA1":"K0crHygPTP42i1nLKWphSlvOQJw=","revision":"1a9d0bb9f541897e62256577b352fdbc1fb4fd94","revisionTime":"2015-09-28T12:21:52Z"},
{"path":"github.com/stretchr/testify/mock","checksumSHA1":"o+jsS/rxceTym4M3reSPfrPxaio=","revision":"f6abca593680b2315d2075e0f5e2a9751e3f431a","revisionTime":"2017-06-01T20:57:54Z"},
{"path":"github.com/stretchr/testify/require","checksumSHA1":"7vs6dSc1PPGBKyzb/SCIyeMJPLQ=","revision":"f6abca593680b2315d2075e0f5e2a9751e3f431a","revisionTime":"2017-06-01T20:57:54Z"},
{"path":"github.com/tonnerre/golang-text","checksumSHA1":"t24KnvC9jRxiANVhpw2pqFpmEu8=","revision":"048ed3d792f7104850acbc8cfc01e5a6070f4c04","revisionTime":"2013-09-25T19:58:46Z"},
{"path":"github.com/ugorji/go/codec","checksumSHA1":"8G1zvpE4gTtWQRuP/x2HPVDmflo=","revision":"0053ebfd9d0ee06ccefbfe17072021e1d4acebee","revisionTime":"2017-06-20T06:01:02Z"},
{"path":"github.com/ugorji/go/codec/codecgen","checksumSHA1":"OgParimNuU2CJqr3pcTympeQZUc=","revision":"5efa3251c7f7d05e5d9704a69a984ec9f1386a40","revisionTime":"2017-06-20T10:48:52Z"},
{"path":"github.com/ulikunitz/xz","checksumSHA1":"z2kAtVle4NFV2OExI85fZoTcsI4=","revision":"0c6b41e72360850ca4f98dc341fd999726ea007f","revisionTime":"2017-06-05T21:53:11Z"},

@@ -109,9 +109,11 @@ The table below shows this endpoint's support for

### Parameters

- `address` `(string: <required>)` - Specifies the server to remove as
  `ip:port`. This may be provided multiple times and is provided as a
  querystring parameter.
- `address` `(string: <optional>)` - Specifies the server to remove as
  `ip:port`. This cannot be provided along with the `id` parameter.

- `id` `(string: <optional>)` - Specifies the server to remove as
  `id`. This cannot be provided along with the `address` parameter.

- `stale` - Specifies if the cluster should respond without an active leader.
  This is specified as a querystring parameter.
@@ -123,3 +125,232 @@ $ curl \
    --request DELETE \
    https://nomad.rocks/v1/operator/raft/peer?address=1.2.3.4
```

## Read Autopilot Configuration

This endpoint retrieves the latest Autopilot configuration.

| Method | Path                                | Produces           |
| ------ | ----------------------------------- | ------------------ |
| `GET`  | `/operator/autopilot/configuration` | `application/json` |

The table below shows this endpoint's support for
[blocking queries](/api/index.html#blocking-queries),
[consistency modes](/api/index.html#consistency-modes), and
[required ACLs](/api/index.html#acls).

| Blocking Queries | Consistency Modes | ACL Required    |
| ---------------- | ----------------- | --------------- |
| `NO`             | `none`            | `operator:read` |

### Parameters

- `dc` `(string: "")` - Specifies the datacenter to query. This will default to
  the datacenter of the agent being queried. This is specified as part of the
  URL as a query string.

- `stale` `(bool: false)` - If the cluster does not currently have a leader, an
  error will be returned. You can use the `?stale` query parameter to read the
  Raft configuration from any of the Nomad servers.

### Sample Request

```text
$ curl \
    https://nomad.rocks/v1/operator/autopilot/configuration
```

### Sample Response

```json
{
  "CleanupDeadServers": true,
  "LastContactThreshold": "200ms",
  "MaxTrailingLogs": 250,
  "ServerStabilizationTime": "10s",
  "RedundancyZoneTag": "",
  "DisableUpgradeMigration": false,
  "UpgradeVersionTag": "",
  "CreateIndex": 4,
  "ModifyIndex": 4
}
```

For more information about the Autopilot configuration options, see the
[agent configuration section](/docs/agent/configuration/autopilot.html).

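As a quick illustration of consuming this endpoint outside of `curl`, the following standalone Go sketch issues the GET request and decodes the fields shown in the sample response. It is an editor's example, not part of this change; the agent address `127.0.0.1:4646` (Nomad's default HTTP port) and the struct name are assumptions.

```go
package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
)

// AutopilotConfig mirrors the fields shown in the sample response above.
type AutopilotConfig struct {
    CleanupDeadServers      bool
    LastContactThreshold    string
    MaxTrailingLogs         uint64
    ServerStabilizationTime string
    RedundancyZoneTag       string
    DisableUpgradeMigration bool
    UpgradeVersionTag       string
    CreateIndex             uint64
    ModifyIndex             uint64
}

func main() {
    // Assumes a Nomad agent listening on the default HTTP port.
    resp, err := http.Get("http://127.0.0.1:4646/v1/operator/autopilot/configuration")
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()

    var conf AutopilotConfig
    if err := json.NewDecoder(resp.Body).Decode(&conf); err != nil {
        log.Fatal(err)
    }
    fmt.Printf("cleanup_dead_servers=%v last_contact_threshold=%s\n",
        conf.CleanupDeadServers, conf.LastContactThreshold)
}
```
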
## Update Autopilot Configuration

This endpoint updates the Autopilot configuration of the cluster.

| Method | Path                                | Produces           |
| ------ | ----------------------------------- | ------------------ |
| `PUT`  | `/operator/autopilot/configuration` | `application/json` |

The table below shows this endpoint's support for
[blocking queries](/api/index.html#blocking-queries),
[consistency modes](/api/index.html#consistency-modes), and
[required ACLs](/api/index.html#acls).

| Blocking Queries | Consistency Modes | ACL Required     |
| ---------------- | ----------------- | ---------------- |
| `NO`             | `none`            | `operator:write` |

### Parameters

- `dc` `(string: "")` - Specifies the datacenter to query. This will default to
  the datacenter of the agent being queried. This is specified as part of the
  URL as a query string.

- `cas` `(int: 0)` - Specifies to use a Check-And-Set operation. The update will
  only happen if the given index matches the `ModifyIndex` of the configuration
  at the time of writing.

- `CleanupDeadServers` `(bool: true)` - Specifies automatic removal of dead
  server nodes periodically and whenever a new server is added to the cluster.

- `LastContactThreshold` `(string: "200ms")` - Specifies the maximum amount of
  time a server can go without contact from the leader before being considered
  unhealthy. Must be a duration value such as `10s`.

- `MaxTrailingLogs` `(int: 250)` - Specifies the maximum number of log entries
  that a server can trail the leader by before being considered unhealthy.

- `ServerStabilizationTime` `(string: "10s")` - Specifies the minimum amount of
  time a server must be stable in the 'healthy' state before being added to the
  cluster. Only takes effect if all servers are running Raft protocol version 3
  or higher. Must be a duration value such as `30s`.

- `RedundancyZoneTag` `(string: "")` - Controls the node-meta key to use when
  Autopilot is separating servers into zones for redundancy. Only one server in
  each zone can be a voting member at one time. If left blank, this feature will
  be disabled.

- `DisableUpgradeMigration` `(bool: false)` - Disables Autopilot's upgrade
  migration strategy in Nomad Enterprise of waiting until enough
  newer-versioned servers have been added to the cluster before promoting any of
  them to voters.

- `UpgradeVersionTag` `(string: "")` - Controls the node-meta key to use for
  version info when performing upgrade migrations. If left blank, the Nomad
  version will be used.

### Sample Payload

```json
{
  "CleanupDeadServers": true,
  "LastContactThreshold": "200ms",
  "MaxTrailingLogs": 250,
  "ServerStabilizationTime": "10s",
  "RedundancyZoneTag": "",
  "DisableUpgradeMigration": false,
  "UpgradeVersionTag": "",
  "CreateIndex": 4,
  "ModifyIndex": 4
}
```

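For completeness, here is a hedged Go sketch of submitting the payload above with a `cas` check, again using only the standard library; the agent address, the `cas=4` value, and ignoring the response body are illustrative choices by the editor, not part of the documented change.

```go
package main

import (
    "bytes"
    "log"
    "net/http"
)

func main() {
    // The payload matches the "Sample Payload" above; cas=4 makes the write
    // conditional on ModifyIndex 4 (both values are illustrative).
    payload := []byte(`{
      "CleanupDeadServers": true,
      "LastContactThreshold": "200ms",
      "MaxTrailingLogs": 250,
      "ServerStabilizationTime": "10s",
      "RedundancyZoneTag": "",
      "DisableUpgradeMigration": false,
      "UpgradeVersionTag": ""
    }`)

    req, err := http.NewRequest("PUT",
        "http://127.0.0.1:4646/v1/operator/autopilot/configuration?cas=4",
        bytes.NewReader(payload))
    if err != nil {
        log.Fatal(err)
    }
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()
    log.Println("status:", resp.Status)
}
```
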
## Read Health

This endpoint queries the health status of the Nomad servers as tracked by Autopilot.

| Method | Path                         | Produces           |
| ------ | ---------------------------- | ------------------ |
| `GET`  | `/operator/autopilot/health` | `application/json` |

The table below shows this endpoint's support for
[blocking queries](/api/index.html#blocking-queries),
[consistency modes](/api/index.html#consistency-modes), and
[required ACLs](/api/index.html#acls).

| Blocking Queries | Consistency Modes | ACL Required    |
| ---------------- | ----------------- | --------------- |
| `NO`             | `none`            | `operator:read` |

### Parameters

- `dc` `(string: "")` - Specifies the datacenter to query. This will default to
  the datacenter of the agent being queried. This is specified as part of the
  URL as a query string.

### Sample Request

```text
$ curl \
    https://nomad.rocks/v1/operator/autopilot/health
```

### Sample Response

```json
{
  "Healthy": true,
  "FailureTolerance": 0,
  "Servers": [
    {
      "ID": "e349749b-3303-3ddf-959c-b5885a0e1f6e",
      "Name": "node1",
      "Address": "127.0.0.1:8300",
      "SerfStatus": "alive",
      "Version": "0.8.0",
      "Leader": true,
      "LastContact": "0s",
      "LastTerm": 2,
      "LastIndex": 46,
      "Healthy": true,
      "Voter": true,
      "StableSince": "2017-03-06T22:07:51Z"
    },
    {
      "ID": "e36ee410-cc3c-0a0c-c724-63817ab30303",
      "Name": "node2",
      "Address": "127.0.0.1:8205",
      "SerfStatus": "alive",
      "Version": "0.8.0",
      "Leader": false,
      "LastContact": "27.291304ms",
      "LastTerm": 2,
      "LastIndex": 46,
      "Healthy": true,
      "Voter": false,
      "StableSince": "2017-03-06T22:18:26Z"
    }
  ]
}
```

- `Healthy` is whether all the servers are currently healthy.

- `FailureTolerance` is the number of redundant healthy servers that could
  fail without causing an outage (this would be 2 in a healthy cluster of 5
  servers).

- `Servers` holds detailed health information on each server:

  - `ID` is the Raft ID of the server.

  - `Name` is the node name of the server.

  - `Address` is the address of the server.

  - `SerfStatus` is the SerfHealth check status for the server.

  - `Version` is the Nomad version of the server.

  - `Leader` is whether this server is currently the leader.

  - `LastContact` is the time elapsed since this server's last contact with the leader.

  - `LastTerm` is the server's last known Raft leader term.

  - `LastIndex` is the index of the server's last committed Raft log entry.

  - `Healthy` is whether the server is healthy according to the current Autopilot configuration.

  - `Voter` is whether the server is a voting member of the Raft cluster.

  - `StableSince` is the time this server has been in its current `Healthy` state.

The HTTP status code will indicate the health of the cluster. If `Healthy` is true, then a
status of 200 will be returned. If `Healthy` is false, then a status of 429 will be returned.

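Because the status code mirrors the `Healthy` field, a caller can treat this endpoint as a simple liveness probe. A hedged Go sketch (editor's example, not part of this change; the agent address is an assumption):

```go
package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
)

func main() {
    // Per the docs above, the endpoint answers 200 when Healthy is true
    // and 429 otherwise; the address assumes a local agent on port 4646.
    resp, err := http.Get("http://127.0.0.1:4646/v1/operator/autopilot/health")
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()

    var out struct {
        Healthy          bool
        FailureTolerance int
    }
    if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
        log.Fatal(err)
    }
    fmt.Printf("HTTP %d healthy=%v failure_tolerance=%d\n",
        resp.StatusCode, out.Healthy, out.FailureTolerance)
}
```
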
64
website/source/docs/agent/configuration/autopilot.html.md
Normal file
@@ -0,0 +1,64 @@
---
layout: "docs"
page_title: "autopilot Stanza - Agent Configuration"
sidebar_current: "docs-agent-configuration-autopilot"
description: |-
  The "autopilot" stanza configures the Nomad agent's Autopilot behavior.
---

# `autopilot` Stanza

<table class="table table-bordered table-striped">
  <tr>
    <th width="120">Placement</th>
    <td>
      <code>**autopilot**</code>
    </td>
  </tr>
</table>

The `autopilot` stanza configures the Autopilot behavior of Nomad servers.

```hcl
autopilot {
  cleanup_dead_servers = true
  last_contact_threshold = "200ms"
  max_trailing_logs = 250
  server_stabilization_time = "10s"
  redundancy_zone_tag = ""
  disable_upgrade_migration = true
  upgrade_version_tag = ""
}
```

## `autopilot` Parameters

- `cleanup_dead_servers` `(bool: true)` - Specifies automatic removal of dead
  server nodes periodically and whenever a new server is added to the cluster.

- `last_contact_threshold` `(string: "200ms")` - Specifies the maximum amount of
  time a server can go without contact from the leader before being considered
  unhealthy. Must be a duration value such as `10s`.

- `max_trailing_logs` `(int: 250)` - Specifies the maximum number of log entries
  that a server can trail the leader by before being considered unhealthy.

- `server_stabilization_time` `(string: "10s")` - Specifies the minimum amount of
  time a server must be stable in the 'healthy' state before being added to the
  cluster. Only takes effect if all servers are running Raft protocol version 3
  or higher. Must be a duration value such as `30s`.

- `redundancy_zone_tag` `(string: "")` - Controls the node-meta key to use when
  Autopilot is separating servers into zones for redundancy. Only one server in
  each zone can be a voting member at one time. If left blank, this feature will
  be disabled.

- `disable_upgrade_migration` `(bool: false)` - Disables Autopilot's upgrade
  migration strategy in Nomad Enterprise of waiting until enough
  newer-versioned servers have been added to the cluster before promoting any of
  them to voters.

- `upgrade_version_tag` `(string: "")` - Controls the node-meta key to use for
  version info when performing upgrade migrations. If left blank, the Nomad
  version will be used.

@@ -102,6 +102,9 @@ server {
  second is a tradeoff as it lowers failure detection time of nodes at the
  tradeoff of false positives and increased load on the leader.

- `non_voting_server` `(bool: false)` - Specifies whether this server will act as
  a non-voting member of the cluster to help provide read scalability. (Enterprise-only)

- `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel
  scheduler threads to run. This can be as many as one per core, or `0` to
  disallow this server from making any scheduling decisions. This defaults to

@@ -28,8 +28,12 @@ Usage: `nomad operator <subcommand> <subcommand> [options]`
Run `nomad operator <subcommand>` with no arguments for help on that subcommand.
The following subcommands are available:

* [`autopilot get-config`][get-config] - Display the current Autopilot configuration
* [`autopilot set-config`][set-config] - Modify the current Autopilot configuration
* [`raft list-peers`][list] - Display the current Raft peer configuration
* [`raft remove-peer`][remove] - Remove a Nomad server from the Raft configuration

[get-config]: /docs/commands/operator/autopilot-get-config.html "Autopilot Get Config command"
[set-config]: /docs/commands/operator/autopilot-set-config.html "Autopilot Set Config command"
[list]: /docs/commands/operator/raft-list-peers.html "Raft List Peers command"
[remove]: /docs/commands/operator/raft-remove-peer.html "Raft Remove Peer command"

@@ -0,0 +1,63 @@
---
layout: "docs"
page_title: "Commands: operator autopilot get-config"
sidebar_current: "docs-commands-operator-autopilot-get-config"
description: >
  Display the current Autopilot configuration.
---

# Command: `operator autopilot get-config`

The Autopilot operator command is used to view the current Autopilot configuration. See the
[Autopilot Guide](/guides/cluster/autopilot.html) for more information about Autopilot.

## Usage

```
nomad operator autopilot get-config [options]
```

## General Options

<%= partial "docs/commands/_general_options" %>

The output looks like this:

```
CleanupDeadServers = true
LastContactThreshold = 200ms
MaxTrailingLogs = 250
ServerStabilizationTime = 10s
RedundancyZoneTag = ""
DisableUpgradeMigration = false
UpgradeVersionTag = ""
```

- `CleanupDeadServers` - Specifies automatic removal of dead
  server nodes periodically and whenever a new server is added to the cluster.

- `LastContactThreshold` - Specifies the maximum amount of
  time a server can go without contact from the leader before being considered
  unhealthy. Must be a duration value such as `10s`.

- `MaxTrailingLogs` - Specifies the maximum number of log entries
  that a server can trail the leader by before being considered unhealthy.

- `ServerStabilizationTime` - Specifies the minimum amount of
  time a server must be stable in the 'healthy' state before being added to the
  cluster. Only takes effect if all servers are running Raft protocol version 3
  or higher. Must be a duration value such as `30s`.

- `RedundancyZoneTag` - Controls the node-meta key to use when
  Autopilot is separating servers into zones for redundancy. Only one server in
  each zone can be a voting member at one time. If left blank, this feature will
  be disabled.

- `DisableUpgradeMigration` - Disables Autopilot's upgrade
  migration strategy in Nomad Enterprise of waiting until enough
  newer-versioned servers have been added to the cluster before promoting any of
  them to voters.

- `UpgradeVersionTag` - Controls the node-meta key to use for
  version info when performing upgrade migrations. If left blank, the Nomad
  version will be used.
@@ -0,0 +1,55 @@
---
layout: "docs"
page_title: "Commands: operator autopilot set-config"
sidebar_current: "docs-commands-operator-autopilot-set-config"
description: >
  Modify the current Autopilot configuration.
---

# Command: `operator autopilot set-config`

The Autopilot operator command is used to set the current Autopilot configuration. See the
[Autopilot Guide](/guides/cluster/autopilot.html) for more information about Autopilot.

## Usage

```
nomad operator autopilot set-config [options]
```

## General Options

<%= partial "docs/commands/_general_options" %>

## Set Config Options

* `-cleanup-dead-servers` - Specifies whether to enable automatic removal of dead servers
  upon the successful joining of new servers to the cluster. Must be one of `[true|false]`.

* `-last-contact-threshold` - Controls the maximum amount of time a server can go without contact
  from the leader before being considered unhealthy. Must be a duration value such as `200ms`.

* `-max-trailing-logs` - Controls the maximum number of log entries that a server can trail
  the leader by before being considered unhealthy.

* `-server-stabilization-time` - Controls the minimum amount of time a server must be stable in
  the 'healthy' state before being added to the cluster. Only takes effect if all servers are
  running Raft protocol version 3 or higher. Must be a duration value such as `10s`.

* `-disable-upgrade-migration` - (Enterprise-only) Controls whether Nomad will avoid promoting
  new servers until it can perform a migration. Must be one of `[true|false]`.

* `-redundancy-zone-tag` - (Enterprise-only) Controls the [`-node-meta`](/docs/agent/options.html#_node_meta)
  key name used for separating servers into different redundancy zones.

* `-upgrade-version-tag` - (Enterprise-only) Controls the [`-node-meta`](/docs/agent/options.html#_node_meta)
  tag to use for version info when performing upgrade migrations. If left blank, the Nomad version will be used.

The output looks like this:

```
Configuration updated!
```

The return code will indicate success or failure.
@@ -38,3 +38,6 @@ nomad operator raft remove-peer [options]

* `-peer-address`: Remove a Nomad server with given address from the Raft
  configuration. The format is "IP:port"

* `-peer-id`: Remove a Nomad server with the given ID from the Raft
  configuration. The format is "id"

@@ -15,6 +15,42 @@ details provided for their upgrades as a result of new features or changed
behavior. This page is used to document those details separately from the
standard upgrade flow.

## Nomad 0.8.0

### Raft Protocol Version Compatibility

When upgrading to Nomad 0.8.0 from a version lower than 0.7.0, users will need to
set the [`-raft-protocol`](/docs/agent/options.html#_raft_protocol) option to 1 in
order to maintain backwards compatibility with the old servers during the upgrade.
After the servers have been migrated to version 0.8.0, `-raft-protocol` can be moved
up to 2 and the servers restarted to match the default.

The Raft protocol must be stepped up in this way; only adjacent version numbers are
compatible (for example, version 1 cannot talk to version 3). Here is a table of the
Raft Protocol versions supported by each Nomad version:

<table class="table table-bordered table-striped">
  <tr>
    <th>Version</th>
    <th>Supported Raft Protocols</th>
  </tr>
  <tr>
    <td>0.6 and earlier</td>
    <td>0</td>
  </tr>
  <tr>
    <td>0.7</td>
    <td>1</td>
  </tr>
  <tr>
    <td>0.8</td>
    <td>1, 2, 3</td>
  </tr>
</table>

In order to enable all [Autopilot](/guides/cluster/autopilot.html) features, all servers
in a Nomad cluster must be running with Raft protocol version 3 or later.

## Nomad 0.6.0

### Default `advertise` address changes

219
website/source/guides/cluster/autopilot.html.md
Normal file
@@ -0,0 +1,219 @@
---
layout: "guides"
page_title: "Autopilot"
sidebar_current: "guides-cluster-autopilot"
description: |-
  This guide covers how to configure and use Autopilot features.
---

# Autopilot

Autopilot is a set of new features added in Nomad 0.8 to allow for automatic
operator-friendly management of Nomad servers. It includes cleanup of dead
servers, monitoring the state of the Raft cluster, and stable server introduction.

To enable Autopilot features (with the exception of dead server cleanup),
the [`raft_protocol`](/docs/agent/configuration/server.html#raft_protocol) setting in
the agent configuration must be set to 3 or higher on all servers. In Nomad
0.8 this setting defaults to 2; in Nomad 0.9 it will default to 3. For more
information, see the
[Version Upgrade section](/docs/upgrade/upgrade-specific.html#raft-protocol-version-compatibility)
on Raft Protocol versions.

## Configuration

The configuration of Autopilot is loaded by the leader from the agent's
[Autopilot settings](/docs/agent/configuration/autopilot.html) when initially
bootstrapping the cluster:

```
autopilot {
  cleanup_dead_servers = true
  last_contact_threshold = "200ms"
  max_trailing_logs = 250
  server_stabilization_time = "10s"
  redundancy_zone_tag = "az"
  disable_upgrade_migration = false
  upgrade_version_tag = ""
}
```

After bootstrapping, the configuration can be viewed or modified either via the
[`operator autopilot`](/docs/commands/operator.html) subcommand or the
[`/v1/operator/autopilot/configuration`](/api/operator.html#read-autopilot-configuration)
HTTP endpoint:

```
$ nomad operator autopilot get-config
CleanupDeadServers = true
LastContactThreshold = 200ms
MaxTrailingLogs = 250
ServerStabilizationTime = 10s
RedundancyZoneTag = ""
DisableUpgradeMigration = false
UpgradeVersionTag = ""

$ nomad operator autopilot set-config -cleanup-dead-servers=false
Configuration updated!

$ nomad operator autopilot get-config
CleanupDeadServers = false
LastContactThreshold = 200ms
MaxTrailingLogs = 250
ServerStabilizationTime = 10s
RedundancyZoneTag = ""
DisableUpgradeMigration = false
UpgradeVersionTag = ""
```

## Dead Server Cleanup

Dead servers will periodically be cleaned up and removed from the Raft peer
set, to prevent them from interfering with the quorum size and leader elections.
This cleanup will also happen whenever a new server is successfully added to the
cluster.

Prior to Autopilot, it would take 72 hours for dead servers to be automatically reaped,
or operators had to script a `nomad server force-leave`. If another server failure occurred,
it could jeopardize the quorum, even if the failed Nomad server had been automatically
replaced. Autopilot helps prevent these kinds of outages by quickly removing failed
servers as soon as a replacement Nomad server comes online. When servers are removed
by the cleanup process they will enter the "left" state.

This option can be disabled by running `nomad operator autopilot set-config`
with the `-cleanup-dead-servers=false` option.

## Server Health Checking

An internal health check runs on the leader to track the stability of servers.
A server is considered healthy if all of the following conditions are true:

- Its status according to Serf is 'Alive'
- The time since its last contact with the current leader is below
  `LastContactThreshold`
- Its latest Raft term matches the leader's term
- The number of Raft log entries it trails the leader by does not exceed
  `MaxTrailingLogs`

The status of these health checks can be viewed through the
[`/v1/operator/autopilot/health`](/api/operator.html#read-health) HTTP endpoint, with a top level
`Healthy` field indicating the overall status of the cluster:

```
$ curl localhost:4646/v1/operator/autopilot/health
{
  "Healthy": true,
  "FailureTolerance": 0,
  "Servers": [
    {
      "ID": "e349749b-3303-3ddf-959c-b5885a0e1f6e",
      "Name": "node1",
      "Address": "127.0.0.1:4647",
      "SerfStatus": "alive",
      "Version": "0.8.0",
      "Leader": true,
      "LastContact": "0s",
      "LastTerm": 2,
      "LastIndex": 10,
      "Healthy": true,
      "Voter": true,
      "StableSince": "2017-03-28T18:28:52Z"
    },
    {
      "ID": "e35bde83-4e9c-434f-a6ef-453f44ee21ea",
      "Name": "node2",
      "Address": "127.0.0.1:4747",
      "SerfStatus": "alive",
      "Version": "0.8.0",
      "Leader": false,
      "LastContact": "35.371007ms",
      "LastTerm": 2,
      "LastIndex": 10,
      "Healthy": true,
      "Voter": false,
      "StableSince": "2017-03-28T18:29:10Z"
    }
  ]
}
```

## Stable Server Introduction

When a new server is added to the cluster, there is a waiting period where it
must be healthy and stable for a certain amount of time before being promoted
to a full, voting member. This can be configured via the `ServerStabilizationTime`
setting.

---

~> The following Autopilot features are available only in
   [Nomad Enterprise](https://www.hashicorp.com/products/nomad/) version 0.8.0 and later.

## Server Read Scaling

With the [`non_voting_server`](/docs/agent/configuration/server.html#non_voting_server) option, a
server can be explicitly marked as a non-voter and will never be promoted to a voting
member. This can be useful when more read scaling is needed; being a non-voter means
that the server will still have data replicated to it, but it will not be part of the
quorum that the leader must wait for before committing log entries.

## Redundancy Zones

Prior to Autopilot, it was difficult to deploy servers in a way that took advantage of
isolated failure domains such as AWS Availability Zones; users would be forced to either
have an overly-large quorum (2-3 nodes per AZ) or give up redundancy within an AZ by
deploying just one server in each.

If the `RedundancyZoneTag` setting is set, Nomad will use its value to look for a
zone in each server's specified [`-meta`](/docs/agent/configuration/client.html#meta)
tag. For example, if `RedundancyZoneTag` is set to `zone`, and `-meta zone=east1a`
is used when starting a server, that server's redundancy zone will be `east1a`.

Here's an example showing how to configure this:

```
$ nomad operator autopilot set-config -redundancy-zone-tag=zone
Configuration updated!
```

Nomad will then use these values to partition the servers by redundancy zone, and will
aim to keep one voting server per zone. Extra servers in each zone will stay as non-voters
on standby to be promoted if the active voter leaves or dies.

## Upgrade Migrations

Autopilot in Nomad Enterprise supports upgrade migrations by default. To disable this
functionality, set `DisableUpgradeMigration` to true.

When a new server is added and Autopilot detects that its Nomad version is newer than
that of the existing servers, Autopilot will avoid promoting the new server until enough
newer-versioned servers have been added to the cluster. When the count of new servers
equals or exceeds that of the old servers, Autopilot will begin promoting the new servers
to voters and demoting the old servers. After this is finished, the old servers can be
safely removed from the cluster.

To check the Nomad version of the servers, either the
[autopilot health](/api/operator.html#read-health) endpoint or the `nomad server members`
command can be used:

```
$ nomad server members
Node   Address         Status  Type    Build  Protocol  DC
node1  127.0.0.1:8301  alive   server  0.7.5  2         dc1
node2  127.0.0.1:8703  alive   server  0.7.5  2         dc1
node3  127.0.0.1:8803  alive   server  0.7.5  2         dc1
node4  127.0.0.1:8203  alive   server  0.8.0  2         dc1
```

### Migrations Without a Nomad Version Change

The `UpgradeVersionTag` can be used to override the version information used during
a migration, so that the migration logic can be used for updating the cluster when
changing configuration.

If the `UpgradeVersionTag` setting is set, Nomad will use its value to look for a
version in each server's specified [`-meta`](/docs/agent/configuration/client.html#meta)
tag. For example, if `UpgradeVersionTag` is set to `build`, and `-meta build=0.0.2`
is used when starting a server, that server's version will be `0.0.2` when considered in
a migration. The upgrade logic will follow semantic versioning and the version string
must be in the form of either `X`, `X.Y`, or `X.Y.Z`.
@@ -313,6 +313,12 @@
<li<%= sidebar_current("docs-commands-operator") %>>
  <a href="/docs/commands/operator.html">operator</a>
  <ul class="nav">
    <li<%= sidebar_current("docs-commands-operator-autopilot-get-config") %>>
      <a href="/docs/commands/operator/autopilot-get-config.html">autopilot get-config</a>
    </li>
    <li<%= sidebar_current("docs-commands-operator-autopilot-set-config") %>>
      <a href="/docs/commands/operator/autopilot-set-config.html">autopilot set-config</a>
    </li>
    <li<%= sidebar_current("docs-commands-operator-raft-list-peers") %>>
      <a href="/docs/commands/operator/raft-list-peers.html">raft list-peers</a>
    </li>
@@ -404,6 +410,9 @@
<li <%= sidebar_current("docs-agent-configuration-acl") %>>
  <a href="/docs/agent/configuration/acl.html">acl</a>
</li>
<li <%= sidebar_current("docs-agent-configuration-autopilot") %>>
  <a href="/docs/agent/configuration/autopilot.html">autopilot</a>
</li>
<li <%= sidebar_current("docs-agent-configuration-client") %>>
  <a href="/docs/agent/configuration/client.html">client</a>
</li>

@@ -42,6 +42,9 @@
<li<%= sidebar_current("guides-cluster-automatic") %>>
  <a href="/guides/cluster/automatic.html">Automatic</a>
</li>
<li<%= sidebar_current("guides-cluster-autopilot") %>>
  <a href="/guides/cluster/autopilot.html">Autopilot</a>
</li>
<li<%= sidebar_current("guides-cluster-manual") %>>
  <a href="/guides/cluster/manual.html">Manual</a>
</li>
