mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 10:25:42 +03:00
Merge pull request #4277 from hashicorp/f-retry-join-clients
Add go-discover support to Nomad clients
This commit is contained in:
@@ -283,7 +283,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
|
||||
// Set the preconfigured list of static servers
|
||||
c.configLock.RLock()
|
||||
if len(c.configCopy.Servers) > 0 {
|
||||
if err := c.setServersImpl(c.configCopy.Servers, true); err != nil {
|
||||
if _, err := c.setServersImpl(c.configCopy.Servers, true); err != nil {
|
||||
logger.Printf("[WARN] client: None of the configured servers are valid: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -623,7 +623,7 @@ func (c *Client) GetServers() []string {
|
||||
|
||||
// SetServers sets a new list of nomad servers to connect to. As long as one
|
||||
// server is resolvable no error is returned.
|
||||
func (c *Client) SetServers(in []string) error {
|
||||
func (c *Client) SetServers(in []string) (int, error) {
|
||||
return c.setServersImpl(in, false)
|
||||
}
|
||||
|
||||
@@ -633,7 +633,7 @@ func (c *Client) SetServers(in []string) error {
|
||||
//
|
||||
// Force should be used when setting the servers from the initial configuration
|
||||
// since the server may be starting up in parallel and initial pings may fail.
|
||||
func (c *Client) setServersImpl(in []string, force bool) error {
|
||||
func (c *Client) setServersImpl(in []string, force bool) (int, error) {
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var merr multierror.Error
|
||||
@@ -673,13 +673,13 @@ func (c *Client) setServersImpl(in []string, force bool) error {
|
||||
// Only return errors if no servers are valid
|
||||
if len(endpoints) == 0 {
|
||||
if len(merr.Errors) > 0 {
|
||||
return merr.ErrorOrNil()
|
||||
return 0, merr.ErrorOrNil()
|
||||
}
|
||||
return noServersErr
|
||||
return 0, noServersErr
|
||||
}
|
||||
|
||||
c.servers.SetServers(endpoints)
|
||||
return nil
|
||||
return len(endpoints), nil
|
||||
}
|
||||
|
||||
// restoreState is used to restore our state from the data dir
|
||||
|
||||
@@ -975,13 +975,13 @@ func TestClient_ServerList(t *testing.T) {
|
||||
if s := client.GetServers(); len(s) != 0 {
|
||||
t.Fatalf("expected server lit to be empty but found: %+q", s)
|
||||
}
|
||||
if err := client.SetServers(nil); err != noServersErr {
|
||||
if _, err := client.SetServers(nil); err != noServersErr {
|
||||
t.Fatalf("expected setting an empty list to return a 'no servers' error but received %v", err)
|
||||
}
|
||||
if err := client.SetServers([]string{"123.456.13123.123.13:80"}); err == nil {
|
||||
if _, err := client.SetServers([]string{"123.456.13123.123.13:80"}); err == nil {
|
||||
t.Fatalf("expected setting a bad server to return an error")
|
||||
}
|
||||
if err := client.SetServers([]string{"123.456.13123.123.13:80", "127.0.0.1:1234", "127.0.0.1"}); err == nil {
|
||||
if _, err := client.SetServers([]string{"123.456.13123.123.13:80", "127.0.0.1:1234", "127.0.0.1"}); err == nil {
|
||||
t.Fatalf("expected setting at least one good server to succeed but received: %v", err)
|
||||
}
|
||||
s := client.GetServers()
|
||||
|
||||
@@ -222,7 +222,7 @@ func (s *HTTPServer) updateServers(resp http.ResponseWriter, req *http.Request)
|
||||
|
||||
// Set the servers list into the client
|
||||
s.agent.logger.Printf("[TRACE] Adding servers %+q to the client's primary server list", servers)
|
||||
if err := client.SetServers(servers); err != nil {
|
||||
if _, err := client.SetServers(servers); err != nil {
|
||||
s.agent.logger.Printf("[ERR] Attempt to add servers %q to client failed: %v", servers, err)
|
||||
//TODO is this the right error to return?
|
||||
return nil, CodedError(400, err.Error())
|
||||
|
||||
@@ -63,9 +63,11 @@ func (c *Command) readConfig() *Config {
|
||||
Client: &ClientConfig{},
|
||||
Consul: &config.ConsulConfig{},
|
||||
Ports: &Ports{},
|
||||
Server: &ServerConfig{},
|
||||
Vault: &config.VaultConfig{},
|
||||
ACL: &ACLConfig{},
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{},
|
||||
},
|
||||
Vault: &config.VaultConfig{},
|
||||
ACL: &ACLConfig{},
|
||||
}
|
||||
|
||||
flags := flag.NewFlagSet("agent", flag.ContinueOnError)
|
||||
@@ -78,13 +80,16 @@ func (c *Command) readConfig() *Config {
|
||||
|
||||
// Server-only options
|
||||
flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
|
||||
flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
|
||||
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
|
||||
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
|
||||
flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
|
||||
flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
|
||||
flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")
|
||||
flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "")
|
||||
flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
|
||||
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.StartJoin), "join", "")
|
||||
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.RetryJoin), "retry-join", "")
|
||||
flags.IntVar(&cmdConfig.Server.ServerJoin.RetryMaxAttempts, "retry-max", 0, "")
|
||||
flags.Var((flaghelper.FuncDurationVar)(func(d time.Duration) error {
|
||||
cmdConfig.Server.ServerJoin.RetryInterval = d
|
||||
return nil
|
||||
}), "retry-interval", "")
|
||||
|
||||
// Client-only options
|
||||
flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
|
||||
@@ -267,14 +272,6 @@ func (c *Command) readConfig() *Config {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the RetryInterval.
|
||||
dur, err := time.ParseDuration(config.Server.RetryInterval)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
|
||||
return nil
|
||||
}
|
||||
config.Server.retryInterval = dur
|
||||
|
||||
// Check that the server is running in at least one mode.
|
||||
if !(config.Server.Enabled || config.Client.Enabled) {
|
||||
c.Ui.Error("Must specify either server, client or dev mode for the agent.")
|
||||
@@ -547,20 +544,89 @@ func (c *Command) Run(args []string) int {
|
||||
logGate.Flush()
|
||||
|
||||
// Start retry join process
|
||||
c.retryJoinErrCh = make(chan struct{})
|
||||
|
||||
joiner := retryJoiner{
|
||||
join: c.agent.server.Join,
|
||||
discover: &discover.Discover{},
|
||||
errCh: c.retryJoinErrCh,
|
||||
logger: c.agent.logger,
|
||||
if err := c.handleRetryJoin(config); err != nil {
|
||||
c.Ui.Error(err.Error())
|
||||
return 1
|
||||
}
|
||||
go joiner.RetryJoin(config)
|
||||
|
||||
// Wait for exit
|
||||
return c.handleSignals()
|
||||
}
|
||||
|
||||
// handleRetryJoin is used to start retry joining if it is configured.
|
||||
func (c *Command) handleRetryJoin(config *Config) error {
|
||||
c.retryJoinErrCh = make(chan struct{})
|
||||
|
||||
if config.Server.Enabled && len(config.Server.RetryJoin) != 0 {
|
||||
joiner := retryJoiner{
|
||||
discover: &discover.Discover{},
|
||||
errCh: c.retryJoinErrCh,
|
||||
logger: c.agent.logger,
|
||||
serverJoin: c.agent.server.Join,
|
||||
serverEnabled: true,
|
||||
}
|
||||
|
||||
if err := joiner.Validate(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remove the duplicate fields
|
||||
if len(config.Server.RetryJoin) != 0 {
|
||||
config.Server.ServerJoin.RetryJoin = config.Server.RetryJoin
|
||||
config.Server.RetryJoin = nil
|
||||
}
|
||||
if config.Server.RetryMaxAttempts != 0 {
|
||||
config.Server.ServerJoin.RetryMaxAttempts = config.Server.RetryMaxAttempts
|
||||
config.Server.RetryMaxAttempts = 0
|
||||
}
|
||||
if config.Server.RetryInterval != 0 {
|
||||
config.Server.ServerJoin.RetryInterval = config.Server.RetryInterval
|
||||
config.Server.RetryInterval = 0
|
||||
}
|
||||
|
||||
c.agent.logger.Printf("[WARN] agent: Using deprecated retry_join fields. Upgrade configuration to use server_join")
|
||||
}
|
||||
|
||||
if config.Server.Enabled &&
|
||||
config.Server.ServerJoin != nil &&
|
||||
len(config.Server.ServerJoin.RetryJoin) != 0 {
|
||||
|
||||
joiner := retryJoiner{
|
||||
discover: &discover.Discover{},
|
||||
errCh: c.retryJoinErrCh,
|
||||
logger: c.agent.logger,
|
||||
serverJoin: c.agent.server.Join,
|
||||
serverEnabled: true,
|
||||
}
|
||||
|
||||
if err := joiner.Validate(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go joiner.RetryJoin(config.Server.ServerJoin)
|
||||
}
|
||||
|
||||
if config.Client.Enabled &&
|
||||
config.Client.ServerJoin != nil &&
|
||||
len(config.Client.ServerJoin.RetryJoin) != 0 {
|
||||
joiner := retryJoiner{
|
||||
discover: &discover.Discover{},
|
||||
errCh: c.retryJoinErrCh,
|
||||
logger: c.agent.logger,
|
||||
clientJoin: c.agent.client.SetServers,
|
||||
clientEnabled: true,
|
||||
}
|
||||
|
||||
if err := joiner.Validate(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go joiner.RetryJoin(config.Client.ServerJoin)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleSignals blocks until we get an exit-causing signal
|
||||
func (c *Command) handleSignals() int {
|
||||
signalCh := make(chan os.Signal, 4)
|
||||
@@ -831,12 +897,34 @@ func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
|
||||
}
|
||||
|
||||
func (c *Command) startupJoin(config *Config) error {
|
||||
if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
|
||||
// Nothing to do
|
||||
if !config.Server.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate both old and new aren't being set
|
||||
old := len(config.Server.StartJoin)
|
||||
var new int
|
||||
if config.Server.ServerJoin != nil {
|
||||
new = len(config.Server.ServerJoin.StartJoin)
|
||||
}
|
||||
if old != 0 && new != 0 {
|
||||
return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
|
||||
}
|
||||
|
||||
// Nothing to do
|
||||
if old+new == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Combine the lists and join
|
||||
joining := config.Server.StartJoin
|
||||
if new != 0 {
|
||||
joining = append(joining, config.Server.ServerJoin.StartJoin...)
|
||||
}
|
||||
|
||||
c.Ui.Output("Joining cluster...")
|
||||
n, err := c.agent.server.Join(config.Server.StartJoin)
|
||||
n, err := c.agent.server.Join(joining)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ advertise {
|
||||
rpc = "127.0.0.3"
|
||||
serf = "127.0.0.4"
|
||||
}
|
||||
|
||||
client {
|
||||
enabled = true
|
||||
state_dir = "/tmp/client-state"
|
||||
@@ -29,6 +30,11 @@ client {
|
||||
foo = "bar"
|
||||
baz = "zip"
|
||||
}
|
||||
server_join {
|
||||
retry_join = [ "1.1.1.1", "2.2.2.2" ]
|
||||
retry_max = 3
|
||||
retry_interval = "15s"
|
||||
}
|
||||
options {
|
||||
foo = "bar"
|
||||
baz = "zip"
|
||||
@@ -49,17 +55,17 @@ client {
|
||||
}
|
||||
client_min_port = 1000
|
||||
client_max_port = 2000
|
||||
max_kill_timeout = "10s"
|
||||
stats {
|
||||
data_points = 35
|
||||
collection_interval = "5s"
|
||||
}
|
||||
gc_interval = "6s"
|
||||
gc_parallel_destroys = 6
|
||||
gc_disk_usage_threshold = 82
|
||||
gc_inode_usage_threshold = 91
|
||||
gc_max_allocs = 50
|
||||
no_host_uuid = false
|
||||
max_kill_timeout = "10s"
|
||||
stats {
|
||||
data_points = 35
|
||||
collection_interval = "5s"
|
||||
}
|
||||
gc_interval = "6s"
|
||||
gc_parallel_destroys = 6
|
||||
gc_disk_usage_threshold = 82
|
||||
gc_inode_usage_threshold = 91
|
||||
gc_max_allocs = 50
|
||||
no_host_uuid = false
|
||||
}
|
||||
server {
|
||||
enabled = true
|
||||
@@ -86,23 +92,28 @@ server {
|
||||
redundancy_zone = "foo"
|
||||
upgrade_version = "0.8.0"
|
||||
encrypt = "abc"
|
||||
server_join {
|
||||
retry_join = [ "1.1.1.1", "2.2.2.2" ]
|
||||
retry_max = 3
|
||||
retry_interval = "15s"
|
||||
}
|
||||
}
|
||||
acl {
|
||||
enabled = true
|
||||
token_ttl = "60s"
|
||||
policy_ttl = "60s"
|
||||
replication_token = "foobar"
|
||||
enabled = true
|
||||
token_ttl = "60s"
|
||||
policy_ttl = "60s"
|
||||
replication_token = "foobar"
|
||||
}
|
||||
telemetry {
|
||||
statsite_address = "127.0.0.1:1234"
|
||||
statsd_address = "127.0.0.1:2345"
|
||||
prometheus_metrics = true
|
||||
disable_hostname = true
|
||||
collection_interval = "3s"
|
||||
publish_allocation_metrics = true
|
||||
publish_node_metrics = true
|
||||
disable_tagged_metrics = true
|
||||
backwards_compatible_metrics = true
|
||||
collection_interval = "3s"
|
||||
publish_allocation_metrics = true
|
||||
publish_node_metrics = true
|
||||
disable_tagged_metrics = true
|
||||
backwards_compatible_metrics = true
|
||||
}
|
||||
leave_on_interrupt = true
|
||||
leave_on_terminate = true
|
||||
@@ -114,68 +125,68 @@ http_api_response_headers {
|
||||
Access-Control-Allow-Origin = "*"
|
||||
}
|
||||
consul {
|
||||
server_service_name = "nomad"
|
||||
server_http_check_name = "nomad-server-http-health-check"
|
||||
server_serf_check_name = "nomad-server-serf-health-check"
|
||||
server_rpc_check_name = "nomad-server-rpc-health-check"
|
||||
client_service_name = "nomad-client"
|
||||
client_http_check_name = "nomad-client-http-health-check"
|
||||
address = "127.0.0.1:9500"
|
||||
token = "token1"
|
||||
auth = "username:pass"
|
||||
ssl = true
|
||||
verify_ssl = true
|
||||
ca_file = "/path/to/ca/file"
|
||||
cert_file = "/path/to/cert/file"
|
||||
key_file = "/path/to/key/file"
|
||||
server_auto_join = true
|
||||
client_auto_join = true
|
||||
auto_advertise = true
|
||||
checks_use_advertise = true
|
||||
server_service_name = "nomad"
|
||||
server_http_check_name = "nomad-server-http-health-check"
|
||||
server_serf_check_name = "nomad-server-serf-health-check"
|
||||
server_rpc_check_name = "nomad-server-rpc-health-check"
|
||||
client_service_name = "nomad-client"
|
||||
client_http_check_name = "nomad-client-http-health-check"
|
||||
address = "127.0.0.1:9500"
|
||||
token = "token1"
|
||||
auth = "username:pass"
|
||||
ssl = true
|
||||
verify_ssl = true
|
||||
ca_file = "/path/to/ca/file"
|
||||
cert_file = "/path/to/cert/file"
|
||||
key_file = "/path/to/key/file"
|
||||
server_auto_join = true
|
||||
client_auto_join = true
|
||||
auto_advertise = true
|
||||
checks_use_advertise = true
|
||||
}
|
||||
vault {
|
||||
address = "127.0.0.1:9500"
|
||||
allow_unauthenticated = true
|
||||
task_token_ttl = "1s"
|
||||
enabled = false
|
||||
token = "12345"
|
||||
ca_file = "/path/to/ca/file"
|
||||
ca_path = "/path/to/ca"
|
||||
cert_file = "/path/to/cert/file"
|
||||
key_file = "/path/to/key/file"
|
||||
tls_server_name = "foobar"
|
||||
tls_skip_verify = true
|
||||
create_from_role = "test_role"
|
||||
address = "127.0.0.1:9500"
|
||||
allow_unauthenticated = true
|
||||
task_token_ttl = "1s"
|
||||
enabled = false
|
||||
token = "12345"
|
||||
ca_file = "/path/to/ca/file"
|
||||
ca_path = "/path/to/ca"
|
||||
cert_file = "/path/to/cert/file"
|
||||
key_file = "/path/to/key/file"
|
||||
tls_server_name = "foobar"
|
||||
tls_skip_verify = true
|
||||
create_from_role = "test_role"
|
||||
}
|
||||
tls {
|
||||
http = true
|
||||
rpc = true
|
||||
verify_server_hostname = true
|
||||
ca_file = "foo"
|
||||
cert_file = "bar"
|
||||
key_file = "pipe"
|
||||
rpc_upgrade_mode = true
|
||||
verify_https_client = true
|
||||
tls_prefer_server_cipher_suites = true
|
||||
tls_cipher_suites = "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"
|
||||
tls_min_version = "tls12"
|
||||
http = true
|
||||
rpc = true
|
||||
verify_server_hostname = true
|
||||
ca_file = "foo"
|
||||
cert_file = "bar"
|
||||
key_file = "pipe"
|
||||
rpc_upgrade_mode = true
|
||||
verify_https_client = true
|
||||
tls_prefer_server_cipher_suites = true
|
||||
tls_cipher_suites = "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"
|
||||
tls_min_version = "tls12"
|
||||
}
|
||||
sentinel {
|
||||
import "foo" {
|
||||
path = "foo"
|
||||
args = ["a", "b", "c"]
|
||||
}
|
||||
import "bar" {
|
||||
path = "bar"
|
||||
args = ["x", "y", "z"]
|
||||
}
|
||||
import "foo" {
|
||||
path = "foo"
|
||||
args = ["a", "b", "c"]
|
||||
}
|
||||
import "bar" {
|
||||
path = "bar"
|
||||
args = ["x", "y", "z"]
|
||||
}
|
||||
}
|
||||
autopilot {
|
||||
cleanup_dead_servers = true
|
||||
disable_upgrade_migration = true
|
||||
last_contact_threshold = "12705s"
|
||||
max_trailing_logs = 17849
|
||||
enable_redundancy_zones = true
|
||||
server_stabilization_time = "23057s"
|
||||
enable_custom_upgrades = true
|
||||
cleanup_dead_servers = true
|
||||
disable_upgrade_migration = true
|
||||
last_contact_threshold = "12705s"
|
||||
max_trailing_logs = 17849
|
||||
enable_redundancy_zones = true
|
||||
server_stabilization_time = "23057s"
|
||||
enable_custom_upgrades = true
|
||||
}
|
||||
|
||||
@@ -217,6 +217,9 @@ type ClientConfig struct {
|
||||
// NoHostUUID disables using the host's UUID and will force generation of a
|
||||
// random UUID.
|
||||
NoHostUUID *bool `mapstructure:"no_host_uuid"`
|
||||
|
||||
// ServerJoin contains information that is used to attempt to join servers
|
||||
ServerJoin *ServerJoin `mapstructure:"server_join"`
|
||||
}
|
||||
|
||||
// ACLConfig is configuration specific to the ACL system
|
||||
@@ -311,21 +314,24 @@ type ServerConfig struct {
|
||||
// StartJoin is a list of addresses to attempt to join when the
|
||||
// agent starts. If Serf is unable to communicate with any of these
|
||||
// addresses, then the agent will error and exit.
|
||||
// Deprecated in Nomad 0.10
|
||||
StartJoin []string `mapstructure:"start_join"`
|
||||
|
||||
// RetryJoin is a list of addresses to join with retry enabled.
|
||||
// Deprecated in Nomad 0.10
|
||||
RetryJoin []string `mapstructure:"retry_join"`
|
||||
|
||||
// RetryMaxAttempts specifies the maximum number of times to retry joining a
|
||||
// host on startup. This is useful for cases where we know the node will be
|
||||
// online eventually.
|
||||
// Deprecated in Nomad 0.10
|
||||
RetryMaxAttempts int `mapstructure:"retry_max"`
|
||||
|
||||
// RetryInterval specifies the amount of time to wait in between join
|
||||
// attempts on agent start. The minimum allowed value is 1 second and
|
||||
// the default is 30s.
|
||||
RetryInterval string `mapstructure:"retry_interval"`
|
||||
retryInterval time.Duration `mapstructure:"-"`
|
||||
// Deprecated in Nomad 0.10
|
||||
RetryInterval time.Duration `mapstructure:"retry_interval"`
|
||||
|
||||
// RejoinAfterLeave controls our interaction with the cluster after leave.
|
||||
// When set to false (default), a leave causes Consul to not rejoin
|
||||
@@ -346,6 +352,59 @@ type ServerConfig struct {
|
||||
|
||||
// Encryption key to use for the Serf communication
|
||||
EncryptKey string `mapstructure:"encrypt" json:"-"`
|
||||
|
||||
// ServerJoin contains information that is used to attempt to join servers
|
||||
ServerJoin *ServerJoin `mapstructure:"server_join"`
|
||||
}
|
||||
|
||||
// ServerJoin is used in both clients and servers to bootstrap connections to
|
||||
// servers
|
||||
type ServerJoin struct {
|
||||
// StartJoin is a list of addresses to attempt to join when the
|
||||
// agent starts. If Serf is unable to communicate with any of these
|
||||
// addresses, then the agent will error and exit.
|
||||
StartJoin []string `mapstructure:"start_join"`
|
||||
|
||||
// RetryJoin is a list of addresses to join with retry enabled, or a single
|
||||
// value to find multiple servers using go-discover syntax.
|
||||
RetryJoin []string `mapstructure:"retry_join"`
|
||||
|
||||
// RetryMaxAttempts specifies the maximum number of times to retry joining a
|
||||
// host on startup. This is useful for cases where we know the node will be
|
||||
// online eventually.
|
||||
RetryMaxAttempts int `mapstructure:"retry_max"`
|
||||
|
||||
// RetryInterval specifies the amount of time to wait in between join
|
||||
// attempts on agent start. The minimum allowed value is 1 second and
|
||||
// the default is 30s.
|
||||
RetryInterval time.Duration `mapstructure:"retry_interval"`
|
||||
}
|
||||
|
||||
func (s *ServerJoin) Merge(b *ServerJoin) *ServerJoin {
|
||||
if s == nil {
|
||||
return b
|
||||
}
|
||||
|
||||
result := *s
|
||||
|
||||
if b == nil {
|
||||
return &result
|
||||
}
|
||||
|
||||
if len(b.StartJoin) != 0 {
|
||||
result.StartJoin = b.StartJoin
|
||||
}
|
||||
if len(b.RetryJoin) != 0 {
|
||||
result.RetryJoin = b.RetryJoin
|
||||
}
|
||||
if b.RetryMaxAttempts != 0 {
|
||||
result.RetryMaxAttempts = b.RetryMaxAttempts
|
||||
}
|
||||
if b.RetryInterval != 0 {
|
||||
result.RetryInterval = b.RetryInterval
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
// EncryptBytes returns the encryption key configured.
|
||||
@@ -601,13 +660,20 @@ func DefaultConfig() *Config {
|
||||
GCInodeUsageThreshold: 70,
|
||||
GCMaxAllocs: 50,
|
||||
NoHostUUID: helper.BoolToPtr(true),
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{},
|
||||
RetryInterval: 30 * time.Second,
|
||||
RetryMaxAttempts: 0,
|
||||
},
|
||||
},
|
||||
Server: &ServerConfig{
|
||||
Enabled: false,
|
||||
StartJoin: []string{},
|
||||
RetryJoin: []string{},
|
||||
RetryInterval: "30s",
|
||||
RetryMaxAttempts: 0,
|
||||
Enabled: false,
|
||||
StartJoin: []string{},
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{},
|
||||
RetryInterval: 30 * time.Second,
|
||||
RetryMaxAttempts: 0,
|
||||
},
|
||||
},
|
||||
ACL: &ACLConfig{
|
||||
Enabled: false,
|
||||
@@ -1036,9 +1102,8 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
|
||||
if b.RetryMaxAttempts != 0 {
|
||||
result.RetryMaxAttempts = b.RetryMaxAttempts
|
||||
}
|
||||
if b.RetryInterval != "" {
|
||||
if b.RetryInterval != 0 {
|
||||
result.RetryInterval = b.RetryInterval
|
||||
result.retryInterval = b.retryInterval
|
||||
}
|
||||
if b.RejoinAfterLeave {
|
||||
result.RejoinAfterLeave = true
|
||||
@@ -1055,6 +1120,9 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
|
||||
if b.EncryptKey != "" {
|
||||
result.EncryptKey = b.EncryptKey
|
||||
}
|
||||
if b.ServerJoin != nil {
|
||||
result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin)
|
||||
}
|
||||
|
||||
// Add the schedulers
|
||||
result.EnabledSchedulers = append(result.EnabledSchedulers, b.EnabledSchedulers...)
|
||||
@@ -1162,6 +1230,10 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
|
||||
result.ChrootEnv[k] = v
|
||||
}
|
||||
|
||||
if b.ServerJoin != nil {
|
||||
result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin)
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
|
||||
@@ -370,6 +370,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
|
||||
"gc_parallel_destroys",
|
||||
"gc_max_allocs",
|
||||
"no_host_uuid",
|
||||
"server_join",
|
||||
}
|
||||
if err := helper.CheckHCLKeys(listVal, valid); err != nil {
|
||||
return err
|
||||
@@ -385,6 +386,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
|
||||
delete(m, "chroot_env")
|
||||
delete(m, "reserved")
|
||||
delete(m, "stats")
|
||||
delete(m, "server_join")
|
||||
|
||||
var config ClientConfig
|
||||
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
|
||||
@@ -448,6 +450,13 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse ServerJoin config
|
||||
if o := listVal.Filter("server_join"); len(o.Items) > 0 {
|
||||
if err := parseServerJoin(&config.ServerJoin, o); err != nil {
|
||||
return multierror.Prefix(err, "server_join->")
|
||||
}
|
||||
}
|
||||
|
||||
*result = &config
|
||||
return nil
|
||||
}
|
||||
@@ -531,16 +540,20 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error {
|
||||
"heartbeat_grace",
|
||||
"min_heartbeat_ttl",
|
||||
"max_heartbeats_per_second",
|
||||
"start_join",
|
||||
"retry_join",
|
||||
"retry_max",
|
||||
"retry_interval",
|
||||
"rejoin_after_leave",
|
||||
"encrypt",
|
||||
"authoritative_region",
|
||||
"non_voting_server",
|
||||
"redundancy_zone",
|
||||
"upgrade_version",
|
||||
|
||||
"server_join",
|
||||
|
||||
// For backwards compatibility
|
||||
"start_join",
|
||||
"retry_join",
|
||||
"retry_max",
|
||||
"retry_interval",
|
||||
}
|
||||
if err := helper.CheckHCLKeys(listVal, valid); err != nil {
|
||||
return err
|
||||
@@ -551,6 +564,8 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error {
|
||||
return err
|
||||
}
|
||||
|
||||
delete(m, "server_join")
|
||||
|
||||
var config ServerConfig
|
||||
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
|
||||
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
|
||||
@@ -570,10 +585,59 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse ServerJoin config
|
||||
if o := listVal.Filter("server_join"); len(o.Items) > 0 {
|
||||
if err := parseServerJoin(&config.ServerJoin, o); err != nil {
|
||||
return multierror.Prefix(err, "server_join->")
|
||||
}
|
||||
}
|
||||
|
||||
*result = &config
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseServerJoin(result **ServerJoin, list *ast.ObjectList) error {
|
||||
list = list.Elem()
|
||||
if len(list.Items) > 1 {
|
||||
return fmt.Errorf("only one 'server_join' block allowed")
|
||||
}
|
||||
|
||||
// Get our object
|
||||
listVal := list.Items[0].Val
|
||||
|
||||
// Check for invalid keys
|
||||
valid := []string{
|
||||
"start_join",
|
||||
"retry_join",
|
||||
"retry_max",
|
||||
"retry_interval",
|
||||
}
|
||||
if err := helper.CheckHCLKeys(listVal, valid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var m map[string]interface{}
|
||||
if err := hcl.DecodeObject(&m, listVal); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var serverJoinInfo ServerJoin
|
||||
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
|
||||
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
|
||||
WeaklyTypedInput: true,
|
||||
Result: &serverJoinInfo,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := dec.Decode(m); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*result = &serverJoinInfo
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseACL(result **ACLConfig, list *ast.ObjectList) error {
|
||||
list = list.Elem()
|
||||
if len(list.Items) > 1 {
|
||||
|
||||
@@ -47,6 +47,11 @@ func TestConfig_Parse(t *testing.T) {
|
||||
AllocDir: "/tmp/alloc",
|
||||
Servers: []string{"a.b.c:80", "127.0.0.1:1234"},
|
||||
NodeClass: "linux-medium-64bit",
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"1.1.1.1", "2.2.2.2"},
|
||||
RetryInterval: time.Duration(15) * time.Second,
|
||||
RetryMaxAttempts: 3,
|
||||
},
|
||||
Meta: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "zip",
|
||||
@@ -99,13 +104,18 @@ func TestConfig_Parse(t *testing.T) {
|
||||
MaxHeartbeatsPerSecond: 11.0,
|
||||
RetryJoin: []string{"1.1.1.1", "2.2.2.2"},
|
||||
StartJoin: []string{"1.1.1.1", "2.2.2.2"},
|
||||
RetryInterval: "15s",
|
||||
RetryInterval: 15 * time.Second,
|
||||
RejoinAfterLeave: true,
|
||||
RetryMaxAttempts: 3,
|
||||
NonVotingServer: true,
|
||||
RedundancyZone: "foo",
|
||||
UpgradeVersion: "0.8.0",
|
||||
EncryptKey: "abc",
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"1.1.1.1", "2.2.2.2"},
|
||||
RetryInterval: time.Duration(15) * time.Second,
|
||||
RetryMaxAttempts: 3,
|
||||
},
|
||||
},
|
||||
ACL: &ACLConfig{
|
||||
Enabled: true,
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs/config"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -264,8 +265,7 @@ func TestConfig_Merge(t *testing.T) {
|
||||
RejoinAfterLeave: true,
|
||||
StartJoin: []string{"1.1.1.1"},
|
||||
RetryJoin: []string{"1.1.1.1"},
|
||||
RetryInterval: "10s",
|
||||
retryInterval: time.Second * 10,
|
||||
RetryInterval: time.Second * 10,
|
||||
NonVotingServer: true,
|
||||
RedundancyZone: "bar",
|
||||
UpgradeVersion: "bar",
|
||||
@@ -907,3 +907,109 @@ func TestIsMissingPort(t *testing.T) {
|
||||
t.Errorf("expected no error, but got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeServerJoin(t *testing.T) {
|
||||
require := require.New(t)
|
||||
|
||||
{
|
||||
retryJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
startJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
retryMaxAttempts := 1
|
||||
retryInterval := time.Duration(0)
|
||||
|
||||
a := &ServerJoin{
|
||||
RetryJoin: retryJoin,
|
||||
StartJoin: startJoin,
|
||||
RetryMaxAttempts: retryMaxAttempts,
|
||||
RetryInterval: time.Duration(retryInterval),
|
||||
}
|
||||
b := &ServerJoin{}
|
||||
|
||||
result := a.Merge(b)
|
||||
require.Equal(result.RetryJoin, retryJoin)
|
||||
require.Equal(result.StartJoin, startJoin)
|
||||
require.Equal(result.RetryMaxAttempts, retryMaxAttempts)
|
||||
require.Equal(result.RetryInterval, retryInterval)
|
||||
}
|
||||
{
|
||||
retryJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
startJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
retryMaxAttempts := 1
|
||||
retryInterval := time.Duration(0)
|
||||
|
||||
a := &ServerJoin{}
|
||||
b := &ServerJoin{
|
||||
RetryJoin: retryJoin,
|
||||
StartJoin: startJoin,
|
||||
RetryMaxAttempts: retryMaxAttempts,
|
||||
RetryInterval: time.Duration(retryInterval),
|
||||
}
|
||||
|
||||
result := a.Merge(b)
|
||||
require.Equal(result.RetryJoin, retryJoin)
|
||||
require.Equal(result.StartJoin, startJoin)
|
||||
require.Equal(result.RetryMaxAttempts, retryMaxAttempts)
|
||||
require.Equal(result.RetryInterval, retryInterval)
|
||||
}
|
||||
{
|
||||
retryJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
startJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
retryMaxAttempts := 1
|
||||
retryInterval := time.Duration(0)
|
||||
|
||||
var a *ServerJoin
|
||||
b := &ServerJoin{
|
||||
RetryJoin: retryJoin,
|
||||
StartJoin: startJoin,
|
||||
RetryMaxAttempts: retryMaxAttempts,
|
||||
RetryInterval: time.Duration(retryInterval),
|
||||
}
|
||||
|
||||
result := a.Merge(b)
|
||||
require.Equal(result.RetryJoin, retryJoin)
|
||||
require.Equal(result.StartJoin, startJoin)
|
||||
require.Equal(result.RetryMaxAttempts, retryMaxAttempts)
|
||||
require.Equal(result.RetryInterval, retryInterval)
|
||||
}
|
||||
{
|
||||
retryJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
startJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
retryMaxAttempts := 1
|
||||
retryInterval := time.Duration(0)
|
||||
|
||||
a := &ServerJoin{
|
||||
RetryJoin: retryJoin,
|
||||
StartJoin: startJoin,
|
||||
RetryMaxAttempts: retryMaxAttempts,
|
||||
RetryInterval: time.Duration(retryInterval),
|
||||
}
|
||||
var b *ServerJoin
|
||||
|
||||
result := a.Merge(b)
|
||||
require.Equal(result.RetryJoin, retryJoin)
|
||||
require.Equal(result.StartJoin, startJoin)
|
||||
require.Equal(result.RetryMaxAttempts, retryMaxAttempts)
|
||||
require.Equal(result.RetryInterval, retryInterval)
|
||||
}
|
||||
{
|
||||
retryJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
startJoin := []string{"127.0.0.1", "127.0.0.2"}
|
||||
retryMaxAttempts := 1
|
||||
retryInterval := time.Duration(0)
|
||||
|
||||
a := &ServerJoin{
|
||||
RetryJoin: retryJoin,
|
||||
StartJoin: startJoin,
|
||||
}
|
||||
b := &ServerJoin{
|
||||
RetryMaxAttempts: retryMaxAttempts,
|
||||
RetryInterval: time.Duration(retryInterval),
|
||||
}
|
||||
|
||||
result := a.Merge(b)
|
||||
require.Equal(result.RetryJoin, retryJoin)
|
||||
require.Equal(result.StartJoin, startJoin)
|
||||
require.Equal(result.RetryMaxAttempts, retryMaxAttempts)
|
||||
require.Equal(result.RetryInterval, retryInterval)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -27,8 +28,17 @@ type DiscoverInterface interface {
|
||||
// retryJoiner is used to handle retrying a join until it succeeds or all of
|
||||
// its tries are exhausted.
|
||||
type retryJoiner struct {
|
||||
// join adds the specified servers to the serf cluster
|
||||
join func([]string) (int, error)
|
||||
// serverJoin adds the specified servers to the serf cluster
|
||||
serverJoin func([]string) (int, error)
|
||||
|
||||
// serverEnabled indicates whether the nomad agent will run in server mode
|
||||
serverEnabled bool
|
||||
|
||||
// clientJoin adds the specified servers to the serf cluster
|
||||
clientJoin func([]string) (int, error)
|
||||
|
||||
// clientEnabled indicates whether the nomad agent will run in client mode
|
||||
clientEnabled bool
|
||||
|
||||
// discover is of type Discover, where this is either the go-discover
|
||||
// implementation or a mock used for testing
|
||||
@@ -42,23 +52,62 @@ type retryJoiner struct {
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// Validate ensures that the configuration passes validity checks for the
|
||||
// retry_join stanza. If the configuration is not valid, returns an error that
|
||||
// will be displayed to the operator, otherwise nil.
|
||||
func (r *retryJoiner) Validate(config *Config) error {
|
||||
|
||||
// If retry_join is defined for the server, ensure that deprecated
|
||||
// fields and the server_join stanza are not both set
|
||||
if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 {
|
||||
if len(config.Server.RetryJoin) != 0 {
|
||||
return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza")
|
||||
}
|
||||
if len(config.Server.StartJoin) != 0 {
|
||||
return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
|
||||
}
|
||||
if config.Server.RetryMaxAttempts != 0 {
|
||||
return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza")
|
||||
}
|
||||
|
||||
if config.Server.RetryInterval != 0 {
|
||||
return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza")
|
||||
}
|
||||
|
||||
if len(config.Server.ServerJoin.StartJoin) != 0 {
|
||||
return fmt.Errorf("retry_join and start_join cannot both be defined")
|
||||
}
|
||||
}
|
||||
|
||||
// if retry_join is defined for the client, ensure that start_join is not
|
||||
// set as this configuration is only defined for servers.
|
||||
if config.Client != nil && config.Client.ServerJoin != nil {
|
||||
if config.Client.ServerJoin.StartJoin != nil {
|
||||
return fmt.Errorf("start_join is not supported for Nomad clients")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RetryJoin is used to handle retrying a join until it succeeds or all retries
|
||||
// are exhausted.
|
||||
func (r *retryJoiner) RetryJoin(config *Config) {
|
||||
if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
|
||||
func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
|
||||
if len(serverJoin.RetryJoin) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
attempt := 0
|
||||
|
||||
addrsToJoin := strings.Join(config.Server.RetryJoin, " ")
|
||||
addrsToJoin := strings.Join(serverJoin.RetryJoin, " ")
|
||||
r.logger.Printf("[INFO] agent: Joining cluster... %s", addrsToJoin)
|
||||
|
||||
for {
|
||||
var addrs []string
|
||||
var n int
|
||||
var err error
|
||||
|
||||
for _, addr := range config.Server.RetryJoin {
|
||||
for _, addr := range serverJoin.RetryJoin {
|
||||
switch {
|
||||
case strings.HasPrefix(addr, "provider="):
|
||||
servers, err := r.discover.Addrs(addr, r.logger)
|
||||
@@ -73,23 +122,33 @@ func (r *retryJoiner) RetryJoin(config *Config) {
|
||||
}
|
||||
|
||||
if len(addrs) > 0 {
|
||||
n, err := r.join(addrs)
|
||||
if err == nil {
|
||||
r.logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
|
||||
if r.serverEnabled && r.serverJoin != nil {
|
||||
n, err = r.serverJoin(addrs)
|
||||
if err == nil {
|
||||
r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n)
|
||||
return
|
||||
}
|
||||
}
|
||||
if r.clientEnabled && r.clientJoin != nil {
|
||||
n, err = r.clientJoin(addrs)
|
||||
if err == nil {
|
||||
r.logger.Printf("[INFO] agent: Join completed. Client synced with %d initial servers", n)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
attempt++
|
||||
if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
|
||||
if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts {
|
||||
r.logger.Printf("[ERR] agent: max join retry exhausted, exiting")
|
||||
close(r.errCh)
|
||||
return
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
|
||||
config.Server.RetryInterval)
|
||||
r.logger.Printf("[WARN] agent: Join failed: %q, retrying in %v", err,
|
||||
serverJoin.RetryInterval)
|
||||
}
|
||||
time.Sleep(config.Server.retryInterval)
|
||||
time.Sleep(serverJoin.RetryInterval)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/hashicorp/nomad/version"
|
||||
"github.com/mitchellh/cli"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -30,43 +30,37 @@ func (m *MockDiscover) Names() []string {
|
||||
|
||||
func TestRetryJoin_Integration(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Create two agents and have one retry join the other
|
||||
agent := NewTestAgent(t, t.Name(), nil)
|
||||
defer agent.Shutdown()
|
||||
|
||||
doneCh := make(chan struct{})
|
||||
shutdownCh := make(chan struct{})
|
||||
|
||||
defer func() {
|
||||
close(shutdownCh)
|
||||
<-doneCh
|
||||
}()
|
||||
agent2 := NewTestAgent(t, t.Name(), func(c *Config) {
|
||||
c.NodeName = "foo"
|
||||
if c.Server.ServerJoin == nil {
|
||||
c.Server.ServerJoin = &ServerJoin{}
|
||||
}
|
||||
c.Server.ServerJoin.RetryJoin = []string{agent.Config.normalizedAddrs.Serf}
|
||||
c.Server.ServerJoin.RetryInterval = 1 * time.Second
|
||||
})
|
||||
defer agent2.Shutdown()
|
||||
|
||||
// Create a fake command and have it wrap the second agent and run the retry
|
||||
// join handler
|
||||
cmd := &Command{
|
||||
Version: version.GetVersion(),
|
||||
ShutdownCh: shutdownCh,
|
||||
Ui: &cli.BasicUi{
|
||||
Reader: os.Stdin,
|
||||
Writer: os.Stdout,
|
||||
ErrorWriter: os.Stderr,
|
||||
},
|
||||
agent: agent2.Agent,
|
||||
}
|
||||
|
||||
serfAddr := agent.Config.normalizedAddrs.Serf
|
||||
|
||||
args := []string{
|
||||
"-dev",
|
||||
"-node", "foo",
|
||||
"-retry-join", serfAddr,
|
||||
"-retry-interval", "1s",
|
||||
if err := cmd.handleRetryJoin(agent2.Config); err != nil {
|
||||
t.Fatalf("handleRetryJoin failed: %v", err)
|
||||
}
|
||||
|
||||
go func() {
|
||||
if code := cmd.Run(args); code != 0 {
|
||||
t.Logf("bad: %d", code)
|
||||
}
|
||||
close(doneCh)
|
||||
}()
|
||||
|
||||
// Ensure the retry join occurred.
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
mem := agent.server.Members()
|
||||
if len(mem) != 2 {
|
||||
@@ -78,16 +72,13 @@ func TestRetryJoin_Integration(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestRetryJoin_NonCloud(t *testing.T) {
|
||||
func TestRetryJoin_Server_NonCloud(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
newConfig := &Config{
|
||||
Server: &ServerConfig{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
Enabled: true,
|
||||
},
|
||||
serverJoin := &ServerJoin{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
}
|
||||
|
||||
var output []string
|
||||
@@ -98,28 +89,26 @@ func TestRetryJoin_NonCloud(t *testing.T) {
|
||||
}
|
||||
|
||||
joiner := retryJoiner{
|
||||
discover: &MockDiscover{},
|
||||
join: mockJoin,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
discover: &MockDiscover{},
|
||||
serverJoin: mockJoin,
|
||||
serverEnabled: true,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
joiner.RetryJoin(newConfig)
|
||||
joiner.RetryJoin(serverJoin)
|
||||
|
||||
require.Equal(1, len(output))
|
||||
require.Equal(stubAddress, output[0])
|
||||
}
|
||||
|
||||
func TestRetryJoin_Cloud(t *testing.T) {
|
||||
func TestRetryJoin_Server_Cloud(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
newConfig := &Config{
|
||||
Server: &ServerConfig{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"provider=aws, tag_value=foo"},
|
||||
Enabled: true,
|
||||
},
|
||||
serverJoin := &ServerJoin{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"provider=aws, tag_value=foo"},
|
||||
}
|
||||
|
||||
var output []string
|
||||
@@ -131,29 +120,27 @@ func TestRetryJoin_Cloud(t *testing.T) {
|
||||
|
||||
mockDiscover := &MockDiscover{}
|
||||
joiner := retryJoiner{
|
||||
discover: mockDiscover,
|
||||
join: mockJoin,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
discover: mockDiscover,
|
||||
serverJoin: mockJoin,
|
||||
serverEnabled: true,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
joiner.RetryJoin(newConfig)
|
||||
joiner.RetryJoin(serverJoin)
|
||||
|
||||
require.Equal(1, len(output))
|
||||
require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs)
|
||||
require.Equal(stubAddress, output[0])
|
||||
}
|
||||
|
||||
func TestRetryJoin_MixedProvider(t *testing.T) {
|
||||
func TestRetryJoin_Server_MixedProvider(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
newConfig := &Config{
|
||||
Server: &ServerConfig{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"provider=aws, tag_value=foo", "127.0.0.1"},
|
||||
Enabled: true,
|
||||
},
|
||||
serverJoin := &ServerJoin{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"provider=aws, tag_value=foo", "127.0.0.1"},
|
||||
}
|
||||
|
||||
var output []string
|
||||
@@ -165,15 +152,197 @@ func TestRetryJoin_MixedProvider(t *testing.T) {
|
||||
|
||||
mockDiscover := &MockDiscover{}
|
||||
joiner := retryJoiner{
|
||||
discover: mockDiscover,
|
||||
join: mockJoin,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
discover: mockDiscover,
|
||||
serverJoin: mockJoin,
|
||||
serverEnabled: true,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
joiner.RetryJoin(newConfig)
|
||||
joiner.RetryJoin(serverJoin)
|
||||
|
||||
require.Equal(2, len(output))
|
||||
require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs)
|
||||
require.Equal(stubAddress, output[0])
|
||||
}
|
||||
|
||||
func TestRetryJoin_Client(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
serverJoin := &ServerJoin{
|
||||
RetryMaxAttempts: 1,
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
}
|
||||
|
||||
var output []string
|
||||
|
||||
mockJoin := func(s []string) (int, error) {
|
||||
output = s
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
joiner := retryJoiner{
|
||||
discover: &MockDiscover{},
|
||||
clientJoin: mockJoin,
|
||||
clientEnabled: true,
|
||||
logger: log.New(ioutil.Discard, "", 0),
|
||||
errCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
joiner.RetryJoin(serverJoin)
|
||||
|
||||
require.Equal(1, len(output))
|
||||
require.Equal(stubAddress, output[0])
|
||||
}
|
||||
|
||||
func TestRetryJoin_Validate(t *testing.T) {
|
||||
t.Parallel()
|
||||
type validateExpect struct {
|
||||
config *Config
|
||||
isValid bool
|
||||
reason string
|
||||
}
|
||||
|
||||
scenarios := []*validateExpect{
|
||||
{
|
||||
config: &Config{
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
StartJoin: []string{},
|
||||
},
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
StartJoin: []string{},
|
||||
},
|
||||
},
|
||||
isValid: false,
|
||||
reason: "server_join cannot be defined if retry_join is defined on the server stanza",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
StartJoin: []string{},
|
||||
},
|
||||
StartJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
RetryJoin: []string{},
|
||||
},
|
||||
},
|
||||
isValid: false,
|
||||
reason: "server_join cannot be defined if start_join is defined on the server stanza",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
StartJoin: []string{},
|
||||
},
|
||||
StartJoin: []string{},
|
||||
RetryMaxAttempts: 1,
|
||||
RetryInterval: 0,
|
||||
RetryJoin: []string{},
|
||||
},
|
||||
},
|
||||
isValid: false,
|
||||
reason: "server_join cannot be defined if retry_max_attempts is defined on the server stanza",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: time.Duration(1),
|
||||
StartJoin: []string{},
|
||||
},
|
||||
StartJoin: []string{},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 3 * time.Second,
|
||||
RetryJoin: []string{},
|
||||
},
|
||||
},
|
||||
isValid: false,
|
||||
reason: "server_join cannot be defined if retry_interval is defined on the server stanza",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
StartJoin: []string{"127.0.0.1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
isValid: false,
|
||||
reason: "start_join and retry_join should not both be defined",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Client: &ClientConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
StartJoin: []string{"127.0.0.1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
isValid: false,
|
||||
reason: "start_join should not be defined on the client",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Client: &ClientConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 0,
|
||||
RetryInterval: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
isValid: true,
|
||||
reason: "client server_join should be valid",
|
||||
},
|
||||
{
|
||||
config: &Config{
|
||||
Server: &ServerConfig{
|
||||
ServerJoin: &ServerJoin{
|
||||
RetryJoin: []string{"127.0.0.1"},
|
||||
RetryMaxAttempts: 1,
|
||||
RetryInterval: 1,
|
||||
StartJoin: []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
isValid: true,
|
||||
reason: "server server_join should be valid",
|
||||
},
|
||||
}
|
||||
|
||||
joiner := retryJoiner{}
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.reason, func(t *testing.T) {
|
||||
err := joiner.Validate(scenario.config)
|
||||
if scenario.isValid {
|
||||
require.NoError(t, err)
|
||||
} else {
|
||||
require.Error(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
136
website/source/docs/agent/cloud_auto_join.html.md
Normal file
136
website/source/docs/agent/cloud_auto_join.html.md
Normal file
@@ -0,0 +1,136 @@
|
||||
---
|
||||
layout: "docs"
|
||||
page_title: "Cloud Auto-join"
|
||||
sidebar_current: "docs-agent-cloud-auto-join"
|
||||
description: |-
|
||||
Nomad supports automatic cluster joining using cloud metadata from various cloud providers
|
||||
---
|
||||
|
||||
# Cloud Auto-joining
|
||||
|
||||
As of Nomad 0.8.4,
|
||||
[`retry_join`](/docs/agent/configuration/server_join.html#retry_join) accepts a
|
||||
unified interface using the
|
||||
[go-discover](https://github.com/hashicorp/go-discover) library for doing
|
||||
automatic cluster joining using cloud metadata. To use retry-join with a
|
||||
supported cloud provider, specify the configuration on the command line or
|
||||
configuration file as a `key=value key=value ...` string.
|
||||
|
||||
Values are taken literally and must not be URL
|
||||
encoded. If the values contain spaces, backslashes or double quotes then
|
||||
they need to be double quoted and the usual escaping rules apply.
|
||||
|
||||
```json
|
||||
{
|
||||
"retry_join": ["provider=my-cloud config=val config2=\"some other val\" ..."]
|
||||
}
|
||||
```
|
||||
|
||||
The cloud provider-specific configurations are detailed below. This can be
|
||||
combined with static IP or DNS addresses or even multiple configurations
|
||||
for different providers.
|
||||
|
||||
In order to use discovery behind a proxy, you will need to set
|
||||
`HTTP_PROXY`, `HTTPS_PROXY` and `NO_PROXY` environment variables per
|
||||
[Golang `net/http` library](https://golang.org/pkg/net/http/#ProxyFromEnvironment).
|
||||
|
||||
The following sections give the options specific to a subset of supported cloud
|
||||
providers. For information on all providers, see further documentation in
|
||||
[go-discover](https://github.com/hashicorp/go-discover).
|
||||
|
||||
### Amazon EC2
|
||||
|
||||
This returns the first private IP address of all servers in the given
|
||||
region which have the given `tag_key` and `tag_value`.
|
||||
|
||||
|
||||
```json
|
||||
{
|
||||
"retry_join": ["provider=aws tag_key=... tag_value=..."]
|
||||
}
|
||||
```
|
||||
|
||||
- `provider` (required) - the name of the provider ("aws" in this case).
|
||||
- `tag_key` (required) - the key of the tag to auto-join on.
|
||||
- `tag_value` (required) - the value of the tag to auto-join on.
|
||||
- `region` (optional) - the AWS region to authenticate in.
|
||||
- `addr_type` (optional) - the type of address to discover: `private_v4`, `public_v4`, `public_v6`. Default is `private_v4`. (>= 1.0)
|
||||
- `access_key_id` (optional) - the AWS access key for authentication (see below for more information about authenticating).
|
||||
- `secret_access_key` (optional) - the AWS secret access key for authentication (see below for more information about authenticating).
|
||||
|
||||
#### Authentication & Precedence
|
||||
|
||||
- Static credentials `access_key_id=... secret_access_key=...`
|
||||
- Environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`)
|
||||
- Shared credentials file (`~/.aws/credentials` or the path specified by `AWS_SHARED_CREDENTIALS_FILE`)
|
||||
- ECS task role metadata (container-specific).
|
||||
- EC2 instance role metadata.
|
||||
|
||||
The only required IAM permission is `ec2:DescribeInstances`, and it is
|
||||
recommended that you make a dedicated key used only for auto-joining. If the
|
||||
region is omitted it will be discovered through the local instance's [EC2
|
||||
metadata
|
||||
endpoint](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html).
|
||||
|
||||
### Microsoft Azure
|
||||
|
||||
This returns the first private IP address of all servers in the given region
|
||||
which have the given `tag_key` and `tag_value` in the tenant and subscription, or in
|
||||
the given `resource_group` of a `vm_scale_set` for Virtual Machine Scale Sets.
|
||||
|
||||
|
||||
```json
|
||||
{
|
||||
"retry_join": ["provider=azure tag_name=... tag_value=... tenant_id=... client_id=... subscription_id=... secret_access_key=..."]
|
||||
}
|
||||
```
|
||||
|
||||
- `provider` (required) - the name of the provider ("azure" in this case).
|
||||
- `tenant_id` (required) - the tenant to join machines in.
|
||||
- `client_id` (required) - the client to authenticate with.
|
||||
- `secret_access_key` (required) - the secret client key.
|
||||
|
||||
Use these configuration parameters when using tags:
|
||||
- `tag_name` - the name of the tag to auto-join on.
|
||||
- `tag_value` - the value of the tag to auto-join on.
|
||||
|
||||
Use these configuration parameters when using Virtual Machine Scale Sets (Consul 1.0.3 and later):
|
||||
- `resource_group` - the name of the resource group to filter on.
|
||||
- `vm_scale_set` - the name of the virtual machine scale set to filter on.
|
||||
|
||||
When using tags the only permission needed is the `ListAll` method for `NetworkInterfaces`. When using
|
||||
Virtual Machine Scale Sets the only role action needed is `Microsoft.Compute/virtualMachineScaleSets/*/read`.
|
||||
|
||||
### Google Compute Engine
|
||||
|
||||
This returns the first private IP address of all servers in the given
|
||||
project which have the given `tag_value`.
|
||||
|
||||
|
||||
```json
|
||||
{
|
||||
"retry_join": ["provider=gce project_name=... tag_value=..."]
|
||||
}
|
||||
```
|
||||
|
||||
- `provider` (required) - the name of the provider ("gce" in this case).
|
||||
- `tag_value` (required) - the value of the tag to auto-join on.
|
||||
- `project_name` (optional) - the name of the project to auto-join on. Discovered if not set.
|
||||
- `zone_pattern` (optional) - the list of zones can be restricted through an RE2 compatible regular expression. If omitted, servers in all zones are returned.
|
||||
- `credentials_file` (optional) - the credentials file for authentication. See below for more information.
|
||||
|
||||
#### Authentication & Precedence
|
||||
|
||||
- Use credentials from `credentials_file`, if provided.
|
||||
- Use JSON file from `GOOGLE_APPLICATION_CREDENTIALS` environment variable.
|
||||
- Use JSON file in a location known to the gcloud command-line tool.
|
||||
- On Windows, this is `%APPDATA%/gcloud/application_default_credentials.json`.
|
||||
- On other systems, `$HOME/.config/gcloud/application_default_credentials.json`.
|
||||
- On Google Compute Engine, use credentials from the metadata
|
||||
server. In this final case any provided scopes are ignored.
|
||||
|
||||
Discovery requires a [GCE Service
|
||||
Account](https://cloud.google.com/compute/docs/access/service-accounts).
|
||||
Credentials are searched using the following paths, in order of precedence.
|
||||
|
||||
|
||||
@@ -90,6 +90,12 @@ client {
|
||||
receive work. This may be specified as an IP address or DNS, with or without
|
||||
the port. If the port is omitted, the default port of `4647` is used.
|
||||
|
||||
- `server_join` <code>([server_join][server-join]: nil)</code> - Specifies
|
||||
how the Nomad client will connect to Nomad servers. The `start_join` field
|
||||
is not supported on the client. The retry_join fields may directly specify
|
||||
the server address or use go-discover syntax for auto-discovery. See the
|
||||
documentation for more detail.
|
||||
|
||||
- `state_dir` `(string: "[data_dir]/client")` - Specifies the directory to use
|
||||
to store client state. By default, this is - the top-level
|
||||
[data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with
|
||||
@@ -307,7 +313,11 @@ cluster.
|
||||
```hcl
|
||||
client {
|
||||
enabled = true
|
||||
servers = ["1.2.3.4:4647", "5.6.7.8:4647"]
|
||||
server_join {
|
||||
retry_join = [ "1.1.1.1", "2.2.2.2" ]
|
||||
retry_max = 3
|
||||
retry_interval = "15s"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -346,3 +356,4 @@ client {
|
||||
}
|
||||
}
|
||||
```
|
||||
[server-join]: /docs/agent/configuration/server_join.html "Server Join"
|
||||
|
||||
@@ -28,7 +28,11 @@ join failures, and more.
|
||||
server {
|
||||
enabled = true
|
||||
bootstrap_expect = 3
|
||||
retry_join = ["1.2.3.4", "5.6.7.8"]
|
||||
server_join {
|
||||
retry_join = [ "1.1.1.1", "2.2.2.2" ]
|
||||
retry_max = 3
|
||||
retry_interval = "15s"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -102,9 +106,9 @@ server {
|
||||
second is a tradeoff as it lowers failure detection time of nodes at the
|
||||
tradeoff of false positives and increased load on the leader.
|
||||
|
||||
- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether
|
||||
this server will act as a non-voting member of the cluster to help provide
|
||||
read scalability.
|
||||
- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether
|
||||
this server will act as a non-voting member of the cluster to help provide
|
||||
read scalability.
|
||||
|
||||
- `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel
|
||||
scheduler threads to run. This can be as many as one per core, or `0` to
|
||||
@@ -131,6 +135,17 @@ server {
|
||||
cluster again when starting. This flag allows the previous state to be used to
|
||||
rejoin the cluster.
|
||||
|
||||
- `server_join` <code>([server_join][server-join]: nil)</code> - Specifies
|
||||
how the Nomad server will connect to other Nomad servers. The `retry_join`
|
||||
fields may directly specify the server address or use go-discover syntax for
|
||||
auto-discovery. See the [server_join documentation][server-join] for more detail.
|
||||
|
||||
- `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use
|
||||
in place of the Nomad version when custom upgrades are enabled in Autopilot.
|
||||
For more information, see the [Autopilot Guide](/guides/cluster/autopilot.html).
|
||||
|
||||
### Deprecated Parameters
|
||||
|
||||
- `retry_join` `(array<string>: [])` - Specifies a list of server addresses to
|
||||
retry joining if the first attempt fails. This is similar to
|
||||
[`start_join`](#start_join), but only invokes if the initial join attempt
|
||||
@@ -138,63 +153,25 @@ server {
|
||||
succeeds. After one succeeds, no further addresses will be contacted. This is
|
||||
useful for cases where we know the address will become available eventually.
|
||||
Use `retry_join` with an array as a replacement for `start_join`, **do not use
|
||||
both options**. See the [server address format](#server-address-format)
|
||||
section for more information on the format of the string.
|
||||
both options**. See the [server_join][server-join]
|
||||
section for more information on the format of the string. This field is
|
||||
deprecated in favor of the [server_join stanza][server-join].
|
||||
|
||||
- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry
|
||||
join attempts.
|
||||
join attempts. This field is deprecated in favor of the [server_join
|
||||
stanza][server-join].
|
||||
|
||||
- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be
|
||||
made before exiting with a return code of 1. By default, this is set to 0
|
||||
which is interpreted as infinite retries.
|
||||
which is interpreted as infinite retries. This field is deprecated in favor of
|
||||
the [server_join stanza][server-join].
|
||||
|
||||
- `start_join` `(array<string>: [])` - Specifies a list of server addresses to
|
||||
join on startup. If Nomad is unable to join with any of the specified
|
||||
addresses, agent startup will fail. See the
|
||||
[server address format](#server-address-format) section for more information
|
||||
on the format of the string.
|
||||
|
||||
- `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use
|
||||
in place of the Nomad version when custom upgrades are enabled in Autopilot.
|
||||
For more information, see the [Autopilot Guide](/guides/cluster/autopilot.html).
|
||||
|
||||
### Server Address Format
|
||||
|
||||
This section describes the acceptable syntax and format for describing the
|
||||
location of a Nomad server. There are many ways to reference a Nomad server,
|
||||
including directly by IP address and resolving through DNS.
|
||||
|
||||
#### Directly via IP Address
|
||||
|
||||
It is possible to address another Nomad server using its IP address. This is
|
||||
done in the `ip:port` format, such as:
|
||||
|
||||
```
|
||||
1.2.3.4:5678
|
||||
```
|
||||
|
||||
If the port option is omitted, it defaults to the Serf port, which is 4648
|
||||
unless configured otherwise:
|
||||
|
||||
```
|
||||
1.2.3.4 => 1.2.3.4:4648
|
||||
```
|
||||
|
||||
#### Via Domains or DNS
|
||||
|
||||
It is possible to address another Nomad server using its DNS address. This is
|
||||
done in the `address:port` format, such as:
|
||||
|
||||
```
|
||||
nomad-01.company.local:5678
|
||||
```
|
||||
|
||||
If the port option is omitted, it defaults to the Serf port, which is 4648
|
||||
unless configured otherwise:
|
||||
|
||||
```
|
||||
nomad-01.company.local => nomad-01.company.local:4648
|
||||
```
|
||||
addresses, agent startup will fail. See the [server address
|
||||
format](/docs/agent/configuration/server_join.html#server-address-format)
|
||||
section for more information on the format of the string. This field is
|
||||
deprecated in favor of the [server_join stanza][server-join].
|
||||
|
||||
## `server` Examples
|
||||
|
||||
@@ -242,3 +219,4 @@ server {
|
||||
```
|
||||
|
||||
[encryption]: /docs/agent/encryption.html "Nomad Agent Encryption"
|
||||
[server-join]: /docs/agent/configuration/server_join.html "Server Join"
|
||||
|
||||
131
website/source/docs/agent/configuration/server_join.html.md
Normal file
131
website/source/docs/agent/configuration/server_join.html.md
Normal file
@@ -0,0 +1,131 @@
|
||||
---
|
||||
layout: "docs"
|
||||
page_title: "server_join Stanza - Agent Configuration"
|
||||
sidebar_current: "docs-agent-configuration--server-join"
|
||||
description: |-
|
||||
The "server_join" stanza specifies how the Nomad agent will discover and connect to Nomad servers.
|
||||
---
|
||||
|
||||
# `server_join` Stanza
|
||||
|
||||
<table class="table table-bordered table-striped">
|
||||
<tr>
|
||||
<th width="120">Placement</th>
|
||||
<td>
|
||||
<code>server -> **server_join**</code>
|
||||
<br>
|
||||
<code>client -> **server_join**</code>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
The `server_join` stanza specifies how the Nomad agent will discover and connect
|
||||
to Nomad servers.
|
||||
|
||||
```hcl
|
||||
server_join {
|
||||
retry_join = [ "1.1.1.1", "2.2.2.2" ]
|
||||
retry_max = 3
|
||||
retry_interval = "15s"
|
||||
}
|
||||
```
|
||||
|
||||
## `server_join` Parameters
|
||||
|
||||
- `retry_join` `(array<string>: [])` - Specifies a list of server addresses to
|
||||
join. This is similar to [`start_join`](#start_join), but will continue to
|
||||
be attempted even if the initial join attempt fails, up to
|
||||
[retry_max](#retry_max). Further, `retry_join` is available to
|
||||
both Nomad servers and clients, while `start_join` is only defined for Nomad
|
||||
servers. This is useful for cases where we know the address will become
|
||||
available eventually. Use `retry_join` with an array as a replacement for
|
||||
`start_join`, **do not use both options**.
|
||||
|
||||
Address format includes both using IP addresses as well as an interface to the
|
||||
[go-discover](https://github.com/hashicorp/go-discover) library for doing
|
||||
automated cluster joining using cloud metadata. See [Cloud
|
||||
Auto-join][cloud_auto_join] for more information.
|
||||
|
||||
```
|
||||
server_join {
|
||||
retry_join = [ "1.1.1.1", "2.2.2.2" ]
|
||||
}
|
||||
```
|
||||
|
||||
Using the `go-discover` interface, this can be defined both in a client or
|
||||
server configuration as well as provided as a command-line argument.
|
||||
|
||||
```
|
||||
server_join {
|
||||
retry_join = [ "provider=aws tag_key=..." ]
|
||||
}
|
||||
```
|
||||
|
||||
See the [server address format](#server-address-format) for more information
|
||||
about expected server address formats.
|
||||
|
||||
- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry
|
||||
join attempts.
|
||||
|
||||
- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be
|
||||
made before exiting with a return code of 1. By default, this is set to 0
|
||||
which is interpreted as infinite retries.
|
||||
|
||||
- `start_join` `(array<string>: [])` - Specifies a list of server addresses to
|
||||
join on startup. If Nomad is unable to join with any of the specified
|
||||
addresses, agent startup will fail. See the
|
||||
[server address format](#server-address-format) section for more information
|
||||
on the format of the string. This field is defined only for Nomad servers and
|
||||
will result in a configuration parse error if included in a client
|
||||
configuration.
|
||||
|
||||
## Server Address Format
|
||||
|
||||
This section describes the acceptable syntax and format for describing the
|
||||
location of a Nomad server. There are many ways to reference a Nomad server,
|
||||
including directly by IP address and resolving through DNS.
|
||||
|
||||
### Directly via IP Address
|
||||
|
||||
It is possible to address another Nomad server using its IP address. This is
|
||||
done in the `ip:port` format, such as:
|
||||
|
||||
```
|
||||
1.2.3.4:5678
|
||||
```
|
||||
|
||||
If the port option is omitted, it defaults to the Serf port, which is 4648
|
||||
unless configured otherwise:
|
||||
|
||||
```
|
||||
1.2.3.4 => 1.2.3.4:4648
|
||||
```
|
||||
|
||||
### Via Domains or DNS
|
||||
|
||||
It is possible to address another Nomad server using its DNS address. This is
|
||||
done in the `address:port` format, such as:
|
||||
|
||||
```
|
||||
nomad-01.company.local:5678
|
||||
```
|
||||
|
||||
If the port option is omitted, it defaults to the Serf port, which is 4648
|
||||
unless configured otherwise:
|
||||
|
||||
```
|
||||
nomad-01.company.local => nomad-01.company.local:4648
|
||||
```
|
||||
|
||||
### Via the go-discover interface
|
||||
|
||||
As of Nomad 0.8.4, `retry_join` accepts a unified interface using the
|
||||
[go-discover](https://github.com/hashicorp/go-discover) library for doing
|
||||
automated cluster joining using cloud metadata. See [Cloud
|
||||
Auto-join][cloud_auto_join] for more information.
|
||||
|
||||
```
|
||||
"provider=aws tag_key=..." => 1.2.3.4:4648
|
||||
```
|
||||
|
||||
[cloud_auto_join]: /docs/agent/cloud_auto_join.html "Nomad Cloud Auto-join"
|
||||
@@ -71,7 +71,15 @@ via CLI arguments. The `agent` command accepts the following arguments:
|
||||
* `-region=<region>`: Equivalent to the [region](#region) config option.
|
||||
* `-rejoin`: Equivalent to the [rejoin_after_leave](#rejoin_after_leave) config option.
|
||||
* `-retry-interval`: Equivalent to the [retry_interval](#retry_interval) config option.
|
||||
* `-retry-join`: Similar to `-join` but allows retrying a join if the first attempt fails.
|
||||
|
||||
```sh
|
||||
$ nomad agent -retry-join "127.0.0.1:4648"
|
||||
```
|
||||
|
||||
`retry-join` can be defined as a command-line flag only for servers. Clients
|
||||
can configure `retry-join` only in configuration files.
|
||||
|
||||
* `-retry-max`: Similar to the [retry_max](#retry_max) config option.
|
||||
* `-server`: Enable server mode on the local agent.
|
||||
* `-servers=<host:port>`: Equivalent to the Client [servers](#servers) config
|
||||
|
||||
@@ -31,7 +31,9 @@ server {
|
||||
bootstrap_expect = 3
|
||||
|
||||
# This is the IP address of the first server we provisioned
|
||||
retry_join = ["<known-address>:4648"]
|
||||
server_join {
|
||||
retry_join = ["<known-address>:4648"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -396,6 +396,9 @@
|
||||
<a href="/docs/agent/index.html">Nomad Agent</a>
|
||||
|
||||
<ul class="nav">
|
||||
<li <%= sidebar_current("docs-agent-cloud-auto-join") %>>
|
||||
<a href="/docs/agent/cloud_auto_join.html">Cloud Auto-join</a>
|
||||
</li>
|
||||
<li<%= sidebar_current("docs-agent-configuration") %>>
|
||||
<a href="/docs/agent/configuration/index.html">Configuration</a>
|
||||
<ul class="nav">
|
||||
@@ -417,6 +420,9 @@
|
||||
<li <%= sidebar_current("docs-agent-configuration-server") %>>
|
||||
<a href="/docs/agent/configuration/server.html">server</a>
|
||||
</li>
|
||||
<li <%= sidebar_current("docs-agent-configuration-server-join") %>>
|
||||
<a href="/docs/agent/configuration/server_join.html">server_join</a>
|
||||
</li>
|
||||
<li <%= sidebar_current("docs-agent-configuration-telemetry") %>>
|
||||
<a href="/docs/agent/configuration/telemetry.html">telemetry</a>
|
||||
</li>
|
||||
@@ -428,6 +434,7 @@
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
<li<%= sidebar_current("docs-agent-encryption") %>>
|
||||
<a href="/docs/agent/encryption.html">Encryption</a>
|
||||
</li>
|
||||
|
||||
Reference in New Issue
Block a user