From f66eb83fc035e250569e13d392dab80e1b4cc9a4 Mon Sep 17 00:00:00 2001 From: Adriano Caloiaro Date: Wed, 15 Nov 2023 08:07:18 -0700 Subject: [PATCH] Add `go-netaddrs` support to `retry_join` (#18745) --- .changelog/18745.txt | 3 + command/agent/command.go | 7 +- command/agent/retry_join.go | 88 ++++++++++--- command/agent/retry_join_test.go | 87 +++++++++++-- go.mod | 1 + go.sum | 2 + .../docs/configuration/server_join.mdx | 120 ++++++++++++++---- 7 files changed, 248 insertions(+), 60 deletions(-) create mode 100644 .changelog/18745.txt diff --git a/.changelog/18745.txt b/.changelog/18745.txt new file mode 100644 index 000000000..c6d6293b4 --- /dev/null +++ b/.changelog/18745.txt @@ -0,0 +1,3 @@ +```release-note:improvement +config: Add `go-netaddrs` support to `server_join.retry_join` +``` diff --git a/command/agent/command.go b/command/agent/command.go index d3fd4de32..7d4d3febc 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -310,7 +310,6 @@ func (c *Command) readConfig() *Config { } func (c *Command) IsValidConfig(config, cmdConfig *Config) bool { - // Check that the server is running in at least one mode. 
if !(config.Server.Enabled || config.Client.Enabled) { c.Ui.Error("Must specify either server, client or dev mode for the agent.") @@ -887,7 +886,7 @@ func (c *Command) handleRetryJoin(config *Config) error { if config.Server.Enabled && len(config.Server.RetryJoin) != 0 { joiner := retryJoiner{ - discover: &discover.Discover{}, + autoDiscover: autoDiscover{goDiscover: &discover.Discover{}, netAddrs: &netAddrs{}}, errCh: c.retryJoinErrCh, logger: c.agent.logger.Named("joiner"), serverJoin: c.agent.server.Join, @@ -920,7 +919,7 @@ func (c *Command) handleRetryJoin(config *Config) error { len(config.Server.ServerJoin.RetryJoin) != 0 { joiner := retryJoiner{ - discover: &discover.Discover{}, + autoDiscover: autoDiscover{goDiscover: &discover.Discover{}, netAddrs: &netAddrs{}}, errCh: c.retryJoinErrCh, logger: c.agent.logger.Named("joiner"), serverJoin: c.agent.server.Join, @@ -938,7 +937,7 @@ func (c *Command) handleRetryJoin(config *Config) error { config.Client.ServerJoin != nil && len(config.Client.ServerJoin.RetryJoin) != 0 { joiner := retryJoiner{ - discover: &discover.Discover{}, + autoDiscover: autoDiscover{goDiscover: &discover.Discover{}, netAddrs: &netAddrs{}}, errCh: c.retryJoinErrCh, logger: c.agent.logger.Named("joiner"), clientJoin: c.agent.client.SetServers, diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index a431c1be6..228dfc44d 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -4,14 +4,22 @@ package agent import ( + "context" "fmt" golog "log" + "net" "strings" "time" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-netaddrs" ) +// AutoDiscoverInterface is an interface for autoDiscover to ease testing +type AutoDiscoverInterface interface { + Addrs(cfg string, logger log.Logger) ([]string, error) +} + // DiscoverInterface is an interface for the Discover type in the go-discover // library. Using an interface allows for ease of testing. 
type DiscoverInterface interface { @@ -30,6 +38,63 @@ type DiscoverInterface interface { Names() []string } +// NetaddrsInterface is an interface for go-netaddrs to ease testing +type NetaddrsInterface interface { + IPAddrs(ctx context.Context, cfg string, l netaddrs.Logger) ([]net.IPAddr, error) +} + +type netAddrs struct{} + +func (n *netAddrs) IPAddrs(ctx context.Context, cfg string, l netaddrs.Logger) ([]net.IPAddr, error) { + return netaddrs.IPAddrs(ctx, cfg, l) +} + +// autoDiscover uses go-netaddrs and go-discover to discover IP addresses when +// auto-joining clusters +// +// autoDiscover implements AutoDiscoverInterface +type autoDiscover struct { + netAddrs NetaddrsInterface + goDiscover DiscoverInterface +} + +// Addrs looks up and returns IP addresses specified by cfg. +// +// If cfg has an exec= prefix, IP addresses are looked up by executing the command +// after exec=. The command may include optional arguments. Command arguments +// must be space separated (spaces in argument values cannot be escaped). +// The command may output IPv4 or IPv6 addresses, and IPv6 addresses can +// optionally include a zone index. +// +// The executable must follow these rules: +// +// on success - exit 0 and print whitespace delimited IP addresses to stdout. +// on failure - exit with a non-zero code, and should print an error message +// of up to 1024 bytes to stderr. +// +// If cfg has a provider= prefix, IP addresses are looked up using the go-discover +// provider specified in cfg. +// +// If cfg contains neither an exec= nor a provider= prefix, the configuration is +// returned as-is, to be resolved later via Serf in the server's Join() function, +// or via DNS in client's SetServers() function. 
+func (d autoDiscover) Addrs(cfg string, logger log.Logger) (addrs []string, err error) { + var ipAddrs []net.IPAddr + switch { + case strings.HasPrefix(cfg, "exec="): + ipAddrs, err = d.netAddrs.IPAddrs(context.Background(), cfg, logger) + for _, addr := range ipAddrs { + addrs = append(addrs, addr.IP.String()) + } + case strings.HasPrefix(cfg, "provider="): + addrs, err = d.goDiscover.Addrs(cfg, logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true})) + default: + return []string{cfg}, err + } + + return +} + // retryJoiner is used to handle retrying a join until it succeeds or all of // its tries are exhausted. type retryJoiner struct { @@ -45,9 +110,8 @@ type retryJoiner struct { // clientEnabled indicates whether the nomad agent will run in client mode clientEnabled bool - // discover is of type Discover, where this is either the go-discover - // implementation or a mock used for testing - discover DiscoverInterface + // autoDiscover is either an agent.autoDiscover, or a mock used for testing + autoDiscover AutoDiscoverInterface // errCh is used to communicate with the agent when the max retry attempt // limit has been reached @@ -61,7 +125,6 @@ type retryJoiner struct { // retry_join block. If the configuration is not valid, returns an error that // will be displayed to the operator, otherwise nil. 
func (r *retryJoiner) Validate(config *Config) error { - // If retry_join is defined for the server, ensure that deprecated // fields and the server_join block are not both set if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 { @@ -107,24 +170,19 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { addrsToJoin := strings.Join(serverJoin.RetryJoin, " ") r.logger.Info("starting retry join", "servers", addrsToJoin) - standardLogger := r.logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true}) for { var addrs []string var n int var err error for _, addr := range serverJoin.RetryJoin { - switch { - case strings.HasPrefix(addr, "provider="): - servers, err := r.discover.Addrs(addr, standardLogger) - if err != nil { - r.logger.Error("determining join addresses failed", "error", err) - } else { - addrs = append(addrs, servers...) - } - default: - addrs = append(addrs, addr) + servers, err := r.autoDiscover.Addrs(addr, r.logger) + if err != nil { + r.logger.Error("discovering join addresses failed", "join_config", addr, "error", err) + return } + + addrs = append(addrs, servers...) 
} if len(addrs) > 0 { diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index a428f5611..bbe12eed2 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -4,27 +4,31 @@ package agent import ( + "context" "fmt" - "log" + golog "log" + "net" "os" "testing" "time" + "github.com/hashicorp/go-netaddrs" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/testutil" "github.com/mitchellh/cli" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) -type MockDiscover struct { - ReceivedAddrs string -} - const stubAddress = "127.0.0.1" -func (m *MockDiscover) Addrs(s string, l *log.Logger) ([]string, error) { - m.ReceivedAddrs = s +type MockDiscover struct { + ReceivedConfig string +} + +func (m *MockDiscover) Addrs(s string, l *golog.Logger) ([]string, error) { + m.ReceivedConfig = s return []string{stubAddress}, nil } func (m *MockDiscover) Help() string { return "" } @@ -32,6 +36,21 @@ func (m *MockDiscover) Names() []string { return []string{""} } +type MockNetaddrs struct { + ReceivedConfig []string +} + +func (m *MockNetaddrs) IPAddrs(ctx context.Context, cfg string, l netaddrs.Logger) ([]net.IPAddr, error) { + m.ReceivedConfig = append(m.ReceivedConfig, cfg) + + ip := net.ParseIP(stubAddress) + if ip == nil { + return nil, fmt.Errorf("unable to transform the stubAddress into a valid IP") + } + + return []net.IPAddr{{IP: ip}}, nil +} + func TestRetryJoin_Integration(t *testing.T) { ci.Parallel(t) @@ -93,7 +112,7 @@ func TestRetryJoin_Server_NonCloud(t *testing.T) { } joiner := retryJoiner{ - discover: &MockDiscover{}, + autoDiscover: autoDiscover{goDiscover: &MockDiscover{}}, serverJoin: mockJoin, serverEnabled: true, logger: testlog.HCLogger(t), @@ -124,7 +143,7 @@ func TestRetryJoin_Server_Cloud(t *testing.T) { mockDiscover := &MockDiscover{} joiner := retryJoiner{ - discover: mockDiscover, + autoDiscover: autoDiscover{goDiscover: 
mockDiscover}, serverJoin: mockJoin, serverEnabled: true, logger: testlog.HCLogger(t), @@ -134,7 +153,7 @@ func TestRetryJoin_Server_Cloud(t *testing.T) { joiner.RetryJoin(serverJoin) require.Equal(1, len(output)) - require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs) + require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedConfig) require.Equal(stubAddress, output[0]) } @@ -156,7 +175,7 @@ func TestRetryJoin_Server_MixedProvider(t *testing.T) { mockDiscover := &MockDiscover{} joiner := retryJoiner{ - discover: mockDiscover, + autoDiscover: autoDiscover{goDiscover: mockDiscover}, serverJoin: mockJoin, serverEnabled: true, logger: testlog.HCLogger(t), @@ -166,10 +185,52 @@ func TestRetryJoin_Server_MixedProvider(t *testing.T) { joiner.RetryJoin(serverJoin) require.Equal(2, len(output)) - require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs) + require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedConfig) require.Equal(stubAddress, output[0]) } +func TestRetryJoin_AutoDiscover(t *testing.T) { + ci.Parallel(t) + + var joinAddrs []string + mockJoin := func(s []string) (int, error) { + joinAddrs = s + return 0, nil + } + + // 'exec=*' tests autoDiscover go-netaddrs support + // 'provider=aws, tag_value=foo' ensures that provider-prefixed configs are routed to go-discover + // 'localhost' ensures that bare hostnames are returned as-is + // 'localhost2:4648' ensures hostname:port entries are returned as-is + // '127.0.0.1:4648' ensures ip:port entries are returned as-is + // '100.100.100.100' ensures that bare IPs are returned as-is + serverJoin := &ServerJoin{ + RetryMaxAttempts: 1, + RetryJoin: []string{ + "exec=echo 127.0.0.1", "provider=aws, tag_value=foo", + "localhost", "localhost2:4648", "127.0.0.1:4648", "100.100.100.100"}, + } + + mockDiscover := &MockDiscover{} + mockNetaddrs := &MockNetaddrs{} + joiner := retryJoiner{ + autoDiscover: autoDiscover{goDiscover: mockDiscover, netAddrs: mockNetaddrs}, + 
serverJoin: mockJoin, + serverEnabled: true, + logger: testlog.HCLogger(t), + errCh: make(chan struct{}), + } + + joiner.RetryJoin(serverJoin) + + must.Eq(t, []string{ + "127.0.0.1", "127.0.0.1", "localhost", "localhost2:4648", + "127.0.0.1:4648", "100.100.100.100"}, + joinAddrs) + must.Eq(t, []string{"exec=echo 127.0.0.1"}, mockNetaddrs.ReceivedConfig) + must.Eq(t, "provider=aws, tag_value=foo", mockDiscover.ReceivedConfig) +} + func TestRetryJoin_Client(t *testing.T) { ci.Parallel(t) require := require.New(t) @@ -187,7 +248,7 @@ func TestRetryJoin_Client(t *testing.T) { } joiner := retryJoiner{ - discover: &MockDiscover{}, + autoDiscover: autoDiscover{goDiscover: &MockDiscover{}}, clientJoin: mockJoin, clientEnabled: true, logger: testlog.HCLogger(t), diff --git a/go.mod b/go.mod index e5347293a..daabd9433 100644 --- a/go.mod +++ b/go.mod @@ -63,6 +63,7 @@ require ( github.com/hashicorp/go-memdb v1.3.4 github.com/hashicorp/go-msgpack v1.1.5 github.com/hashicorp/go-multierror v1.1.1 + github.com/hashicorp/go-netaddrs v0.1.0 github.com/hashicorp/go-plugin v1.4.10 github.com/hashicorp/go-secure-stdlib/listenerutil v0.1.4 github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 diff --git a/go.sum b/go.sum index a6547ee9a..da8bb087c 100644 --- a/go.sum +++ b/go.sum @@ -856,6 +856,8 @@ github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHh github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-netaddrs v0.1.0 h1:TnlYvODD4C/wO+j7cX1z69kV5gOzI87u3OcUinANaW8= +github.com/hashicorp/go-netaddrs v0.1.0/go.mod h1:33+a/emi5R5dqRspOuZKO0E+Tuz5WV1F84eRWALkedA= github.com/hashicorp/go-plugin v1.4.10 h1:xUbmA4jC6Dq163/fWcp8P3JuHilrHHMLNRxzGQJ9hNk= github.com/hashicorp/go-plugin v1.4.10/go.mod 
h1:6/1TEzT0eQznvI/gV2CM29DLSkAK/e58mUWKVsPaph0= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= diff --git a/website/content/docs/configuration/server_join.mdx b/website/content/docs/configuration/server_join.mdx index f4184645b..63b28df1e 100644 --- a/website/content/docs/configuration/server_join.mdx +++ b/website/content/docs/configuration/server_join.mdx @@ -28,37 +28,55 @@ server_join { ## `server_join` Parameters -- `retry_join` `(array: [])` - Specifies a list of server addresses to - join. This is similar to [`start_join`](#start_join), but will continue to - be attempted even if the initial join attempt fails, up to - [retry_max](#retry_max). Further, `retry_join` is available to - both Nomad servers and clients, while `start_join` is only defined for Nomad - servers. This is useful for cases where we know the address will become - available eventually. Use `retry_join` with an array as a replacement for - `start_join`, **do not use both options**. +- `retry_join` `(array: [])` - Specifies a list of Nomad server + addresses and [Cloud Auto-join](#cloud-auto-join) configurations that are + joined as cluster members. This is similar to [`start_join`](#start_join), + but join attempts are retried up to [retry_max](#retry_max) times. + Further, `retry_join` is available to both Nomad servers and clients, while + `start_join` is only defined for Nomad servers. This is useful for cases where + we know the address will become available eventually. Use `retry_join` with an + array as a replacement for `start_join`, **do not use both options**. - Address format includes both using IP addresses as well as an interface to the - [go-discover](https://github.com/hashicorp/go-discover) library for doing - automated cluster joining using cloud metadata. See the [Cloud Auto-join](#cloud-auto-join) - section below for more information. + Server addresses must conform to the [server address format](#server-address-format). 
- ``` + ```hcl server_join { - retry_join = [ "1.1.1.1", "2.2.2.2" ] + retry_join = [ "1.1.1.1", "2.2.2.2" ] } ``` - Using the `go-discover` interface, this can be defined both in a client or - server configuration as well as provided as a command-line argument. + Auto-join configurations must conform to the [Cloud Auto-join format](#cloud-auto-join). - ``` + Cloud Auto-join using `go-discover` to join an AWS EC2 cluster. + + ```hcl server_join { - retry_join = [ "provider=aws tag_key=..." ] + retry_join = [ "provider=aws tag_key=..." ] } ``` - See the [server address format](#server-address-format) for more information about expected - server address formats. + Cloud Auto-join using `go-netaddrs` to join a Hetzner cluster of servers + labeled with `nomad-server=true` using the `hcloud` CLI. + + ```hcl + server_join { + retry_join = [ "exec=hcloud server list -o noheader -o columns=ipv4 -l nomad-server=true | tr '\n' ' '" ] + } + ``` + + Server addresses and Cloud Auto-join configurations may be used together. + This is a mixed configuration containing a server address and Cloud Auto-join + configuration. + + ```hcl + server_join { + retry_join = [ "1.1.1.1", "provider=aws tag_key=..." ] + } + ``` + + See [server address format](#server-address-format) for more information + about expected server address formats and [Cloud Auto-join](#cloud-auto-join) + for more information on expected Cloud Auto-join formats. - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry join attempts. @@ -114,10 +132,9 @@ nomad-01.company.local => nomad-01.company.local:4648 ``` ### Via the go-discover interface - -As of Nomad 0.8.4, `retry_join` accepts a unified interface using the +`retry_join` accepts a unified interface using the [go-discover](https://github.com/hashicorp/go-discover) library for doing -automated cluster joining using cloud metadata. See [Cloud +automated cluster joining using cloud provider metadata. 
See [Cloud Auto-join](#cloud-auto-join) for more information. ``` @@ -126,11 +143,24 @@ Auto-join](#cloud-auto-join) for more information. ## Cloud Auto-join +`retry_join`'s Cloud Auto-join allows Nomad to automatically discover cluster +server addresses using cloud provider metadata. Cloud Auto-join allows both +[go-discover](https://github.com/hashicorp/go-discover) and +[go-netaddrs](https://github.com/hashicorp/go-netaddrs) formats. + +Configurations prefixed with `provider=` use `go-discover` whereas +configurations prefixed with `exec=` use `go-netaddrs`. For cloud providers not +supported by `go-discover`, use `go-netaddrs`. + +### `go-discover` Configurations + +`go-discover` configurations are prefixed with `provider=`. + The following sections describe the Cloud Auto-join `retry_join` options that are specific to a subset of supported cloud providers. For information on all providers, see further documentation in [go-discover](https://github.com/hashicorp/go-discover). -### Amazon EC2 +#### Amazon EC2 This returns the first private IP address of all servers in the given region which have the given `tag_key` and `tag_value`. @@ -149,7 +179,7 @@ region which have the given `tag_key` and `tag_value`. - `access_key_id` (optional) - the AWS access key for authentication (see below for more information about authenticating). - `secret_access_key` (optional) - the AWS secret access key for authentication (see below for more information about authenticating). -#### Authentication & Precedence +##### Authentication & Precedence - Static credentials `access_key_id=... secret_access_key=...` - Environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) @@ -163,7 +193,7 @@ region which have the given `tag_key` and `tag_value`. metadata endpoint](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html). 
-### Microsoft Azure +#### Microsoft Azure This returns the first private IP address of all servers in the given region which have the given `tag_key` and `tag_value` in the tenant and subscription, or in @@ -195,7 +225,7 @@ Use these configuration parameters when using Virtual Machine Scale Sets (Consul When using tags the only permission needed is the `ListAll` method for `NetworkInterfaces`. When using Virtual Machine Scale Sets the only role action needed is `Microsoft.Compute/virtualMachineScaleSets/*/read`. -### Google Compute Engine +#### Google Compute Engine This returns the first private IP address of all servers in the given project which have the given `tag_value`. @@ -212,7 +242,7 @@ project which have the given `tag_value`. - `zone_pattern` (optional) - the list of zones can be restricted through an RE2 compatible regular expression. If omitted, servers in all zones are returned. - `credentials_file` (optional) - the credentials file for authentication. See below for more information. -#### Authentication & Precedence +##### Authentication & Precedence - Use credentials from `credentials_file`, if provided. - Use JSON file from `GOOGLE_APPLICATION_CREDENTIALS` environment variable. @@ -225,3 +255,37 @@ project which have the given `tag_value`. Discovery requires a [GCE Service Account](https://cloud.google.com/compute/docs/access/service-accounts). Credentials are searched using the following paths, in order of precedence. + + +### `go-netaddrs` Configurations + +`go-netaddrs` configurations are prefixed with `exec=`. + +What follows the `exec=` prefix may be any executable program and its arguments. +Commands run by `go-netaddrs` must return a list of space-delimited IPv4 or IPv6 +addresses and exit with code `0` on success and non-zero on failure. + +You can refer to the executable by its absolute file system path, or by +the executable's name if it can be found on the Nomad agent's PATH. 
See +[executables in the current directory](https://pkg.go.dev/os/exec#hdr-Executables_in_the_current_directory) +for more details on the lookup behavior. + +Example `go-netaddrs` configuration. + +```hcl +server_join { + retry_join = [ "exec=hcloud server list -o noheader -o columns=ipv4 -l nomad-server=true | tr '\n' ' '" ] +} +``` + +Here, `hcloud`'s newline-delimited output has been re-formatted as space- +delimited by piping its output into `tr`. + +Output of an example `go-netaddrs` executable + +``` +"1.1.1.1 2.2.2.2" +``` + +Visit [go-netaddrs](https://github.com/hashicorp/go-netaddrs) for more +information on go-netaddrs configuration.