Add go-netaddrs support to retry_join (#18745)

This commit is contained in:
Adriano Caloiaro
2023-11-15 08:07:18 -07:00
committed by GitHub
parent bb6c86d2a4
commit f66eb83fc0
7 changed files with 248 additions and 60 deletions

3
.changelog/18745.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
config: Add `go-netaddrs` support to `server_join.retry_join`
```

View File

@@ -310,7 +310,6 @@ func (c *Command) readConfig() *Config {
}
func (c *Command) IsValidConfig(config, cmdConfig *Config) bool {
// Check that the server is running in at least one mode.
if !(config.Server.Enabled || config.Client.Enabled) {
c.Ui.Error("Must specify either server, client or dev mode for the agent.")
@@ -887,7 +886,7 @@ func (c *Command) handleRetryJoin(config *Config) error {
if config.Server.Enabled && len(config.Server.RetryJoin) != 0 {
joiner := retryJoiner{
discover: &discover.Discover{},
autoDiscover: autoDiscover{goDiscover: &discover.Discover{}, netAddrs: &netAddrs{}},
errCh: c.retryJoinErrCh,
logger: c.agent.logger.Named("joiner"),
serverJoin: c.agent.server.Join,
@@ -920,7 +919,7 @@ func (c *Command) handleRetryJoin(config *Config) error {
len(config.Server.ServerJoin.RetryJoin) != 0 {
joiner := retryJoiner{
discover: &discover.Discover{},
autoDiscover: autoDiscover{goDiscover: &discover.Discover{}, netAddrs: &netAddrs{}},
errCh: c.retryJoinErrCh,
logger: c.agent.logger.Named("joiner"),
serverJoin: c.agent.server.Join,
@@ -938,7 +937,7 @@ func (c *Command) handleRetryJoin(config *Config) error {
config.Client.ServerJoin != nil &&
len(config.Client.ServerJoin.RetryJoin) != 0 {
joiner := retryJoiner{
discover: &discover.Discover{},
autoDiscover: autoDiscover{goDiscover: &discover.Discover{}, netAddrs: &netAddrs{}},
errCh: c.retryJoinErrCh,
logger: c.agent.logger.Named("joiner"),
clientJoin: c.agent.client.SetServers,

View File

@@ -4,14 +4,22 @@
package agent
import (
"context"
"fmt"
golog "log"
"net"
"strings"
"time"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-netaddrs"
)
// AutoDiscoverInterface abstracts join-address discovery so that retryJoiner
// can be driven by a mock in tests. autoDiscover is the implementation
// provided in this file.
type AutoDiscoverInterface interface {
// Addrs resolves a single retry_join entry (cfg) into the list of
// addresses to join, or returns an error if the lookup fails.
Addrs(cfg string, logger log.Logger) ([]string, error)
}
// DiscoverInterface is an interface for the Discover type in the go-discover
// library. Using an interface allows for ease of testing.
type DiscoverInterface interface {
@@ -30,6 +38,63 @@ type DiscoverInterface interface {
Names() []string
}
// NetaddrsInterface is an interface for go-netaddrs to ease testing. Its
// single method has the same signature as the netaddrs.IPAddrs function.
type NetaddrsInterface interface {
// IPAddrs looks up the IP addresses described by cfg.
IPAddrs(ctx context.Context, cfg string, l netaddrs.Logger) ([]net.IPAddr, error)
}
// netAddrs is a stateless NetaddrsInterface implementation backed by the
// go-netaddrs library.
type netAddrs struct{}
// IPAddrs forwards its arguments unchanged to netaddrs.IPAddrs and returns
// that function's results.
func (n *netAddrs) IPAddrs(ctx context.Context, cfg string, l netaddrs.Logger) ([]net.IPAddr, error) {
return netaddrs.IPAddrs(ctx, cfg, l)
}
// autoDiscover uses go-netaddrs and go-discover to discover IP addresses when
// auto-joining clusters
//
// autoDiscover implements AutoDiscoverInterface
type autoDiscover struct {
// netAddrs resolves configurations that carry the "exec=" prefix.
netAddrs NetaddrsInterface
// goDiscover resolves configurations that carry the "provider=" prefix.
goDiscover DiscoverInterface
}
// Addrs looks up and returns IP addresses specified by cfg.
//
// If cfg has an exec= prefix, IP addresses are looked up by executing the command
// after exec=. The command may include optional arguments. Command arguments
// must be space separated (spaces in argument values can not be escaped).
// The command may output IPv4 or IPv6 addresses, and IPv6 addresses can
// optionally include a zone index.
//
// The executable must follow these rules:
//
//	on success - exit 0 and print whitespace delimited IP addresses to stdout.
//	on failure - exits with a non-zero code, and should print an error message
//	             of up to 1024 bytes to stderr.
//
// If cfg has a provider= prefix, IP addresses are looked up using the go-discover
// provider specified in cfg.
//
// If cfg contains neither an exec= nor a provider= prefix, the configuration is
// returned as-is, to be resolved later via Serf in the server's Join() function,
// or via DNS in client's SetServers() function.
//
// When an exec= lookup fails, the returned slice is nil and the error from the
// lookup is returned unchanged; no partial results are reported.
func (d autoDiscover) Addrs(cfg string, logger log.Logger) (addrs []string, err error) {
	switch {
	case strings.HasPrefix(cfg, "exec="):
		ipAddrs, lookupErr := d.netAddrs.IPAddrs(context.Background(), cfg, logger)
		if lookupErr != nil {
			// Do not hand back a partially populated slice alongside an error.
			return nil, lookupErr
		}
		for _, addr := range ipAddrs {
			// NOTE(review): addr.IP.String() discards any IPv6 zone held in
			// addr.Zone, although the command is allowed to emit one. Confirm
			// whether the join path is expected to receive zoned addresses.
			addrs = append(addrs, addr.IP.String())
		}
		return addrs, nil

	case strings.HasPrefix(cfg, "provider="):
		// go-discover takes a standard library logger, so adapt the hclog one.
		stdLogger := logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true})
		return d.goDiscover.Addrs(cfg, stdLogger)

	default:
		// Plain host, host:port, or IP entries are passed through untouched.
		return []string{cfg}, nil
	}
}
// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
type retryJoiner struct {
@@ -45,9 +110,8 @@ type retryJoiner struct {
// clientEnabled indicates whether the nomad agent will run in client mode
clientEnabled bool
// discover is of type Discover, where this is either the go-discover
// implementation or a mock used for testing
discover DiscoverInterface
// autoDiscover is either an agent.autoDiscover, or a mock used for testing
autoDiscover AutoDiscoverInterface
// errCh is used to communicate with the agent when the max retry attempt
// limit has been reached
@@ -61,7 +125,6 @@ type retryJoiner struct {
// retry_join block. If the configuration is not valid, returns an error that
// will be displayed to the operator, otherwise nil.
func (r *retryJoiner) Validate(config *Config) error {
// If retry_join is defined for the server, ensure that deprecated
// fields and the server_join block are not both set
if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 {
@@ -107,24 +170,19 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
addrsToJoin := strings.Join(serverJoin.RetryJoin, " ")
r.logger.Info("starting retry join", "servers", addrsToJoin)
standardLogger := r.logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true})
for {
var addrs []string
var n int
var err error
for _, addr := range serverJoin.RetryJoin {
switch {
case strings.HasPrefix(addr, "provider="):
servers, err := r.discover.Addrs(addr, standardLogger)
if err != nil {
r.logger.Error("determining join addresses failed", "error", err)
} else {
addrs = append(addrs, servers...)
}
default:
addrs = append(addrs, addr)
servers, err := r.autoDiscover.Addrs(addr, r.logger)
if err != nil {
r.logger.Error("discovering join addresses failed", "join_config", addr, "error", err)
return
}
addrs = append(addrs, servers...)
}
if len(addrs) > 0 {

View File

@@ -4,27 +4,31 @@
package agent
import (
"context"
"fmt"
"log"
golog "log"
"net"
"os"
"testing"
"time"
"github.com/hashicorp/go-netaddrs"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/testutil"
"github.com/mitchellh/cli"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)
type MockDiscover struct {
ReceivedAddrs string
}
const stubAddress = "127.0.0.1"
func (m *MockDiscover) Addrs(s string, l *log.Logger) ([]string, error) {
m.ReceivedAddrs = s
type MockDiscover struct {
ReceivedConfig string
}
func (m *MockDiscover) Addrs(s string, l *golog.Logger) ([]string, error) {
m.ReceivedConfig = s
return []string{stubAddress}, nil
}
func (m *MockDiscover) Help() string { return "" }
@@ -32,6 +36,21 @@ func (m *MockDiscover) Names() []string {
return []string{""}
}
// MockNetaddrs is a test double for NetaddrsInterface. Every configuration
// string passed to IPAddrs is appended to ReceivedConfig so tests can assert
// on what was requested.
type MockNetaddrs struct {
	ReceivedConfig []string
}

// IPAddrs records cfg and answers with a single address built from
// stubAddress. An error is returned only if stubAddress cannot be parsed as
// an IP.
func (m *MockNetaddrs) IPAddrs(ctx context.Context, cfg string, l netaddrs.Logger) ([]net.IPAddr, error) {
	m.ReceivedConfig = append(m.ReceivedConfig, cfg)

	if parsed := net.ParseIP(stubAddress); parsed != nil {
		return []net.IPAddr{{IP: parsed}}, nil
	}

	return nil, fmt.Errorf("unable to transform the stubAddress into a valid IP")
}
func TestRetryJoin_Integration(t *testing.T) {
ci.Parallel(t)
@@ -93,7 +112,7 @@ func TestRetryJoin_Server_NonCloud(t *testing.T) {
}
joiner := retryJoiner{
discover: &MockDiscover{},
autoDiscover: autoDiscover{goDiscover: &MockDiscover{}},
serverJoin: mockJoin,
serverEnabled: true,
logger: testlog.HCLogger(t),
@@ -124,7 +143,7 @@ func TestRetryJoin_Server_Cloud(t *testing.T) {
mockDiscover := &MockDiscover{}
joiner := retryJoiner{
discover: mockDiscover,
autoDiscover: autoDiscover{goDiscover: mockDiscover},
serverJoin: mockJoin,
serverEnabled: true,
logger: testlog.HCLogger(t),
@@ -134,7 +153,7 @@ func TestRetryJoin_Server_Cloud(t *testing.T) {
joiner.RetryJoin(serverJoin)
require.Equal(1, len(output))
require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs)
require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedConfig)
require.Equal(stubAddress, output[0])
}
@@ -156,7 +175,7 @@ func TestRetryJoin_Server_MixedProvider(t *testing.T) {
mockDiscover := &MockDiscover{}
joiner := retryJoiner{
discover: mockDiscover,
autoDiscover: autoDiscover{goDiscover: mockDiscover},
serverJoin: mockJoin,
serverEnabled: true,
logger: testlog.HCLogger(t),
@@ -166,10 +185,52 @@ func TestRetryJoin_Server_MixedProvider(t *testing.T) {
joiner.RetryJoin(serverJoin)
require.Equal(2, len(output))
require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs)
require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedConfig)
require.Equal(stubAddress, output[0])
}
// TestRetryJoin_AutoDiscover runs RetryJoin with a mix of exec=, provider=,
// and plain address entries, and checks both the addresses handed to the join
// function and the configuration strings each mock received.
func TestRetryJoin_AutoDiscover(t *testing.T) {
ci.Parallel(t)
// joinAddrs captures the address list passed to the join function.
var joinAddrs []string
mockJoin := func(s []string) (int, error) {
joinAddrs = s
return 0, nil
}
// 'exec=*' tests autoDiscover go-netaddrs support
// 'provider=aws, tag_value=foo' ensures that provider-prefixed configs are routed to go-discover
// 'localhost' ensures that bare hostnames are returned as-is
// 'localhost2:4648' ensures hostname:port entries are returned as-is
// '127.0.0.1:4648' ensures ip:port entries are returned as-is
// '100.100.100.100' ensures that bare IPs are returned as-is
serverJoin := &ServerJoin{
RetryMaxAttempts: 1,
RetryJoin: []string{
"exec=echo 127.0.0.1", "provider=aws, tag_value=foo",
"localhost", "localhost2:4648", "127.0.0.1:4648", "100.100.100.100"},
}
mockDiscover := &MockDiscover{}
mockNetaddrs := &MockNetaddrs{}
joiner := retryJoiner{
autoDiscover: autoDiscover{goDiscover: mockDiscover, netAddrs: mockNetaddrs},
serverJoin: mockJoin,
serverEnabled: true,
logger: testlog.HCLogger(t),
errCh: make(chan struct{}),
}
joiner.RetryJoin(serverJoin)
// The first two entries are the stub address returned by each mock (exec=
// and provider= respectively); the remaining entries pass through as-is.
must.Eq(t, []string{
"127.0.0.1", "127.0.0.1", "localhost", "localhost2:4648",
"127.0.0.1:4648", "100.100.100.100"},
joinAddrs)
must.Eq(t, []string{"exec=echo 127.0.0.1"}, mockNetaddrs.ReceivedConfig)
must.Eq(t, "provider=aws, tag_value=foo", mockDiscover.ReceivedConfig)
}
func TestRetryJoin_Client(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
@@ -187,7 +248,7 @@ func TestRetryJoin_Client(t *testing.T) {
}
joiner := retryJoiner{
discover: &MockDiscover{},
autoDiscover: autoDiscover{goDiscover: &MockDiscover{}},
clientJoin: mockJoin,
clientEnabled: true,
logger: testlog.HCLogger(t),

1
go.mod
View File

@@ -63,6 +63,7 @@ require (
github.com/hashicorp/go-memdb v1.3.4
github.com/hashicorp/go-msgpack v1.1.5
github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/go-netaddrs v0.1.0
github.com/hashicorp/go-plugin v1.4.10
github.com/hashicorp/go-secure-stdlib/listenerutil v0.1.4
github.com/hashicorp/go-secure-stdlib/strutil v0.1.2

2
go.sum
View File

@@ -856,6 +856,8 @@ github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHh
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-netaddrs v0.1.0 h1:TnlYvODD4C/wO+j7cX1z69kV5gOzI87u3OcUinANaW8=
github.com/hashicorp/go-netaddrs v0.1.0/go.mod h1:33+a/emi5R5dqRspOuZKO0E+Tuz5WV1F84eRWALkedA=
github.com/hashicorp/go-plugin v1.4.10 h1:xUbmA4jC6Dq163/fWcp8P3JuHilrHHMLNRxzGQJ9hNk=
github.com/hashicorp/go-plugin v1.4.10/go.mod h1:6/1TEzT0eQznvI/gV2CM29DLSkAK/e58mUWKVsPaph0=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=

View File

@@ -28,37 +28,55 @@ server_join {
## `server_join` Parameters
- `retry_join` `(array<string>: [])` - Specifies a list of server addresses to
join. This is similar to [`start_join`](#start_join), but will continue to
be attempted even if the initial join attempt fails, up to
[retry_max](#retry_max). Further, `retry_join` is available to
both Nomad servers and clients, while `start_join` is only defined for Nomad
servers. This is useful for cases where we know the address will become
available eventually. Use `retry_join` with an array as a replacement for
`start_join`, **do not use both options**.
- `retry_join` `(array<string>: [])` - Specifies a list of Nomad server
addresses and [Cloud Auto-join](#cloud-auto-join) configurations that are
joined as cluster members. This is similar to [`start_join`](#start_join),
but join attempts are retried up to [retry_max](#retry_max) times.
Further, `retry_join` is available to both Nomad servers and clients, while
`start_join` is only defined for Nomad servers. This is useful for cases where
we know the address will become available eventually. Use `retry_join` with an
array as a replacement for `start_join`, **do not use both options**.
Address format includes both using IP addresses as well as an interface to the
[go-discover](https://github.com/hashicorp/go-discover) library for doing
automated cluster joining using cloud metadata. See the [Cloud Auto-join](#cloud-auto-join)
section below for more information.
Server addresses must conform to the [server address format](#server-address-format).
```
```hcl
server_join {
retry_join = [ "1.1.1.1", "2.2.2.2" ]
retry_join = [ "1.1.1.1", "2.2.2.2" ]
}
```
Using the `go-discover` interface, this can be defined both in a client or
server configuration as well as provided as a command-line argument.
Auto-join configurations must conform to the [Cloud Auto-join format](#cloud-auto-join).
```
Cloud Auto-join using `go-discover` to join an AWS EC2 cluster.
```hcl
server_join {
retry_join = [ "provider=aws tag_key=..." ]
retry_join = [ "provider=aws tag_key=..." ]
}
```
See the [server address format](#server-address-format) for more information about expected
server address formats.
Cloud Auto-join using `go-netaddrs` to join a Hetzner cluster of servers
labeled with `nomad-server=true` using the `hcloud` CLI.
```hcl
server_join {
retry_join = [ "exec=hcloud server list -o noheader -o columns=ipv4 -l nomad-server=true | tr '\n' ' '" ]
}
```
Server addresses and Cloud Auto-join configurations may be used together.
This is a mixed configuration containing a server address and Cloud Auto-join
configuration.
```hcl
server_join {
retry_join = [ "1.1.1.1", "provider=aws tag_key=..." ]
}
```
See [server address format](#server-address-format) for more information
about expected server address formats and [Cloud Auto-join](#cloud-auto-join)
for more information on expected Cloud Auto-join formats.
- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry
join attempts.
@@ -114,10 +132,9 @@ nomad-01.company.local => nomad-01.company.local:4648
```
### Via the go-discover interface
As of Nomad 0.8.4, `retry_join` accepts a unified interface using the
`retry_join` accepts a unified interface using the
[go-discover](https://github.com/hashicorp/go-discover) library for doing
automated cluster joining using cloud metadata. See [Cloud
automated cluster joining using cloud provider metadata. See [Cloud
Auto-join](#cloud-auto-join) for more information.
```
@@ -126,11 +143,24 @@ Auto-join](#cloud-auto-join) for more information.
## Cloud Auto-join
`retry_join`'s Cloud Auto-join allows Nomad to automatically discover cluster
server addresses using cloud provider metadata. Cloud Auto-join allows both
[go-discover](https://github.com/hashicorp/go-discover) and
[go-netaddrs](https://github.com/hashicorp/go-netaddrs) formats.
Configurations prefixed with `provider=` use `go-discover` whereas
configurations prefixed with `exec=` use `go-netaddrs`. For cloud providers not
supported by `go-discover`, use `go-netaddrs`.
### `go-discover` Configurations
`go-discover` configurations are prefixed with `provider=`.
The following sections describe the Cloud Auto-join `retry_join` options that are specific
to a subset of supported cloud providers. For information on all providers, see further
documentation in [go-discover](https://github.com/hashicorp/go-discover).
### Amazon EC2
#### Amazon EC2
This returns the first private IP address of all servers in the given
region which have the given `tag_key` and `tag_value`.
@@ -149,7 +179,7 @@ region which have the given `tag_key` and `tag_value`.
- `access_key_id` (optional) - the AWS access key for authentication (see below for more information about authenticating).
- `secret_access_key` (optional) - the AWS secret access key for authentication (see below for more information about authenticating).
#### Authentication &amp; Precedence
##### Authentication &amp; Precedence
- Static credentials `access_key_id=... secret_access_key=...`
- Environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`)
@@ -163,7 +193,7 @@ region which have the given `tag_key` and `tag_value`.
metadata
endpoint](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html).
### Microsoft Azure
#### Microsoft Azure
This returns the first private IP address of all servers in the given region
which have the given `tag_key` and `tag_value` in the tenant and subscription, or in
@@ -195,7 +225,7 @@ Use these configuration parameters when using Virtual Machine Scale Sets (Consul
When using tags the only permission needed is the `ListAll` method for `NetworkInterfaces`. When using
Virtual Machine Scale Sets the only role action needed is `Microsoft.Compute/virtualMachineScaleSets/*/read`.
### Google Compute Engine
#### Google Compute Engine
This returns the first private IP address of all servers in the given
project which have the given `tag_value`.
@@ -212,7 +242,7 @@ project which have the given `tag_value`.
- `zone_pattern` (optional) - the list of zones can be restricted through an RE2 compatible regular expression. If omitted, servers in all zones are returned.
- `credentials_file` (optional) - the credentials file for authentication. See below for more information.
#### Authentication &amp; Precedence
##### Authentication &amp; Precedence
- Use credentials from `credentials_file`, if provided.
- Use JSON file from `GOOGLE_APPLICATION_CREDENTIALS` environment variable.
@@ -225,3 +255,37 @@ project which have the given `tag_value`.
Discovery requires a [GCE Service
Account](https://cloud.google.com/compute/docs/access/service-accounts).
Credentials are searched using the following paths, in order of precedence.
### `go-netaddrs` Configurations
`go-netaddrs` configurations are prefixed with `exec=`.
What follows the `exec=` prefix may be any executable program and its arguments.
Commands run by `go-netaddrs` must print a list of whitespace-delimited IPv4 or
IPv6 addresses to stdout, and must exit with code `0` on success and non-zero
on failure.
You can refer to the executable by its absolute file system path, or by
the executable's name if it can be found on the Nomad agent's PATH. See
[executables in the current directory](https://pkg.go.dev/os/exec#hdr-Executables_in_the_current_directory)
for more details on the lookup behavior.
Example `go-netaddrs` configuration.
```hcl
server_join {
retry_join = [ "exec=hcloud server list -o noheader -o columns=ipv4 -l nomad-server=true | tr '\n' ' '" ]
}
```
Here, `hcloud`'s newline-delimited output has been re-formatted as
space-delimited by piping its output into `tr`.
Output of an example `go-netaddrs` executable
```
"1.1.1.1 2.2.2.2"
```
Visit [go-netaddrs](https://github.com/hashicorp/go-netaddrs) for more
information on go-netaddrs configuration.