mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
agent: Fix a bug where retry_join was not retrying. (#24561)
The retry_join logic was not allowing for retries to happen and was exiting after the first failed discovery attempt. This change fixes that behaviour and adds a test to ensure no further regressions.
This commit is contained in:
3
.changelog/24561.txt
Normal file
3
.changelog/24561.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
agent: Fixed a bug where `retry_join` gave up after a single failure, rather than retrying until max attempts had been reached
|
||||
```
|
||||
@@ -158,7 +158,7 @@ func (r *retryJoiner) Validate(config *Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// retryJoin is used to handle retrying a join until it succeeds or all retries
|
||||
// RetryJoin is used to handle retrying a join until it succeeds or all retries
|
||||
// are exhausted.
|
||||
func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
|
||||
if len(serverJoin.RetryJoin) == 0 {
|
||||
@@ -176,13 +176,16 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
|
||||
var err error
|
||||
|
||||
for _, addr := range serverJoin.RetryJoin {
|
||||
|
||||
// If auto-discovery returns an error, log the error and
|
||||
// fall-through, so we reach the retry logic and loop back around
|
||||
// for another go.
|
||||
servers, err := r.autoDiscover.Addrs(addr, r.logger)
|
||||
if err != nil {
|
||||
r.logger.Error("discovering join addresses failed", "join_config", addr, "error", err)
|
||||
return
|
||||
} else {
|
||||
addrs = append(addrs, servers...)
|
||||
}
|
||||
|
||||
addrs = append(addrs, servers...)
|
||||
}
|
||||
|
||||
if len(addrs) > 0 {
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
|
||||
"github.com/hashicorp/go-netaddrs"
|
||||
"github.com/hashicorp/nomad/ci"
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/helper/testlog"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/mitchellh/cli"
|
||||
@@ -261,6 +262,68 @@ func TestRetryJoin_Client(t *testing.T) {
|
||||
require.Equal(stubAddress, output[0])
|
||||
}
|
||||
|
||||
// MockFailDiscover implements the DiscoverInterface interface and can be used
|
||||
// for tests that want to purposely fail the discovery process.
|
||||
type MockFailDiscover struct {
|
||||
ReceivedConfig string
|
||||
}
|
||||
|
||||
func (m *MockFailDiscover) Addrs(cfg string, _ *golog.Logger) ([]string, error) {
|
||||
return nil, fmt.Errorf("test: failed discovery %q", cfg)
|
||||
}
|
||||
func (m *MockFailDiscover) Help() string { return "" }
|
||||
func (m *MockFailDiscover) Names() []string {
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
func TestRetryJoin_RetryMaxAttempts(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
// Create an error channel to pass to the retry joiner. When the retry
|
||||
// attempts have been exhausted, this channel is closed and our only way
|
||||
// to test this apart from inspecting log entries.
|
||||
errCh := make(chan struct{})
|
||||
|
||||
// Create a timeout to protect against problems within the test blocking
|
||||
// for arbitrary long times.
|
||||
timeout, timeoutStop := helper.NewSafeTimer(2 * time.Second)
|
||||
defer timeoutStop()
|
||||
|
||||
var output []string
|
||||
|
||||
joiner := retryJoiner{
|
||||
autoDiscover: autoDiscover{goDiscover: &MockFailDiscover{}},
|
||||
clientJoin: func(s []string) (int, error) {
|
||||
output = s
|
||||
return 0, nil
|
||||
},
|
||||
clientEnabled: true,
|
||||
logger: testlog.HCLogger(t),
|
||||
errCh: errCh,
|
||||
}
|
||||
|
||||
// Execute the retry join function in a routine, so we can track whether
|
||||
// this returns and exits without close the error channel and thus
|
||||
// indicating retry failure.
|
||||
doneCh := make(chan struct{})
|
||||
|
||||
go func(doneCh chan struct{}) {
|
||||
joiner.RetryJoin(&ServerJoin{RetryMaxAttempts: 1, RetryJoin: []string{"provider=foo"}})
|
||||
close(doneCh)
|
||||
}(doneCh)
|
||||
|
||||
// The main test; ensure error channel is closed, indicating the retry
|
||||
// limit has been reached.
|
||||
select {
|
||||
case <-errCh:
|
||||
must.Len(t, 0, output)
|
||||
case <-doneCh:
|
||||
t.Fatal("retry join completed without closing error channel")
|
||||
case <-timeout.C:
|
||||
t.Fatal("timeout reached without error channel close")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryJoin_Validate(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
type validateExpect struct {
|
||||
|
||||
Reference in New Issue
Block a user