mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
[server] Directed leadership transfer CLI and API (#17383)
* Add directed leadership transfer func * Add leadership transfer RPC endpoint * Add ACL tests for leadership-transfer endpoint * Add HTTP API route and implementation * Add to Go API client * Implement CLI command * Add documentation * Add changelog Co-authored-by: Tim Gross <tgross@hashicorp.com>
This commit is contained in:
3
.changelog/17383.txt
Normal file
3
.changelog/17383.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:improvement
|
||||||
|
server: Added transfer-leadership API and CLI
|
||||||
|
```
|
||||||
@@ -120,6 +120,46 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RaftTransferLeadershipByAddress is used to transfer leadership to a
|
||||||
|
// different peer using its address in the form of "IP:port".
|
||||||
|
func (op *Operator) RaftTransferLeadershipByAddress(address string, q *WriteOptions) error {
|
||||||
|
r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
r.setWriteOptions(q)
|
||||||
|
|
||||||
|
r.params.Set("address", address)
|
||||||
|
|
||||||
|
_, resp, err := requireOK(op.c.doRequest(r))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.Body.Close()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RaftTransferLeadershipByID is used to transfer leadership to a
|
||||||
|
// different peer using its Raft ID.
|
||||||
|
func (op *Operator) RaftTransferLeadershipByID(id string, q *WriteOptions) error {
|
||||||
|
r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
r.setWriteOptions(q)
|
||||||
|
|
||||||
|
r.params.Set("id", id)
|
||||||
|
|
||||||
|
_, resp, err := requireOK(op.c.doRequest(r))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.Body.Close()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// SchedulerConfiguration is the config for controlling scheduler behavior
|
// SchedulerConfiguration is the config for controlling scheduler behavior
|
||||||
type SchedulerConfiguration struct {
|
type SchedulerConfiguration struct {
|
||||||
// SchedulerAlgorithm lets you select between available scheduling algorithms.
|
// SchedulerAlgorithm lets you select between available scheduling algorithms.
|
||||||
@@ -363,3 +403,12 @@ func (op *Operator) LicenseGet(q *QueryOptions) (*LicenseReply, *QueryMeta, erro
|
|||||||
|
|
||||||
return &reply, qm, nil
|
return &reply, qm, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type LeadershipTransferResponse struct {
|
||||||
|
From RaftServer
|
||||||
|
To RaftServer
|
||||||
|
Noop bool
|
||||||
|
Err error
|
||||||
|
|
||||||
|
WriteMeta
|
||||||
|
}
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request
|
|||||||
return s.OperatorRaftConfiguration(resp, req)
|
return s.OperatorRaftConfiguration(resp, req)
|
||||||
case strings.HasPrefix(path, "peer"):
|
case strings.HasPrefix(path, "peer"):
|
||||||
return s.OperatorRaftPeer(resp, req)
|
return s.OperatorRaftPeer(resp, req)
|
||||||
|
case strings.HasPrefix(path, "transfer-leadership"):
|
||||||
|
return s.OperatorRaftTransferLeadership(resp, req)
|
||||||
default:
|
default:
|
||||||
return nil, CodedError(404, ErrInvalidMethod)
|
return nil, CodedError(404, ErrInvalidMethod)
|
||||||
}
|
}
|
||||||
@@ -56,8 +58,7 @@ func (s *HTTPServer) OperatorRaftConfiguration(resp http.ResponseWriter, req *ht
|
|||||||
return reply, nil
|
return reply, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// OperatorRaftPeer supports actions on Raft peers. Currently we only support
|
// OperatorRaftPeer supports actions on Raft peers.
|
||||||
// removing peers by address.
|
|
||||||
func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||||
if req.Method != http.MethodDelete {
|
if req.Method != http.MethodDelete {
|
||||||
return nil, CodedError(404, ErrInvalidMethod)
|
return nil, CodedError(404, ErrInvalidMethod)
|
||||||
@@ -97,6 +98,57 @@ func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Reques
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OperatorRaftTransferLeadership supports actions on Raft peers.
|
||||||
|
func (s *HTTPServer) OperatorRaftTransferLeadership(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||||
|
if req.Method != http.MethodPost && req.Method != http.MethodPut {
|
||||||
|
return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
|
||||||
|
}
|
||||||
|
|
||||||
|
params := req.URL.Query()
|
||||||
|
|
||||||
|
// Using the params map directly
|
||||||
|
id, hasID := params["id"]
|
||||||
|
addr, hasAddress := params["address"]
|
||||||
|
|
||||||
|
// There are some items that we can parse for here that are more unwieldy in
|
||||||
|
// the Validate() func on the RPC request object, like repeated query params.
|
||||||
|
switch {
|
||||||
|
case !hasID && !hasAddress:
|
||||||
|
return nil, CodedError(http.StatusBadRequest, "must specify id or address")
|
||||||
|
case hasID && hasAddress:
|
||||||
|
return nil, CodedError(http.StatusBadRequest, "must specify either id or address")
|
||||||
|
case hasID && id[0] == "":
|
||||||
|
return nil, CodedError(http.StatusBadRequest, "id must be non-empty")
|
||||||
|
case hasID && len(id) > 1:
|
||||||
|
return nil, CodedError(http.StatusBadRequest, "must specify only one id")
|
||||||
|
case hasAddress && addr[0] == "":
|
||||||
|
return nil, CodedError(http.StatusBadRequest, "address must be non-empty")
|
||||||
|
case hasAddress && len(addr) > 1:
|
||||||
|
return nil, CodedError(http.StatusBadRequest, "must specify only one address")
|
||||||
|
}
|
||||||
|
|
||||||
|
var out structs.LeadershipTransferResponse
|
||||||
|
args := &structs.RaftPeerRequest{}
|
||||||
|
s.parseWriteRequest(req, &args.WriteRequest)
|
||||||
|
|
||||||
|
if hasID {
|
||||||
|
args.ID = raft.ServerID(id[0])
|
||||||
|
} else {
|
||||||
|
args.Address = raft.ServerAddress(addr[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := args.Validate(); err != nil {
|
||||||
|
return nil, CodedError(http.StatusBadRequest, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
err := s.agent.RPC("Operator.TransferLeadershipToPeer", &args, &out)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
|
// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
|
||||||
// This supports the stale query mode in case the cluster doesn't have a leader.
|
// This supports the stale query mode in case the cluster doesn't have a leader.
|
||||||
func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ import (
|
|||||||
|
|
||||||
"github.com/hashicorp/nomad/api"
|
"github.com/hashicorp/nomad/api"
|
||||||
"github.com/hashicorp/nomad/ci"
|
"github.com/hashicorp/nomad/ci"
|
||||||
|
"github.com/hashicorp/nomad/helper/pointer"
|
||||||
|
"github.com/hashicorp/nomad/helper/uuid"
|
||||||
"github.com/hashicorp/nomad/nomad/mock"
|
"github.com/hashicorp/nomad/nomad/mock"
|
||||||
"github.com/hashicorp/nomad/nomad/structs"
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
"github.com/shoenig/test/must"
|
"github.com/shoenig/test/must"
|
||||||
@@ -91,6 +93,144 @@ func TestHTTP_OperatorRaftPeer(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHTTP_OperatorRaftTransferLeadership(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
configCB := func(c *Config) {
|
||||||
|
c.Client.Enabled = false
|
||||||
|
c.Server.NumSchedulers = pointer.Of(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpTest(t, configCB, func(s *TestAgent) {
|
||||||
|
body := bytes.NewBuffer(nil)
|
||||||
|
badMethods := []string{
|
||||||
|
http.MethodConnect,
|
||||||
|
http.MethodDelete,
|
||||||
|
http.MethodGet,
|
||||||
|
http.MethodHead,
|
||||||
|
http.MethodOptions,
|
||||||
|
http.MethodPatch,
|
||||||
|
http.MethodTrace,
|
||||||
|
}
|
||||||
|
for _, tc := range badMethods {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc+" method errors", func(t *testing.T) {
|
||||||
|
req, err := http.NewRequest(tc, "/v1/operator/raft/transfer-leadership?address=nope", body)
|
||||||
|
must.NoError(t, err)
|
||||||
|
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
_, err = s.Server.OperatorRaftTransferLeadership(resp, req)
|
||||||
|
|
||||||
|
must.Error(t, err)
|
||||||
|
must.ErrorContains(t, err, "Invalid method")
|
||||||
|
body.Reset()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
apiErrTCs := []struct {
|
||||||
|
name string
|
||||||
|
qs string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "URL with id and address errors",
|
||||||
|
qs: `?id=foo&address=bar`,
|
||||||
|
expected: "must specify either id or address",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL without id and address errors",
|
||||||
|
qs: ``,
|
||||||
|
expected: "must specify id or address",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL with multiple id errors",
|
||||||
|
qs: `?id=foo&id=bar`,
|
||||||
|
expected: "must specify only one id",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL with multiple address errors",
|
||||||
|
qs: `?address=foo&address=bar`,
|
||||||
|
expected: "must specify only one address",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL with an empty id errors",
|
||||||
|
qs: `?id`,
|
||||||
|
expected: "id must be non-empty",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL with an empty address errors",
|
||||||
|
qs: `?address`,
|
||||||
|
expected: "address must be non-empty",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "an invalid id errors",
|
||||||
|
qs: `?id=foo`,
|
||||||
|
expected: "id must be a uuid",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL with an empty address errors",
|
||||||
|
qs: `?address=bar`,
|
||||||
|
expected: "address must be in IP:port format",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range apiErrTCs {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
req, err := http.NewRequest(
|
||||||
|
http.MethodPut,
|
||||||
|
"/v1/operator/raft/transfer-leadership"+tc.qs,
|
||||||
|
body,
|
||||||
|
)
|
||||||
|
must.NoError(t, err)
|
||||||
|
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
_, err = s.Server.OperatorRaftTransferLeadership(resp, req)
|
||||||
|
|
||||||
|
must.Error(t, err)
|
||||||
|
must.ErrorContains(t, err, tc.expected)
|
||||||
|
body.Reset()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
testID := uuid.Generate()
|
||||||
|
apiOkTCs := []struct {
|
||||||
|
name string
|
||||||
|
qs string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"id",
|
||||||
|
"?id=" + testID,
|
||||||
|
`id "` + testID + `" was not found in the Raft configuration`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"address",
|
||||||
|
"?address=9.9.9.9:8000",
|
||||||
|
`address "9.9.9.9:8000" was not found in the Raft configuration`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range apiOkTCs {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.name+" can roundtrip", func(t *testing.T) {
|
||||||
|
httpTest(t, configCB, func(s *TestAgent) {
|
||||||
|
body := bytes.NewBuffer(nil)
|
||||||
|
req, err := http.NewRequest(
|
||||||
|
http.MethodPut,
|
||||||
|
"/v1/operator/raft/transfer-leadership"+tc.qs,
|
||||||
|
body,
|
||||||
|
)
|
||||||
|
must.NoError(t, err)
|
||||||
|
|
||||||
|
// If we get this error, it proves we sent the parameter all the
|
||||||
|
// way through.
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
_, err = s.Server.OperatorRaftTransferLeadership(resp, req)
|
||||||
|
must.ErrorContains(t, err, tc.expected)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestOperator_AutopilotGetConfiguration(t *testing.T) {
|
func TestOperator_AutopilotGetConfiguration(t *testing.T) {
|
||||||
ci.Parallel(t)
|
ci.Parallel(t)
|
||||||
httpTest(t, nil, func(s *TestAgent) {
|
httpTest(t, nil, func(s *TestAgent) {
|
||||||
|
|||||||
@@ -749,6 +749,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
|
|||||||
Meta: meta,
|
Meta: meta,
|
||||||
}, nil
|
}, nil
|
||||||
},
|
},
|
||||||
|
"operator raft transfer-leadership": func() (cli.Command, error) {
|
||||||
|
return &OperatorRaftTransferLeadershipCommand{
|
||||||
|
Meta: meta,
|
||||||
|
}, nil
|
||||||
|
},
|
||||||
"operator raft info": func() (cli.Command, error) {
|
"operator raft info": func() (cli.Command, error) {
|
||||||
return &OperatorRaftInfoCommand{
|
return &OperatorRaftInfoCommand{
|
||||||
Meta: meta,
|
Meta: meta,
|
||||||
|
|||||||
125
command/operator_raft_leadership_transfer.go
Normal file
125
command/operator_raft_leadership_transfer.go
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
// Copyright (c) HashiCorp, Inc.
|
||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
package command
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/hashicorp/nomad/api"
|
||||||
|
"github.com/posener/complete"
|
||||||
|
)
|
||||||
|
|
||||||
|
type OperatorRaftTransferLeadershipCommand struct {
|
||||||
|
Meta
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *OperatorRaftTransferLeadershipCommand) Help() string {
|
||||||
|
helpText := `
|
||||||
|
Usage: nomad operator raft transfer-leadership [options]
|
||||||
|
|
||||||
|
Transfer leadership to the Nomad server with given -peer-address or
|
||||||
|
-peer-id in the Raft configuration. All server nodes in the cluster
|
||||||
|
must be running at least Raft protocol v3 in order to use this command.
|
||||||
|
|
||||||
|
There are cases where you might desire transferring leadership from one
|
||||||
|
cluster member to another, for example, during a rolling upgrade. This
|
||||||
|
command allows you to designate a new server to be cluster leader.
|
||||||
|
|
||||||
|
Note: This command requires a currently established leader to function.
|
||||||
|
|
||||||
|
If ACLs are enabled, this command requires a management token.
|
||||||
|
|
||||||
|
General Options:
|
||||||
|
|
||||||
|
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
|
||||||
|
|
||||||
|
Remove Peer Options:
|
||||||
|
|
||||||
|
-peer-address="IP:port"
|
||||||
|
Transfer leadership to the Nomad server with given Raft address.
|
||||||
|
|
||||||
|
-peer-id="id"
|
||||||
|
Transfer leadership to the Nomad server with given Raft ID.
|
||||||
|
`
|
||||||
|
|
||||||
|
return strings.TrimSpace(helpText)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *OperatorRaftTransferLeadershipCommand) AutocompleteFlags() complete.Flags {
|
||||||
|
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
|
||||||
|
complete.Flags{
|
||||||
|
"-peer-address": complete.PredictAnything,
|
||||||
|
"-peer-id": complete.PredictAnything,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *OperatorRaftTransferLeadershipCommand) AutocompleteArgs() complete.Predictor {
|
||||||
|
return complete.PredictNothing
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *OperatorRaftTransferLeadershipCommand) Synopsis() string {
|
||||||
|
return "Transfer leadership to a specified Nomad server"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *OperatorRaftTransferLeadershipCommand) Name() string {
|
||||||
|
return "operator raft transfer-leadership"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *OperatorRaftTransferLeadershipCommand) Run(args []string) int {
|
||||||
|
var peerAddress string
|
||||||
|
var peerID string
|
||||||
|
|
||||||
|
flags := c.Meta.FlagSet("raft", FlagSetClient)
|
||||||
|
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||||
|
|
||||||
|
flags.StringVar(&peerAddress, "peer-address", "", "")
|
||||||
|
flags.StringVar(&peerID, "peer-id", "", "")
|
||||||
|
if err := flags.Parse(args); err != nil {
|
||||||
|
c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up a client.
|
||||||
|
client, err := c.Meta.Client()
|
||||||
|
if err != nil {
|
||||||
|
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
operator := client.Operator()
|
||||||
|
|
||||||
|
if err := raftTransferLeadership(peerAddress, peerID, operator); err != nil {
|
||||||
|
c.Ui.Error(fmt.Sprintf("Error transferring leadership to peer: %v", err))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
if peerAddress != "" {
|
||||||
|
c.Ui.Output(fmt.Sprintf("Transferred leadership to peer with address %q", peerAddress))
|
||||||
|
} else {
|
||||||
|
c.Ui.Output(fmt.Sprintf("Transferred leadership to peer with id %q", peerID))
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func raftTransferLeadership(address, id string, operator *api.Operator) error {
|
||||||
|
if len(address) == 0 && len(id) == 0 {
|
||||||
|
return fmt.Errorf("an address or id is required for the destination peer")
|
||||||
|
}
|
||||||
|
if len(address) > 0 && len(id) > 0 {
|
||||||
|
return fmt.Errorf("cannot give both an address and id")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to perform the leadership transfer.
|
||||||
|
if len(address) > 0 {
|
||||||
|
if err := operator.RaftTransferLeadershipByAddress(address, nil); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := operator.RaftTransferLeadershipByID(id, nil); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -149,6 +149,50 @@ func (s *Server) monitorLeadership() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Server) leadershipTransferToServer(to structs.RaftIDAddress) error {
|
||||||
|
if l := structs.NewRaftIDAddress(s.raft.LeaderWithID()); l == to {
|
||||||
|
s.logger.Debug("leadership transfer to current leader is a no-op")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
retryCount := 3
|
||||||
|
var lastError error
|
||||||
|
for i := 0; i < retryCount; i++ {
|
||||||
|
err := s.raft.LeadershipTransferToServer(to.ID, to.Address).Error()
|
||||||
|
if err == nil {
|
||||||
|
s.logger.Info("successfully transferred leadership")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// "cannot transfer leadership to itself"
|
||||||
|
// Handled at top of function, but reapplied here to prevent retrying if
|
||||||
|
// it occurs while we are retrying
|
||||||
|
if err.Error() == "cannot transfer leadership to itself" {
|
||||||
|
s.logger.Debug("leadership transfer to current leader is a no-op")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrRaftShutdown: Don't retry if raft is shut down.
|
||||||
|
if err == raft.ErrRaftShutdown {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrUnsupportedProtocol: Don't retry if the Raft version doesn't
|
||||||
|
// support leadership transfer since this will never succeed.
|
||||||
|
if err == raft.ErrUnsupportedProtocol {
|
||||||
|
return fmt.Errorf("leadership transfer not supported with Raft version lower than 3")
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrEnqueueTimeout: This seems to be the valid time to retry.
|
||||||
|
s.logger.Error("failed to transfer leadership attempt, will retry",
|
||||||
|
"attempt", i,
|
||||||
|
"retry_limit", retryCount,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
|
lastError = err
|
||||||
|
}
|
||||||
|
return fmt.Errorf("failed to transfer leadership in %d attempts. last error: %w", retryCount, lastError)
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Server) leadershipTransfer() error {
|
func (s *Server) leadershipTransfer() error {
|
||||||
retryCount := 3
|
retryCount := 3
|
||||||
for i := 0; i < retryCount; i++ {
|
for i := 0; i < retryCount; i++ {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/go-hclog"
|
"github.com/hashicorp/go-hclog"
|
||||||
@@ -124,7 +125,7 @@ func (op *Operator) RaftRemovePeerByAddress(args *structs.RaftPeerByAddressReque
|
|||||||
|
|
||||||
// Since this is an operation designed for humans to use, we will return
|
// Since this is an operation designed for humans to use, we will return
|
||||||
// an error if the supplied address isn't among the peers since it's
|
// an error if the supplied address isn't among the peers since it's
|
||||||
// likely they screwed up.
|
// likely a mistake.
|
||||||
{
|
{
|
||||||
future := op.srv.raft.GetConfiguration()
|
future := op.srv.raft.GetConfiguration()
|
||||||
if err := future.Error(); err != nil {
|
if err := future.Error(); err != nil {
|
||||||
@@ -182,7 +183,7 @@ func (op *Operator) RaftRemovePeerByID(args *structs.RaftPeerByIDRequest, reply
|
|||||||
|
|
||||||
// Since this is an operation designed for humans to use, we will return
|
// Since this is an operation designed for humans to use, we will return
|
||||||
// an error if the supplied id isn't among the peers since it's
|
// an error if the supplied id isn't among the peers since it's
|
||||||
// likely they screwed up.
|
// likely a mistake.
|
||||||
var address raft.ServerAddress
|
var address raft.ServerAddress
|
||||||
{
|
{
|
||||||
future := op.srv.raft.GetConfiguration()
|
future := op.srv.raft.GetConfiguration()
|
||||||
@@ -228,6 +229,127 @@ REMOVE:
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TransferLeadershipToPeer is used to transfer leadership away from the
|
||||||
|
// current leader to a specific target peer. This can help prevent leadership
|
||||||
|
// flapping during a rolling upgrade by allowing the cluster operator to target
|
||||||
|
// an already upgraded node before upgrading the remainder of the cluster.
|
||||||
|
func (op *Operator) TransferLeadershipToPeer(req *structs.RaftPeerRequest, reply *structs.LeadershipTransferResponse) error {
|
||||||
|
// Populate the reply's `To` with the arguments. Only one of them is likely
|
||||||
|
// to be filled. We don't get any additional information until after auth
|
||||||
|
// to prevent leaking cluster details via the error response.
|
||||||
|
reply.To = structs.NewRaftIDAddress(req.Address, req.ID)
|
||||||
|
|
||||||
|
authErr := op.srv.Authenticate(op.ctx, req)
|
||||||
|
|
||||||
|
if done, err := op.srv.forward("Operator.TransferLeadershipToPeer", req, req, reply); done {
|
||||||
|
reply.Err = err
|
||||||
|
return reply.Err
|
||||||
|
}
|
||||||
|
op.srv.MeasureRPCRate("operator", structs.RateMetricWrite, req)
|
||||||
|
if authErr != nil {
|
||||||
|
reply.Err = structs.ErrPermissionDenied
|
||||||
|
return structs.ErrPermissionDenied
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check ACL permissions
|
||||||
|
if aclObj, err := op.srv.ResolveACL(req); err != nil {
|
||||||
|
return err
|
||||||
|
} else if aclObj != nil && !aclObj.IsManagement() {
|
||||||
|
reply.Err = structs.ErrPermissionDenied
|
||||||
|
return structs.ErrPermissionDenied
|
||||||
|
}
|
||||||
|
|
||||||
|
// Technically, this code will be running on the leader because of the RPC
|
||||||
|
// forwarding, but a leadership change could happen at any moment while we're
|
||||||
|
// running. We need the leader's raft info to populate the response struct
|
||||||
|
// anyway, so we have a chance to check again here
|
||||||
|
|
||||||
|
reply.From = structs.NewRaftIDAddress(op.srv.raft.LeaderWithID())
|
||||||
|
|
||||||
|
// If the leader information comes back empty, that signals that there is
|
||||||
|
// currently no leader.
|
||||||
|
if reply.From.Address == "" || reply.From.ID == "" {
|
||||||
|
reply.Err = structs.ErrNoLeader
|
||||||
|
return structs.NewErrRPCCoded(http.StatusServiceUnavailable, structs.ErrNoLeader.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// while this is a somewhat more expensive test than later ones, if this
|
||||||
|
// test fails, they will _never_ be able to do a transfer. We do this after
|
||||||
|
// ACL checks though, so as to not leak cluster info to non-validated users.
|
||||||
|
minRaftProtocol, err := op.srv.MinRaftProtocol()
|
||||||
|
if err != nil {
|
||||||
|
reply.Err = err
|
||||||
|
return structs.NewErrRPCCoded(http.StatusInternalServerError, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// TransferLeadership is not supported until Raft protocol v3 or greater.
|
||||||
|
if minRaftProtocol < 3 {
|
||||||
|
op.logger.Warn("unsupported minimum common raft protocol version", "required", "3", "current", minRaftProtocol)
|
||||||
|
reply.Err = errors.New("unsupported minimum common raft protocol version")
|
||||||
|
return structs.NewErrRPCCoded(http.StatusBadRequest, reply.Err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
var kind, testedVal string
|
||||||
|
|
||||||
|
// The request must provide either an ID or an Address, this lets us validate
|
||||||
|
// the request
|
||||||
|
req.Validate()
|
||||||
|
switch {
|
||||||
|
case req.ID != "":
|
||||||
|
kind, testedVal = "id", string(req.ID)
|
||||||
|
case req.Address != "":
|
||||||
|
kind, testedVal = "address", string(req.Address)
|
||||||
|
default:
|
||||||
|
reply.Err = errors.New("must provide peer id or address")
|
||||||
|
return structs.NewErrRPCCoded(http.StatusBadRequest, reply.Err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the raft configuration
|
||||||
|
future := op.srv.raft.GetConfiguration()
|
||||||
|
if err := future.Error(); err != nil {
|
||||||
|
reply.Err = err
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since this is an operation designed for humans to use, we will return
|
||||||
|
// an error if the supplied ID or address isn't among the peers since it's
|
||||||
|
// likely a mistake.
|
||||||
|
var found bool
|
||||||
|
for _, s := range future.Configuration().Servers {
|
||||||
|
if s.ID == req.ID || s.Address == req.Address {
|
||||||
|
reply.To = structs.NewRaftIDAddress(s.Address, s.ID)
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
reply.Err = fmt.Errorf("%s %q was not found in the Raft configuration",
|
||||||
|
kind, testedVal)
|
||||||
|
return structs.NewErrRPCCoded(http.StatusBadRequest, reply.Err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, this is a no-op, respond accordingly.
|
||||||
|
if reply.From == reply.To {
|
||||||
|
op.logger.Debug("leadership transfer to current leader is a no-op")
|
||||||
|
reply.Noop = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log := op.logger.With(
|
||||||
|
"to_peer_id", reply.To.ID, "to_peer_addr", reply.To.Address,
|
||||||
|
"from_peer_id", reply.From.ID, "from_peer_addr", reply.From.Address,
|
||||||
|
)
|
||||||
|
if err = op.srv.leadershipTransferToServer(reply.To); err != nil {
|
||||||
|
reply.Err = err
|
||||||
|
log.Error("failed transferring leadership", "error", reply.Err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("transferred leadership")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// AutopilotGetConfiguration is used to retrieve the current Autopilot configuration.
|
// AutopilotGetConfiguration is used to retrieve the current Autopilot configuration.
|
||||||
func (op *Operator) AutopilotGetConfiguration(args *structs.GenericRequest, reply *structs.AutopilotConfig) error {
|
func (op *Operator) AutopilotGetConfiguration(args *structs.GenericRequest, reply *structs.AutopilotConfig) error {
|
||||||
|
|
||||||
@@ -284,7 +406,7 @@ func (op *Operator) AutopilotSetConfiguration(args *structs.AutopilotSetConfigRe
|
|||||||
return structs.ErrPermissionDenied
|
return structs.ErrPermissionDenied
|
||||||
}
|
}
|
||||||
|
|
||||||
// All servers should be at or above 0.8.0 to apply this operatation
|
// All servers should be at or above 0.8.0 to apply this operation
|
||||||
if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minAutopilotVersion, false) {
|
if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minAutopilotVersion, false) {
|
||||||
return fmt.Errorf("All servers should be running version %v to update autopilot config", minAutopilotVersion)
|
return fmt.Errorf("All servers should be running version %v to update autopilot config", minAutopilotVersion)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
"net/rpc"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"reflect"
|
"reflect"
|
||||||
@@ -27,10 +28,16 @@ import (
|
|||||||
"github.com/hashicorp/nomad/nomad/structs"
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
"github.com/hashicorp/nomad/testutil"
|
"github.com/hashicorp/nomad/testutil"
|
||||||
"github.com/hashicorp/raft"
|
"github.com/hashicorp/raft"
|
||||||
|
"github.com/shoenig/test/must"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// RPC Permission Denied Errors - currently `rpc error: Permission denied`
|
||||||
|
rpcPermDeniedErr = rpc.ServerError(structs.ErrPermissionDenied.Error())
|
||||||
|
)
|
||||||
|
|
||||||
func TestOperator_RaftGetConfiguration(t *testing.T) {
|
func TestOperator_RaftGetConfiguration(t *testing.T) {
|
||||||
ci.Parallel(t)
|
ci.Parallel(t)
|
||||||
|
|
||||||
@@ -368,6 +375,211 @@ func TestOperator_RaftRemovePeerByID_ACL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type testcluster struct {
|
||||||
|
t *testing.T
|
||||||
|
server []*Server
|
||||||
|
cleanup []func()
|
||||||
|
token *structs.ACLToken
|
||||||
|
rpc func(string, any, any) error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tc testcluster) Cleanup() {
|
||||||
|
for _, cFn := range tc.cleanup {
|
||||||
|
cFn()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type tcArgs struct {
|
||||||
|
size int
|
||||||
|
enableACL bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTestCluster(t *testing.T, args tcArgs) (tc testcluster) {
|
||||||
|
// handle the zero case reasonably for count
|
||||||
|
if args.size == 0 {
|
||||||
|
args.size = 3
|
||||||
|
}
|
||||||
|
if args.size < 1 {
|
||||||
|
t.Fatal("newTestCluster must have size greater than zero")
|
||||||
|
}
|
||||||
|
cSize := args.size
|
||||||
|
out := testcluster{
|
||||||
|
t: t,
|
||||||
|
server: make([]*Server, cSize),
|
||||||
|
cleanup: make([]func(), cSize),
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < cSize; i += 1 {
|
||||||
|
out.server[i], out.cleanup[i] = TestServer(t, func(c *Config) {
|
||||||
|
c.NodeName = fmt.Sprintf("node-%v", i+1)
|
||||||
|
c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(3)
|
||||||
|
c.BootstrapExpect = cSize
|
||||||
|
c.ACLEnabled = args.enableACL
|
||||||
|
})
|
||||||
|
}
|
||||||
|
t.Cleanup(out.Cleanup)
|
||||||
|
out.rpc = out.server[0].RPC
|
||||||
|
|
||||||
|
TestJoin(t, out.server...)
|
||||||
|
out.WaitForLeader()
|
||||||
|
|
||||||
|
if args.enableACL {
|
||||||
|
// Bootstrap the ACL subsystem
|
||||||
|
token := mock.ACLManagementToken()
|
||||||
|
err := out.server[0].State().BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, token)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to bootstrap ACL token: %v", err)
|
||||||
|
}
|
||||||
|
t.Logf("bootstrap token: %v", *token)
|
||||||
|
out.token = token
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tc testcluster) WaitForLeader() {
|
||||||
|
testutil.WaitForLeader(tc.t, tc.rpc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tc testcluster) leader() *Server {
|
||||||
|
tc.WaitForLeader()
|
||||||
|
for _, s := range tc.server {
|
||||||
|
if isLeader, _ := s.getLeader(); isLeader {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tc testcluster) anyFollower() *Server {
|
||||||
|
if len(tc.server) < 2 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
testutil.WaitForLeader(tc.t, tc.rpc)
|
||||||
|
for _, s := range tc.server {
|
||||||
|
if isLeader, _ := s.getLeader(); !isLeader {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// something weird happened.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOperator_TransferLeadershipToServerAddress_ACL(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
tc := newTestCluster(t, tcArgs{enableACL: true})
|
||||||
|
s1 := tc.leader()
|
||||||
|
codec := rpcClient(t, s1)
|
||||||
|
state := s1.fsm.State()
|
||||||
|
|
||||||
|
lAddr, _ := s1.raft.LeaderWithID()
|
||||||
|
|
||||||
|
var addr raft.ServerAddress
|
||||||
|
// Find the first non-leader server in the list.
|
||||||
|
for a := range s1.localPeers {
|
||||||
|
addr = a
|
||||||
|
if addr != lAddr {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create ACL token
|
||||||
|
invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite))
|
||||||
|
|
||||||
|
arg := structs.RaftPeerRequest{
|
||||||
|
RaftIDAddress: structs.RaftIDAddress{Address: addr},
|
||||||
|
WriteRequest: structs.WriteRequest{Region: s1.config.Region},
|
||||||
|
}
|
||||||
|
|
||||||
|
var reply struct{}
|
||||||
|
|
||||||
|
t.Run("no-token", func(t *testing.T) {
|
||||||
|
// Try with no token and expect permission denied
|
||||||
|
err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply)
|
||||||
|
must.Error(t, err)
|
||||||
|
must.ErrorIs(t, err, rpcPermDeniedErr)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid-token", func(t *testing.T) {
|
||||||
|
// Try with an invalid token and expect permission denied
|
||||||
|
arg.AuthToken = invalidToken.SecretID
|
||||||
|
err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply)
|
||||||
|
must.Error(t, err)
|
||||||
|
must.ErrorIs(t, err, rpcPermDeniedErr)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("good-token", func(t *testing.T) {
|
||||||
|
// Try with a management token
|
||||||
|
arg.AuthToken = tc.token.SecretID
|
||||||
|
err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply)
|
||||||
|
must.NoError(t, err)
|
||||||
|
|
||||||
|
// Is the expected leader the new one?
|
||||||
|
tc.WaitForLeader()
|
||||||
|
lAddrNew, _ := s1.raft.LeaderWithID()
|
||||||
|
must.Eq(t, addr, lAddrNew)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOperator_TransferLeadershipToServerID_ACL(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
tc := newTestCluster(t, tcArgs{enableACL: true})
|
||||||
|
s1 := tc.leader()
|
||||||
|
codec := rpcClient(t, s1)
|
||||||
|
state := s1.fsm.State()
|
||||||
|
|
||||||
|
_, ldrID := s1.raft.LeaderWithID()
|
||||||
|
|
||||||
|
var tgtID raft.ServerID
|
||||||
|
// Find the first non-leader server in the list.
|
||||||
|
for _, sp := range s1.localPeers {
|
||||||
|
tgtID = raft.ServerID(sp.ID)
|
||||||
|
if tgtID != ldrID {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create ACL token
|
||||||
|
invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite))
|
||||||
|
|
||||||
|
arg := structs.RaftPeerRequest{
|
||||||
|
RaftIDAddress: structs.RaftIDAddress{
|
||||||
|
ID: tgtID,
|
||||||
|
},
|
||||||
|
WriteRequest: structs.WriteRequest{Region: s1.config.Region},
|
||||||
|
}
|
||||||
|
|
||||||
|
var reply struct{}
|
||||||
|
|
||||||
|
t.Run("no-token", func(t *testing.T) {
|
||||||
|
// Try with no token and expect permission denied
|
||||||
|
err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply)
|
||||||
|
must.Error(t, err)
|
||||||
|
must.ErrorIs(t, err, rpcPermDeniedErr)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid-token", func(t *testing.T) {
|
||||||
|
// Try with an invalid token and expect permission denied
|
||||||
|
arg.AuthToken = invalidToken.SecretID
|
||||||
|
err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply)
|
||||||
|
must.Error(t, err)
|
||||||
|
must.ErrorIs(t, err, rpcPermDeniedErr)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("good-token", func(t *testing.T) {
|
||||||
|
// Try with a management token
|
||||||
|
arg.AuthToken = tc.token.SecretID
|
||||||
|
err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply)
|
||||||
|
must.NoError(t, err)
|
||||||
|
|
||||||
|
// Is the expected leader the new one?
|
||||||
|
tc.WaitForLeader()
|
||||||
|
_, ldrID := s1.raft.LeaderWithID()
|
||||||
|
must.Eq(t, tgtID, ldrID)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestOperator_SchedulerGetConfiguration(t *testing.T) {
|
func TestOperator_SchedulerGetConfiguration(t *testing.T) {
|
||||||
ci.Parallel(t)
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,12 @@
|
|||||||
package structs
|
package structs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/netip"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/go-uuid"
|
||||||
"github.com/hashicorp/raft"
|
"github.com/hashicorp/raft"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -49,6 +52,8 @@ type RaftConfigurationResponse struct {
|
|||||||
|
|
||||||
// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft
|
// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft
|
||||||
// operation on a specific Raft peer by address in the form of "IP:port".
|
// operation on a specific Raft peer by address in the form of "IP:port".
|
||||||
|
//
|
||||||
|
// Deprecated: Use RaftPeerRequest with an Address instead.
|
||||||
type RaftPeerByAddressRequest struct {
|
type RaftPeerByAddressRequest struct {
|
||||||
// Address is the peer to remove, in the form "IP:port".
|
// Address is the peer to remove, in the form "IP:port".
|
||||||
Address raft.ServerAddress
|
Address raft.ServerAddress
|
||||||
@@ -59,6 +64,8 @@ type RaftPeerByAddressRequest struct {
|
|||||||
|
|
||||||
// RaftPeerByIDRequest is used by the Operator endpoint to apply a Raft
|
// RaftPeerByIDRequest is used by the Operator endpoint to apply a Raft
|
||||||
// operation on a specific Raft peer by ID.
|
// operation on a specific Raft peer by ID.
|
||||||
|
//
|
||||||
|
// Deprecated: Use RaftPeerRequest with an ID instead.
|
||||||
type RaftPeerByIDRequest struct {
|
type RaftPeerByIDRequest struct {
|
||||||
// ID is the peer ID to remove.
|
// ID is the peer ID to remove.
|
||||||
ID raft.ServerID
|
ID raft.ServerID
|
||||||
@@ -67,6 +74,58 @@ type RaftPeerByIDRequest struct {
|
|||||||
WriteRequest
|
WriteRequest
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RaftPeerRequest is used by the Operator endpoint to apply a Raft
|
||||||
|
// operation on a specific Raft peer by its peer ID or address in the form of
|
||||||
|
// "IP:port".
|
||||||
|
type RaftPeerRequest struct {
|
||||||
|
// RaftIDAddress contains an ID and Address field to identify the target
|
||||||
|
RaftIDAddress
|
||||||
|
// WriteRequest holds the Region for this request.
|
||||||
|
WriteRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RaftPeerRequest) Validate() error {
|
||||||
|
if (r.ID == "" && r.Address == "") || (r.ID != "" && r.Address != "") {
|
||||||
|
return errors.New("either ID or Address must be set")
|
||||||
|
}
|
||||||
|
if r.ID != "" {
|
||||||
|
return r.validateID()
|
||||||
|
}
|
||||||
|
return r.validateAddress()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RaftPeerRequest) validateID() error {
|
||||||
|
if _, err := uuid.ParseUUID(string(r.ID)); err != nil {
|
||||||
|
return fmt.Errorf("id must be a uuid: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RaftPeerRequest) validateAddress() error {
|
||||||
|
if _, err := netip.ParseAddrPort(string(r.Address)); err != nil {
|
||||||
|
return fmt.Errorf("address must be in IP:port format: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type LeadershipTransferResponse struct {
|
||||||
|
From RaftIDAddress // Server yielding leadership
|
||||||
|
To RaftIDAddress // Server obtaining leadership
|
||||||
|
Noop bool // Was the transfer a non-operation
|
||||||
|
Err error // Non-nil if there was an error while transferring leadership
|
||||||
|
}
|
||||||
|
|
||||||
|
type RaftIDAddress struct {
|
||||||
|
Address raft.ServerAddress
|
||||||
|
ID raft.ServerID
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRaftIDAddress takes parameters in the order provided by raft's
|
||||||
|
// LeaderWithID func and returns a RaftIDAddress
|
||||||
|
func NewRaftIDAddress(a raft.ServerAddress, id raft.ServerID) RaftIDAddress {
|
||||||
|
return RaftIDAddress{ID: id, Address: a}
|
||||||
|
}
|
||||||
|
|
||||||
// AutopilotSetConfigRequest is used by the Operator endpoint to update the
|
// AutopilotSetConfigRequest is used by the Operator endpoint to update the
|
||||||
// current Autopilot configuration of the cluster.
|
// current Autopilot configuration of the cluster.
|
||||||
type AutopilotSetConfigRequest struct {
|
type AutopilotSetConfigRequest struct {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
layout: api
|
layout: api
|
||||||
page_title: Raft - Operator - HTTP API
|
page_title: Raft - Operator - HTTP API
|
||||||
description: |-
|
description: |-
|
||||||
The /operator/raft endpoints provide tools for management of the Raft subsystem.
|
The /operator/raft endpoints provide tools for management of the Raft subsystem.
|
||||||
---
|
---
|
||||||
|
|
||||||
# Raft Operator HTTP API
|
# Raft Operator HTTP API
|
||||||
@@ -34,26 +34,56 @@ The table below shows this endpoint's support for
|
|||||||
|
|
||||||
### Sample Request
|
### Sample Request
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<Tab heading="Nomad CLI">
|
||||||
|
|
||||||
|
```shell-session
|
||||||
|
$ nomad operator api /v1/operator/raft/configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
</Tab>
|
||||||
|
<Tab heading="curl">
|
||||||
|
|
||||||
```shell-session
|
```shell-session
|
||||||
$ curl \
|
$ curl \
|
||||||
https://localhost:4646/v1/operator/raft/configuration
|
https://localhost:4646/v1/operator/raft/configuration
|
||||||
```
|
```
|
||||||
|
|
||||||
|
</Tab>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
### Sample Response
|
### Sample Response
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"Index": 1,
|
"Index": 0,
|
||||||
"Servers": [
|
"Servers": [
|
||||||
{
|
{
|
||||||
"Address": "127.0.0.1:4647",
|
"Address": "10.1.0.10:4647",
|
||||||
"ID": "127.0.0.1:4647",
|
"ID": "c13f9998-a0f3-d765-0b52-55a0b3ce5f88",
|
||||||
"Leader": true,
|
"Leader": false,
|
||||||
"Node": "bacon-mac.global",
|
"Node": "node1.global",
|
||||||
"RaftProtocol": 2,
|
"RaftProtocol": "3",
|
||||||
"Voter": true
|
"Voter": true
|
||||||
}
|
},
|
||||||
]
|
{
|
||||||
|
"Address": "10.1.0.20:4647",
|
||||||
|
"ID": "d7927f2b-067f-45a4-6266-af8bb84de082",
|
||||||
|
"Leader": true,
|
||||||
|
"Node": "node2.global",
|
||||||
|
"RaftProtocol": "3",
|
||||||
|
"Voter": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Address": "10.1.0.30:4647",
|
||||||
|
"ID": "00d56ef8-938e-abc3-6f8a-f8ac80a80fb9",
|
||||||
|
"Leader": false,
|
||||||
|
"Node": "node3.global",
|
||||||
|
"RaftProtocol": "3",
|
||||||
|
"Voter": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -66,8 +96,8 @@ $ curl \
|
|||||||
- `Servers` `(array: Server)` - The returned `Servers` array has information
|
- `Servers` `(array: Server)` - The returned `Servers` array has information
|
||||||
about the servers in the Raft peer configuration.
|
about the servers in the Raft peer configuration.
|
||||||
|
|
||||||
- `ID` `(string)` - The ID of the server. This is the same as the `Address`
|
- `ID` `(string)` - The ID of the server. For Raft protocol v2, this is the
|
||||||
but may be upgraded to a GUID in a future version of Nomad.
|
same as the `Address`. Raft protocol v3 uses GUIDs as the ID.
|
||||||
|
|
||||||
- `Node` `(string)` - The node name of the server, as known to Nomad, or
|
- `Node` `(string)` - The node name of the server, as known to Nomad, or
|
||||||
`"(unknown)"` if the node is stale and not known.
|
`"(unknown)"` if the node is stale and not known.
|
||||||
@@ -100,18 +130,100 @@ The table below shows this endpoint's support for
|
|||||||
|
|
||||||
### Parameters
|
### Parameters
|
||||||
|
|
||||||
- `address` `(string: <optional>)` - Specifies the server to remove as
|
- `address` `(string: <optional>)` - Specifies the Raft **Address** of the
|
||||||
`ip:port`. This cannot be provided along with the `id` parameter.
|
server to remove as provided in the output of `/v1/operator/raft/configuration`
|
||||||
|
API endpoint or the `nomad operator raft list-peers` command.
|
||||||
|
|
||||||
- `id` `(string: <optional>)` - Specifies the server to remove as
|
- `id` `(string: <optional>)` - Specifies the Raft **ID** of the server to
|
||||||
`id`. This cannot be provided along with the `address` parameter.
|
remove as provided in the output of `/v1/operator/raft/configuration`
|
||||||
|
API endpoint or the `nomad operator raft list-peers` command.
|
||||||
|
|
||||||
|
<Note>
|
||||||
|
|
||||||
|
Either `address` or `id` must be provided, but not both.
|
||||||
|
|
||||||
|
</Note>
|
||||||
|
|
||||||
### Sample Request
|
### Sample Request
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<Tab heading="Nomad CLI">
|
||||||
|
|
||||||
|
```shell-session
|
||||||
|
$ nomad operator api -X DELETE \
|
||||||
|
/v1/operator/raft/peer?address=1.2.3.4:4647
|
||||||
|
```
|
||||||
|
|
||||||
|
</Tab>
|
||||||
|
<Tab heading="curl">
|
||||||
|
|
||||||
```shell-session
|
```shell-session
|
||||||
$ curl \
|
$ curl \
|
||||||
--request DELETE \
|
--request DELETE \
|
||||||
https://localhost:4646/v1/operator/raft/peer?address=1.2.3.4:4646
|
--header "X-Nomad-Token: ${NOMAD_TOKEN}"
|
||||||
|
https://127.0.0.1:4646/v1/operator/raft/peer?address=1.2.3.4:4647
|
||||||
```
|
```
|
||||||
|
|
||||||
|
</Tab>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Transfer Leadership to another Raft Peer
|
||||||
|
|
||||||
|
This endpoint tells the current cluster leader to transfer leadership
|
||||||
|
to the Nomad server with given address or ID in the Raft
|
||||||
|
configuration. The return code signifies success or failure.
|
||||||
|
|
||||||
|
| Method | Path | Produces |
|
||||||
|
| ------------------- | --------------------------------------- | ------------------ |
|
||||||
|
| `PUT` <br /> `POST` | `/v1/operator/raft/transfer-leadership` | `application/json` |
|
||||||
|
|
||||||
|
The table below shows this endpoint's support for
|
||||||
|
[blocking queries](/nomad/api-docs#blocking-queries) and
|
||||||
|
[required ACLs](/nomad/api-docs#acls).
|
||||||
|
|
||||||
|
| Blocking Queries | ACL Required |
|
||||||
|
| ---------------- | ------------ |
|
||||||
|
| `NO` | `management` |
|
||||||
|
|
||||||
|
### Parameters
|
||||||
|
|
||||||
|
- `address` `(string: <optional>)` - Specifies the Raft **Address** of the
|
||||||
|
target server as provided in the output of `/v1/operator/raft/configuration`
|
||||||
|
API endpoint or the `nomad operator raft list-peers` command.
|
||||||
|
|
||||||
|
- `id` `(string: <optional>)` - Specifies the Raft **ID** of the target server
|
||||||
|
as provided in the output of `/v1/operator/raft/configuration` API endpoint or
|
||||||
|
the `nomad operator raft list-peers` command.
|
||||||
|
|
||||||
|
<Note>
|
||||||
|
|
||||||
|
- The cluster must be running Raft protocol v3 or greater on all server members.
|
||||||
|
|
||||||
|
- Either `address` or `id` must be provided, but not both.
|
||||||
|
|
||||||
|
</Note>
|
||||||
|
|
||||||
|
### Sample Requests
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<Tab heading="Nomad CLI">
|
||||||
|
|
||||||
|
```shell-session
|
||||||
|
$ nomad operator api -X PUT \
|
||||||
|
"/v1/operator/raft/transfer-leadership?address=1.2.3.4:4647"
|
||||||
|
```
|
||||||
|
|
||||||
|
</Tab>
|
||||||
|
<Tab heading="curl">
|
||||||
|
|
||||||
|
```shell-session
|
||||||
|
$ curl --request PUT \
|
||||||
|
--header "X-Nomad-Token: ${NOMAD_TOKEN}"
|
||||||
|
"https://127.0.0.1:4646/v1/operator/raft/transfer-leadership?address=1.2.3.4:4647"
|
||||||
|
```
|
||||||
|
|
||||||
|
</Tab>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
[consensus protocol guide]: /nomad/docs/concepts/consensus
|
[consensus protocol guide]: /nomad/docs/concepts/consensus
|
||||||
|
|||||||
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
layout: docs
|
||||||
|
page_title: 'Commands: operator raft transfer-leadership'
|
||||||
|
description: |
|
||||||
|
Transfer leadership to a specific a Nomad server.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Command: operator raft transfer-leadership
|
||||||
|
|
||||||
|
Transfer leadership from the current leader to the given server member.
|
||||||
|
|
||||||
|
While performing a [rolling upgrade][] of your Nomad cluster, it might be
|
||||||
|
advisable to transfer leadership to a specific node in the cluster. For example,
|
||||||
|
setting the leader to the first upgraded server in the cluster can prevent
|
||||||
|
leadership churn as you upgrade the remaining server nodes.
|
||||||
|
|
||||||
|
The target server's ID or address:port are required and can be obtained by
|
||||||
|
running the [`nomad operator raft list-peers`][] command or by calling the
|
||||||
|
[Read Raft Configuration][] API endpoint.
|
||||||
|
|
||||||
|
For an API to perform these operations programmatically, please see the
|
||||||
|
documentation for the [Operator][] endpoint.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
nomad operator raft transfer-leadership [options]
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tip title="Required Permissions">
|
||||||
|
If ACLs are enabled, this command requires a management token.
|
||||||
|
</Tip>
|
||||||
|
|
||||||
|
## General Options
|
||||||
|
|
||||||
|
@include 'general_options_no_namespace.mdx'
|
||||||
|
|
||||||
|
## Transfer Leadership Options
|
||||||
|
|
||||||
|
- `-peer-address`: Specifies the Raft **Address** of the target server as
|
||||||
|
provided in the output of the [`nomad operator raft list-peers`][] command or
|
||||||
|
the [Read Raft Configuration] API endpoint.
|
||||||
|
|
||||||
|
- `-peer-id`: Specifies the Raft **ID** of the target server as provided in the
|
||||||
|
output of the [`nomad operator raft list-peers`][] command or the
|
||||||
|
[Read Raft Configuration] API endpoint.
|
||||||
|
|
||||||
|
<Note>
|
||||||
|
|
||||||
|
Either `-peer-address` or `-peer-id` must be provided, but not both.
|
||||||
|
|
||||||
|
</Note>
|
||||||
|
|
||||||
|
[`nomad operator raft list-peers`]: /nomad/docs/commands/operator/raft/list-peers 'Nomad operator raft list-peers command'
|
||||||
|
[operator]: /nomad/api-docs/operator 'Nomad Operator API'
|
||||||
|
[rolling upgrade]: /nomad/docs/upgrade#upgrade-process
|
||||||
|
[Read Raft Configuration]: /nomad/api-docs/operator/raft#read-raft-configuration
|
||||||
@@ -800,6 +800,10 @@
|
|||||||
{
|
{
|
||||||
"title": "state",
|
"title": "state",
|
||||||
"path": "commands/operator/raft/state"
|
"path": "commands/operator/raft/state"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "transfer-leadership",
|
||||||
|
"path": "commands/operator/raft/transfer-leadership"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user