[server] Directed leadership transfer CLI and API (#17383)

* Add directed leadership transfer func
* Add leadership transfer RPC endpoint
* Add ACL tests for leadership-transfer endpoint
* Add HTTP API route and implementation
* Add to Go API client
* Implement CLI command
* Add documentation
* Add changelog

Co-authored-by: Tim Gross <tgross@hashicorp.com>
This commit is contained in:
Charlie Voiselle
2023-10-04 12:20:27 -04:00
committed by GitHub
parent c885c08640
commit 8a93ff3d2d
13 changed files with 1008 additions and 24 deletions

View File

@@ -30,6 +30,8 @@ func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request
return s.OperatorRaftConfiguration(resp, req)
case strings.HasPrefix(path, "peer"):
return s.OperatorRaftPeer(resp, req)
case strings.HasPrefix(path, "transfer-leadership"):
return s.OperatorRaftTransferLeadership(resp, req)
default:
return nil, CodedError(404, ErrInvalidMethod)
}
@@ -56,8 +58,7 @@ func (s *HTTPServer) OperatorRaftConfiguration(resp http.ResponseWriter, req *ht
return reply, nil
}
// OperatorRaftPeer supports actions on Raft peers. Currently we only support
// removing peers by address.
// OperatorRaftPeer supports actions on Raft peers.
func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != http.MethodDelete {
return nil, CodedError(404, ErrInvalidMethod)
@@ -97,6 +98,57 @@ func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Reques
return nil, nil
}
// OperatorRaftTransferLeadership handles requests to transfer Raft leadership
// to another server. The target is named by exactly one of the "id" or
// "address" query parameters, and only POST and PUT are accepted.
//
// An unsupported method yields a 405 error. Missing, duplicated, empty, or
// conflicting query parameters, and requests rejected by Validate(), yield a
// 400 error. Otherwise the request is forwarded to the
// Operator.TransferLeadershipToPeer RPC and its reply (or error) is returned.
func (s *HTTPServer) OperatorRaftTransferLeadership(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	if req.Method != http.MethodPost && req.Method != http.MethodPut {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	params := req.URL.Query()

	// Using the params map directly (rather than Get) so that repeated
	// parameters remain visible to the checks below.
	id, hasID := params["id"]
	addr, hasAddress := params["address"]

	// There are some items that we can parse for here that are more unwieldy in
	// the Validate() func on the RPC request object, like repeated query params.
	switch {
	case !hasID && !hasAddress:
		return nil, CodedError(http.StatusBadRequest, "must specify id or address")
	case hasID && hasAddress:
		return nil, CodedError(http.StatusBadRequest, "must specify either id or address")
	case hasID && id[0] == "":
		return nil, CodedError(http.StatusBadRequest, "id must be non-empty")
	case hasID && len(id) > 1:
		return nil, CodedError(http.StatusBadRequest, "must specify only one id")
	case hasAddress && addr[0] == "":
		return nil, CodedError(http.StatusBadRequest, "address must be non-empty")
	case hasAddress && len(addr) > 1:
		return nil, CodedError(http.StatusBadRequest, "must specify only one address")
	}

	var out structs.LeadershipTransferResponse
	args := &structs.RaftPeerRequest{}
	s.parseWriteRequest(req, &args.WriteRequest)

	// Exactly one of id/address is set at this point.
	if hasID {
		args.ID = raft.ServerID(id[0])
	} else {
		args.Address = raft.ServerAddress(addr[0])
	}

	if err := args.Validate(); err != nil {
		return nil, CodedError(http.StatusBadRequest, err.Error())
	}

	// args is already a pointer; pass it as-is instead of a pointer to it.
	if err := s.agent.RPC("Operator.TransferLeadershipToPeer", args, &out); err != nil {
		return nil, err
	}

	return out, nil
}
// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {

View File

@@ -20,6 +20,8 @@ import (
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
@@ -91,6 +93,144 @@ func TestHTTP_OperatorRaftPeer(t *testing.T) {
})
}
// TestHTTP_OperatorRaftTransferLeadership exercises the transfer-leadership
// HTTP handler in three groups: unsupported HTTP methods, malformed or
// invalid query strings, and well-formed requests that are expected to reach
// the RPC layer and fail there because the peer is unknown.
func TestHTTP_OperatorRaftTransferLeadership(t *testing.T) {
	ci.Parallel(t)
	configCB := func(c *Config) {
		c.Client.Enabled = false
		c.Server.NumSchedulers = pointer.Of(0)
	}

	httpTest(t, configCB, func(s *TestAgent) {
		body := bytes.NewBuffer(nil)

		// Every method other than POST and PUT must be rejected.
		badMethods := []string{
			http.MethodConnect,
			http.MethodDelete,
			http.MethodGet,
			http.MethodHead,
			http.MethodOptions,
			http.MethodPatch,
			http.MethodTrace,
		}
		for _, tc := range badMethods {
			tc := tc
			t.Run(tc+" method errors", func(t *testing.T) {
				req, err := http.NewRequest(tc, "/v1/operator/raft/transfer-leadership?address=nope", body)
				must.NoError(t, err)

				resp := httptest.NewRecorder()
				_, err = s.Server.OperatorRaftTransferLeadership(resp, req)

				must.Error(t, err)
				must.ErrorContains(t, err, "Invalid method")
				body.Reset()
			})
		}

		// Query strings that must be rejected before any RPC is made.
		apiErrTCs := []struct {
			name     string
			qs       string
			expected string
		}{
			{
				name:     "URL with id and address errors",
				qs:       `?id=foo&address=bar`,
				expected: "must specify either id or address",
			},
			{
				name:     "URL without id and address errors",
				qs:       ``,
				expected: "must specify id or address",
			},
			{
				name:     "URL with multiple id errors",
				qs:       `?id=foo&id=bar`,
				expected: "must specify only one id",
			},
			{
				name:     "URL with multiple address errors",
				qs:       `?address=foo&address=bar`,
				expected: "must specify only one address",
			},
			{
				name:     "URL with an empty id errors",
				qs:       `?id`,
				expected: "id must be non-empty",
			},
			{
				name:     "URL with an empty address errors",
				qs:       `?address`,
				expected: "address must be non-empty",
			},
			{
				name:     "an invalid id errors",
				qs:       `?id=foo`,
				expected: "id must be a uuid",
			},
			{
				// Named distinctly from the empty-address case above so the
				// two subtests do not collide.
				name:     "an invalid address errors",
				qs:       `?address=bar`,
				expected: "address must be in IP:port format",
			},
		}
		for _, tc := range apiErrTCs {
			tc := tc
			t.Run(tc.name, func(t *testing.T) {
				req, err := http.NewRequest(
					http.MethodPut,
					"/v1/operator/raft/transfer-leadership"+tc.qs,
					body,
				)
				must.NoError(t, err)

				resp := httptest.NewRecorder()
				_, err = s.Server.OperatorRaftTransferLeadership(resp, req)

				must.Error(t, err)
				must.ErrorContains(t, err, tc.expected)
				body.Reset()
			})
		}
	})

	// Well-formed requests naming a peer that is not in the Raft
	// configuration; each case gets its own test agent.
	testID := uuid.Generate()
	apiOkTCs := []struct {
		name     string
		qs       string
		expected string
	}{
		{
			"id",
			"?id=" + testID,
			`id "` + testID + `" was not found in the Raft configuration`,
		},
		{
			"address",
			"?address=9.9.9.9:8000",
			`address "9.9.9.9:8000" was not found in the Raft configuration`,
		},
	}
	for _, tc := range apiOkTCs {
		tc := tc
		t.Run(tc.name+" can roundtrip", func(t *testing.T) {
			httpTest(t, configCB, func(s *TestAgent) {
				body := bytes.NewBuffer(nil)
				req, err := http.NewRequest(
					http.MethodPut,
					"/v1/operator/raft/transfer-leadership"+tc.qs,
					body,
				)
				must.NoError(t, err)

				// If we get this error, it proves we sent the parameter all the
				// way through.
				resp := httptest.NewRecorder()
				_, err = s.Server.OperatorRaftTransferLeadership(resp, req)
				must.ErrorContains(t, err, tc.expected)
			})
		})
	}
}
func TestOperator_AutopilotGetConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {

View File

@@ -749,6 +749,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"operator raft transfer-leadership": func() (cli.Command, error) {
return &OperatorRaftTransferLeadershipCommand{
Meta: meta,
}, nil
},
"operator raft info": func() (cli.Command, error) {
return &OperatorRaftInfoCommand{
Meta: meta,

View File

@@ -0,0 +1,125 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package command
import (
"fmt"
"strings"
"github.com/hashicorp/nomad/api"
"github.com/posener/complete"
)
// OperatorRaftTransferLeadershipCommand implements the
// "operator raft transfer-leadership" CLI command. It embeds Meta, which
// supplies the flag set, API client, and Ui used by its methods.
type OperatorRaftTransferLeadershipCommand struct {
Meta
}
// Help returns the long-form usage text for the command, with the general
// options block spliced in and surrounding whitespace trimmed.
func (c *OperatorRaftTransferLeadershipCommand) Help() string {
	// The options heading names this command; it previously read
	// "Remove Peer Options", carried over from the remove-peer command.
	helpText := `
Usage: nomad operator raft transfer-leadership [options]
Transfer leadership to the Nomad server with given -peer-address or
-peer-id in the Raft configuration. All server nodes in the cluster
must be running at least Raft protocol v3 in order to use this command.
There are cases where you might desire transferring leadership from one
cluster member to another, for example, during a rolling upgrade. This
command allows you to designate a new server to be cluster leader.
Note: This command requires a currently established leader to function.
If ACLs are enabled, this command requires a management token.
General Options:
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
Transfer Leadership Options:
-peer-address="IP:port"
Transfer leadership to the Nomad server with given Raft address.
-peer-id="id"
Transfer leadership to the Nomad server with given Raft ID.
`
	return strings.TrimSpace(helpText)
}
// AutocompleteFlags returns the client flag completions from Meta merged
// with this command's own -peer-address and -peer-id flags, both of which
// accept arbitrary values.
func (c *OperatorRaftTransferLeadershipCommand) AutocompleteFlags() complete.Flags {
	peerFlags := complete.Flags{
		"-peer-address": complete.PredictAnything,
		"-peer-id":      complete.PredictAnything,
	}
	clientFlags := c.Meta.AutocompleteFlags(FlagSetClient)
	return mergeAutocompleteFlags(clientFlags, peerFlags)
}
// AutocompleteArgs reports that the command has no positional arguments to
// complete.
func (c *OperatorRaftTransferLeadershipCommand) AutocompleteArgs() complete.Predictor {
	var noArgs complete.Predictor = complete.PredictNothing
	return noArgs
}
// Synopsis returns the one-line description of the command.
func (c *OperatorRaftTransferLeadershipCommand) Synopsis() string {
	const synopsis = "Transfer leadership to a specified Nomad server"
	return synopsis
}
// Name returns the full command path as typed on the CLI.
func (c *OperatorRaftTransferLeadershipCommand) Name() string {
	const commandName = "operator raft transfer-leadership"
	return commandName
}
// Run parses the -peer-address and -peer-id flags, builds an API client, and
// asks the cluster to transfer leadership to the named peer. It returns 0 on
// success and 1 on any flag, client, or transfer error, reporting the outcome
// through the command's Ui.
func (c *OperatorRaftTransferLeadershipCommand) Run(args []string) int {
	var (
		targetAddr string
		targetID   string
	)

	flagSet := c.Meta.FlagSet("raft", FlagSetClient)
	flagSet.Usage = func() { c.Ui.Output(c.Help()) }
	flagSet.StringVar(&targetAddr, "peer-address", "", "")
	flagSet.StringVar(&targetID, "peer-id", "", "")
	if parseErr := flagSet.Parse(args); parseErr != nil {
		c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", parseErr))
		return 1
	}

	// Set up a client.
	apiClient, clientErr := c.Meta.Client()
	if clientErr != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", clientErr))
		return 1
	}

	transferErr := raftTransferLeadership(targetAddr, targetID, apiClient.Operator())
	if transferErr != nil {
		c.Ui.Error(fmt.Sprintf("Error transferring leadership to peer: %v", transferErr))
		return 1
	}

	// Report by address when one was given, otherwise by id.
	msg := fmt.Sprintf("Transferred leadership to peer with id %q", targetID)
	if targetAddr != "" {
		msg = fmt.Sprintf("Transferred leadership to peer with address %q", targetAddr)
	}
	c.Ui.Output(msg)

	return 0
}
// raftTransferLeadership asks operator to transfer leadership to the peer
// identified by address or by id. Exactly one of the two must be non-empty;
// supplying neither or both is an error. Any error from the operator call is
// returned unchanged.
func raftTransferLeadership(address, id string, operator *api.Operator) error {
	hasAddress, hasID := address != "", id != ""

	switch {
	case !hasAddress && !hasID:
		return fmt.Errorf("an address or id is required for the destination peer")
	case hasAddress && hasID:
		return fmt.Errorf("cannot give both an address and id")
	case hasAddress:
		// Try to perform the leadership transfer by address.
		return operator.RaftTransferLeadershipByAddress(address, nil)
	default:
		// Only the id remains.
		return operator.RaftTransferLeadershipByID(id, nil)
	}
}