From fbefdb98c3495e276085d43da424fe70af2b9c2e Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 11 Aug 2020 10:18:54 -0400 Subject: [PATCH] csi: nomad volume detach command (#8584) The soundness guarantees of the CSI specification leave a little to be desired in our ability to provide a 100% reliable automated solution for managing volumes. This changeset provides a new command to bridge this gap by providing the operator the ability to intervene. The command doesn't take an allocation ID so that the operator doesn't have to keep track of alloc IDs that may have been GC'd. Handle this case in the unpublish RPC by sending the client RPC for all the terminal/nil allocs on the selected node. --- api/csi.go | 5 + command/agent/csi_endpoint.go | 62 ++++++++++-- command/commands.go | 5 + command/volume.go | 4 + command/volume_detach.go | 97 +++++++++++++++++++ nomad/csi_endpoint.go | 63 +++++++++++- nomad/csi_endpoint_test.go | 83 ++++++++++------ vendor/github.com/hashicorp/nomad/api/csi.go | 5 + website/data/docs-navigation.js | 2 +- website/pages/api-docs/volumes.mdx | 38 +++++++- website/pages/docs/commands/volume/detach.mdx | 28 ++++++ website/pages/docs/commands/volume/index.mdx | 6 +- 12 files changed, 353 insertions(+), 45 deletions(-) create mode 100644 command/volume_detach.go create mode 100644 website/pages/docs/commands/volume/detach.mdx diff --git a/api/csi.go b/api/csi.go index fbc984e51..47d7017a2 100644 --- a/api/csi.go +++ b/api/csi.go @@ -60,6 +60,11 @@ func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error { return err } +func (v *CSIVolumes) Detach(volID, nodeID string, w *WriteOptions) error { + _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/detach?node=%v", url.PathEscape(volID), nodeID), nil, w) + return err +} + // CSIVolumeAttachmentMode duplicated in nomad/structs/csi.go type CSIVolumeAttachmentMode string diff --git a/command/agent/csi_endpoint.go b/command/agent/csi_endpoint.go index babf13e77..95c36f2a9 100644 --- a/command/agent/csi_endpoint.go +++ b/command/agent/csi_endpoint.go @@ -51,21 +51,35 @@ func (s *HTTPServer) CSIVolumeSpecificRequest(resp http.ResponseWriter, req *htt // Tokenize the suffix of the path to get the volume id reqSuffix := strings.TrimPrefix(req.URL.Path, "/v1/volume/csi/") tokens := strings.Split(reqSuffix, "/") - if len(tokens) > 2 || len(tokens) < 1 { + if len(tokens) < 1 { return nil, CodedError(404, resourceNotFoundErr) } id := tokens[0] - switch req.Method { - case "GET": - return s.csiVolumeGet(id, resp, req) - case "PUT": - return s.csiVolumePut(id, resp, req) - case "DELETE": - return s.csiVolumeDelete(id, resp, req) - default: - return nil, CodedError(405, ErrInvalidMethod) + if len(tokens) == 1 { + switch req.Method { + case http.MethodGet: + return s.csiVolumeGet(id, resp, req) + case http.MethodPut: + return s.csiVolumePut(id, resp, req) + case http.MethodDelete: + return s.csiVolumeDelete(id, resp, req) + default: + return nil, CodedError(405, ErrInvalidMethod) + } } + + if len(tokens) == 2 { + if tokens[1] != "detach" { + return nil, CodedError(404, resourceNotFoundErr) + } + if req.Method != http.MethodDelete { + return nil, CodedError(405, ErrInvalidMethod) + } + return s.csiVolumeDetach(id, resp, req) + } + + return nil, CodedError(404, resourceNotFoundErr) } func (s *HTTPServer) csiVolumeGet(id string, resp http.ResponseWriter, req *http.Request) (interface{}, error) { @@ -149,6 +163,34 @@ func (s *HTTPServer) csiVolumeDelete(id string, resp http.ResponseWriter, req *h return nil, nil } +func (s *HTTPServer) csiVolumeDetach(id string, resp http.ResponseWriter, req *http.Request) (interface{}, error) { + if req.Method != http.MethodDelete { + return nil, CodedError(405, ErrInvalidMethod) + } + + nodeID := req.URL.Query().Get("node") + if nodeID == "" { + return nil, CodedError(400, "detach requires node ID") + } + + args := structs.CSIVolumeUnpublishRequest{ + VolumeID: id, + Claim: &structs.CSIVolumeClaim{ + NodeID: nodeID, + Mode: structs.CSIVolumeClaimRelease, + }, + } + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.CSIVolumeUnpublishResponse + if err := s.agent.RPC("CSIVolume.Unpublish", &args, &out); err != nil { + return nil, err + } + + setMeta(resp, &out.QueryMeta) + return nil, nil +} + // CSIPluginsRequest lists CSI plugins func (s *HTTPServer) CSIPluginsRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) { if req.Method != "GET" { diff --git a/command/commands.go b/command/commands.go index 12a18abe8..dc0166bc8 100644 --- a/command/commands.go +++ b/command/commands.go @@ -723,6 +723,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "volume detach": func() (cli.Command, error) { + return &VolumeDetachCommand{ + Meta: meta, + }, nil + }, } deprecated := map[string]cli.CommandFactory{ diff --git a/command/volume.go b/command/volume.go index 83eb44093..6260b0bfc 100644 --- a/command/volume.go +++ b/command/volume.go @@ -28,6 +28,10 @@ Usage: nomad volume [options] $ nomad volume deregister + Detach an unused volume: + + $ nomad volume detach + Please see the individual subcommand help for detailed usage information. ` return strings.TrimSpace(helpText) diff --git a/command/volume_detach.go b/command/volume_detach.go new file mode 100644 index 000000000..1810d4d4d --- /dev/null +++ b/command/volume_detach.go @@ -0,0 +1,97 @@ +package command + +import ( + "fmt" + "strings" + + "github.com/hashicorp/nomad/api/contexts" + "github.com/posener/complete" +) + +type VolumeDetachCommand struct { + Meta +} + +func (c *VolumeDetachCommand) Help() string { + helpText := ` +Usage: nomad volume detach [options] + + Detach a volume from a Nomad client. + +General Options: + + ` + generalOptionsUsage() + ` + +` + return strings.TrimSpace(helpText) +} + +func (c *VolumeDetachCommand) AutocompleteFlags() complete.Flags { + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{}) +} + +func (c *VolumeDetachCommand) AutocompleteArgs() complete.Predictor { + return complete.PredictFunc(func(a complete.Args) []string { + client, err := c.Meta.Client() + if err != nil { + return nil + } + + resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Volumes, nil) + if err != nil { + return []string{} + } + matches := resp.Matches[contexts.Volumes] + + resp, _, err = client.Search().PrefixSearch(a.Last, contexts.Nodes, nil) + if err != nil { + return []string{} + } + for _, match := range resp.Matches[contexts.Nodes] { + matches = append(matches, match) + } + return matches + }) +} + +func (c *VolumeDetachCommand) Synopsis() string { + return "Detach a volume" +} + +func (c *VolumeDetachCommand) Name() string { return "volume detach" } + +func (c *VolumeDetachCommand) Run(args []string) int { + flags := c.Meta.FlagSet(c.Name(), FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + + if err := flags.Parse(args); err != nil { + c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) + return 1 + } + + // Check that we get exactly two arguments + args = flags.Args() + if l := len(args); l != 2 { + c.Ui.Error("This command takes two arguments: ") + c.Ui.Error(commandErrorText(c)) + return 1 + } + volID := args[0] + nodeID := args[1] + + // Get the HTTP client + client, err := c.Meta.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) + return 1 + } + + err = client.CSIVolumes().Detach(volID, nodeID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error detaching volume: %s", err)) + return 1 + } + + return 0 +} diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go index 290574cd9..ed95e994f 100644 --- a/nomad/csi_endpoint.go +++ b/nomad/csi_endpoint.go @@ -589,6 +589,66 @@ RELEASE_CLAIM: } func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error { + if claim.AllocationID != "" { + err := v.nodeUnpublishVolumeImpl(vol, claim) + if err != nil { + return err + } + claim.State = structs.CSIVolumeClaimStateNodeDetached + return v.checkpointClaim(vol, claim) + } + + // The RPC sent from the 'nomad node detach' command won't have an + // allocation ID set so we try to unpublish every terminal or invalid + // alloc on the node + allocIDs := []string{} + state := v.srv.fsm.State() + vol, err := state.CSIVolumeDenormalize(memdb.NewWatchSet(), vol) + if err != nil { + return err + } + for allocID, alloc := range vol.ReadAllocs { + if alloc == nil { + rclaim, ok := vol.ReadClaims[allocID] + if ok && rclaim.NodeID == claim.NodeID { + allocIDs = append(allocIDs, allocID) + } + } else { + if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() { + allocIDs = append(allocIDs, allocID) + } + } + } + for allocID, alloc := range vol.WriteAllocs { + if alloc == nil { + wclaim, ok := vol.WriteClaims[allocID] + if ok && wclaim.NodeID == claim.NodeID { + allocIDs = append(allocIDs, allocID) + } + } else { + if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() { + allocIDs = append(allocIDs, allocID) + } + } + } + var merr multierror.Error + for _, allocID := range allocIDs { + claim.AllocationID = allocID + err := v.nodeUnpublishVolumeImpl(vol, claim) + if err != nil { + merr.Errors = append(merr.Errors, err) + } + } + err = merr.ErrorOrNil() + if err != nil { + return err + } + + claim.State = structs.CSIVolumeClaimStateNodeDetached + return v.checkpointClaim(vol, claim) +} + +func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error { req := &cstructs.ClientCSINodeDetachVolumeRequest{ PluginID: vol.PluginID, VolumeID: vol.ID, @@ -609,8 +669,7 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C return fmt.Errorf("could not detach from node: %w", err) } } - claim.State = structs.CSIVolumeClaimStateNodeDetached - return v.checkpointClaim(vol, claim) + return nil } func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error { diff --git a/nomad/csi_endpoint_test.go b/nomad/csi_endpoint_test.go index d372e080d..900ca1be3 100644 --- a/nomad/csi_endpoint_test.go +++ b/nomad/csi_endpoint_test.go @@ -425,48 +425,51 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) { codec := rpcClient(t, srv) + // setup: create a client node with a controller and node plugin + node := mock.Node() + node.Attributes["nomad.version"] = "0.11.0" + node.CSINodePlugins = map[string]*structs.CSIInfo{ + "minnie": {PluginID: "minnie", + Healthy: true, + NodeInfo: &structs.CSINodeInfo{}, + }, + } + node.CSIControllerPlugins = map[string]*structs.CSIInfo{ + "minnie": {PluginID: "minnie", + Healthy: true, + ControllerInfo: &structs.CSIControllerInfo{SupportsAttachDetach: true}, + RequiresControllerPlugin: true, + }, + } + index++ + require.NoError(t, state.UpsertNode(index, node)) + type tc struct { name string startingState structs.CSIVolumeClaimState - hasController bool expectedErrMsg string } - testCases := []tc{ - { - name: "no path to node plugin", - startingState: structs.CSIVolumeClaimStateTaken, - hasController: true, - expectedErrMsg: "could not detach from node: Unknown node ", - }, - { - name: "no registered controller plugin", - startingState: structs.CSIVolumeClaimStateNodeDetached, - hasController: true, - expectedErrMsg: "could not detach from controller: controller detach volume: plugin missing: minnie", - }, { name: "success", startingState: structs.CSIVolumeClaimStateControllerDetached, - hasController: true, + }, + { + name: "unpublish previously detached node", + startingState: structs.CSIVolumeClaimStateNodeDetached, + expectedErrMsg: "could not detach from controller: No path to node", + }, + { + name: "first unpublish", + startingState: structs.CSIVolumeClaimStateTaken, + expectedErrMsg: "could not detach from node: No path to node", }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - + // setup: register a volume volID := uuid.Generate() - nodeID := uuid.Generate() - allocID := uuid.Generate() - - claim := &structs.CSIVolumeClaim{ - AllocationID: allocID, - NodeID: nodeID, - ExternalNodeID: "i-example", - Mode: structs.CSIVolumeClaimRead, - State: tc.startingState, - } - vol := &structs.CSIVolume{ ID: volID, Namespace: ns, @@ -474,13 +477,36 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) { AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, PluginID: "minnie", Secrets: structs.CSISecrets{"mysecret": "secretvalue"}, - ControllerRequired: tc.hasController, + ControllerRequired: true, } index++ err = state.CSIVolumeRegister(index, []*structs.CSIVolume{vol}) require.NoError(t, err) + // setup: create an alloc that will claim our volume + alloc := mock.BatchAlloc() + alloc.NodeID = node.ID + alloc.ClientStatus = structs.AllocClientStatusFailed + + index++ + require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc})) + + // setup: claim the volume for our alloc + claim := &structs.CSIVolumeClaim{ + AllocationID: alloc.ID, + NodeID: node.ID, + ExternalNodeID: "i-example", + Mode: structs.CSIVolumeClaimRead, + } + + index++ + claim.State = structs.CSIVolumeClaimStateTaken + err = state.CSIVolumeClaim(index, ns, volID, claim) + require.NoError(t, err) + + // test: unpublish and check the results + claim.State = tc.startingState req := &structs.CSIVolumeUnpublishRequest{ VolumeID: volID, Claim: claim, @@ -497,6 +523,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) { if tc.expectedErrMsg == "" { require.NoError(t, err) } else { + require.Error(t, err) require.True(t, strings.Contains(err.Error(), tc.expectedErrMsg), "error message %q did not contain %q", err.Error(), tc.expectedErrMsg) } diff --git a/vendor/github.com/hashicorp/nomad/api/csi.go b/vendor/github.com/hashicorp/nomad/api/csi.go index fbc984e51..47d7017a2 100644 --- a/vendor/github.com/hashicorp/nomad/api/csi.go +++ b/vendor/github.com/hashicorp/nomad/api/csi.go @@ -60,6 +60,11 @@ func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error { return err } +func (v *CSIVolumes) Detach(volID, nodeID string, w *WriteOptions) error { + _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/detach?node=%v", url.PathEscape(volID), nodeID), nil, w) + return err +} + // CSIVolumeAttachmentMode duplicated in nomad/structs/csi.go type CSIVolumeAttachmentMode string diff --git a/website/data/docs-navigation.js b/website/data/docs-navigation.js index dbbeed0dd..1fd33342a 100644 --- a/website/data/docs-navigation.js +++ b/website/data/docs-navigation.js @@ -148,7 +148,7 @@ export default [ { category: 'system', content: ['gc', 'reconcile-summaries'] }, 'ui', 'version', - { category: 'volume', content: ['register', 'deregister', 'status'] } + { category: 'volume', content: ['deregister', 'detach', 'status', 'register'] } ] }, '----------', diff --git a/website/pages/api-docs/volumes.mdx b/website/pages/api-docs/volumes.mdx index 5c1e78411..ea2843aec 100644 --- a/website/pages/api-docs/volumes.mdx +++ b/website/pages/api-docs/volumes.mdx @@ -297,8 +297,8 @@ $ curl \ ## Delete Volume -This endpoint deregisters an external volume with Nomad. It is an -error to deregister a volume that is in use. +This endpoint deregisters an external volume with Nomad. It is an error to +deregister a volume that is in use. | Method | Path | Produces | | ------- | ---------------------------- | ------------------ | @@ -331,3 +331,37 @@ $ curl \ --data @payload.json \ https://localhost:4646/v1/volume/csi/volume-id1?force=false ``` + +## Detach Volume + +This endpoint detaches an external volume from a Nomad client node. It is an +error to detach a volume that is in use. + +| Method | Path | Produces | +| ------- | ----------------------------------- | ------------------ | +| `DELETE` | `/v1/volume/csi/:volume_id/detach` | `application/json` | + +The table below shows this endpoint's support for +[blocking queries](/api-docs#blocking-queries) and +[required ACLs](/api-docs#acls). + +| Blocking Queries | ACL Required | +| ---------------- | ---------------------------- | +| `NO` | `namespace:csi-write-volume` | + +### Parameters + +- `:volume_id` `(string: )`- Specifies the ID of the + volume. This must be the full ID. This is specified as part of the + path. + +- `node` `(string: )`- The node to detach the volume from. + +### Sample Request + +```shell-session +$ curl \ + --request DELETE \ + --data @payload.json \ + https://localhost:4646/v1/volume/csi/volume-id/detach +``` diff --git a/website/pages/docs/commands/volume/detach.mdx b/website/pages/docs/commands/volume/detach.mdx new file mode 100644 index 000000000..9b80ba0e6 --- /dev/null +++ b/website/pages/docs/commands/volume/detach.mdx @@ -0,0 +1,28 @@ +--- +layout: docs +page_title: 'Commands: volume detach' +sidebar_title: detach +description: | + Detach volumes with CSI plugins. +--- + +# Command: volume detach + +The `volume detach` command detaches external storage volumes with Nomad's +[Container Storage Interface (CSI)][csi] support. + +## Usage + +```plaintext +nomad volume detach [options] [volume] [node] +``` + +The `volume detach` command requires two arguments, specifying the ID of the +volume to be detached and the node to detach it from. Detaching will fail if +the volume is still in use by an allocation. + +## General Options + +@include 'general_options.mdx' + +[csi]: https://github.com/container-storage-interface/spec diff --git a/website/pages/docs/commands/volume/index.mdx b/website/pages/docs/commands/volume/index.mdx index 0f3db8266..67930c155 100644 --- a/website/pages/docs/commands/volume/index.mdx +++ b/website/pages/docs/commands/volume/index.mdx @@ -17,10 +17,12 @@ Usage: `nomad volume [options]` Run `nomad volume -h` for help on that subcommand. The following subcommands are available: -- [`volume register`][register] - Register a volume. - [`volume deregister`][deregister] - Deregister a volume. +- [`volume detach`][detach] - Detach a volume. +- [`volume register`][register] - Register a volume. - [`volume status`][status] - Display status information about a volume -[register]: /docs/commands/volume/register 'Register a volume' [deregister]: /docs/commands/volume/deregister 'Deregister a volume' +[detach]: /docs/commands/volume/detach 'Detach a volume' +[register]: /docs/commands/volume/register 'Register a volume' [status]: /docs/commands/volume/status 'Display status information about a volume'