csi: nomad volume detach command (#8584)

The soundness guarantees of the CSI specification leave a little to be desired
in our ability to provide a 100% reliable automated solution for managing
volumes. This changeset provides a new command to bridge this gap by providing
the operator the ability to intervene.

The command doesn't take an allocation ID so that the operator doesn't have to
keep track of alloc IDs that may have been GC'd. Handle this case in the
unpublish RPC by sending the client RPC for all the terminal/nil allocs on the
selected node.
This commit is contained in:
Tim Gross
2020-08-11 10:18:54 -04:00
committed by GitHub
parent d21ef34cbc
commit fbefdb98c3
12 changed files with 353 additions and 45 deletions

View File

@@ -60,6 +60,11 @@ func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error {
return err
}
func (v *CSIVolumes) Detach(volID, nodeID string, w *WriteOptions) error {
_, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/detach?node=%v", url.PathEscape(volID), nodeID), nil, w)
return err
}
// CSIVolumeAttachmentMode duplicated in nomad/structs/csi.go
type CSIVolumeAttachmentMode string

View File

@@ -51,21 +51,35 @@ func (s *HTTPServer) CSIVolumeSpecificRequest(resp http.ResponseWriter, req *htt
// Tokenize the suffix of the path to get the volume id
reqSuffix := strings.TrimPrefix(req.URL.Path, "/v1/volume/csi/")
tokens := strings.Split(reqSuffix, "/")
if len(tokens) > 2 || len(tokens) < 1 {
if len(tokens) < 1 {
return nil, CodedError(404, resourceNotFoundErr)
}
id := tokens[0]
switch req.Method {
case "GET":
return s.csiVolumeGet(id, resp, req)
case "PUT":
return s.csiVolumePut(id, resp, req)
case "DELETE":
return s.csiVolumeDelete(id, resp, req)
default:
return nil, CodedError(405, ErrInvalidMethod)
if len(tokens) == 1 {
switch req.Method {
case http.MethodGet:
return s.csiVolumeGet(id, resp, req)
case http.MethodPut:
return s.csiVolumePut(id, resp, req)
case http.MethodDelete:
return s.csiVolumeDelete(id, resp, req)
default:
return nil, CodedError(405, ErrInvalidMethod)
}
}
if len(tokens) == 2 {
if tokens[1] != "detach" {
return nil, CodedError(404, resourceNotFoundErr)
}
if req.Method != http.MethodDelete {
return nil, CodedError(405, ErrInvalidMethod)
}
return s.csiVolumeDetach(id, resp, req)
}
return nil, CodedError(404, resourceNotFoundErr)
}
func (s *HTTPServer) csiVolumeGet(id string, resp http.ResponseWriter, req *http.Request) (interface{}, error) {
@@ -149,6 +163,34 @@ func (s *HTTPServer) csiVolumeDelete(id string, resp http.ResponseWriter, req *h
return nil, nil
}
func (s *HTTPServer) csiVolumeDetach(id string, resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != http.MethodDelete {
return nil, CodedError(405, ErrInvalidMethod)
}
nodeID := req.URL.Query().Get("node")
if nodeID == "" {
return nil, CodedError(400, "detach requires node ID")
}
args := structs.CSIVolumeUnpublishRequest{
VolumeID: id,
Claim: &structs.CSIVolumeClaim{
NodeID: nodeID,
Mode: structs.CSIVolumeClaimRelease,
},
}
s.parseWriteRequest(req, &args.WriteRequest)
var out structs.CSIVolumeUnpublishResponse
if err := s.agent.RPC("CSIVolume.Unpublish", &args, &out); err != nil {
return nil, err
}
setMeta(resp, &out.QueryMeta)
return nil, nil
}
// CSIPluginsRequest lists CSI plugins
func (s *HTTPServer) CSIPluginsRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != "GET" {

View File

@@ -723,6 +723,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"volume detach": func() (cli.Command, error) {
return &VolumeDetachCommand{
Meta: meta,
}, nil
},
}
deprecated := map[string]cli.CommandFactory{

View File

@@ -28,6 +28,10 @@ Usage: nomad volume <subcommand> [options]
$ nomad volume deregister <id>
Detach an unused volume:
$ nomad volume detach <vol id> <node id>
Please see the individual subcommand help for detailed usage information.
`
return strings.TrimSpace(helpText)

97
command/volume_detach.go Normal file
View File

@@ -0,0 +1,97 @@
package command
import (
"fmt"
"strings"
"github.com/hashicorp/nomad/api/contexts"
"github.com/posener/complete"
)
type VolumeDetachCommand struct {
Meta
}
func (c *VolumeDetachCommand) Help() string {
helpText := `
Usage: nomad volume detach [options] <vol id> <node id>
Detach a volume from a Nomad client.
General Options:
` + generalOptionsUsage() + `
`
return strings.TrimSpace(helpText)
}
func (c *VolumeDetachCommand) AutocompleteFlags() complete.Flags {
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
complete.Flags{})
}
func (c *VolumeDetachCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictFunc(func(a complete.Args) []string {
client, err := c.Meta.Client()
if err != nil {
return nil
}
resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Volumes, nil)
if err != nil {
return []string{}
}
matches := resp.Matches[contexts.Volumes]
resp, _, err = client.Search().PrefixSearch(a.Last, contexts.Nodes, nil)
if err != nil {
return []string{}
}
for _, match := range resp.Matches[contexts.Nodes] {
matches = append(matches, match)
}
return matches
})
}
func (c *VolumeDetachCommand) Synopsis() string {
return "Detach a volume"
}
func (c *VolumeDetachCommand) Name() string { return "volume detach" }
func (c *VolumeDetachCommand) Run(args []string) int {
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
if err := flags.Parse(args); err != nil {
c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err))
return 1
}
// Check that we get exactly two arguments
args = flags.Args()
if l := len(args); l != 2 {
c.Ui.Error("This command takes two arguments: <vol id> <node id>")
c.Ui.Error(commandErrorText(c))
return 1
}
volID := args[0]
nodeID := args[1]
// Get the HTTP client
client, err := c.Meta.Client()
if err != nil {
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
return 1
}
err = client.CSIVolumes().Detach(volID, nodeID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error detaching volume: %s", err))
return 1
}
return 0
}

View File

@@ -589,6 +589,66 @@ RELEASE_CLAIM:
}
func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
if claim.AllocationID != "" {
err := v.nodeUnpublishVolumeImpl(vol, claim)
if err != nil {
return err
}
claim.State = structs.CSIVolumeClaimStateNodeDetached
return v.checkpointClaim(vol, claim)
}
// The RPC sent from the 'nomad node detach' command won't have an
// allocation ID set so we try to unpublish every terminal or invalid
// alloc on the node
allocIDs := []string{}
state := v.srv.fsm.State()
vol, err := state.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
if err != nil {
return err
}
for allocID, alloc := range vol.ReadAllocs {
if alloc == nil {
rclaim, ok := vol.ReadClaims[allocID]
if ok && rclaim.NodeID == claim.NodeID {
allocIDs = append(allocIDs, allocID)
}
} else {
if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() {
allocIDs = append(allocIDs, allocID)
}
}
}
for allocID, alloc := range vol.WriteAllocs {
if alloc == nil {
wclaim, ok := vol.WriteClaims[allocID]
if ok && wclaim.NodeID == claim.NodeID {
allocIDs = append(allocIDs, allocID)
}
} else {
if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() {
allocIDs = append(allocIDs, allocID)
}
}
}
var merr multierror.Error
for _, allocID := range allocIDs {
claim.AllocationID = allocID
err := v.nodeUnpublishVolumeImpl(vol, claim)
if err != nil {
merr.Errors = append(merr.Errors, err)
}
}
err = merr.ErrorOrNil()
if err != nil {
return err
}
claim.State = structs.CSIVolumeClaimStateNodeDetached
return v.checkpointClaim(vol, claim)
}
func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
req := &cstructs.ClientCSINodeDetachVolumeRequest{
PluginID: vol.PluginID,
VolumeID: vol.ID,
@@ -609,8 +669,7 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C
return fmt.Errorf("could not detach from node: %w", err)
}
}
claim.State = structs.CSIVolumeClaimStateNodeDetached
return v.checkpointClaim(vol, claim)
return nil
}
func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {

View File

@@ -425,48 +425,51 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
codec := rpcClient(t, srv)
// setup: create a client node with a controller and node plugin
node := mock.Node()
node.Attributes["nomad.version"] = "0.11.0"
node.CSINodePlugins = map[string]*structs.CSIInfo{
"minnie": {PluginID: "minnie",
Healthy: true,
NodeInfo: &structs.CSINodeInfo{},
},
}
node.CSIControllerPlugins = map[string]*structs.CSIInfo{
"minnie": {PluginID: "minnie",
Healthy: true,
ControllerInfo: &structs.CSIControllerInfo{SupportsAttachDetach: true},
RequiresControllerPlugin: true,
},
}
index++
require.NoError(t, state.UpsertNode(index, node))
type tc struct {
name string
startingState structs.CSIVolumeClaimState
hasController bool
expectedErrMsg string
}
testCases := []tc{
{
name: "no path to node plugin",
startingState: structs.CSIVolumeClaimStateTaken,
hasController: true,
expectedErrMsg: "could not detach from node: Unknown node ",
},
{
name: "no registered controller plugin",
startingState: structs.CSIVolumeClaimStateNodeDetached,
hasController: true,
expectedErrMsg: "could not detach from controller: controller detach volume: plugin missing: minnie",
},
{
name: "success",
startingState: structs.CSIVolumeClaimStateControllerDetached,
hasController: true,
},
{
name: "unpublish previously detached node",
startingState: structs.CSIVolumeClaimStateNodeDetached,
expectedErrMsg: "could not detach from controller: No path to node",
},
{
name: "first unpublish",
startingState: structs.CSIVolumeClaimStateTaken,
expectedErrMsg: "could not detach from node: No path to node",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// setup: register a volume
volID := uuid.Generate()
nodeID := uuid.Generate()
allocID := uuid.Generate()
claim := &structs.CSIVolumeClaim{
AllocationID: allocID,
NodeID: nodeID,
ExternalNodeID: "i-example",
Mode: structs.CSIVolumeClaimRead,
State: tc.startingState,
}
vol := &structs.CSIVolume{
ID: volID,
Namespace: ns,
@@ -474,13 +477,36 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
PluginID: "minnie",
Secrets: structs.CSISecrets{"mysecret": "secretvalue"},
ControllerRequired: tc.hasController,
ControllerRequired: true,
}
index++
err = state.CSIVolumeRegister(index, []*structs.CSIVolume{vol})
require.NoError(t, err)
// setup: create an alloc that will claim our volume
alloc := mock.BatchAlloc()
alloc.NodeID = node.ID
alloc.ClientStatus = structs.AllocClientStatusFailed
index++
require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc}))
// setup: claim the volume for our alloc
claim := &structs.CSIVolumeClaim{
AllocationID: alloc.ID,
NodeID: node.ID,
ExternalNodeID: "i-example",
Mode: structs.CSIVolumeClaimRead,
}
index++
claim.State = structs.CSIVolumeClaimStateTaken
err = state.CSIVolumeClaim(index, ns, volID, claim)
require.NoError(t, err)
// test: unpublish and check the results
claim.State = tc.startingState
req := &structs.CSIVolumeUnpublishRequest{
VolumeID: volID,
Claim: claim,
@@ -497,6 +523,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
if tc.expectedErrMsg == "" {
require.NoError(t, err)
} else {
require.Error(t, err)
require.True(t, strings.Contains(err.Error(), tc.expectedErrMsg),
"error message %q did not contain %q", err.Error(), tc.expectedErrMsg)
}

View File

@@ -60,6 +60,11 @@ func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error {
return err
}
func (v *CSIVolumes) Detach(volID, nodeID string, w *WriteOptions) error {
_, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/detach?node=%v", url.PathEscape(volID), nodeID), nil, w)
return err
}
// CSIVolumeAttachmentMode duplicated in nomad/structs/csi.go
type CSIVolumeAttachmentMode string

View File

@@ -148,7 +148,7 @@ export default [
{ category: 'system', content: ['gc', 'reconcile-summaries'] },
'ui',
'version',
{ category: 'volume', content: ['register', 'deregister', 'status'] }
{ category: 'volume', content: ['deregister', 'detach', 'status', 'register'] }
]
},
'----------',

View File

@@ -297,8 +297,8 @@ $ curl \
## Delete Volume
This endpoint deregisters an external volume with Nomad. It is an
error to deregister a volume that is in use.
This endpoint deregisters an external volume with Nomad. It is an error to
deregister a volume that is in use.
| Method | Path | Produces |
| ------- | ---------------------------- | ------------------ |
@@ -331,3 +331,37 @@ $ curl \
--data @payload.json \
https://localhost:4646/v1/volume/csi/volume-id1?force=false
```
## Detach Volume
This endpoint detaches an external volume from a Nomad client node. It is an
error to detach a volume that is in use.
| Method | Path | Produces |
| ------- | ----------------------------------- | ------------------ |
| `DELETE` | `/v1/volume/csi/:volume_id/detach` | `application/json` |
The table below shows this endpoint's support for
[blocking queries](/api-docs#blocking-queries) and
[required ACLs](/api-docs#acls).
| Blocking Queries | ACL Required |
| ---------------- | ---------------------------- |
| `NO` | `namespace:csi-write-volume` |
### Parameters
- `:volume_id` `(string: <required>)`- Specifies the ID of the
volume. This must be the full ID. This is specified as part of the
path.
- `node` `(string: <required>)`- The node to detach the volume from.
### Sample Request
```shell-session
$ curl \
--request DELETE \
--data @payload.json \
https://localhost:4646/v1/volume/csi/volume-id/detach
```

View File

@@ -0,0 +1,28 @@
---
layout: docs
page_title: 'Commands: volume detach'
sidebar_title: detach
description: |
Detach volumes with CSI plugins.
---
# Command: volume detach
The `volume detach` command detaches external storage volumes with Nomad's
[Container Storage Interface (CSI)][csi] support.
## Usage
```plaintext
nomad volume detach [options] [volume] [node]
```
The `volume detach` command requires two arguments, specifying the ID of the
volume to be detached and the node to detach it from. Detaching will fail if
the volume is still in use by an allocation.
## General Options
@include 'general_options.mdx'
[csi]: https://github.com/container-storage-interface/spec

View File

@@ -17,10 +17,12 @@ Usage: `nomad volume <subcommand> [options]`
Run `nomad volume <subcommand> -h` for help on that subcommand. The following
subcommands are available:
- [`volume register`][register] - Register a volume.
- [`volume deregister`][deregister] - Deregister a volume.
- [`volume detach`][detach] - Detach a volume.
- [`volume register`][register] - Register a volume.
- [`volume status`][status] - Display status information about a volume
[register]: /docs/commands/volume/register 'Register a volume'
[deregister]: /docs/commands/volume/deregister 'Deregister a volume'
[detach]: /docs/commands/volume/detach 'Detach a volume'
[register]: /docs/commands/volume/register 'Register a volume'
[status]: /docs/commands/volume/status 'Display status information about a volume'