host volumes: -force flag for delete (#25902)

When a node is garbage collected, we leave behind the dynamic host volume in the
state store. We don't want to automatically garbage collect the volumes and risk
data loss, but we should allow these to be removed via the API.

Fixes: https://github.com/hashicorp/nomad/issues/25762
Fixes: https://hashicorp.atlassian.net/browse/NMD-705
This commit is contained in:
Tim Gross
2025-05-21 08:55:52 -04:00
committed by GitHub
parent b6d9424c4b
commit 41cf1b03b4
7 changed files with 47 additions and 11 deletions

3
.changelog/25902.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
host volumes: Add -force flag to volume delete command for removing volumes from GC'd nodes
```

View File

@@ -175,6 +175,7 @@ type HostVolumeListRequest struct {
// HostVolumeDeleteRequest carries the parameters for deleting a dynamic host
// volume through the HostVolumes API client.
type HostVolumeDeleteRequest struct {
// ID is the ID of the volume to delete.
ID string
// Force, when true, causes Delete to add the "force=true" query parameter to
// the request. It is intended for removing a volume from the state store
// when its node has been garbage collected.
Force bool
}
// HostVolumeDeleteResponse is an empty struct with no fields; presumably it is
// the response body for a host volume delete (confirm against Delete).
type HostVolumeDeleteResponse struct{}
@@ -244,6 +245,9 @@ func (hv *HostVolumes) Delete(req *HostVolumeDeleteRequest, opts *WriteOptions)
if err != nil {
return nil, nil, err
}
if req.Force {
path = path + "?force=true"
}
wm, err := hv.client.delete(path, nil, resp, opts)
return resp, wm, err
}

View File

@@ -29,9 +29,9 @@ Usage: nomad volume delete [options] <vol id>
unpublished. If the volume no longer exists, this command will silently
return without an error.
When ACLs are enabled, this command requires a token with the
'csi-write-volume' and 'csi-read-volume' capabilities for the volume's
namespace.
When ACLs are enabled, this command requires a token with the appropriate
capability in the volume's namespace: the 'csi-write-volume' capability for
CSI volumes or 'host-volume-delete' for dynamic host volumes.
General Options:
@@ -39,6 +39,11 @@ General Options:
Delete Options:
-force
Delete the volume from the Nomad state store if the node has been garbage
collected. You should only use -force if the node will never rejoin the
cluster. Only available for dynamic host volumes.
-secret
Secrets to pass to the plugin to delete the volume. Accepts multiple
flags in the form -secret key=value. Only available for CSI volumes.
@@ -88,10 +93,12 @@ func (c *VolumeDeleteCommand) Name() string { return "volume delete" }
func (c *VolumeDeleteCommand) Run(args []string) int {
var secretsArgs flaghelper.StringFlag
var typeArg string
var force bool
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.Var(&secretsArgs, "secret", "secrets for snapshot, ex. -secret key=value")
flags.StringVar(&typeArg, "type", "csi", "type of volume (csi or host)")
flags.BoolVar(&force, "force", false, "force delete from garbage collected node")
if err := flags.Parse(args); err != nil {
c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err))
@@ -118,7 +125,7 @@ func (c *VolumeDeleteCommand) Run(args []string) int {
case "csi":
return c.deleteCSIVolume(client, volID, secretsArgs)
case "host":
return c.deleteHostVolume(client, volID)
return c.deleteHostVolume(client, volID, force)
default:
c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg))
return 1
@@ -174,7 +181,7 @@ func (c *VolumeDeleteCommand) deleteCSIVolume(client *api.Client, volID string,
return 0
}
func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) int {
func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string, force bool) int {
if !helper.IsUUID(volID) {
stub, possible, err := getHostVolumeByPrefix(client, volID, c.namespace)
@@ -195,7 +202,8 @@ func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string)
c.namespace = stub.Namespace
}
_, _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ID: volID}, nil)
_, _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{
ID: volID, Force: force}, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error deleting volume: %s", err))
return 1

View File

@@ -671,8 +671,15 @@ func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *struct
// serialize client RPC and raft write per volume ID
index, err := v.serializeCall(vol.ID, "delete", func() (uint64, error) {
if err := v.deleteVolume(vol); err != nil {
if structs.IsErrUnknownNode(err) {
if !args.Force {
return 0, fmt.Errorf(
"volume cannot be removed from unknown node without force=true")
}
} else {
return 0, err
}
}
_, idx, err := v.srv.raftApply(structs.HostVolumeDeleteRequestType, args)
if err != nil {
v.logger.Error("raft apply failed", "error", err, "method", "delete")

View File

@@ -392,16 +392,25 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) {
must.Nil(t, getResp.Volume)
})
index++
must.NoError(t, srv.State().DeleteNode(structs.MsgTypeTestSetup, index, []string{vol1.NodeID}))
// delete vol1 to finish cleaning up
var delResp structs.HostVolumeDeleteResponse
err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", &structs.HostVolumeDeleteRequest{
delReq := &structs.HostVolumeDeleteRequest{
VolumeID: vol1.ID,
WriteRequest: structs.WriteRequest{
Region: srv.Region(),
Namespace: vol1.Namespace,
AuthToken: powerToken,
},
}, &delResp)
}
var delResp structs.HostVolumeDeleteResponse
err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp)
must.EqError(t, err, "volume cannot be removed from unknown node without force=true")
delReq.Force = true
err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp)
must.NoError(t, err)
// should be no volumes left

View File

@@ -401,6 +401,7 @@ type HostVolumeRegisterResponse struct {
// HostVolumeDeleteRequest is the RPC request for deleting a host volume.
type HostVolumeDeleteRequest struct {
// VolumeID is the ID of the volume to delete.
VolumeID string
// Force allows the delete to proceed when the volume's node is unknown to
// the server; without it the HostVolume.Delete RPC returns an error in that
// case.
Force bool
// WriteRequest is embedded; callers supply Region, Namespace, and AuthToken
// through it.
WriteRequest
}

View File

@@ -39,6 +39,10 @@ volumes or `host-volume-delete` for dynamic host volumes.
## Delete options
- `-force`: Delete the volume from the Nomad state store if the node has been
garbage collected. You should only use `-force` if the node will never rejoin
the cluster. Only available for dynamic host volumes.
- `-secret`: Secrets to pass to the plugin to delete the volume. Accepts
multiple flags in the form `-secret key=value`. Only available for CSI
volumes.