From 41cf1b03b467dc3f1269ef67776ef8eb2c7a7d20 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 21 May 2025 08:55:52 -0400 Subject: [PATCH] host volumes: -force flag for delete (#25902) When a node is garbage collected, we leave behind the dynamic host volume in the state store. We don't want to automatically garbage collect the volumes and risk data loss, but we should allow these to be removed via the API. Fixes: https://github.com/hashicorp/nomad/issues/25762 Fixes: https://hashicorp.atlassian.net/browse/NMD-705 --- .changelog/25902.txt | 3 +++ api/host_volumes.go | 6 +++++- command/volume_delete.go | 20 +++++++++++++------ nomad/host_volume_endpoint.go | 9 ++++++++- nomad/host_volume_endpoint_test.go | 15 +++++++++++--- nomad/structs/host_volumes.go | 1 + .../content/docs/commands/volume/delete.mdx | 4 ++++ 7 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 .changelog/25902.txt diff --git a/.changelog/25902.txt b/.changelog/25902.txt new file mode 100644 index 000000000..5e3d4e3bc --- /dev/null +++ b/.changelog/25902.txt @@ -0,0 +1,3 @@ +```release-note:improvement +host volumes: Add -force flag to volume delete command for removing volumes from GC'd nodes +``` diff --git a/api/host_volumes.go b/api/host_volumes.go index 591cee1d2..f43b6eaa4 100644 --- a/api/host_volumes.go +++ b/api/host_volumes.go @@ -174,7 +174,8 @@ type HostVolumeListRequest struct { } type HostVolumeDeleteRequest struct { - ID string + ID string + Force bool } type HostVolumeDeleteResponse struct{} @@ -244,6 +245,9 @@ func (hv *HostVolumes) Delete(req *HostVolumeDeleteRequest, opts *WriteOptions) if err != nil { return nil, nil, err } + if req.Force { + path = path + "?force=true" + } wm, err := hv.client.delete(path, nil, resp, opts) return resp, wm, err } diff --git a/command/volume_delete.go b/command/volume_delete.go index 6098d4329..6718c8e9e 100644 --- a/command/volume_delete.go +++ b/command/volume_delete.go @@ -29,9 +29,9 @@ Usage: nomad volume delete [options] 
unpublished. If the volume no longer exists, this command will silently return without an error. - When ACLs are enabled, this command requires a token with the - 'csi-write-volume' and 'csi-read-volume' capabilities for the volume's - namespace. + When ACLs are enabled, this command requires a token with the appropriate + capability in the volume's namespace: the 'csi-write-volume' capability for + CSI volumes or 'host-volume-delete' for dynamic host volumes. General Options: @@ -39,6 +39,11 @@ General Options: Delete Options: + -force + Delete the volume from the Nomad state store if the node has been garbage + collected. You should only use -force if the node will never rejoin the + cluster. Only available for dynamic host volumes. + -secret Secrets to pass to the plugin to delete the snapshot. Accepts multiple flags in the form -secret key=value. Only available for CSI volumes. @@ -88,10 +93,12 @@ func (c *VolumeDeleteCommand) Name() string { return "volume delete" } func (c *VolumeDeleteCommand) Run(args []string) int { var secretsArgs flaghelper.StringFlag var typeArg string + var force bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.Var(&secretsArgs, "secret", "secrets for snapshot, ex. 
-secret key=value") flags.StringVar(&typeArg, "type", "csi", "type of volume (csi or host)") + flags.BoolVar(&force, "force", false, "force delete from garbage collected node") if err := flags.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) @@ -118,7 +125,7 @@ func (c *VolumeDeleteCommand) Run(args []string) int { case "csi": return c.deleteCSIVolume(client, volID, secretsArgs) case "host": - return c.deleteHostVolume(client, volID) + return c.deleteHostVolume(client, volID, force) default: c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg)) return 1 @@ -174,7 +181,7 @@ func (c *VolumeDeleteCommand) deleteCSIVolume(client *api.Client, volID string, return 0 } -func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) int { +func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string, force bool) int { if !helper.IsUUID(volID) { stub, possible, err := getHostVolumeByPrefix(client, volID, c.namespace) @@ -195,7 +202,8 @@ func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) c.namespace = stub.Namespace } - _, _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ID: volID}, nil) + _, _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ + ID: volID, Force: force}, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error deleting volume: %s", err)) return 1 diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index 1558b9573..bcfebe54c 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -671,7 +671,14 @@ func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *struct // serialize client RPC and raft write per volume ID index, err := v.serializeCall(vol.ID, "delete", func() (uint64, error) { if err := v.deleteVolume(vol); err != nil { - return 0, err + if structs.IsErrUnknownNode(err) { + if !args.Force { + return 0, fmt.Errorf( + "volume cannot be removed from unknown 
node without force=true") + } + } else { + return 0, err + } } _, idx, err := v.srv.raftApply(structs.HostVolumeDeleteRequestType, args) if err != nil { diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index a73f8cce6..6a46c83b9 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -392,16 +392,25 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { must.Nil(t, getResp.Volume) }) + index++ + must.NoError(t, srv.State().DeleteNode(structs.MsgTypeTestSetup, index, []string{vol1.NodeID})) + // delete vol1 to finish cleaning up - var delResp structs.HostVolumeDeleteResponse - err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", &structs.HostVolumeDeleteRequest{ + delReq := &structs.HostVolumeDeleteRequest{ VolumeID: vol1.ID, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: vol1.Namespace, AuthToken: powerToken, }, - }, &delResp) + } + + var delResp structs.HostVolumeDeleteResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, "volume cannot be removed from unknown node without force=true") + + delReq.Force = true + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) must.NoError(t, err) // should be no volumes left diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index f5ed57006..81956954a 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -401,6 +401,7 @@ type HostVolumeRegisterResponse struct { type HostVolumeDeleteRequest struct { VolumeID string + Force bool WriteRequest } diff --git a/website/content/docs/commands/volume/delete.mdx b/website/content/docs/commands/volume/delete.mdx index 4d43de0b4..88dc52f2e 100644 --- a/website/content/docs/commands/volume/delete.mdx +++ b/website/content/docs/commands/volume/delete.mdx @@ -39,6 +39,10 @@ volumes or `host-volume-delete` for dynamic host volumes. 
## Delete options +- `-force`: Delete the volume from the Nomad state store if the node has been + garbage collected. You should only use `-force` if the node will never rejoin + the cluster. Only available for dynamic host volumes. + - `-secret`: Secrets to pass to the plugin to delete the snapshot. Accepts multiple flags in the form `-secret key=value`. Only available for CSI volumes.