From 3f59860254ecccf4f34e623e1575e8cfbf0f7b6c Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 27 May 2025 10:22:08 -0400 Subject: [PATCH] host volumes: add configuration to GC on node GC (#25903) When a node is garbage collected, any dynamic host volumes on the node are orphaned in the state store. We generally don't want to automatically collect these volumes and risk data loss, and have provided a CLI flag to `-force` remove them in #25902. But for clusters running on ephemeral cloud instances (ex. AWS EC2 in an autoscaling group), deleting host volumes may add excessive friction. Add a configuration knob to the client configuration to remove host volumes from the state store on node GC. Ref: https://github.com/hashicorp/nomad/pull/25902 Ref: https://github.com/hashicorp/nomad/issues/25762 Ref: https://hashicorp.atlassian.net/browse/NMD-705 --- .changelog/25903.txt | 3 ++ api/nodes.go | 1 + client/client.go | 2 + client/config/config.go | 5 +++ command/agent/agent.go | 2 + command/agent/config.go | 8 ++++ command/agent/config_parse_test.go | 1 + command/agent/host_volume_endpoint.go | 12 ++++++ command/agent/testdata/basic.hcl | 1 + command/agent/testdata/basic.json | 1 + nomad/host_volume_endpoint.go | 2 +- nomad/state/events_test.go | 4 +- nomad/state/state_store.go | 9 +++- nomad/state/state_store_host_volumes.go | 43 +++++++++++++++++-- nomad/state/state_store_host_volumes_test.go | 8 ++++ nomad/structs/structs.go | 5 +++ website/content/docs/configuration/client.mdx | 6 +++ 17 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 .changelog/25903.txt diff --git a/.changelog/25903.txt b/.changelog/25903.txt new file mode 100644 index 000000000..c776d5e57 --- /dev/null +++ b/.changelog/25903.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: Add gc_volumes_on_node_gc configuration to delete host volumes when nodes are garbage collected +``` diff --git a/api/nodes.go b/api/nodes.go index 75df0957c..9f3574d34 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -566,6 +566,7 @@ type Node struct { Events []*NodeEvent Drivers map[string]*DriverInfo HostVolumes map[string]*HostVolumeInfo + GCVolumesOnNodeGC bool HostNetworks map[string]*HostNetworkInfo CSIControllerPlugins map[string]*CSIInfo CSINodePlugins map[string]*CSIInfo diff --git a/client/client.go b/client/client.go index e3f02e03c..d993903c2 100644 --- a/client/client.go +++ b/client/client.go @@ -1595,6 +1595,8 @@ func (c *Client) setupNode() error { node.HostVolumes[k] = v.Copy() } } + node.GCVolumesOnNodeGC = newConfig.GCVolumesOnNodeGC + if node.HostNetworks == nil { if l := len(newConfig.HostNetworks); l != 0 { node.HostNetworks = make(map[string]*structs.ClientHostNetworkConfig, l) diff --git a/client/config/config.go b/client/config/config.go index b32956929..ebefd532a 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -242,6 +242,11 @@ type Config struct { // before garbage collection is triggered. GCMaxAllocs int + // GCVolumesOnNodeGC indicates that the server should GC any dynamic host + // volumes on this node when the node is GC'd. This should only be set if + // you know that a GC'd node can never come back + GCVolumesOnNodeGC bool + // NoHostUUID disables using the host's UUID and will force generation of a // random UUID. 
NoHostUUID bool diff --git a/command/agent/agent.go b/command/agent/agent.go index 23b83214a..5760b3747 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -947,6 +947,8 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs + conf.GCVolumesOnNodeGC = agentConfig.Client.GCVolumesOnNodeGC + if agentConfig.Client.NoHostUUID != nil { conf.NoHostUUID = *agentConfig.Client.NoHostUUID } else { diff --git a/command/agent/config.go b/command/agent/config.go index 8b67fda80..dfe49aabe 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -342,6 +342,11 @@ type ClientConfig struct { // before garbage collection is triggered. GCMaxAllocs int `hcl:"gc_max_allocs"` + // GCVolumesOnNodeGC indicates that the server should GC any dynamic host + // volumes on this node when the node is GC'd. This should only be set if + // you know that a GC'd node can never come back + GCVolumesOnNodeGC bool `hcl:"gc_volumes_on_node_gc"` + // NoHostUUID disables using the host's UUID and will force generation of a // random UUID. NoHostUUID *bool `hcl:"no_host_uuid"` @@ -2563,6 +2568,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { if b.GCMaxAllocs != 0 { result.GCMaxAllocs = b.GCMaxAllocs } + if b.GCVolumesOnNodeGC { + result.GCVolumesOnNodeGC = b.GCVolumesOnNodeGC + } // NoHostUUID defaults to true, merge if false if b.NoHostUUID != nil { result.NoHostUUID = b.NoHostUUID diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index d7d8b13fc..9212f0c4a 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -88,6 +88,7 @@ var basicConfig = &Config{ GCDiskUsageThreshold: 82, GCInodeUsageThreshold: 91, GCMaxAllocs: 50, + GCVolumesOnNodeGC: true, NoHostUUID: pointer.Of(false), DisableRemoteExec: true, HostVolumes: []*structs.ClientHostVolumeConfig{ diff --git a/command/agent/host_volume_endpoint.go b/command/agent/host_volume_endpoint.go index a3791dee6..9d740fad0 100644 --- a/command/agent/host_volume_endpoint.go +++ b/command/agent/host_volume_endpoint.go @@ -5,6 +5,7 @@ package agent import ( "net/http" + "strconv" "strings" "github.com/hashicorp/nomad/nomad/structs" @@ -132,6 +133,17 @@ func (s *HTTPServer) hostVolumeDelete(id string, resp http.ResponseWriter, req * args := structs.HostVolumeDeleteRequest{VolumeID: id} s.parseWriteRequest(req, &args.WriteRequest) + raw := req.URL.Query().Get("force") + var force bool + if raw != "" { + var err error + force, err = strconv.ParseBool(raw) + if err != nil { + return nil, CodedError(400, "invalid force value") + } + } + args.Force = force + var out structs.HostVolumeDeleteResponse if err := s.agent.RPC("HostVolume.Delete", &args, &out); err != nil { return nil, err diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index d0e20d0bd..db0b1b6dd 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -95,6 +95,7 @@ client { gc_disk_usage_threshold = 82 gc_inode_usage_threshold = 91 gc_max_allocs = 50 + gc_volumes_on_node_gc = true no_host_uuid = false disable_remote_exec = true diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index 7394e7b05..15c09e925 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -94,6 +94,7 @@ 
"gc_interval": "6s", "gc_max_allocs": 50, "gc_parallel_destroys": 6, + "gc_volumes_on_node_gc": true, "host_volume": [ { "tmp": [ diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index bcfebe54c..4cc49d5a0 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -671,7 +671,7 @@ func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *struct // serialize client RPC and raft write per volume ID index, err := v.serializeCall(vol.ID, "delete", func() (uint64, error) { if err := v.deleteVolume(vol); err != nil { - if structs.IsErrUnknownNode(err) { + if structs.IsErrUnknownNode(err) || structs.IsErrNoNodeConn(err) { if !args.Force { return 0, fmt.Errorf( "volume cannot be removed from unknown node without force=true") diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 6aca0d844..a9dc877bb 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -820,7 +820,7 @@ func TestNodeEventsFromChanges(t *testing.T) { return upsertNodeTxn(tx, tx.Index, testNode()) }, Mutate: func(s *StateStore, tx *txn) error { - return deleteNodeTxn(tx, tx.Index, []string{testNodeID()}) + return s.deleteNodeTxn(tx, tx.Index, []string{testNodeID()}) }, WantEvents: []structs.Event{{ Topic: structs.TopicNode, @@ -841,7 +841,7 @@ func TestNodeEventsFromChanges(t *testing.T) { return upsertNodeTxn(tx, tx.Index, testNode(nodeIDTwo)) }, Mutate: func(s *StateStore, tx *txn) error { - return deleteNodeTxn(tx, tx.Index, []string{testNodeID(), testNodeIDTwo()}) + return s.deleteNodeTxn(tx, tx.Index, []string{testNodeID(), testNodeIDTwo()}) }, WantEvents: []structs.Event{ { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index a00656f65..a63e56795 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1052,14 +1052,14 @@ func (s *StateStore) DeleteNode(msgType structs.MessageType, index uint64, nodes txn := s.db.WriteTxn(index) defer txn.Abort() - err := deleteNodeTxn(txn, index, nodes) + err := s.deleteNodeTxn(txn, index, nodes) if err != nil { return nil } return txn.Commit() } -func deleteNodeTxn(txn *txn, index uint64, nodes []string) error { +func (s *StateStore) deleteNodeTxn(txn *txn, index uint64, nodes []string) error { if len(nodes) == 0 { return fmt.Errorf("node ids missing") } @@ -1082,6 +1082,11 @@ func deleteNodeTxn(txn *txn, index uint64, nodes []string) error { if err := deleteNodeCSIPlugins(txn, node, index); err != nil { return fmt.Errorf("csi plugin delete failed: %v", err) } + if node.GCVolumesOnNodeGC { + if err := s.deleteHostVolumesOnNode(txn, index, node.ID); err != nil { + return fmt.Errorf("dynamic host volume delete failed: %v", err) + } + } } if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 33117540b..0cb167c32 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -122,6 +122,20 @@ func (s *StateStore) DeleteHostVolume(index uint64, ns string, id string) error txn := s.db.WriteTxnMsgT(structs.HostVolumeDeleteRequestType, index) defer txn.Abort() + if err := s.deleteHostVolumeTxn(txn, index, ns, id); err != nil { + return err + } + + if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { + return fmt.Errorf("index update failed: %w", err) + } + + return txn.Commit() +} + +// deleteHostVolumeTxn implements a single dynamic host volume delete. 
The +// caller is responsible for updating the index and committing the transaction. +func (s *StateStore) deleteHostVolumeTxn(txn *txn, index uint64, ns string, id string) error { obj, err := txn.First(TableHostVolumes, indexID, ns, id) if err != nil { return err @@ -153,12 +167,35 @@ func (s *StateStore) DeleteHostVolume(index uint64, ns string, id string) error } } - if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { - return fmt.Errorf("index update failed: %w", err) + return nil +} + +func (s *StateStore) deleteHostVolumesOnNode(txn *txn, index uint64, nodeID string) error { + iter, err := s.HostVolumesByNodeID(nil, nodeID, SortDefault) + if err != nil { + return err } - return txn.Commit() + deleted := false // only update index if there was a volume to delete + for { + raw := iter.Next() + if raw == nil { + break + } + vol := raw.(*structs.HostVolume) + err := s.deleteHostVolumeTxn(txn, index, vol.Namespace, vol.ID) + if err != nil { + return err + } + deleted = true + } + if deleted { + if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { + return fmt.Errorf("index update failed: %w", err) + } + } + return nil } // HostVolumes queries all the host volumes and is mostly used for diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index 0d0b4746c..8da48a6ad 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -103,6 +103,7 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { // simulate a node registering one of the volumes nodes[2] = nodes[2].Copy() + nodes[2].GCVolumesOnNodeGC = true nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"example": { Name: vols[2].Name, Path: vols[2].HostPath, @@ -180,6 +181,13 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { index++ must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc})) + + index++ + must.NoError(t, store.DeleteNode(structs.MsgTypeTestSetup, index, []string{vol2.NodeID})) + iter, err = store.HostVolumesByNodeID(nil, vol2.NodeID, SortDefault) + must.NoError(t, err) + must.Nil(t, iter.Next(), must.Sprint("expected volume to be GC'd with node")) + for _, v := range vols { index++ must.NoError(t, store.DeleteHostVolume(index, v.Namespace, v.ID)) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a872927b0..68398f62a 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2157,6 +2157,11 @@ type Node struct { // HostVolumes is a map of host volume names to their configuration HostVolumes map[string]*ClientHostVolumeConfig + // GCVolumesOnNodeGC indicates that the server should GC any dynamic host + // volumes on this node when the node is GC'd. This should only be set if + // you know that a GC'd node can never come back + GCVolumesOnNodeGC bool + // HostNetworks is a map of host host_network names to their configuration HostNetworks map[string]*ClientHostNetworkConfig diff --git a/website/content/docs/configuration/client.mdx b/website/content/docs/configuration/client.mdx index 23e586cfc..73784f7a0 100644 --- a/website/content/docs/configuration/client.mdx +++ b/website/content/docs/configuration/client.mdx @@ -170,6 +170,12 @@ client { parallel destroys allowed by the garbage collector. This value should be relatively low to avoid high resource usage during garbage collections. 
+- `gc_volumes_on_node_gc` `(bool: false)` - Specifies that the server should + delete any dynamic host volumes on this node when the node is garbage + collected. You should only set this to `true` if you know that garbage + collected nodes will never rejoin the cluster, such as with ephemeral cloud + hosts. + - `no_host_uuid` `(bool: true)` - By default a random node UUID will be generated, but setting this to `false` will use the system's UUID.
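
For reference, a minimal client stanza that opts in to the new behavior might look like the sketch below. This is illustrative only; the surrounding agent configuration and any other client options are assumed, and the one setting shown is the `gc_volumes_on_node_gc` knob added by this patch (see `command/agent/testdata/basic.hcl` above for its placement in a full config):

```hcl
client {
  enabled = true

  # Only set this on nodes that will never rejoin the cluster, such as
  # ephemeral cloud instances in an autoscaling group. When the servers
  # garbage collect this node, its dynamic host volumes are deleted from
  # the state store as well.
  gc_volumes_on_node_gc = true
}
```

The option defaults to `false`, so existing clusters keep the current behavior: volumes on a garbage-collected node remain in the state store until they are removed with the force option introduced in #25902 (surfaced in this patch as the `force` query parameter on the host volume delete endpoint).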