diff --git a/.changelog/26213.txt b/.changelog/26213.txt new file mode 100644 index 000000000..7a4417a14 --- /dev/null +++ b/.changelog/26213.txt @@ -0,0 +1,3 @@ +```release-note:bug +host volumes: Fixed a bug where volumes with server-terminal allocations could be deleted from clients but not the state store +``` diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 0cb167c32..64544bd1f 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -143,11 +143,16 @@ func (s *StateStore) deleteHostVolumeTxn(txn *txn, index uint64, ns string, id s if obj != nil { vol := obj.(*structs.HostVolume) - allocs, err := s.AllocsByNodeTerminal(nil, vol.NodeID, false) + // we can't use AllocsByNodeTerminal because we only want to filter out + // allocs that are client-terminal, not server-terminal + allocs, err := s.AllocsByNode(nil, vol.NodeID) if err != nil { return fmt.Errorf("could not query allocs to check for host volume claims: %w", err) } for _, alloc := range allocs { + if alloc.ClientTerminalStatus() { + continue + } for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { if volClaim.Type == structs.VolumeTypeHost && volClaim.Name == vol.Name { return fmt.Errorf("could not delete volume %s in use by alloc %s", diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index 8da48a6ad..62bb27505 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -149,6 +149,18 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { must.EqError(t, err, fmt.Sprintf( "could not delete volume %s in use by alloc %s", vols[2].ID, alloc.ID)) + alloc = alloc.Copy() + alloc.DesiredStatus = structs.AllocDesiredStatusStop + index++ + must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + + index++ + err = store.DeleteHostVolume(index, vol2.Namespace, vols[2].ID) + must.EqError(t, err, fmt.Sprintf( + "could not delete volume %s in use by alloc %s", vols[2].ID, alloc.ID), + must.Sprint("allocs must be client-terminal to delete their volumes")) + err = store.DeleteHostVolume(index, vol2.Namespace, vols[1].ID) must.NoError(t, err) vol, err = store.HostVolumeByID(nil, vols[1].Namespace, vols[1].ID, true)