diff --git a/api/host_volumes.go b/api/host_volumes.go
index 44352d787..576e2fbd6 100644
--- a/api/host_volumes.go
+++ b/api/host_volumes.go
@@ -82,9 +82,9 @@ type HostVolume struct {
 type HostVolumeState string
 
 const (
-	HostVolumeStatePending HostVolumeState = "pending"
-	HostVolumeStateReady   HostVolumeState = "ready"
-	HostVolumeStateDeleted HostVolumeState = "deleted"
+	HostVolumeStatePending     HostVolumeState = "pending"
+	HostVolumeStateReady       HostVolumeState = "ready"
+	HostVolumeStateUnavailable HostVolumeState = "unavailable"
 )
 
 // HostVolumeCapability is the requested attachment and access mode for a volume
diff --git a/command/volume_create_host.go b/command/volume_create_host.go
index 4aedc6e87..50588d15c 100644
--- a/command/volume_create_host.go
+++ b/command/volume_create_host.go
@@ -147,7 +147,7 @@ DONE:
 			).Row().MarginLeft(2)
 			break DONE
 
-		case api.HostVolumeStateDeleted:
+		case api.HostVolumeStateUnavailable:
 			endSpinner = glint.Layout(
 				glint.Text(fmt.Sprintf("! Host volume %q %s", limit(id, opts.length), vol.State)),
 			).Row().MarginLeft(2)
diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go
index 50294dcb1..961f1a16e 100644
--- a/nomad/state/state_store_host_volumes.go
+++ b/nomad/state/state_store_host_volumes.go
@@ -90,15 +90,11 @@ func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) err
 	if node == nil {
 		return fmt.Errorf("host volume %s has nonexistent node ID %s", vol.ID, vol.NodeID)
 	}
-	switch vol.State {
-	case structs.HostVolumeStateDeleted:
-		// no-op: don't allow soft-deletes to resurrect a previously fingerprinted volume
-	default:
-		// prevent a race between node fingerprint and create RPC that could
-		// switch a ready volume back to pending
-		if _, ok := node.HostVolumes[vol.Name]; ok {
-			vol.State = structs.HostVolumeStateReady
-		}
+
+	// prevent a race between node fingerprint and create RPC that could
+	// switch a ready volume back to pending
+	if _, ok := node.HostVolumes[vol.Name]; ok {
+		vol.State = structs.HostVolumeStateReady
 	}
 
 	// Register RPCs for new volumes may not have the node pool set
@@ -235,7 +231,7 @@ func upsertHostVolumeForNode(txn *txn, node *structs.Node, index uint64) error {
 		return err
 	}
 
-	var dirty bool
+	var dirty bool // signals we need to update table index
 
 	for {
 		raw := iter.Next()
@@ -243,26 +239,37 @@ func upsertHostVolumeForNode(txn *txn, node *structs.Node, index uint64) error {
 			break
 		}
 		vol := raw.(*structs.HostVolume)
-		if _, ok := node.HostVolumes[vol.Name]; !ok {
+		volState := vol.State
+		_, ok := node.HostVolumes[vol.Name]
+
+		switch {
+		case ok && vol.State != structs.HostVolumeStateReady:
+			// the fingerprint has been updated on the client for this volume
+			volState = structs.HostVolumeStateReady
+
+		case !ok && vol.State == structs.HostVolumeStateReady:
+			// the volume was previously fingerprinted but is no longer showing
+			// up in the fingerprint; this will usually be because of a failed
+			// restore on the client
+			volState = structs.HostVolumeStateUnavailable
+
+		case ok && vol.NodePool != node.NodePool:
+			// the client's node pool has been changed
+
+		default:
+			// nothing has changed, skip updating this volume
 			continue
 		}
 
-		// the fingerprint has been written on the client for this volume, or
-		// the client's node pool has been changed
-		if vol.State == structs.HostVolumeStateUnknown ||
-			vol.State == structs.HostVolumeStatePending ||
-			vol.NodePool != node.NodePool {
-
-			vol = vol.Copy()
-			vol.State = structs.HostVolumeStateReady
-			vol.NodePool = node.NodePool
-			vol.ModifyIndex = index
-			err = txn.Insert(TableHostVolumes, vol)
-			if err != nil {
-				return fmt.Errorf("host volume insert: %w", err)
-			}
-			dirty = true
+		vol = vol.Copy()
+		vol.State = volState
+		vol.NodePool = node.NodePool
+		vol.ModifyIndex = index
+		err = txn.Insert(TableHostVolumes, vol)
+		if err != nil {
+			return fmt.Errorf("host volume insert: %w", err)
 		}
+		dirty = true
 	}
 
 	if dirty {
diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go
index a185f98bf..567b03748 100644
--- a/nomad/state/state_store_host_volumes_test.go
+++ b/nomad/state/state_store_host_volumes_test.go
@@ -295,4 +295,39 @@ func TestStateStore_UpdateHostVolumesFromFingerprint(t *testing.T) {
 		must.Sprint("node pool change should update pending volume"))
 	must.Eq(t, "new-node-pool", vol2.NodePool)
 	must.Eq(t, structs.HostVolumeStateReady, vol2.State)
+
+	// node restarts and fails to restore
+	node = node.Copy()
+	node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{
+		"static-vol": {Name: "static-vol", Path: "/srv/static"},
+	}
+	index++
+	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node))
+
+	vol0, err = store.HostVolumeByID(nil, ns, vols[0].ID, false)
+	must.NoError(t, err)
+	must.Eq(t, index, vol0.ModifyIndex,
+		must.Sprint("failed restore should update ready volume"))
+	must.Eq(t, structs.HostVolumeStateUnavailable, vol0.State)
+
+	vol1, err = store.HostVolumeByID(nil, ns, vols[1].ID, false)
+	must.NoError(t, err)
+	must.Eq(t, index, vol1.ModifyIndex,
+		must.Sprint("failed restore should update ready volume"))
+	must.Eq(t, structs.HostVolumeStateUnavailable, vol1.State)
+
+	// make sure we can go from unavailable to available
+
+	node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{
+		"static-vol": {Name: "static-vol", Path: "/srv/static"},
+		"dhv-zero":   {Name: "dhv-zero", Path: "/var/nomad/alloc_mounts" + uuid.Generate()},
+	}
+	index++
+	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node))
+
+	vol0, err = store.HostVolumeByID(nil, ns, vols[0].ID, false)
+	must.NoError(t, err)
+	must.Eq(t, index, vol0.ModifyIndex,
+		must.Sprint("recovered node should update unavailable volume"))
+	must.Eq(t, structs.HostVolumeStateReady, vol0.State)
 }
diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go
index f503b9757..c29d3b369 100644
--- a/nomad/structs/host_volumes.go
+++ b/nomad/structs/host_volumes.go
@@ -89,10 +89,10 @@ type HostVolume struct {
 type HostVolumeState string
 
 const (
-	HostVolumeStateUnknown HostVolumeState = "" // never write this to Raft
-	HostVolumeStatePending HostVolumeState = "pending"
-	HostVolumeStateReady   HostVolumeState = "ready"
-	HostVolumeStateDeleted HostVolumeState = "deleted"
+	HostVolumeStateUnknown     HostVolumeState = "" // never write this to Raft
+	HostVolumeStatePending     HostVolumeState = "pending"
+	HostVolumeStateReady       HostVolumeState = "ready"
+	HostVolumeStateUnavailable HostVolumeState = "unavailable"
 )
 
 func (hv *HostVolume) Copy() *HostVolume {
diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go
index 3318a5270..9cbbc9989 100644
--- a/scheduler/feasible_test.go
+++ b/scheduler/feasible_test.go
@@ -218,7 +218,7 @@ func TestHostVolumeChecker_Dynamic(t *testing.T) {
 		Name:                  "foo",
 		NodeID:                nodes[2].ID,
 		RequestedCapabilities: hostVolCapsReadOnly,
-		State:                 structs.HostVolumeStateDeleted,
+		State:                 structs.HostVolumeStateUnavailable,
 	}
 	dhvReadOnly := &structs.HostVolume{
 		Namespace: structs.DefaultNamespace,
@@ -247,9 +247,7 @@ func TestHostVolumeChecker_Dynamic(t *testing.T) {
 			ReadOnly: false,
 		},
 	}
-	nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{
-		"foo": {ID: dhvNotReady.ID},
-	}
+	nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{}
 	nodes[3].HostVolumes = map[string]*structs.ClientHostVolumeConfig{
 		"foo": {ID: dhvReadOnly.ID},
 	}
@@ -265,6 +263,10 @@ func TestHostVolumeChecker_Dynamic(t *testing.T) {
 	must.NoError(t, store.UpsertHostVolume(1000, dhvReadOnly))
 	must.NoError(t, store.UpsertHostVolume(1000, dhvReadWrite))
 
+	// reinsert unavailable node to set the correct state on the unavailable
+	// volume
+	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, nodes[2]))
+
 	readwriteRequest := map[string]*structs.VolumeRequest{
 		"foo": {
 			Type: "host",
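
For reference, below is a minimal standalone sketch of the fingerprint-driven state machine that the new switch in upsertHostVolumeForNode implements. Everything here is illustrative only, not code from this patch: nextState, the simplified constants, and the demo table are stand-ins for the structs package types.

// state_sketch.go - illustrative only, not part of the patch above.
package main

import "fmt"

type HostVolumeState string

const (
	StatePending     HostVolumeState = "pending"
	StateReady       HostVolumeState = "ready"
	StateUnavailable HostVolumeState = "unavailable"
)

// nextState mirrors the switch in upsertHostVolumeForNode: fingerprinted is
// true when the volume's name appears in the node's HostVolumes fingerprint,
// poolChanged is true when the client's node pool differs from the volume's.
// The bool result reports whether the volume must be copied and re-inserted.
func nextState(current HostVolumeState, fingerprinted, poolChanged bool) (HostVolumeState, bool) {
	switch {
	case fingerprinted && current != StateReady:
		// fingerprint updated on the client: pending/unavailable -> ready
		return StateReady, true
	case !fingerprinted && current == StateReady:
		// volume dropped out of the fingerprint, e.g. a failed client restore
		return StateUnavailable, true
	case fingerprinted && poolChanged:
		// state is already correct; only the node pool needs updating
		return current, true
	default:
		// nothing changed; skip the write
		return current, false
	}
}

func main() {
	cases := []struct {
		state         HostVolumeState
		fingerprinted bool
	}{
		{StatePending, true},     // create RPC racing a fingerprint -> ready
		{StateReady, false},      // node restarted, restore failed -> unavailable
		{StateUnavailable, true}, // node recovered -> ready again
		{StateReady, true},       // steady state -> no write needed
	}
	for _, tc := range cases {
		next, dirty := nextState(tc.state, tc.fingerprinted, false)
		fmt.Printf("%-11s fingerprinted=%-5v -> %-11s write=%v\n",
			tc.state, tc.fingerprinted, next, dirty)
	}
}

The last demo row shows why the old dirty flag logic was replaced: a volume that is already ready and still fingerprinted no longer triggers a table-index bump on every node heartbeat.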