diff --git a/.changelog/25432.txt b/.changelog/25432.txt new file mode 100644 index 000000000..c989003e0 --- /dev/null +++ b/.changelog/25432.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where GC would attempt and fail to delete plugins that had volumes +``` diff --git a/nomad/core_sched.go b/nomad/core_sched.go index ec3d4f020..ca25abcb4 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -833,6 +833,9 @@ func (c *CoreScheduler) csiPluginGC(eval *structs.Evaluation, customThreshold *t for i := iter.Next(); i != nil; i = iter.Next() { plugin := i.(*structs.CSIPlugin) + if !plugin.IsEmpty() { + continue + } // Ignore new plugins mt := time.Unix(0, plugin.ModifyTime) diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index bc2730642..e7bf1ff31 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -2346,6 +2346,10 @@ func TestCoreScheduler_CSIPluginGC(t *testing.T) { index++ must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job)) + snap, err = store.Snapshot() + must.NoError(t, err) + core = NewCoreScheduler(srv, snap) + // Retry index++ gc = srv.coreJobEval(structs.CoreJobCSIPluginGC, index) diff --git a/nomad/fsm.go b/nomad/fsm.go index 71f4b2fbf..6f735f94d 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -4,6 +4,7 @@ package nomad import ( + "errors" "fmt" "io" "reflect" @@ -1385,7 +1386,7 @@ func (n *nomadFSM) applyCSIPluginDelete(buf []byte, index uint64) interface{} { if err := n.state.DeleteCSIPlugin(index, req.ID); err != nil { // "plugin in use" is an error for the state store but not for typical // callers, so reduce log noise by not logging that case here - if err.Error() != "plugin in use" { + if !errors.Is(err, structs.ErrCSIPluginInUse) { n.logger.Error("DeleteCSIPlugin failed", "error", err) } return err