CSI: prevent extraneous GC attempts for plugins (#25432)

We can't delete a CSI plugin when it has volumes in use. When periodic GC runs,
we send the RPC unconditionally and then let the state store return an error. We
accidentally fixed the excess logging this causes (#17025) in #20555, but we can
also check whether the plugin is empty before sending the RPC, which saves a
request and the subsequent Raft write.

Fixes: https://github.com/hashicorp/nomad/issues/17025
Ref: https://github.com/hashicorp/nomad/pull/20555
This commit is contained in:
Tim Gross
2025-03-19 09:14:42 -04:00
committed by GitHub
parent b641d25730
commit 13b95b7685
4 changed files with 12 additions and 1 deletions

3
.changelog/25432.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
csi: Fixed a bug where GC would attempt and fail to delete plugins that had volumes
```

View File

@@ -833,6 +833,9 @@ func (c *CoreScheduler) csiPluginGC(eval *structs.Evaluation, customThreshold *t
for i := iter.Next(); i != nil; i = iter.Next() { for i := iter.Next(); i != nil; i = iter.Next() {
plugin := i.(*structs.CSIPlugin) plugin := i.(*structs.CSIPlugin)
if !plugin.IsEmpty() {
continue
}
// Ignore new plugins // Ignore new plugins
mt := time.Unix(0, plugin.ModifyTime) mt := time.Unix(0, plugin.ModifyTime)

View File

@@ -2346,6 +2346,10 @@ func TestCoreScheduler_CSIPluginGC(t *testing.T) {
index++ index++
must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job)) must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job))
snap, err = store.Snapshot()
must.NoError(t, err)
core = NewCoreScheduler(srv, snap)
// Retry // Retry
index++ index++
gc = srv.coreJobEval(structs.CoreJobCSIPluginGC, index) gc = srv.coreJobEval(structs.CoreJobCSIPluginGC, index)

View File

@@ -4,6 +4,7 @@
package nomad package nomad
import ( import (
"errors"
"fmt" "fmt"
"io" "io"
"reflect" "reflect"
@@ -1385,7 +1386,7 @@ func (n *nomadFSM) applyCSIPluginDelete(buf []byte, index uint64) interface{} {
if err := n.state.DeleteCSIPlugin(index, req.ID); err != nil { if err := n.state.DeleteCSIPlugin(index, req.ID); err != nil {
// "plugin in use" is an error for the state store but not for typical // "plugin in use" is an error for the state store but not for typical
// callers, so reduce log noise by not logging that case here // callers, so reduce log noise by not logging that case here
if err.Error() != "plugin in use" { if !errors.Is(err, structs.ErrCSIPluginInUse) {
n.logger.Error("DeleteCSIPlugin failed", "error", err) n.logger.Error("DeleteCSIPlugin failed", "error", err)
} }
return err return err