scheduler: fix a bug where force GC wasn't respected (#24456)

This PR fixes a bug where the System.GarbageCollect endpoint didn't work on
objects that weren't older than their respective GC thresholds.
System.GarbageCollect is used to force garbage collection (it is also used by
the system gc command) and should ignore any GC threshold settings.
This commit is contained in:
Piotr Kazmierczak
2024-11-21 09:07:23 +01:00
committed by GitHub
parent a6fbd5a2e2
commit 6ccfcc37a3
4 changed files with 70 additions and 72 deletions

3
.changelog/24456.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fix bug where forced garbage collection does not ignore GC thresholds
```

View File

@@ -13,6 +13,7 @@ import (
log "github.com/hashicorp/go-hclog" log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb" memdb "github.com/hashicorp/go-memdb"
version "github.com/hashicorp/go-version" version "github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs"
@@ -28,25 +29,20 @@ type CoreScheduler struct {
snap *state.StateSnapshot snap *state.StateSnapshot
logger log.Logger logger log.Logger
// custom GC Threshold values can be used by unit tests to simulate time // customThresholdForObject is used by unit tests that want to manipulate GC
// manipulation // threshold settings. Users can pass the string that matches the object to GC
customJobGCThreshold time.Duration // (e.g., structs.CoreJobEvalGC) and time.Duration that will be used as GC
customEvalGCThreshold time.Duration // threshold value.
customBatchEvalGCThreshold time.Duration customThresholdForObject map[string]*time.Duration
customNodeGCThreshold time.Duration
customDeploymentGCThreshold time.Duration
customCSIVolumeClaimGCThreshold time.Duration
customCSIPluginGCThreshold time.Duration
customACLTokenExpirationGCThreshold time.Duration
customRootKeyGCThreshold time.Duration
} }
// NewCoreScheduler is used to return a new system scheduler instance // NewCoreScheduler is used to return a new system scheduler instance
func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler { func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler {
s := &CoreScheduler{ s := &CoreScheduler{
srv: srv, srv: srv,
snap: snap, snap: snap,
logger: srv.logger.ResetNamed("core.sched"), logger: srv.logger.ResetNamed("core.sched"),
customThresholdForObject: make(map[string]*time.Duration),
} }
return s return s
} }
@@ -54,25 +50,29 @@ func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Schedule
// Process is used to implement the scheduler.Scheduler interface // Process is used to implement the scheduler.Scheduler interface
func (c *CoreScheduler) Process(eval *structs.Evaluation) error { func (c *CoreScheduler) Process(eval *structs.Evaluation) error {
job := strings.Split(eval.JobID, ":") // extra data can be smuggled in w/ JobID job := strings.Split(eval.JobID, ":") // extra data can be smuggled in w/ JobID
// check if there are any custom threshold values set
customThreshold := c.customThresholdForObject[job[0]]
switch job[0] { switch job[0] {
case structs.CoreJobEvalGC: case structs.CoreJobEvalGC:
return c.evalGC() return c.evalGC(customThreshold)
case structs.CoreJobNodeGC: case structs.CoreJobNodeGC:
return c.nodeGC(eval) return c.nodeGC(eval, customThreshold)
case structs.CoreJobJobGC: case structs.CoreJobJobGC:
return c.jobGC(eval) return c.jobGC(eval, customThreshold)
case structs.CoreJobDeploymentGC: case structs.CoreJobDeploymentGC:
return c.deploymentGC() return c.deploymentGC(customThreshold)
case structs.CoreJobCSIVolumeClaimGC: case structs.CoreJobCSIVolumeClaimGC:
return c.csiVolumeClaimGC(eval) return c.csiVolumeClaimGC(eval, customThreshold)
case structs.CoreJobCSIPluginGC: case structs.CoreJobCSIPluginGC:
return c.csiPluginGC(eval) return c.csiPluginGC(eval, customThreshold)
case structs.CoreJobOneTimeTokenGC: case structs.CoreJobOneTimeTokenGC:
return c.expiredOneTimeTokenGC(eval) return c.expiredOneTimeTokenGC(eval)
case structs.CoreJobLocalTokenExpiredGC: case structs.CoreJobLocalTokenExpiredGC:
return c.expiredACLTokenGC(eval, false) return c.expiredACLTokenGC(eval, false, customThreshold)
case structs.CoreJobGlobalTokenExpiredGC: case structs.CoreJobGlobalTokenExpiredGC:
return c.expiredACLTokenGC(eval, true) return c.expiredACLTokenGC(eval, true, customThreshold)
case structs.CoreJobRootKeyRotateOrGC: case structs.CoreJobRootKeyRotateOrGC:
return c.rootKeyRotateOrGC(eval) return c.rootKeyRotateOrGC(eval)
case structs.CoreJobVariablesRekey: case structs.CoreJobVariablesRekey:
@@ -86,40 +86,44 @@ func (c *CoreScheduler) Process(eval *structs.Evaluation) error {
// forceGC is used to garbage collect all eligible objects. // forceGC is used to garbage collect all eligible objects.
func (c *CoreScheduler) forceGC(eval *structs.Evaluation) error { func (c *CoreScheduler) forceGC(eval *structs.Evaluation) error {
if err := c.jobGC(eval); err != nil { // set a minimal threshold for all objects to make force GC possible
force := pointer.Of(time.Millisecond)
if err := c.jobGC(eval, force); err != nil {
return err return err
} }
if err := c.evalGC(); err != nil { if err := c.evalGC(force); err != nil {
return err return err
} }
if err := c.deploymentGC(); err != nil { if err := c.deploymentGC(force); err != nil {
return err return err
} }
if err := c.csiPluginGC(eval); err != nil { if err := c.csiPluginGC(eval, force); err != nil {
return err return err
} }
if err := c.csiVolumeClaimGC(eval); err != nil { if err := c.csiVolumeClaimGC(eval, force); err != nil {
return err return err
} }
if err := c.expiredOneTimeTokenGC(eval); err != nil { if err := c.expiredOneTimeTokenGC(eval); err != nil {
return err return err
} }
if err := c.expiredACLTokenGC(eval, false); err != nil { if err := c.expiredACLTokenGC(eval, false, force); err != nil {
return err return err
} }
if err := c.expiredACLTokenGC(eval, true); err != nil { if err := c.expiredACLTokenGC(eval, true, force); err != nil {
return err return err
} }
if err := c.rootKeyGC(eval, time.Now()); err != nil { if err := c.rootKeyGC(eval, time.Now()); err != nil {
return err return err
} }
// Node GC must occur after the others to ensure the allocations are // Node GC must occur after the others to ensure the allocations are
// cleared. // cleared.
return c.nodeGC(eval) return c.nodeGC(eval, force)
} }
// jobGC is used to garbage collect eligible jobs. // jobGC is used to garbage collect eligible jobs.
func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error { func (c *CoreScheduler) jobGC(eval *structs.Evaluation, customThreshold *time.Duration) error {
// Get all the jobs eligible for garbage collection. // Get all the jobs eligible for garbage collection.
ws := memdb.NewWatchSet() ws := memdb.NewWatchSet()
iter, err := c.snap.JobsByGC(ws, true) iter, err := c.snap.JobsByGC(ws, true)
@@ -131,8 +135,8 @@ func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
threshold = c.srv.config.JobGCThreshold threshold = c.srv.config.JobGCThreshold
// custom threshold override // custom threshold override
if c.customJobGCThreshold != 0 { if customThreshold != nil {
threshold = c.customJobGCThreshold threshold = *customThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -263,7 +267,7 @@ func (c *CoreScheduler) partitionJobReap(jobs []*structs.Job, leaderACL string,
} }
// evalGC is used to garbage collect old evaluations // evalGC is used to garbage collect old evaluations
func (c *CoreScheduler) evalGC() error { func (c *CoreScheduler) evalGC(customThreshold *time.Duration) error {
// Iterate over the evaluations // Iterate over the evaluations
ws := memdb.NewWatchSet() ws := memdb.NewWatchSet()
iter, err := c.snap.Evals(ws, false) iter, err := c.snap.Evals(ws, false)
@@ -276,11 +280,9 @@ func (c *CoreScheduler) evalGC() error {
batchThreshold = c.srv.config.BatchEvalGCThreshold batchThreshold = c.srv.config.BatchEvalGCThreshold
// custom threshold override // custom threshold override
if c.customEvalGCThreshold != 0 { if customThreshold != nil {
threshold = c.customEvalGCThreshold threshold = *customThreshold
} batchThreshold = *customThreshold
if c.customBatchEvalGCThreshold != 0 {
batchThreshold = c.customBatchEvalGCThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -376,8 +378,7 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, cutoffTime time.Time, a
var gcAllocIDs []string var gcAllocIDs []string
for _, alloc := range allocs { for _, alloc := range allocs {
if !allocGCEligible(alloc, job, time.Now().UTC(), cutoffTime) { if !allocGCEligible(alloc, job, time.Now().UTC(), cutoffTime) {
// Can't GC the evaluation since not all of the allocations are // Can't GC the evaluation since not all the allocations are terminal
// terminal
gcEval = false gcEval = false
} else { } else {
// The allocation is eligible to be GC'd // The allocation is eligible to be GC'd
@@ -462,7 +463,7 @@ func (c *CoreScheduler) partitionEvalReap(evals, allocs []string, batchSize int)
} }
// nodeGC is used to garbage collect old nodes // nodeGC is used to garbage collect old nodes
func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error { func (c *CoreScheduler) nodeGC(eval *structs.Evaluation, customThreshold *time.Duration) error {
// Iterate over the evaluations // Iterate over the evaluations
ws := memdb.NewWatchSet() ws := memdb.NewWatchSet()
iter, err := c.snap.Nodes(ws) iter, err := c.snap.Nodes(ws)
@@ -474,8 +475,8 @@ func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error {
threshold = c.srv.config.NodeGCThreshold threshold = c.srv.config.NodeGCThreshold
// custom threshold override // custom threshold override
if c.customNodeGCThreshold != 0 { if customThreshold != nil {
threshold = c.customNodeGCThreshold threshold = *customThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -566,7 +567,7 @@ func (c *CoreScheduler) nodeReap(eval *structs.Evaluation, nodeIDs []string) err
} }
// deploymentGC is used to garbage collect old deployments // deploymentGC is used to garbage collect old deployments
func (c *CoreScheduler) deploymentGC() error { func (c *CoreScheduler) deploymentGC(customThreshold *time.Duration) error {
// Iterate over the deployments // Iterate over the deployments
ws := memdb.NewWatchSet() ws := memdb.NewWatchSet()
iter, err := c.snap.Deployments(ws, state.SortDefault) iter, err := c.snap.Deployments(ws, state.SortDefault)
@@ -578,8 +579,8 @@ func (c *CoreScheduler) deploymentGC() error {
threshold = c.srv.config.DeploymentGCThreshold threshold = c.srv.config.DeploymentGCThreshold
// custom threshold override // custom threshold override
if c.customDeploymentGCThreshold != 0 { if customThreshold != nil {
threshold = c.customDeploymentGCThreshold threshold = *customThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -739,7 +740,7 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime, cutoffTime
} }
// csiVolumeClaimGC is used to garbage collect CSI volume claims // csiVolumeClaimGC is used to garbage collect CSI volume claims
func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error { func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation, customThreshold *time.Duration) error {
gcClaims := func(ns, volID string) error { gcClaims := func(ns, volID string) error {
req := &structs.CSIVolumeClaimRequest{ req := &structs.CSIVolumeClaimRequest{
@@ -778,8 +779,8 @@ func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error {
threshold = c.srv.config.CSIVolumeClaimGCThreshold threshold = c.srv.config.CSIVolumeClaimGCThreshold
// custom threshold override // custom threshold override
if c.customCSIVolumeClaimGCThreshold != 0 { if customThreshold != nil {
threshold = c.customCSIVolumeClaimGCThreshold threshold = *customThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -812,7 +813,7 @@ func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error {
} }
// csiPluginGC is used to garbage collect unused plugins // csiPluginGC is used to garbage collect unused plugins
func (c *CoreScheduler) csiPluginGC(eval *structs.Evaluation) error { func (c *CoreScheduler) csiPluginGC(eval *structs.Evaluation, customThreshold *time.Duration) error {
ws := memdb.NewWatchSet() ws := memdb.NewWatchSet()
@@ -825,8 +826,8 @@ func (c *CoreScheduler) csiPluginGC(eval *structs.Evaluation) error {
threshold = c.srv.config.CSIPluginGCThreshold threshold = c.srv.config.CSIPluginGCThreshold
// custom threshold override // custom threshold override
if c.customCSIPluginGCThreshold != 0 { if customThreshold != nil {
threshold = c.customCSIPluginGCThreshold threshold = *customThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -870,7 +871,7 @@ func (c *CoreScheduler) expiredOneTimeTokenGC(eval *structs.Evaluation) error {
// tokens. It can be used for both local and global tokens and includes // tokens. It can be used for both local and global tokens and includes
// behaviour to account for periodic and user actioned garbage collection // behaviour to account for periodic and user actioned garbage collection
// invocations. // invocations.
func (c *CoreScheduler) expiredACLTokenGC(eval *structs.Evaluation, global bool) error { func (c *CoreScheduler) expiredACLTokenGC(eval *structs.Evaluation, global bool, customThreshold *time.Duration) error {
// If ACLs are not enabled, we do not need to continue and should exit // If ACLs are not enabled, we do not need to continue and should exit
// early. This is not an error condition as callers can blindly call this // early. This is not an error condition as callers can blindly call this
@@ -893,8 +894,8 @@ func (c *CoreScheduler) expiredACLTokenGC(eval *structs.Evaluation, global bool)
threshold = c.srv.config.ACLTokenExpirationGCThreshold threshold = c.srv.config.ACLTokenExpirationGCThreshold
// custom threshold override // custom threshold override
if c.customACLTokenExpirationGCThreshold != 0 { if customThreshold != nil {
threshold = c.customACLTokenExpirationGCThreshold threshold = *customThreshold
} }
cutoffTime := c.getCutoffTime(threshold) cutoffTime := c.getCutoffTime(threshold)
@@ -1003,13 +1004,9 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error
return err return err
} }
var threshold time.Duration // we don't do custom overrides for root keys because they are never subject to
threshold = c.srv.config.RootKeyGCThreshold // force GC
threshold := c.srv.config.RootKeyGCThreshold
// custom threshold override
if c.customRootKeyGCThreshold != 0 {
threshold = c.customRootKeyGCThreshold
}
// the threshold is longer than we can support with the time table, and we // the threshold is longer than we can support with the time table, and we
// never want to force-GC keys because that will orphan signed Workload // never want to force-GC keys because that will orphan signed Workload

View File

@@ -527,9 +527,7 @@ func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
// set a shorter GC threshold this time // set a shorter GC threshold this time
gc = s1.coreJobEval(structs.CoreJobEvalGC, jobModifyIdx*2) gc = s1.coreJobEval(structs.CoreJobEvalGC, jobModifyIdx*2)
core.(*CoreScheduler).customBatchEvalGCThreshold = time.Minute core.(*CoreScheduler).customThresholdForObject[structs.CoreJobEvalGC] = pointer.Of(time.Minute)
//core.(*CoreScheduler).customEvalGCThreshold = time.Minute
//core.(*CoreScheduler).customJobGCThreshold = time.Minute
must.NoError(t, core.Process(gc)) must.NoError(t, core.Process(gc))
// We expect the following: // We expect the following:
@@ -2513,7 +2511,7 @@ func TestCoreScheduler_CSIVolumeClaimGC(t *testing.T) {
index++ index++
gc := srv.coreJobEval(structs.CoreJobForceGC, index) gc := srv.coreJobEval(structs.CoreJobForceGC, index)
c := core.(*CoreScheduler) c := core.(*CoreScheduler)
require.NoError(t, c.csiVolumeClaimGC(gc)) require.NoError(t, c.csiVolumeClaimGC(gc, nil))
// the only remaining claim is for a deleted alloc with no path to // the only remaining claim is for a deleted alloc with no path to
// the non-existent node, so volumewatcher will release the // the non-existent node, so volumewatcher will release the
@@ -2551,7 +2549,7 @@ func TestCoreScheduler_CSIBadState_ClaimGC(t *testing.T) {
index++ index++
gc := srv.coreJobEval(structs.CoreJobForceGC, index) gc := srv.coreJobEval(structs.CoreJobForceGC, index)
c := core.(*CoreScheduler) c := core.(*CoreScheduler)
must.NoError(t, c.csiVolumeClaimGC(gc)) must.NoError(t, c.csiVolumeClaimGC(gc, nil))
vol, err := srv.State().CSIVolumeByID(nil, structs.DefaultNamespace, "csi-volume-nfs0") vol, err := srv.State().CSIVolumeByID(nil, structs.DefaultNamespace, "csi-volume-nfs0")
must.NoError(t, err) must.NoError(t, err)

View File

@@ -34,15 +34,15 @@ func TestSystemEndpoint_GarbageCollect(t *testing.T) {
job := mock.Job() job := mock.Job()
job.Type = structs.JobTypeBatch job.Type = structs.JobTypeBatch
job.Stop = true job.Stop = true
// submit time must be older than default job GC // set submit time older than now but still newer than default GC threshold
job.SubmitTime = time.Now().Add(-6 * time.Hour).UnixNano() job.SubmitTime = time.Now().Add(-10 * time.Millisecond).UnixNano()
must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 1000, nil, job)) must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 1000, nil, job))
eval := mock.Eval() eval := mock.Eval()
eval.Status = structs.EvalStatusComplete eval.Status = structs.EvalStatusComplete
eval.JobID = job.ID eval.JobID = job.ID
// modify time must be older than default eval GC // set modify time older than now but still newer than default GC threshold
eval.ModifyTime = time.Now().Add(-5 * time.Hour).UnixNano() eval.ModifyTime = time.Now().Add(-10 * time.Millisecond).UnixNano()
must.NoError(t, state.UpsertEvals(structs.MsgTypeTestSetup, 1001, []*structs.Evaluation{eval})) must.NoError(t, state.UpsertEvals(structs.MsgTypeTestSetup, 1001, []*structs.Evaluation{eval}))
// Make the GC request // Make the GC request