Don't GC allocations that are still running but have a desired status of stop

This PR fixes an edge case where we could GC an allocation that was in a
desired stop state but had not terminated yet. This can be hit if the
client hasn't shut down the allocation yet or if the allocation is still
shutting down (for example, because of a long kill_timeout).

Fixes https://github.com/hashicorp/nomad/issues/4940
This commit is contained in:
Alex Dadgar
2018-12-05 13:01:12 -08:00
parent 661dc4b386
commit 5a98dfa493
2 changed files with 16 additions and 1 deletions

View File

@@ -7,7 +7,6 @@ import (
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/scheduler"
@@ -623,6 +622,12 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time,
return false
}
// If the allocation is still running on the client we can not garbage
// collect it.
if a.ClientStatus == structs.AllocClientStatusRunning {
return false
}
// If the job is deleted, stopped or dead all allocs can be removed
if job == nil || job.Stop || job.Status == structs.JobStatusDead {
return true

View File

@@ -1972,6 +1972,16 @@ func TestAllocation_GCEligible(t *testing.T) {
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "Don't GC when non terminal on client and job dead",
ClientStatus: structs.AllocClientStatusRunning,
DesiredStatus: structs.AllocDesiredStatusStop,
JobStatus: structs.JobStatusDead,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when terminal but not failed ",
ClientStatus: structs.AllocClientStatusComplete,