scheduler: correctly detect inplace update with wildcard datacenters (#16362)

Wildcard datacenters introduced a bug where a job with any wildcard datacenters
would always be treated as a destructive update when we checked whether a
datacenter had been removed from the jobspec.

Includes updating the helper so that callers don't have to loop over the job's
datacenters.
This commit is contained in:
Tim Gross
2023-03-07 10:05:59 -05:00
committed by GitHub
parent 605f15506b
commit 6f52a9194f
5 changed files with 47 additions and 16 deletions

3
.changelog/16362.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where allocs of system jobs with wildcard datacenters would be destructively updated
```

View File

@@ -1620,11 +1620,8 @@ func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string,
// here, but datacenter is a good optimization to start with as // here, but datacenter is a good optimization to start with as
// datacenter cardinality tends to be low so the check // datacenter cardinality tends to be low so the check
// shouldn't add much work. // shouldn't add much work.
for _, dc := range job.Datacenters { if node.IsInAnyDC(job.Datacenters) {
if node.IsInDC(dc) { sysJobs = append(sysJobs, job)
sysJobs = append(sysJobs, job)
break
}
} }
} }

View File

@@ -2311,8 +2311,13 @@ func (n *Node) ComparableResources() *ComparableResources {
} }
} }
func (n *Node) IsInDC(dc string) bool { func (n *Node) IsInAnyDC(datacenters []string) bool {
return glob.Glob(dc, n.Datacenter) for _, dc := range datacenters {
if glob.Glob(dc, n.Datacenter) {
return true
}
}
return false
} }
// Stub returns a summarized version of the node // Stub returns a summarized version of the node

View File

@@ -9,7 +9,6 @@ import (
log "github.com/hashicorp/go-hclog" log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb" memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs"
"golang.org/x/exp/slices"
) )
// allocTuple is a tuple of the allocation name and potential alloc ID // allocTuple is a tuple of the allocation name and potential alloc ID
@@ -66,12 +65,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]str
notReady[node.ID] = struct{}{} notReady[node.ID] = struct{}{}
continue continue
} }
for _, dc := range dcs { if node.IsInAnyDC(dcs) {
if node.IsInDC(dc) { out = append(out, node)
out = append(out, node) dcMap[node.Datacenter]++
dcMap[node.Datacenter]++
break
}
} }
} }
return out, notReady, dcMap, nil return out, notReady, dcMap, nil
@@ -558,7 +554,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
} }
// The alloc is on a node that's now in an ineligible DC // The alloc is on a node that's now in an ineligible DC
if !slices.Contains(job.Datacenters, node.Datacenter) { if !node.IsInAnyDC(job.Datacenters) {
continue continue
} }
@@ -802,7 +798,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
} }
// The alloc is on a node that's now in an ineligible DC // The alloc is on a node that's now in an ineligible DC
if !slices.Contains(newJob.Datacenters, node.Datacenter) { if !node.IsInAnyDC(newJob.Datacenters) {
return false, true, nil return false, true, nil
} }

View File

@@ -320,6 +320,7 @@ func TestTaskUpdatedSpread(t *testing.T) {
require.False(t, tasksUpdated(j5, j6, name)) require.False(t, tasksUpdated(j5, j6, name))
} }
func TestTasksUpdated(t *testing.T) { func TestTasksUpdated(t *testing.T) {
ci.Parallel(t) ci.Parallel(t)
@@ -972,6 +973,35 @@ func TestInplaceUpdate_Success(t *testing.T) {
} }
} }
// TestInplaceUpdate_WildcardDatacenters verifies that an allocation for a
// job using a wildcard datacenter ("*") is updated in place rather than
// being destructively replaced.
func TestInplaceUpdate_WildcardDatacenters(t *testing.T) {
	ci.Parallel(t)
	store, ctx := testContext(t)

	// Job spec targets every datacenter via glob.
	wildcardJob := mock.Job()
	wildcardJob.Datacenters = []string{"*"}

	eval := mock.Eval()

	testNode := mock.Node()
	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 900, testNode))

	// Place an existing alloc for the job on that node.
	existing := mock.AllocForNode(testNode)
	existing.Job = wildcardJob
	existing.JobID = wildcardJob.ID
	must.NoError(t, store.UpsertJobSummary(1000, mock.JobSummary(existing.JobID)))
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1001,
		[]*structs.Allocation{existing}))

	tuples := []allocTuple{{Alloc: existing, TaskGroup: wildcardJob.TaskGroups[0]}}
	placementStack := NewGenericStack(false, ctx)

	unplaced, inplace := inplaceUpdate(ctx, eval, wildcardJob, placementStack, tuples)

	// The wildcard DC must not be mistaken for a removed DC, so the alloc
	// stays in place and lands in the plan's node allocations.
	must.Len(t, 1, inplace,
		must.Sprintf("inplaceUpdate should have an inplace update"))
	must.Len(t, 0, unplaced)
	must.MapNotEmpty(t, ctx.plan.NodeAllocation,
		must.Sprintf("inplaceUpdate should have an inplace update"))
}
func TestUtil_connectUpdated(t *testing.T) { func TestUtil_connectUpdated(t *testing.T) {
ci.Parallel(t) ci.Parallel(t)