diff --git a/CHANGELOG.md b/CHANGELOG.md index 697c6e038..54d7cd17d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ IMPROVEMENTS: * cli: Added `-monitor` flag to `deployment status` command and automatically monitor deployments from `job run` command. [[GH-10661](https://github.com/hashicorp/nomad/pull/10661)] +BUG FIXES: +* quotas (Enterprise): Fixed a bug where quotas were evaluated before constraints, resulting in quota capacity being used up by filtered nodes. [[GH-10753](https://github.com/hashicorp/nomad/issues/10753)] + ## 1.1.1 (June 9, 2021) FEATURES: @@ -114,6 +117,10 @@ BUG FIXES: * server: Fixed a panic that may arise on submission of jobs containing invalid service checks [[GH-10154](https://github.com/hashicorp/nomad/issues/10154)] * ui: Fixed the rendering of interstitial components shown after processing a dynamic application sizing recommendation. [[GH-10094](https://github.com/hashicorp/nomad/pull/10094)] +## 1.0.8 (Unreleased) +* quotas (Enterprise): Fixed a bug where quotas were evaluated before constraints, resulting in quota capacity being used up by filtered nodes. [[GH-10753](https://github.com/hashicorp/nomad/issues/10753)] +* quotas (Enterprise): Fixed a bug where stopped allocations for a failed deployment can be double-credited to quota limits, resulting in a quota limit bypass. [[GH-10694](https://github.com/hashicorp/nomad/issues/10694) + ## 1.0.7 (June 9, 2021) BUG FIXES: @@ -121,7 +128,6 @@ BUG FIXES: * cli: Fixed a bug where `plugin status` did not validate the passed `type` flag correctly [[GH-10712](https://github.com/hashicorp/nomad/pull/10712)] * cli: Fixed a bug where `alloc exec` may fail with "unexpected EOF" without returning the exit code after a command [[GH-10657](https://github.com/hashicorp/nomad/issues/10657)] * client: Fixed a bug where `alloc exec` sessions may terminate abruptly after a few minutes [[GH-10710](https://github.com/hashicorp/nomad/issues/10710)] -* quotas (Enterprise): Fixed a bug where stopped allocations for a failed deployment can be double-credited to quota limits, resulting in a quota limit bypass. [[GH-10694](https://github.com/hashicorp/nomad/issues/10694)] * drivers/exec: Fixed a bug where `exec` and `java` tasks inherit the Nomad agent's `oom_score_adj` value [[GH-10698](https://github.com/hashicorp/nomad/issues/10698)] * ui: Fixed a bug where exec would not work across regions. [[GH-10539](https://github.com/hashicorp/nomad/issues/10539)] * ui: Fixed global-search shortcut for non-english keyboards. [[GH-10714](https://github.com/hashicorp/nomad/issues/10714)] diff --git a/scheduler/stack.go b/scheduler/stack.go index b8ef95a9e..856e0c2a3 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -208,10 +208,6 @@ func NewSystemStack(ctx Context) *SystemStack { // have to evaluate on all nodes. s.source = NewStaticIterator(ctx, nil) - // Create the quota iterator to determine if placements would result in the - // quota attached to the namespace of the job to go over. - s.quota = NewQuotaIterator(ctx, s.source) - // Attach the job constraints. The job is filled in later. s.jobConstraint = NewConstraintChecker(ctx, nil) @@ -243,13 +239,20 @@ func NewSystemStack(ctx Context) *SystemStack { s.taskGroupDevices, s.taskGroupNetwork} avail := []FeasibilityChecker{s.taskGroupCSIVolumes} - s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) + s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs, avail) // Filter on distinct property constraints. s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.wrappedChecks) + // Create the quota iterator to determine if placements would result in + // the quota attached to the namespace of the job to go over. + // Note: the quota iterator must be the last feasibility iterator before + // we upgrade to ranking, or our quota usage will include ineligible + // nodes! + s.quota = NewQuotaIterator(ctx, s.distinctPropertyConstraint) + // Upgrade from feasible to rank iterator - rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint) + rankSource := NewFeasibleRankIterator(ctx, s.quota) // Apply the bin packing, this depends on the resources needed // by a particular task group. Enable eviction as system jobs are high @@ -330,10 +333,6 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { // balancing across eligible nodes. s.source = NewRandomIterator(ctx, nil) - // Create the quota iterator to determine if placements would result in the - // quota attached to the namespace of the job to go over. - s.quota = NewQuotaIterator(ctx, s.source) - // Attach the job constraints. The job is filled in later. s.jobConstraint = NewConstraintChecker(ctx, nil) @@ -366,7 +365,7 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { s.taskGroupDevices, s.taskGroupNetwork} avail := []FeasibilityChecker{s.taskGroupCSIVolumes} - s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) + s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs, avail) // Filter on distinct host constraints. s.distinctHostsConstraint = NewDistinctHostsIterator(ctx, s.wrappedChecks) @@ -374,8 +373,15 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { // Filter on distinct property constraints. s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.distinctHostsConstraint) + // Create the quota iterator to determine if placements would result in + // the quota attached to the namespace of the job to go over. + // Note: the quota iterator must be the last feasibility iterator before + // we upgrade to ranking, or our quota usage will include ineligible + // nodes! + s.quota = NewQuotaIterator(ctx, s.distinctPropertyConstraint) + // Upgrade from feasible to rank iterator - rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint) + rankSource := NewFeasibleRankIterator(ctx, s.quota) // Apply the bin packing, this depends on the resources needed // by a particular task group.