mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
quotas: evaluate quota feasibility last in scheduler (#10753)
The `QuotaIterator` is used as the source of nodes passed into feasibility checking for constraints. Every node that passes the quota check counts the allocation resources agains the quota, and as a result we count nodes which will be later filtered out by constraints. Therefore for jobs with constraints, nodes that are feasibility checked but fail have been counted against quotas. This failure mode is order dependent; if all the unfiltered nodes happen to be quota checked first, everything works as expected. This changeset moves the `QuotaIterator` to happen last among all feasibility checkers (but before ranking). The `QuotaIterator` will never receive filtered nodes so it will calculate quotas correctly.
This commit is contained in:
@@ -3,6 +3,9 @@
|
||||
IMPROVEMENTS:
|
||||
* cli: Added `-monitor` flag to `deployment status` command and automatically monitor deployments from `job run` command. [[GH-10661](https://github.com/hashicorp/nomad/pull/10661)]
|
||||
|
||||
BUG FIXES:
|
||||
* quotas (Enterprise): Fixed a bug where quotas were evaluated before constraints, resulting in quota capacity being used up by filtered nodes. [[GH-10753](https://github.com/hashicorp/nomad/issues/10753)]
|
||||
|
||||
## 1.1.1 (June 9, 2021)
|
||||
|
||||
FEATURES:
|
||||
@@ -114,6 +117,10 @@ BUG FIXES:
|
||||
* server: Fixed a panic that may arise on submission of jobs containing invalid service checks [[GH-10154](https://github.com/hashicorp/nomad/issues/10154)]
|
||||
* ui: Fixed the rendering of interstitial components shown after processing a dynamic application sizing recommendation. [[GH-10094](https://github.com/hashicorp/nomad/pull/10094)]
|
||||
|
||||
## 1.0.8 (Unreleased)
|
||||
* quotas (Enterprise): Fixed a bug where quotas were evaluated before constraints, resulting in quota capacity being used up by filtered nodes. [[GH-10753](https://github.com/hashicorp/nomad/issues/10753)]
|
||||
* quotas (Enterprise): Fixed a bug where stopped allocations for a failed deployment can be double-credited to quota limits, resulting in a quota limit bypass. [[GH-10694](https://github.com/hashicorp/nomad/issues/10694)
|
||||
|
||||
## 1.0.7 (June 9, 2021)
|
||||
|
||||
BUG FIXES:
|
||||
@@ -121,7 +128,6 @@ BUG FIXES:
|
||||
* cli: Fixed a bug where `plugin status` did not validate the passed `type` flag correctly [[GH-10712](https://github.com/hashicorp/nomad/pull/10712)]
|
||||
* cli: Fixed a bug where `alloc exec` may fail with "unexpected EOF" without returning the exit code after a command [[GH-10657](https://github.com/hashicorp/nomad/issues/10657)]
|
||||
* client: Fixed a bug where `alloc exec` sessions may terminate abruptly after a few minutes [[GH-10710](https://github.com/hashicorp/nomad/issues/10710)]
|
||||
* quotas (Enterprise): Fixed a bug where stopped allocations for a failed deployment can be double-credited to quota limits, resulting in a quota limit bypass. [[GH-10694](https://github.com/hashicorp/nomad/issues/10694)]
|
||||
* drivers/exec: Fixed a bug where `exec` and `java` tasks inherit the Nomad agent's `oom_score_adj` value [[GH-10698](https://github.com/hashicorp/nomad/issues/10698)]
|
||||
* ui: Fixed a bug where exec would not work across regions. [[GH-10539](https://github.com/hashicorp/nomad/issues/10539)]
|
||||
* ui: Fixed global-search shortcut for non-english keyboards. [[GH-10714](https://github.com/hashicorp/nomad/issues/10714)]
|
||||
|
||||
@@ -208,10 +208,6 @@ func NewSystemStack(ctx Context) *SystemStack {
|
||||
// have to evaluate on all nodes.
|
||||
s.source = NewStaticIterator(ctx, nil)
|
||||
|
||||
// Create the quota iterator to determine if placements would result in the
|
||||
// quota attached to the namespace of the job to go over.
|
||||
s.quota = NewQuotaIterator(ctx, s.source)
|
||||
|
||||
// Attach the job constraints. The job is filled in later.
|
||||
s.jobConstraint = NewConstraintChecker(ctx, nil)
|
||||
|
||||
@@ -243,13 +239,20 @@ func NewSystemStack(ctx Context) *SystemStack {
|
||||
s.taskGroupDevices,
|
||||
s.taskGroupNetwork}
|
||||
avail := []FeasibilityChecker{s.taskGroupCSIVolumes}
|
||||
s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail)
|
||||
s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs, avail)
|
||||
|
||||
// Filter on distinct property constraints.
|
||||
s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.wrappedChecks)
|
||||
|
||||
// Create the quota iterator to determine if placements would result in
|
||||
// the quota attached to the namespace of the job to go over.
|
||||
// Note: the quota iterator must be the last feasibility iterator before
|
||||
// we upgrade to ranking, or our quota usage will include ineligible
|
||||
// nodes!
|
||||
s.quota = NewQuotaIterator(ctx, s.distinctPropertyConstraint)
|
||||
|
||||
// Upgrade from feasible to rank iterator
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint)
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.quota)
|
||||
|
||||
// Apply the bin packing, this depends on the resources needed
|
||||
// by a particular task group. Enable eviction as system jobs are high
|
||||
@@ -330,10 +333,6 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
|
||||
// balancing across eligible nodes.
|
||||
s.source = NewRandomIterator(ctx, nil)
|
||||
|
||||
// Create the quota iterator to determine if placements would result in the
|
||||
// quota attached to the namespace of the job to go over.
|
||||
s.quota = NewQuotaIterator(ctx, s.source)
|
||||
|
||||
// Attach the job constraints. The job is filled in later.
|
||||
s.jobConstraint = NewConstraintChecker(ctx, nil)
|
||||
|
||||
@@ -366,7 +365,7 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
|
||||
s.taskGroupDevices,
|
||||
s.taskGroupNetwork}
|
||||
avail := []FeasibilityChecker{s.taskGroupCSIVolumes}
|
||||
s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail)
|
||||
s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs, avail)
|
||||
|
||||
// Filter on distinct host constraints.
|
||||
s.distinctHostsConstraint = NewDistinctHostsIterator(ctx, s.wrappedChecks)
|
||||
@@ -374,8 +373,15 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
|
||||
// Filter on distinct property constraints.
|
||||
s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.distinctHostsConstraint)
|
||||
|
||||
// Create the quota iterator to determine if placements would result in
|
||||
// the quota attached to the namespace of the job to go over.
|
||||
// Note: the quota iterator must be the last feasibility iterator before
|
||||
// we upgrade to ranking, or our quota usage will include ineligible
|
||||
// nodes!
|
||||
s.quota = NewQuotaIterator(ctx, s.distinctPropertyConstraint)
|
||||
|
||||
// Upgrade from feasible to rank iterator
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint)
|
||||
rankSource := NewFeasibleRankIterator(ctx, s.quota)
|
||||
|
||||
// Apply the bin packing, this depends on the resources needed
|
||||
// by a particular task group.
|
||||
|
||||
Reference in New Issue
Block a user