diff --git a/api/operator.go b/api/operator.go
index f46216f31..5d4254442 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -115,7 +115,7 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
 	// SchedulerAlgorithm lets you select between available scheduling algorithms.
-	SchedulerAlgorithm string
+	SchedulerAlgorithm SchedulerAlgorithm
 
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
@@ -149,6 +149,11 @@ type SchedulerSetConfigurationResponse struct {
 // scheduler to be user-selectable.
 type SchedulerAlgorithm string
 
+const (
+	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
+	SchedulerAlgorithmSpread  SchedulerAlgorithm = "spread"
+)
+
 // PreemptionConfig specifies whether preemption is enabled based on scheduler type
 type PreemptionConfig struct {
 	SystemSchedulerEnabled bool
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index c5d5e232f..da278533b 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -184,6 +184,7 @@ func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) {
 			"default case",
 			nil,
 			structs.SchedulerConfiguration{
+				SchedulerAlgorithm: "binpack",
 				PreemptionConfig: structs.PreemptionConfig{
 					SystemSchedulerEnabled: true,
 				},
diff --git a/command/agent/command.go b/command/agent/command.go
index bb1a2cda2..7546738e0 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -377,6 +377,11 @@ func (c *Command) isValidConfig(config, cmdConfig *Config) bool {
 		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
 	}
 
+	if err := config.Server.DefaultSchedulerConfig.Validate(); err != nil {
+		c.Ui.Error(err.Error())
+		return false
+	}
+
 	return true
 }
diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go
index cd1284774..157413ec0 100644
--- a/command/agent/config_parse_test.go
+++ b/command/agent/config_parse_test.go
@@ -126,6 +126,7 @@ var basicConfig = &Config{
 		RetryMaxAttempts: 3,
 	},
 	DefaultSchedulerConfig: &structs.SchedulerConfiguration{
+		SchedulerAlgorithm: "spread",
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled: true,
 			BatchSchedulerEnabled:  true,
diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go
index 8abe7cdf5..64d0ea258 100644
--- a/command/agent/operator_endpoint.go
+++ b/command/agent/operator_endpoint.go
@@ -253,18 +253,18 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
 		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing scheduler config: %v", err))
 	}
 
-	if !structs.SchedulerAlgorithmIsValid(conf.SchedulerAlgorithm) {
-		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Invalid scheduler algorithm selected."))
-	}
-
 	args.Config = structs.SchedulerConfiguration{
-		SchedulerAlgorithm: conf.SchedulerAlgorithm,
+		SchedulerAlgorithm: structs.SchedulerAlgorithm(conf.SchedulerAlgorithm),
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled:  conf.PreemptionConfig.SystemSchedulerEnabled,
 			BatchSchedulerEnabled:   conf.PreemptionConfig.BatchSchedulerEnabled,
 			ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled},
 	}
 
+	if err := args.Config.Validate(); err != nil {
+		return nil, CodedError(http.StatusBadRequest, err.Error())
+	}
+
 	// Check for cas value
 	params := req.URL.Query()
 	if _, ok := params["cas"]; ok {
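Note for reviewers: with the typed constants in api/operator.go, a client can select the new algorithm without raw strings. A minimal sketch, assuming a reachable agent and the api package's existing Operator.SchedulerSetConfiguration helper, which this diff does not touch:

package main

import (
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	sched := &api.SchedulerConfiguration{
		// Typed constant introduced above, replacing a raw "spread" string.
		SchedulerAlgorithm: api.SchedulerAlgorithmSpread,
		PreemptionConfig: api.PreemptionConfig{
			SystemSchedulerEnabled: true,
		},
	}

	// A bad algorithm value is now rejected with a 400 by the Validate
	// call added to schedulerUpdateConfig above.
	if _, _, err := client.Operator().SchedulerSetConfiguration(sched, nil); err != nil {
		log.Fatal(err)
	}
}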
diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl
index a4edcfef6..78d8d63ef 100644
--- a/command/agent/testdata/basic.hcl
+++ b/command/agent/testdata/basic.hcl
@@ -135,6 +135,8 @@ server {
   }
 
   default_scheduler_config {
+    scheduler_algorithm = "spread"
+
     preemption_config {
       batch_scheduler_enabled  = true
       system_scheduler_enabled = true
diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json
index 0052b6862..b02e0db88 100644
--- a/command/agent/testdata/basic.json
+++ b/command/agent/testdata/basic.json
@@ -297,6 +297,7 @@
         "2.2.2.2"
       ],
       "default_scheduler_config": [{
+        "scheduler_algorithm": "spread",
         "preemption_config": [{
           "batch_scheduler_enabled": true,
           "system_scheduler_enabled": true,
diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go
index bb5bb65e8..855ab7ff0 100644
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -148,10 +148,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 	return true, "", used, nil
 }
 
-// ScoreFit is used to score the fit based on the Google work published here:
-// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
-// This is equivalent to their BestFit v3
-func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
+func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) {
 	// COMPAT(0.11): Remove in 0.11
 	reserved := node.ComparableReservedResources()
 	res := node.ComparableResources()
@@ -165,8 +162,18 @@ func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	}
 
 	// Compute the free percentage
-	freePctCpu := 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
-	freePctRam := 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
+	freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
+	freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
+	return freePctCpu, freePctRam
+}
+
+// ScoreFitBinPack computes a fit score to achieve binpacking behavior.
+// Score is in [0, 18]
+//
+// It's equivalent to the BestFit v3 of the Google work published here:
+// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
+func ScoreFitBinPack(node *Node, util *ComparableResources) float64 {
+	freePctCpu, freePctRam := computeFreePercentage(node, util)
 
 	// Total will be "maximized" the smaller the value is.
 	// At 100% utilization, the total is 2, while at 0% util it is 20.
@@ -176,9 +183,6 @@ func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	// score. Because the floor is 20, we simply use that as an anchor.
 	// This means at a perfect fit, we return 18 as the score.
 	score := 20.0 - total
-	if algorithm == "spread" {
-		score = total - 2
-	}
 
 	// Bound the score, just in case
 	// If the score is over 18, that means we've overfit the node.
@@ -190,6 +194,24 @@ func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	return score
 }
 
+// ScoreFitSpread computes a fit score to achieve spread behavior.
+// Score is in [0, 18]
+//
+// This is equivalent to the Worst Fit of
+// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
+func ScoreFitSpread(node *Node, util *ComparableResources) float64 {
+	freePctCpu, freePctRam := computeFreePercentage(node, util)
+	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
+	score := total - 2
+
+	if score > 18.0 {
+		score = 18.0
+	} else if score < 0 {
+		score = 0
+	}
+	return score
+}
+
 func CopySliceConstraints(s []*Constraint) []*Constraint {
 	l := len(s)
 	if l == 0 {
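To make the expected values in the tests below concrete: at 50% free CPU and 50% free memory, total = 10^0.5 + 10^0.5 ~ 6.325, so ScoreFitBinPack returns 20 - 6.325 ~ 13.675 and ScoreFitSpread returns 6.325 - 2 ~ 4.325; for any utilization the two scores sum to 18. A standalone sketch of just the arithmetic:

package main

import (
	"fmt"
	"math"
)

func main() {
	// Illustration only; the real functions derive these percentages
	// from node and allocation resources via computeFreePercentage.
	freePctCpu, freePctRam := 0.5, 0.5
	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam) // ~6.325

	binPack := 20.0 - total // ~13.675: fuller nodes score higher
	spread := total - 2.0   // ~4.325: emptier nodes score higher

	fmt.Println(binPack, spread, binPack+spread) // the sum is always 18
}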
diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go
index 4302164a6..62236f0cd 100644
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -506,7 +506,7 @@ func TestAllocsFit_Devices(t *testing.T) {
 }
 
 // COMPAT(0.11): Remove in 0.11
-func TestScoreFit_Old(t *testing.T) {
+func TestScoreFitBinPack_Old(t *testing.T) {
 	node := &Node{}
 	node.Resources = &Resources{
 		CPU:    4096,
@@ -528,7 +528,7 @@ func TestScoreFit_Old(t *testing.T) {
 			},
 		},
 	}
-	score := ScoreFit(node, util)
+	score := ScoreFitBinPack(node, util)
 	if score != 18.0 {
 		t.Fatalf("bad: %v", score)
 	}
@@ -544,7 +544,7 @@ func TestScoreFit_Old(t *testing.T) {
 			},
 		},
 	}
-	score = ScoreFit(node, util)
+	score = ScoreFitBinPack(node, util)
 	if score != 0.0 {
 		t.Fatalf("bad: %v", score)
 	}
@@ -560,13 +560,13 @@ func TestScoreFit_Old(t *testing.T) {
 			},
 		},
 	}
-	score = ScoreFit(node, util)
+	score = ScoreFitBinPack(node, util)
 	if score < 10.0 || score > 16.0 {
 		t.Fatalf("bad: %v", score)
 	}
 }
 
-func TestScoreFit(t *testing.T) {
+func TestScoreFitBinPack(t *testing.T) {
 	node := &Node{}
 	node.NodeResources = &NodeResources{
 		Cpu: NodeCpuResources{
@@ -585,52 +585,53 @@ func TestScoreFit(t *testing.T) {
 		},
 	}
 
-	// Test a perfect fit
-	util := &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 2048,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 4096,
+	cases := []struct {
+		name         string
+		flattened    AllocatedTaskResources
+		binPackScore float64
+		spreadScore  float64
+	}{
+		{
+			name: "almost filled node, but with just enough hole",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 2048},
+				Memory: AllocatedMemoryResources{MemoryMB: 4096},
 			},
+			binPackScore: 18,
+			spreadScore:  0,
+		},
+		{
+			name: "unutilized node",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 0},
+				Memory: AllocatedMemoryResources{MemoryMB: 0},
+			},
+			binPackScore: 0,
+			spreadScore:  18,
+		},
+		{
+			name: "mid-case scenario",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 1024},
+				Memory: AllocatedMemoryResources{MemoryMB: 2048},
+			},
+			binPackScore: 13.675,
+			spreadScore:  4.325,
 		},
-	}
-	score := ScoreFit(node, util)
-	if score != 18.0 {
-		t.Fatalf("bad: %v", score)
 	}
 
-	// Test the worst fit
-	util = &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 0,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 0,
-			},
-		},
-	}
-	score = ScoreFit(node, util)
-	if score != 0.0 {
-		t.Fatalf("bad: %v", score)
-	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			util := &ComparableResources{Flattened: c.flattened}
 
-	// Test a mid-case scenario
-	util = &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 1024,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 2048,
-			},
-		},
-	}
-	score = ScoreFit(node, util)
-	if score < 10.0 || score > 16.0 {
-		t.Fatalf("bad: %v", score)
+			binPackScore := ScoreFitBinPack(node, util)
+			require.InDelta(t, c.binPackScore, binPackScore, 0.001, "binpack score")
+
+			spreadScore := ScoreFitSpread(node, util)
+			require.InDelta(t, c.spreadScore, spreadScore, 0.001, "spread score")
+
+			require.InDelta(t, 18, binPackScore+spreadScore, 0.001, "score sum")
+		})
 	}
 }
diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go
index 101ce6351..38d181176 100644
--- a/nomad/structs/operator.go
+++ b/nomad/structs/operator.go
@@ -1,6 +1,7 @@
 package structs
 
 import (
+	"fmt"
 	"time"
 
 	"github.com/hashicorp/raft"
@@ -132,28 +133,17 @@ type SchedulerAlgorithm string
 
 const (
-	// SchedulerAlgorithmBinpack indicates that the scheduler should spread
-	// allocations as evenly as possible over the available hardware.
-	SchedulerAlgorithmBinpack string = "binpack"
+	// SchedulerAlgorithmBinpack indicates that the scheduler should pack
+	// allocations onto as few nodes as possible.
+	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
 
 	// SchedulerAlgorithmSpread indicates that the scheduler should spread
 	// allocations as evenly as possible over the available hardware.
-	SchedulerAlgorithmSpread string = "spread"
+	SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
 )
 
-// SchedulerAlgorithmIsValid validates the given SchedulerAlgorithm string and
-// returns true only when a correct algorithm is specified.
-func SchedulerAlgorithmIsValid(alg string) bool {
-	switch alg {
-	case SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
-		return true
-	default:
-		return false
-	}
-}
-
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
 	// SchedulerAlgorithm lets you select between available scheduling algorithms.
-	SchedulerAlgorithm string `hcl:"scheduler_algorithm"`
+	SchedulerAlgorithm SchedulerAlgorithm `hcl:"scheduler_algorithm"`
 
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
@@ -164,6 +154,32 @@ type SchedulerConfiguration struct {
 	ModifyIndex uint64
 }
 
+// EffectiveSchedulerAlgorithm returns the algorithm the scheduler should use,
+// defaulting to binpack when the config is nil or the field is unset.
+func (s *SchedulerConfiguration) EffectiveSchedulerAlgorithm() SchedulerAlgorithm {
+	if s == nil || s.SchedulerAlgorithm == "" {
+		return SchedulerAlgorithmBinpack
+	}
+
+	return s.SchedulerAlgorithm
+}
+
+// Validate returns an error if the scheduler algorithm is set to an
+// unsupported value.
+func (s *SchedulerConfiguration) Validate() error {
+	if s == nil {
+		return nil
+	}
+
+	switch s.SchedulerAlgorithm {
+	case "", SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
+	default:
+		return fmt.Errorf("invalid scheduler algorithm: %v", s.SchedulerAlgorithm)
+	}
+
+	return nil
+}
+
 // SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration
 type SchedulerConfigurationResponse struct {
 	// SchedulerConfig contains scheduler config options
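A quick sketch of how the two new helpers behave, including the nil-receiver case that lets stack.go and stack_oss.go below drop their nil checks:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// Both helpers are safe on a nil config.
	var conf *structs.SchedulerConfiguration
	fmt.Println(conf.EffectiveSchedulerAlgorithm()) // binpack
	fmt.Println(conf.Validate())                    // <nil>

	// An unset field also falls back to binpack and passes validation.
	empty := &structs.SchedulerConfiguration{}
	fmt.Println(empty.EffectiveSchedulerAlgorithm()) // binpack

	// Anything other than "", "binpack", or "spread" fails validation.
	bad := &structs.SchedulerConfiguration{SchedulerAlgorithm: "best-fit"}
	fmt.Println(bad.Validate()) // invalid scheduler algorithm: best-fit
}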
diff --git a/scheduler/preemption_test.go b/scheduler/preemption_test.go
index 798c3985c..f8a43d7d9 100644
--- a/scheduler/preemption_test.go
+++ b/scheduler/preemption_test.go
@@ -1349,7 +1349,7 @@ func TestPreemption(t *testing.T) {
 				ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions
 			}
 			static := NewStaticRankIterator(ctx, nodes)
-			binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority)
+			binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, structs.SchedulerAlgorithmBinpack)
 			job := mock.Job()
 			job.Priority = tc.jobPriority
 			binPackIter.SetJob(job)
diff --git a/scheduler/rank.go b/scheduler/rank.go
index 6556ab7af..06ef9c452 100644
--- a/scheduler/rank.go
+++ b/scheduler/rank.go
@@ -151,20 +151,25 @@ type BinPackIterator struct {
 	source    RankIterator
 	evict     bool
 	priority  int
-	algorithm string
 	jobId     *structs.NamespacedID
 	taskGroup *structs.TaskGroup
+	scoreFit  func(*structs.Node, *structs.ComparableResources) float64
 }
 
 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks
 // potentially evicting other tasks based on a given priority.
-func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm string) *BinPackIterator {
+func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm structs.SchedulerAlgorithm) *BinPackIterator {
+	scoreFn := structs.ScoreFitBinPack
+	if algorithm == structs.SchedulerAlgorithmSpread {
+		scoreFn = structs.ScoreFitSpread
+	}
+
 	iter := &BinPackIterator{
-		ctx:       ctx,
-		source:    source,
-		evict:     evict,
-		priority:  priority,
-		algorithm: algorithm,
+		ctx:      ctx,
+		source:   source,
+		evict:    evict,
+		priority: priority,
+		scoreFit: scoreFn,
 	}
 	iter.ctx.Logger().Named("binpack").Trace("NewBinPackIterator created", "algorithm", algorithm)
 	return iter
@@ -439,7 +444,7 @@ OUTER:
 		}
 
 		// Score the fit normally otherwise
-		fitness := structs.ScoreFit(option.Node, util, iter.algorithm)
+		fitness := iter.scoreFit(option.Node, util)
 		normalizedFit := fitness / binPackingMaxFitScore
 		option.Scores = append(option.Scores, normalizedFit)
 		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)
diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go
index aa1ee01a1..7454d075a 100644
--- a/scheduler/rank_test.go
+++ b/scheduler/rank_test.go
@@ -107,7 +107,7 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -220,7 +220,7 @@ func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -471,7 +471,7 @@ func TestBinPackIterator_Network_Failure(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -569,7 +569,7 @@ func TestBinPackIterator_PlannedAlloc(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -685,7 +685,7 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -805,7 +805,7 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -1110,7 +1110,7 @@ func TestBinPackIterator_Devices(t *testing.T) {
 			}
 
 			static := NewStaticRankIterator(ctx, []*RankedNode{{Node: c.Node}})
-			binp := NewBinPackIterator(ctx, static, false, 0)
+			binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 			binp.SetTaskGroup(c.TaskGroup)
 
 			out := binp.Next()
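Design note on the scoreFit field above: the algorithm is resolved to a scoring function once in the constructor, so the per-node loop in Next no longer branches on a string. A hypothetical helper (name invented for illustration) inside the scheduler package, mirroring the wiring in NewGenericStack below:

// The configured algorithm flows from state into the iterator exactly once.
func newRankedIterator(ctx Context, rankSource RankIterator) *BinPackIterator {
	_, schedConfig, _ := ctx.State().SchedulerConfig()
	alg := schedConfig.EffectiveSchedulerAlgorithm() // nil-safe, defaults to binpack
	return NewBinPackIterator(ctx, rankSource, false, 0, alg)
}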
diff --git a/scheduler/stack.go b/scheduler/stack.go
index d1a484fd9..b9ceb19bb 100644
--- a/scheduler/stack.go
+++ b/scheduler/stack.go
@@ -237,11 +237,10 @@ func NewSystemStack(ctx Context) *SystemStack {
 	// by a particular task group. Enable eviction as system jobs are high
 	// priority.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
+	schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm()
 	enablePreemption := true
-	schedulerAlgorithm := "binpack"
 	if schedConfig != nil {
 		enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled
-		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
 	}
 
 	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0, schedulerAlgorithm)
diff --git a/scheduler/stack_oss.go b/scheduler/stack_oss.go
index 8aaa98031..a705b8c73 100644
--- a/scheduler/stack_oss.go
+++ b/scheduler/stack_oss.go
@@ -61,10 +61,7 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
 	// Apply the bin packing, this depends on the resources needed
 	// by a particular task group.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
-	schedulerAlgorithm := "binpack"
-	if schedConfig != nil {
-		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
-	}
+	schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm()
 
 	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0, schedulerAlgorithm)