core: fix node reservation scoring
The BinPackIter accounted for node reservations twice when scoring nodes, which could bias scores toward nodes with reservations.

Pseudo-code for the previous algorithm:

```
proposed  = reservedResources + sum(allocsResources)
available = nodeResources - reservedResources
score     = 1 - (proposed / available)
```

The node's reserved resources were added to the total resources used by allocations, and then later subtracted from the node's overall resources. The new algorithm is:

```
proposed  = sum(allocsResources)
available = nodeResources - reservedResources
score     = 1 - (proposed / available)
```

The node's reserved resources are no longer added to the total resources used by allocations.

My guess as to how this bug happened: the resource utilization variable (`util`) is calculated and returned by the `AllocsFit` function, which needs to take reserved resources into account as a basic feasibility check. To avoid re-calculating alloc resource usage (because there may be a large number of allocs), we reused `util` in the `ScoreFit` function. `ScoreFit` properly accounts for reserved resources by subtracting them from the node's overall resources, but since `util` _also_ took reserved resources into account, the score would be incorrect.

Prior to the fix, the added test output:

```
Node: reserved     Score: 1.0000
Node: reserved2    Score: 1.0000
Node: no-reserved  Score: 0.9741
```

The scores being 1.0 for *both* nodes with reserved resources is a good hint that something is wrong, as they should receive different scores. Upon further inspection, the double accounting of reserved resources caused their scores to exceed 1.0 and be clamped.

After the fix, the added test outputs:

```
Node: no-reserved  Score: 0.9741
Node: reserved     Score: 0.9480
Node: reserved2    Score: 0.8717
```
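To make the double counting concrete, here is a minimal, runnable sketch that plugs the fixture values from the added test into both accounting schemes. It is not Nomad's actual `ScoreFit` (which uses a different scoring curve); `node`, `utilizationOld`, and `utilizationNew` are illustrative names that collapse resources to a single dimension:

```go
package main

import "fmt"

// node collapses a Nomad node's resources to one CPU-like dimension;
// an illustrative stand-in, not the real structs.Node.
type node struct {
	name     string
	total    int64 // total node resources
	reserved int64 // resources reserved away from the scheduler
}

// utilizationOld reproduces the buggy accounting: reserved resources are
// counted in the allocs' usage *and* subtracted from the node's capacity.
func utilizationOld(n node, allocs int64) float64 {
	proposed := n.reserved + allocs
	available := n.total - n.reserved
	return float64(proposed) / float64(available)
}

// utilizationNew counts reserved resources once, on the capacity side only.
func utilizationNew(n node, allocs int64) float64 {
	available := n.total - n.reserved
	return float64(allocs) / float64(available)
}

func main() {
	const allocs = 1000 // the test's single task asks for 1000 CPU / 1000 MB
	for _, n := range []node{
		{"no-reserved", 1100, 0},
		{"reserved", 2000, 800},
		{"reserved2", 2000, 500},
	} {
		fmt.Printf("%-12s old=%.3f new=%.3f\n",
			n.name, utilizationOld(n, allocs), utilizationNew(n, allocs))
	}
	// no-reserved  old=0.909 new=0.909
	// reserved     old=1.500 new=0.833
	// reserved2    old=1.000 new=0.667
	// Bin packing rewards higher utilization, so the inflated "old" ratios
	// saturate both reserved nodes and clamp them to the same maximum
	// score, matching the identical 1.0000 log lines above.
}
```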
```diff
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -101,12 +101,9 @@ func FilterTerminalAllocs(allocs []*Allocation) ([]*Allocation, map[string]*Allo
 // ensured there are no collisions. If checkDevices is set to true, we check if
 // there is a device oversubscription.
 func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevices bool) (bool, string, *ComparableResources, error) {
-	// Compute the utilization from zero
+	// Compute the allocs' utilization from zero
 	used := new(ComparableResources)
 
-	// Add the reserved resources of the node
-	used.Add(node.ComparableReservedResources())
-
 	// For each alloc, add the resources
 	for _, alloc := range allocs {
 		// Do not consider the resource impact of terminal allocations
@@ -117,9 +114,11 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 		used.Add(alloc.ComparableResources())
 	}
 
-	// Check that the node resources are a super set of those
-	// that are being allocated
-	if superset, dimension := node.ComparableResources().Superset(used); !superset {
+	// Check that the node resources (after subtracting reserved) are a
+	// super set of those that are being allocated
+	available := node.ComparableResources()
+	available.Subtract(node.ComparableReservedResources())
+	if superset, dimension := available.Superset(used); !superset {
 		return false, dimension, used, nil
 	}
```
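For the new control flow at a glance, here is a hedged sketch of the fixed feasibility check with `ComparableResources` reduced to a single CPU field; `res` and `allocsFit` are stand-ins for illustration, not the real structs API:

```go
package main

import "fmt"

// res is a one-field stand-in for ComparableResources.
type res struct{ cpu int64 }

func (r *res) Add(o res)      { r.cpu += o.cpu }
func (r *res) Subtract(o res) { r.cpu -= o.cpu }

// allocsFit mirrors the fixed AllocsFit: usage is summed from the allocs
// alone, and reserved resources are carved out of the node's capacity
// exactly once, on the available side.
func allocsFit(total, reserved res, allocs []res) (bool, res) {
	used := new(res)
	for _, a := range allocs {
		used.Add(a) // reserved is no longer added here
	}
	available := total
	available.Subtract(reserved)
	return used.cpu <= available.cpu, *used
}

func main() {
	total, reserved := res{2000}, res{800} // the "reserved" test node
	fit, used := allocsFit(total, reserved, []res{{1000}})
	fmt.Println(fit, used.cpu) // true 1000: one alloc fits, 1000 <= 1200
	fit, used = allocsFit(total, reserved, []res{{1000}, {1000}})
	fmt.Println(fit, used.cpu) // false 2000: two allocs do not, 2000 > 1200
}
```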
```diff
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -188,8 +188,8 @@ func TestAllocsFit_Old(t *testing.T) {
 	require.True(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 
 	// Should not fit second allocation
 	fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
@@ -197,8 +197,8 @@ func TestAllocsFit_Old(t *testing.T) {
 	require.False(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(3000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(3072, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
 }
 
 // COMPAT(0.11): Remove in 0.11
@@ -255,8 +255,8 @@ func TestAllocsFit_TerminalAlloc_Old(t *testing.T) {
 	require.True(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 
 	// Should fit second allocation since it is terminal
 	a2 := a1.Copy()
@@ -266,8 +266,8 @@ func TestAllocsFit_TerminalAlloc_Old(t *testing.T) {
 	require.True(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 }
 
 func TestAllocsFit(t *testing.T) {
@@ -340,8 +340,8 @@ func TestAllocsFit(t *testing.T) {
 	require.True(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 
 	// Should not fit second allocation
 	fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
@@ -349,8 +349,8 @@ func TestAllocsFit(t *testing.T) {
 	require.False(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(3000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(3072, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
 }
 
 func TestAllocsFit_TerminalAlloc(t *testing.T) {
@@ -424,8 +424,8 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) {
 	require.True(fit)
 
 	// Sanity check the used resources
-	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 
 	// Should fit second allocation since it is terminal
 	a2 := a1.Copy()
@@ -435,8 +435,8 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) {
 	require.True(fit, dim)
 
 	// Sanity check the used resources
-	require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
-	require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
+	require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 }
 
 // Tests that AllocsFit detects device collisions
```
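Note on the assertion changes above: `used`, as returned by `AllocsFit`, now contains only the allocations' own usage, so every expected value drops by exactly the node's reserved resources, which in these fixtures appear to be 1000 CPU shares and 1024 MB of memory.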
```diff
--- a/scheduler/rank_test.go
+++ b/scheduler/rank_test.go
@@ -1,6 +1,7 @@
 package scheduler
 
 import (
+	"sort"
 	"testing"
 
 	"github.com/hashicorp/nomad/helper/uuid"
@@ -122,11 +123,128 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) {
 	if out[0].FinalScore != 1.0 {
 		t.Fatalf("Bad Score: %v", out[0].FinalScore)
 	}
-	if out[1].FinalScore < 0.75 || out[1].FinalScore > 0.95 {
+	if out[1].FinalScore < 0.50 || out[1].FinalScore > 0.60 {
 		t.Fatalf("Bad Score: %v", out[1].FinalScore)
 	}
 }
 
+// TestBinPackIterator_NoExistingAlloc_MixedReserve asserts that nodes with
+// reserved resources are scored as if they had a correspondingly lower
+// amount of resources.
+func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) {
+	_, ctx := testContext(t)
+	nodes := []*RankedNode{
+		{
+			// Best fit
+			Node: &structs.Node{
+				Name: "no-reserved",
+				NodeResources: &structs.NodeResources{
+					Cpu: structs.NodeCpuResources{
+						CpuShares: 1100,
+					},
+					Memory: structs.NodeMemoryResources{
+						MemoryMB: 1100,
+					},
+				},
+			},
+		},
+		{
+			// Not best fit if reserve is calculated properly
+			Node: &structs.Node{
+				Name: "reserved",
+				NodeResources: &structs.NodeResources{
+					Cpu: structs.NodeCpuResources{
+						CpuShares: 2000,
+					},
+					Memory: structs.NodeMemoryResources{
+						MemoryMB: 2000,
+					},
+				},
+				ReservedResources: &structs.NodeReservedResources{
+					Cpu: structs.NodeReservedCpuResources{
+						CpuShares: 800,
+					},
+					Memory: structs.NodeReservedMemoryResources{
+						MemoryMB: 800,
+					},
+				},
+			},
+		},
+		{
+			// Even worse fit due to reservations
+			Node: &structs.Node{
+				Name: "reserved2",
+				NodeResources: &structs.NodeResources{
+					Cpu: structs.NodeCpuResources{
+						CpuShares: 2000,
+					},
+					Memory: structs.NodeMemoryResources{
+						MemoryMB: 2000,
+					},
+				},
+				ReservedResources: &structs.NodeReservedResources{
+					Cpu: structs.NodeReservedCpuResources{
+						CpuShares: 500,
+					},
+					Memory: structs.NodeReservedMemoryResources{
+						MemoryMB: 500,
+					},
+				},
+			},
+		},
+		{
+			Node: &structs.Node{
+				Name: "overloaded",
+				NodeResources: &structs.NodeResources{
+					Cpu: structs.NodeCpuResources{
+						CpuShares: 900,
+					},
+					Memory: structs.NodeMemoryResources{
+						MemoryMB: 900,
+					},
+				},
+			},
+		},
+	}
+	static := NewStaticRankIterator(ctx, nodes)
+
+	taskGroup := &structs.TaskGroup{
+		EphemeralDisk: &structs.EphemeralDisk{},
+		Tasks: []*structs.Task{
+			{
+				Name: "web",
+				Resources: &structs.Resources{
+					CPU:      1000,
+					MemoryMB: 1000,
+				},
+			},
+		},
+	}
+	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp.SetTaskGroup(taskGroup)
+
+	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
+
+	out := collectRanked(scoreNorm)
+
+	// Sort descending (highest score to lowest) and log for debugging
+	sort.Slice(out, func(i, j int) bool { return out[i].FinalScore > out[j].FinalScore })
+	for i := range out {
+		t.Logf("Node: %-12s Score: %-1.4f", out[i].Node.Name, out[i].FinalScore)
+	}
+
+	// 3 nodes should be feasible
+	require.Len(t, out, 3)
+
+	// Node without reservations is the best fit
+	require.Equal(t, nodes[0].Node.Name, out[0].Node.Name)
+
+	// Node with smallest remaining resources ("best fit") should get a
+	// higher score than node with more remaining resources ("worse fit")
+	require.Equal(t, nodes[1].Node.Name, out[1].Node.Name)
+	require.Equal(t, nodes[2].Node.Name, out[2].Node.Name)
+}
+
 // Tests bin packing iterator with network resources at task and task group level
 func TestBinPackIterator_Network_Success(t *testing.T) {
 	_, ctx := testContext(t)
@@ -239,7 +357,7 @@ func TestBinPackIterator_Network_Success(t *testing.T) {
 	// First node should have a perfect score
 	require.Equal(1.0, out[0].FinalScore)
 
-	if out[1].FinalScore < 0.75 || out[1].FinalScore > 0.95 {
+	if out[1].FinalScore < 0.50 || out[1].FinalScore > 0.60 {
 		t.Fatalf("Bad Score: %v", out[1].FinalScore)
 	}
```