From 55523ecf8e498949fbf9014a26eba587937b642f Mon Sep 17 00:00:00 2001 From: tehut Date: Thu, 22 May 2025 12:49:27 -0700 Subject: [PATCH] Add NodeMaxAllocations to client configuration (#25785) * Set MaxAllocations in client config Add NodeAllocationTracker struct to Node struct Evaluate MaxAllocations in AllocsFit function Set up cli config parsing Integrate maxAllocs into AllocatedResources view Co-authored-by: Tim Gross --------- Co-authored-by: Tim Gross --- .changelog/25785.txt | 3 + api/nodes.go | 1 + client/client.go | 2 + client/config/config.go | 4 + command/agent/agent.go | 1 + command/agent/config.go | 7 + command/agent/config_parse_test.go | 5 +- command/agent/config_test.go | 24 ++++ .../test-resources/client_with_maxallocs.hcl | 7 + .../test-resources/client_with_maxallocs.json | 6 + command/agent/testdata/sample0.json | 3 + command/node_status.go | 11 +- nomad/structs/funcs.go | 6 +- nomad/structs/funcs_test.go | 75 ++++++++++ nomad/structs/structs.go | 4 +- scheduler/rank_test.go | 132 ++++++++++++++++++ website/content/docs/commands/node/status.mdx | 33 +++-- website/content/docs/configuration/client.mdx | 4 + 18 files changed, 311 insertions(+), 17 deletions(-) create mode 100644 .changelog/25785.txt create mode 100644 command/agent/test-resources/client_with_maxallocs.hcl create mode 100644 command/agent/test-resources/client_with_maxallocs.json diff --git a/.changelog/25785.txt b/.changelog/25785.txt new file mode 100644 index 000000000..4b044ea5a --- /dev/null +++ b/.changelog/25785.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: add ability to set maximum allocation count by adding node_max_allocs to client configuration +``` diff --git a/api/nodes.go b/api/nodes.go index 1d4cf4e65..75df0957c 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -572,6 +572,7 @@ type Node struct { LastDrain *DrainMetadata CreateIndex uint64 ModifyIndex uint64 + NodeMaxAllocs int } type NodeResources struct { diff --git a/client/client.go b/client/client.go 
index ef08ff5dc..e3f02e03c 100644 --- a/client/client.go +++ b/client/client.go @@ -1628,6 +1628,8 @@ func (c *Client) setupNode() error { if _, ok := node.Meta[envoy.DefaultTransparentProxyOutboundPortParam]; !ok { node.Meta[envoy.DefaultTransparentProxyOutboundPortParam] = envoy.DefaultTransparentProxyOutboundPort } + // Set NodeMaxAllocs before dynamic configuration is set + node.NodeMaxAllocs = newConfig.NodeMaxAllocs // Since node.Meta will get dynamic metadata merged in, save static metadata // here. diff --git a/client/config/config.go b/client/config/config.go index ae6af7e30..b32956929 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -375,6 +375,10 @@ type Config struct { // ExtraAllocHooks are run with other allocation hooks, mainly for testing. ExtraAllocHooks []interfaces.RunnerHook + + // NodeMaxAllocs is an optional field that sets the maximum number of + // allocations a node can be assigned. Defaults to 0 and ignored if unset. + NodeMaxAllocs int } type APIListenerRegistrar interface { diff --git a/command/agent/agent.go b/command/agent/agent.go index 90606c754..23b83214a 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -787,6 +787,7 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { if agentConfig.Client.NetworkInterface != "" { conf.NetworkInterface = agentConfig.Client.NetworkInterface } + conf.NodeMaxAllocs = agentConfig.Client.NodeMaxAllocs // handle rpc yamux configuration conf.RPCSessionConfig = yamux.DefaultConfig() diff --git a/command/agent/config.go b/command/agent/config.go index a98f2c30b..8b67fda80 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -415,6 +415,10 @@ type ClientConfig struct { // ExtraKeysHCL is used by hcl to surface unexpected keys ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"` + + // NodeMaxAllocs sets the maximum number of allocations per node + // Defaults to 0 and ignored if unset. 
+ NodeMaxAllocs int `hcl:"node_max_allocs"` } func (c *ClientConfig) Copy() *ClientConfig { @@ -2652,6 +2656,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { result.Drain = a.Drain.Merge(b.Drain) result.Users = a.Users.Merge(b.Users) + if b.NodeMaxAllocs != 0 { + result.NodeMaxAllocs = b.NodeMaxAllocs + } return &result } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 485af4643..d7d8b13fc 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -696,7 +696,10 @@ var sample0 = &Config{ RPC: "host.example.com", Serf: "host.example.com", }, - Client: &ClientConfig{ServerJoin: &ServerJoin{}}, + Client: &ClientConfig{ + ServerJoin: &ServerJoin{}, + NodeMaxAllocs: 5, + }, Server: &ServerConfig{ Enabled: true, BootstrapExpect: 3, diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 201868c78..ab77b4388 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -1872,3 +1872,27 @@ func Test_mergeKEKProviderConfigs(t *testing.T) { }, }, result) } + +func TestConfig_LoadClientNodeMaxAllocs(t *testing.T) { // loads each fixture, merges it over DefaultConfig, and expects Client.NodeMaxAllocs to be 5 + ci.Parallel(t) + testCases := []struct { + fileName string + }{ + { + fileName: "test-resources/client_with_maxallocs.hcl", + }, + { + fileName: "test-resources/client_with_maxallocs.json", + }, + } + for _, tc := range testCases { + t.Run(tc.fileName, func(t *testing.T) { // name the subtest after its fixture so HCL and JSON failures are distinguishable + defaultConfig := DefaultConfig() + agentConfig, err := LoadConfig(tc.fileName) + must.NoError(t, err) + agentConfig = defaultConfig.Merge(agentConfig) + must.Eq(t, 5, agentConfig.Client.NodeMaxAllocs) + }) + } + +} diff --git a/command/agent/test-resources/client_with_maxallocs.hcl b/command/agent/test-resources/client_with_maxallocs.hcl new file mode 100644 index 000000000..b0a188c2a --- /dev/null +++ b/command/agent/test-resources/client_with_maxallocs.hcl @@ -0,0 +1,7 @@ +# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1 + +client { + enabled = true + node_max_allocs = 5 +} diff --git a/command/agent/test-resources/client_with_maxallocs.json b/command/agent/test-resources/client_with_maxallocs.json new file mode 100644 index 000000000..65f869c55 --- /dev/null +++ b/command/agent/test-resources/client_with_maxallocs.json @@ -0,0 +1,6 @@ +{ + "client": { + "enabled": true, + "node_max_allocs": 5 + } +} diff --git a/command/agent/testdata/sample0.json b/command/agent/testdata/sample0.json index a836f93c0..1e2fa06e2 100644 --- a/command/agent/testdata/sample0.json +++ b/command/agent/testdata/sample0.json @@ -45,6 +45,9 @@ "client_auto_join": false, "token": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" }, + "client": { + "node_max_allocs": 5 + }, "data_dir": "/opt/data/nomad/data", "datacenter": "dc1", "enable_syslog": true, diff --git a/command/node_status.go b/command/node_status.go index 7125c0af4..df31b3c02 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -954,16 +954,21 @@ func getAllocatedResources(client *api.Client, runningAllocs []*api.Allocation, mem += *alloc.Resources.MemoryMB disk += *alloc.Resources.DiskMB } + allocCount := strconv.Itoa(len(runningAllocs)) + if node.NodeMaxAllocs != 0 { + allocCount = fmt.Sprintf("%d/%d", len(runningAllocs), node.NodeMaxAllocs) + } resources := make([]string, 2) - resources[0] = "CPU|Memory|Disk" - resources[1] = fmt.Sprintf("%d/%d MHz|%s/%s|%s/%s", + resources[0] = "CPU|Memory|Disk|Alloc Count" + resources[1] = fmt.Sprintf("%d/%d MHz|%s/%s|%s/%s|%s", cpu, *total.CPU, humanize.IBytes(uint64(mem*bytesPerMegabyte)), humanize.IBytes(uint64(*total.MemoryMB*bytesPerMegabyte)), humanize.IBytes(uint64(disk*bytesPerMegabyte)), - humanize.IBytes(uint64(*total.DiskMB*bytesPerMegabyte))) + humanize.IBytes(uint64(*total.DiskMB*bytesPerMegabyte)), + allocCount) return resources } diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go index 32000467b..7d5bebf45 100644 --- a/nomad/structs/funcs.go 
+++ b/nomad/structs/funcs.go @@ -141,7 +141,11 @@ func (a TerminalByNodeByName) Get(nodeID, name string) (*Allocation, bool) { func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevices bool) (bool, string, *ComparableResources, error) { // Compute the allocs' utilization from zero used := new(ComparableResources) - + if node.NodeMaxAllocs != 0 { // zero means no limit is configured + if node.NodeMaxAllocs < len(allocs) { // NOTE(review): compares against len(allocs) as passed in; confirm callers expect every entry to count toward the limit + return false, "max allocation exceeded", used, fmt.Errorf("plan exceeds max allocation") + } + } reservedCores := map[uint16]struct{}{} var coreOverlap bool diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go index 1f27e729d..269e5c899 100644 --- a/nomad/structs/funcs_test.go +++ b/nomad/structs/funcs_test.go @@ -716,6 +716,81 @@ func TestScoreFitBinPack(t *testing.T) { } } +func TestAllocsFit_MaxNodeAllocs(t *testing.T) { // checks AllocsFit against Node.NodeMaxAllocs with alloc lists below and above the limit + ci.Parallel(t) + baseAlloc := &Allocation{ + AllocatedResources: &AllocatedResources{ + Tasks: map[string]*AllocatedTaskResources{ + "web": { + Cpu: AllocatedCpuResources{ + CpuShares: 1000, + ReservedCores: []uint16{}, + }, + Memory: AllocatedMemoryResources{ + MemoryMB: 1024, + }, + }, + }, + Shared: AllocatedSharedResources{ + DiskMB: 5000, + Networks: Networks{ + { + Mode: "host", + IP: "10.0.0.1", + ReservedPorts: []Port{{Label: "main", Value: 8000}}, + }, + }, + Ports: AllocatedPorts{ + { + Label: "main", + Value: 8000, + HostIP: "10.0.0.1", + }, + }, + }, + }, + } + + testCases := []struct { + name string + allocations []*Allocation + expectErr bool + maxAllocs int + }{ + { + name: "happy_path", + allocations: []*Allocation{baseAlloc}, + expectErr: false, + maxAllocs: 2, + }, + { + name: "too many allocs", + allocations: []*Allocation{baseAlloc, baseAlloc, baseAlloc}, + expectErr: true, + maxAllocs: 2, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + n := node2k() + n.NodeMaxAllocs = tc.maxAllocs + fit, dim, used, err := AllocsFit(n, tc.allocations, nil, false) + if 
!tc.expectErr { + must.NoError(t, err) + must.True(t, fit) + must.Eq(t, 1000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 1024, used.Flattened.Memory.MemoryMB) + } else { // the limit check returns the freshly allocated used value, so its totals are expected to be zero + must.False(t, fit) + must.StrContains(t, dim, "max allocation exceeded") + must.ErrorContains(t, err, "plan exceeds max allocation") + must.Eq(t, 0, used.Flattened.Cpu.CpuShares) + must.Eq(t, 0, used.Flattened.Memory.MemoryMB) + } + }) + } +} func TestACLPolicyListHash(t *testing.T) { ci.Parallel(t) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f781d506a..a872927b0 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2163,6 +2163,9 @@ type Node struct { // LastDrain contains metadata about the most recent drain operation LastDrain *DrainMetadata + // NodeMaxAllocs defaults to 0 unless set in the client config + NodeMaxAllocs int + // LastMissedHeartbeatIndex stores the Raft index when the node last missed // a heartbeat. It resets to zero once the node is marked as ready again.
LastMissedHeartbeatIndex uint64 @@ -2325,7 +2328,6 @@ func (n *Node) HasEvent(msg string) bool { // Stub returns a summarized version of the node func (n *Node) Stub(fields *NodeStubFields) *NodeListStub { - addr, _, _ := net.SplitHostPort(n.HTTPAddr) s := &NodeListStub{ diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go index 410a75211..addf2336a 100644 --- a/scheduler/rank_test.go +++ b/scheduler/rank_test.go @@ -2081,6 +2081,138 @@ func TestBinPackIterator_Device_Failure_With_Eviction(t *testing.T) { must.Eq(t, 1, ctx.metrics.DimensionExhausted["devices: no devices match request"]) } +// TestBinPackIterator_MaxAlloc checks how many nodes the bin packing iterator ranks +// when each node already has one alloc and a per-case NodeMaxAllocs value is applied +func TestBinPackIterator_MaxAlloc(t *testing.T) { + state, ctx := testContext(t) + + taskGen := func(name string) *structs.Task { + return &structs.Task{ + Name: name, + Resources: &structs.Resources{}, + } + } + nodes := []*RankedNode{ + { + Node: &structs.Node{ + ID: uuid.Generate(), + NodeResources: &structs.NodeResources{ + Processors: processorResources2048, + Cpu: legacyCpuResources2048, + Memory: structs.NodeMemoryResources{ + MemoryMB: 2048, + }, + }, + }, + }, + { + Node: &structs.Node{ + ID: uuid.Generate(), + NodeResources: &structs.NodeResources{ + Processors: processorResources2048, + Cpu: legacyCpuResources2048, + Memory: structs.NodeMemoryResources{ + MemoryMB: 2048, + }, + }, + }, + }, + } + // Add 1 existing allocation to each node + j1, j2 := mock.Job(), mock.Job() + alloc1 := &structs.Allocation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + EvalID: uuid.Generate(), + NodeID: nodes[0].Node.ID, + JobID: j1.ID, + Job: j1, + AllocatedResources: &structs.AllocatedResources{}, + DesiredStatus: structs.AllocDesiredStatusRun, + ClientStatus: structs.AllocClientStatusPending, + TaskGroup: "web", + } + alloc2 := &structs.Allocation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + EvalID: uuid.Generate(), + 
NodeID: nodes[1].Node.ID, + JobID: j2.ID, + Job: j2, + AllocatedResources: &structs.AllocatedResources{}, + DesiredStatus: structs.AllocDesiredStatusRun, + ClientStatus: structs.AllocClientStatusPending, + TaskGroup: "web", + } + must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(alloc1.JobID))) + must.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID))) + must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc1, alloc2})) + + testCases := []struct { + name string + maxAllocNode1 int + maxAllocNode2 int + tasks []*structs.Task + tasksOn1 int // NOTE(review): never set or read in this test + nodesPlaced int + noNodes bool // NOTE(review): set in the no_nodes case but never read + }{ + { + name: "both_nodes", + maxAllocNode1: 2, + maxAllocNode2: 2, + tasks: []*structs.Task{ + taskGen("web1"), + taskGen("web2"), + taskGen("web3")}, + nodesPlaced: 2, + }, + { + name: "only_node2", + maxAllocNode1: 1, + maxAllocNode2: 2, + tasks: []*structs.Task{ + taskGen("web1"), + taskGen("web2"), + taskGen("web3")}, + nodesPlaced: 1, + }, + { + name: "no_nodes", + maxAllocNode1: 1, + maxAllocNode2: 1, + tasks: []*structs.Task{ + taskGen("web1"), + taskGen("web2"), + taskGen("web3")}, + nodesPlaced: 0, + noNodes: true, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // apply this case's per-node allocation limits; the nodes slice is shared by all subtests + nodes[0].Node.NodeMaxAllocs = tc.maxAllocNode1 + nodes[1].Node.NodeMaxAllocs = tc.maxAllocNode2 + static := NewStaticRankIterator(ctx, nodes) + + // Create task group with empty resource sets + taskGroup := &structs.TaskGroup{ + EphemeralDisk: &structs.EphemeralDisk{}, + Tasks: tc.tasks, + } + // Create BinPackIterator and evaluate tasks + binp := NewBinPackIterator(ctx, static, false, 0) + binp.SetTaskGroup(taskGroup) + binp.SetSchedulerConfiguration(testSchedulerConfig) + scoreNorm := NewScoreNormalizationIterator(ctx, binp) + + // Place tasks + out := collectRanked(scoreNorm) + must.Len(t, tc.nodesPlaced, out) // the number of nodes collected from the iterator chain must match the case's expectation + }) + } +} func TestJobAntiAffinity_PlannedAlloc(t *testing.T) { _, ctx := 
testContext(t) nodes := []*RankedNode{ diff --git a/website/content/docs/commands/node/status.mdx b/website/content/docs/commands/node/status.mdx index 98f98c7e8..3909fa34b 100644 --- a/website/content/docs/commands/node/status.mdx +++ b/website/content/docs/commands/node/status.mdx @@ -95,13 +95,22 @@ f35be281-85a5-d1e6-d268-6e8a6f0684df **NOTE**: `-quiet` cannot be used in conjunction with `-verbose` or `-json`. +List view with verbose: + +```shell-session +$ nomad node status -verbose +ID Node Pool DC Name Class Address Version Drain Eligibility Status +4d2ba53b default dc1 node1 127.0.0.1 1.10.1 false eligible ready +34dfba32 dev dc1 node2 127.0.0.1 1.10.1 false eligible ready +``` + List view, with running allocations: ```shell-session $ nomad node status -allocs -ID Node Pool DC Name Class Drain Eligibility Status Running Allocs -4d2ba53b default dc1 node1 false eligible ready 1 -34dfba32 dev dc1 node2 false eligible ready 3 +ID Node Pool DC Name Class Address Version Drain Eligibility Status Running Allocs +4d2ba53b default dc1 node1 127.0.0.1 1.10.1 false eligible ready 1 +34dfba32 dev dc1 node2 127.0.0.1 1.10.1 false eligible ready 3 ``` Single-node view in short mode: @@ -120,6 +129,7 @@ Uptime = 17h2m25s Allocations ID Eval ID Job ID Task Group Desired Status Client Status 0b8b9e37 8bf94335 example cache run running + ``` Full output for a single node: @@ -150,8 +160,8 @@ Time Subsystem Message 2018-03-29T17:23:42Z Cluster Node registered Allocated Resources -CPU Memory Disk -500/2600 MHz 256 MiB/2.0 GiB 300 MiB/32 GiB +CPU Memory Disk Alloc Count +500/2600 MHz 256 MiB/2.0 GiB 300 MiB/32 GiB 1/50 Allocation Resource Utilization CPU Memory @@ -194,8 +204,8 @@ Time Subsystem Message 2018-03-29T17:23:42Z Cluster Node registered Allocated Resources -CPU Memory Disk -2500/2600 MHz 1.3 GiB/2.0 GiB 1.5 GiB/32 GiB +CPU Memory Disk Alloc Count +2500/2600 MHz 1.3 GiB/2.0 GiB 1.5 GiB/32 GiB 5/50 Allocation Resource Utilization CPU Memory @@ -264,9 +274,10 @@ Time 
Subsystem Message 2018-03-29T17:24:42Z Driver: docker Driver docker is not detected 2018-03-29T17:23:42Z Cluster Node registered + Allocated Resources -CPU Memory Disk -2500/2600 MHz 1.3 GiB/2.0 GiB 1.5 GiB/32 GiB +CPU Memory Disk Alloc Count +2500/2600 MHz 1.3 GiB/2.0 GiB 1.5 GiB/32 GiB 5/50 Allocation Resource Utilization CPU Memory @@ -354,8 +365,8 @@ Time Subsystem Message Details 2018-03-29T17:23:42Z Cluster Node registered Allocated Resources -CPU Memory Disk -2500/2600 MHz 1.3 GiB/2.0 GiB 1.5 GiB/32 GiB +CPU Memory Disk Alloc Count +2500/2600 MHz 1.3 GiB/2.0 GiB 1.5 GiB/32 GiB 5/50 Allocation Resource Utilization CPU Memory diff --git a/website/content/docs/configuration/client.mdx b/website/content/docs/configuration/client.mdx index 0e1de8920..23e586cfc 100644 --- a/website/content/docs/configuration/client.mdx +++ b/website/content/docs/configuration/client.mdx @@ -112,6 +112,10 @@ client { placement as an [`affinity`][] or [`constraint`][] attribute and other places where [variable interpolation][runtime_var_interpolation] is supported. +- `node_max_allocs` `(int: 0)` - Specifies the maximum number of allocations + that may be scheduled on a client node. The limit is not enforced when unset + or `0`. This value appears in `nomad node status` under Allocated Resources. + - `node_pool` `(string: "default")` - Specifies the node pool in which the client is registered. If the node pool does not exist yet, it will be created automatically if the node registers in the authoritative region. In