mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 18:35:44 +03:00
* client: refactor cpuset partitioning This PR updates the way Nomad client manages the split between tasks that make use of resources.cpus vs. resources.cores. Previously, each task was explicitly assigned which CPU cores they were able to run on. Every time a task was started or destroyed, all other tasks' cpusets would need to be updated. This was inefficient and would crush the Linux kernel when a client would try to run ~400 or so tasks. Now, we make use of cgroup hierarchy and cpuset inheritance to efficiently manage cpusets. * cr: tweaks for feedback
56 lines
1.4 KiB
Go
56 lines
1.4 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package allocrunner
|
|
|
|
import (
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/client/lib/cgroupslib"
|
|
"github.com/hashicorp/nomad/client/lib/idset"
|
|
"github.com/hashicorp/nomad/client/lib/numalib/hw"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// cpuPartsHookName is the name reported by the cpuset partitioning hook and
// the name given to its logger.
const cpuPartsHookName = "cpuparts_hook"
|
|
|
|
// cpuPartsHooks is responsible for managing cpuset partitioning on Linux
|
|
// nodes. This mechanism works by segregating tasks that make use of "cpu" vs.
|
|
// "cores" resources. Tasks that make use of "cpu" resource actually make use
|
|
// of shared cores that have not been reserved. The scheduler ensures enough
|
|
// cores on a node are not reserved such that all tasks have the minimum amount
|
|
// of cpu bandwidth they requested.
|
|
type cpuPartsHook struct {
|
|
logger hclog.Logger
|
|
allocID string
|
|
|
|
reservations *idset.Set[hw.CoreID]
|
|
partitions cgroupslib.Partition
|
|
}
|
|
|
|
func newCPUPartsHook(
|
|
logger hclog.Logger,
|
|
partitions cgroupslib.Partition,
|
|
alloc *structs.Allocation,
|
|
) *cpuPartsHook {
|
|
return &cpuPartsHook{
|
|
logger: logger.Named(cpuPartsHookName),
|
|
allocID: alloc.ID,
|
|
partitions: partitions,
|
|
reservations: alloc.ReservedCores(),
|
|
}
|
|
}
|
|
|
|
func (h *cpuPartsHook) Name() string {
|
|
return cpuPartsHookName
|
|
}
|
|
|
|
func (h *cpuPartsHook) Prerun() error {
|
|
return h.partitions.Reserve(h.reservations)
|
|
}
|
|
|
|
func (h *cpuPartsHook) Postrun() error {
|
|
return h.partitions.Release(h.reservations)
|
|
}
|