Mirror of https://github.com/kemko/nomad.git, synced 2026-01-01 16:05:42 +03:00
scheduler: take all assigned cpu cores into account instead of only those part of the largest lifecycle (#24304)
Fixes a bug in the AllocatedResources.Comparable method, where the scheduler would take into account only the cpusets of the tasks in the largest lifecycle. This could result in overlapping cgroup cpusets. We now distinguish between reserved and fungible resources throughout the lifespan of the alloc. In addition, logging was added so that future regressions can be caught without manual inspection of cgroup files.
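As an illustration (this is the scenario exercised by the new TestAllocsFit_Cores below): take an allocation whose prestart task reserves core 1 and whose main task reserves core 0. Before this change, Comparable() flattened the alloc to the cpuset of the largest lifecycle only, so the scheduler saw just one of the two cores as claimed and could hand the other to a second allocation, producing two cgroups with overlapping cpusets. After the change, both cores are claimed for the entire lifespan of the alloc.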
.changelog/24304.txt (Normal file, 3 additions)
@@ -0,0 +1,3 @@
+```release-note:bug
+scheduler: take all assigned cpu cores into account instead of only those part of the largest lifecycle
+```
@@ -79,10 +79,18 @@ func (p *partition) Reserve(cores *idset.Set[hw.CoreID]) error {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
-	p.share.RemoveSet(cores)
-
 	// Use the intersection with the usable cores to avoid adding more cores than available.
-	p.reserve.InsertSet(p.usableCores.Intersect(cores))
+	usableCores := p.usableCores.Intersect(cores)
+
+	overlappingCores := p.reserve.Intersect(usableCores)
+	if overlappingCores.Size() > 0 {
+		// COMPAT: prior to Nomad 1.9.X this would silently happen, this should probably return an error instead
+		p.log.Warn("Unable to exclusively reserve the requested cores", "cores", cores, "overlapping_cores", overlappingCores)
+	}
+
+	p.share.RemoveSet(cores)
+	p.reserve.InsertSet(usableCores)
 
 	return p.write()
 }
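For readers unfamiliar with the idset operations used above, here is a minimal, self-contained sketch of the overlap check using plain map-based sets (coreSet and intersect are illustration-only names, not Nomad APIs):

```go
package main

import "fmt"

// coreSet is a toy stand-in for idset.Set[hw.CoreID].
type coreSet map[uint16]struct{}

// intersect mirrors the p.reserve.Intersect(usableCores) call above:
// it returns the cores present in both sets.
func intersect(a, b coreSet) coreSet {
	out := coreSet{}
	for c := range a {
		if _, ok := b[c]; ok {
			out[c] = struct{}{}
		}
	}
	return out
}

func main() {
	reserve := coreSet{0: {}}          // core already reserved by another alloc
	requested := coreSet{0: {}, 1: {}} // cores the new reservation asks for

	if overlap := intersect(reserve, requested); len(overlap) > 0 {
		// This is the point where the fixed Reserve emits its warning
		// instead of silently double-booking core 0.
		fmt.Println("overlapping cores:", overlap)
	}
}
```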
@@ -248,6 +248,79 @@ func TestAllocsFit(t *testing.T) {
 	must.Eq(t, 1024, used.Flattened.Memory.MemoryMB)
 }
 
+func TestAllocsFit_Cores(t *testing.T) {
+	ci.Parallel(t)
+
+	n := node2k()
+
+	a1 := &Allocation{
+		AllocatedResources: &AllocatedResources{
+			Tasks: map[string]*AllocatedTaskResources{
+				"web": {
+					Cpu: AllocatedCpuResources{
+						CpuShares:     500,
+						ReservedCores: []uint16{0},
+					},
+					Memory: AllocatedMemoryResources{
+						MemoryMB: 1024,
+					},
+				},
+			},
+		},
+	}
+
+	a2 := &Allocation{
+		AllocatedResources: &AllocatedResources{
+			Tasks: map[string]*AllocatedTaskResources{
+				"web-prestart": {
+					Cpu: AllocatedCpuResources{
+						CpuShares:     500,
+						ReservedCores: []uint16{1},
+					},
+					Memory: AllocatedMemoryResources{
+						MemoryMB: 1024,
+					},
+				},
+				"web": {
+					Cpu: AllocatedCpuResources{
+						CpuShares:     500,
+						ReservedCores: []uint16{0},
+					},
+					Memory: AllocatedMemoryResources{
+						MemoryMB: 1024,
+					},
+				},
+			},
+			TaskLifecycles: map[string]*TaskLifecycleConfig{
+				"web-prestart": {
+					Hook:    TaskLifecycleHookPrestart,
+					Sidecar: false,
+				},
+			},
+		},
+	}
+
+	// Should fit one allocation
+	fit, dim, used, err := AllocsFit(n, []*Allocation{a1}, nil, false)
+	must.NoError(t, err)
+	must.True(t, fit, must.Sprintf("failed for dimension %q", dim))
+	must.Eq(t, 500, used.Flattened.Cpu.CpuShares)
+	must.Eq(t, 1024, used.Flattened.Memory.MemoryMB)
+
+	// Should fit one allocation
+	fit, dim, used, err = AllocsFit(n, []*Allocation{a2}, nil, false)
+	must.NoError(t, err)
+	must.True(t, fit, must.Sprintf("failed for dimension %q", dim))
+	must.Eq(t, 1000, used.Flattened.Cpu.CpuShares)
+	must.Eq(t, 1024, used.Flattened.Memory.MemoryMB)
+
+	// Should not fit both allocations
+	fit, dim, used, err = AllocsFit(n, []*Allocation{a1, a2}, nil, false)
+	must.NoError(t, err)
+	must.False(t, fit)
+	must.Eq(t, dim, "cores")
+}
+
 func TestAllocsFit_TerminalAlloc(t *testing.T) {
 	ci.Parallel(t)
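In TestAllocsFit_Cores, a2 on its own now flattens to cores {0, 1}: core 1 from the prestart task plus core 0 from the main task. Scheduling a1 and a2 onto the same node would therefore double-book core 0, so AllocsFit reports the "cores" dimension as exhausted rather than letting the cpusets overlap.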
@@ -3856,35 +3856,45 @@ func (a *AllocatedResources) Comparable() *ComparableResources {
 		Shared: a.Shared,
 	}
 
-	prestartSidecarTasks := &AllocatedTaskResources{}
-	prestartEphemeralTasks := &AllocatedTaskResources{}
-	main := &AllocatedTaskResources{}
-	poststartTasks := &AllocatedTaskResources{}
-	poststopTasks := &AllocatedTaskResources{}
+	// The lifecycle in which a task could run
+	prestartLifecycle := &AllocatedTaskResources{}
+	mainLifecycle := &AllocatedTaskResources{}
+	stopLifecycle := &AllocatedTaskResources{}
 
-	for taskName, r := range a.Tasks {
-		lc := a.TaskLifecycles[taskName]
-		if lc == nil {
-			main.Add(r)
-		} else if lc.Hook == TaskLifecycleHookPrestart {
-			if lc.Sidecar {
-				prestartSidecarTasks.Add(r)
+	for taskName, taskResources := range a.Tasks {
+		taskLifecycle := a.TaskLifecycles[taskName]
+		fungibleTaskResources := taskResources.Copy()
+
+		// Reserved cores (and their respective bandwidth) are not fungible,
+		// hence we should always include it as part of the Flattened resources.
+		if len(fungibleTaskResources.Cpu.ReservedCores) > 0 {
+			c.Flattened.Cpu.Add(&fungibleTaskResources.Cpu)
+			fungibleTaskResources.Cpu = AllocatedCpuResources{}
+		}
+
+		if taskLifecycle == nil {
+			mainLifecycle.Add(fungibleTaskResources)
+		} else if taskLifecycle.Hook == TaskLifecycleHookPrestart {
+			if taskLifecycle.Sidecar {
+				// These tasks span both the prestart and main lifecycle
+				prestartLifecycle.Add(fungibleTaskResources)
+				mainLifecycle.Add(fungibleTaskResources)
 			} else {
-				prestartEphemeralTasks.Add(r)
+				prestartLifecycle.Add(fungibleTaskResources)
 			}
-		} else if lc.Hook == TaskLifecycleHookPoststart {
-			poststartTasks.Add(r)
-		} else if lc.Hook == TaskLifecycleHookPoststop {
-			poststopTasks.Add(r)
+		} else if taskLifecycle.Hook == TaskLifecycleHookPoststart {
+			mainLifecycle.Add(fungibleTaskResources)
+		} else if taskLifecycle.Hook == TaskLifecycleHookPoststop {
+			stopLifecycle.Add(fungibleTaskResources)
 		}
 	}
 
-	// update this loop to account for lifecycle hook
-	main.Add(poststartTasks)
-
-	prestartEphemeralTasks.Max(main)
-	prestartEphemeralTasks.Max(poststopTasks)
-	prestartSidecarTasks.Add(prestartEphemeralTasks)
-	c.Flattened.Add(prestartSidecarTasks)
+	// Update the main lifecycle to reflect the largest fungible resource set
+	mainLifecycle.Max(prestartLifecycle)
+	mainLifecycle.Max(stopLifecycle)
+
+	// Add the fungible resources
+	c.Flattened.Add(mainLifecycle)
 
 	// Add network resources that are at the task group level
 	for _, network := range a.Shared.Networks {
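To make the new accounting concrete, here is a minimal, runnable sketch of the rule the loop above implements (taskCPU and flatten are simplified illustration-only types, not Nomad's structs, and sidecar handling is omitted): bandwidth tied to reserved cores is summed across every task regardless of lifecycle, while purely fungible shares contribute only the maximum across the prestart, main, and poststop lifecycles.

```go
package main

import "fmt"

// taskCPU is a toy stand-in for AllocatedCpuResources.
type taskCPU struct {
	shares int      // CPU bandwidth in MHz
	cores  []uint16 // pinned cores; empty means fully fungible
}

func flatten(prestart, main, poststop []taskCPU) (shares int, cores []uint16) {
	maxFungible := 0
	for _, lifecycle := range [][]taskCPU{prestart, main, poststop} {
		fungible := 0
		for _, t := range lifecycle {
			if len(t.cores) > 0 {
				// Reserved cores and their bandwidth are claimed for the
				// whole lifespan of the alloc.
				shares += t.shares
				cores = append(cores, t.cores...)
				continue
			}
			fungible += t.shares
		}
		if fungible > maxFungible {
			maxFungible = fungible
		}
	}
	return shares + maxFungible, cores
}

func main() {
	// Mirrors the third test case below: prestart 1000 MHz, main 2000 MHz
	// pinned to cores {1, 2}, poststop 1000 MHz.
	shares, cores := flatten(
		[]taskCPU{{shares: 1000}},
		[]taskCPU{{shares: 2000, cores: []uint16{1, 2}}},
		[]taskCPU{{shares: 1000}},
	)
	fmt.Println(shares, len(cores)) // 3000 2
}
```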
@@ -7763,6 +7763,124 @@ func TestAllocatedResources_Comparable_Flattened(t *testing.T) {
 	ci.Parallel(t)
 
+	allocationResources := AllocatedResources{
+		TaskLifecycles: map[string]*TaskLifecycleConfig{
+			"prestart-task": {
+				Hook:    TaskLifecycleHookPrestart,
+				Sidecar: false,
+			},
+			"poststop-task": {
+				Hook:    TaskLifecycleHookPoststop,
+				Sidecar: false,
+			},
+		},
+		Tasks: map[string]*AllocatedTaskResources{
+			"prestart-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares: 2000,
+				},
+			},
+			"main-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares: 4000,
+				},
+			},
+			"poststop-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares: 1000,
+				},
+			},
+		},
+	}
+
+	// The output of Flattened should return the resource required during the execution of the largest lifecycle
+	must.Eq(t, 4000, allocationResources.Comparable().Flattened.Cpu.CpuShares)
+	must.Len(t, 0, allocationResources.Comparable().Flattened.Cpu.ReservedCores)
+
+	allocationResources = AllocatedResources{
+		TaskLifecycles: map[string]*TaskLifecycleConfig{
+			"prestart-task": {
+				Hook:    TaskLifecycleHookPrestart,
+				Sidecar: false,
+			},
+			"prestart-sidecar-task": {
+				Hook:    TaskLifecycleHookPrestart,
+				Sidecar: true,
+			},
+			"poststop-task": {
+				Hook:    TaskLifecycleHookPoststop,
+				Sidecar: false,
+			},
+		},
+		Tasks: map[string]*AllocatedTaskResources{
+			"prestart-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares:     1000,
+					ReservedCores: []uint16{0},
+				},
+			},
+			"prestart-sidecar-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares:     2000,
+					ReservedCores: []uint16{1, 2},
+				},
+			},
+			"main-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares:     2000,
+					ReservedCores: []uint16{3, 4},
+				},
+			},
+			"poststop-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares:     1000,
+					ReservedCores: []uint16{5},
+				},
+			},
+		},
+	}
+
+	// Reserved core resources are claimed throughout the lifespan of the allocation
+	must.Eq(t, 6000, allocationResources.Comparable().Flattened.Cpu.CpuShares)
+	must.Len(t, 6, allocationResources.Comparable().Flattened.Cpu.ReservedCores)
+
+	allocationResources = AllocatedResources{
+		TaskLifecycles: map[string]*TaskLifecycleConfig{
+			"prestart-task": {
+				Hook:    TaskLifecycleHookPrestart,
+				Sidecar: false,
+			},
+			"poststop-task": {
+				Hook:    TaskLifecycleHookPoststop,
+				Sidecar: false,
+			},
+		},
+		Tasks: map[string]*AllocatedTaskResources{
+			"prestart-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares: 1000,
+				},
+			},
+			"main-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares:     2000,
+					ReservedCores: []uint16{1, 2},
+				},
+			},
+			"poststop-task": {
+				Cpu: AllocatedCpuResources{
+					CpuShares: 1000,
+				},
+			},
+		},
+	}
+
+	// Reserved core resources are claimed throughout the lifespan of the allocation,
+	// but the prestart and poststop task can reuse the CpuShares. It's important to
+	// note that we will only claim 1000 MHz as part of the share slice.
+	must.Eq(t, 3000, allocationResources.Comparable().Flattened.Cpu.CpuShares)
+	must.Len(t, 2, allocationResources.Comparable().Flattened.Cpu.ReservedCores)
+
-	allocationResources := AllocatedResources{
+	allocationResources = AllocatedResources{
 		TaskLifecycles: map[string]*TaskLifecycleConfig{
 			"prestart-task": {
 				Hook: TaskLifecycleHookPrestart,

@@ -7816,8 +7934,8 @@ func TestAllocatedResources_Comparable_Flattened(t *testing.T) {
 	}
 
 	// The output of Flattened should return the resource required during the execution of the largest lifecycle
-	must.Eq(t, 5000, allocationResources.Comparable().Flattened.Cpu.CpuShares)
-	must.Len(t, 5, allocationResources.Comparable().Flattened.Cpu.ReservedCores)
+	must.Eq(t, 9000, allocationResources.Comparable().Flattened.Cpu.CpuShares)
+	must.Len(t, 9, allocationResources.Comparable().Flattened.Cpu.ReservedCores)
 }
 
 func requireErrors(t *testing.T, err error, expected ...string) {
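As a sanity check on the second case above: every task pins at least one core, so none of the bandwidth is fungible and the flattened shares are simply 1000 + 2000 + 2000 + 1000 = 6000 MHz, with the six reserved cores {0, 1, 2, 3, 4, 5}. In the third case only the main task pins cores, so its 2000 MHz is claimed permanently while the prestart and poststop tasks share the remaining fungible slice: max(1000, 1000) = 1000 MHz, for 3000 MHz total.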