mirror of
https://github.com/kemko/nomad.git
synced 2026-01-02 16:35:44 +03:00
In #20619 we overhauled how we were gathering stats for Windows processes. Unlike in Linux where we can ask for processes in a cgroup, on Windows we have to make a single expensive syscall to get all the processes and then build the tree ourselves. Our algorithm to do so is recursive and quadratic in both steps and space with the number of processes on the host. For busy hosts this hits the stack limit and panics the Nomad client. We already build a map of parent PID to PID, so modify this to be a map of parent PID to slice of children and then traverse that tree only from the root we care about (the executor PID). This moves the allocations to the heap but makes the stats gathering linear in steps and space required. This changeset also moves as much of this code as possible into an area not conditionally-compiled by OS, as the tagged test file was not being run in CI. Fixes: https://github.com/hashicorp/nomad/issues/23984
133 lines
3.4 KiB
Go
133 lines
3.4 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package procstats
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/hashicorp/go-set/v3"
|
|
"github.com/hashicorp/nomad/client/lib/cpustats"
|
|
"github.com/hashicorp/nomad/plugins/drivers"
|
|
"github.com/mitchellh/go-ps"
|
|
)
|
|
|
|
var (
|
|
// The statistics the basic executor exposes
|
|
ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
|
|
ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
|
|
)
|
|
|
|
// ProcessID is an alias for int; it just helps us identify where PIDs from
|
|
// the kernel are being used.
|
|
type ProcessID = int
|
|
|
|
// ProcUsages is a map from PID to the resources that process is consuming.
|
|
//
|
|
// The pid type is a string because that's how Nomad wants it.
|
|
type ProcUsages map[string]*drivers.ResourceUsage
|
|
|
|
// A ProcessStats is anything (i.e. a task driver) that implements StatProcesses
|
|
// for gathering CPU and memory process stats for all processes associated with
|
|
// a task.
|
|
type ProcessStats interface {
|
|
StatProcesses() ProcUsages
|
|
}
|
|
|
|
// A ProcessList is anything (i.e. a task driver) that implements ListProcesses
|
|
// for gathering the list of process IDs associated with a task.
|
|
type ProcessList interface {
|
|
ListProcesses() set.Collection[ProcessID]
|
|
}
|
|
|
|
// Aggregate combines a given ProcUsages with the Tracker for the Client.
|
|
func Aggregate(systemStats *cpustats.Tracker, procStats ProcUsages) *drivers.TaskResourceUsage {
|
|
ts := time.Now().UTC().UnixNano()
|
|
var (
|
|
systemModeCPU, userModeCPU, percent float64
|
|
totalRSS, totalSwap uint64
|
|
)
|
|
|
|
for _, pidStat := range procStats {
|
|
systemModeCPU += pidStat.CpuStats.SystemMode
|
|
userModeCPU += pidStat.CpuStats.UserMode
|
|
percent += pidStat.CpuStats.Percent
|
|
|
|
totalRSS += pidStat.MemoryStats.RSS
|
|
totalSwap += pidStat.MemoryStats.Swap
|
|
}
|
|
|
|
totalCPU := &drivers.CpuStats{
|
|
SystemMode: systemModeCPU,
|
|
UserMode: userModeCPU,
|
|
Percent: percent,
|
|
Measured: ExecutorBasicMeasuredCpuStats,
|
|
TotalTicks: systemStats.TicksConsumed(percent),
|
|
}
|
|
|
|
totalMemory := &drivers.MemoryStats{
|
|
RSS: totalRSS,
|
|
Swap: totalSwap,
|
|
Measured: ExecutorBasicMeasuredMemStats,
|
|
}
|
|
|
|
resourceUsage := drivers.ResourceUsage{
|
|
MemoryStats: totalMemory,
|
|
CpuStats: totalCPU,
|
|
}
|
|
return &drivers.TaskResourceUsage{
|
|
ResourceUsage: &resourceUsage,
|
|
Timestamp: ts,
|
|
Pids: procStats,
|
|
}
|
|
}
|
|
|
|
func list(executorPID int, processes func() ([]ps.Process, error)) (set.Collection[ProcessID], int) {
|
|
family := set.From([]int{executorPID})
|
|
|
|
all, err := processes()
|
|
if err != nil {
|
|
return family, 0
|
|
}
|
|
|
|
parents, examined := mapping(all)
|
|
examined += gather(family, parents, executorPID)
|
|
|
|
return family, examined
|
|
}
|
|
|
|
func gather(family set.Collection[int], parents map[int]set.Collection[int], parent int) int {
|
|
examined := 0
|
|
candidates, ok := parents[parent]
|
|
if !ok {
|
|
return examined
|
|
}
|
|
for _, candidate := range candidates.Slice() {
|
|
examined++
|
|
family.Insert(candidate)
|
|
examined += gather(family, parents, candidate)
|
|
}
|
|
|
|
return examined
|
|
}
|
|
|
|
// mapping builds a reverse map of parent to children
|
|
func mapping(all []ps.Process) (map[int]set.Collection[int], int) {
|
|
|
|
parents := map[int]set.Collection[int]{}
|
|
examined := 0
|
|
|
|
for _, candidate := range all {
|
|
if candidate != nil {
|
|
examined++
|
|
if children, ok := parents[candidate.PPid()]; ok {
|
|
children.Insert(candidate.Pid())
|
|
} else {
|
|
parents[candidate.PPid()] = set.From([]int{candidate.Pid()})
|
|
}
|
|
}
|
|
}
|
|
|
|
return parents, examined
|
|
}
|