Files
nomad/drivers/shared/executor/procstats/list_test.go
Tim Gross 77c8acb422 telemetry: fix excessive CPU consumption in executor (#25870)
Collecting metrics from processes is expensive, especially on platforms like
Windows. The executor code has a 5s cache of stats to ensure that we don't
thrash syscalls on nodes running many allocations. But the timestamp used to
calculate TTL of this cache was never being set, so we were always treating it
as expired. This causes excess CPU utilization on client nodes.

Ensure that when we fill the cache, we set the timestamp. In testing on Windows,
this reduces executor CPU overhead by roughly 75%.

This changeset includes two other related items:

* The `telemetry.publish_allocation_metrics` field correctly prevents a node
  from publishing metrics, but the stats hook on the taskrunner still collects
  the metrics, which can be expensive. Thread the configuration value into the
  stats hook so that we don't collect if `telemetry.publish_allocation_metrics =
  false`.

* The `linuxProcStats` type in the executor's `procstats` package is misnamed as
  a result of a couple rounds of refactoring. It's used by all task executors,
  not just Linux. Rename this and move a comment about how Windows processes are
  listed so that the comment is closer to where the logic is implemented.

Fixes: https://github.com/hashicorp/nomad/issues/23323
Fixes: https://hashicorp.atlassian.net/browse/NMD-455
2025-05-19 09:24:13 -04:00

105 lines
1.9 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package procstats
import (
"math/rand"
"testing"
"github.com/mitchellh/go-ps"
"github.com/shoenig/test/must"
)
// mockProcess is a minimal in-memory process record used by the tests in this
// file. It only carries a process ID and a parent process ID.
type mockProcess struct {
	pid, ppid int
}

// Pid returns the process ID stored in the mock.
func (m *mockProcess) Pid() int { return m.pid }

// PPid returns the parent process ID stored in the mock.
func (m *mockProcess) PPid() int { return m.ppid }

// Executable always returns the empty string; the mock carries no
// executable name.
func (m *mockProcess) Executable() string { return "" }
// mockProc returns a pointer to a new mockProcess with the given process ID
// and parent process ID.
func mockProc(pid, ppid int) *mockProcess {
	proc := new(mockProcess)
	proc.pid, proc.ppid = pid, ppid
	return proc
}
// genMockProcs builds a shuffled mock process table for the tests in this
// file.
//
// The table always contains PID 1 (its own parent) and PID 42, a child of
// PID 1. On top of that it adds:
//
//   - needles processes forming a single chain below PID 42 (43 is a child
//     of 42, 44 a child of 43, and so on), and
//   - haystack processes forming a second chain whose first parent PID is
//     never added to the table, so none of them descend from PID 42.
//
// It returns the shuffled table together with PID 42 and every needle PID,
// in creation order. Non-positive counts add no processes.
func genMockProcs(needles, haystack int) ([]ps.Process, []ProcessID) {
	const (
		rootPID      = 42  // root of the needle chain
		haystackBase = 200 // lowest first-parent PID used for the haystack chain
	)

	procs := []ps.Process{mockProc(1, 1), mockProc(rootPID, 1)}
	expect := []ProcessID{rootPID}

	// TODO: make this into a tree structure, not just a linear chain
	for i := range needles {
		parent := rootPID + i
		pid := parent + 1
		procs = append(procs, mockProc(pid, parent))
		expect = append(expect, pid)
	}

	// Start the haystack strictly above the highest needle PID. With a fixed
	// base of 200, a needle chain long enough to reach PID 200 would turn the
	// whole haystack into descendants of PID 42 (without listing them in
	// expect) and would also produce duplicate PIDs. For shorter needle
	// chains the base stays at 200.
	base := max(haystackBase, rootPID+needles+1)
	for i := range haystack {
		parent := base + i
		pid := parent + 1
		procs = append(procs, mockProc(pid, parent))
	}

	// Randomize the order of the table; expect is left in creation order.
	rand.Shuffle(len(procs), func(i, j int) {
		procs[i], procs[j] = procs[j], procs[i]
	})

	return procs, expect
}
// Test_list runs list against generated process tables of increasing size
// and asserts that, for the executor at PID 42, the result matches the PIDs
// that genMockProcs reports as expected.
func Test_list(t *testing.T) {
	// PID passed to list as the executor; genMockProcs hangs the needle
	// chain off this PID.
	const executorPID = 42

	type testCase struct {
		name     string
		needles  int
		haystack int
	}

	tests := []testCase{
		{name: "minimal", needles: 2, haystack: 10},
		{name: "small needles small haystack", needles: 5, haystack: 200},
		{name: "small needles large haystack", needles: 10, haystack: 1000},
		{name: "moderate needles giant haystack", needles: 20, haystack: 2000},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			procs, expect := genMockProcs(tt.needles, tt.haystack)

			// Serve the generated table in place of a real process listing.
			result := list(executorPID, func() ([]ps.Process, error) {
				return procs, nil
			})

			got := result.Slice()
			must.SliceContainsAll(t, expect, got,
				must.Sprintf("exp: %v; got: %v", expect, result),
			)
		})
	}
}