From 374e987b9ba2885d401f301724e609fc7718c329 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 24 Apr 2025 12:48:18 -0400 Subject: [PATCH] metrics: emit cache and rss stats on cgroup v2 (#25751) In cgroups v2, a different map of memory stats is available from the kernel than in v1. The Docker API reflects this change. But there are equivalent values in the map for RSS (anonymously mapped memory) and cache (filesystem cache and tmpfs), which the Docker driver is not currently emitting. Fall back to these alternate values when the cgroups v1 values are not available. Include the anonymous mapping in the "measured" allocation stats as "RSS" so that they both show up in allocation metrics. We can do this on both the `docker` driver and the Linux executor for `exec` and `java` drivers. Fixes: https://github.com/hashicorp/nomad/issues/19185 Ref: https://hashicorp.atlassian.net/browse/NMD-437 Ref: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt --- .changelog/25751.txt | 3 +++ drivers/docker/driver_test.go | 2 ++ drivers/docker/stats_test.go | 6 ++++++ drivers/docker/util/stats_posix.go | 24 ++++++++++++++++++----- drivers/shared/executor/executor_linux.go | 21 ++++++++++++++++---- 5 files changed, 47 insertions(+), 9 deletions(-) create mode 100644 .changelog/25751.txt diff --git a/.changelog/25751.txt b/.changelog/25751.txt new file mode 100644 index 000000000..df61d9e4f --- /dev/null +++ b/.changelog/25751.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Fixed a bug where RSS and cache stats would not be reported for docker, exec, and java drivers under Linux cgroups v2 +``` diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go index b4a24acf4..476604ff5 100644 --- a/drivers/docker/driver_test.go +++ b/drivers/docker/driver_test.go @@ -3288,6 +3288,8 @@ DONE: ticks := stats.ResourceUsage.CpuStats.TotalTicks must.Greater(t, 0, ticks) 
tickValues.Insert(ticks) + rss := stats.ResourceUsage.MemoryStats.RSS + must.Greater(t, 0, rss) if statsReceived >= 3 { cancel() // 3 is plenty } diff --git a/drivers/docker/stats_test.go b/drivers/docker/stats_test.go index b537dd176..8cab9016d 100644 --- a/drivers/docker/stats_test.go +++ b/drivers/docker/stats_test.go @@ -168,6 +168,12 @@ func Test_taskHandle_collectDockerStats(t *testing.T) { must.NonZero(t, dockerStats.MemoryStats.Usage) must.MapContainsKey(t, dockerStats.MemoryStats.Stats, "file_mapped") + + _, hasRSS := dockerStats.MemoryStats.Stats["rss"] + if !hasRSS { + _, hasRSS = dockerStats.MemoryStats.Stats["anon"] + } + must.True(t, hasRSS) } // Test Windows specific memory stats are collected as and when expected. diff --git a/drivers/docker/util/stats_posix.go b/drivers/docker/util/stats_posix.go index 4d310b3cc..72a2e8ede 100644 --- a/drivers/docker/util/stats_posix.go +++ b/drivers/docker/util/stats_posix.go @@ -16,7 +16,7 @@ var ( // cgroup-v2 only exposes a subset of memory stats DockerCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage"} - DockerCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} + DockerCgroupV2MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage"} ) func DockerStatsToTaskResourceUsage(s *containerapi.StatsResponse, compute cpustats.Compute) *cstructs.TaskResourceUsage { @@ -33,12 +33,26 @@ func DockerStatsToTaskResourceUsage(s *containerapi.StatsResponse, compute cpust measuredMems = DockerCgroupV2MeasuredMemStats } + cache := s.MemoryStats.Stats["cache"] + if cache == 0 { + // This is the equivalent stat for cgroups v2, including filesystem + // cache and tmpfs + cache = s.MemoryStats.Stats["file"] + } + rss := s.MemoryStats.Stats["rss"] + if rss == 0 { + // This is the equivalent stat of anonymous mappings for cgroups v2. + rss = s.MemoryStats.Stats["anon"] + } + + // containerapi exposes memory stat file as a map. 
For the meaning of the + // source values, consult: + // https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt + // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files ms := &cstructs.MemoryStats{ - // containerapi exposes memory stat file as a map, consult - // https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt MappedFile: s.MemoryStats.Stats["file_mapped"], - Cache: s.MemoryStats.Stats["cache"], - RSS: s.MemoryStats.Stats["rss"], + Cache: cache, + RSS: rss, Swap: s.MemoryStats.Stats["swap"], Usage: s.MemoryStats.Usage, MaxUsage: s.MemoryStats.MaxUsage, diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 662d51571..d727f8555 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -52,11 +52,13 @@ const ( ) var ( - // ExecutorCgroupV1MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v1 + // ExecutorCgroupV1MeasuredMemStats is the list of memory stats captured by + // the executor with cgroup-v1 ExecutorCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"} - // ExecutorCgroupV2MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v2. cgroup-v2 exposes different memory stats and no longer reports rss or max usage. - ExecutorCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} + // ExecutorCgroupV2MeasuredMemStats is the list of memory stats captured by + // the executor with cgroup-v2. 
cgroup-v2 exposes different memory stats + ExecutorCgroupV2MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage"} // ExecutorCgroupMeasuredCpuStats is the list of CPU stats captures by the executor ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"} @@ -439,8 +441,19 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, // Memory Related Stats swap := stats.MemoryStats.SwapUsage maxUsage := stats.MemoryStats.Usage.MaxUsage - rss := stats.MemoryStats.Stats["rss"] + cache := stats.MemoryStats.Stats["cache"] + if cache == 0 { + // This is the equivalent stat for cgroups v2, including filesystem + // cache and tmpfs + cache = stats.MemoryStats.Stats["file"] + } + rss := stats.MemoryStats.Stats["rss"] + if rss == 0 { + // This is the equivalent stat of anonymous mappings for cgroups v2. + rss = stats.MemoryStats.Stats["anon"] + } + mapped_file := stats.MemoryStats.Stats["mapped_file"] ms := &cstructs.MemoryStats{ RSS: rss,