diff --git a/.changelog/25751.txt b/.changelog/25751.txt new file mode 100644 index 000000000..df61d9e4f --- /dev/null +++ b/.changelog/25751.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Fixed a bug where RSS and cache stats would not be reported for docker, exec, and java drivers under Linux cgroups v2 +``` diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go index b4a24acf4..476604ff5 100644 --- a/drivers/docker/driver_test.go +++ b/drivers/docker/driver_test.go @@ -3288,6 +3288,8 @@ DONE: ticks := stats.ResourceUsage.CpuStats.TotalTicks must.Greater(t, 0, ticks) tickValues.Insert(ticks) + rss := stats.ResourceUsage.MemoryStats.RSS + must.Greater(t, 0, rss) if statsReceived >= 3 { cancel() // 3 is plenty } diff --git a/drivers/docker/stats_test.go b/drivers/docker/stats_test.go index b537dd176..8cab9016d 100644 --- a/drivers/docker/stats_test.go +++ b/drivers/docker/stats_test.go @@ -168,6 +168,12 @@ func Test_taskHandle_collectDockerStats(t *testing.T) { must.NonZero(t, dockerStats.MemoryStats.Usage) must.MapContainsKey(t, dockerStats.MemoryStats.Stats, "file_mapped") + + _, hasRSS := dockerStats.MemoryStats.Stats["rss"] + if !hasRSS { + _, hasRSS = dockerStats.MemoryStats.Stats["anon"] + } + must.True(t, hasRSS) } // Test Windows specific memory stats are collected as and when expected. 
diff --git a/drivers/docker/util/stats_posix.go b/drivers/docker/util/stats_posix.go index 4d310b3cc..72a2e8ede 100644 --- a/drivers/docker/util/stats_posix.go +++ b/drivers/docker/util/stats_posix.go @@ -16,7 +16,7 @@ var ( // cgroup-v2 only exposes a subset of memory stats DockerCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage"} - DockerCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} + DockerCgroupV2MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage"} ) func DockerStatsToTaskResourceUsage(s *containerapi.StatsResponse, compute cpustats.Compute) *cstructs.TaskResourceUsage { @@ -33,12 +33,26 @@ func DockerStatsToTaskResourceUsage(s *containerapi.StatsResponse, compute cpust measuredMems = DockerCgroupV2MeasuredMemStats } + cache := s.MemoryStats.Stats["cache"] + if cache == 0 { + // This is the equivalent stat for cgroups v2, including filesystem + // cache and tmpfs + cache = s.MemoryStats.Stats["file"] + } + rss := s.MemoryStats.Stats["rss"] + if rss == 0 { + // This is the equivalent stat of anonymous mappings for cgroups v2. + rss = s.MemoryStats.Stats["anon"] + } + + // containerapi exposes memory stat file as a map. 
For the meaning of the + // source values, consult: + // https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt + // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files ms := &cstructs.MemoryStats{ - // containerapi exposes memory stat file as a map, consult - // https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt MappedFile: s.MemoryStats.Stats["file_mapped"], - Cache: s.MemoryStats.Stats["cache"], - RSS: s.MemoryStats.Stats["rss"], + Cache: cache, + RSS: rss, Swap: s.MemoryStats.Stats["swap"], Usage: s.MemoryStats.Usage, MaxUsage: s.MemoryStats.MaxUsage, diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 662d51571..d727f8555 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -52,11 +52,13 @@ const ( ) var ( - // ExecutorCgroupV1MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v1 + // ExecutorCgroupV1MeasuredMemStats is the list of memory stats captured by + // the executor with cgroup-v1 ExecutorCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"} - // ExecutorCgroupV2MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v2. cgroup-v2 exposes different memory stats and no longer reports rss or max usage. - ExecutorCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} + // ExecutorCgroupV2MeasuredMemStats is the list of memory stats captured by + // the executor with cgroup-v2. 
cgroup-v2 exposes different memory stats + ExecutorCgroupV2MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage"} // ExecutorCgroupMeasuredCpuStats is the list of CPU stats captures by the executor ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"} @@ -439,8 +441,19 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, // Memory Related Stats swap := stats.MemoryStats.SwapUsage maxUsage := stats.MemoryStats.Usage.MaxUsage - rss := stats.MemoryStats.Stats["rss"] + cache := stats.MemoryStats.Stats["cache"] + if cache == 0 { + // This is the equivalent stat for cgroups v2, including filesystem + // cache and tmpfs + cache = stats.MemoryStats.Stats["file"] + } + rss := stats.MemoryStats.Stats["rss"] + if rss == 0 { + // This is the equivalent stat of anonymous mappings for cgroups v2. + rss = stats.MemoryStats.Stats["anon"] + } + mapped_file := stats.MemoryStats.Stats["mapped_file"] ms := &cstructs.MemoryStats{ RSS: rss,