In cgroups v2 the kernel exposes a different map of memory stats than in v1, and the Docker API reflects this change. The v2 map still contains equivalent values for RSS (anonymously mapped memory) and cache (filesystem cache and tmpfs), but the Docker driver is not currently emitting them. Fall back to these alternate values when the cgroups v1 values are not available, and include the anonymous mapping in the "measured" allocation stats as "RSS" so that both show up in allocation metrics. We can do this for both the `docker` driver and the Linux executor used by the `exec` and `java` drivers.

Fixes: https://github.com/hashicorp/nomad/issues/19185
Ref: https://hashicorp.atlassian.net/browse/NMD-437
Ref: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
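A minimal sketch of how such a fallback might look in the stats conversion, assuming the raw memory.stat map is exposed as MemoryStats.Stats on the Docker stats response. The "anon" key is the one the test below accepts in place of "rss"; using "file" as the cache equivalent follows the referenced cgroup v2 documentation, and the helper below is hypothetical, not the driver's actual code:

// memStatWithFallback is a hypothetical helper: it returns the cgroups v1
// value when its key is present and otherwise falls back to the cgroups v2
// equivalent, returning zero if neither key exists.
func memStatWithFallback(stats map[string]uint64, v1Key, v2Key string) uint64 {
	if v, ok := stats[v1Key]; ok {
		return v
	}
	return stats[v2Key]
}

// Hypothetical call sites inside DockerStatsToTaskResourceUsage:
//
//	rss := memStatWithFallback(s.MemoryStats.Stats, "rss", "anon")
//	cache := memStatWithFallback(s.MemoryStats.Stats, "cache", "file")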
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package docker

import (
	"context"
	"runtime"
	"sync"
	"testing"
	"time"

	containerapi "github.com/docker/docker/api/types/container"
	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client/lib/cpustats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/testutil"
	"github.com/hashicorp/nomad/drivers/docker/util"
	"github.com/shoenig/test/must"
)

func TestDriver_DockerStatsCollector(t *testing.T) {
	ci.Parallel(t)

	stats := &containerapi.StatsResponse{}
	stats.CPUStats.ThrottlingData.Periods = 10
	stats.CPUStats.ThrottlingData.ThrottledPeriods = 10
	stats.CPUStats.ThrottlingData.ThrottledTime = 10

	stats.MemoryStats.Stats = map[string]uint64{}
	stats.MemoryStats.Stats["rss"] = 6537216
	stats.MemoryStats.Stats["cache"] = 1234
	stats.MemoryStats.Stats["swap"] = 0
	stats.MemoryStats.Stats["file_mapped"] = 1024
	stats.MemoryStats.Usage = 5651904
	stats.MemoryStats.MaxUsage = 6651904
	stats.MemoryStats.Commit = 123231
	stats.MemoryStats.CommitPeak = 321323
	stats.MemoryStats.PrivateWorkingSet = 62222

	ru := util.DockerStatsToTaskResourceUsage(stats, cpustats.Compute{})

	if runtime.GOOS != "windows" {
		must.Eq(t, stats.MemoryStats.Stats["file_mapped"], ru.ResourceUsage.MemoryStats.MappedFile)
		must.Eq(t, stats.MemoryStats.Stats["rss"], ru.ResourceUsage.MemoryStats.RSS)
		must.Eq(t, stats.MemoryStats.Stats["cache"], ru.ResourceUsage.MemoryStats.Cache)
		must.Eq(t, stats.MemoryStats.Stats["swap"], ru.ResourceUsage.MemoryStats.Swap)
		must.Eq(t, stats.MemoryStats.Usage, ru.ResourceUsage.MemoryStats.Usage)
		must.Eq(t, stats.MemoryStats.MaxUsage, ru.ResourceUsage.MemoryStats.MaxUsage)
		must.Eq(t, stats.CPUStats.ThrottlingData.ThrottledPeriods, ru.ResourceUsage.CpuStats.ThrottledPeriods)
		must.Eq(t, stats.CPUStats.ThrottlingData.ThrottledTime, ru.ResourceUsage.CpuStats.ThrottledTime)
	} else {
		must.Eq(t, stats.MemoryStats.PrivateWorkingSet, ru.ResourceUsage.MemoryStats.RSS)
		must.Eq(t, stats.MemoryStats.Commit, ru.ResourceUsage.MemoryStats.Usage)
		must.Eq(t, stats.MemoryStats.CommitPeak, ru.ResourceUsage.MemoryStats.MaxUsage)
		must.Eq(t, stats.CPUStats.ThrottlingData.ThrottledPeriods, ru.ResourceUsage.CpuStats.ThrottledPeriods)
		must.Eq(t, stats.CPUStats.ThrottlingData.ThrottledTime, ru.ResourceUsage.CpuStats.ThrottledTime)
	}
}

// TestDriver_DockerUsageSender asserts that the TaskResourceUsage chan wrapper
// supports closing and sending on a chan from concurrent goroutines.
func TestDriver_DockerUsageSender(t *testing.T) {
	ci.Parallel(t)

	// sample payload
	res := &cstructs.TaskResourceUsage{}

	destCh, recvCh := newStatsChanPipe()

	// Sending should never fail
	destCh.send(res)
	destCh.send(res)
	destCh.send(res)

	// Clear chan
	<-recvCh

	// Send and close concurrently to let the race detector help us out
	wg := sync.WaitGroup{}
	wg.Add(3)

	// Sender
	go func() {
		destCh.send(res)
		wg.Done()
	}()

	// Closer
	go func() {
		destCh.close()
		wg.Done()
	}()

	// Clear recv chan
	go func() {
		for range recvCh {
		}
		wg.Done()
	}()

	wg.Wait()

	// Assert closed
	destCh.mu.Lock()
	closed := destCh.closed
	destCh.mu.Unlock()
	must.True(t, closed)

	select {
	case _, ok := <-recvCh:
		must.False(t, ok)
	default:
		t.Fatal("expect recvCh to be closed")
	}

	// Assert that sending and closing never fail
	destCh.send(res)
	destCh.close()
	destCh.close()
	destCh.send(res)
}

func Test_taskHandle_collectDockerStats(t *testing.T) {
	ci.Parallel(t)
	testutil.DockerCompatible(t)

	// Start a Docker container and wait for it to be running, so we can
	// guarantee stats generation.
	driverCfg, dockerTaskConfig, _ := dockerTask(t)

	must.NoError(t, driverCfg.EncodeConcreteDriverConfig(dockerTaskConfig))

	_, driverHarness, handle, cleanup := dockerSetup(t, driverCfg, nil)
	defer cleanup()
	must.NoError(t, driverHarness.WaitUntilStarted(driverCfg.ID, 5*time.Second))

	// Generate a context, so the test doesn't hang on Docker problems and
	// execute a single collection of the stats.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	dockerStats, err := handle.collectDockerStats(ctx)
	must.NoError(t, err)
	must.NotNil(t, dockerStats)

	// Ensure all the stats we use for calculating CPU percentages within
	// DockerStatsToTaskResourceUsage are present and non-zero.
	must.NonZero(t, dockerStats.CPUStats.CPUUsage.TotalUsage)
	must.NonZero(t, dockerStats.PreCPUStats.CPUUsage.TotalUsage)

	// System usage is only populated on Linux machines. GitHub Actions Windows
	// runners do not have UsageInKernelmode or UsageInUsermode populated and
	// these datapoints are not used by the Windows stats usage function. The
	// Linux-specific memory stats are also wrapped in this conditional.
	if runtime.GOOS == "linux" {
		must.NonZero(t, dockerStats.CPUStats.SystemUsage)
		must.NonZero(t, dockerStats.CPUStats.CPUUsage.UsageInKernelmode)
		must.NonZero(t, dockerStats.CPUStats.CPUUsage.UsageInUsermode)

		must.NonZero(t, dockerStats.PreCPUStats.SystemUsage)
		must.NonZero(t, dockerStats.PreCPUStats.CPUUsage.UsageInKernelmode)
		must.NonZero(t, dockerStats.PreCPUStats.CPUUsage.UsageInUsermode)

		must.NonZero(t, dockerStats.MemoryStats.Usage)
		must.MapContainsKey(t, dockerStats.MemoryStats.Stats, "file_mapped")

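		// On a cgroups v2 host the v1 "rss" key is not present, so accept
		// the equivalent v2 "anon" (anonymous memory) key as well.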
		_, hasRSS := dockerStats.MemoryStats.Stats["rss"]
		if !hasRSS {
			_, hasRSS = dockerStats.MemoryStats.Stats["anon"]
		}
		must.True(t, hasRSS)
	}

	// Test Windows specific memory stats are collected as and when expected.
	if runtime.GOOS == "windows" {
		must.NonZero(t, dockerStats.MemoryStats.PrivateWorkingSet)
		must.NonZero(t, dockerStats.MemoryStats.Commit)
		must.NonZero(t, dockerStats.MemoryStats.CommitPeak)
	}
}