mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 18:35:44 +03:00
numa: enable numa topology detection (#18146)
* client: refactor cgroups management in client * client: fingerprint numa topology * client: plumb numa and cgroups changes to drivers * client: cleanup task resource accounting * client: numa client and config plumbing * lib: add a stack implementation * tools: remove ec2info tool * plugins: fixup testing for cgroups / numa changes * build: update makefile and package tests and cl
This commit is contained in:
@@ -16,7 +16,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/client/lib/cgutil"
|
||||
"github.com/hashicorp/nomad/client/lib/cgroupslib"
|
||||
"github.com/hashicorp/nomad/plugins/drivers"
|
||||
dproto "github.com/hashicorp/nomad/plugins/drivers/proto"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
@@ -193,13 +193,14 @@ func TestExecFSIsolation(t *testing.T, driver *DriverHarness, taskID string) {
|
||||
// we always run in a cgroup - testing freezer cgroup
|
||||
r = execTask(t, driver, taskID,
|
||||
"cat /proc/self/cgroup",
|
||||
false, "")
|
||||
false, "",
|
||||
)
|
||||
require.Zero(t, r.exitCode)
|
||||
|
||||
if !cgutil.UseV2 {
|
||||
acceptable := []string{
|
||||
":freezer:/nomad", ":freezer:/docker",
|
||||
}
|
||||
switch cgroupslib.GetMode() {
|
||||
|
||||
case cgroupslib.CG1:
|
||||
acceptable := []string{":freezer:/nomad", ":freezer:/docker"}
|
||||
if testutil.IsCI() {
|
||||
// github actions freezer cgroup
|
||||
acceptable = append(acceptable, ":freezer:/actions_job")
|
||||
@@ -215,7 +216,7 @@ func TestExecFSIsolation(t *testing.T, driver *DriverHarness, taskID string) {
|
||||
if !ok {
|
||||
require.Fail(t, "unexpected freezer cgroup", "expected freezer to be /nomad/ or /docker/, but found:\n%s", r.stdout)
|
||||
}
|
||||
} else {
|
||||
case cgroupslib.CG2:
|
||||
info, _ := driver.PluginInfo()
|
||||
if info.Name == "docker" {
|
||||
// Note: docker on cgroups v2 now returns nothing
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
plugin "github.com/hashicorp/go-plugin"
|
||||
"github.com/hashicorp/nomad/ci"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/lib/cgutil"
|
||||
"github.com/hashicorp/nomad/client/logmon"
|
||||
"github.com/hashicorp/nomad/client/taskenv"
|
||||
"github.com/hashicorp/nomad/helper/testlog"
|
||||
@@ -68,46 +67,9 @@ func NewDriverHarness(t testing.T, d drivers.DriverPlugin) *DriverHarness {
|
||||
}
|
||||
}
|
||||
|
||||
// setupCgroupV2 creates a v2 cgroup for the task, as if a Client were initialized
|
||||
// and managing the cgroup as it normally would via the cpuset manager.
|
||||
//
|
||||
// Note that we are being lazy and trying to avoid importing cgutil because
|
||||
// currently plugins/drivers/testutils is platform agnostic-ish.
|
||||
//
|
||||
// Some drivers (raw_exec) setup their own cgroup, while others (exec, java, docker)
|
||||
// would otherwise depend on the Nomad cpuset manager (and docker daemon) to create
|
||||
// one, which isn't available here in testing, and so we create one via the harness.
|
||||
// Plumbing such metadata through to the harness is a mind bender, so we just always
|
||||
// create the cgroup, but at least put it under 'testing.slice'.
|
||||
//
|
||||
// tl;dr raw_exec tests should ignore this cgroup.
|
||||
func (h *DriverHarness) setupCgroupV2(allocID, task string) {
|
||||
if cgutil.UseV2 {
|
||||
h.cgroup = filepath.Join(cgutil.CgroupRoot, "testing.slice", cgutil.CgroupScope(allocID, task))
|
||||
h.logger.Trace("create cgroup for test", "parent", "testing.slice", "id", allocID, "task", task, "path", h.cgroup)
|
||||
if err := os.MkdirAll(h.cgroup, 0755); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *DriverHarness) Kill() {
|
||||
_ = h.client.Close()
|
||||
h.server.Stop()
|
||||
h.cleanupCgroup()
|
||||
}
|
||||
|
||||
// cleanupCgroup might cleanup a cgroup that may or may not be tricked by DriverHarness.
|
||||
func (h *DriverHarness) cleanupCgroup() {
|
||||
// some [non-exec] tests don't bother with MkAllocDir which is what would create
|
||||
// the cgroup, but then do call Kill, so in that case skip the cgroup cleanup
|
||||
if cgutil.UseV2 && h.cgroup != "" {
|
||||
if err := os.Remove(h.cgroup); err != nil && !os.IsNotExist(err) {
|
||||
// in some cases the driver will cleanup the cgroup itself, in which
|
||||
// case we do not care about the cgroup not existing at cleanup time
|
||||
h.t.Fatalf("failed to cleanup cgroup: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MkAllocDir creates a temporary directory and allocdir structure.
|
||||
@@ -159,9 +121,6 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func(
|
||||
}
|
||||
}
|
||||
|
||||
// setup a v2 cgroup for test cases that assume one exists
|
||||
h.setupCgroupV2(alloc.ID, task.Name)
|
||||
|
||||
//logmon
|
||||
if enableLogs {
|
||||
lm := logmon.NewLogMon(h.logger.Named("logmon"))
|
||||
@@ -194,7 +153,6 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func(
|
||||
return func() {
|
||||
h.client.Close()
|
||||
allocDir.Destroy()
|
||||
h.cleanupCgroup()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
10
plugins/drivers/testutils/testing_default.go
Normal file
10
plugins/drivers/testutils/testing_default.go
Normal file
@@ -0,0 +1,10 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
//go:build !linux
|
||||
|
||||
package testutils
|
||||
|
||||
func (*DriverHarness) MakeTaskCgroup(string, string) {
|
||||
// nothing
|
||||
}
|
||||
25
plugins/drivers/testutils/testing_linux.go
Normal file
25
plugins/drivers/testutils/testing_linux.go
Normal file
@@ -0,0 +1,25 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
//go:build linux
|
||||
|
||||
package testutils
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/nomad/client/lib/cgroupslib"
|
||||
"github.com/shoenig/test/must"
|
||||
)
|
||||
|
||||
// MakeTaskCgroup creates the cgroup that the task driver might assume already
|
||||
// exists, since Nomad client creates them. Why do we write tests that directly
|
||||
// invoke task drivers without any context of the Nomad client? Who knows.
|
||||
func (h *DriverHarness) MakeTaskCgroup(allocID, taskName string) {
|
||||
f := cgroupslib.Factory(allocID, taskName)
|
||||
must.NoError(h.t, f.Setup())
|
||||
|
||||
// ensure child procs are dead and remove the cgroup when the test is done
|
||||
h.t.Cleanup(func() {
|
||||
_ = f.Kill()
|
||||
_ = f.Teardown()
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user