numa: enable numa topology detection (#18146)

* client: refactor cgroups management in client

* client: fingerprint numa topology

* client: plumb numa and cgroups changes to drivers

* client: cleanup task resource accounting

* client: numa client and config plumbing

* lib: add a stack implementation

* tools: remove ec2info tool

* plugins: fixup testing for cgroups / numa changes

* build: update makefile and package tests and cl
This commit is contained in:
Seth Hoenig
2023-08-10 17:05:30 -05:00
committed by GitHub
parent 5bc49e5208
commit a4cc76bd3e
149 changed files with 3677 additions and 5395 deletions

View File

@@ -15,7 +15,6 @@ import (
plugin "github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/lib/cgutil"
"github.com/hashicorp/nomad/client/logmon"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/helper/testlog"
@@ -68,46 +67,9 @@ func NewDriverHarness(t testing.T, d drivers.DriverPlugin) *DriverHarness {
}
}
// setupCgroupV2 prepares a cgroups v2 directory for the given task, so tests
// behave as though a Client had been initialized and its cpuset manager had
// already created the cgroup.
//
// raw_exec sets up its own cgroup, whereas exec, java, and docker would
// normally lean on the Nomad cpuset manager (and the docker daemon) to make
// one. Neither is available in tests, so the harness creates it. Threading
// the driver distinction through to the harness is not worth the complexity,
// so the cgroup is created unconditionally and parked under 'testing.slice'.
//
// tl;dr raw_exec tests should ignore this cgroup.
//
// Nothing happens unless cgutil.UseV2 is set. The resulting path is recorded
// in h.cgroup; a failure to create the directory panics.
func (h *DriverHarness) setupCgroupV2(allocID, task string) {
	if !cgutil.UseV2 {
		return
	}

	const parent = "testing.slice"
	scope := cgutil.CgroupScope(allocID, task)
	h.cgroup = filepath.Join(cgutil.CgroupRoot, parent, scope)
	h.logger.Trace("create cgroup for test", "parent", parent, "id", allocID, "task", task, "path", h.cgroup)

	if err := os.MkdirAll(h.cgroup, 0755); err != nil {
		panic(err)
	}
}
// Kill tears down the harness: it closes the client, stops the server, and
// then runs the cgroup cleanup, in that order.
func (h *DriverHarness) Kill() {
// the error from closing the client is deliberately discarded
_ = h.client.Close()
h.server.Stop()
h.cleanupCgroup()
}
// cleanupCgroup removes the cgroup directory recorded in h.cgroup, if there is
// one. The cgroup may or may not be tracked by the DriverHarness.
//
// It is a no-op unless cgutil.UseV2 is set and h.cgroup is non-empty. Any
// removal error other than "does not exist" fails the test.
func (h *DriverHarness) cleanupCgroup() {
	// Some [non-exec] tests never call MkAllocDir (which is what would create
	// the cgroup) but still call Kill; there is nothing to clean up then.
	if !cgutil.UseV2 || h.cgroup == "" {
		return
	}

	err := os.Remove(h.cgroup)

	// The driver sometimes removes the cgroup itself, so a cgroup that is
	// already gone at cleanup time is not a problem.
	if err == nil || os.IsNotExist(err) {
		return
	}

	h.t.Fatalf("failed to cleanup cgroup: %v", err)
}
// MkAllocDir creates a temporary directory and allocdir structure.
@@ -159,9 +121,6 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func(
}
}
// setup a v2 cgroup for test cases that assume one exists
h.setupCgroupV2(alloc.ID, task.Name)
//logmon
if enableLogs {
lm := logmon.NewLogMon(h.logger.Named("logmon"))
@@ -194,7 +153,6 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func(
return func() {
h.client.Close()
allocDir.Destroy()
h.cleanupCgroup()
}
}