diff --git a/e2e/oversubscription/doc.go b/e2e/oversubscription/doc.go new file mode 100644 index 000000000..98a09196a --- /dev/null +++ b/e2e/oversubscription/doc.go @@ -0,0 +1,5 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +// Package oversubscription contains tests around scheduler oversubscription. +package oversubscription diff --git a/e2e/oversubscription/testdata/docker.nomad b/e2e/oversubscription/input/docker.hcl similarity index 61% rename from e2e/oversubscription/testdata/docker.nomad rename to e2e/oversubscription/input/docker.hcl index 0e1f8cad3..5863c89b9 100644 --- a/e2e/oversubscription/testdata/docker.nomad +++ b/e2e/oversubscription/input/docker.hcl @@ -2,18 +2,18 @@ # SPDX-License-Identifier: BUSL-1.1 job "oversubscription-docker" { - datacenters = ["dc1"] + type = "batch" constraint { attribute = "${attr.kernel.name}" - operator = "set_contains_any" - value = "darwin,linux" + operator = "=" + value = "linux" } constraint { - attribute = "${attr.unique.cgroup.version}" + attribute = "${attr.os.cgroups.version}" operator = "=" - value = "v2" + value = "2" } group "group" { @@ -21,9 +21,9 @@ job "oversubscription-docker" { driver = "docker" config { - image = "busybox:1.29.2" + image = "busybox:1" command = "/bin/sh" - args = ["-c", "cat /sys/fs/cgroup/memory.max; sleep 1000"] + args = ["-c", "cat /sys/fs/cgroup/memory.max; sleep infinity"] } resources { diff --git a/e2e/oversubscription/input/exec.hcl b/e2e/oversubscription/input/exec.hcl new file mode 100644 index 000000000..1474a0c46 --- /dev/null +++ b/e2e/oversubscription/input/exec.hcl @@ -0,0 +1,54 @@ +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: BUSL-1.1 + +job "oversubscription-exec" { + type = "batch" + + constraint { + attribute = "${attr.kernel.name}" + operator = "=" + value = "linux" + } + + constraint { + attribute = "${attr.os.cgroups.version}" + operator = "=" + value = "2" + } + + group "group" { + task "sleep" { + driver = "exec" + + config { + command = "/bin/sh" + args = ["-c", "sleep infinity"] + } + + resources { + cpu = 500 + memory = 20 + memory_max = 30 + } + } + + task "cat" { + driver = "pledge" + + lifecycle { + hook = "poststart" + } + + config { + command = "/bin/cat" + args = ["/sys/fs/cgroup/nomad.slice/share.slice/${NOMAD_ALLOC_ID}.sleep.scope/memory.max"] + unveil = ["r:/sys/fs/cgroup/"] + } + + resources { + cpu = 100 + memory = 20 + } + } + } +} diff --git a/e2e/oversubscription/oversubscription.go b/e2e/oversubscription/oversubscription.go deleted file mode 100644 index 5f1826469..000000000 --- a/e2e/oversubscription/oversubscription.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) HashiCorp, Inc. 
-// SPDX-License-Identifier: BUSL-1.1 - -package oversubscription - -import ( - "fmt" - "strings" - "time" - - "github.com/hashicorp/nomad/api" - "github.com/hashicorp/nomad/e2e/e2eutil" - "github.com/hashicorp/nomad/e2e/framework" - "github.com/hashicorp/nomad/helper/uuid" -) - -type OversubscriptionTest struct { - framework.TC - jobIDs []string - initialSchedulerConfig *api.SchedulerConfiguration -} - -func init() { - framework.AddSuites(&framework.TestSuite{ - Component: "oversubscription", - CanRunLocal: true, - Cases: []framework.TestCase{ - new(OversubscriptionTest), - }, - }) -} - -func (tc *OversubscriptionTest) BeforeAll(f *framework.F) { - // Ensure cluster has leader before running tests - e2eutil.WaitForLeader(f.T(), tc.Nomad()) - e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1) - - tc.enableMemoryOversubscription(f) -} - -func (tc *OversubscriptionTest) AfterAll(f *framework.F) { - tc.restoreSchedulerConfig(f) -} - -func (tc *OversubscriptionTest) enableMemoryOversubscription(f *framework.F) { - resp, _, err := tc.Nomad().Operator().SchedulerGetConfiguration(nil) - f.NoError(err) - - tc.initialSchedulerConfig = resp.SchedulerConfig - - conf := *resp.SchedulerConfig - conf.MemoryOversubscriptionEnabled = true - _, _, err = tc.Nomad().Operator().SchedulerSetConfiguration(&conf, nil) - f.NoError(err) -} - -func (tc *OversubscriptionTest) restoreSchedulerConfig(f *framework.F) { - if tc.initialSchedulerConfig != nil { - _, _, err := tc.Nomad().Operator().SchedulerSetConfiguration(tc.initialSchedulerConfig, nil) - f.NoError(err) - } -} - -func (tc *OversubscriptionTest) AfterEach(f *framework.F) { - nomadClient := tc.Nomad() - j := nomadClient.Jobs() - - for _, id := range tc.jobIDs { - j.Deregister(id, true, nil) - } - tc.Nomad().System().GarbageCollect() -} - -func (tc *OversubscriptionTest) TestDocker(f *framework.F) { - alloc := tc.runTest(f, "oversubscription-docker-", "docker.nomad") - - // check that cgroup reports the memoryMaxMB as the limit within 
he container - stdout, err := e2eutil.AllocLogs(alloc.ID, "", e2eutil.LogsStdOut) - f.NoError(err) - f.Equal(fmt.Sprintf("%d\n", 30*1024*1024), stdout) -} - -func (tc *OversubscriptionTest) TestExec(f *framework.F) { - alloc := tc.runTest(f, "oversubscription-exec-", "exec.nomad") - - // check the the cgroup is configured with the memoryMaxMB - var err error - expected := fmt.Sprintf("%d\n", 30*1024*1024) - e2eutil.WaitForAllocFile(alloc.ID, "/alloc/tmp/memory.limit_in_bytes", func(s string) bool { - if s != expected { - err = fmt.Errorf("expected %v got %v", expected, s) - return false - } - err = nil - return true - }, nil) - f.NoError(err) -} - -func (tc *OversubscriptionTest) runTest(f *framework.F, jobPrefix, jobfile string) *api.Allocation { - // register a job - jobID := jobPrefix + uuid.Generate()[:8] - tc.jobIDs = append(tc.jobIDs, jobID) - - allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), tc.Nomad(), "oversubscription/testdata/"+jobfile, jobID, "") - f.Len(allocs, 1) - - e2eutil.WaitForAllocRunning(f.T(), tc.Nomad(), allocs[0].ID) - - alloc, _, err := tc.Nomad().Allocations().Info(allocs[0].ID, nil) - f.NoError(err) - - // assert the resources info - resources := alloc.AllocatedResources.Tasks["task"] - f.Equal(int64(20), resources.Memory.MemoryMB) - f.Equal(int64(30), resources.Memory.MemoryMaxMB) - - // assert the status API reports memory, we need to wait for the - // for metrics to be written before we can assert the entire - // command line - var allocInfo string - f.Eventually(func() bool { - allocInfo, err = e2eutil.Command("nomad", "alloc", "status", alloc.ID) - if err != nil { - return false - } - return strings.Contains(allocInfo, "/20 MiB") && // memory reserve - strings.Contains(allocInfo, "Max: 30 MiB") // memory max - }, 10*time.Second, 200*time.Millisecond, "unexpected memory output") - - return alloc -} diff --git a/e2e/oversubscription/oversubscription_test.go b/e2e/oversubscription/oversubscription_test.go new file mode 100644 index 
000000000..029f81995 --- /dev/null +++ b/e2e/oversubscription/oversubscription_test.go @@ -0,0 +1,90 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package oversubscription + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/e2e/e2eutil" + "github.com/hashicorp/nomad/e2e/v3/cluster3" + "github.com/hashicorp/nomad/e2e/v3/jobs3" + "github.com/shoenig/test/must" +) + +var ( + // store the original scheduler configuration + origConfig *api.SchedulerConfiguration +) + +func TestOversubscription(t *testing.T) { + cluster3.Establish(t, + cluster3.Leader(), + cluster3.LinuxClients(1), + ) + + // store the current state of scheduler configuration so we + // may restore it after the suite is done + captureSchedulerConfiguration(t) + t.Cleanup(func() { restoreSchedulerConfiguration(t) }) + + // enable memory oversubscription for these tests + enableMemoryOversubscription(t) + + t.Run("testDocker", testDocker) + t.Run("testExec", testExec) +} + +func testDocker(t *testing.T) { + job, jobCleanup := jobs3.Submit(t, "./input/docker.hcl") + t.Cleanup(jobCleanup) + + // wait for logs + // TODO(shoenig) a better way to do this? 
+ time.Sleep(10 * time.Second) + + // job will cat /sys/fs/cgroup/memory.max which should be + // set to the 30 megabyte memory_max value + logs := job.TaskLogs("group", "task") + must.StrContains(t, logs.Stdout, "31457280") +} + +func testExec(t *testing.T) { + job, jobCleanup := jobs3.Submit(t, "./input/exec.hcl") + t.Cleanup(jobCleanup) + + // wait for poststart + time.Sleep(10 * time.Second) + + // job will cat /sys/fs/cgroup/nomad.slice/share.slice/${NOMAD_ALLOC_ID}.sleep.scope/memory.max + // which should be set to the 30 megabyte memory_max value + logs := job.TaskLogs("group", "cat") + must.StrContains(t, logs.Stdout, "31457280") +} + +func captureSchedulerConfiguration(t *testing.T) { + origConfig = getSchedulerConfiguration(t) +} + +func restoreSchedulerConfiguration(t *testing.T) { + operatorAPI := e2eutil.NomadClient(t).Operator() + _, _, err := operatorAPI.SchedulerSetConfiguration(origConfig, nil) + must.NoError(t, err) +} + +func enableMemoryOversubscription(t *testing.T) { + schedulerConfig := getSchedulerConfiguration(t) + schedulerConfig.MemoryOversubscriptionEnabled = true + operatorAPI := e2eutil.NomadClient(t).Operator() + _, _, err := operatorAPI.SchedulerCASConfiguration(schedulerConfig, nil) + must.NoError(t, err) +} + +func getSchedulerConfiguration(t *testing.T) *api.SchedulerConfiguration { + operatorAPI := e2eutil.NomadClient(t).Operator() + resp, _, err := operatorAPI.SchedulerGetConfiguration(nil) + must.NoError(t, err) + return resp.SchedulerConfig +} diff --git a/e2e/oversubscription/testdata/exec.nomad b/e2e/oversubscription/testdata/exec.nomad deleted file mode 100644 index 363557a6d..000000000 --- a/e2e/oversubscription/testdata/exec.nomad +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) HashiCorp, Inc. 
-# SPDX-License-Identifier: BUSL-1.1 - -job "oversubscription-exec" { - datacenters = ["dc1"] - - constraint { - attribute = "${attr.kernel.name}" - value = "linux" - } - - group "group" { - task "task" { - driver = "exec" - - config { - command = "/bin/sh" - args = ["-c", "cat /proc/self/cgroup | grep memory | cut -d: -f3 | tee ${NOMAD_ALLOC_DIR}/tmp/cgroup_name; sleep 1000"] - } - - resources { - cpu = 500 - memory = 20 - memory_max = 30 - } - } - - task "cgroup-fetcher" { - driver = "raw_exec" - - config { - command = "/bin/sh" - args = ["-c", <