e2e refactor oversubscription (#19060)
* e2e: remove old oversubscription test
* e2e: fixup and cleanup oversubscription test suite
  Fix and clean up this old oversubscription test.
* use t.Cleanup instead of defer in tests
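The last bullet refers to Go's testing cleanup hook. A minimal sketch of the pattern, with a hypothetical submitJob helper standing in for the suite's real helpers: unlike defer, a function registered with t.Cleanup runs once the test and all of its subtests have finished, even when the registration happens inside a helper.

package oversubscription

import "testing"

// submitJob is a hypothetical stand-in for a helper like jobs3.Submit:
// it creates a resource and returns a teardown function.
func submitJob(t *testing.T) (string, func()) {
	t.Helper()
	return "example-job", func() { /* deregister the job here */ }
}

func TestCleanupPattern(t *testing.T) {
	jobID, stop := submitJob(t)

	// Before this commit: defer stop(), which only runs when the
	// enclosing function returns. t.Cleanup instead runs after the
	// test and all subtests registered with t.Run have finished,
	// and works even when called from inside a helper.
	t.Cleanup(stop)

	_ = jobID
}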
5  e2e/oversubscription/doc.go  Normal file
@@ -0,0 +1,5 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+// The oversubscription package contains tests around scheduler oversubscription.
+package oversubscription
@@ -2,18 +2,18 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "oversubscription-docker" {
-  datacenters = ["dc1"]
+  type = "batch"
 
   constraint {
     attribute = "${attr.kernel.name}"
-    operator  = "set_contains_any"
-    value     = "darwin,linux"
+    operator  = "="
+    value     = "linux"
   }
 
   constraint {
-    attribute = "${attr.unique.cgroup.version}"
+    attribute = "${attr.os.cgroups.version}"
     operator  = "="
-    value     = "v2"
+    value     = "2"
   }
 
   group "group" {
@@ -21,9 +21,9 @@ job "oversubscription-docker" {
       driver = "docker"
 
       config {
-        image   = "busybox:1.29.2"
+        image   = "busybox:1"
         command = "/bin/sh"
-        args    = ["-c", "cat /sys/fs/cgroup/memory.max; sleep 1000"]
+        args    = ["-c", "cat /sys/fs/cgroup/memory.max; sleep infinity"]
       }
 
       resources {
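For context on what this docker jobspec feeds (a sketch, not part of the diff; the new test below submits it as ./input/docker.hcl): under cgroups v2 the enforced memory ceiling is exposed inside the container at /sys/fs/cgroup/memory.max, so a memory_max of 30 megabytes should read back as 30 * 1024 * 1024 = 31457280 bytes, which is the value the new test asserts against.

package oversubscription

import "fmt"

// Sketch: derive the byte string the tests expect to read from the
// cgroup v2 memory.max file when the jobspec sets memory_max = 30.
func expectedMemoryMax() string {
	const memoryMaxMB = 30
	return fmt.Sprintf("%d", memoryMaxMB*1024*1024) // "31457280"
}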
54  e2e/oversubscription/input/exec.hcl  Normal file
@@ -0,0 +1,54 @@
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+job "oversubscription-exec" {
+  type = "batch"
+
+  constraint {
+    attribute = "${attr.kernel.name}"
+    operator  = "="
+    value     = "linux"
+  }
+
+  constraint {
+    attribute = "${attr.os.cgroups.version}"
+    operator  = "="
+    value     = "2"
+  }
+
+  group "group" {
+    task "sleep" {
+      driver = "exec"
+
+      config {
+        command = "/bin/sh"
+        args    = ["-c", "sleep infinity"]
+      }
+
+      resources {
+        cpu        = 500
+        memory     = 20
+        memory_max = 30
+      }
+    }
+
+    task "cat" {
+      driver = "pledge"
+
+      lifecycle {
+        hook = "poststart"
+      }
+
+      config {
+        command = "/bin/cat"
+        args    = ["/sys/fs/cgroup/nomad.slice/share.slice/${NOMAD_ALLOC_ID}.sleep.scope/memory.max"]
+        unveil  = ["r:/sys/fs/cgroup/"]
+      }
+
+      resources {
+        cpu    = 100
+        memory = 20
+      }
+    }
+  }
+}
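A note on the cat task above: it is a poststart task whose only job is to read the sleep task's cgroup file, and the pledge driver's unveil list grants it read access under /sys/fs/cgroup/. The path it reads follows the cgroup v2 layout used for the exec task. A hedged sketch of how that path is assembled (buildMemoryMaxPath is illustrative, not a Nomad helper):

package oversubscription

import (
	"fmt"
	"path/filepath"
)

// buildMemoryMaxPath is illustrative only. It mirrors the path the
// jobspec's cat task reads, assuming the cgroup v2 layout where each
// exec task lands in nomad.slice/share.slice/<allocID>.<task>.scope.
func buildMemoryMaxPath(allocID, taskName string) string {
	scope := fmt.Sprintf("%s.%s.scope", allocID, taskName)
	return filepath.Join("/sys/fs/cgroup/nomad.slice/share.slice", scope, "memory.max")
}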
@@ -1,132 +0,0 @@
-// Copyright (c) HashiCorp, Inc.
-// SPDX-License-Identifier: BUSL-1.1
-
-package oversubscription
-
-import (
-	"fmt"
-	"strings"
-	"time"
-
-	"github.com/hashicorp/nomad/api"
-	"github.com/hashicorp/nomad/e2e/e2eutil"
-	"github.com/hashicorp/nomad/e2e/framework"
-	"github.com/hashicorp/nomad/helper/uuid"
-)
-
-type OversubscriptionTest struct {
-	framework.TC
-	jobIDs                 []string
-	initialSchedulerConfig *api.SchedulerConfiguration
-}
-
-func init() {
-	framework.AddSuites(&framework.TestSuite{
-		Component:   "oversubscription",
-		CanRunLocal: true,
-		Cases: []framework.TestCase{
-			new(OversubscriptionTest),
-		},
-	})
-}
-
-func (tc *OversubscriptionTest) BeforeAll(f *framework.F) {
-	// Ensure cluster has leader before running tests
-	e2eutil.WaitForLeader(f.T(), tc.Nomad())
-	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
-
-	tc.enableMemoryOversubscription(f)
-}
-
-func (tc *OversubscriptionTest) AfterAll(f *framework.F) {
-	tc.restoreSchedulerConfig(f)
-}
-
-func (tc *OversubscriptionTest) enableMemoryOversubscription(f *framework.F) {
-	resp, _, err := tc.Nomad().Operator().SchedulerGetConfiguration(nil)
-	f.NoError(err)
-
-	tc.initialSchedulerConfig = resp.SchedulerConfig
-
-	conf := *resp.SchedulerConfig
-	conf.MemoryOversubscriptionEnabled = true
-	_, _, err = tc.Nomad().Operator().SchedulerSetConfiguration(&conf, nil)
-	f.NoError(err)
-}
-
-func (tc *OversubscriptionTest) restoreSchedulerConfig(f *framework.F) {
-	if tc.initialSchedulerConfig != nil {
-		_, _, err := tc.Nomad().Operator().SchedulerSetConfiguration(tc.initialSchedulerConfig, nil)
-		f.NoError(err)
-	}
-}
-
-func (tc *OversubscriptionTest) AfterEach(f *framework.F) {
-	nomadClient := tc.Nomad()
-	j := nomadClient.Jobs()
-
-	for _, id := range tc.jobIDs {
-		j.Deregister(id, true, nil)
-	}
-	tc.Nomad().System().GarbageCollect()
-}
-
-func (tc *OversubscriptionTest) TestDocker(f *framework.F) {
-	alloc := tc.runTest(f, "oversubscription-docker-", "docker.nomad")
-
-	// check that cgroup reports the memoryMaxMB as the limit within the container
-	stdout, err := e2eutil.AllocLogs(alloc.ID, "", e2eutil.LogsStdOut)
-	f.NoError(err)
-	f.Equal(fmt.Sprintf("%d\n", 30*1024*1024), stdout)
-}
-
-func (tc *OversubscriptionTest) TestExec(f *framework.F) {
-	alloc := tc.runTest(f, "oversubscription-exec-", "exec.nomad")
-
-	// check that the cgroup is configured with the memoryMaxMB
-	var err error
-	expected := fmt.Sprintf("%d\n", 30*1024*1024)
-	e2eutil.WaitForAllocFile(alloc.ID, "/alloc/tmp/memory.limit_in_bytes", func(s string) bool {
-		if s != expected {
-			err = fmt.Errorf("expected %v got %v", expected, s)
-			return false
-		}
-		err = nil
-		return true
-	}, nil)
-	f.NoError(err)
-}
-
-func (tc *OversubscriptionTest) runTest(f *framework.F, jobPrefix, jobfile string) *api.Allocation {
-	// register a job
-	jobID := jobPrefix + uuid.Generate()[:8]
-	tc.jobIDs = append(tc.jobIDs, jobID)
-
-	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), tc.Nomad(), "oversubscription/testdata/"+jobfile, jobID, "")
-	f.Len(allocs, 1)
-
-	e2eutil.WaitForAllocRunning(f.T(), tc.Nomad(), allocs[0].ID)
-
-	alloc, _, err := tc.Nomad().Allocations().Info(allocs[0].ID, nil)
-	f.NoError(err)
-
-	// assert the resources info
-	resources := alloc.AllocatedResources.Tasks["task"]
-	f.Equal(int64(20), resources.Memory.MemoryMB)
-	f.Equal(int64(30), resources.Memory.MemoryMaxMB)
-
-	// assert the status API reports memory; we need to wait for the
-	// metrics to be written before we can assert the entire
-	// command line
-	var allocInfo string
-	f.Eventually(func() bool {
-		allocInfo, err = e2eutil.Command("nomad", "alloc", "status", alloc.ID)
-		if err != nil {
-			return false
-		}
-		return strings.Contains(allocInfo, "/20 MiB") && // memory reserve
-			strings.Contains(allocInfo, "Max: 30 MiB") // memory max
-	}, 10*time.Second, 200*time.Millisecond, "unexpected memory output")
-
-	return alloc
-}
90  e2e/oversubscription/oversubscription_test.go  Normal file
@@ -0,0 +1,90 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package oversubscription
+
+import (
+	"testing"
+	"time"
+
+	"github.com/hashicorp/nomad/api"
+	"github.com/hashicorp/nomad/e2e/e2eutil"
+	"github.com/hashicorp/nomad/e2e/v3/cluster3"
+	"github.com/hashicorp/nomad/e2e/v3/jobs3"
+	"github.com/shoenig/test/must"
+)
+
+var (
+	// store the original scheduler configuration
+	origConfig *api.SchedulerConfiguration
+)
+
+func TestOversubscription(t *testing.T) {
+	cluster3.Establish(t,
+		cluster3.Leader(),
+		cluster3.LinuxClients(1),
+	)
+
+	// store the current state of scheduler configuration so we
+	// may restore it after the suite is done
+	captureSchedulerConfiguration(t)
+	t.Cleanup(func() { restoreSchedulerConfiguration(t) })
+
+	// enable memory oversubscription for these tests
+	enableMemoryOversubscription(t)
+
+	t.Run("testDocker", testDocker)
+	t.Run("testExec", testExec)
+}
+
+func testDocker(t *testing.T) {
+	job, jobCleanup := jobs3.Submit(t, "./input/docker.hcl")
+	t.Cleanup(jobCleanup)
+
+	// wait for logs
+	// TODO(shoenig) a better way to do this?
+	time.Sleep(10 * time.Second)
+
+	// job will cat /sys/fs/cgroup/memory.max, which should be
+	// set to the 30 megabyte memory_max value
+	logs := job.TaskLogs("group", "task")
+	must.StrContains(t, logs.Stdout, "31457280")
+}
+
+func testExec(t *testing.T) {
+	job, jobCleanup := jobs3.Submit(t, "./input/exec.hcl")
+	t.Cleanup(jobCleanup)
+
+	// wait for poststart
+	time.Sleep(10 * time.Second)
+
+	// job will cat /sys/fs/cgroup/nomad.slice/share.slice/<allocid>.sleep.scope/memory.max,
+	// which should be set to the 30 megabyte memory_max value
+	logs := job.TaskLogs("group", "cat")
+	must.StrContains(t, logs.Stdout, "31457280")
+}
+
+func captureSchedulerConfiguration(t *testing.T) {
+	origConfig = getSchedulerConfiguration(t)
+}
+
+func restoreSchedulerConfiguration(t *testing.T) {
+	operatorAPI := e2eutil.NomadClient(t).Operator()
+	_, _, err := operatorAPI.SchedulerSetConfiguration(origConfig, nil)
+	must.NoError(t, err)
+}
+
+func enableMemoryOversubscription(t *testing.T) {
+	schedulerConfig := getSchedulerConfiguration(t)
+	schedulerConfig.MemoryOversubscriptionEnabled = true
+	operatorAPI := e2eutil.NomadClient(t).Operator()
+	_, _, err := operatorAPI.SchedulerCASConfiguration(schedulerConfig, nil)
+	must.NoError(t, err)
+}
+
+func getSchedulerConfiguration(t *testing.T) *api.SchedulerConfiguration {
+	operatorAPI := e2eutil.NomadClient(t).Operator()
+	resp, _, err := operatorAPI.SchedulerGetConfiguration(nil)
+	must.NoError(t, err)
+	return resp.SchedulerConfig
+}
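One detail worth noting in the file above: enableMemoryOversubscription uses SchedulerCASConfiguration rather than SchedulerSetConfiguration. The CAS variant is a check-and-set: the write only applies if the configuration's ModifyIndex still matches the server's current value, so a concurrent change to scheduler configuration fails the update rather than being silently clobbered. A hedged usage sketch, assuming the standard nomad/api client:

package oversubscription

import (
	"testing"

	"github.com/hashicorp/nomad/api"
	"github.com/shoenig/test/must"
)

// Sketch: check-and-set update of the scheduler configuration.
// The ModifyIndex carried in the fetched config guards the write.
func casExample(t *testing.T, client *api.Client) {
	resp, _, err := client.Operator().SchedulerGetConfiguration(nil)
	must.NoError(t, err)

	cfg := resp.SchedulerConfig
	cfg.MemoryOversubscriptionEnabled = true

	// Applied only if cfg.ModifyIndex matches the current server state;
	// the response's Updated field reports whether the CAS succeeded.
	wr, _, err := client.Operator().SchedulerCASConfiguration(cfg, nil)
	must.NoError(t, err)
	must.True(t, wr.Updated)
}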
53  e2e/oversubscription/testdata/exec.nomad  vendored
@@ -1,53 +0,0 @@
-# Copyright (c) HashiCorp, Inc.
-# SPDX-License-Identifier: BUSL-1.1
-
-job "oversubscription-exec" {
-  datacenters = ["dc1"]
-
-  constraint {
-    attribute = "${attr.kernel.name}"
-    value     = "linux"
-  }
-
-  group "group" {
-    task "task" {
-      driver = "exec"
-
-      config {
-        command = "/bin/sh"
-        args    = ["-c", "cat /proc/self/cgroup | grep memory | cut -d: -f3 | tee ${NOMAD_ALLOC_DIR}/tmp/cgroup_name; sleep 1000"]
-      }
-
-      resources {
-        cpu        = 500
-        memory     = 20
-        memory_max = 30
-      }
-    }
-
-    task "cgroup-fetcher" {
-      driver = "raw_exec"
-
-      config {
-        command = "/bin/sh"
-        args = ["-c", <<EOF
-until [ -s "${NOMAD_ALLOC_DIR}/tmp/cgroup_name" ]; do
-  sleep 0.1
-done
-
-cat "/sys/fs/cgroup/memory/$(cat "${NOMAD_ALLOC_DIR}/tmp/cgroup_name" )/memory.limit_in_bytes" \
-  | tee "${NOMAD_ALLOC_DIR}/tmp/memory.limit_in_bytes"
-
-sleep 1000
-
-EOF
-        ]
-      }
-
-      resources {
-        cpu    = 500
-        memory = 20
-      }
-    }
-  }
-}
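Why the removed job needed two tasks (my reading of the shell script above): under cgroups v1 a task first had to discover its own memory cgroup from /proc/self/cgroup, and a separate raw_exec task then read memory.limit_in_bytes from the host's /sys/fs/cgroup/memory hierarchy. The refactored exec.hcl replaces that dance with a single read of the well-known v2 memory.max path. A sketch of the v1 discovery step in Go, for comparison:

package oversubscription

import (
	"os"
	"strings"
)

// memoryCgroupV1 mirrors the v1 discovery the removed job scripted in
// shell: find this process's memory cgroup in /proc/self/cgroup, whose
// lines have the form hierarchy-ID:controller-list:cgroup-path.
func memoryCgroupV1() (string, error) {
	data, err := os.ReadFile("/proc/self/cgroup")
	if err != nil {
		return "", err
	}
	for _, line := range strings.Split(string(data), "\n") {
		parts := strings.SplitN(line, ":", 3)
		if len(parts) == 3 && strings.Contains(parts[1], "memory") {
			return parts[2], nil
		}
	}
	return "", os.ErrNotExist
}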