E2E: fix scaling test assertion for extra Windows host (#26077)
* E2E: fix scaling test assertion for extra Windows host

  The scaling test assumes that all nodes will receive the system job. But the
  job can only run on Linux hosts, so the count will be wrong if we're running
  a Windows host as part of the cluster. Filter the expected count by the OS.

  While we're touching this test, let's also migrate it off the legacy
  framework.

* address comments from code review
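The gist of the change, pulled from the diff below: the system job gains a kernel.name = "linux" constraint, and the rewritten test derives its expected allocation count from a node listing filtered by OS rather than from every node in the cluster. What follows is a minimal standalone sketch of that counting step, assuming a reachable Nomad API; the expectedSystemAllocs helper and the main wrapper are illustrative names only, while the os=true query parameter and the Filter expression mirror the ones used in the new test.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

// expectedSystemAllocs (illustrative name) counts only the Linux client
// nodes, so the expected number of allocations for the Linux-constrained
// system job ignores any Windows hosts in the cluster. The "os=true" param
// asks the node list endpoint to include OS attributes, and the filter
// expression matches the one used in the rewritten e2e test.
func expectedSystemAllocs(client *api.Client) (int, error) {
	nodes, _, err := client.Nodes().List(&api.QueryOptions{
		Namespace: "default",
		Params:    map[string]string{"os": "true"},
		Filter:    `Attributes["os.name"] == "ubuntu"`,
	})
	if err != nil {
		return 0, err
	}
	return len(nodes), nil
}

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}
	count, err := expectedSystemAllocs(client)
	if err != nil {
		panic(err)
	}
	fmt.Println("expected system job allocations:", count)
}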
@@ -23,7 +23,6 @@ import (
 	_ "github.com/hashicorp/nomad/e2e/parameterized"
 	_ "github.com/hashicorp/nomad/e2e/periodic"
 	_ "github.com/hashicorp/nomad/e2e/quotas"
-	_ "github.com/hashicorp/nomad/e2e/scaling"
 	_ "github.com/hashicorp/nomad/e2e/scalingpolicies"
 	_ "github.com/hashicorp/nomad/e2e/scheduler_sysbatch"
 	_ "github.com/hashicorp/nomad/e2e/scheduler_system"
@@ -44,6 +43,7 @@ import (
 	_ "github.com/hashicorp/nomad/e2e/oversubscription"
 	_ "github.com/hashicorp/nomad/e2e/podman"
 	_ "github.com/hashicorp/nomad/e2e/rescheduling"
+	_ "github.com/hashicorp/nomad/e2e/scaling"
 	_ "github.com/hashicorp/nomad/e2e/spread"
 	_ "github.com/hashicorp/nomad/e2e/vaultsecrets"
 	_ "github.com/hashicorp/nomad/e2e/volume_mounts"
@@ -240,6 +240,20 @@ func MaybeCleanupJobsAndGC(jobIDs *[]string) func() {
 	}
 }
 
+// MaybeCleanupNamespacedJobsAndGC stops and purges the list of jobIDs in the namespace and runs a
+// system gc. Returns a func so that the return value can be used
+// in t.Cleanup. Similar to CleanupJobsAndGC, but this one does not assert
+// on a successful stop and gc, which is useful for tests that want to stop and
+// gc the jobs themselves but we want a backup Cleanup just in case.
+func MaybeCleanupNamespacedJobsAndGC(ns string, jobIDs []string) func() {
+	return func() {
+		for _, jobID := range jobIDs {
+			_ = StopJob(jobID, "-namespace", ns, "-purge", "-detach")
+		}
+		_, _ = Command("nomad", "system", "gc")
+	}
+}
+
 // CleanupJobsAndGCWithContext stops and purges the list of jobIDs and runs a
 // system gc. The passed context allows callers to cancel the execution of the
 // cleanup as they desire. This is useful for tests which attempt to remove the
e2e/scaling/doc.go (new file, +8)
@@ -0,0 +1,8 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+// Package scaling provides end-to-end tests for scaling Nomad workloads.
+//
+// In order to run this test suite only, from the e2e directory you can trigger
+// go test -v ./spread
+package scaling
@@ -2,8 +2,7 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
+  namespace = "NamespaceScalingTestA"
 
   update {
     health_check = "task_states"
@@ -2,9 +2,6 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
-  namespace   = "NamespaceA"
 
   update {
     health_check = "task_states"
@@ -2,8 +2,6 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
 
   update {
     health_check = "task_states"
@@ -2,8 +2,6 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
 
   update {
     health_check = "task_states"
@@ -4,6 +4,11 @@
 job "system_job" {
   type = "system"
 
+  constraint {
+    attribute = "${attr.kernel.name}"
+    value     = "linux"
+  }
+
   group "system_job_group" {
 
     task "system_task" {
@@ -22,4 +27,3 @@ job "system_job" {
     }
   }
 }
-
@@ -1,265 +0,0 @@
-// Copyright (c) HashiCorp, Inc.
-// SPDX-License-Identifier: BUSL-1.1
-
-package scaling
-
-import (
-	"os"
-
-	"github.com/hashicorp/nomad/api"
-	"github.com/hashicorp/nomad/e2e/e2eutil"
-	"github.com/hashicorp/nomad/e2e/framework"
-	"github.com/hashicorp/nomad/helper/pointer"
-	"github.com/hashicorp/nomad/helper/uuid"
-	"github.com/hashicorp/nomad/nomad/structs"
-)
-
-type ScalingE2ETest struct {
-	framework.TC
-	namespaceIDs     []string
-	namespacedJobIDs [][2]string
-}
-
-func init() {
-	framework.AddSuites(&framework.TestSuite{
-		Component:   "Scaling",
-		CanRunLocal: true,
-		Cases: []framework.TestCase{
-			new(ScalingE2ETest),
-		},
-	})
-}
-
-func (tc *ScalingE2ETest) BeforeAll(f *framework.F) {
-	e2eutil.WaitForLeader(f.T(), tc.Nomad())
-	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
-}
-
-func (tc *ScalingE2ETest) AfterEach(f *framework.F) {
-	if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
-		return
-	}
-
-	for _, namespacedJob := range tc.namespacedJobIDs {
-		err := e2eutil.StopJob(namespacedJob[1], "-purge", "-namespace",
-			namespacedJob[0])
-		f.NoError(err)
-	}
-	tc.namespacedJobIDs = [][2]string{}
-
-	for _, ns := range tc.namespaceIDs {
-		_, err := e2eutil.Command("nomad", "namespace", "delete", ns)
-		f.NoError(err)
-	}
-	tc.namespaceIDs = []string{}
-
-	_, err := e2eutil.Command("nomad", "system", "gc")
-	f.NoError(err)
-}
-
-// TestScalingBasic performs basic scaling e2e tests within a single namespace.
-func (tc *ScalingE2ETest) TestScalingBasic(f *framework.F) {
-	defaultNS := "default"
-
-	// Register a job with a scaling policy. The group doesn't include the
-	// count parameter, therefore Nomad should dynamically set this value to
-	// the policy min.
-	jobID := "test-scaling-" + uuid.Generate()[0:8]
-	f.NoError(e2eutil.Register(jobID, "scaling/input/namespace_default_1.nomad"))
-	tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{defaultNS, jobID})
-	f.NoError(e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running"}),
-		"job should be running with 2 allocs")
-
-	// Ensure we wait for the deployment to finish, otherwise scaling will
-	// fail.
-	f.NoError(e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
-
-	// Simple scaling action.
-	testMeta := map[string]interface{}{"scaling-e2e-test": "value"}
-	scaleResp, _, err := tc.Nomad().Jobs().Scale(
-		jobID, "horizontally_scalable", pointer.Of(3),
-		"Nomad e2e testing", false, testMeta, nil)
-	f.NoError(err)
-	f.NotEmpty(scaleResp.EvalID)
-	f.NoError(e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running", "running"}),
-		"job should be running with 3 allocs")
-
-	// Ensure we wait for the deployment to finish, otherwise scaling will
-	// fail for this reason.
-	f.NoError(e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
-
-	// Attempt break break the policy min/max parameters.
-	_, _, err = tc.Nomad().Jobs().Scale(
-		jobID, "horizontally_scalable", pointer.Of(4),
-		"Nomad e2e testing", false, nil, nil)
-	f.Error(err)
-	_, _, err = tc.Nomad().Jobs().Scale(
-		jobID, "horizontally_scalable", pointer.Of(1),
-		"Nomad e2e testing", false, nil, nil)
-	f.Error(err)
-
-	// Check the scaling events.
-	statusResp, _, err := tc.Nomad().Jobs().ScaleStatus(jobID, nil)
-	f.NoError(err)
-	f.Len(statusResp.TaskGroups["horizontally_scalable"].Events, 1)
-	f.Equal(testMeta, statusResp.TaskGroups["horizontally_scalable"].Events[0].Meta)
-
-	// Remove the job.
-	_, _, err = tc.Nomad().Jobs().Deregister(jobID, true, nil)
-	f.NoError(err)
-	f.NoError(tc.Nomad().System().GarbageCollect())
-	tc.namespacedJobIDs = [][2]string{}
-
-	// Attempt job registrations where the group count violates the policy
-	// min/max parameters.
-	f.Error(e2eutil.Register(jobID, "scaling/input/namespace_default_2.nomad"))
-	f.Error(e2eutil.Register(jobID, "scaling/input/namespace_default_3.nomad"))
-}
-
-// TestScalingNamespaces runs tests to ensure the job scaling endpoint adheres
-// to Nomad's basic namespace principles.
-func (tc *ScalingE2ETest) TestScalingNamespaces(f *framework.F) {
-
-	defaultNS := "default"
-	ANS := "NamespaceA"
-
-	// Create our non-default namespace.
-	_, err := e2eutil.Command("nomad", "namespace", "apply", ANS)
-	f.NoError(err, "could not create namespace")
-	tc.namespaceIDs = append(tc.namespaceIDs, ANS)
-
-	defaultJobID := "test-scaling-default-" + uuid.Generate()[0:8]
-	aJobID := "test-scaling-a-" + uuid.Generate()[0:8]
-
-	// Register and wait for the job deployments to succeed.
-	f.NoError(e2eutil.Register(defaultJobID, "scaling/input/namespace_default_1.nomad"))
-	f.NoError(e2eutil.Register(aJobID, "scaling/input/namespace_a_1.nomad"))
-	f.NoError(e2eutil.WaitForLastDeploymentStatus(defaultJobID, defaultNS, "successful", nil))
-	f.NoError(e2eutil.WaitForLastDeploymentStatus(aJobID, ANS, "successful", nil))
-
-	tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{defaultNS, defaultJobID})
-	tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{ANS, aJobID})
-
-	// Setup the WriteOptions for each namespace.
-	defaultWriteOpts := api.WriteOptions{Namespace: defaultNS}
-	aWriteOpts := api.WriteOptions{Namespace: ANS}
-
-	// We shouldn't be able to trigger scaling across the namespace boundary.
-	_, _, err = tc.Nomad().Jobs().Scale(
-		defaultJobID, "horizontally_scalable", pointer.Of(3),
-		"Nomad e2e testing", false, nil, &aWriteOpts)
-	f.Error(err)
-	_, _, err = tc.Nomad().Jobs().Scale(
-		aJobID, "horizontally_scalable", pointer.Of(3),
-		"Nomad e2e testing", false, nil, &defaultWriteOpts)
-	f.Error(err)
-
-	// We should be able to trigger scaling when using the correct namespace,
-	// duh.
-	_, _, err = tc.Nomad().Jobs().Scale(
-		defaultJobID, "horizontally_scalable", pointer.Of(3),
-		"Nomad e2e testing", false, nil, &defaultWriteOpts)
-	f.NoError(err)
-	_, _, err = tc.Nomad().Jobs().Scale(
-		aJobID, "horizontally_scalable", pointer.Of(3),
-		"Nomad e2e testing", false, nil, &aWriteOpts)
-	f.NoError(err)
-}
-
-// TestScalingBasic performs basic scaling e2e tests within a single namespace using
-// using a SystemScheduler.
-func (tc *ScalingE2ETest) TestScalingBasicWithSystemSchedule(f *framework.F) {
-	t := f.T()
-	nomadClient := tc.Nomad()
-
-	// Register a system job with a scaling policy without a group count, it should
-	// default to 1 per node.
-
-	jobID := "test-scaling-" + uuid.Generate()[0:8]
-	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scaling/input/namespace_default_system.nomad", jobID, "")
-
-	jobs := nomadClient.Jobs()
-	initialAllocs, _, err := jobs.Allocations(jobID, true, nil)
-	f.NoError(err)
-
-	nodeStubList, _, err := nomadClient.Nodes().List(&api.QueryOptions{Namespace: "default"})
-	f.NoError(err)
-
-	// A system job will spawn an allocation per node, we need to know how many nodes
-	// there are to know how many allocations to expect.
-	numberOfNodes := len(nodeStubList)
-
-	f.Equal(numberOfNodes, len(initialAllocs))
-	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(initialAllocs)
-
-	// Wait for allocations to get past initial pending state
-	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
-
-	// Try to scale beyond 1
-	testMeta := map[string]interface{}{"scaling-e2e-test": "value"}
-	scaleResp, _, err := tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(3),
-		"Nomad e2e testing", false, testMeta, nil)
-
-	f.Error(err)
-	f.Nil(scaleResp)
-
-	// The same allocs should be running.
-	jobs = nomadClient.Jobs()
-	allocs1, _, err := jobs.Allocations(jobID, true, nil)
-	f.NoError(err)
-
-	f.Equal(len(initialAllocs), len(allocs1))
-
-	for i, a := range allocs1 {
-		f.Equal(a.ID, initialAllocs[i].ID)
-	}
-
-	// Scale down to 0
-	testMeta = map[string]interface{}{"scaling-e2e-test": "value"}
-	scaleResp, _, err = tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(0),
-		"Nomad e2e testing", false, testMeta, nil)
-	f.NoError(err)
-	f.NotEmpty(scaleResp.EvalID)
-
-	// Assert job is still up but no allocs are running
-	stopedAllocs, _, err := jobs.Allocations(jobID, false, nil)
-	f.NoError(err)
-
-	f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, stopedAllocs)))
-	f.Equal(numberOfNodes, len(stopedAllocs))
-
-	// Scale up to 1 again
-	testMeta = map[string]interface{}{"scaling-e2e-test": "value"}
-	scaleResp, _, err = tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(1),
-		"Nomad e2e testing", false, testMeta, nil)
-	f.NoError(err)
-	f.NotEmpty(scaleResp.EvalID)
-
-	// Wait for new allocation to get past initial pending state
-	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
-
-	// Assert job is still running and there is a running allocation again
-	allocs, _, err := jobs.Allocations(jobID, true, nil)
-	f.NoError(err)
-	f.Equal(numberOfNodes*2, len(allocs))
-
-	f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, allocs)))
-	f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusRun, allocs)))
-
-	// Remove the job.
-	_, _, err = tc.Nomad().Jobs().Deregister(jobID, true, nil)
-	f.NoError(err)
-	f.NoError(tc.Nomad().System().GarbageCollect())
-}
-
-func filterAllocsByDesiredStatus(status string, allocs []*api.AllocationListStub) []*api.AllocationListStub {
-	res := []*api.AllocationListStub{}
-
-	for _, a := range allocs {
-		if a.DesiredStatus == status {
-			res = append(res, a)
-		}
-	}
-
-	return res
-}
e2e/scaling/scaling_test.go (new file, +240)
@@ -0,0 +1,240 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package scaling
+
+import (
+	"testing"
+	"time"
+
+	"github.com/hashicorp/nomad/api"
+	"github.com/hashicorp/nomad/e2e/e2eutil"
+	"github.com/hashicorp/nomad/e2e/v3/cluster3"
+	"github.com/hashicorp/nomad/helper/pointer"
+	"github.com/hashicorp/nomad/helper/uuid"
+	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/shoenig/test/must"
+	"github.com/shoenig/test/wait"
+)
+
+const defaultNS = "default"
+
+func TestScaling(t *testing.T) {
+	cluster3.Establish(t,
+		cluster3.Leader(),
+		cluster3.LinuxClients(1),
+		cluster3.Timeout(3*time.Second),
+	)
+
+	// Run our test cases.
+	t.Run("TestScaling_Basic", testScalingBasic)
+	t.Run("TestScaling_Namespaces", testScalingNamespaces)
+	t.Run("TestScaling_System", testScalingSystemJob)
+}
+
+func testScalingBasic(t *testing.T) {
+	nomad := e2eutil.NomadClient(t)
+
+	jobID := "scaling-basic-" + uuid.Short()
+	jobIDs := []string{jobID}
+	t.Cleanup(e2eutil.MaybeCleanupJobsAndGC(&jobIDs))
+
+	// start job
+	allocs := e2eutil.RegisterAndWaitForAllocs(t,
+		nomad, "./input/namespace_default_1.nomad.hcl", jobID, "")
+	must.Len(t, 2, allocs, must.Sprint("expected 2 allocs"))
+
+	// Ensure we wait for the deployment to finish, otherwise scaling will fail.
+	must.NoError(t, e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
+
+	// Simple scaling action.
+	testMeta := map[string]any{"scaling-e2e-test": "value"}
+	scaleResp, _, err := nomad.Jobs().Scale(
+		jobID, "horizontally_scalable", pointer.Of(3),
+		"Nomad e2e testing", false, testMeta, nil)
+	must.NoError(t, err)
+	must.NotEq(t, "", scaleResp.EvalID)
+	must.NoError(t, e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running", "running"}),
+		must.Sprint("job should be running with 3 allocs"))
+
+	// Ensure we wait for the deployment to finish, otherwise scaling will
+	// fail for this reason.
+	must.NoError(t, e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
+
+	// Attempt break break the policy min/max parameters.
+	_, _, err = nomad.Jobs().Scale(
+		jobID, "horizontally_scalable", pointer.Of(4),
+		"Nomad e2e testing", false, nil, nil)
+	must.ErrorContains(t, err, "group count was greater than scaling policy maximum")
+	_, _, err = nomad.Jobs().Scale(
+		jobID, "horizontally_scalable", pointer.Of(1),
+		"Nomad e2e testing", false, nil, nil)
+	must.ErrorContains(t, err, "group count was less than scaling policy minimum")
+
+	// Check the scaling events.
+	statusResp, _, err := nomad.Jobs().ScaleStatus(jobID, nil)
+	must.NoError(t, err)
+	must.Len(t, 1, statusResp.TaskGroups["horizontally_scalable"].Events)
+	must.Eq(t, testMeta, statusResp.TaskGroups["horizontally_scalable"].Events[0].Meta)
+
+	// Remove the job.
+	_, _, err = nomad.Jobs().Deregister(jobID, true, nil)
+	must.NoError(t, err)
+	must.NoError(t, nomad.System().GarbageCollect())
+
+	// Attempt job registrations where the group count violates the policy
+	// min/max parameters.
+	err = e2eutil.Register(jobID, "input/namespace_default_2.nomad.hcl")
+	must.ErrorContains(t, err, "task group count must not be greater than maximum count")
+	must.Error(t, e2eutil.Register(jobID, "input/namespace_default_3.nomad.hcl"))
+}
+
+func testScalingNamespaces(t *testing.T) {
+	nomad := e2eutil.NomadClient(t)
+
+	// Create our non-default namespace.
+	ANS := "NamespaceScalingTestA"
+	_, err := e2eutil.Command("nomad", "namespace", "apply", ANS)
+	must.NoError(t, err, must.Sprint("could not create namespace"))
+	e2eutil.CleanupCommand(t, "nomad namespace delete %s", ANS)
+
+	defaultJobID := "test-scaling-default-" + uuid.Generate()[0:8]
+	aJobID := "test-scaling-a-" + uuid.Generate()[0:8]
+
+	// Register and wait for the job deployments to succeed.
+	must.NoError(t, e2eutil.Register(defaultJobID, "input/namespace_default_1.nomad.hcl"))
+	must.NoError(t, e2eutil.Register(aJobID, "input/namespace_a_1.nomad.hcl"))
+	must.NoError(t, e2eutil.WaitForLastDeploymentStatus(defaultJobID, defaultNS, "successful", nil))
+	must.NoError(t, e2eutil.WaitForLastDeploymentStatus(aJobID, ANS, "successful", nil))
+
+	t.Cleanup(e2eutil.MaybeCleanupNamespacedJobsAndGC(ANS, []string{aJobID}))
+	t.Cleanup(e2eutil.MaybeCleanupJobsAndGC(&[]string{defaultJobID}))
+
+	// Setup the WriteOptions for each namespace.
+	defaultWriteOpts := api.WriteOptions{Namespace: defaultNS}
+	aWriteOpts := api.WriteOptions{Namespace: ANS}
+
+	// We shouldn't be able to trigger scaling across the namespace boundary.
+	_, _, err = nomad.Jobs().Scale(
+		defaultJobID, "horizontally_scalable", pointer.Of(3),
+		"Nomad e2e testing", false, nil, &aWriteOpts)
+	must.ErrorContains(t, err, "not found")
+	_, _, err = nomad.Jobs().Scale(
+		aJobID, "horizontally_scalable", pointer.Of(3),
+		"Nomad e2e testing", false, nil, &defaultWriteOpts)
+	must.ErrorContains(t, err, "not found")
+
+	// We should be able to trigger scaling when using the correct namespace,
+	// duh.
+	_, _, err = nomad.Jobs().Scale(
+		defaultJobID, "horizontally_scalable", pointer.Of(3),
+		"Nomad e2e testing", false, nil, &defaultWriteOpts)
+	must.NoError(t, err)
+	_, _, err = nomad.Jobs().Scale(
+		aJobID, "horizontally_scalable", pointer.Of(3),
+		"Nomad e2e testing", false, nil, &aWriteOpts)
+	must.NoError(t, err)
+}
+
+func testScalingSystemJob(t *testing.T) {
+	nomad := e2eutil.NomadClient(t)
+
+	// Register a system job with a scaling policy without a group count, it
+	// should default to 1 per node.
+
+	jobID := "test-scaling-" + uuid.Generate()[0:8]
+	e2eutil.RegisterAndWaitForAllocs(t, nomad,
+		"input/namespace_default_system.nomad.hcl", jobID, "")
+
+	t.Cleanup(e2eutil.CleanupJobsAndGC(t, &[]string{jobID}))
+
+	jobs := nomad.Jobs()
+	initialAllocs, _, err := jobs.Allocations(jobID, true, nil)
+	must.NoError(t, err)
+
+	// A system job will spawn an allocation per feasible node, we need to know
+	// how many nodes there are to know how many allocations to expect.
+	nodeStubList, _, err := nomad.Nodes().List(
+		&api.QueryOptions{
+			Namespace: "default",
+			Params:    map[string]string{"os": "true"},
+			Filter:    `Attributes["os.name"] == "ubuntu"`,
+		})
+	must.NoError(t, err)
+	numberOfNodes := len(nodeStubList)
+
+	must.Len(t, numberOfNodes, initialAllocs)
+	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(initialAllocs)
+
+	// Wait for allocations to get past initial pending state
+	e2eutil.WaitForAllocsNotPending(t, nomad, allocIDs)
+
+	// Try to scale beyond 1
+	testMeta := map[string]any{"scaling-e2e-test": "value"}
+	scaleResp, _, err := nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(3),
+		"Nomad e2e testing", false, testMeta, nil)
+
+	must.ErrorContains(t, err, "can only be scaled between 0 and 1")
+	must.Nil(t, scaleResp)
+
+	// The same allocs should be running.
+	jobs = nomad.Jobs()
+	allocs1, _, err := jobs.Allocations(jobID, true, nil)
+	must.NoError(t, err)
+
+	must.Eq(t, len(initialAllocs), len(allocs1))
+	for i, a := range allocs1 {
+		must.Eq(t, a.ID, initialAllocs[i].ID)
+	}
+
+	// Scale down to 0
+	testMeta = map[string]any{"scaling-e2e-test": "value"}
+	scaleResp, _, err = nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(0),
+		"Nomad e2e testing", false, testMeta, nil)
+	must.NoError(t, err)
+	must.NotEq(t, "", scaleResp.EvalID)
+
+	// Wait until allocs all stop
+	must.Wait(t, wait.InitialSuccess(
+		wait.BoolFunc(func() bool {
+			allocs, _, err := jobs.Allocations(jobID, false, nil)
+			must.NoError(t, err)
+			stoppedAllocs := filterAllocsByDesiredStatus(
+				structs.AllocDesiredStatusStop, allocs)
+			return len(stoppedAllocs) == numberOfNodes
+		}),
+		wait.Timeout(10*time.Second),
+		wait.Gap(100*time.Millisecond),
+	), must.Sprint("allocs did not stop"))
+
+	// Scale up to 1 again
+	testMeta = map[string]any{"scaling-e2e-test": "value"}
+	scaleResp, _, err = nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(1),
+		"Nomad e2e testing", false, testMeta, nil)
+	must.NoError(t, err)
+	must.NotEq(t, "", scaleResp.EvalID)
+
+	// Wait for new allocation to get past initial pending state
+	e2eutil.WaitForAllocsNotPending(t, nomad, allocIDs)
+
+	// Assert job is still running and there is a running allocation again
+	allocs, _, err := jobs.Allocations(jobID, true, nil)
+	must.NoError(t, err)
+	must.Len(t, numberOfNodes*2, allocs)
+	must.Len(t, numberOfNodes,
+		filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, allocs))
+	must.Len(t, numberOfNodes,
+		filterAllocsByDesiredStatus(structs.AllocDesiredStatusRun, allocs))
+}
+
+func filterAllocsByDesiredStatus(status string, allocs []*api.AllocationListStub) []*api.AllocationListStub {
+	res := []*api.AllocationListStub{}
+
+	for _, a := range allocs {
+		if a.DesiredStatus == status {
+			res = append(res, a)
+		}
+	}
+
+	return res
+}