diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go
index 13f548f3c..17242c74e 100644
--- a/e2e/e2e_test.go
+++ b/e2e/e2e_test.go
@@ -23,7 +23,6 @@ import (
     _ "github.com/hashicorp/nomad/e2e/parameterized"
     _ "github.com/hashicorp/nomad/e2e/periodic"
     _ "github.com/hashicorp/nomad/e2e/quotas"
-    _ "github.com/hashicorp/nomad/e2e/scaling"
     _ "github.com/hashicorp/nomad/e2e/scalingpolicies"
     _ "github.com/hashicorp/nomad/e2e/scheduler_sysbatch"
     _ "github.com/hashicorp/nomad/e2e/scheduler_system"
@@ -44,6 +43,7 @@ import (
     _ "github.com/hashicorp/nomad/e2e/oversubscription"
     _ "github.com/hashicorp/nomad/e2e/podman"
     _ "github.com/hashicorp/nomad/e2e/rescheduling"
+    _ "github.com/hashicorp/nomad/e2e/scaling"
     _ "github.com/hashicorp/nomad/e2e/spread"
     _ "github.com/hashicorp/nomad/e2e/vaultsecrets"
     _ "github.com/hashicorp/nomad/e2e/volume_mounts"
diff --git a/e2e/e2eutil/job.go b/e2e/e2eutil/job.go
index 505e8476b..6559c0e58 100644
--- a/e2e/e2eutil/job.go
+++ b/e2e/e2eutil/job.go
@@ -240,6 +240,20 @@ func MaybeCleanupJobsAndGC(jobIDs *[]string) func() {
     }
 }
 
+// MaybeCleanupNamespacedJobsAndGC stops and purges the list of jobIDs in the namespace and runs a
+// system gc. Returns a func so that the return value can be used
+// in t.Cleanup. Similar to CleanupJobsAndGC, but this one does not assert
+// on a successful stop and gc, which is useful for tests that want to stop and
+// gc the jobs themselves while keeping a backup Cleanup just in case.
+func MaybeCleanupNamespacedJobsAndGC(ns string, jobIDs []string) func() {
+    return func() {
+        for _, jobID := range jobIDs {
+            _ = StopJob(jobID, "-namespace", ns, "-purge", "-detach")
+        }
+        _, _ = Command("nomad", "system", "gc")
+    }
+}
+
 // CleanupJobsAndGCWithContext stops and purges the list of jobIDs and runs a
 // system gc. The passed context allows callers to cancel the execution of the
 // cleanup as they desire. This is useful for tests which attempt to remove the
diff --git a/e2e/scaling/doc.go b/e2e/scaling/doc.go
new file mode 100644
index 000000000..b5ee24921
--- /dev/null
+++ b/e2e/scaling/doc.go
@@ -0,0 +1,8 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+// Package scaling provides end-to-end tests for scaling Nomad workloads.
+//
+// In order to run this test suite only, from the e2e directory you can trigger
+// go test -v ./scaling
+package scaling
diff --git a/e2e/scaling/input/namespace_default_1.nomad b/e2e/scaling/input/namespace_a_1.nomad.hcl
similarity index 91%
rename from e2e/scaling/input/namespace_default_1.nomad
rename to e2e/scaling/input/namespace_a_1.nomad.hcl
index 445aeb6b1..ed2e8795c 100644
--- a/e2e/scaling/input/namespace_default_1.nomad
+++ b/e2e/scaling/input/namespace_a_1.nomad.hcl
@@ -2,8 +2,7 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
+  namespace = "NamespaceScalingTestA"
 
   update {
     health_check = "task_states"
diff --git a/e2e/scaling/input/namespace_a_1.nomad b/e2e/scaling/input/namespace_default_1.nomad.hcl
similarity index 86%
rename from e2e/scaling/input/namespace_a_1.nomad
rename to e2e/scaling/input/namespace_default_1.nomad.hcl
index 25363b26e..5febce6d7 100644
--- a/e2e/scaling/input/namespace_a_1.nomad
+++ b/e2e/scaling/input/namespace_default_1.nomad.hcl
@@ -2,9 +2,6 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
-  namespace   = "NamespaceA"
 
   update {
     health_check = "task_states"
diff --git a/e2e/scaling/input/namespace_default_2.nomad b/e2e/scaling/input/namespace_default_2.nomad.hcl
similarity index 91%
rename from e2e/scaling/input/namespace_default_2.nomad
rename to e2e/scaling/input/namespace_default_2.nomad.hcl
index afe3b8ef4..b14004ca4 100644
--- a/e2e/scaling/input/namespace_default_2.nomad
+++ b/e2e/scaling/input/namespace_default_2.nomad.hcl
@@ -2,8 +2,6 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
 
   update {
     health_check = "task_states"
diff --git a/e2e/scaling/input/namespace_default_3.nomad b/e2e/scaling/input/namespace_default_3.nomad.hcl
similarity index 91%
rename from e2e/scaling/input/namespace_default_3.nomad
rename to e2e/scaling/input/namespace_default_3.nomad.hcl
index b963fcf04..70aa90a56 100644
--- a/e2e/scaling/input/namespace_default_3.nomad
+++ b/e2e/scaling/input/namespace_default_3.nomad.hcl
@@ -2,8 +2,6 @@
 # SPDX-License-Identifier: BUSL-1.1
 
 job "horizontally_scalable" {
-  datacenters = ["dc1"]
-  type        = "service"
 
   update {
     health_check = "task_states"
diff --git a/e2e/scaling/input/namespace_default_system.nomad b/e2e/scaling/input/namespace_default_system.nomad.hcl
similarity index 81%
rename from e2e/scaling/input/namespace_default_system.nomad
rename to e2e/scaling/input/namespace_default_system.nomad.hcl
index 75a22af86..773a8aefd 100644
--- a/e2e/scaling/input/namespace_default_system.nomad
+++ b/e2e/scaling/input/namespace_default_system.nomad.hcl
@@ -4,6 +4,11 @@
 job "system_job" {
   type = "system"
 
+  constraint {
+    attribute = "${attr.kernel.name}"
+    value     = "linux"
+  }
+
   group "system_job_group" {
 
     task "system_task" {
@@ -22,4 +27,3 @@
     }
   }
 }
-
diff --git a/e2e/scaling/scaling.go b/e2e/scaling/scaling.go
deleted file mode 100644
index 5b3580e03..000000000
--- a/e2e/scaling/scaling.go
+++ /dev/null
@@ -1,265 +0,0 @@
-// Copyright (c) HashiCorp, Inc.
-// SPDX-License-Identifier: BUSL-1.1
-
-package scaling
-
-import (
-    "os"
-
-    "github.com/hashicorp/nomad/api"
-    "github.com/hashicorp/nomad/e2e/e2eutil"
-    "github.com/hashicorp/nomad/e2e/framework"
-    "github.com/hashicorp/nomad/helper/pointer"
-    "github.com/hashicorp/nomad/helper/uuid"
-    "github.com/hashicorp/nomad/nomad/structs"
-)
-
-type ScalingE2ETest struct {
-    framework.TC
-    namespaceIDs     []string
-    namespacedJobIDs [][2]string
-}
-
-func init() {
-    framework.AddSuites(&framework.TestSuite{
-        Component:   "Scaling",
-        CanRunLocal: true,
-        Cases: []framework.TestCase{
-            new(ScalingE2ETest),
-        },
-    })
-}
-
-func (tc *ScalingE2ETest) BeforeAll(f *framework.F) {
-    e2eutil.WaitForLeader(f.T(), tc.Nomad())
-    e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
-}
-
-func (tc *ScalingE2ETest) AfterEach(f *framework.F) {
-    if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
-        return
-    }
-
-    for _, namespacedJob := range tc.namespacedJobIDs {
-        err := e2eutil.StopJob(namespacedJob[1], "-purge", "-namespace",
-            namespacedJob[0])
-        f.NoError(err)
-    }
-    tc.namespacedJobIDs = [][2]string{}
-
-    for _, ns := range tc.namespaceIDs {
-        _, err := e2eutil.Command("nomad", "namespace", "delete", ns)
-        f.NoError(err)
-    }
-    tc.namespaceIDs = []string{}
-
-    _, err := e2eutil.Command("nomad", "system", "gc")
-    f.NoError(err)
-}
-
-// TestScalingBasic performs basic scaling e2e tests within a single namespace.
-func (tc *ScalingE2ETest) TestScalingBasic(f *framework.F) {
-    defaultNS := "default"
-
-    // Register a job with a scaling policy. The group doesn't include the
-    // count parameter, therefore Nomad should dynamically set this value to
-    // the policy min.
-    jobID := "test-scaling-" + uuid.Generate()[0:8]
-    f.NoError(e2eutil.Register(jobID, "scaling/input/namespace_default_1.nomad"))
-    tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{defaultNS, jobID})
-    f.NoError(e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running"}),
-        "job should be running with 2 allocs")
-
-    // Ensure we wait for the deployment to finish, otherwise scaling will
-    // fail.
-    f.NoError(e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
-
-    // Simple scaling action.
-    testMeta := map[string]interface{}{"scaling-e2e-test": "value"}
-    scaleResp, _, err := tc.Nomad().Jobs().Scale(
-        jobID, "horizontally_scalable", pointer.Of(3),
-        "Nomad e2e testing", false, testMeta, nil)
-    f.NoError(err)
-    f.NotEmpty(scaleResp.EvalID)
-    f.NoError(e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running", "running"}),
-        "job should be running with 3 allocs")
-
-    // Ensure we wait for the deployment to finish, otherwise scaling will
-    // fail for this reason.
-    f.NoError(e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
-
-    // Attempt break break the policy min/max parameters.
-    _, _, err = tc.Nomad().Jobs().Scale(
-        jobID, "horizontally_scalable", pointer.Of(4),
-        "Nomad e2e testing", false, nil, nil)
-    f.Error(err)
-    _, _, err = tc.Nomad().Jobs().Scale(
-        jobID, "horizontally_scalable", pointer.Of(1),
-        "Nomad e2e testing", false, nil, nil)
-    f.Error(err)
-
-    // Check the scaling events.
-    statusResp, _, err := tc.Nomad().Jobs().ScaleStatus(jobID, nil)
-    f.NoError(err)
-    f.Len(statusResp.TaskGroups["horizontally_scalable"].Events, 1)
-    f.Equal(testMeta, statusResp.TaskGroups["horizontally_scalable"].Events[0].Meta)
-
-    // Remove the job.
-    _, _, err = tc.Nomad().Jobs().Deregister(jobID, true, nil)
-    f.NoError(err)
-    f.NoError(tc.Nomad().System().GarbageCollect())
-    tc.namespacedJobIDs = [][2]string{}
-
-    // Attempt job registrations where the group count violates the policy
-    // min/max parameters.
-    f.Error(e2eutil.Register(jobID, "scaling/input/namespace_default_2.nomad"))
-    f.Error(e2eutil.Register(jobID, "scaling/input/namespace_default_3.nomad"))
-}
-
-// TestScalingNamespaces runs tests to ensure the job scaling endpoint adheres
-// to Nomad's basic namespace principles.
-func (tc *ScalingE2ETest) TestScalingNamespaces(f *framework.F) {
-
-    defaultNS := "default"
-    ANS := "NamespaceA"
-
-    // Create our non-default namespace.
-    _, err := e2eutil.Command("nomad", "namespace", "apply", ANS)
-    f.NoError(err, "could not create namespace")
-    tc.namespaceIDs = append(tc.namespaceIDs, ANS)
-
-    defaultJobID := "test-scaling-default-" + uuid.Generate()[0:8]
-    aJobID := "test-scaling-a-" + uuid.Generate()[0:8]
-
-    // Register and wait for the job deployments to succeed.
-    f.NoError(e2eutil.Register(defaultJobID, "scaling/input/namespace_default_1.nomad"))
-    f.NoError(e2eutil.Register(aJobID, "scaling/input/namespace_a_1.nomad"))
-    f.NoError(e2eutil.WaitForLastDeploymentStatus(defaultJobID, defaultNS, "successful", nil))
-    f.NoError(e2eutil.WaitForLastDeploymentStatus(aJobID, ANS, "successful", nil))
-
-    tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{defaultNS, defaultJobID})
-    tc.namespacedJobIDs = append(tc.namespacedJobIDs, [2]string{ANS, aJobID})
-
-    // Setup the WriteOptions for each namespace.
-    defaultWriteOpts := api.WriteOptions{Namespace: defaultNS}
-    aWriteOpts := api.WriteOptions{Namespace: ANS}
-
-    // We shouldn't be able to trigger scaling across the namespace boundary.
-    _, _, err = tc.Nomad().Jobs().Scale(
-        defaultJobID, "horizontally_scalable", pointer.Of(3),
-        "Nomad e2e testing", false, nil, &aWriteOpts)
-    f.Error(err)
-    _, _, err = tc.Nomad().Jobs().Scale(
-        aJobID, "horizontally_scalable", pointer.Of(3),
-        "Nomad e2e testing", false, nil, &defaultWriteOpts)
-    f.Error(err)
-
-    // We should be able to trigger scaling when using the correct namespace,
-    // duh.
-    _, _, err = tc.Nomad().Jobs().Scale(
-        defaultJobID, "horizontally_scalable", pointer.Of(3),
-        "Nomad e2e testing", false, nil, &defaultWriteOpts)
-    f.NoError(err)
-    _, _, err = tc.Nomad().Jobs().Scale(
-        aJobID, "horizontally_scalable", pointer.Of(3),
-        "Nomad e2e testing", false, nil, &aWriteOpts)
-    f.NoError(err)
-}
-
-// TestScalingBasic performs basic scaling e2e tests within a single namespace using
-// using a SystemScheduler.
-func (tc *ScalingE2ETest) TestScalingBasicWithSystemSchedule(f *framework.F) {
-    t := f.T()
-    nomadClient := tc.Nomad()
-
-    // Register a system job with a scaling policy without a group count, it should
-    // default to 1 per node.
-
-    jobID := "test-scaling-" + uuid.Generate()[0:8]
-    e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scaling/input/namespace_default_system.nomad", jobID, "")
-
-    jobs := nomadClient.Jobs()
-    initialAllocs, _, err := jobs.Allocations(jobID, true, nil)
-    f.NoError(err)
-
-    nodeStubList, _, err := nomadClient.Nodes().List(&api.QueryOptions{Namespace: "default"})
-    f.NoError(err)
-
-    // A system job will spawn an allocation per node, we need to know how many nodes
-    // there are to know how many allocations to expect.
-    numberOfNodes := len(nodeStubList)
-
-    f.Equal(numberOfNodes, len(initialAllocs))
-    allocIDs := e2eutil.AllocIDsFromAllocationListStubs(initialAllocs)
-
-    // Wait for allocations to get past initial pending state
-    e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
-
-    // Try to scale beyond 1
-    testMeta := map[string]interface{}{"scaling-e2e-test": "value"}
-    scaleResp, _, err := tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(3),
-        "Nomad e2e testing", false, testMeta, nil)
-
-    f.Error(err)
-    f.Nil(scaleResp)
-
-    // The same allocs should be running.
-    jobs = nomadClient.Jobs()
-    allocs1, _, err := jobs.Allocations(jobID, true, nil)
-    f.NoError(err)
-
-    f.Equal(len(initialAllocs), len(allocs1))
-
-    for i, a := range allocs1 {
-        f.Equal(a.ID, initialAllocs[i].ID)
-    }
-
-    // Scale down to 0
-    testMeta = map[string]interface{}{"scaling-e2e-test": "value"}
-    scaleResp, _, err = tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(0),
-        "Nomad e2e testing", false, testMeta, nil)
-    f.NoError(err)
-    f.NotEmpty(scaleResp.EvalID)
-
-    // Assert job is still up but no allocs are running
-    stopedAllocs, _, err := jobs.Allocations(jobID, false, nil)
-    f.NoError(err)
-
-    f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, stopedAllocs)))
-    f.Equal(numberOfNodes, len(stopedAllocs))
-
-    // Scale up to 1 again
-    testMeta = map[string]interface{}{"scaling-e2e-test": "value"}
-    scaleResp, _, err = tc.Nomad().Jobs().Scale(jobID, "system_job_group", pointer.Of(1),
-        "Nomad e2e testing", false, testMeta, nil)
-    f.NoError(err)
-    f.NotEmpty(scaleResp.EvalID)
-
-    // Wait for new allocation to get past initial pending state
-    e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
-
-    // Assert job is still running and there is a running allocation again
-    allocs, _, err := jobs.Allocations(jobID, true, nil)
-    f.NoError(err)
-    f.Equal(numberOfNodes*2, len(allocs))
-
-    f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, allocs)))
-    f.Equal(numberOfNodes, len(filterAllocsByDesiredStatus(structs.AllocDesiredStatusRun, allocs)))
-
-    // Remove the job.
-    _, _, err = tc.Nomad().Jobs().Deregister(jobID, true, nil)
-    f.NoError(err)
-    f.NoError(tc.Nomad().System().GarbageCollect())
-}
-
-func filterAllocsByDesiredStatus(status string, allocs []*api.AllocationListStub) []*api.AllocationListStub {
-    res := []*api.AllocationListStub{}
-
-    for _, a := range allocs {
-        if a.DesiredStatus == status {
-            res = append(res, a)
-        }
-    }
-
-    return res
-}
diff --git a/e2e/scaling/scaling_test.go b/e2e/scaling/scaling_test.go
new file mode 100644
index 000000000..5ab9f0468
--- /dev/null
+++ b/e2e/scaling/scaling_test.go
@@ -0,0 +1,240 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package scaling
+
+import (
+    "testing"
+    "time"
+
+    "github.com/hashicorp/nomad/api"
+    "github.com/hashicorp/nomad/e2e/e2eutil"
+    "github.com/hashicorp/nomad/e2e/v3/cluster3"
+    "github.com/hashicorp/nomad/helper/pointer"
+    "github.com/hashicorp/nomad/helper/uuid"
+    "github.com/hashicorp/nomad/nomad/structs"
+    "github.com/shoenig/test/must"
+    "github.com/shoenig/test/wait"
+)
+
+const defaultNS = "default"
+
+func TestScaling(t *testing.T) {
+    cluster3.Establish(t,
+        cluster3.Leader(),
+        cluster3.LinuxClients(1),
+        cluster3.Timeout(3*time.Second),
+    )
+
+    // Run our test cases.
+    t.Run("TestScaling_Basic", testScalingBasic)
+    t.Run("TestScaling_Namespaces", testScalingNamespaces)
+    t.Run("TestScaling_System", testScalingSystemJob)
+}
+
+func testScalingBasic(t *testing.T) {
+    nomad := e2eutil.NomadClient(t)
+
+    jobID := "scaling-basic-" + uuid.Short()
+    jobIDs := []string{jobID}
+    t.Cleanup(e2eutil.MaybeCleanupJobsAndGC(&jobIDs))
+
+    // start job
+    allocs := e2eutil.RegisterAndWaitForAllocs(t,
+        nomad, "./input/namespace_default_1.nomad.hcl", jobID, "")
+    must.Len(t, 2, allocs, must.Sprint("expected 2 allocs"))
+
+    // Ensure we wait for the deployment to finish, otherwise scaling will fail.
+    must.NoError(t, e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
+
+    // Simple scaling action.
+    testMeta := map[string]any{"scaling-e2e-test": "value"}
+    scaleResp, _, err := nomad.Jobs().Scale(
+        jobID, "horizontally_scalable", pointer.Of(3),
+        "Nomad e2e testing", false, testMeta, nil)
+    must.NoError(t, err)
+    must.NotEq(t, "", scaleResp.EvalID)
+    must.NoError(t, e2eutil.WaitForAllocStatusExpected(jobID, defaultNS, []string{"running", "running", "running"}),
+        must.Sprint("job should be running with 3 allocs"))
+
+    // Ensure we wait for the deployment to finish, otherwise scaling will
+    // fail for this reason.
+    must.NoError(t, e2eutil.WaitForLastDeploymentStatus(jobID, defaultNS, "successful", nil))
+
+    // Attempt to break the policy min/max parameters.
+    _, _, err = nomad.Jobs().Scale(
+        jobID, "horizontally_scalable", pointer.Of(4),
+        "Nomad e2e testing", false, nil, nil)
+    must.ErrorContains(t, err, "group count was greater than scaling policy maximum")
+    _, _, err = nomad.Jobs().Scale(
+        jobID, "horizontally_scalable", pointer.Of(1),
+        "Nomad e2e testing", false, nil, nil)
+    must.ErrorContains(t, err, "group count was less than scaling policy minimum")
+
+    // Check the scaling events.
+    statusResp, _, err := nomad.Jobs().ScaleStatus(jobID, nil)
+    must.NoError(t, err)
+    must.Len(t, 1, statusResp.TaskGroups["horizontally_scalable"].Events)
+    must.Eq(t, testMeta, statusResp.TaskGroups["horizontally_scalable"].Events[0].Meta)
+
+    // Remove the job.
+    _, _, err = nomad.Jobs().Deregister(jobID, true, nil)
+    must.NoError(t, err)
+    must.NoError(t, nomad.System().GarbageCollect())
+
+    // Attempt job registrations where the group count violates the policy
+    // min/max parameters.
+    err = e2eutil.Register(jobID, "input/namespace_default_2.nomad.hcl")
+    must.ErrorContains(t, err, "task group count must not be greater than maximum count")
+    must.Error(t, e2eutil.Register(jobID, "input/namespace_default_3.nomad.hcl"))
+}
+
+func testScalingNamespaces(t *testing.T) {
+    nomad := e2eutil.NomadClient(t)
+
+    // Create our non-default namespace.
+    ANS := "NamespaceScalingTestA"
+    _, err := e2eutil.Command("nomad", "namespace", "apply", ANS)
+    must.NoError(t, err, must.Sprint("could not create namespace"))
+    e2eutil.CleanupCommand(t, "nomad namespace delete %s", ANS)
+
+    defaultJobID := "test-scaling-default-" + uuid.Generate()[0:8]
+    aJobID := "test-scaling-a-" + uuid.Generate()[0:8]
+
+    // Register and wait for the job deployments to succeed.
+    must.NoError(t, e2eutil.Register(defaultJobID, "input/namespace_default_1.nomad.hcl"))
+    must.NoError(t, e2eutil.Register(aJobID, "input/namespace_a_1.nomad.hcl"))
+    must.NoError(t, e2eutil.WaitForLastDeploymentStatus(defaultJobID, defaultNS, "successful", nil))
+    must.NoError(t, e2eutil.WaitForLastDeploymentStatus(aJobID, ANS, "successful", nil))
+
+    t.Cleanup(e2eutil.MaybeCleanupNamespacedJobsAndGC(ANS, []string{aJobID}))
+    t.Cleanup(e2eutil.MaybeCleanupJobsAndGC(&[]string{defaultJobID}))
+
+    // Setup the WriteOptions for each namespace.
+    defaultWriteOpts := api.WriteOptions{Namespace: defaultNS}
+    aWriteOpts := api.WriteOptions{Namespace: ANS}
+
+    // We shouldn't be able to trigger scaling across the namespace boundary.
+    _, _, err = nomad.Jobs().Scale(
+        defaultJobID, "horizontally_scalable", pointer.Of(3),
+        "Nomad e2e testing", false, nil, &aWriteOpts)
+    must.ErrorContains(t, err, "not found")
+    _, _, err = nomad.Jobs().Scale(
+        aJobID, "horizontally_scalable", pointer.Of(3),
+        "Nomad e2e testing", false, nil, &defaultWriteOpts)
+    must.ErrorContains(t, err, "not found")
+
+    // We should be able to trigger scaling when using the correct
+    // namespace.
+    _, _, err = nomad.Jobs().Scale(
+        defaultJobID, "horizontally_scalable", pointer.Of(3),
+        "Nomad e2e testing", false, nil, &defaultWriteOpts)
+    must.NoError(t, err)
+    _, _, err = nomad.Jobs().Scale(
+        aJobID, "horizontally_scalable", pointer.Of(3),
+        "Nomad e2e testing", false, nil, &aWriteOpts)
+    must.NoError(t, err)
+}
+
+func testScalingSystemJob(t *testing.T) {
+    nomad := e2eutil.NomadClient(t)
+
+    // Register a system job with a scaling policy without a group count; it
+    // should default to 1 per node.
+
+    jobID := "test-scaling-" + uuid.Generate()[0:8]
+    e2eutil.RegisterAndWaitForAllocs(t, nomad,
+        "input/namespace_default_system.nomad.hcl", jobID, "")
+
+    t.Cleanup(e2eutil.CleanupJobsAndGC(t, &[]string{jobID}))
+
+    jobs := nomad.Jobs()
+    initialAllocs, _, err := jobs.Allocations(jobID, true, nil)
+    must.NoError(t, err)
+
+    // A system job will spawn an allocation per feasible node, so we need to
+    // know how many nodes there are to know how many allocations to expect.
+    nodeStubList, _, err := nomad.Nodes().List(
+        &api.QueryOptions{
+            Namespace: "default",
+            Params:    map[string]string{"os": "true"},
+            Filter:    `Attributes["os.name"] == "ubuntu"`,
+        })
+    must.NoError(t, err)
+    numberOfNodes := len(nodeStubList)
+
+    must.Len(t, numberOfNodes, initialAllocs)
+    allocIDs := e2eutil.AllocIDsFromAllocationListStubs(initialAllocs)
+
+    // Wait for allocations to get past initial pending state
+    e2eutil.WaitForAllocsNotPending(t, nomad, allocIDs)
+
+    // Try to scale beyond 1
+    testMeta := map[string]any{"scaling-e2e-test": "value"}
+    scaleResp, _, err := nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(3),
+        "Nomad e2e testing", false, testMeta, nil)
+
+    must.ErrorContains(t, err, "can only be scaled between 0 and 1")
+    must.Nil(t, scaleResp)
+
+    // The same allocs should be running.
+    jobs = nomad.Jobs()
+    allocs1, _, err := jobs.Allocations(jobID, true, nil)
+    must.NoError(t, err)
+
+    must.Eq(t, len(initialAllocs), len(allocs1))
+    for i, a := range allocs1 {
+        must.Eq(t, a.ID, initialAllocs[i].ID)
+    }
+
+    // Scale down to 0
+    testMeta = map[string]any{"scaling-e2e-test": "value"}
+    scaleResp, _, err = nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(0),
+        "Nomad e2e testing", false, testMeta, nil)
+    must.NoError(t, err)
+    must.NotEq(t, "", scaleResp.EvalID)
+
+    // Wait until allocs all stop
+    must.Wait(t, wait.InitialSuccess(
+        wait.BoolFunc(func() bool {
+            allocs, _, err := jobs.Allocations(jobID, false, nil)
+            must.NoError(t, err)
+            stoppedAllocs := filterAllocsByDesiredStatus(
+                structs.AllocDesiredStatusStop, allocs)
+            return len(stoppedAllocs) == numberOfNodes
+        }),
+        wait.Timeout(10*time.Second),
+        wait.Gap(100*time.Millisecond),
+    ), must.Sprint("allocs did not stop"))
+
+    // Scale up to 1 again
+    testMeta = map[string]any{"scaling-e2e-test": "value"}
+    scaleResp, _, err = nomad.Jobs().Scale(jobID, "system_job_group", pointer.Of(1),
+        "Nomad e2e testing", false, testMeta, nil)
+    must.NoError(t, err)
+    must.NotEq(t, "", scaleResp.EvalID)
+
+    // Wait for new allocation to get past initial pending state
+    e2eutil.WaitForAllocsNotPending(t, nomad, allocIDs)
+
+    // Assert job is still running and there is a running allocation again
+    allocs, _, err := jobs.Allocations(jobID, true, nil)
+    must.NoError(t, err)
+    must.Len(t, numberOfNodes*2, allocs)
+    must.Len(t, numberOfNodes,
+        filterAllocsByDesiredStatus(structs.AllocDesiredStatusStop, allocs))
+    must.Len(t, numberOfNodes,
+        filterAllocsByDesiredStatus(structs.AllocDesiredStatusRun, allocs))
+}
+
+func filterAllocsByDesiredStatus(status string, allocs []*api.AllocationListStub) []*api.AllocationListStub {
+    res := []*api.AllocationListStub{}
+
+    for _, a := range allocs {
+        if a.DesiredStatus == status {
+            res = append(res, a)
+        }
+    }
+
+    return res
+}
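
Outside the diff itself, a minimal usage sketch of the new e2eutil.MaybeCleanupNamespacedJobsAndGC helper, wired into t.Cleanup the same way testScalingNamespaces does above. The test function name, the "ExampleNamespace" namespace, and the "input/example.nomad.hcl" job file are illustrative placeholders and not part of this change.

package scaling

import (
    "testing"

    "github.com/hashicorp/nomad/e2e/e2eutil"
    "github.com/hashicorp/nomad/helper/uuid"
    "github.com/shoenig/test/must"
)

// testNamespacedCleanupSketch is a hypothetical example. It registers a job in
// a non-default namespace and relies on MaybeCleanupNamespacedJobsAndGC as a
// best-effort backup: on test exit the helper stops and purges the job and
// runs a system gc without failing the test if the job is already gone.
func testNamespacedCleanupSketch(t *testing.T) {
    ns := "ExampleNamespace" // illustrative namespace name
    _, err := e2eutil.Command("nomad", "namespace", "apply", ns)
    must.NoError(t, err)

    jobID := "example-" + uuid.Short()

    // Backup cleanup; safe even if the test stops and gc's the job itself.
    t.Cleanup(e2eutil.MaybeCleanupNamespacedJobsAndGC(ns, []string{jobID}))

    // Register the (illustrative) namespaced job, then continue with assertions.
    must.NoError(t, e2eutil.Register(jobID, "input/example.nomad.hcl"))
}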