From db2bdb4410ffc71a143fd8ae69ee31afe7439ca7 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 29 Mar 2018 11:35:45 -0500 Subject: [PATCH] More e2e tests --- e2e/rescheduling/input/norescheduling.hcl | 33 +++++---- e2e/rescheduling/input/reschedule_success.hcl | 24 +++--- .../input/rescheduling_canary.hcl | 34 +++++---- .../input/rescheduling_canary_autorevert.hcl | 34 +++++++++ .../input/rescheduling_default.hcl | 8 +- e2e/rescheduling/input/rescheduling_fail.hcl | 24 +++--- e2e/rescheduling/input/rescheduling_maxp.hcl | 35 +++++++++ .../input/rescheduling_maxp_autorevert.hcl | 35 +++++++++ .../input/rescheduling_system.hcl | 7 +- .../input/rescheduling_update.hcl | 31 ++++---- e2e/rescheduling/server_side_restarts_test.go | 73 +++++++++++++++++-- 11 files changed, 265 insertions(+), 73 deletions(-) create mode 100644 e2e/rescheduling/input/rescheduling_canary_autorevert.hcl create mode 100644 e2e/rescheduling/input/rescheduling_maxp.hcl create mode 100644 e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl diff --git a/e2e/rescheduling/input/norescheduling.hcl b/e2e/rescheduling/input/norescheduling.hcl index 63ca9a0df..940548b74 100644 --- a/e2e/rescheduling/input/norescheduling.hcl +++ b/e2e/rescheduling/input/norescheduling.hcl @@ -1,30 +1,35 @@ job "test1" { datacenters = ["dc1"] - type = "service" + type = "service" group "t1" { count = 3 + task "t1" { driver = "raw_exec" + config { - command = "bash" - args = ["-c", "lol 5000"] + command = "bash" + args = ["-c", "lol 5000"] } } + update { - max_parallel = 1 - min_healthy_time = "10s" - auto_revert = false + max_parallel = 1 + min_healthy_time = "10s" + auto_revert = false } + restart { - attempts = 0 - delay = "0s" - mode = "fail" - } - reschedule { attempts = 0 - interval = "5m" + delay = "0s" + mode = "fail" + } + + reschedule { + attempts = 0 + interval = "5m" unlimited = false - } + } } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/input/reschedule_success.hcl b/e2e/rescheduling/input/reschedule_success.hcl index 3ac127e10..76b3561c5 100644 --- a/e2e/rescheduling/input/reschedule_success.hcl +++ b/e2e/rescheduling/input/reschedule_success.hcl @@ -1,25 +1,29 @@ job "test3" { datacenters = ["dc1"] - type = "service" + type = "service" group "t3" { count = 3 + task "t3" { driver = "raw_exec" + config { - command = "bash" - args = ["-c", "a=`cksum <<< \"${NOMAD_ALLOC_ID}\"| cut -d ' ' -f1`; if ! (( a % 2 )); then sleep 5000; else exit -1; fi"] + command = "bash" + args = ["-c", "a=`cksum <<< \"${NOMAD_ALLOC_ID}\"| cut -d ' ' -f1`; if ! 
(( a % 2 )); then sleep 5000; else exit -1; fi"] } } + restart { - attempts = 0 - delay = "0s" - mode = "fail" + attempts = 0 + delay = "0s" + mode = "fail" } + reschedule { - attempts = 2 - interval = "5m" + attempts = 2 + interval = "5m" unlimited = false - } + } } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/input/rescheduling_canary.hcl b/e2e/rescheduling/input/rescheduling_canary.hcl index 9979b3d7c..1a848ba75 100644 --- a/e2e/rescheduling/input/rescheduling_canary.hcl +++ b/e2e/rescheduling/input/rescheduling_canary.hcl @@ -1,31 +1,37 @@ job "test5" { datacenters = ["dc1"] - type = "service" + type = "service" group "t5" { count = 3 + task "t5" { driver = "raw_exec" + config { - command = "bash" - args = ["-c", "sleep 5000"] + command = "bash" + args = ["-c", "sleep 5000"] } } + update { - max_parallel = 1 - canary = 1 - min_healthy_time = "1s" - auto_revert = false + max_parallel = 1 + canary = 1 + min_healthy_time = "1s" + auto_revert = false } + restart { - attempts = 0 - delay = "0s" - mode = "fail" + attempts = 0 + delay = "0s" + mode = "fail" } + reschedule { - attempts = 3 - interval = "5m" + attempts = 3 + interval = "5m" + delay = "5s" unlimited = false - } + } } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/input/rescheduling_canary_autorevert.hcl b/e2e/rescheduling/input/rescheduling_canary_autorevert.hcl new file mode 100644 index 000000000..cef2e5a37 --- /dev/null +++ b/e2e/rescheduling/input/rescheduling_canary_autorevert.hcl @@ -0,0 +1,34 @@ +job "test" { + datacenters = ["dc1"] + type = "service" + + group "t1" { + count = 3 + + task "t1" { + driver = "raw_exec" + + config { + command = "bash" + args = ["-c", "sleep 5000"] + } + } + + update { + canary = 3 + max_parallel = 1 + min_healthy_time = "1s" + healthy_deadline = "1m" + auto_revert = true + } + + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + unlimited = "true" + } + } +} diff --git a/e2e/rescheduling/input/rescheduling_default.hcl b/e2e/rescheduling/input/rescheduling_default.hcl index 56a829d7a..6f1f45c62 100644 --- a/e2e/rescheduling/input/rescheduling_default.hcl +++ b/e2e/rescheduling/input/rescheduling_default.hcl @@ -1,21 +1,23 @@ job "test" { datacenters = ["dc1"] - type = "service" + type = "service" group "t" { count = 3 + task "t" { driver = "raw_exec" + config { command = "bash" args = ["-c", "lol 5000"] } } + restart { attempts = 0 delay = "0s" mode = "fail" } - } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/input/rescheduling_fail.hcl b/e2e/rescheduling/input/rescheduling_fail.hcl index 02ea9c132..117069e5a 100644 --- a/e2e/rescheduling/input/rescheduling_fail.hcl +++ b/e2e/rescheduling/input/rescheduling_fail.hcl @@ -1,25 +1,29 @@ job "test2" { datacenters = ["dc1"] - type = "service" + type = "service" group "t2" { count = 3 + task "t2" { driver = "raw_exec" + config { - command = "bash" - args = ["-c", "lol 5000"] + command = "bash" + args = ["-c", "lol 5000"] } } + restart { - attempts = 0 - delay = "0s" - mode = "fail" + attempts = 0 + delay = "0s" + mode = "fail" } + reschedule { - attempts = 2 - interval = "5m" + attempts = 2 + interval = "5m" unlimited = false - } + } } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/input/rescheduling_maxp.hcl b/e2e/rescheduling/input/rescheduling_maxp.hcl new file mode 100644 index 000000000..2ab26b80f --- /dev/null +++ b/e2e/rescheduling/input/rescheduling_maxp.hcl @@ -0,0 +1,35 @@ +job "demo2" { + datacenters = ["dc1"] + type = "service" + + group "t2" { + count = 
3 + + task "t2" { + driver = "raw_exec" + + config { + command = "bash" + args = ["-c", "sleep 5000"] + } + } + + update { + max_parallel = 1 + min_healthy_time = "5s" + healthy_deadline = "10m" + auto_revert = false + } + + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + unlimited = "true" + + # attempts = 0 + } + } +} diff --git a/e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl b/e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl new file mode 100644 index 000000000..39056ae89 --- /dev/null +++ b/e2e/rescheduling/input/rescheduling_maxp_autorevert.hcl @@ -0,0 +1,35 @@ +job "demo3" { + datacenters = ["dc1"] + type = "service" + + group "t2" { + count = 3 + + task "t2" { + driver = "raw_exec" + + config { + command = "bash" + args = ["-c", "ssleep 5000"] + } + } + + update { + max_parallel = 1 + min_healthy_time = "5s" + healthy_deadline = "10m" + auto_revert = true + } + + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + unlimited = "true" + + # attempts = 0 + } + } +} diff --git a/e2e/rescheduling/input/rescheduling_system.hcl b/e2e/rescheduling/input/rescheduling_system.hcl index 91f95fbd5..4291e860b 100644 --- a/e2e/rescheduling/input/rescheduling_system.hcl +++ b/e2e/rescheduling/input/rescheduling_system.hcl @@ -1,20 +1,23 @@ job "test" { datacenters = ["dc1"] - type = "system" + type = "system" group "t" { count = 1 + task "t" { driver = "raw_exec" + config { command = "bash" args = ["-c", "lol 5000"] } } + restart { attempts = 0 delay = "0s" mode = "fail" } } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/input/rescheduling_update.hcl b/e2e/rescheduling/input/rescheduling_update.hcl index 844fa8718..d4ecd6481 100644 --- a/e2e/rescheduling/input/rescheduling_update.hcl +++ b/e2e/rescheduling/input/rescheduling_update.hcl @@ -1,30 +1,35 @@ job "test4" { datacenters = ["dc1"] - type = "service" + type = "service" group "t4" { count = 3 + task "t4" { driver = "raw_exec" + config { - command = "bash" - args = ["-c", "sleep 5000"] + command = "bash" + args = ["-c", "sleep 5000"] } } + update { - max_parallel = 1 - min_healthy_time = "10s" - auto_revert = false + max_parallel = 1 + min_healthy_time = "10s" + auto_revert = false } + restart { - attempts = 0 - delay = "0s" - mode = "fail" + attempts = 0 + delay = "0s" + mode = "fail" } + reschedule { - attempts = 3 - interval = "5m" + attempts = 3 + interval = "5m" unlimited = false - } + } } -} \ No newline at end of file +} diff --git a/e2e/rescheduling/server_side_restarts_test.go b/e2e/rescheduling/server_side_restarts_test.go index 96bfc1155..8298e390e 100644 --- a/e2e/rescheduling/server_side_restarts_test.go +++ b/e2e/rescheduling/server_side_restarts_test.go @@ -12,16 +12,18 @@ import ( "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/structs" ) var _ = Describe("Server Side Restart Tests", func() { var ( - jobs *api.Jobs - system *api.System - job *api.Job - err error - specFile string + jobs *api.Jobs + system *api.System + deployments *api.Deployments + job *api.Job + err error + specFile string // allocStatuses is a helper function that pulls // out client statuses from a slice of allocs @@ -43,12 +45,27 @@ var _ = Describe("Server Side Restart Tests", func() { Expect(err).ShouldNot(HaveOccurred()) var ret []string for _, a := range allocs { - if a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0 { + if (a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0) || a.FollowupEvalID 
!= "" { ret = append(ret, a.ClientStatus) } } return ret } + + // deploymentStatus is a helper function that returns deployment status of all deployments + // sorted by time + deploymentStatus = func() []string { + deploys, _, err := jobs.Deployments(*job.ID, nil) + Expect(err).ShouldNot(HaveOccurred()) + var ret []string + sort.Slice(deploys, func(i, j int) bool { + return deploys[i].CreateIndex < deploys[j].CreateIndex + }) + for _, d := range deploys { + ret = append(ret, d.Status) + } + return ret + } ) BeforeSuite(func() { @@ -59,6 +76,7 @@ var _ = Describe("Server Side Restart Tests", func() { Expect(err).ShouldNot(HaveOccurred()) jobs = client.Jobs() system = client.System() + deployments = client.Deployments() }) JustBeforeEach(func() { @@ -167,21 +185,62 @@ var _ = Describe("Server Side Restart Tests", func() { BeforeEach(func() { specFile = "input/rescheduling_canary.hcl" }) - It("Should have all running allocs", func() { + It("Should have running allocs and successful deployment", func() { Eventually(allocStatuses, 3*time.Second, time.Second).Should( ConsistOf([]string{"running", "running", "running"})) + + time.Sleep(2 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs + Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( + ContainElement(structs.DeploymentStatusSuccessful)) }) + Context("Updating job to make allocs fail", func() { It("Should have no rescheduled allocs", func() { job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} _, _, err := jobs.Register(job, nil) Expect(err).ShouldNot(HaveOccurred()) Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) + + // Verify new deployment and its status + time.Sleep(3 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs + Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( + ContainElement(structs.DeploymentStatusFailed)) }) }) }) + Context("Reschedule with canary and auto revert ", func() { + BeforeEach(func() { + specFile = "input/rescheduling_canary_autorevert.hcl" + }) + It("Should have running allocs and successful deployment", func() { + Eventually(allocStatuses, 3*time.Second, time.Second).Should( + ConsistOf([]string{"running", "running", "running"})) + + time.Sleep(4 * time.Second) + Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( + ContainElement(structs.DeploymentStatusSuccessful)) + + // Make an update that causes the job to fail + job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} + _, _, err := jobs.Register(job, nil) + Expect(err).ShouldNot(HaveOccurred()) + Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) + + // Wait for the revert + Eventually(allocStatuses, 3*time.Second, time.Second).Should( + ConsistOf([]string{"failed", "failed", "failed", "running", "running", "running"})) + + // Verify new deployment and its status + // There should be one successful, one failed, and one more successful (after revert) + time.Sleep(5 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs + Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( + ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful)) + }) + + }) + }) })