e2e: refactor CLI utils out of rescheduling test (#8905)

The CLI helpers in the rescheduling test were intended for shared use, but
until some other tests were written we didn't want to waste time making them
generic. This changeset refactors them and adds some new helpers associated
with the node drain tests (under separate PR).
This commit is contained in:
Tim Gross
2020-09-16 16:10:06 -04:00
committed by GitHub
parent d60071c20d
commit 2ec1eb4ec6
7 changed files with 443 additions and 270 deletions

146
e2e/e2eutil/allocs.go Normal file
View File

@@ -0,0 +1,146 @@
package e2eutil
import (
"encoding/json"
"fmt"
"reflect"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/testutil"
)
// WaitForAllocStatusExpected polls 'nomad job status' and exactly compares
// the status of all allocations (including any previous versions) against the
// expected list.
func WaitForAllocStatusExpected(jobID string, expected []string) error {
return WaitForAllocStatusComparison(
func() ([]string, error) { return AllocStatuses(jobID) },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
nil,
)
}
// WaitForAllocStatusComparison is a convenience wrapper that polls the query
// function until the comparison function returns true.
func WaitForAllocStatusComparison(query func() ([]string, error), comparison func([]string) bool, wc *WaitConfig) error {
var got []string
var err error
interval, retries := wc.OrDefault()
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(interval)
got, err = query()
if err != nil {
return false, err
}
return comparison(got), nil
}, func(e error) {
err = fmt.Errorf("alloc status check failed: got %#v", got)
})
return err
}
// AllocsForJob returns a slice of key->value maps, each describing the values
// of the 'nomad job status' Allocations section (not actual
// structs.Allocation objects, query the API if you want those)
func AllocsForJob(jobID string) ([]map[string]string, error) {
out, err := Command("nomad", "job", "status", "-verbose", "-all-allocs", jobID)
if err != nil {
return nil, fmt.Errorf("'nomad job status' failed: %w", err)
}
section, err := GetSection(out, "Allocations")
if err != nil {
return nil, fmt.Errorf("could not find Allocations section: %w", err)
}
allocs, err := ParseColumns(section)
if err != nil {
return nil, fmt.Errorf("could not parse Allocations section: %w", err)
}
return allocs, nil
}
// AllocsForNode returns a slice of key->value maps, each describing the values
// of the 'nomad node status' Allocations section (not actual
// structs.Allocation objects, query the API if you want those)
func AllocsForNode(nodeID string) ([]map[string]string, error) {
out, err := Command("nomad", "node", "status", "-verbose", nodeID)
if err != nil {
return nil, fmt.Errorf("'nomad node status' failed: %w", err)
}
section, err := GetSection(out, "Allocations")
if err != nil {
return nil, fmt.Errorf("could not find Allocations section: %w", err)
}
allocs, err := ParseColumns(section)
if err != nil {
return nil, fmt.Errorf("could not parse Allocations section: %w", err)
}
return allocs, nil
}
// AllocStatuses returns a slice of client statuses
func AllocStatuses(jobID string) ([]string, error) {
allocs, err := AllocsForJob(jobID)
if err != nil {
return nil, err
}
statuses := []string{}
for _, alloc := range allocs {
statuses = append(statuses, alloc["Status"])
}
return statuses, nil
}
// AllocStatusesRescheduled is a helper function that pulls
// out client statuses only from rescheduled allocs.
func AllocStatusesRescheduled(jobID string) ([]string, error) {
out, err := Command("nomad", "job", "status", "-verbose", jobID)
if err != nil {
return nil, fmt.Errorf("nomad job status failed: %w", err)
}
section, err := GetSection(out, "Allocations")
if err != nil {
return nil, fmt.Errorf("could not find Allocations section: %w", err)
}
allocs, err := ParseColumns(section)
if err != nil {
return nil, fmt.Errorf("could not parse Allocations section: %w", err)
}
statuses := []string{}
for _, alloc := range allocs {
allocID := alloc["ID"]
// reschedule tracker isn't exposed in the normal CLI output
out, err := Command("nomad", "alloc", "status", "-json", allocID)
if err != nil {
return nil, fmt.Errorf("nomad alloc status failed: %w", err)
}
dec := json.NewDecoder(strings.NewReader(out))
alloc := &api.Allocation{}
err = dec.Decode(alloc)
if err != nil {
return nil, fmt.Errorf("could not decode alloc status JSON: %w", err)
}
if (alloc.RescheduleTracker != nil &&
len(alloc.RescheduleTracker.Events) > 0) || alloc.FollowupEvalID != "" {
statuses = append(statuses, alloc.ClientStatus)
}
}
return statuses, nil
}

View File

@@ -0,0 +1,38 @@
package e2eutil
import (
"fmt"
"time"
"github.com/hashicorp/nomad/testutil"
)
func WaitForLastDeploymentStatus(jobID, status string, wc *WaitConfig) error {
var got string
var err error
interval, retries := wc.OrDefault()
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(interval)
out, err := Command("nomad", "job", "status", jobID)
if err != nil {
return false, fmt.Errorf("could not get job status: %v\n%v", err, out)
}
section, err := GetSection(out, "Latest Deployment")
if err != nil {
return false, fmt.Errorf("could not find Latest Deployment section: %w", err)
}
fields, err := ParseFields(section)
if err != nil {
return false, fmt.Errorf("could not parse Latest Deployment section: %w", err)
}
got = fields["Status"]
return got == status, nil
}, func(e error) {
err = fmt.Errorf("deployment status check failed: got %#v", got)
})
return err
}

41
e2e/e2eutil/job.go Normal file
View File

@@ -0,0 +1,41 @@
package e2eutil
import (
"fmt"
"io"
"io/ioutil"
"os/exec"
"regexp"
)
// Register registers a jobspec from a file but with a unique ID.
// The caller is responsible for recording that ID for later cleanup.
func Register(jobID, jobFilePath string) error {
cmd := exec.Command("nomad", "job", "run", "-")
stdin, err := cmd.StdinPipe()
if err != nil {
return fmt.Errorf("could not open stdin?: %w", err)
}
content, err := ioutil.ReadFile(jobFilePath)
if err != nil {
return fmt.Errorf("could not open job file: %w", err)
}
// hack off the first line to replace with our unique ID
var re = regexp.MustCompile(`^job "\w+" \{`)
jobspec := re.ReplaceAllString(string(content),
fmt.Sprintf("job \"%s\" {", jobID))
go func() {
defer stdin.Close()
io.WriteString(stdin, jobspec)
}()
out, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("could not register job: %w\n%v", err, string(out))
}
return nil
}

View File

@@ -144,3 +144,43 @@ func listClientNodesByOS(client *api.Client, osName string) ([]string, error) {
}
return nodeIDs, nil
}
func NodeStatusList() ([]map[string]string, error) {
out, err := Command("nomad", "node", "status", "-verbose")
if err != nil {
return nil, fmt.Errorf("'nomad node status' failed: %w", err)
}
nodes, err := ParseColumns(out)
if err != nil {
return nil, fmt.Errorf("could not parse node status output: %w", err)
}
return nodes, nil
}
func NodeStatusListFiltered(filterFn func(string) bool) ([]map[string]string, error) {
out, err := Command("nomad", "node", "status", "-verbose")
if err != nil {
return nil, fmt.Errorf("'nomad node status' failed: %w", err)
}
allNodes, err := ParseColumns(out)
if err != nil {
return nil, fmt.Errorf("could not parse node status output: %w", err)
}
nodes := []map[string]string{}
for _, node := range allNodes {
out, err := Command("nomad", "node", "status", "-verbose", node["ID"])
if err != nil {
return nil, fmt.Errorf("could not node status output: %w", err)
}
if filterFn(out) {
nodes = append(nodes, node)
}
}
return nodes, nil
}

25
e2e/e2eutil/wait.go Normal file
View File

@@ -0,0 +1,25 @@
package e2eutil
import "time"
// WaitConfig is an interval and wait time that can be passed to a waiter
// function, but with a default value that comes from the OrDefault method
// if the config is nil
type WaitConfig struct {
Interval time.Duration
Retries int64
}
// Return a default wait config of 10s
func (wc *WaitConfig) OrDefault() (time.Duration, int64) {
if wc == nil {
return time.Millisecond * 100, 100
}
if wc.Interval == 0 {
wc.Interval = time.Millisecond * 100
}
if wc.Retries == 0 {
wc.Retries = 100
}
return wc.Interval, wc.Retries
}

View File

@@ -1,134 +0,0 @@
package rescheduling
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os/exec"
"regexp"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/testutil"
)
// allocStatuses returns a slice of client statuses
func allocStatuses(f *framework.F, jobID string) []string {
out, err := e2eutil.Command("nomad", "job", "status", "-verbose", "-all-allocs", jobID)
f.NoError(err, "nomad job status failed", err)
section, err := e2eutil.GetSection(out, "Allocations")
f.NoError(err, "could not find Allocations section", err)
allocs, err := e2eutil.ParseColumns(section)
f.NoError(err, "could not parse Allocations section", err)
statuses := []string{}
for _, alloc := range allocs {
statuses = append(statuses, alloc["Status"])
}
return statuses
}
// allocStatusesRescheduled is a helper function that pulls
// out client statuses only from rescheduled allocs.
func allocStatusesRescheduled(f *framework.F, jobID string) []string {
out, err := e2eutil.Command("nomad", "job", "status", "-verbose", jobID)
f.NoError(err, "nomad job status failed", err)
section, err := e2eutil.GetSection(out, "Allocations")
f.NoError(err, "could not find Allocations section", err)
allocs, err := e2eutil.ParseColumns(section)
f.NoError(err, "could not parse Allocations section", err)
statuses := []string{}
for _, alloc := range allocs {
allocID := alloc["ID"]
// reschedule tracker isn't exposed in the normal CLI output
out, err := e2eutil.Command("nomad", "alloc", "status", "-json", allocID)
f.NoError(err, "nomad alloc status failed", err)
dec := json.NewDecoder(strings.NewReader(out))
alloc := &api.Allocation{}
err = dec.Decode(alloc)
f.NoError(err, "could not decode alloc status JSON: %w", err)
if (alloc.RescheduleTracker != nil &&
len(alloc.RescheduleTracker.Events) > 0) || alloc.FollowupEvalID != "" {
statuses = append(statuses, alloc.ClientStatus)
}
}
return statuses
}
// register is a helper that registers a jobspec with a unique ID
// and records that ID in the testcase for later cleanup
func register(f *framework.F, jobFile, jobID string) {
cmd := exec.Command("nomad", "job", "run", "-")
stdin, err := cmd.StdinPipe()
f.NoError(err, fmt.Sprintf("could not open stdin?: %v", err))
content, err := ioutil.ReadFile(jobFile)
f.NoError(err, fmt.Sprintf("could not open job file: %v", err))
// hack off the first line to replace with our unique ID
var re = regexp.MustCompile(`^job "\w+" \{`)
jobspec := re.ReplaceAllString(string(content),
fmt.Sprintf("job \"%s\" {", jobID))
go func() {
defer stdin.Close()
io.WriteString(stdin, jobspec)
}()
out, err := cmd.CombinedOutput()
f.NoError(err, "could not register job: %v\n%v", err, string(out))
}
func waitForAllocStatusComparison(query func() ([]string, error), comparison func([]string) bool) error {
var got []string
var err error
testutil.WaitForResultRetries(50, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
got, err = query()
if err != nil {
return false, err
}
return comparison(got), nil
}, func(e error) {
err = fmt.Errorf("alloc status check failed: got %#v", got)
})
return err
}
func waitForLastDeploymentStatus(f *framework.F, jobID, status string) error {
var got string
var err error
testutil.WaitForResultRetries(50, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
out, err := e2eutil.Command("nomad", "job", "status", jobID)
f.NoError(err, "could not get job status: %v\n%v", err, out)
section, err := e2eutil.GetSection(out, "Latest Deployment")
f.NoError(err, "could not find Latest Deployment section", err)
fields, err := e2eutil.ParseFields(section)
f.NoError(err, "could not parse Latest Deployment section", err)
got = fields["Status"]
return got == status, nil
}, func(e error) {
err = fmt.Errorf("deployment status check failed: got %#v", got)
})
return err
}

View File

@@ -7,7 +7,7 @@ import (
"sort"
"time"
"github.com/hashicorp/nomad/e2e/e2eutil"
e2e "github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/jobspec"
@@ -31,8 +31,8 @@ func init() {
}
func (tc *RescheduleE2ETest) BeforeAll(f *framework.F) {
e2eutil.WaitForLeader(f.T(), tc.Nomad())
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
e2e.WaitForLeader(f.T(), tc.Nomad())
e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}
func (tc *RescheduleE2ETest) AfterEach(f *framework.F) {
@@ -41,86 +41,84 @@ func (tc *RescheduleE2ETest) AfterEach(f *framework.F) {
}
for _, id := range tc.jobIds {
_, err := e2eutil.Command("nomad", "job", "stop", "-purge", id)
_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
f.NoError(err)
}
tc.jobIds = []string{}
_, err := e2eutil.Command("nomad", "system", "gc")
_, err := e2e.Command("nomad", "system", "gc")
f.NoError(err)
}
// TestNoReschedule runs a job that should fail and never reschedule
func (tc *RescheduleE2ETest) TestNoReschedule(f *framework.F) {
jobID := "test-no-reschedule-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/norescheduling.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/norescheduling.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"failed", "failed", "failed"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 failed allocs",
)
f.NoError(err, "should have exactly 3 failed allocs")
}
// TestNoRescheduleSystem runs a system job that should fail and never reschedule
func (tc *RescheduleE2ETest) TestNoRescheduleSystem(f *framework.F) {
jobID := "test-reschedule-system-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_system.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_system.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
for _, status := range got {
if status != "failed" {
return false
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
func(got []string) bool {
for _, status := range got {
if status != "failed" {
return false
}
}
}
return true
},
return true
}, nil,
),
"should have only failed allocs",
)
f.NoError(err, "should have only failed allocs")
}
// TestDefaultReschedule runs a job that should reschedule after delay
func (tc *RescheduleE2ETest) TestDefaultReschedule(f *framework.F) {
jobID := "test-default-reschedule-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_default.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_default.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"failed", "failed", "failed"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 failed allocs",
)
f.NoError(err, "should have exactly 3 failed allocs")
// TODO(tgross): return early if "slow" isn't set
// wait until first exponential delay kicks in and rescheduling is attempted
time.Sleep(time.Second * 35)
expected = []string{"failed", "failed", "failed", "failed", "failed", "failed"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 6 failed allocs after 35s",
)
f.NoError(err, "should have exactly 6 failed allocs after 35s")
}
// TestRescheduleMaxAttempts runs a job with a maximum reschedule attempts
func (tc *RescheduleE2ETest) TestRescheduleMaxAttempts(f *framework.F) {
jobID := "test-reschedule-fail-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_fail.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_fail.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"failed", "failed", "failed"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 failed allocs",
)
f.NoError(err, "should have exactly 3 failed allocs")
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_fail.nomad")
f.NoError(err)
@@ -129,39 +127,43 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxAttempts(f *framework.F) {
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
for _, status := range got {
if status == "running" {
return true
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
func(got []string) bool {
for _, status := range got {
if status == "running" {
return true
}
}
}
return false
},
return false
}, nil,
),
"should have at least 1 running alloc",
)
f.NoError(err, "should have at least 1 running alloc")
}
// TestRescheduleSuccess runs a job that should be running after rescheduling
func (tc *RescheduleE2ETest) TestRescheduleSuccess(f *framework.F) {
jobID := "test-reschedule-success-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_success.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_success.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
for _, status := range got {
if status == "running" {
return true
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
func(got []string) bool {
for _, status := range got {
if status == "running" {
return true
}
}
}
return false
},
return false
}, nil,
),
"should have at least 1 running alloc",
)
f.NoError(err, "should have at least 1 running alloc")
}
// TestRescheduleWithUpdate updates a running job to fail, and verifies that
@@ -169,15 +171,14 @@ func (tc *RescheduleE2ETest) TestRescheduleSuccess(f *framework.F) {
func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
jobID := "test-reschedule-update-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_update.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_update.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 running allocs",
)
f.NoError(err, "should have exactly 3 running allocs")
// reschedule to make fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_update.nomad")
@@ -187,11 +188,13 @@ func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) > 0 },
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
func(got []string) bool { return len(got) > 0 }, nil,
),
"should have rescheduled allocs until progress deadline",
)
f.NoError(err, "should have rescheduled allocs until progress deadline")
}
// TestRescheduleWithCanary updates a running job to fail, and verify that the
@@ -199,18 +202,18 @@ func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
jobID := "test-reschedule-canary-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_canary.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_canary.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 running allocs",
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
"deployment should be successful")
// reschedule to make fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary.nomad")
@@ -220,14 +223,17 @@ func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) > 0 },
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
func(got []string) bool { return len(got) > 0 }, nil,
),
"should have rescheduled allocs until progress deadline",
)
f.NoError(err, "should have rescheduled allocs until progress deadline")
err = waitForLastDeploymentStatus(f, jobID, "running")
f.NoError(err, "deployment should be running")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "running", nil),
"deployment should be running")
}
// TestRescheduleWithCanary updates a running job to fail, and verifies that
@@ -235,18 +241,18 @@ func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
func (tc *RescheduleE2ETest) TestRescheduleWithCanaryAutoRevert(f *framework.F) {
jobID := "test-reschedule-canary-revert-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_canary_autorevert.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_canary_autorevert.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 running allocs",
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
"deployment should be successful")
// reschedule to make fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary_autorevert.nomad")
@@ -256,40 +262,42 @@ func (tc *RescheduleE2ETest) TestRescheduleWithCanaryAutoRevert(f *framework.F)
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) == 0 },
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
func(got []string) bool { return len(got) == 0 }, nil,
),
"should have new allocs after update",
)
f.NoError(err, "should have new allocs after update")
// then we'll fail and revert
expected = []string{"failed", "failed", "failed", "running", "running", "running"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 running reverted allocs",
)
f.NoError(err, "should have exactly 3 running reverted allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
"deployment should be successful")
}
// TestRescheduleMaxParallel updates a job with a max_parallel config
func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
jobID := "test-reschedule-maxp-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_maxp.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_maxp.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 running allocs",
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
"deployment should be successful")
// reschedule to make fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp.nomad")
@@ -300,17 +308,21 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
f.NoError(err, "could not register updated job")
expected = []string{"complete", "failed", "failed", "running", "running"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
sort.Strings(got)
return reflect.DeepEqual(got, expected)
},
)
f.NoError(err, "should have failed allocs including rescheduled failed allocs")
err = waitForLastDeploymentStatus(f, jobID, "running")
f.NoError(err, "deployment should be running")
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
func(got []string) bool {
sort.Strings(got)
return reflect.DeepEqual(got, expected)
}, nil,
),
"should have failed allocs including rescheduled failed allocs",
)
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "running", nil),
"deployment should be running")
}
// TestRescheduleMaxParallelAutoRevert updates a job with a max_parallel
@@ -318,18 +330,18 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F) {
jobID := "test-reschedule-maxp-revert-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_maxp_autorevert.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_maxp_autorevert.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
f.NoError(
e2e.WaitForAllocStatusExpected(jobID, expected),
"should have exactly 3 running allocs",
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
"deployment should be successful")
// reschedule to make fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp_autorevert.nomad")
@@ -337,29 +349,33 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
f.NoError(err, "could not e2e.Register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) == 0 },
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
func(got []string) bool { return len(got) == 0 }, nil,
),
"should have new allocs after update",
)
f.NoError(err, "should have new allocs after update")
// wait for the revert
expected = []string{"complete", "failed", "running", "running", "running"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
sort.Strings(got)
return reflect.DeepEqual(got, expected)
},
f.NoError(
e2e.WaitForAllocStatusComparison(
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
func(got []string) bool {
sort.Strings(got)
return reflect.DeepEqual(got, expected)
}, nil,
),
"should have one successful, one failed, and 3 reverted allocs",
)
f.NoError(err, "should have one successful, one failed, and 3 reverted allocs")
out, err := e2eutil.Command("nomad", "deployment", "status")
out, err := e2e.Command("nomad", "deployment", "status")
f.NoError(err, "could not get deployment status")
results, err := e2eutil.ParseColumns(out)
results, err := e2e.ParseColumns(out)
f.NoError(err, "could not parse deployment status")
statuses := []string{}
for _, row := range results {
@@ -377,12 +393,13 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F)
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadline(f *framework.F) {
jobID := "test-reschedule-deadline-" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_progressdeadline.nomad", jobID)
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline.nomad"))
tc.jobIds = append(tc.jobIds, jobID)
// TODO(tgross): return early if "slow" isn't set
// wait until first exponential delay kicks in and rescheduling is attempted
time.Sleep(time.Second * 30)
err := waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
"deployment should be successful")
}