mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
e2e: refactor CLI utils out of rescheduling test (#8905)
The CLI helpers in the rescheduling test were intended for shared use, but until some other tests were written we didn't want to waste time making them generic. This changeset refactors them and adds some new helpers associated with the node drain tests (under separate PR).
This commit is contained in:
146
e2e/e2eutil/allocs.go
Normal file
146
e2e/e2eutil/allocs.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package e2eutil
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/api"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
)
|
||||
|
||||
// WaitForAllocStatusExpected polls 'nomad job status' and exactly compares
|
||||
// the status of all allocations (including any previous versions) against the
|
||||
// expected list.
|
||||
func WaitForAllocStatusExpected(jobID string, expected []string) error {
|
||||
return WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return AllocStatuses(jobID) },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
// WaitForAllocStatusComparison is a convenience wrapper that polls the query
|
||||
// function until the comparison function returns true.
|
||||
func WaitForAllocStatusComparison(query func() ([]string, error), comparison func([]string) bool, wc *WaitConfig) error {
|
||||
var got []string
|
||||
var err error
|
||||
interval, retries := wc.OrDefault()
|
||||
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
||||
time.Sleep(interval)
|
||||
got, err = query()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return comparison(got), nil
|
||||
}, func(e error) {
|
||||
err = fmt.Errorf("alloc status check failed: got %#v", got)
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// AllocsForJob returns a slice of key->value maps, each describing the values
|
||||
// of the 'nomad job status' Allocations section (not actual
|
||||
// structs.Allocation objects, query the API if you want those)
|
||||
func AllocsForJob(jobID string) ([]map[string]string, error) {
|
||||
|
||||
out, err := Command("nomad", "job", "status", "-verbose", "-all-allocs", jobID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("'nomad job status' failed: %w", err)
|
||||
}
|
||||
|
||||
section, err := GetSection(out, "Allocations")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not find Allocations section: %w", err)
|
||||
}
|
||||
|
||||
allocs, err := ParseColumns(section)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse Allocations section: %w", err)
|
||||
}
|
||||
return allocs, nil
|
||||
}
|
||||
|
||||
// AllocsForNode returns a slice of key->value maps, each describing the values
|
||||
// of the 'nomad node status' Allocations section (not actual
|
||||
// structs.Allocation objects, query the API if you want those)
|
||||
func AllocsForNode(nodeID string) ([]map[string]string, error) {
|
||||
|
||||
out, err := Command("nomad", "node", "status", "-verbose", nodeID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("'nomad node status' failed: %w", err)
|
||||
}
|
||||
|
||||
section, err := GetSection(out, "Allocations")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not find Allocations section: %w", err)
|
||||
}
|
||||
|
||||
allocs, err := ParseColumns(section)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse Allocations section: %w", err)
|
||||
}
|
||||
return allocs, nil
|
||||
}
|
||||
|
||||
// AllocStatuses returns a slice of client statuses
|
||||
func AllocStatuses(jobID string) ([]string, error) {
|
||||
|
||||
allocs, err := AllocsForJob(jobID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
statuses := []string{}
|
||||
for _, alloc := range allocs {
|
||||
statuses = append(statuses, alloc["Status"])
|
||||
}
|
||||
return statuses, nil
|
||||
}
|
||||
|
||||
// AllocStatusesRescheduled is a helper function that pulls
|
||||
// out client statuses only from rescheduled allocs.
|
||||
func AllocStatusesRescheduled(jobID string) ([]string, error) {
|
||||
|
||||
out, err := Command("nomad", "job", "status", "-verbose", jobID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("nomad job status failed: %w", err)
|
||||
}
|
||||
|
||||
section, err := GetSection(out, "Allocations")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not find Allocations section: %w", err)
|
||||
}
|
||||
|
||||
allocs, err := ParseColumns(section)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse Allocations section: %w", err)
|
||||
}
|
||||
|
||||
statuses := []string{}
|
||||
for _, alloc := range allocs {
|
||||
|
||||
allocID := alloc["ID"]
|
||||
|
||||
// reschedule tracker isn't exposed in the normal CLI output
|
||||
out, err := Command("nomad", "alloc", "status", "-json", allocID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("nomad alloc status failed: %w", err)
|
||||
}
|
||||
|
||||
dec := json.NewDecoder(strings.NewReader(out))
|
||||
alloc := &api.Allocation{}
|
||||
err = dec.Decode(alloc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not decode alloc status JSON: %w", err)
|
||||
}
|
||||
|
||||
if (alloc.RescheduleTracker != nil &&
|
||||
len(alloc.RescheduleTracker.Events) > 0) || alloc.FollowupEvalID != "" {
|
||||
statuses = append(statuses, alloc.ClientStatus)
|
||||
}
|
||||
}
|
||||
return statuses, nil
|
||||
}
|
||||
38
e2e/e2eutil/deployments.go
Normal file
38
e2e/e2eutil/deployments.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package e2eutil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
)
|
||||
|
||||
func WaitForLastDeploymentStatus(jobID, status string, wc *WaitConfig) error {
|
||||
var got string
|
||||
var err error
|
||||
interval, retries := wc.OrDefault()
|
||||
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
||||
time.Sleep(interval)
|
||||
|
||||
out, err := Command("nomad", "job", "status", jobID)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("could not get job status: %v\n%v", err, out)
|
||||
}
|
||||
|
||||
section, err := GetSection(out, "Latest Deployment")
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("could not find Latest Deployment section: %w", err)
|
||||
}
|
||||
|
||||
fields, err := ParseFields(section)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("could not parse Latest Deployment section: %w", err)
|
||||
}
|
||||
|
||||
got = fields["Status"]
|
||||
return got == status, nil
|
||||
}, func(e error) {
|
||||
err = fmt.Errorf("deployment status check failed: got %#v", got)
|
||||
})
|
||||
return err
|
||||
}
|
||||
41
e2e/e2eutil/job.go
Normal file
41
e2e/e2eutil/job.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package e2eutil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// Register registers a jobspec from a file but with a unique ID.
|
||||
// The caller is responsible for recording that ID for later cleanup.
|
||||
func Register(jobID, jobFilePath string) error {
|
||||
|
||||
cmd := exec.Command("nomad", "job", "run", "-")
|
||||
stdin, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open stdin?: %w", err)
|
||||
}
|
||||
|
||||
content, err := ioutil.ReadFile(jobFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open job file: %w", err)
|
||||
}
|
||||
|
||||
// hack off the first line to replace with our unique ID
|
||||
var re = regexp.MustCompile(`^job "\w+" \{`)
|
||||
jobspec := re.ReplaceAllString(string(content),
|
||||
fmt.Sprintf("job \"%s\" {", jobID))
|
||||
|
||||
go func() {
|
||||
defer stdin.Close()
|
||||
io.WriteString(stdin, jobspec)
|
||||
}()
|
||||
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not register job: %w\n%v", err, string(out))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -144,3 +144,43 @@ func listClientNodesByOS(client *api.Client, osName string) ([]string, error) {
|
||||
}
|
||||
return nodeIDs, nil
|
||||
}
|
||||
|
||||
func NodeStatusList() ([]map[string]string, error) {
|
||||
|
||||
out, err := Command("nomad", "node", "status", "-verbose")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("'nomad node status' failed: %w", err)
|
||||
}
|
||||
|
||||
nodes, err := ParseColumns(out)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse node status output: %w", err)
|
||||
}
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
func NodeStatusListFiltered(filterFn func(string) bool) ([]map[string]string, error) {
|
||||
|
||||
out, err := Command("nomad", "node", "status", "-verbose")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("'nomad node status' failed: %w", err)
|
||||
}
|
||||
|
||||
allNodes, err := ParseColumns(out)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse node status output: %w", err)
|
||||
}
|
||||
nodes := []map[string]string{}
|
||||
|
||||
for _, node := range allNodes {
|
||||
out, err := Command("nomad", "node", "status", "-verbose", node["ID"])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not node status output: %w", err)
|
||||
}
|
||||
if filterFn(out) {
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
}
|
||||
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
25
e2e/e2eutil/wait.go
Normal file
25
e2e/e2eutil/wait.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package e2eutil
|
||||
|
||||
import "time"
|
||||
|
||||
// WaitConfig is an interval and wait time that can be passed to a waiter
|
||||
// function, but with a default value that comes from the OrDefault method
|
||||
// if the config is nil
|
||||
type WaitConfig struct {
|
||||
Interval time.Duration
|
||||
Retries int64
|
||||
}
|
||||
|
||||
// Return a default wait config of 10s
|
||||
func (wc *WaitConfig) OrDefault() (time.Duration, int64) {
|
||||
if wc == nil {
|
||||
return time.Millisecond * 100, 100
|
||||
}
|
||||
if wc.Interval == 0 {
|
||||
wc.Interval = time.Millisecond * 100
|
||||
}
|
||||
if wc.Retries == 0 {
|
||||
wc.Retries = 100
|
||||
}
|
||||
return wc.Interval, wc.Retries
|
||||
}
|
||||
@@ -1,134 +0,0 @@
|
||||
package rescheduling
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/api"
|
||||
"github.com/hashicorp/nomad/e2e/e2eutil"
|
||||
"github.com/hashicorp/nomad/e2e/framework"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
)
|
||||
|
||||
// allocStatuses returns a slice of client statuses
|
||||
func allocStatuses(f *framework.F, jobID string) []string {
|
||||
|
||||
out, err := e2eutil.Command("nomad", "job", "status", "-verbose", "-all-allocs", jobID)
|
||||
f.NoError(err, "nomad job status failed", err)
|
||||
section, err := e2eutil.GetSection(out, "Allocations")
|
||||
f.NoError(err, "could not find Allocations section", err)
|
||||
|
||||
allocs, err := e2eutil.ParseColumns(section)
|
||||
f.NoError(err, "could not parse Allocations section", err)
|
||||
|
||||
statuses := []string{}
|
||||
for _, alloc := range allocs {
|
||||
statuses = append(statuses, alloc["Status"])
|
||||
}
|
||||
|
||||
return statuses
|
||||
}
|
||||
|
||||
// allocStatusesRescheduled is a helper function that pulls
|
||||
// out client statuses only from rescheduled allocs.
|
||||
func allocStatusesRescheduled(f *framework.F, jobID string) []string {
|
||||
|
||||
out, err := e2eutil.Command("nomad", "job", "status", "-verbose", jobID)
|
||||
f.NoError(err, "nomad job status failed", err)
|
||||
section, err := e2eutil.GetSection(out, "Allocations")
|
||||
f.NoError(err, "could not find Allocations section", err)
|
||||
|
||||
allocs, err := e2eutil.ParseColumns(section)
|
||||
f.NoError(err, "could not parse Allocations section", err)
|
||||
|
||||
statuses := []string{}
|
||||
for _, alloc := range allocs {
|
||||
|
||||
allocID := alloc["ID"]
|
||||
|
||||
// reschedule tracker isn't exposed in the normal CLI output
|
||||
out, err := e2eutil.Command("nomad", "alloc", "status", "-json", allocID)
|
||||
f.NoError(err, "nomad alloc status failed", err)
|
||||
|
||||
dec := json.NewDecoder(strings.NewReader(out))
|
||||
alloc := &api.Allocation{}
|
||||
err = dec.Decode(alloc)
|
||||
f.NoError(err, "could not decode alloc status JSON: %w", err)
|
||||
|
||||
if (alloc.RescheduleTracker != nil &&
|
||||
len(alloc.RescheduleTracker.Events) > 0) || alloc.FollowupEvalID != "" {
|
||||
statuses = append(statuses, alloc.ClientStatus)
|
||||
}
|
||||
}
|
||||
return statuses
|
||||
}
|
||||
|
||||
// register is a helper that registers a jobspec with a unique ID
|
||||
// and records that ID in the testcase for later cleanup
|
||||
func register(f *framework.F, jobFile, jobID string) {
|
||||
|
||||
cmd := exec.Command("nomad", "job", "run", "-")
|
||||
stdin, err := cmd.StdinPipe()
|
||||
f.NoError(err, fmt.Sprintf("could not open stdin?: %v", err))
|
||||
|
||||
content, err := ioutil.ReadFile(jobFile)
|
||||
f.NoError(err, fmt.Sprintf("could not open job file: %v", err))
|
||||
|
||||
// hack off the first line to replace with our unique ID
|
||||
var re = regexp.MustCompile(`^job "\w+" \{`)
|
||||
jobspec := re.ReplaceAllString(string(content),
|
||||
fmt.Sprintf("job \"%s\" {", jobID))
|
||||
|
||||
go func() {
|
||||
defer stdin.Close()
|
||||
io.WriteString(stdin, jobspec)
|
||||
}()
|
||||
|
||||
out, err := cmd.CombinedOutput()
|
||||
f.NoError(err, "could not register job: %v\n%v", err, string(out))
|
||||
}
|
||||
|
||||
func waitForAllocStatusComparison(query func() ([]string, error), comparison func([]string) bool) error {
|
||||
var got []string
|
||||
var err error
|
||||
testutil.WaitForResultRetries(50, func() (bool, error) {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
got, err = query()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return comparison(got), nil
|
||||
}, func(e error) {
|
||||
err = fmt.Errorf("alloc status check failed: got %#v", got)
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func waitForLastDeploymentStatus(f *framework.F, jobID, status string) error {
|
||||
var got string
|
||||
var err error
|
||||
testutil.WaitForResultRetries(50, func() (bool, error) {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
|
||||
out, err := e2eutil.Command("nomad", "job", "status", jobID)
|
||||
f.NoError(err, "could not get job status: %v\n%v", err, out)
|
||||
|
||||
section, err := e2eutil.GetSection(out, "Latest Deployment")
|
||||
f.NoError(err, "could not find Latest Deployment section", err)
|
||||
|
||||
fields, err := e2eutil.ParseFields(section)
|
||||
f.NoError(err, "could not parse Latest Deployment section", err)
|
||||
|
||||
got = fields["Status"]
|
||||
return got == status, nil
|
||||
}, func(e error) {
|
||||
err = fmt.Errorf("deployment status check failed: got %#v", got)
|
||||
})
|
||||
return err
|
||||
}
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/e2e/e2eutil"
|
||||
e2e "github.com/hashicorp/nomad/e2e/e2eutil"
|
||||
"github.com/hashicorp/nomad/e2e/framework"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/jobspec"
|
||||
@@ -31,8 +31,8 @@ func init() {
|
||||
}
|
||||
|
||||
func (tc *RescheduleE2ETest) BeforeAll(f *framework.F) {
|
||||
e2eutil.WaitForLeader(f.T(), tc.Nomad())
|
||||
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
|
||||
e2e.WaitForLeader(f.T(), tc.Nomad())
|
||||
e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
|
||||
}
|
||||
|
||||
func (tc *RescheduleE2ETest) AfterEach(f *framework.F) {
|
||||
@@ -41,86 +41,84 @@ func (tc *RescheduleE2ETest) AfterEach(f *framework.F) {
|
||||
}
|
||||
|
||||
for _, id := range tc.jobIds {
|
||||
_, err := e2eutil.Command("nomad", "job", "stop", "-purge", id)
|
||||
_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
|
||||
f.NoError(err)
|
||||
}
|
||||
tc.jobIds = []string{}
|
||||
_, err := e2eutil.Command("nomad", "system", "gc")
|
||||
_, err := e2e.Command("nomad", "system", "gc")
|
||||
f.NoError(err)
|
||||
}
|
||||
|
||||
// TestNoReschedule runs a job that should fail and never reschedule
|
||||
func (tc *RescheduleE2ETest) TestNoReschedule(f *framework.F) {
|
||||
jobID := "test-no-reschedule-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/norescheduling.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/norescheduling.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"failed", "failed", "failed"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 failed allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 failed allocs")
|
||||
}
|
||||
|
||||
// TestNoRescheduleSystem runs a system job that should fail and never reschedule
|
||||
func (tc *RescheduleE2ETest) TestNoRescheduleSystem(f *framework.F) {
|
||||
jobID := "test-reschedule-system-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_system.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_system.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status != "failed" {
|
||||
return false
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status != "failed" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
},
|
||||
return true
|
||||
}, nil,
|
||||
),
|
||||
"should have only failed allocs",
|
||||
)
|
||||
f.NoError(err, "should have only failed allocs")
|
||||
}
|
||||
|
||||
// TestDefaultReschedule runs a job that should reschedule after delay
|
||||
func (tc *RescheduleE2ETest) TestDefaultReschedule(f *framework.F) {
|
||||
|
||||
jobID := "test-default-reschedule-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_default.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_default.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"failed", "failed", "failed"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 failed allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 failed allocs")
|
||||
|
||||
// TODO(tgross): return early if "slow" isn't set
|
||||
// wait until first exponential delay kicks in and rescheduling is attempted
|
||||
time.Sleep(time.Second * 35)
|
||||
expected = []string{"failed", "failed", "failed", "failed", "failed", "failed"}
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 6 failed allocs after 35s",
|
||||
)
|
||||
f.NoError(err, "should have exactly 6 failed allocs after 35s")
|
||||
}
|
||||
|
||||
// TestRescheduleMaxAttempts runs a job with a maximum reschedule attempts
|
||||
func (tc *RescheduleE2ETest) TestRescheduleMaxAttempts(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-fail-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_fail.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_fail.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"failed", "failed", "failed"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 failed allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 failed allocs")
|
||||
|
||||
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_fail.nomad")
|
||||
f.NoError(err)
|
||||
@@ -129,39 +127,43 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxAttempts(f *framework.F) {
|
||||
_, _, err = tc.Nomad().Jobs().Register(job, nil)
|
||||
f.NoError(err, "could not register updated job")
|
||||
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status == "running" {
|
||||
return true
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status == "running" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
},
|
||||
return false
|
||||
}, nil,
|
||||
),
|
||||
"should have at least 1 running alloc",
|
||||
)
|
||||
f.NoError(err, "should have at least 1 running alloc")
|
||||
}
|
||||
|
||||
// TestRescheduleSuccess runs a job that should be running after rescheduling
|
||||
func (tc *RescheduleE2ETest) TestRescheduleSuccess(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-success-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_success.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_success.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status == "running" {
|
||||
return true
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status == "running" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
},
|
||||
return false
|
||||
}, nil,
|
||||
),
|
||||
"should have at least 1 running alloc",
|
||||
)
|
||||
f.NoError(err, "should have at least 1 running alloc")
|
||||
}
|
||||
|
||||
// TestRescheduleWithUpdate updates a running job to fail, and verifies that
|
||||
@@ -169,15 +171,14 @@ func (tc *RescheduleE2ETest) TestRescheduleSuccess(f *framework.F) {
|
||||
func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-update-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_update.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_update.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"running", "running", "running"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 running allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 running allocs")
|
||||
|
||||
// reschedule to make fail
|
||||
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_update.nomad")
|
||||
@@ -187,11 +188,13 @@ func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
|
||||
_, _, err = tc.Nomad().Jobs().Register(job, nil)
|
||||
f.NoError(err, "could not register updated job")
|
||||
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
|
||||
func(got []string) bool { return len(got) > 0 },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
|
||||
func(got []string) bool { return len(got) > 0 }, nil,
|
||||
),
|
||||
"should have rescheduled allocs until progress deadline",
|
||||
)
|
||||
f.NoError(err, "should have rescheduled allocs until progress deadline")
|
||||
}
|
||||
|
||||
// TestRescheduleWithCanary updates a running job to fail, and verify that the
|
||||
@@ -199,18 +202,18 @@ func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
|
||||
func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-canary-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_canary.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_canary.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"running", "running", "running"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 running allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 running allocs")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "successful")
|
||||
f.NoError(err, "deployment should be successful")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
|
||||
"deployment should be successful")
|
||||
|
||||
// reschedule to make fail
|
||||
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary.nomad")
|
||||
@@ -220,14 +223,17 @@ func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
|
||||
_, _, err = tc.Nomad().Jobs().Register(job, nil)
|
||||
f.NoError(err, "could not register updated job")
|
||||
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
|
||||
func(got []string) bool { return len(got) > 0 },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
|
||||
func(got []string) bool { return len(got) > 0 }, nil,
|
||||
),
|
||||
"should have rescheduled allocs until progress deadline",
|
||||
)
|
||||
f.NoError(err, "should have rescheduled allocs until progress deadline")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "running")
|
||||
f.NoError(err, "deployment should be running")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "running", nil),
|
||||
"deployment should be running")
|
||||
}
|
||||
|
||||
// TestRescheduleWithCanary updates a running job to fail, and verifies that
|
||||
@@ -235,18 +241,18 @@ func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
|
||||
func (tc *RescheduleE2ETest) TestRescheduleWithCanaryAutoRevert(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-canary-revert-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_canary_autorevert.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_canary_autorevert.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"running", "running", "running"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 running allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 running allocs")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "successful")
|
||||
f.NoError(err, "deployment should be successful")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
|
||||
"deployment should be successful")
|
||||
|
||||
// reschedule to make fail
|
||||
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary_autorevert.nomad")
|
||||
@@ -256,40 +262,42 @@ func (tc *RescheduleE2ETest) TestRescheduleWithCanaryAutoRevert(f *framework.F)
|
||||
_, _, err = tc.Nomad().Jobs().Register(job, nil)
|
||||
f.NoError(err, "could not register updated job")
|
||||
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
|
||||
func(got []string) bool { return len(got) == 0 },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
|
||||
func(got []string) bool { return len(got) == 0 }, nil,
|
||||
),
|
||||
"should have new allocs after update",
|
||||
)
|
||||
f.NoError(err, "should have new allocs after update")
|
||||
|
||||
// then we'll fail and revert
|
||||
expected = []string{"failed", "failed", "failed", "running", "running", "running"}
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 running reverted allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 running reverted allocs")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "successful")
|
||||
f.NoError(err, "deployment should be successful")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
|
||||
"deployment should be successful")
|
||||
}
|
||||
|
||||
// TestRescheduleMaxParallel updates a job with a max_parallel config
|
||||
func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-maxp-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_maxp.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_maxp.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"running", "running", "running"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 running allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 running allocs")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "successful")
|
||||
f.NoError(err, "deployment should be successful")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
|
||||
"deployment should be successful")
|
||||
|
||||
// reschedule to make fail
|
||||
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp.nomad")
|
||||
@@ -300,17 +308,21 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
|
||||
f.NoError(err, "could not register updated job")
|
||||
|
||||
expected = []string{"complete", "failed", "failed", "running", "running"}
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool {
|
||||
sort.Strings(got)
|
||||
return reflect.DeepEqual(got, expected)
|
||||
},
|
||||
)
|
||||
f.NoError(err, "should have failed allocs including rescheduled failed allocs")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "running")
|
||||
f.NoError(err, "deployment should be running")
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
|
||||
func(got []string) bool {
|
||||
sort.Strings(got)
|
||||
return reflect.DeepEqual(got, expected)
|
||||
}, nil,
|
||||
),
|
||||
"should have failed allocs including rescheduled failed allocs",
|
||||
)
|
||||
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "running", nil),
|
||||
"deployment should be running")
|
||||
}
|
||||
|
||||
// TestRescheduleMaxParallelAutoRevert updates a job with a max_parallel
|
||||
@@ -318,18 +330,18 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
|
||||
func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-maxp-revert-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_maxp_autorevert.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_maxp_autorevert.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
expected := []string{"running", "running", "running"}
|
||||
err := waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool { return reflect.DeepEqual(got, expected) },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusExpected(jobID, expected),
|
||||
"should have exactly 3 running allocs",
|
||||
)
|
||||
f.NoError(err, "should have exactly 3 running allocs")
|
||||
|
||||
err = waitForLastDeploymentStatus(f, jobID, "successful")
|
||||
f.NoError(err, "deployment should be successful")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
|
||||
"deployment should be successful")
|
||||
|
||||
// reschedule to make fail
|
||||
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp_autorevert.nomad")
|
||||
@@ -337,29 +349,33 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F)
|
||||
job.ID = &jobID
|
||||
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
|
||||
_, _, err = tc.Nomad().Jobs().Register(job, nil)
|
||||
f.NoError(err, "could not register updated job")
|
||||
f.NoError(err, "could not e2e.Register updated job")
|
||||
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
|
||||
func(got []string) bool { return len(got) == 0 },
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID) },
|
||||
func(got []string) bool { return len(got) == 0 }, nil,
|
||||
),
|
||||
"should have new allocs after update",
|
||||
)
|
||||
f.NoError(err, "should have new allocs after update")
|
||||
|
||||
// wait for the revert
|
||||
expected = []string{"complete", "failed", "running", "running", "running"}
|
||||
err = waitForAllocStatusComparison(
|
||||
func() ([]string, error) { return allocStatuses(f, jobID), nil },
|
||||
func(got []string) bool {
|
||||
sort.Strings(got)
|
||||
return reflect.DeepEqual(got, expected)
|
||||
},
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatuses(jobID) },
|
||||
func(got []string) bool {
|
||||
sort.Strings(got)
|
||||
return reflect.DeepEqual(got, expected)
|
||||
}, nil,
|
||||
),
|
||||
"should have one successful, one failed, and 3 reverted allocs",
|
||||
)
|
||||
f.NoError(err, "should have one successful, one failed, and 3 reverted allocs")
|
||||
|
||||
out, err := e2eutil.Command("nomad", "deployment", "status")
|
||||
out, err := e2e.Command("nomad", "deployment", "status")
|
||||
f.NoError(err, "could not get deployment status")
|
||||
|
||||
results, err := e2eutil.ParseColumns(out)
|
||||
results, err := e2e.ParseColumns(out)
|
||||
f.NoError(err, "could not parse deployment status")
|
||||
statuses := []string{}
|
||||
for _, row := range results {
|
||||
@@ -377,12 +393,13 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F)
|
||||
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadline(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-deadline-" + uuid.Generate()[0:8]
|
||||
register(f, "rescheduling/input/rescheduling_progressdeadline.nomad", jobID)
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
// TODO(tgross): return early if "slow" isn't set
|
||||
// wait until first exponential delay kicks in and rescheduling is attempted
|
||||
time.Sleep(time.Second * 30)
|
||||
err := waitForLastDeploymentStatus(f, jobID, "successful")
|
||||
f.NoError(err, "deployment should be successful")
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, "successful", nil),
|
||||
"deployment should be successful")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user