mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
e2e: rework rescheduling progress deadline test (#8958)
Eliminate sources of randomness in the progress deadline test and clarify the purpose of the test to check for progress deadline updates.
This commit is contained in:
@@ -45,3 +45,29 @@ func WaitForLastDeploymentStatus(jobID, ns, status string, wc *WaitConfig) error
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func LastDeploymentID(jobID, ns string) (string, error) {
|
||||
|
||||
var nsArg = []string{}
|
||||
if ns != "" {
|
||||
nsArg = []string{"-namespace", ns}
|
||||
}
|
||||
|
||||
cmd := []string{"nomad", "deployment", "list"}
|
||||
cmd = append(cmd, nsArg...)
|
||||
|
||||
out, err := Command(cmd[0], cmd[1:]...)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("could not get deployment list: %v\n%v", err, out)
|
||||
}
|
||||
rows, err := ParseColumns(out)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("could not parse deployment list output: %w", err)
|
||||
}
|
||||
for _, row := range rows {
|
||||
if row["Job ID"] == jobID {
|
||||
return row["ID"], nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("could not find a recent deployment for job")
|
||||
}
|
||||
|
||||
@@ -10,23 +10,27 @@ job "demo2" {
|
||||
type = "service"
|
||||
|
||||
group "t2" {
|
||||
count = 3
|
||||
count = 1
|
||||
|
||||
task "t2" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "bash"
|
||||
args = ["-c", "if (($RANDOM%2)); then sleep 200000 ; else exit -1 ; fi"]
|
||||
args = ["-c", "sleep 300"]
|
||||
}
|
||||
}
|
||||
|
||||
update {
|
||||
max_parallel = 1
|
||||
min_healthy_time = "1s"
|
||||
auto_revert = false
|
||||
healthy_deadline = "2s"
|
||||
progress_deadline = "30s"
|
||||
# we want the first allocation to take a while to become healthy,
|
||||
# so that we can check the deployment's progress deadline before
|
||||
# and after it becomes healthy
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "15s"
|
||||
progress_deadline = "20s"
|
||||
|
||||
max_parallel = 1
|
||||
auto_revert = false
|
||||
}
|
||||
|
||||
restart {
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
job "demo2" {
  # Service job whose only task exits non-zero immediately, so none of its
  # allocations can become healthy.
  datacenters = ["dc1", "dc2"]

  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  type = "service"

  group "t2" {
    count = 1

    task "t2" {
      driver = "raw_exec"

      config {
        command = "bash"
        args    = ["-c", "exit 1"]
      }
    }

    update {
      # The task always fails, so the allocation never becomes healthy. We
      # still want the deployment to take a while before it gives up, so
      # that the deployment's progress deadline can be read before and
      # after the allocation fails and is rescheduled.
      min_healthy_time  = "10s"
      healthy_deadline  = "15s"
      progress_deadline = "20s"

      max_parallel = 1
      auto_revert  = false
    }

    # Never restart in place; a failed task fails the allocation right away
    # and leaves recovery to the reschedule policy below.
    restart {
      attempts = 0
      mode     = "fail"
    }

    # Keep rescheduling the failed allocation at a fixed 5s interval.
    reschedule {
      unlimited      = true
      delay_function = "constant"
      delay          = "5s"
    }
  }
}
|
||||
@@ -390,18 +390,91 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F)
|
||||
)
|
||||
}
|
||||
|
||||
// TestRescheduleProgressDeadline verifies a deployment succeeds by the
|
||||
// progress deadline
|
||||
// TestRescheduleProgressDeadline verifies the progress deadline is reset with
|
||||
// each healthy allocation, and that a rescheduled allocation does not.
|
||||
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadline(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-deadline-" + uuid.Generate()[0:8]
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
// TODO(tgross): return early if "slow" isn't set
|
||||
// wait until first exponential delay kicks in and rescheduling is attempted
|
||||
time.Sleep(time.Second * 30)
|
||||
expected := []string{"running"}
|
||||
f.NoError(
|
||||
e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
|
||||
e2e.WaitForAllocStatusExpected(jobID, ns, expected),
|
||||
"should have a running allocation",
|
||||
)
|
||||
|
||||
deploymentID, err := e2e.LastDeploymentID(jobID, ns)
|
||||
f.NoError(err, "couldn't look up deployment")
|
||||
|
||||
oldDeadline, err := getProgressDeadline(deploymentID)
|
||||
f.NoError(err, "could not get progress deadline")
|
||||
time.Sleep(time.Second * 20)
|
||||
|
||||
newDeadline, err := getProgressDeadline(deploymentID)
|
||||
f.NoError(err, "could not get new progress deadline")
|
||||
f.NotEqual(oldDeadline, newDeadline, "progress deadline should have been updated")
|
||||
|
||||
f.NoError(e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
|
||||
"deployment should be successful")
|
||||
}
|
||||
|
||||
// TestRescheduleProgressDeadlineFail verifies the progress deadline is reset with
|
||||
// each healthy allocation, and that a rescheduled allocation does not.
|
||||
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadlineFail(f *framework.F) {
|
||||
|
||||
jobID := "test-reschedule-deadline-fail" + uuid.Generate()[0:8]
|
||||
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline_fail.nomad"))
|
||||
tc.jobIds = append(tc.jobIds, jobID)
|
||||
|
||||
deploymentID, err := e2e.LastDeploymentID(jobID, ns)
|
||||
f.NoError(err, "couldn't look up deployment")
|
||||
|
||||
oldDeadline, err := getProgressDeadline(deploymentID)
|
||||
f.NoError(err, "could not get progress deadline")
|
||||
time.Sleep(time.Second * 20)
|
||||
|
||||
f.NoError(e2e.WaitForLastDeploymentStatus(jobID, ns, "failed", nil),
|
||||
"deployment should be failed")
|
||||
|
||||
f.NoError(
|
||||
e2e.WaitForAllocStatusComparison(
|
||||
func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
|
||||
func(got []string) bool {
|
||||
for _, status := range got {
|
||||
if status != "failed" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}, nil,
|
||||
),
|
||||
"should have only failed allocs",
|
||||
)
|
||||
|
||||
newDeadline, err := getProgressDeadline(deploymentID)
|
||||
f.NoError(err, "could not get new progress deadline")
|
||||
f.Equal(oldDeadline, newDeadline, "progress deadline should not have been updated")
|
||||
}
|
||||
|
||||
func getProgressDeadline(deploymentID string) (time.Time, error) {
|
||||
|
||||
out, err := e2e.Command("nomad", "deployment", "status", deploymentID)
|
||||
if err != nil {
|
||||
return time.Time{}, fmt.Errorf("could not get deployment status: %v\n%v", err, out)
|
||||
}
|
||||
|
||||
section, err := e2e.GetSection(out, "Deployed")
|
||||
if err != nil {
|
||||
return time.Time{}, fmt.Errorf("could not find Deployed section: %w", err)
|
||||
}
|
||||
|
||||
rows, err := e2e.ParseColumns(section)
|
||||
if err != nil {
|
||||
return time.Time{}, fmt.Errorf("could not parse Deployed section: %w", err)
|
||||
}
|
||||
|
||||
layout := "2006-01-02T15:04:05Z07:00" // taken from command/helpers.go
|
||||
raw := rows[0]["Progress Deadline"]
|
||||
return time.Parse(layout, raw)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user