e2e: rework rescheduling progress deadline test (#8958)

Eliminate sources of randomness in the progress deadline test and clarify the
purpose of the test to check for progress deadline updates.
This commit is contained in:
Tim Gross
2020-09-29 11:02:16 -04:00
committed by GitHub
parent 095d409a32
commit dd0333c759
4 changed files with 163 additions and 13 deletions

View File

@@ -45,3 +45,29 @@ func WaitForLastDeploymentStatus(jobID, ns, status string, wc *WaitConfig) error
})
return err
}
// LastDeploymentID returns the ID of the first deployment listed by
// `nomad deployment list` whose "Job ID" column equals jobID. When ns is
// non-empty it is passed to the command with -namespace. An error is returned
// if the command fails, its output cannot be parsed, or no row matches jobID.
func LastDeploymentID(jobID, ns string) (string, error) {
	cmd := []string{"nomad", "deployment", "list"}
	if ns != "" {
		cmd = append(cmd, "-namespace", ns)
	}

	out, err := Command(cmd[0], cmd[1:]...)
	if err != nil {
		// wrap with %w so callers can still inspect the underlying error
		return "", fmt.Errorf("could not get deployment list: %w\n%v", err, out)
	}

	rows, err := ParseColumns(out)
	if err != nil {
		return "", fmt.Errorf("could not parse deployment list output: %w", err)
	}

	// The first matching row wins; presumably the CLI lists the most recent
	// deployment first, which is what makes this the "last" one — confirm.
	for _, row := range rows {
		if row["Job ID"] == jobID {
			return row["ID"], nil
		}
	}
	return "", fmt.Errorf("could not find a recent deployment for job %q", jobID)
}

View File

@@ -10,23 +10,27 @@ job "demo2" {
type = "service"
group "t2" {
count = 3
count = 1
task "t2" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "if (($RANDOM%2)); then sleep 200000 ; else exit -1 ; fi"]
args = ["-c", "sleep 300"]
}
}
update {
max_parallel = 1
min_healthy_time = "1s"
auto_revert = false
healthy_deadline = "2s"
progress_deadline = "30s"
# we want the first allocation to take a while to become healthy,
# so that we can check the deployment's progress deadline before
# and after it becomes healthy
min_healthy_time = "10s"
healthy_deadline = "15s"
progress_deadline = "20s"
max_parallel = 1
auto_revert = false
}
restart {

View File

@@ -0,0 +1,47 @@
# Service job with a single task that always exits with status 1.
job "demo2" {
datacenters = ["dc1", "dc2"]
# only place this job on Linux clients
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}
type = "service"
group "t2" {
count = 1
task "t2" {
driver = "raw_exec"
config {
command = "bash"
# the task exits immediately with a non-zero status
args = ["-c", "exit 1"]
}
}
update {
# we want the first allocation to take a while before we give up on it,
# so that we can check the deployment's progress deadline before and
# after the allocation has failed (this task exits 1, so it is never
# expected to become healthy)
min_healthy_time = "10s"
healthy_deadline = "15s"
progress_deadline = "20s"
max_parallel = 1
auto_revert = false
}
# zero restart attempts: do not restart the failed task in place
restart {
attempts = 0
mode = "fail"
}
# reschedule without an attempt limit, using a constant 5s delay
reschedule {
unlimited = "true"
delay_function = "constant"
delay = "5s"
}
}
}

View File

@@ -390,18 +390,91 @@ func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F)
)
}
// TestRescheduleProgressDeadline verifies the progress deadline is reset with
// each healthy allocation: it registers the job, reads the deployment's
// progress deadline, sleeps, reads it again and asserts that the two values
// differ, then waits for the deployment to be reported as "successful".
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadline(f *framework.F) {
// the random suffix keeps job IDs unique between runs
jobID := "test-reschedule-deadline-" + uuid.Generate()[0:8]
f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline.nomad"))
// remember the job ID (presumably used for cleanup elsewhere — confirm)
tc.jobIds = append(tc.jobIds, jobID)
// TODO(tgross): return early if "slow" isn't set
// wait until first exponential delay kicks in and rescheduling is attempted
time.Sleep(time.Second * 30)
expected := []string{"running"}
// NOTE(review): two error values are passed to f.NoError here; presumably
// only the first is asserted and the second is treated as a message
// argument. This looks like old and new diff lines shown together — confirm.
f.NoError(
e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
e2e.WaitForAllocStatusExpected(jobID, ns, expected),
"should have a running allocation",
)
deploymentID, err := e2e.LastDeploymentID(jobID, ns)
f.NoError(err, "couldn't look up deployment")
// first reading of the deadline, taken before the sleep below
oldDeadline, err := getProgressDeadline(deploymentID)
f.NoError(err, "could not get progress deadline")
// presumably long enough for the allocation to be marked healthy, which
// is what should move the deadline — confirm against the job file's
// min_healthy_time
time.Sleep(time.Second * 20)
newDeadline, err := getProgressDeadline(deploymentID)
f.NoError(err, "could not get new progress deadline")
f.NotEqual(oldDeadline, newDeadline, "progress deadline should have been updated")
f.NoError(e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
"deployment should be successful")
}
// TestRescheduleProgressDeadlineFail verifies that the progress deadline is
// not updated when a job's allocations only ever fail. It reads the
// deployment's progress deadline right after registering the job, waits for
// the deployment to be "failed" and for every allocation to be "failed", and
// then asserts that the deadline is unchanged.
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadlineFail(f *framework.F) {
	// trailing "-" separates the prefix from the random suffix, matching the
	// job ID format used by TestRescheduleProgressDeadline
	jobID := "test-reschedule-deadline-fail-" + uuid.Generate()[0:8]
	f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline_fail.nomad"))
	tc.jobIds = append(tc.jobIds, jobID)

	deploymentID, err := e2e.LastDeploymentID(jobID, ns)
	f.NoError(err, "couldn't look up deployment")

	// first reading of the deadline, taken before any allocation has had
	// time to fail and be rescheduled
	oldDeadline, err := getProgressDeadline(deploymentID)
	f.NoError(err, "could not get progress deadline")

	// presumably sized to match the job file's progress_deadline so the
	// deployment has had time to fail — confirm against the job file
	time.Sleep(time.Second * 20)

	f.NoError(e2e.WaitForLastDeploymentStatus(jobID, ns, "failed", nil),
		"deployment should be failed")

	// allFailed reports whether every status in got is "failed"; an empty
	// list also counts as all failed.
	allFailed := func(got []string) bool {
		for _, status := range got {
			if status != "failed" {
				return false
			}
		}
		return true
	}
	f.NoError(
		e2e.WaitForAllocStatusComparison(
			func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
			allFailed, nil,
		),
		"should have only failed allocs",
	)

	newDeadline, err := getProgressDeadline(deploymentID)
	f.NoError(err, "could not get new progress deadline")
	f.Equal(oldDeadline, newDeadline, "progress deadline should not have been updated")
}
// getProgressDeadline runs `nomad deployment status` for deploymentID and
// returns the "Progress Deadline" value from the first row of the "Deployed"
// section of the output, parsed as an RFC 3339 timestamp.
//
// An error is returned if the command fails, the section is missing or
// unparseable, the section has no rows or no "Progress Deadline" column, or
// the value is not a valid timestamp.
func getProgressDeadline(deploymentID string) (time.Time, error) {
	out, err := e2e.Command("nomad", "deployment", "status", deploymentID)
	if err != nil {
		return time.Time{}, fmt.Errorf("could not get deployment status: %v\n%v", err, out)
	}

	section, err := e2e.GetSection(out, "Deployed")
	if err != nil {
		return time.Time{}, fmt.Errorf("could not find Deployed section: %w", err)
	}

	rows, err := e2e.ParseColumns(section)
	if err != nil {
		return time.Time{}, fmt.Errorf("could not parse Deployed section: %w", err)
	}

	// guard the rows[0] access below: an empty section must be reported as
	// an error rather than crash the test run with an index-out-of-range
	if len(rows) == 0 {
		return time.Time{}, fmt.Errorf("no rows in Deployed section for deployment %q", deploymentID)
	}

	raw, ok := rows[0]["Progress Deadline"]
	if !ok {
		return time.Time{}, fmt.Errorf("no Progress Deadline column in Deployed section for deployment %q", deploymentID)
	}

	// time.RFC3339 is "2006-01-02T15:04:05Z07:00", the layout taken from
	// command/helpers.go
	deadline, err := time.Parse(time.RFC3339, raw)
	if err != nil {
		return time.Time{}, fmt.Errorf("could not parse progress deadline %q: %w", raw, err)
	}
	return deadline, nil
}