on_update check_restart e2e

Drew Bailey
2021-02-08 10:41:01 -05:00
parent 74e7bbb7d2
commit 7217bf8f06
4 changed files with 201 additions and 2 deletions

View File

@@ -15,7 +15,7 @@ job "test" {
  }

  service {
-   name = "echo-service"
+   name = "on-update-service"
    port = "db"

    check {

View File

@@ -0,0 +1,88 @@
job "test" {
datacenters = ["dc1"]
group "test" {
count = 1
network {
port "db" {
to = 6379
}
}
update {
health_check = "checks"
progress_deadline = "45s"
healthy_deadline = "30s"
}
service {
name = "script-check-svc"
port = "db"
check {
name = "tcp"
type = "tcp"
port = "db"
interval = "10s"
timeout = "2s"
}
check {
name = "script-check-script"
type = "script"
command = "/bin/bash"
interval = "5s"
timeout = "1s"
task = "server"
on_update = "ignore_warnings"
args = [
"-c",
"/local/ready.sh"
]
check_restart {
limit = 2
ignore_warnings = true
}
}
}
task "server" {
driver = "docker"
config {
image = "redis"
ports = ["db"]
}
# Check script that reports as warning for long enough for deployment to
# become healthy then errors
template {
data = <<EOT
#!/bin/sh
if [ ! -f /tmp/check_0 ]; then touch /tmp/check_0; exit 1; fi
if [ ! -f /tmp/check_1 ]; then touch /tmp/check_1; exit 1; fi
if [ ! -f /tmp/check_2 ]; then touch /tmp/check_2; exit 1; fi
if [ ! -f /tmp/check_3 ]; then touch /tmp/check_3; exit 1; fi
if [ ! -f /tmp/check_4 ]; then touch /tmp/check_4; exit 1; fi
if [ ! -f /tmp/check_5 ]; then touch /tmp/check_5; exit 1; fi
if [ ! -f /tmp/check_6 ]; then touch /tmp/check_6; exit 7; fi
if [ ! -f /tmp/check_7 ]; then touch /tmp/check_7; exit 7; fi
if [ ! -f /tmp/check_8 ]; then touch /tmp/check_8; exit 7; fi
if [ ! -f /tmp/check_9 ]; then touch /tmp/check_9; exit 7; fi
if [ -f /tmp/check_9 ]; then exit 7; fi
EOT
destination = "local/ready.sh"
perms = "777"
}
}
}
}
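
For reference on the exit codes above: Consul script checks treat exit code 0 as passing, exit code 1 as a warning, and any other exit code as critical. A minimal Go sketch of that mapping, for illustration only (checkStatus is not part of this commit):

// checkStatus models Consul's script-check convention: exit 0 is passing,
// exit 1 is warning, and anything else is critical.
func checkStatus(exitCode int) string {
    switch exitCode {
    case 0:
        return "passing"
    case 1:
        return "warning"
    default:
        return "critical"
    }
}

So the ten /tmp/check_N runs above produce six warnings followed by criticals, and the final line keeps the check critical on every run after that.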

View File

@@ -5,8 +5,10 @@ import (
    "time"

    "github.com/hashicorp/nomad/e2e/e2eutil"
+   e2e "github.com/hashicorp/nomad/e2e/e2eutil"
    "github.com/hashicorp/nomad/e2e/framework"
    "github.com/hashicorp/nomad/helper/uuid"
+   "github.com/hashicorp/nomad/testutil"
)

type OnUpdateChecksTest struct {
@@ -65,5 +67,66 @@ func (tc *OnUpdateChecksTest) TestOnUpdateCheck_IgnoreWarning_IgnoreErrors(f *framework.F) {
    f.NoError(
        e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
        "deployment should have completed successfully",
    )
}

// TestOnUpdate_CheckRestart ensures that a service check set to ignore
// warnings still follows the check_restart stanza if the task becomes
// unhealthy after a deployment is successful.
func (tc *OnUpdateChecksTest) TestOnUpdate_CheckRestart(f *framework.F) {
    uuid := uuid.Generate()
    jobID := fmt.Sprintf("on-update-restart-%s", uuid[0:8])
    tc.jobIDs = append(tc.jobIDs, jobID)

    f.NoError(
        e2eutil.Register(jobID, "consul/input/on_update_check_restart.nomad"),
        "should have registered successfully",
    )
    wc := &e2eutil.WaitConfig{
        Interval: 1 * time.Second,
        Retries:  60,
    }
    f.NoError(
        e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
        "deployment should have completed successfully",
    )

    // Register an update with on_update = "ignore"; its check errors, but
    // the deployment should still be successful.
    f.NoError(
        e2eutil.Register(jobID, "consul/input/on_update_2.nomad"),
        "should have registered successfully",
    )
    f.NoError(
        e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
        "deployment should have completed successfully",
    )

    interval, retries := wc.OrDefault()

    // Wait for and ensure that the allocation restarted.
    testutil.WaitForResultRetries(retries, func() (bool, error) {
        time.Sleep(interval)

        allocs, err := e2e.AllocTaskEventsForJob(jobID, "")
        if err != nil {
            return false, err
        }
        for allocID, allocEvents := range allocs {
            var allocRestarted bool
            for _, events := range allocEvents {
                if events["Type"] == "Restart Signaled" {
                    allocRestarted = true
                }
            }
            if allocRestarted {
                return true, nil
            }
            // The job runs a single allocation (count = 1), so fail as soon
            // as its events show no restart yet.
            return false, fmt.Errorf("alloc %s expected to restart", allocID)
        }
        return true, nil
    }, func(err error) {
        f.NoError(err)
    })
}
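
The behavior this test exercises: check_restart restarts a task once a check has been unhealthy limit times in a row, and ignore_warnings = true makes warning results count as healthy. A conceptual sketch of that policy, assuming consecutive-failure counting; this is illustrative, not Nomad's actual implementation:

// checkRestartPolicy models the check_restart stanza: limit consecutive
// unhealthy results trigger a restart, and warnings count as healthy when
// ignoreWarnings is set.
type checkRestartPolicy struct {
    limit          int
    ignoreWarnings bool
    unhealthy      int
}

// observe records one check result and reports whether the task should now
// be restarted.
func (p *checkRestartPolicy) observe(status string) bool {
    if status == "passing" || (p.ignoreWarnings && status == "warning") {
        p.unhealthy = 0
        return false
    }
    p.unhealthy++
    return p.unhealthy >= p.limit
}

With limit = 2, the exit-7 results arriving after the deployment completes are enough to signal a restart, which is the "Restart Signaled" event the test polls for.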

View File

@@ -79,6 +79,39 @@ func AllocsForJob(jobID, ns string) ([]map[string]string, error) {
    return allocs, nil
}

// AllocTaskEventsForJob returns a map from allocation ID to that allocation's
// task events (the key/value columns of the Recent Events section of
// 'nomad alloc status').
func AllocTaskEventsForJob(jobID, ns string) (map[string][]map[string]string, error) {
    allocs, err := AllocsForJob(jobID, ns)
    if err != nil {
        return nil, err
    }

    results := make(map[string][]map[string]string)
    for _, alloc := range allocs {
        results[alloc["ID"]] = make([]map[string]string, 0)

        cmd := []string{"nomad", "alloc", "status", alloc["ID"]}
        out, err := Command(cmd[0], cmd[1:]...)
        if err != nil {
            return nil, fmt.Errorf("querying alloc status: %w", err)
        }

        section, err := GetSection(out, "Recent Events:")
        if err != nil {
            return nil, fmt.Errorf("could not find Recent Events section: %w", err)
        }

        events, err := ParseColumns(section)
        if err != nil {
            return nil, fmt.Errorf("could not parse recent events section: %w", err)
        }
        results[alloc["ID"]] = events
    }
    return results, nil
}
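
A minimal usage sketch of this helper (the caller is hypothetical; Type and Description are assumed to be column headers of the Recent Events table):

package sketch // illustrative only

import (
    "fmt"

    "github.com/hashicorp/nomad/e2e/e2eutil"
)

// reportRestarts prints each allocation of a job whose recent task events
// include a "Restart Signaled" entry.
func reportRestarts(jobID string) error {
    events, err := e2eutil.AllocTaskEventsForJob(jobID, "")
    if err != nil {
        return err
    }
    for allocID, allocEvents := range events {
        for _, event := range allocEvents {
            if event["Type"] == "Restart Signaled" {
                fmt.Printf("alloc %s restarted: %s\n", allocID, event["Description"])
            }
        }
    }
    return nil
}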
// AllocsForNode returns a slice of key->value maps, each describing the values
// of the 'nomad node status' Allocations section (not actual
// structs.Allocation objects, query the API if you want those)
@@ -116,6 +149,21 @@ func AllocStatuses(jobID, ns string) ([]string, error) {
    return statuses, nil
}

// AllocTaskEvents returns a slice of client statuses for the job's allocs
func AllocTaskEvents(jobID, ns string) ([]string, error) {
    allocs, err := AllocsForJob(jobID, ns)
    if err != nil {
        return nil, err
    }

    statuses := []string{}
    for _, alloc := range allocs {
        statuses = append(statuses, alloc["Status"])
    }
    return statuses, nil
}

// AllocStatusesRescheduled is a helper function that pulls
// out client statuses only from rescheduled allocs.
func AllocStatusesRescheduled(jobID, ns string) ([]string, error) {