e2e test for on_update service checks

check_restart not compatible with on_update=ignore

reword caveat
This commit is contained in:
Drew Bailey
2021-02-04 10:18:03 -05:00
parent 5c30148334
commit 74e7bbb7d2
16 changed files with 284 additions and 24 deletions

View File

@@ -678,6 +678,7 @@ func TestJobs_Canonicalize(t *testing.T) {
Type: "tcp",
Interval: 10 * time.Second,
Timeout: 2 * time.Second,
OnUpdate: "require_healthy",
},
},
},

View File

@@ -159,6 +159,11 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
if s.Checks[i].FailuresBeforeCritical < 0 {
s.Checks[i].FailuresBeforeCritical = 0
}
// Inhert Service
if s.Checks[i].OnUpdate == "" {
s.Checks[i].OnUpdate = s.OnUpdate
}
}
}

View File

@@ -23,6 +23,25 @@ func TestService_Canonicalize(t *testing.T) {
require.Equal(t, OnUpdateRequireHealthy, s.OnUpdate)
}
func TestServiceCheck_Canonicalize(t *testing.T) {
t.Parallel()
j := &Job{Name: stringToPtr("job")}
tg := &TaskGroup{Name: stringToPtr("group")}
task := &Task{Name: "task"}
s := &Service{
Checks: []ServiceCheck{
{
Name: "check",
},
},
}
s.Canonicalize(task, tg, j)
require.Equal(t, OnUpdateRequireHealthy, s.Checks[0].OnUpdate)
}
func TestService_Check_PassFail(t *testing.T) {
t.Parallel()

View File

@@ -344,8 +344,10 @@ func (t *Tracker) watchTaskEvents() {
}
}
// watchConsulEvents is a long lived watcher for the health of the allocation's
// Consul checks.
// watchConsulEvents is a watcher for the health of the allocation's Consul
// checks. If all checks report healthy the watcher will exit after the
// MinHealthyTime has been reached, Otherwise the watcher will continue to
// check unhealthy checks until the ctx is cancelled
func (t *Tracker) watchConsulEvents() {
// checkTicker is the ticker that triggers us to look at the checks in
// Consul

View File

@@ -487,13 +487,22 @@ func TestTracker_Checks_OnUpdate(t *testing.T) {
select {
case <-time.After(4 * checkInterval):
if !tc.expectedPass {
// tracker should still be running
require.Nil(t, tracker.ctx.Err())
return
}
require.Fail(t, "timed out while waiting for health")
case h := <-tracker.HealthyCh():
require.True(t, h)
}
// For healthy checks, the tracker should stop watching
select {
case <-tracker.ctx.Done():
// Ok, tracker should exit after reporting healthy
default:
require.Fail(t, "expected tracker to exit after reporting healthy")
}
})
}
}

View File

@@ -36,6 +36,7 @@ func init() {
new(ConsulE2ETest),
new(ScriptChecksE2ETest),
new(CheckRestartE2ETest),
new(OnUpdateChecksTest),
},
})
}

View File

@@ -0,0 +1,60 @@
job "test" {
datacenters = ["dc1"]
group "test" {
count = 3
network {
port "db" {
to = 6379
}
}
update {
health_check = "checks"
}
service {
name = "echo-service"
port = "db"
check {
name = "tcp"
type = "tcp"
port = "db"
interval = "10s"
timeout = "2s"
}
check {
name = "script-check"
type = "script"
command = "/bin/bash"
interval = "30s"
timeout = "10s"
task = "server"
on_update = "ignore_warnings"
args = [
"-c",
"echo 'this check warns'; exit 1;",
]
}
}
task "server" {
driver = "docker"
env {
a = "a"
}
config {
image = "redis"
ports = ["db"]
}
}
}
}

View File

@@ -0,0 +1,63 @@
job "test" {
datacenters = ["dc1"]
group "test" {
count = 3
network {
port "db" {
to = 6379
}
}
update {
health_check = "checks"
progress_deadline = "45s"
healthy_deadline = "30s"
}
service {
name = "echo-service"
port = "db"
check {
name = "tcp"
type = "tcp"
port = "db"
interval = "10s"
timeout = "2s"
}
check {
name = "script-check"
type = "script"
command = "/bin/bash"
interval = "30s"
timeout = "10s"
task = "server"
on_update = "ignore"
args = [
"-c",
"echo 'this check errors'; exit 2;",
]
}
}
task "server" {
driver = "docker"
env {
a = "b"
}
config {
image = "redis"
ports = ["db"]
}
}
}
}

69
e2e/consul/on_update.go Normal file
View File

@@ -0,0 +1,69 @@
package consul
import (
"fmt"
"time"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
)
type OnUpdateChecksTest struct {
framework.TC
jobIDs []string
}
func (tc *OnUpdateChecksTest) BeforeAll(f *framework.F) {
// Ensure cluster has leader before running tests
e2eutil.WaitForLeader(f.T(), tc.Nomad())
// Ensure that we have at least 1 client node in ready state
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}
func (tc *OnUpdateChecksTest) AfterEach(f *framework.F) {
nomadClient := tc.Nomad()
j := nomadClient.Jobs()
for _, id := range tc.jobIDs {
j.Deregister(id, true, nil)
}
_, err := e2eutil.Command("nomad", "system", "gc")
f.NoError(err)
}
// TestOnUpdateCheck_IgnoreWarning_IgnoreErrors ensures that deployments
// complete successfully with service checks that warn and error when on_update
// is specified to ignore either.
func (tc *OnUpdateChecksTest) TestOnUpdateCheck_IgnoreWarning_IgnoreErrors(f *framework.F) {
uuid := uuid.Generate()
jobID := fmt.Sprintf("on-update-%s", uuid[0:8])
tc.jobIDs = append(tc.jobIDs, jobID)
f.NoError(
e2eutil.Register(jobID, "consul/input/on_update.nomad"),
"should have registered successfully",
)
wc := &e2eutil.WaitConfig{
Interval: 1 * time.Second,
Retries: 60,
}
f.NoError(
e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
"deployment should have completed successfully",
)
// register update with on_update = ignore
// this check errors, deployment should still be successful
f.NoError(
e2eutil.Register(jobID, "consul/input/on_update_2.nomad"),
"should have registered successfully",
)
f.NoError(
e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
"deployment should have completed successfully",
)
}

View File

@@ -50,7 +50,6 @@ func parseService(o *ast.ObjectItem) (*api.Service, error) {
"task",
"meta",
"canary_meta",
"on_update",
}
if err := checkHCLKeys(o.Val, valid); err != nil {
return nil, err

View File

@@ -262,7 +262,6 @@ func TestParse(t *testing.T) {
"canary": "boom",
},
PortLabel: "http",
OnUpdate: "require_healthy",
Checks: []api.ServiceCheck{
{
Name: "check-name",
@@ -277,7 +276,6 @@ func TestParse(t *testing.T) {
Grace: timeToPtr(10 * time.Second),
IgnoreWarnings: true,
},
OnUpdate: "require_healthy",
},
},
},

View File

@@ -200,7 +200,6 @@ job "binstore-storagelocker" {
abc = "123"
}
on_update = "require_healthy"
canary_meta {
canary = "boom"
@@ -218,7 +217,6 @@ job "binstore-storagelocker" {
port = "admin"
grpc_service = "foo.Bar"
grpc_use_tls = true
on_update = "require_healthy"
check_restart {
limit = 3

View File

@@ -268,7 +268,7 @@ func (sc *ServiceCheck) validate() error {
case "", OnUpdateIgnore, OnUpdateRequireHealthy, OnUpdateIgnoreWarn:
// OK
default:
return fmt.Errorf("invalid on_update - must be %q, %q, or %q; not %q", OnUpdateRequireHealthy, OnUpdateIgnoreWarn, OnUpdateIgnore, sc.OnUpdate)
return fmt.Errorf("on_update must be %q, %q, or %q; got %q", OnUpdateRequireHealthy, OnUpdateIgnoreWarn, OnUpdateIgnore, sc.OnUpdate)
}
// Note that we cannot completely validate the Expose field yet - we do not
@@ -300,10 +300,18 @@ func (sc *ServiceCheck) validate() error {
return fmt.Errorf("failures_before_critical not supported for check of type %q", sc.Type)
}
// CheckRestart IgnoreWarnings must be true if a check has defined OnUpdate
// ignore_warnings
if sc.CheckRestart != nil && !sc.CheckRestart.IgnoreWarnings {
if sc.OnUpdate == OnUpdateIgnoreWarn || sc.OnUpdate == OnUpdateIgnore {
// Check that CheckRestart and OnUpdate do not conflict
if sc.CheckRestart != nil {
// CheckRestart and OnUpdate Ignore are incompatible If OnUpdate treats
// an error has healthy, and the deployment succeeds followed by check
// restart restarting erroring checks, the deployment is left in an odd
// state
if sc.OnUpdate == OnUpdateIgnore {
return fmt.Errorf("on_update value %q is not compatible with check_restart", sc.OnUpdate)
}
// CheckRestart IgnoreWarnings must be true if a check has defined OnUpdate
// ignore_warnings
if !sc.CheckRestart.IgnoreWarnings && sc.OnUpdate == OnUpdateIgnoreWarn {
return fmt.Errorf("on_update value %q not supported with check_restart ignore_warnings value %q", sc.OnUpdate, strconv.FormatBool(sc.CheckRestart.IgnoreWarnings))
}
}

View File

@@ -150,6 +150,23 @@ func TestServiceCheck_validate_OnUpdate_CheckRestart_Conflict(t *testing.T) {
require.EqualError(t, err, `on_update value "ignore_warnings" not supported with check_restart ignore_warnings value "false"`)
})
t.Run("invalid", func(t *testing.T) {
err := (&ServiceCheck{
Name: "check",
Type: "script",
Command: "/nothing",
Interval: 1 * time.Second,
Timeout: 2 * time.Second,
CheckRestart: &CheckRestart{
IgnoreWarnings: false,
Limit: 3,
Grace: 5 * time.Second,
},
OnUpdate: "ignore",
}).validate()
require.EqualError(t, err, `on_update value "ignore" is not compatible with check_restart`)
})
t.Run("valid", func(t *testing.T) {
err := (&ServiceCheck{
Name: "check",

View File

@@ -159,6 +159,11 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
if s.Checks[i].FailuresBeforeCritical < 0 {
s.Checks[i].FailuresBeforeCritical = 0
}
// Inhert Service
if s.Checks[i].OnUpdate == "" {
s.Checks[i].OnUpdate = s.OnUpdate
}
}
}

View File

@@ -177,7 +177,9 @@ Connect][connect] integration.
evaluated when determining deployment health (including a job's initial
deployment). This allows job submitters to define certain checks as readiness
checks, progressing a deployment even if the Service's checks are not yet
healthy. On the Consul side, the status is not altered.
healthy. Checks inherit the Servie's value by default. The check status is
not altered in Consul and is only used to determine the checks health during
an update.
- `require_healthy` - In order for Nomad to consider the check healthy during
an update it must report as healthy.
@@ -188,10 +190,11 @@ Connect][connect] integration.
- `ignore` - Any status will be treated as healthy.
~> **Caveat:** `on_update` is only compatible with certain
[`check_restart`][check_restart_stanza] configurations. If `on_update` is set
to `ignore_warnings` then `check_restart` `ignore_warnings` must also be set
to true. `check_restart` can however have `ignore_warnings` set to true with
`on_update` equal to `require_healthy`.
[`check_restart`][check_restart_stanza] configurations. `on_update =
"ignore_warnings"` requires that `check_restart.ignore_warnings = true`.
`check_restart` can however specify `ignore_warnings = true` with `on_update
= "require_healthy"`. If `on_update` is set to `ignore`, `check_restart` must
be omitted entirely.
### `check` Parameters
@@ -302,10 +305,12 @@ scripts.
checks. Requires Consul >= 0.7.2.
- `on_update` `(string: "require_healthy")` - Specifies how checks should be
evaluated when determining deployment health (including an initial
evaluated when determining deployment health (including a job's initial
deployment). This allows job submitters to define certain checks as readiness
checks, progressing a deployment even if the Service's checks are not yet
healthy. On the Consul side, the status is not altered.
healthy. Checks inherit the Servie's value by default. The check status is
not altered in Consul and is only used to determine the checks health during
an update.
- `require_healthy` - In order for Nomad to consider the check healthy during
an update it must report as healthy.
@@ -316,10 +321,11 @@ scripts.
- `ignore` - Any status will be treated as healthy.
~> **Caveat:** `on_update` is only compatible with certain
[`check_restart`][check_restart_stanza] configurations. If `on_update` is set
to `ignore_warnings` then `check_restart` `ignore_warnings` must also be set
to true. `check_restart` can however have `ignore_warnings` set to true with
`on_update` equal to `require_healthy`.
[`check_restart`][check_restart_stanza] configurations. `on_update =
"ignore_warnings"` requires that `check_restart.ignore_warnings = true`.
`check_restart` can however specify `ignore_warnings = true` with `on_update
= "require_healthy"`. If `on_update` is set to `ignore`, `check_restart` must
be omitted entirely.
#### `header` Stanza