mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
e2e test for on_update service checks
check_restart not compatible with on_update=ignore reword caveat
This commit is contained in:
@@ -678,6 +678,7 @@ func TestJobs_Canonicalize(t *testing.T) {
|
||||
Type: "tcp",
|
||||
Interval: 10 * time.Second,
|
||||
Timeout: 2 * time.Second,
|
||||
OnUpdate: "require_healthy",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -159,6 +159,11 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
|
||||
if s.Checks[i].FailuresBeforeCritical < 0 {
|
||||
s.Checks[i].FailuresBeforeCritical = 0
|
||||
}
|
||||
|
||||
// Inhert Service
|
||||
if s.Checks[i].OnUpdate == "" {
|
||||
s.Checks[i].OnUpdate = s.OnUpdate
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,25 @@ func TestService_Canonicalize(t *testing.T) {
|
||||
require.Equal(t, OnUpdateRequireHealthy, s.OnUpdate)
|
||||
}
|
||||
|
||||
func TestServiceCheck_Canonicalize(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
j := &Job{Name: stringToPtr("job")}
|
||||
tg := &TaskGroup{Name: stringToPtr("group")}
|
||||
task := &Task{Name: "task"}
|
||||
s := &Service{
|
||||
Checks: []ServiceCheck{
|
||||
{
|
||||
Name: "check",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
s.Canonicalize(task, tg, j)
|
||||
|
||||
require.Equal(t, OnUpdateRequireHealthy, s.Checks[0].OnUpdate)
|
||||
}
|
||||
|
||||
func TestService_Check_PassFail(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@@ -344,8 +344,10 @@ func (t *Tracker) watchTaskEvents() {
|
||||
}
|
||||
}
|
||||
|
||||
// watchConsulEvents is a long lived watcher for the health of the allocation's
|
||||
// Consul checks.
|
||||
// watchConsulEvents is a watcher for the health of the allocation's Consul
|
||||
// checks. If all checks report healthy the watcher will exit after the
|
||||
// MinHealthyTime has been reached, Otherwise the watcher will continue to
|
||||
// check unhealthy checks until the ctx is cancelled
|
||||
func (t *Tracker) watchConsulEvents() {
|
||||
// checkTicker is the ticker that triggers us to look at the checks in
|
||||
// Consul
|
||||
|
||||
@@ -487,13 +487,22 @@ func TestTracker_Checks_OnUpdate(t *testing.T) {
|
||||
select {
|
||||
case <-time.After(4 * checkInterval):
|
||||
if !tc.expectedPass {
|
||||
// tracker should still be running
|
||||
require.Nil(t, tracker.ctx.Err())
|
||||
return
|
||||
}
|
||||
require.Fail(t, "timed out while waiting for health")
|
||||
case h := <-tracker.HealthyCh():
|
||||
require.True(t, h)
|
||||
}
|
||||
|
||||
// For healthy checks, the tracker should stop watching
|
||||
select {
|
||||
case <-tracker.ctx.Done():
|
||||
// Ok, tracker should exit after reporting healthy
|
||||
default:
|
||||
require.Fail(t, "expected tracker to exit after reporting healthy")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ func init() {
|
||||
new(ConsulE2ETest),
|
||||
new(ScriptChecksE2ETest),
|
||||
new(CheckRestartE2ETest),
|
||||
new(OnUpdateChecksTest),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
60
e2e/consul/input/on_update.nomad
Normal file
60
e2e/consul/input/on_update.nomad
Normal file
@@ -0,0 +1,60 @@
|
||||
job "test" {
|
||||
datacenters = ["dc1"]
|
||||
|
||||
group "test" {
|
||||
count = 3
|
||||
|
||||
network {
|
||||
port "db" {
|
||||
to = 6379
|
||||
}
|
||||
}
|
||||
|
||||
update {
|
||||
health_check = "checks"
|
||||
}
|
||||
|
||||
service {
|
||||
name = "echo-service"
|
||||
port = "db"
|
||||
|
||||
check {
|
||||
name = "tcp"
|
||||
type = "tcp"
|
||||
port = "db"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
check {
|
||||
name = "script-check"
|
||||
type = "script"
|
||||
command = "/bin/bash"
|
||||
interval = "30s"
|
||||
timeout = "10s"
|
||||
task = "server"
|
||||
on_update = "ignore_warnings"
|
||||
|
||||
args = [
|
||||
"-c",
|
||||
"echo 'this check warns'; exit 1;",
|
||||
]
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
task "server" {
|
||||
driver = "docker"
|
||||
|
||||
env {
|
||||
a = "a"
|
||||
}
|
||||
|
||||
config {
|
||||
image = "redis"
|
||||
ports = ["db"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
63
e2e/consul/input/on_update_2.nomad
Normal file
63
e2e/consul/input/on_update_2.nomad
Normal file
@@ -0,0 +1,63 @@
|
||||
job "test" {
|
||||
datacenters = ["dc1"]
|
||||
|
||||
group "test" {
|
||||
count = 3
|
||||
|
||||
network {
|
||||
port "db" {
|
||||
to = 6379
|
||||
}
|
||||
}
|
||||
|
||||
update {
|
||||
health_check = "checks"
|
||||
progress_deadline = "45s"
|
||||
healthy_deadline = "30s"
|
||||
}
|
||||
|
||||
service {
|
||||
name = "echo-service"
|
||||
port = "db"
|
||||
|
||||
check {
|
||||
name = "tcp"
|
||||
type = "tcp"
|
||||
port = "db"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
check {
|
||||
name = "script-check"
|
||||
type = "script"
|
||||
command = "/bin/bash"
|
||||
interval = "30s"
|
||||
timeout = "10s"
|
||||
task = "server"
|
||||
on_update = "ignore"
|
||||
|
||||
args = [
|
||||
"-c",
|
||||
"echo 'this check errors'; exit 2;",
|
||||
]
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
task "server" {
|
||||
driver = "docker"
|
||||
|
||||
env {
|
||||
a = "b"
|
||||
}
|
||||
|
||||
config {
|
||||
image = "redis"
|
||||
ports = ["db"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
69
e2e/consul/on_update.go
Normal file
69
e2e/consul/on_update.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package consul
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/e2e/e2eutil"
|
||||
"github.com/hashicorp/nomad/e2e/framework"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
)
|
||||
|
||||
type OnUpdateChecksTest struct {
|
||||
framework.TC
|
||||
jobIDs []string
|
||||
}
|
||||
|
||||
func (tc *OnUpdateChecksTest) BeforeAll(f *framework.F) {
|
||||
// Ensure cluster has leader before running tests
|
||||
e2eutil.WaitForLeader(f.T(), tc.Nomad())
|
||||
// Ensure that we have at least 1 client node in ready state
|
||||
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
|
||||
}
|
||||
|
||||
func (tc *OnUpdateChecksTest) AfterEach(f *framework.F) {
|
||||
nomadClient := tc.Nomad()
|
||||
j := nomadClient.Jobs()
|
||||
|
||||
for _, id := range tc.jobIDs {
|
||||
j.Deregister(id, true, nil)
|
||||
}
|
||||
_, err := e2eutil.Command("nomad", "system", "gc")
|
||||
f.NoError(err)
|
||||
}
|
||||
|
||||
// TestOnUpdateCheck_IgnoreWarning_IgnoreErrors ensures that deployments
|
||||
// complete successfully with service checks that warn and error when on_update
|
||||
// is specified to ignore either.
|
||||
func (tc *OnUpdateChecksTest) TestOnUpdateCheck_IgnoreWarning_IgnoreErrors(f *framework.F) {
|
||||
uuid := uuid.Generate()
|
||||
jobID := fmt.Sprintf("on-update-%s", uuid[0:8])
|
||||
tc.jobIDs = append(tc.jobIDs, jobID)
|
||||
|
||||
f.NoError(
|
||||
e2eutil.Register(jobID, "consul/input/on_update.nomad"),
|
||||
"should have registered successfully",
|
||||
)
|
||||
|
||||
wc := &e2eutil.WaitConfig{
|
||||
Interval: 1 * time.Second,
|
||||
Retries: 60,
|
||||
}
|
||||
f.NoError(
|
||||
e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
|
||||
"deployment should have completed successfully",
|
||||
)
|
||||
|
||||
// register update with on_update = ignore
|
||||
// this check errors, deployment should still be successful
|
||||
f.NoError(
|
||||
e2eutil.Register(jobID, "consul/input/on_update_2.nomad"),
|
||||
"should have registered successfully",
|
||||
)
|
||||
|
||||
f.NoError(
|
||||
e2eutil.WaitForLastDeploymentStatus(jobID, "", "successful", wc),
|
||||
"deployment should have completed successfully",
|
||||
)
|
||||
|
||||
}
|
||||
@@ -50,7 +50,6 @@ func parseService(o *ast.ObjectItem) (*api.Service, error) {
|
||||
"task",
|
||||
"meta",
|
||||
"canary_meta",
|
||||
"on_update",
|
||||
}
|
||||
if err := checkHCLKeys(o.Val, valid); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -262,7 +262,6 @@ func TestParse(t *testing.T) {
|
||||
"canary": "boom",
|
||||
},
|
||||
PortLabel: "http",
|
||||
OnUpdate: "require_healthy",
|
||||
Checks: []api.ServiceCheck{
|
||||
{
|
||||
Name: "check-name",
|
||||
@@ -277,7 +276,6 @@ func TestParse(t *testing.T) {
|
||||
Grace: timeToPtr(10 * time.Second),
|
||||
IgnoreWarnings: true,
|
||||
},
|
||||
OnUpdate: "require_healthy",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -200,7 +200,6 @@ job "binstore-storagelocker" {
|
||||
abc = "123"
|
||||
}
|
||||
|
||||
on_update = "require_healthy"
|
||||
|
||||
canary_meta {
|
||||
canary = "boom"
|
||||
@@ -218,7 +217,6 @@ job "binstore-storagelocker" {
|
||||
port = "admin"
|
||||
grpc_service = "foo.Bar"
|
||||
grpc_use_tls = true
|
||||
on_update = "require_healthy"
|
||||
|
||||
check_restart {
|
||||
limit = 3
|
||||
|
||||
@@ -268,7 +268,7 @@ func (sc *ServiceCheck) validate() error {
|
||||
case "", OnUpdateIgnore, OnUpdateRequireHealthy, OnUpdateIgnoreWarn:
|
||||
// OK
|
||||
default:
|
||||
return fmt.Errorf("invalid on_update - must be %q, %q, or %q; not %q", OnUpdateRequireHealthy, OnUpdateIgnoreWarn, OnUpdateIgnore, sc.OnUpdate)
|
||||
return fmt.Errorf("on_update must be %q, %q, or %q; got %q", OnUpdateRequireHealthy, OnUpdateIgnoreWarn, OnUpdateIgnore, sc.OnUpdate)
|
||||
}
|
||||
|
||||
// Note that we cannot completely validate the Expose field yet - we do not
|
||||
@@ -300,10 +300,18 @@ func (sc *ServiceCheck) validate() error {
|
||||
return fmt.Errorf("failures_before_critical not supported for check of type %q", sc.Type)
|
||||
}
|
||||
|
||||
// CheckRestart IgnoreWarnings must be true if a check has defined OnUpdate
|
||||
// ignore_warnings
|
||||
if sc.CheckRestart != nil && !sc.CheckRestart.IgnoreWarnings {
|
||||
if sc.OnUpdate == OnUpdateIgnoreWarn || sc.OnUpdate == OnUpdateIgnore {
|
||||
// Check that CheckRestart and OnUpdate do not conflict
|
||||
if sc.CheckRestart != nil {
|
||||
// CheckRestart and OnUpdate Ignore are incompatible If OnUpdate treats
|
||||
// an error has healthy, and the deployment succeeds followed by check
|
||||
// restart restarting erroring checks, the deployment is left in an odd
|
||||
// state
|
||||
if sc.OnUpdate == OnUpdateIgnore {
|
||||
return fmt.Errorf("on_update value %q is not compatible with check_restart", sc.OnUpdate)
|
||||
}
|
||||
// CheckRestart IgnoreWarnings must be true if a check has defined OnUpdate
|
||||
// ignore_warnings
|
||||
if !sc.CheckRestart.IgnoreWarnings && sc.OnUpdate == OnUpdateIgnoreWarn {
|
||||
return fmt.Errorf("on_update value %q not supported with check_restart ignore_warnings value %q", sc.OnUpdate, strconv.FormatBool(sc.CheckRestart.IgnoreWarnings))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,6 +150,23 @@ func TestServiceCheck_validate_OnUpdate_CheckRestart_Conflict(t *testing.T) {
|
||||
require.EqualError(t, err, `on_update value "ignore_warnings" not supported with check_restart ignore_warnings value "false"`)
|
||||
})
|
||||
|
||||
t.Run("invalid", func(t *testing.T) {
|
||||
err := (&ServiceCheck{
|
||||
Name: "check",
|
||||
Type: "script",
|
||||
Command: "/nothing",
|
||||
Interval: 1 * time.Second,
|
||||
Timeout: 2 * time.Second,
|
||||
CheckRestart: &CheckRestart{
|
||||
IgnoreWarnings: false,
|
||||
Limit: 3,
|
||||
Grace: 5 * time.Second,
|
||||
},
|
||||
OnUpdate: "ignore",
|
||||
}).validate()
|
||||
require.EqualError(t, err, `on_update value "ignore" is not compatible with check_restart`)
|
||||
})
|
||||
|
||||
t.Run("valid", func(t *testing.T) {
|
||||
err := (&ServiceCheck{
|
||||
Name: "check",
|
||||
|
||||
5
vendor/github.com/hashicorp/nomad/api/services.go
generated
vendored
5
vendor/github.com/hashicorp/nomad/api/services.go
generated
vendored
@@ -159,6 +159,11 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
|
||||
if s.Checks[i].FailuresBeforeCritical < 0 {
|
||||
s.Checks[i].FailuresBeforeCritical = 0
|
||||
}
|
||||
|
||||
// Inhert Service
|
||||
if s.Checks[i].OnUpdate == "" {
|
||||
s.Checks[i].OnUpdate = s.OnUpdate
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -177,7 +177,9 @@ Connect][connect] integration.
|
||||
evaluated when determining deployment health (including a job's initial
|
||||
deployment). This allows job submitters to define certain checks as readiness
|
||||
checks, progressing a deployment even if the Service's checks are not yet
|
||||
healthy. On the Consul side, the status is not altered.
|
||||
healthy. Checks inherit the Servie's value by default. The check status is
|
||||
not altered in Consul and is only used to determine the checks health during
|
||||
an update.
|
||||
|
||||
- `require_healthy` - In order for Nomad to consider the check healthy during
|
||||
an update it must report as healthy.
|
||||
@@ -188,10 +190,11 @@ Connect][connect] integration.
|
||||
- `ignore` - Any status will be treated as healthy.
|
||||
|
||||
~> **Caveat:** `on_update` is only compatible with certain
|
||||
[`check_restart`][check_restart_stanza] configurations. If `on_update` is set
|
||||
to `ignore_warnings` then `check_restart` `ignore_warnings` must also be set
|
||||
to true. `check_restart` can however have `ignore_warnings` set to true with
|
||||
`on_update` equal to `require_healthy`.
|
||||
[`check_restart`][check_restart_stanza] configurations. `on_update =
|
||||
"ignore_warnings"` requires that `check_restart.ignore_warnings = true`.
|
||||
`check_restart` can however specify `ignore_warnings = true` with `on_update
|
||||
= "require_healthy"`. If `on_update` is set to `ignore`, `check_restart` must
|
||||
be omitted entirely.
|
||||
|
||||
|
||||
### `check` Parameters
|
||||
@@ -302,10 +305,12 @@ scripts.
|
||||
checks. Requires Consul >= 0.7.2.
|
||||
|
||||
- `on_update` `(string: "require_healthy")` - Specifies how checks should be
|
||||
evaluated when determining deployment health (including an initial
|
||||
evaluated when determining deployment health (including a job's initial
|
||||
deployment). This allows job submitters to define certain checks as readiness
|
||||
checks, progressing a deployment even if the Service's checks are not yet
|
||||
healthy. On the Consul side, the status is not altered.
|
||||
healthy. Checks inherit the Servie's value by default. The check status is
|
||||
not altered in Consul and is only used to determine the checks health during
|
||||
an update.
|
||||
|
||||
- `require_healthy` - In order for Nomad to consider the check healthy during
|
||||
an update it must report as healthy.
|
||||
@@ -316,10 +321,11 @@ scripts.
|
||||
- `ignore` - Any status will be treated as healthy.
|
||||
|
||||
~> **Caveat:** `on_update` is only compatible with certain
|
||||
[`check_restart`][check_restart_stanza] configurations. If `on_update` is set
|
||||
to `ignore_warnings` then `check_restart` `ignore_warnings` must also be set
|
||||
to true. `check_restart` can however have `ignore_warnings` set to true with
|
||||
`on_update` equal to `require_healthy`.
|
||||
[`check_restart`][check_restart_stanza] configurations. `on_update =
|
||||
"ignore_warnings"` requires that `check_restart.ignore_warnings = true`.
|
||||
`check_restart` can however specify `ignore_warnings = true` with `on_update
|
||||
= "require_healthy"`. If `on_update` is set to `ignore`, `check_restart` must
|
||||
be omitted entirely.
|
||||
|
||||
#### `header` Stanza
|
||||
|
||||
|
||||
Reference in New Issue
Block a user