Adds support for failures before warning to Consul service checks (#19336)

Adds support for failures before warning and failures before critical
to the automatically created Nomad client and server services in Consul.
This commit is contained in:
Mike Nomitch
2023-12-14 11:33:31 -08:00
committed by GitHub
parent 0e42569ffb
commit 31f4296826
19 changed files with 362 additions and 142 deletions

7
.changelog/19336.txt Normal file
View File

@@ -0,0 +1,7 @@
```release-note:improvement
consul: Added support for failures_before_warning and failures_before_critical in Nomad agent services
```
```release-note:improvement
consul: Added support for failures_before_warning in Consul service checks
```

View File

@@ -222,6 +222,7 @@ type ServiceCheck struct {
TaskName string `mapstructure:"task" hcl:"task,optional"`
SuccessBeforePassing int `mapstructure:"success_before_passing" hcl:"success_before_passing,optional"`
FailuresBeforeCritical int `mapstructure:"failures_before_critical" hcl:"failures_before_critical,optional"`
FailuresBeforeWarning int `mapstructure:"failures_before_warning" hcl:"failures_before_warning,optional"`
Body string `hcl:"body,optional"`
OnUpdate string `mapstructure:"on_update" hcl:"on_update,optional"`
}
@@ -320,6 +321,10 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
s.Checks[i].FailuresBeforeCritical = 0
}
if s.Checks[i].FailuresBeforeWarning < 0 {
s.Checks[i].FailuresBeforeWarning = 0
}
// Inherit OnUpdate from the Service when the check does not set its own
if s.Checks[i].OnUpdate == "" {
s.Checks[i].OnUpdate = s.OnUpdate

View File

@@ -75,12 +75,14 @@ func TestService_Check_PassFail(t *testing.T) {
Checks: []ServiceCheck{{
SuccessBeforePassing: -1,
FailuresBeforeCritical: -2,
FailuresBeforeWarning: -3,
}},
}
s.Canonicalize(task, tg, job)
must.Zero(t, s.Checks[0].SuccessBeforePassing)
must.Zero(t, s.Checks[0].FailuresBeforeCritical)
must.Zero(t, s.Checks[0].FailuresBeforeWarning)
})
t.Run("normal", func(t *testing.T) {
@@ -88,12 +90,14 @@ func TestService_Check_PassFail(t *testing.T) {
Checks: []ServiceCheck{{
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}},
}
s.Canonicalize(task, tg, job)
must.Eq(t, 3, s.Checks[0].SuccessBeforePassing)
must.Eq(t, 4, s.Checks[0].FailuresBeforeCritical)
must.Eq(t, 2, s.Checks[0].FailuresBeforeWarning)
})
}

View File

@@ -1172,19 +1172,25 @@ func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
httpCheckAddr = a.config.AdvertiseAddrs.HTTP
}
check := structs.ServiceCheck{
Name: defaultConsul.ClientHTTPCheckName,
Type: "http",
Path: "/v1/agent/health?type=client",
Protocol: "http",
Interval: agentHttpCheckInterval,
Timeout: agentHttpCheckTimeout,
PortLabel: httpCheckAddr,
Name: defaultConsul.ClientHTTPCheckName,
Type: "http",
Path: "/v1/agent/health?type=client",
Protocol: "http",
Interval: agentHttpCheckInterval,
Timeout: agentHttpCheckTimeout,
PortLabel: httpCheckAddr,
FailuresBeforeWarning: defaultConsul.ClientFailuresBeforeWarning,
FailuresBeforeCritical: defaultConsul.ClientFailuresBeforeCritical,
}
// Switch to endpoint that doesn't require a leader for servers
// and replace the client failures-before-critical/warning thresholds
// with the server-specific ones
if server {
check.Name = defaultConsul.ServerHTTPCheckName
check.Path = "/v1/agent/health?type=server"
check.FailuresBeforeCritical = defaultConsul.ServerFailuresBeforeCritical
check.FailuresBeforeWarning = defaultConsul.ServerFailuresBeforeWarning
}
if !a.config.TLSConfig.EnableHTTP {
// No HTTPS, return a plain http check
return &check
@@ -1197,6 +1203,7 @@ func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
// HTTPS enabled; skip verification
check.Protocol = "https"
check.TLSSkipVerify = true
return &check
}

View File

@@ -775,8 +775,10 @@ func TestAgent_HTTPCheck(t *testing.T) {
AdvertiseAddrs: &AdvertiseAddrs{HTTP: "advertise:4646"},
normalizedAddrs: &NormalizedAddrs{HTTP: []string{"normalized:4646"}},
Consuls: []*config.ConsulConfig{{
Name: "default",
ChecksUseAdvertise: pointer.Of(false),
Name: "default",
ChecksUseAdvertise: pointer.Of(false),
ClientFailuresBeforeCritical: 2,
ClientFailuresBeforeWarning: 1,
}},
TLSConfig: &config.TLSConfig{EnableHTTP: false},
},
@@ -801,6 +803,12 @@ func TestAgent_HTTPCheck(t *testing.T) {
if expected := a.config.normalizedAddrs.HTTP[0]; check.PortLabel != expected {
t.Errorf("expected normalized addr not %q", check.PortLabel)
}
if expected := 2; check.FailuresBeforeCritical != expected {
t.Errorf("expected failured before critical count not: %q", expected)
}
if expected := 1; check.FailuresBeforeWarning != expected {
t.Errorf("expected failured before warning count not: %q", expected)
}
})
t.Run("Plain HTTP + ChecksUseAdvertise", func(t *testing.T) {
@@ -851,6 +859,10 @@ func TestAgent_HTTPCheckPath(t *testing.T) {
config: DevConfig(nil),
logger: testlog.HCLogger(t),
}
// set server-specific thresholds so the server check can be asserted to use them
a.config.Consuls[0].ServerFailuresBeforeCritical = 4
a.config.Consuls[0].ServerFailuresBeforeWarning = 3
if err := a.config.normalizeAddrs(); err != nil {
t.Fatalf("error normalizing config: %v", err)
}
@@ -864,6 +876,13 @@ func TestAgent_HTTPCheckPath(t *testing.T) {
if expected := "/v1/agent/health?type=server"; check.Path != expected {
t.Errorf("expected server check path to be %q but found %q", expected, check.Path)
}
// ensure server failures before critical and warning are set
if expected := 4; check.FailuresBeforeCritical != expected {
t.Errorf("expected failured before critical count not: %q", expected)
}
if expected := 3; check.FailuresBeforeWarning != expected {
t.Errorf("expected failured before warning count not: %q", expected)
}
// Assert client check uses /v1/agent/health?type=client
isServer = false

View File

@@ -149,10 +149,14 @@ func (c *Command) readConfig() *Config {
}), "consul-client-auto-join", "")
flags.StringVar(&defaultConsul.ClientServiceName, "consul-client-service-name", "", "")
flags.StringVar(&defaultConsul.ClientHTTPCheckName, "consul-client-http-check-name", "", "")
flags.IntVar(&defaultConsul.ClientFailuresBeforeCritical, "consul-client-failures-before-critical", 0, "")
flags.IntVar(&defaultConsul.ClientFailuresBeforeWarning, "consul-client-failures-before-warning", 0, "")
flags.StringVar(&defaultConsul.ServerServiceName, "consul-server-service-name", "", "")
flags.StringVar(&defaultConsul.ServerHTTPCheckName, "consul-server-http-check-name", "", "")
flags.StringVar(&defaultConsul.ServerSerfCheckName, "consul-server-serf-check-name", "", "")
flags.StringVar(&defaultConsul.ServerRPCCheckName, "consul-server-rpc-check-name", "", "")
flags.IntVar(&defaultConsul.ServerFailuresBeforeCritical, "consul-server-failures-before-critical", 0, "")
flags.IntVar(&defaultConsul.ServerFailuresBeforeWarning, "consul-server-failures-before-warning", 0, "")
flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
defaultConsul.ServerAutoJoin = &b
return nil
@@ -689,63 +693,67 @@ func (c *Command) AutocompleteFlags() complete.Flags {
complete.PredictFiles("*.hcl"))
return map[string]complete.Predictor{
"-dev": complete.PredictNothing,
"-dev-connect": complete.PredictNothing,
"-server": complete.PredictNothing,
"-client": complete.PredictNothing,
"-bootstrap-expect": complete.PredictAnything,
"-encrypt": complete.PredictAnything,
"-raft-protocol": complete.PredictAnything,
"-rejoin": complete.PredictNothing,
"-join": complete.PredictAnything,
"-retry-join": complete.PredictAnything,
"-retry-max": complete.PredictAnything,
"-state-dir": complete.PredictDirs("*"),
"-alloc-dir": complete.PredictDirs("*"),
"-node-class": complete.PredictAnything,
"-node-pool": complete.PredictAnything,
"-servers": complete.PredictAnything,
"-meta": complete.PredictAnything,
"-config": configFilePredictor,
"-bind": complete.PredictAnything,
"-region": complete.PredictAnything,
"-data-dir": complete.PredictDirs("*"),
"-plugin-dir": complete.PredictDirs("*"),
"-dc": complete.PredictAnything,
"-log-level": complete.PredictAnything,
"-json-logs": complete.PredictNothing,
"-node": complete.PredictAnything,
"-consul-auth": complete.PredictAnything,
"-consul-auto-advertise": complete.PredictNothing,
"-consul-ca-file": complete.PredictAnything,
"-consul-cert-file": complete.PredictAnything,
"-consul-key-file": complete.PredictAnything,
"-consul-checks-use-advertise": complete.PredictNothing,
"-consul-client-auto-join": complete.PredictNothing,
"-consul-client-service-name": complete.PredictAnything,
"-consul-client-http-check-name": complete.PredictAnything,
"-consul-server-service-name": complete.PredictAnything,
"-consul-server-http-check-name": complete.PredictAnything,
"-consul-server-serf-check-name": complete.PredictAnything,
"-consul-server-rpc-check-name": complete.PredictAnything,
"-consul-server-auto-join": complete.PredictNothing,
"-consul-ssl": complete.PredictNothing,
"-consul-verify-ssl": complete.PredictNothing,
"-consul-address": complete.PredictAnything,
"-consul-token": complete.PredictAnything,
"-vault-enabled": complete.PredictNothing,
"-vault-allow-unauthenticated": complete.PredictNothing,
"-vault-token": complete.PredictAnything,
"-vault-address": complete.PredictAnything,
"-vault-create-from-role": complete.PredictAnything,
"-vault-ca-file": complete.PredictAnything,
"-vault-ca-path": complete.PredictAnything,
"-vault-cert-file": complete.PredictAnything,
"-vault-key-file": complete.PredictAnything,
"-vault-tls-skip-verify": complete.PredictNothing,
"-vault-tls-server-name": complete.PredictAnything,
"-acl-enabled": complete.PredictNothing,
"-acl-replication-token": complete.PredictAnything,
"-dev": complete.PredictNothing,
"-dev-connect": complete.PredictNothing,
"-server": complete.PredictNothing,
"-client": complete.PredictNothing,
"-bootstrap-expect": complete.PredictAnything,
"-encrypt": complete.PredictAnything,
"-raft-protocol": complete.PredictAnything,
"-rejoin": complete.PredictNothing,
"-join": complete.PredictAnything,
"-retry-join": complete.PredictAnything,
"-retry-max": complete.PredictAnything,
"-state-dir": complete.PredictDirs("*"),
"-alloc-dir": complete.PredictDirs("*"),
"-node-class": complete.PredictAnything,
"-node-pool": complete.PredictAnything,
"-servers": complete.PredictAnything,
"-meta": complete.PredictAnything,
"-config": configFilePredictor,
"-bind": complete.PredictAnything,
"-region": complete.PredictAnything,
"-data-dir": complete.PredictDirs("*"),
"-plugin-dir": complete.PredictDirs("*"),
"-dc": complete.PredictAnything,
"-log-level": complete.PredictAnything,
"-json-logs": complete.PredictNothing,
"-node": complete.PredictAnything,
"-consul-auth": complete.PredictAnything,
"-consul-auto-advertise": complete.PredictNothing,
"-consul-ca-file": complete.PredictAnything,
"-consul-cert-file": complete.PredictAnything,
"-consul-key-file": complete.PredictAnything,
"-consul-checks-use-advertise": complete.PredictNothing,
"-consul-client-auto-join": complete.PredictNothing,
"-consul-client-service-name": complete.PredictAnything,
"-consul-client-failures-before-critical": complete.PredictAnything,
"-consul-client-failures-before-warning": complete.PredictAnything,
"-consul-client-http-check-name": complete.PredictAnything,
"-consul-server-service-name": complete.PredictAnything,
"-consul-server-http-check-name": complete.PredictAnything,
"-consul-server-serf-check-name": complete.PredictAnything,
"-consul-server-rpc-check-name": complete.PredictAnything,
"-consul-server-auto-join": complete.PredictNothing,
"-consul-server-failures-before-critical": complete.PredictAnything,
"-consul-server-failures-before-warning": complete.PredictAnything,
"-consul-ssl": complete.PredictNothing,
"-consul-verify-ssl": complete.PredictNothing,
"-consul-address": complete.PredictAnything,
"-consul-token": complete.PredictAnything,
"-vault-enabled": complete.PredictNothing,
"-vault-allow-unauthenticated": complete.PredictNothing,
"-vault-token": complete.PredictAnything,
"-vault-address": complete.PredictAnything,
"-vault-create-from-role": complete.PredictAnything,
"-vault-ca-file": complete.PredictAnything,
"-vault-ca-path": complete.PredictAnything,
"-vault-cert-file": complete.PredictAnything,
"-vault-key-file": complete.PredictAnything,
"-vault-tls-skip-verify": complete.PredictNothing,
"-vault-tls-server-name": complete.PredictAnything,
"-acl-enabled": complete.PredictNothing,
"-acl-replication-token": complete.PredictAnything,
}
}
@@ -1564,6 +1572,14 @@ Consul Options:
-consul-client-http-check-name=<name>
Specifies the HTTP health check name in Consul for the Nomad clients.
-consul-client-failures-before-critical
Specifies the number of consecutive failures before the Nomad client
Consul health check is critical. Defaults to 0.
-consul-client-failures-before-warning
Specifies the number of consecutive failures before the Nomad client
Consul health check shows a warning. Defaults to 0.
-consul-key-file=<path>
Specifies the path to the private key used for Consul communication. If this
is set then you need to also set cert_file.
@@ -1586,6 +1602,14 @@ Consul Options:
server_service_name option. This search only happens if the server does not
have a leader.
-consul-server-failures-before-critical
Specifies the number of consecutive failures before the Nomad server
Consul health check is critical. Defaults to 0.
-consul-server-failures-before-warning
Specifies the number of consecutive failures before the Nomad server
Consul health check shows a warning. Defaults to 0.
-consul-ssl
Specifies if the transport scheme should use HTTPS to communicate with the
Consul agent.

View File

@@ -1420,6 +1420,7 @@ func apiCheckRegistrationToCheck(r *api.AgentCheckRegistration) *api.AgentServic
GRPCUseTLS: r.GRPCUseTLS,
SuccessBeforePassing: r.SuccessBeforePassing,
FailuresBeforeCritical: r.FailuresBeforeCritical,
FailuresBeforeWarning: r.FailuresBeforeWarning,
}
}
@@ -1969,6 +1970,7 @@ func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host
chkReg.Interval = check.Interval.String()
chkReg.SuccessBeforePassing = check.SuccessBeforePassing
chkReg.FailuresBeforeCritical = check.FailuresBeforeCritical
chkReg.FailuresBeforeWarning = check.FailuresBeforeWarning
// Require an address for http or tcp checks
if port == 0 && check.RequiresPort() {

View File

@@ -1608,6 +1608,7 @@ func ApiServicesToStructs(in []*api.Service, group bool) []*structs.Service {
GRPCUseTLS: check.GRPCUseTLS,
SuccessBeforePassing: check.SuccessBeforePassing,
FailuresBeforeCritical: check.FailuresBeforeCritical,
FailuresBeforeWarning: check.FailuresBeforeWarning,
OnUpdate: onUpdate,
}

View File

@@ -2736,6 +2736,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
TaskName: "task1",
SuccessBeforePassing: 2,
FailuresBeforeCritical: 3,
FailuresBeforeWarning: 2,
},
},
Connect: &api.ConsulConnect{
@@ -2836,6 +2837,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
InitialStatus: "ok",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
CheckRestart: &api.CheckRestart{
Limit: 3,
IgnoreWarnings: true,
@@ -3167,6 +3169,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
OnUpdate: structs.OnUpdateRequireHealthy,
SuccessBeforePassing: 2,
FailuresBeforeCritical: 3,
FailuresBeforeWarning: 2,
},
},
Connect: &structs.ConsulConnect{
@@ -3267,6 +3270,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
GRPCUseTLS: true,
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
CheckRestart: &structs.CheckRestart{
Limit: 3,
Grace: 11 * time.Second,

View File

@@ -1028,6 +1028,7 @@ func parseChecks(service *api.Service, checkObjs *ast.ObjectList) error {
"task",
"success_before_passing",
"failures_before_critical",
"failures_before_warning",
"on_update",
"body",
}

View File

@@ -758,6 +758,7 @@ func TestParse(t *testing.T) {
Method: "POST",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}},
}},
}},
@@ -789,6 +790,7 @@ func TestParse(t *testing.T) {
Method: "POST",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}},
}},
}},

View File

@@ -22,6 +22,7 @@ job "check_pass_fail" {
initial_status = "passing"
success_before_passing = 3
failures_before_critical = 4
failures_before_warning = 2
}
}
}

View File

@@ -48,6 +48,14 @@ type ConsulConfig struct {
// to register the server RPC health check with Consul
ServerRPCCheckName string `mapstructure:"server_rpc_check_name"`
// ServerFailuresBeforeCritical is the number of failures before the
// server health check is marked as critical
ServerFailuresBeforeCritical int `mapstructure:"server_failures_before_critical"`
// ServerFailuresBeforeWarning is the number of failures before the
// server health check is marked as a warning
ServerFailuresBeforeWarning int `mapstructure:"server_failures_before_warning"`
// ClientServiceName is the name of the service that Nomad uses to register
// clients with Consul
ClientServiceName string `mapstructure:"client_service_name"`
@@ -56,6 +64,14 @@ type ConsulConfig struct {
// to register the client HTTP health check with Consul
ClientHTTPCheckName string `mapstructure:"client_http_check_name"`
// ClientFailuresBeforeCritical is the number of failures before the
// client health check is marked as critical
ClientFailuresBeforeCritical int `mapstructure:"client_failures_before_critical"`
// ClientFailuresBeforeWarning is the number of failures before the
// client health check is marked as a warning
ClientFailuresBeforeWarning int `mapstructure:"client_failures_before_warning"`
// Tags are optional service tags that get registered with the service
// in Consul
Tags []string `mapstructure:"tags"`
@@ -234,12 +250,24 @@ func (c *ConsulConfig) Merge(b *ConsulConfig) *ConsulConfig {
if b.ServerRPCCheckName != "" {
result.ServerRPCCheckName = b.ServerRPCCheckName
}
if b.ServerFailuresBeforeCritical != 0 {
result.ServerFailuresBeforeCritical = b.ServerFailuresBeforeCritical
}
if b.ServerFailuresBeforeWarning != 0 {
result.ServerFailuresBeforeWarning = b.ServerFailuresBeforeWarning
}
if b.ClientServiceName != "" {
result.ClientServiceName = b.ClientServiceName
}
if b.ClientHTTPCheckName != "" {
result.ClientHTTPCheckName = b.ClientHTTPCheckName
}
if b.ClientFailuresBeforeCritical != 0 {
result.ClientFailuresBeforeCritical = b.ClientFailuresBeforeCritical
}
if b.ClientFailuresBeforeWarning != 0 {
result.ClientFailuresBeforeWarning = b.ClientFailuresBeforeWarning
}
result.Tags = append(result.Tags, b.Tags...)
if b.AutoAdvertise != nil {
result.AutoAdvertise = pointer.Of(*b.AutoAdvertise)
@@ -391,37 +419,41 @@ func (c *ConsulConfig) Copy() *ConsulConfig {
}
return &ConsulConfig{
Name: c.Name,
ServerServiceName: c.ServerServiceName,
ServerHTTPCheckName: c.ServerHTTPCheckName,
ServerSerfCheckName: c.ServerSerfCheckName,
ServerRPCCheckName: c.ServerRPCCheckName,
ClientServiceName: c.ClientServiceName,
ClientHTTPCheckName: c.ClientHTTPCheckName,
Tags: slices.Clone(c.Tags),
AutoAdvertise: c.AutoAdvertise,
ChecksUseAdvertise: c.ChecksUseAdvertise,
Addr: c.Addr,
GRPCAddr: c.GRPCAddr,
Timeout: c.Timeout,
TimeoutHCL: c.TimeoutHCL,
Token: c.Token,
AllowUnauthenticated: c.AllowUnauthenticated,
Auth: c.Auth,
EnableSSL: c.EnableSSL,
ShareSSL: c.ShareSSL,
VerifySSL: c.VerifySSL,
GRPCCAFile: c.GRPCCAFile,
CAFile: c.CAFile,
CertFile: c.CertFile,
KeyFile: c.KeyFile,
ServerAutoJoin: c.ServerAutoJoin,
ClientAutoJoin: c.ClientAutoJoin,
Namespace: c.Namespace,
ServiceIdentity: c.ServiceIdentity.Copy(),
TaskIdentity: c.TaskIdentity.Copy(),
ServiceIdentityAuthMethod: c.ServiceIdentityAuthMethod,
TaskIdentityAuthMethod: c.TaskIdentityAuthMethod,
ExtraKeysHCL: slices.Clone(c.ExtraKeysHCL),
Name: c.Name,
ServerServiceName: c.ServerServiceName,
ServerHTTPCheckName: c.ServerHTTPCheckName,
ServerSerfCheckName: c.ServerSerfCheckName,
ServerRPCCheckName: c.ServerRPCCheckName,
ServerFailuresBeforeCritical: c.ServerFailuresBeforeCritical,
ServerFailuresBeforeWarning: c.ServerFailuresBeforeWarning,
ClientServiceName: c.ClientServiceName,
ClientHTTPCheckName: c.ClientHTTPCheckName,
ClientFailuresBeforeCritical: c.ClientFailuresBeforeCritical,
ClientFailuresBeforeWarning: c.ClientFailuresBeforeWarning,
Tags: slices.Clone(c.Tags),
AutoAdvertise: c.AutoAdvertise,
ChecksUseAdvertise: c.ChecksUseAdvertise,
Addr: c.Addr,
GRPCAddr: c.GRPCAddr,
Timeout: c.Timeout,
TimeoutHCL: c.TimeoutHCL,
Token: c.Token,
AllowUnauthenticated: c.AllowUnauthenticated,
Auth: c.Auth,
EnableSSL: c.EnableSSL,
ShareSSL: c.ShareSSL,
VerifySSL: c.VerifySSL,
GRPCCAFile: c.GRPCCAFile,
CAFile: c.CAFile,
CertFile: c.CertFile,
KeyFile: c.KeyFile,
ServerAutoJoin: c.ServerAutoJoin,
ClientAutoJoin: c.ClientAutoJoin,
Namespace: c.Namespace,
ServiceIdentity: c.ServiceIdentity.Copy(),
TaskIdentity: c.TaskIdentity.Copy(),
ServiceIdentityAuthMethod: c.ServiceIdentityAuthMethod,
TaskIdentityAuthMethod: c.TaskIdentityAuthMethod,
ExtraKeysHCL: slices.Clone(c.ExtraKeysHCL),
}
}

View File

@@ -3156,6 +3156,7 @@ func TestTaskGroupDiff(t *testing.T) {
Timeout: 1 * time.Second,
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
},
},
Connect: &ConsulConnect{
@@ -3239,6 +3240,7 @@ func TestTaskGroupDiff(t *testing.T) {
},
SuccessBeforePassing: 5,
FailuresBeforeCritical: 6,
FailuresBeforeWarning: 4,
},
},
Connect: &ConsulConnect{
@@ -3415,6 +3417,12 @@ func TestTaskGroupDiff(t *testing.T) {
Old: "4",
New: "6",
},
{
Type: DiffTypeEdited,
Name: "FailuresBeforeWarning",
Old: "2",
New: "4",
},
{
Type: DiffTypeNone,
Name: "GRPCService",
@@ -6764,6 +6772,7 @@ func TestTaskDiff(t *testing.T) {
},
SuccessBeforePassing: 1,
FailuresBeforeCritical: 1,
FailuresBeforeWarning: 1,
},
{
Name: "bar",
@@ -6776,6 +6785,7 @@ func TestTaskDiff(t *testing.T) {
Timeout: 1 * time.Second,
SuccessBeforePassing: 7,
FailuresBeforeCritical: 7,
FailuresBeforeWarning: 5,
},
{
Name: "baz",
@@ -6807,6 +6817,7 @@ func TestTaskDiff(t *testing.T) {
Timeout: 1 * time.Second,
SuccessBeforePassing: 7,
FailuresBeforeCritical: 7,
FailuresBeforeWarning: 5,
},
{
Name: "baz",
@@ -6832,6 +6843,7 @@ func TestTaskDiff(t *testing.T) {
Timeout: 1 * time.Second,
SuccessBeforePassing: 2,
FailuresBeforeCritical: 2,
FailuresBeforeWarning: 1,
},
},
},
@@ -6892,6 +6904,12 @@ func TestTaskDiff(t *testing.T) {
Old: "",
New: "2",
},
{
Type: DiffTypeAdded,
Name: "FailuresBeforeWarning",
Old: "",
New: "1",
},
{
Type: DiffTypeAdded,
Name: "GRPCUseTLS",
@@ -6970,6 +6988,12 @@ func TestTaskDiff(t *testing.T) {
Old: "1",
New: "",
},
{
Type: DiffTypeDeleted,
Name: "FailuresBeforeWarning",
Old: "1",
New: "",
},
{
Type: DiffTypeDeleted,
Name: "GRPCUseTLS",
@@ -7068,6 +7092,7 @@ func TestTaskDiff(t *testing.T) {
},
SuccessBeforePassing: 4,
FailuresBeforeCritical: 5,
FailuresBeforeWarning: 4,
OnUpdate: "require_healthy",
},
},
@@ -7201,6 +7226,12 @@ func TestTaskDiff(t *testing.T) {
Old: "5",
New: "0",
},
{
Type: DiffTypeEdited,
Name: "FailuresBeforeWarning",
Old: "4",
New: "0",
},
{
Type: DiffTypeNone,
Name: "GRPCService",

View File

@@ -78,6 +78,7 @@ type ServiceCheck struct {
TaskName string // What task to execute this check in
SuccessBeforePassing int // Number of consecutive successes required before considered healthy
FailuresBeforeCritical int // Number of consecutive failures required before considered unhealthy
FailuresBeforeWarning int // Number of consecutive failures required before showing warning
Body string // Body to use in HTTP check
OnUpdate string
}
@@ -135,6 +136,10 @@ func (sc *ServiceCheck) Equal(o *ServiceCheck) bool {
return false
}
if sc.FailuresBeforeWarning != o.FailuresBeforeWarning {
return false
}
if sc.Command != o.Command {
return false
}
@@ -383,6 +388,11 @@ func (sc *ServiceCheck) validateNomad() error {
return errors.New("failures_before_critical may only be set for Consul service checks")
}
// failures_before_warning is consul only
if sc.FailuresBeforeWarning != 0 {
return errors.New("failures_before_warning may only be set for Consul service checks")
}
// tls_server_name is consul only
if sc.TLSServerName != "" {
return errors.New("tls_server_name may only be set for Consul service checks")
@@ -438,6 +448,12 @@ func (sc *ServiceCheck) validateConsul() error {
return fmt.Errorf("failures_before_critical not supported for check of type %q", sc.Type)
}
if sc.FailuresBeforeWarning < 0 {
return fmt.Errorf("failures_before_warning must be non-negative")
} else if sc.FailuresBeforeWarning > 0 && !slices.Contains(passFailCheckTypes, sc.Type) {
return fmt.Errorf("failures_before_warning not supported for check of type %q", sc.Type)
}
return nil
}
@@ -498,6 +514,7 @@ func (sc *ServiceCheck) Hash(serviceID string) string {
// Only include pass/fail if non-zero to maintain ID stability with Nomad < 0.12
hashIntIfNonZero(h, "success", sc.SuccessBeforePassing)
hashIntIfNonZero(h, "failures", sc.FailuresBeforeCritical)
hashIntIfNonZero(h, "failures-before-warning", sc.FailuresBeforeWarning)
// Hash is used for diffing against the Consul check definition, which does
// not have an expose parameter. Instead we rely on implied changes to

View File

@@ -22,6 +22,7 @@ func TestServiceCheck_Hash(t *testing.T) {
Name: "check",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}
type sc = ServiceCheck
@@ -57,6 +58,10 @@ func TestServiceCheck_Hash(t *testing.T) {
t.Run("failures_before_critical", func(t *testing.T) {
try(t, func(s *sc) { s.FailuresBeforeCritical = 99 })
})
t.Run("failures_before_warning", func(t *testing.T) {
try(t, func(s *sc) { s.FailuresBeforeWarning = 99 })
})
}
func TestServiceCheck_Canonicalize(t *testing.T) {
@@ -136,6 +141,7 @@ func TestServiceCheck_validate_FailingTypes(t *testing.T) {
Interval: 1 * time.Second,
Timeout: 2 * time.Second,
FailuresBeforeCritical: 3,
FailuresBeforeWarning: 2,
}).validateConsul()
require.NoError(t, err)
}
@@ -153,6 +159,19 @@ func TestServiceCheck_validate_FailingTypes(t *testing.T) {
}).validateConsul()
require.EqualError(t, err, `failures_before_critical not supported for check of type "script"`)
})
t.Run("invalid", func(t *testing.T) {
err := (&ServiceCheck{
Name: "check",
Type: "script",
Command: "/nothing",
Interval: 1 * time.Second,
Timeout: 2 * time.Second,
SuccessBeforePassing: 0,
FailuresBeforeWarning: 3,
}).validateConsul()
require.EqualError(t, err, `failures_before_warning not supported for check of type "script"`)
})
}
func TestServiceCheck_validate_PassFailZero_on_scripts(t *testing.T) {
@@ -276,6 +295,16 @@ func TestServiceCheck_validateNomad(t *testing.T) {
},
exp: `failures_before_critical may only be set for Consul service checks`,
},
{
name: "failures_before_warning",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
FailuresBeforeWarning: 3, // consul only
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
},
exp: `failures_before_warning may only be set for Consul service checks`,
},
{
name: "check_restart",
sc: &ServiceCheck{

View File

@@ -25,72 +25,84 @@ correctly.
A subset of the available Nomad agent configuration can optionally be passed in
via CLI arguments. The `agent` command accepts the following arguments:
- `-alloc-dir=<path>`: Equivalent to the Client [alloc_dir] config
- `-alloc-dir=<path>`: Equivalent to the Client [alloc_dir][] config
option.
- `-acl-enabled`: Equivalent to the ACL [enabled] config option.
- `-acl-enabled`: Equivalent to the ACL [enabled][] config option.
- `-acl-replication-token`: Equivalent to the ACL [replication_token] config
- `-acl-replication-token`: Equivalent to the ACL [replication_token][] config
option.
- `-bind=<address>`: Equivalent to the [bind_addr] config option.
- `-bind=<address>`: Equivalent to the [bind_addr][] config option.
- `-bootstrap-expect=<num>`: Equivalent to the
[bootstrap_expect] config option.
[bootstrap_expect][] config option.
- `-client`: Enable client mode on the local agent.
- `-config=<path>`: Specifies the path to a configuration file or a directory of
configuration files to load. Can be specified multiple times.
- `-consul-address=<addr>`: Equivalent to the [address] config option.
- `-consul-address=<addr>`: Equivalent to the [address][] config option.
- `-consul-auth=<auth>`: Equivalent to the [auth] config option.
- `-consul-auth=<auth>`: Equivalent to the [auth][] config option.
- `-consul-auto-advertise`: Equivalent to the [auto_advertise] config option.
- `-consul-auto-advertise`: Equivalent to the [auto_advertise][] config option.
- `-consul-ca-file=<path>`: Equivalent to the [ca_file] config option.
- `-consul-ca-file=<path>`: Equivalent to the [ca_file][] config option.
- `-consul-cert-file=<path>`: Equivalent to the [cert_file] config option.
- `-consul-cert-file=<path>`: Equivalent to the [cert_file][] config option.
- `-consul-checks-use-advertise`: Equivalent to the [checks_use_advertise]
- `-consul-checks-use-advertise`: Equivalent to the [checks_use_advertise][]
config option.
- `-consul-client-auto-join`: Equivalent to the [client_auto_join] config
- `-consul-client-auto-join`: Equivalent to the [client_auto_join][] config
option.
- `-consul-client-service-name=<name>`: Equivalent to the [client_service_name]
- `-consul-client-service-name=<name>`: Equivalent to the [client_service_name][]
config option.
- `-consul-client-http-check-name=<name>`: Equivalent to the
[client_http_check_name] config option.
[client_http_check_name][] config option.
- `-consul-key-file=<path>`: Equivalent to the [key_file] config option.
- `-consul-client-failures-before-critical=<num>`: Equivalent to the
[client_failures_before_critical][] config option.
- `-consul-server-service-name=<name>`: Equivalent to the [server_service_name]
- `-consul-client-failures-before-warning=<num>`: Equivalent to the
[client_failures_before_warning][] config option.
- `-consul-key-file=<path>`: Equivalent to the [key_file][] config option.
- `-consul-server-service-name=<name>`: Equivalent to the [server_service_name][]
config option.
- `-consul-server-http-check-name=<name>`: Equivalent to the
[server_http_check_name] config option.
[server_http_check_name][] config option.
- `-consul-server-serf-check-name=<name>`: Equivalent to the
[server_serf_check_name] config option.
[server_serf_check_name][] config option.
- `-consul-server-rpc-check-name=<name>`: Equivalent to the
[server_rpc_check_name] config option.
[server_rpc_check_name][] config option.
- `-consul-server-auto-join`: Equivalent to the [server_auto_join] config
- `-consul-server-auto-join`: Equivalent to the [server_auto_join][] config
option.
- `-consul-ssl`: Equivalent to the [ssl] config option.
- `-consul-server-failures-before-critical=<num>`: Equivalent to the
[server_failures_before_critical][] config option.
- `-consul-token=<token>`: Equivalent to the [token] config option.
- `-consul-server-failures-before-warning=<num>`: Equivalent to the
[server_failures_before_warning][] config option.
- `-consul-verify-ssl`: Equivalent to the [verify_ssl] config option.
- `-consul-ssl`: Equivalent to the [ssl][] config option.
- `-data-dir=<path>`: Equivalent to the [data_dir] config option.
- `-consul-token=<token>`: Equivalent to the [token][] config option.
- `-dc=<datacenter>`: Equivalent to the [datacenter] config option.
- `-consul-verify-ssl`: Equivalent to the [verify_ssl][] config option.
- `-data-dir=<path>`: Equivalent to the [data_dir][] config option.
- `-dc=<datacenter>`: Equivalent to the [datacenter][] config option.
- `-dev`: Start the agent in development mode. This enables a pre-configured
dual-role agent (client + server) which is useful for developing or testing
@@ -109,38 +121,38 @@ via CLI arguments. The `agent` command accepts the following arguments:
- `-dev-vault`: Starts the agent in development mode with a default Vault
configuration for Nomad workload identity.
- `-encrypt`: Set the Serf encryption key. See the [Encryption Overview] for
- `-encrypt`: Set the Serf encryption key. See the [Encryption Overview][] for
more details.
- `-join=<address>`: Address of another agent to join upon starting up. This can
be specified multiple times to specify multiple agents to join.
- `-log-level=<level>`: Equivalent to the [log_level] config option.
- `-log-level=<level>`: Equivalent to the [log_level][] config option.
- `-log-include-location`: Equivalent to the [log_include_location] config option.
- `-log-include-location`: Equivalent to the [log_include_location][] config option.
- `-log-json`: Equivalent to the [log_json] config option.
- `-log-json`: Equivalent to the [log_json][] config option.
- `-meta=<key=value>`: Equivalent to the Client [meta] config option.
- `-meta=<key=value>`: Equivalent to the Client [meta][] config option.
- `-network-interface=<interface>`: Equivalent to the Client
[network_interface] config option.
[network_interface][] config option.
- `-node=<name>`: Equivalent to the [name] config option.
- `-node=<name>`: Equivalent to the [name][] config option.
- `-node-class=<class>`: Equivalent to the Client [node_class]
- `-node-class=<class>`: Equivalent to the Client [node_class][]
config option.
- `-node-pool=<node-pool>`: Equivalent to the Client [node_pool]
- `-node-pool=<node-pool>`: Equivalent to the Client [node_pool][]
config option.
- `-plugin-dir=<path>`: Equivalent to the [plugin_dir] config option.
- `-plugin-dir=<path>`: Equivalent to the [plugin_dir][] config option.
- `-region=<region>`: Equivalent to the [region] config option.
- `-region=<region>`: Equivalent to the [region][] config option.
- `-rejoin`: Equivalent to the [rejoin_after_leave] config option.
- `-rejoin`: Equivalent to the [rejoin_after_leave][] config option.
- `-retry-interval`: Equivalent to the [retry_interval] config option.
- `-retry-interval`: Equivalent to the [retry_interval][] config option.
- `-retry-join`: Similar to `-join` but allows retrying a join if the first
attempt fails.
@@ -152,14 +164,14 @@ via CLI arguments. The `agent` command accepts the following arguments:
`retry-join` can be defined as a command line flag only for servers. Clients
can configure `retry-join` only in configuration files.
- `-retry-max`: Similar to the [retry_max] config option.
- `-retry-max`: Similar to the [retry_max][] config option.
- `-server`: Enable server mode on the local agent.
- `-servers=<host:port>`: Equivalent to the Client [servers] config
- `-servers=<host:port>`: Equivalent to the Client [servers][] config
option.
- `-state-dir=<path>`: Equivalent to the Client [state_dir] config
- `-state-dir=<path>`: Equivalent to the Client [state_dir][] config
option.
- `-vault-enabled`: Whether to enable or disable Vault integration.
@@ -205,6 +217,10 @@ via CLI arguments. The `agent` command accepts the following arguments:
[checks_use_advertise]: /nomad/docs/configuration/consul#checks_use_advertise
[client_auto_join]: /nomad/docs/configuration/consul#client_auto_join
[client_http_check_name]: /nomad/docs/configuration/consul#client_http_check_name
[client_failures_before_critical]: /nomad/docs/configuration/consul#client_failures_before_critical
[client_failures_before_warning]: /nomad/docs/configuration/consul#client_failures_before_warning
[server_failures_before_critical]: /nomad/docs/configuration/consul#server_failures_before_critical
[server_failures_before_warning]: /nomad/docs/configuration/consul#server_failures_before_warning
[client_service_name]: /nomad/docs/configuration/consul#client_service_name
[configuration]: /nomad/docs/configuration
[data_dir]: /nomad/docs/configuration#data_dir

View File

@@ -141,6 +141,12 @@ agents with [`client.enabled`][] set to `true`.
- `client_http_check_name` `(string: "Nomad Client HTTP Check")` - Specifies the
HTTP health check name in Consul for the Nomad clients.
- `client_failures_before_critical` `(int: 0)` - Specifies the number of
consecutive failures before the Nomad client Consul health check is critical.
- `client_failures_before_warning` `(int: 0)` - Specifies the number of
consecutive failures before the Nomad client Consul health check shows a warning.
- `grpc_address` `(string: "127.0.0.1:8502")` - Specifies the address to the local
Consul agent for `gRPC` requests, given in the format `host:port`. Note that
Consul does not enable the [`grpc`][grpc_port] or [`grpc_tls`][grpctls_port]
@@ -186,6 +192,12 @@ agents with [`server.enabled`] set to `true`.
Consul service name defined in the `server_service_name` option. This search
only happens if the server does not have a leader.
- `server_failures_before_critical` `(int: 0)` - Specifies the number of
consecutive failures before the Nomad server Consul health check is critical.
- `server_failures_before_warning` `(int: 0)` - Specifies the number of
consecutive failures before the Nomad server Consul health check shows a warning.
- `service_identity` <code>([Identity](#service_identity-parameters): nil)</code> - Specifies
a default Workload Identity to use when obtaining Service Identity tokens from
Consul to register services. Refer to [Workload Identity](#workload-identity)

View File

@@ -92,11 +92,15 @@ job "example" {
until Nomad produces an initial check status result.
- `success_before_passing` `(int: 0)` - The number of consecutive successful checks
required before Consul will transition the service status to [`passing`][consul_passfail].
required before Consul will transition the service status to [`passing`][consul_success_before_passing].
Only supported in the Consul service provider.
- `failures_before_critical` `(int: 0)` - The number of consecutive failing checks
required before Consul will transition the service status to [`critical`][consul_passfail].
required before Consul will transition the service status to [`critical`][consul_failure_before_critical].
Only supported in the Consul service provider.
- `failures_before_warning` `(int: 0)` - The number of consecutive failing checks
required before Consul will transition the service status to [`warning`][consul_failure_before_warning].
Only supported in the Consul service provider.
- `interval` `(string: <required>)` - Specifies the frequency of the health checks
@@ -464,7 +468,9 @@ Output = nomad: Get "http://:9999/": dial tcp :9999: connect: connection re
</small>
[check_restart_block]: /nomad/docs/job-specification/check_restart
[consul_passfail]: /consul/docs/discovery/checks#success-failures-before-passing-critical
[consul_success_before_passing]: /consul/api-docs/agent/check#successbeforepassing
[consul_failure_before_critical]: /consul/api-docs/agent/check#failuresbeforecritical
[consul_failure_before_warning]: /consul/api-docs/agent/check#failuresbeforewarning
[network]: /nomad/docs/job-specification/network 'Nomad network Job Specification'
[service]: /nomad/docs/job-specification/service
[service_task]: /nomad/docs/job-specification/service#task-1