mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
connect: deployments should wait for Connect sidecar checks (#19334)
When a Connect service is registered with Consul, Nomad includes the nested `Connect.SidecarService` field that includes health checks for the Envoy proxy. Because these are not part of the job spec, the alloc health tracker created by `health_hook` doesn't know to read the value of these checks. In many circumstances this won't be noticed, but if the Envoy health check happens to take longer than the `update.min_healthy_time` (perhaps because it's been set low), it's possible for a deployment to progress too early such that there will briefly be no healthy instances of the service available in Consul. Update the Consul service client to find the nested sidecar service in the service catalog and attach it to the results provided to the tracker. The tracker can then check the sidecar health checks. Fixes: https://github.com/hashicorp/nomad/issues/19269
This commit is contained in:
3
.changelog/19334.txt
Normal file
3
.changelog/19334.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:bug
|
||||||
|
connect: Fixed a bug where deployments would not wait for Connect sidecar task health checks to pass
|
||||||
|
```
|
||||||
@@ -575,6 +575,8 @@ func evaluateConsulChecks(services []*structs.Service, registrations *servicereg
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The sidecar service and checks are created when the service is
|
||||||
|
// registered, not on job registration, so they won't appear in the jobspec.
|
||||||
if !maps.Equal(expChecks, regChecks) {
|
if !maps.Equal(expChecks, regChecks) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -595,6 +597,21 @@ func evaluateConsulChecks(services []*structs.Service, registrations *servicereg
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check the health of Connect sidecars, so we don't accidentally
|
||||||
|
// mark an allocation healthy before min_healthy_time. These don't
|
||||||
|
// currently support on_update.
|
||||||
|
if service.SidecarService != nil {
|
||||||
|
for _, check := range service.SidecarChecks {
|
||||||
|
switch check.Status {
|
||||||
|
case api.HealthWarning:
|
||||||
|
return false
|
||||||
|
case api.HealthCritical:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1466,6 +1466,42 @@ func TestTracker_evaluateConsulChecks(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "failing sidecar checks only",
|
||||||
|
exp: false,
|
||||||
|
tg: &structs.TaskGroup{
|
||||||
|
Services: []*structs.Service{{
|
||||||
|
Name: "group-s1",
|
||||||
|
Checks: []*structs.ServiceCheck{
|
||||||
|
{Name: "c1"},
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
registrations: &serviceregistration.AllocRegistration{
|
||||||
|
Tasks: map[string]*serviceregistration.ServiceRegistrations{
|
||||||
|
"group": {
|
||||||
|
Services: map[string]*serviceregistration.ServiceRegistration{
|
||||||
|
"abc123": {
|
||||||
|
ServiceID: "abc123",
|
||||||
|
Checks: []*consulapi.AgentCheck{
|
||||||
|
{
|
||||||
|
Name: "c1",
|
||||||
|
Status: consulapi.HealthPassing,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
SidecarService: &consulapi.AgentService{},
|
||||||
|
SidecarChecks: []*consulapi.AgentCheck{
|
||||||
|
{
|
||||||
|
Name: "sidecar-check",
|
||||||
|
Status: consulapi.HealthCritical,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
|
|||||||
@@ -146,6 +146,12 @@ type ServiceRegistration struct {
|
|||||||
|
|
||||||
// Checks is the status of the registered checks.
|
// Checks is the status of the registered checks.
|
||||||
Checks []*api.AgentCheck
|
Checks []*api.AgentCheck
|
||||||
|
|
||||||
|
// SidecarService is the AgentService registered in Consul for any Connect sidecar
|
||||||
|
SidecarService *api.AgentService
|
||||||
|
|
||||||
|
// SidecarChecks is the status of the registered checks for any Connect sidecar
|
||||||
|
SidecarChecks []*api.AgentCheck
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ServiceRegistration) copy() *ServiceRegistration {
|
func (s *ServiceRegistration) copy() *ServiceRegistration {
|
||||||
|
|||||||
@@ -1760,6 +1760,15 @@ func (c *ServiceClient) AllocRegistrations(allocID string) (*serviceregistration
|
|||||||
sreg.Checks = append(sreg.Checks, check)
|
sreg.Checks = append(sreg.Checks, check)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if sidecarService := getNomadSidecar(serviceID, services); sidecarService != nil {
|
||||||
|
sreg.SidecarService = sidecarService
|
||||||
|
for _, check := range checks {
|
||||||
|
if check.ServiceID == sidecarService.ID {
|
||||||
|
sreg.SidecarChecks = append(sreg.SidecarChecks, check)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user