diff --git a/command/agent/consul/catalog_testing.go b/command/agent/consul/catalog_testing.go index 61e5e3088..c087f024c 100644 --- a/command/agent/consul/catalog_testing.go +++ b/command/agent/consul/catalog_testing.go @@ -187,6 +187,12 @@ func (c *MockAgent) ServiceDeregister(serviceID string) error { defer c.mu.Unlock() c.hits++ delete(c.services, serviceID) + for k, v := range c.checks { + if v.ServiceID == serviceID { + delete(c.checks, k) + delete(c.checkTTLs, k) + } + } return nil } diff --git a/command/agent/consul/client.go b/command/agent/consul/client.go index 645e907fd..ca93249b9 100644 --- a/command/agent/consul/client.go +++ b/command/agent/consul/client.go @@ -591,12 +591,6 @@ func (c *ServiceClient) sync(reason syncReason) error { return fmt.Errorf("error querying Consul services: %v", err) } - consulChecks, err := c.client.Checks() - if err != nil { - metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) - return fmt.Errorf("error querying Consul checks: %v", err) - } - inProbation := time.Now().Before(c.deregisterProbationExpiry) // Remove Nomad services in Consul but unknown locally @@ -656,6 +650,12 @@ func (c *ServiceClient) sync(reason syncReason) error { } + consulChecks, err := c.client.Checks() + if err != nil { + metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) + return fmt.Errorf("error querying Consul checks: %v", err) + } + // Remove Nomad checks in Consul but unknown locally for id, check := range consulChecks { if _, ok := c.checks[id]; ok { @@ -1227,9 +1227,28 @@ func (c *ServiceClient) Shutdown() error { c.logger.Error("failed deregistering agent service", "service_id", id, "error", err) } } + + remainingChecks, err := c.client.Checks() + if err != nil { + c.logger.Error("failed listing remaining checks after deregistering services", "error", err) + } + + checkRemains := func(id string) bool { + for _, c := range remainingChecks { + if c.CheckID == id { + return true + } + } + return false + } + for id := range c.agentChecks { - if err := c.client.CheckDeregister(id); err != nil { - c.logger.Error("failed deregistering agent check", "check_id", id, "error", err) + // if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway + // if we could list the remaining checks, verify that the check we store still exists before removing it. + if remainingChecks == nil || checkRemains(id) { + if err := c.client.CheckDeregister(id); err != nil { + c.logger.Error("failed deregistering agent check", "check_id", id, "error", err) + } } }