Track whether Consul has ever been seen

Need a way to squelch Consul operation errors on shutdown. If Consul has
never been seen, don't log errors about deregistrations failing.
This commit is contained in:
Michael Schurter
2017-07-24 12:12:02 -07:00
parent b01a00a7f3
commit 82ea86fb6f
2 changed files with 44 additions and 3 deletions

View File

@@ -704,8 +704,10 @@ func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
}
// Determine version for TLSSkipVerify
seen := false
if self, err := client.Agent().Self(); err == nil {
a.consulSupportsTLSSkipVerify = consulSupportsTLSSkipVerify(self)
seen = true
}
// Create Consul Catalog client for service discovery.
@@ -713,6 +715,14 @@ func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
// Create Consul Service client for service advertisement and checks.
a.consulService = consul.NewServiceClient(client.Agent(), a.consulSupportsTLSSkipVerify, a.logger)
// If we've seen the Consul agent already, mark it so future Consul
// errors are logged
if seen {
a.consulService.MarkSeen()
}
// Run the Consul service client's sync'ing main loop
go a.consulService.Run()
return nil
}

View File

@@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
metrics "github.com/armon/go-metrics"
@@ -115,6 +116,10 @@ type ServiceClient struct {
agentServices map[string]struct{}
agentChecks map[string]struct{}
agentLock sync.Mutex
// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
// atomics.
seen int64
}
// NewServiceClient creates a new Consul ServiceClient from an existing Consul API
@@ -139,6 +144,21 @@ func NewServiceClient(consulClient AgentAPI, skipVerifySupport bool, logger *log
}
}
// seen is the value MarkSeen stores in ServiceClient.seen and the value Seen
// compares that field against.
const seen = 1
// MarkSeen records that Consul has been reached, i.e. that at least one Consul
// operation has succeeded. It atomically sets the seen flag, which Seen reads.
//
// NOTE(review): seen is an int64 accessed through sync/atomic; on 32-bit
// platforms such fields must be 64-bit aligned — confirm its position in
// ServiceClient guarantees that.
func (c *ServiceClient) MarkSeen() {
	flag := &c.seen
	atomic.StoreInt64(flag, seen)
}
// Seen reports whether Consul has ever been marked as seen, meaning some Consul
// operation has succeeded at least once. Callers can use it to squelch errors
// when Consul isn't running.
func (c *ServiceClient) Seen() bool {
	current := atomic.LoadInt64(&c.seen)
	return current == seen
}
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
func (c *ServiceClient) Run() {
@@ -155,10 +175,13 @@ func (c *ServiceClient) Run() {
}
if err := c.sync(); err != nil {
if failures == 0 {
c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err)
// Only log and track failures after Consul has been seen
if c.Seen() {
if failures == 0 {
c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err)
}
failures++
}
failures++
if !retryTimer.Stop() {
// Timer already expired, since the timer may
// or may not have been read in the select{}
@@ -241,6 +264,9 @@ func (c *ServiceClient) sync() error {
return fmt.Errorf("error querying Consul services: %v", err)
}
// A Consul operation has succeeded, mark Consul as having been seen
c.MarkSeen()
consulChecks, err := c.client.Checks()
if err != nil {
metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
@@ -666,6 +692,11 @@ func (c *ServiceClient) Shutdown() error {
// Don't wait forever though
}
// If Consul was never seen, exit early
if !c.Seen() {
return nil
}
// Always attempt to deregister Nomad agent Consul entries, even if
// deadline was reached
for id := range c.agentServices {