diff --git a/command/agent/agent.go b/command/agent/agent.go index 5fc211174..0f937fb86 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -704,8 +704,10 @@ func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { } // Determine version for TLSSkipVerify + seen := false if self, err := client.Agent().Self(); err == nil { a.consulSupportsTLSSkipVerify = consulSupportsTLSSkipVerify(self) + seen = true } // Create Consul Catalog client for service discovery. @@ -713,6 +715,14 @@ func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { // Create Consul Service client for service advertisement and checks. a.consulService = consul.NewServiceClient(client.Agent(), a.consulSupportsTLSSkipVerify, a.logger) + + // If we've seen the Consul agent already, mark it so future Consul + // errors are logged + if seen { + a.consulService.MarkSeen() + } + + // Run the Consul service client's sync'ing main loop go a.consulService.Run() return nil } diff --git a/command/agent/consul/client.go b/command/agent/consul/client.go index 2228276f0..d3c020a18 100644 --- a/command/agent/consul/client.go +++ b/command/agent/consul/client.go @@ -8,6 +8,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" metrics "github.com/armon/go-metrics" @@ -115,6 +116,10 @@ type ServiceClient struct { agentServices map[string]struct{} agentChecks map[string]struct{} agentLock sync.Mutex + + // seen is 1 if Consul has ever been seen; otherwise 0. Accessed with + // atomics; int32 because 64-bit atomics need 8-byte alignment on 32-bit platforms. + seen int32 } // NewServiceClient creates a new Consul ServiceClient from an existing Consul API @@ -139,6 +144,21 @@ func NewServiceClient(consulClient AgentAPI, skipVerifySupport bool, logger *log } } +// seen is the value MarkSeen stores in ServiceClient.seen and Seen compares against +const seen = 1 + +// MarkSeen marks Consul as having been seen (meaning at least one operation +// has succeeded). 
+func (c *ServiceClient) MarkSeen() { + atomic.StoreInt32(&c.seen, seen) +} + +// Seen returns true if any Consul operation has ever succeeded. Useful to +// squelch errors if Consul isn't running. +func (c *ServiceClient) Seen() bool { + return atomic.LoadInt32(&c.seen) == seen +} + // Run the Consul main loop which retries operations against Consul. It should // be called exactly once. func (c *ServiceClient) Run() { @@ -155,10 +175,13 @@ func (c *ServiceClient) Run() { } if err := c.sync(); err != nil { - if failures == 0 { - c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err) + // Only log and track failures after Consul has been seen + if c.Seen() { + if failures == 0 { + c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err) + } + failures++ } - failures++ if !retryTimer.Stop() { // Timer already expired, since the timer may // or may not have been read in the select{} @@ -241,6 +264,9 @@ func (c *ServiceClient) sync() error { return fmt.Errorf("error querying Consul services: %v", err) } + // A Consul operation has succeeded, mark Consul as having been seen + c.MarkSeen() + consulChecks, err := c.client.Checks() if err != nil { metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) @@ -666,6 +692,11 @@ func (c *ServiceClient) Shutdown() error { // Don't wait forever though } + // If Consul was never seen, exit early + if !c.Seen() { + return nil + } + // Always attempt to deregister Nomad agent Consul entries, even if // deadline was reached for id := range c.agentServices {