Synchronously deregister agent on shutdown

Fixes #2891

Previously the agent services and checks were being asynchrously
deregistered on shutdown, so it was a race between the sync goroutine
deregistering them and Nomad shutting down.

This switches to synchronously deregister agent serivces and checks
which doesn't really have a downside since the sync goroutines retry
behavior doesn't help on shutdown anyway.
This commit is contained in:
Michael Schurter
2017-07-24 11:40:37 -07:00
parent 80c4b03f07
commit b01a00a7f3
2 changed files with 15 additions and 19 deletions

View File

@@ -648,29 +648,14 @@ func (c *ServiceClient) Shutdown() error {
// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
// entries.
c.agentLock.Lock()
defer c.agentLock.Unlock()
select {
case <-c.shutdownCh:
return nil
default:
close(c.shutdownCh)
}
// Deregister Nomad agent Consul entries before closing shutdown.
ops := operations{}
for id := range c.agentServices {
ops.deregServices = append(ops.deregServices, id)
}
for id := range c.agentChecks {
ops.deregChecks = append(ops.deregChecks, id)
}
c.commit(&ops)
// Then signal shutdown
close(c.shutdownCh)
// Safe to unlock after shutdownCh closed as RegisterAgent will check
// shutdownCh before committing.
c.agentLock.Unlock()
// Give run loop time to sync, but don't block indefinitely
deadline := time.After(c.shutdownWait)
@@ -679,7 +664,19 @@ func (c *ServiceClient) Shutdown() error {
case <-c.exitCh:
case <-deadline:
// Don't wait forever though
return fmt.Errorf("timed out waiting for Consul operations to complete")
}
// Always attempt to deregister Nomad agent Consul entries, even if
// deadline was reached
for id := range c.agentServices {
if err := c.client.ServiceDeregister(id); err != nil {
c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err)
}
}
for id := range c.agentChecks {
if err := c.client.CheckDeregister(id); err != nil {
c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err)
}
}
// Give script checks time to exit (no need to lock as Run() has exited)

View File

@@ -31,7 +31,6 @@ func testLogger() *log.Logger {
// TestConsul_Integration asserts TaskRunner properly registers and deregisters
// services and checks with Consul using an embedded Consul agent.
func TestConsul_Integration(t *testing.T) {
t.Skip("-short set; skipping")
if _, ok := driver.BuiltinDrivers["mock_driver"]; !ok {
t.Skip(`test requires mock_driver; run with "-tags nomad_test"`)
}