mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
Synchronously deregister agent on shutdown
Fixes #2891 Previously the agent services and checks were being asynchrously deregistered on shutdown, so it was a race between the sync goroutine deregistering them and Nomad shutting down. This switches to synchronously deregister agent serivces and checks which doesn't really have a downside since the sync goroutines retry behavior doesn't help on shutdown anyway.
This commit is contained in:
@@ -648,29 +648,14 @@ func (c *ServiceClient) Shutdown() error {
|
||||
// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
|
||||
// entries.
|
||||
c.agentLock.Lock()
|
||||
defer c.agentLock.Unlock()
|
||||
select {
|
||||
case <-c.shutdownCh:
|
||||
return nil
|
||||
default:
|
||||
close(c.shutdownCh)
|
||||
}
|
||||
|
||||
// Deregister Nomad agent Consul entries before closing shutdown.
|
||||
ops := operations{}
|
||||
for id := range c.agentServices {
|
||||
ops.deregServices = append(ops.deregServices, id)
|
||||
}
|
||||
for id := range c.agentChecks {
|
||||
ops.deregChecks = append(ops.deregChecks, id)
|
||||
}
|
||||
c.commit(&ops)
|
||||
|
||||
// Then signal shutdown
|
||||
close(c.shutdownCh)
|
||||
|
||||
// Safe to unlock after shutdownCh closed as RegisterAgent will check
|
||||
// shutdownCh before committing.
|
||||
c.agentLock.Unlock()
|
||||
|
||||
// Give run loop time to sync, but don't block indefinitely
|
||||
deadline := time.After(c.shutdownWait)
|
||||
|
||||
@@ -679,7 +664,19 @@ func (c *ServiceClient) Shutdown() error {
|
||||
case <-c.exitCh:
|
||||
case <-deadline:
|
||||
// Don't wait forever though
|
||||
return fmt.Errorf("timed out waiting for Consul operations to complete")
|
||||
}
|
||||
|
||||
// Always attempt to deregister Nomad agent Consul entries, even if
|
||||
// deadline was reached
|
||||
for id := range c.agentServices {
|
||||
if err := c.client.ServiceDeregister(id); err != nil {
|
||||
c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err)
|
||||
}
|
||||
}
|
||||
for id := range c.agentChecks {
|
||||
if err := c.client.CheckDeregister(id); err != nil {
|
||||
c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Give script checks time to exit (no need to lock as Run() has exited)
|
||||
|
||||
@@ -31,7 +31,6 @@ func testLogger() *log.Logger {
|
||||
// TestConsul_Integration asserts TaskRunner properly registers and deregisters
|
||||
// services and checks with Consul using an embedded Consul agent.
|
||||
func TestConsul_Integration(t *testing.T) {
|
||||
t.Skip("-short set; skipping")
|
||||
if _, ok := driver.BuiltinDrivers["mock_driver"]; !ok {
|
||||
t.Skip(`test requires mock_driver; run with "-tags nomad_test"`)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user