rpc accept loop: back off (5ms, doubling up to 1s) and log the delay when accepting a connection fails with a temporary error, to avoid a fast fail loop (NMD-1173)

This commit is contained in:
Chris Baker
2018-12-07 20:11:46 +00:00
parent c74e2d0243
commit 22a4bcd3ca

View File

@@ -84,6 +84,7 @@ type RPCContext struct {
// listen is used to listen for incoming RPC connections
func (r *rpcHandler) listen(ctx context.Context) {
defer close(r.listenerCh)
var tempDelay time.Duration
for {
select {
case <-ctx.Done():
@@ -105,9 +106,21 @@ func (r *rpcHandler) listen(ctx context.Context) {
default:
}
r.logger.Error("failed to accept RPC conn", "error", err)
if ne, ok := err.(net.Error); ok && ne.Temporary() {
if tempDelay == 0 {
tempDelay = 5 * time.Millisecond
} else {
tempDelay *= 2
}
if max := 1 * time.Second; tempDelay > max {
tempDelay = max
}
r.logger.Error("failed to accept RPC conn", "error", err, "delay", tempDelay)
time.Sleep(tempDelay)
}
continue
}
tempDelay = 0
go r.handleConn(ctx, conn, &RPCContext{Conn: conn})
metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1)