diff --git a/client/consul/check.go b/client/consul/check.go index f068863ee..052c5c78c 100644 --- a/client/consul/check.go +++ b/client/consul/check.go @@ -80,6 +80,7 @@ type Check interface { Run() *cstructs.CheckResult ID() string Interval() time.Duration + Timeout() time.Duration } // Returns a random stagger interval between 0 and the duration diff --git a/client/consul/sync.go b/client/consul/sync.go index 41cf85478..b1bc11ffa 100644 --- a/client/consul/sync.go +++ b/client/consul/sync.go @@ -429,6 +429,9 @@ func (c *ConsulService) consulPresent() bool { // runCheck runs a check and updates the corresponding ttl check in consul func (c *ConsulService) runCheck(check Check) { res := check.Run() + if res.Duration >= check.Timeout() { + c.logger.Printf("[DEBUG] consul.sync: check took time: %v, timeout: %v", res.Duration, check.Timeout()) + } state := consul.HealthCritical output := res.Output switch res.ExitCode { @@ -445,7 +448,7 @@ func (c *ConsulService) runCheck(check Check) { } if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil { if c.availble { - c.logger.Printf("[DEBUG] error updating ttl check for check %q: %v", check.ID(), err) + c.logger.Printf("[DEBUG] consul.sync: error updating ttl check for check %q: %v", check.ID(), err) c.availble = false } else { c.availble = true diff --git a/client/driver/executor/checks.go b/client/driver/executor/checks.go index 0d7e6eb51..9dd06e10d 100644 --- a/client/driver/executor/checks.go +++ b/client/driver/executor/checks.go @@ -20,20 +20,26 @@ var ( client *docker.Client ) +const ( + // The default check timeout + defaultCheckTimeout = 30 * time.Second +) + // DockerScriptCheck runs nagios compatible scripts in a docker container and // provides the check result type DockerScriptCheck struct { - id string - interval time.Duration - containerID string + id string // id of the check + interval time.Duration // interval of the check + timeout time.Duration // timeout of the check + containerID string // container id in which the check will be invoked logger *log.Logger - cmd string - args []string + cmd string // check command + args []string // check command arguments - dockerEndpoint string - tlsCert string - tlsCa string - tlsKey string + dockerEndpoint string // docker endpoint + tlsCert string // path to tls certificate + tlsCa string // path to tls ca + tlsKey string // path to tls key } // dockerClient creates the client to interact with the docker daemon @@ -117,15 +123,24 @@ func (d *DockerScriptCheck) Interval() time.Duration { return d.interval } +// Timeout returns the duration after which a check is timed out. +func (d *DockerScriptCheck) Timeout() time.Duration { + if d.timeout == 0 { + return defaultCheckTimeout + } + return d.timeout +} + // ExecScriptCheck runs a nagios compatible script and returns the check result type ExecScriptCheck struct { - id string - interval time.Duration - cmd string - args []string - taskDir string + id string // id of the script check + interval time.Duration // interval at which the check is invoked + timeout time.Duration // timeout duration of the check + cmd string // command of the check + args []string // args passed to the check + taskDir string // the root directory of the check - FSIsolation bool + FSIsolation bool // indicates whether the check has to be run within a chroot } // Run runs an exec script check @@ -146,6 +161,7 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult { for { select { case err := <-errCh: + endTime := time.Now() if err == nil { return &cstructs.CheckResult{ ExitCode: 0, @@ -163,8 +179,9 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult { ExitCode: exitCode, Output: string(buf.Bytes()), Timestamp: ts, + Duration: endTime.Sub(ts), } - case <-time.After(30 * time.Second): + case <-time.After(e.Timeout()): errCh <- fmt.Errorf("timed out after waiting 30s") } } @@ -180,3 +197,11 @@ func (e *ExecScriptCheck) ID() string { func (e *ExecScriptCheck) Interval() time.Duration { return e.interval } + +// Timeout returns the duration after which a check is timed out. +func (e *ExecScriptCheck) Timeout() time.Duration { + if e.timeout == 0 { + return defaultCheckTimeout + } + return e.timeout +} diff --git a/client/driver/executor/executor.go b/client/driver/executor/executor.go index 3f0cb3169..ce36c5e4e 100644 --- a/client/driver/executor/executor.go +++ b/client/driver/executor/executor.go @@ -566,6 +566,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str return &DockerScriptCheck{ id: checkID, interval: check.Interval, + timeout: check.Timeout, containerID: e.consulCtx.ContainerID, logger: e.logger, cmd: check.Command, @@ -577,6 +578,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str return &ExecScriptCheck{ id: checkID, interval: check.Interval, + timeout: check.Timeout, cmd: check.Command, args: check.Args, taskDir: e.taskDir, diff --git a/client/driver/structs/structs.go b/client/driver/structs/structs.go index ecc738e76..9059df6ec 100644 --- a/client/driver/structs/structs.go +++ b/client/driver/structs/structs.go @@ -68,8 +68,19 @@ func (r *RecoverableError) Error() string { // CheckResult encapsulates the result of a check type CheckResult struct { - ExitCode int - Output string + + // ExitCode is the exit code of the check + ExitCode int + + // Output is the output of the check script + Output string + + // Timestamp is the time at which the check was executed Timestamp time.Time - Err error + + // Duration is the time it took the check to run + Duration time.Duration + + // Err is the error that a check returned + Err error }