From 6db40283fe2efa37545c3810421416e864b219c3 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Sat, 11 Jun 2016 22:09:34 +0200 Subject: [PATCH] Emitting client resource usage metrics as gauges instead of k/v pairs --- client/client.go | 32 ++++++++++++++++++-------------- client/task_runner.go | 34 +++++++++++++++++----------------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/client/client.go b/client/client.go index 1c32e13e1..4dbe0b361 100644 --- a/client/client.go +++ b/client/client.go @@ -1351,25 +1351,29 @@ func (c *Client) collectHostStats() { // emitStats pushes host resource usage stats to remote metrics collection sinks func (c *Client) emitStats(hStats *stats.HostStats) { - metrics.EmitKey([]string{"memory", "total"}, float32(hStats.Memory.Total)) - metrics.EmitKey([]string{"memory", "available"}, float32(hStats.Memory.Available)) - metrics.EmitKey([]string{"memory", "used"}, float32(hStats.Memory.Used)) - metrics.EmitKey([]string{"memory", "free"}, float32(hStats.Memory.Free)) + nodeID, err := c.nodeID() + if err != nil { + return + } + metrics.SetGauge([]string{"client", "host", "memory", nodeID, "total"}, float32(hStats.Memory.Total)) + metrics.SetGauge([]string{"client", "host", "memory", nodeID, "available"}, float32(hStats.Memory.Available)) + metrics.SetGauge([]string{"client", "host", "memory", nodeID, "used"}, float32(hStats.Memory.Used)) + metrics.SetGauge([]string{"client", "host", "memory", nodeID, "free"}, float32(hStats.Memory.Free)) - metrics.EmitKey([]string{"uptime"}, float32(hStats.Uptime)) + metrics.SetGauge([]string{"uptime"}, float32(hStats.Uptime)) for _, cpu := range hStats.CPU { - metrics.EmitKey([]string{"cpu", cpu.CPU, "total"}, float32(cpu.Total)) - metrics.EmitKey([]string{"cpu", cpu.CPU, "user"}, float32(cpu.User)) - metrics.EmitKey([]string{"cpu", cpu.CPU, "idle"}, float32(cpu.Idle)) - metrics.EmitKey([]string{"cpu", cpu.CPU, "system"}, float32(cpu.System)) + metrics.SetGauge([]string{"client", 
"host", "cpu", nodeID, cpu.CPU, "total"}, float32(cpu.Total)) + metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "user"}, float32(cpu.User)) + metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "idle"}, float32(cpu.Idle)) + metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "system"}, float32(cpu.System)) } for _, disk := range hStats.DiskStats { - metrics.EmitKey([]string{"disk", disk.Device, "size"}, float32(disk.Size)) - metrics.EmitKey([]string{"disk", disk.Device, "used"}, float32(disk.Used)) - metrics.EmitKey([]string{"disk", disk.Device, "available"}, float32(disk.Available)) - metrics.EmitKey([]string{"disk", disk.Device, "used_percent"}, float32(disk.UsedPercent)) - metrics.EmitKey([]string{"disk", disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent)) + metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "size"}, float32(disk.Size)) + metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used"}, float32(disk.Used)) + metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "available"}, float32(disk.Available)) + metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used_percent"}, float32(disk.UsedPercent)) + metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent)) } } diff --git a/client/task_runner.go b/client/task_runner.go index 758d7626a..a6d77b886 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -662,37 +662,37 @@ func (r *TaskRunner) Destroy() { // sinks func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) { if ru.ResourceUsage.MemoryStats != nil { - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "cache"}, 
float32(ru.ResourceUsage.MemoryStats.Cache)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage)) } if ru.ResourceUsage.CpuStats != nil { - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "percent"}, float32(ru.ResourceUsage.CpuStats.Percent)) - metrics.EmitKey([]string{r.alloc.Job.Name, 
r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "percent"}, float32(ru.ResourceUsage.CpuStats.Percent)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods)) } for pid, pidStats := range ru.Pids { if pidStats.MemoryStats != nil { // Not emitting max, kernel usages since we never get them on a per-pid // basis - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "memory", "rss"}, float32(pidStats.MemoryStats.RSS)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "memory", "cache"}, float32(pidStats.MemoryStats.Cache)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, 
pid, "memory", "swap"}, float32(pidStats.MemoryStats.Swap)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "memory", "rss"}, float32(pidStats.MemoryStats.RSS)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "memory", "cache"}, float32(pidStats.MemoryStats.Cache)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "memory", "swap"}, float32(pidStats.MemoryStats.Swap)) } if pidStats.CpuStats != nil { // Not emitting throttled time and periods since we never get them on a // per pid basis - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "cpu", "percent"}, float32(pidStats.CpuStats.Percent)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "cpu", "system"}, float32(pidStats.CpuStats.SystemMode)) - metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "cpu", "user"}, float32(pidStats.CpuStats.UserMode)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "cpu", "percent"}, float32(pidStats.CpuStats.Percent)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "cpu", "system"}, float32(pidStats.CpuStats.SystemMode)) + metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "cpu", "user"}, float32(pidStats.CpuStats.UserMode)) } } }