mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 18:35:44 +03:00
Emitting client resource usage metrics as guages instead of k/v pairs
This commit is contained in:
@@ -1351,25 +1351,29 @@ func (c *Client) collectHostStats() {
|
||||
|
||||
// emitStats pushes host resource usage stats to remote metrics collection sinks
|
||||
func (c *Client) emitStats(hStats *stats.HostStats) {
|
||||
metrics.EmitKey([]string{"memory", "total"}, float32(hStats.Memory.Total))
|
||||
metrics.EmitKey([]string{"memory", "available"}, float32(hStats.Memory.Available))
|
||||
metrics.EmitKey([]string{"memory", "used"}, float32(hStats.Memory.Used))
|
||||
metrics.EmitKey([]string{"memory", "free"}, float32(hStats.Memory.Free))
|
||||
nodeID, err := c.nodeID()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "total"}, float32(hStats.Memory.Total))
|
||||
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "available"}, float32(hStats.Memory.Available))
|
||||
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "used"}, float32(hStats.Memory.Used))
|
||||
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "free"}, float32(hStats.Memory.Free))
|
||||
|
||||
metrics.EmitKey([]string{"uptime"}, float32(hStats.Uptime))
|
||||
metrics.SetGauge([]string{"uptime"}, float32(hStats.Uptime))
|
||||
|
||||
for _, cpu := range hStats.CPU {
|
||||
metrics.EmitKey([]string{"cpu", cpu.CPU, "total"}, float32(cpu.Total))
|
||||
metrics.EmitKey([]string{"cpu", cpu.CPU, "user"}, float32(cpu.User))
|
||||
metrics.EmitKey([]string{"cpu", cpu.CPU, "idle"}, float32(cpu.Idle))
|
||||
metrics.EmitKey([]string{"cpu", cpu.CPU, "system"}, float32(cpu.System))
|
||||
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "total"}, float32(cpu.Total))
|
||||
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "user"}, float32(cpu.User))
|
||||
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "idle"}, float32(cpu.Idle))
|
||||
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "system"}, float32(cpu.System))
|
||||
}
|
||||
|
||||
for _, disk := range hStats.DiskStats {
|
||||
metrics.EmitKey([]string{"disk", disk.Device, "size"}, float32(disk.Size))
|
||||
metrics.EmitKey([]string{"disk", disk.Device, "used"}, float32(disk.Used))
|
||||
metrics.EmitKey([]string{"disk", disk.Device, "available"}, float32(disk.Available))
|
||||
metrics.EmitKey([]string{"disk", disk.Device, "used_percent"}, float32(disk.UsedPercent))
|
||||
metrics.EmitKey([]string{"disk", disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent))
|
||||
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "size"}, float32(disk.Size))
|
||||
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used"}, float32(disk.Used))
|
||||
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "available"}, float32(disk.Available))
|
||||
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used_percent"}, float32(disk.UsedPercent))
|
||||
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -662,37 +662,37 @@ func (r *TaskRunner) Destroy() {
|
||||
// sinks
|
||||
func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
|
||||
if ru.ResourceUsage.MemoryStats != nil {
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
|
||||
}
|
||||
|
||||
if ru.ResourceUsage.CpuStats != nil {
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "percent"}, float32(ru.ResourceUsage.CpuStats.Percent))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "percent"}, float32(ru.ResourceUsage.CpuStats.Percent))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods))
|
||||
}
|
||||
|
||||
for pid, pidStats := range ru.Pids {
|
||||
if pidStats.MemoryStats != nil {
|
||||
// Not emitting max, kernel usages since we never get them on a per-pid
|
||||
// basis
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "memory", "rss"}, float32(pidStats.MemoryStats.RSS))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "memory", "cache"}, float32(pidStats.MemoryStats.Cache))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "memory", "swap"}, float32(pidStats.MemoryStats.Swap))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "memory", "rss"}, float32(pidStats.MemoryStats.RSS))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "memory", "cache"}, float32(pidStats.MemoryStats.Cache))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "memory", "swap"}, float32(pidStats.MemoryStats.Swap))
|
||||
}
|
||||
|
||||
if pidStats.CpuStats != nil {
|
||||
// Not emitting throttled time and periods since we never get them on a
|
||||
// per pid basis
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "cpu", "percent"}, float32(pidStats.CpuStats.Percent))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "cpu", "system"}, float32(pidStats.CpuStats.SystemMode))
|
||||
metrics.EmitKey([]string{r.alloc.Job.Name, r.alloc.Name, r.alloc.ID, r.task.Name, pid, "cpu", "user"}, float32(pidStats.CpuStats.UserMode))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "cpu", "percent"}, float32(pidStats.CpuStats.Percent))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "cpu", "system"}, float32(pidStats.CpuStats.SystemMode))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, pid, "cpu", "user"}, float32(pidStats.CpuStats.UserMode))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user