mirror of
https://github.com/kemko/nomad.git
synced 2026-01-02 16:35:44 +03:00
client: expose allocated memory per task
Related to #4280 This PR adds `client.allocs.<job>.<group>.<alloc>.<task>.memory.allocated` as a gauge in bytes to metrics to ease calculating how close a task is to OOMing. ``` 'nomad.client.allocs.memory.allocated.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 268435456.000 'nomad.client.allocs.memory.cache.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 5677056.000 'nomad.client.allocs.memory.kernel_max_usage.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 0.000 'nomad.client.allocs.memory.kernel_usage.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 0.000 'nomad.client.allocs.memory.max_usage.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 8908800.000 'nomad.client.allocs.memory.rss.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 876544.000 'nomad.client.allocs.memory.swap.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 0.000 'nomad.client.allocs.memory.usage.example.cache.6d98cbaf-d6bc-2a84-c63f-bfff8905a9d8.redis.rusty': 8208384.000 ```
This commit is contained in:
@@ -25,6 +25,7 @@ IMPROVEMENTS:
|
||||
* api: Add additional config options to scheduler configuration endpoint to disable preemption [[GH-5628](https://github.com/hashicorp/nomad/issues/5628)]
|
||||
* client: Allow use of maintenance mode and externally registered checks against Nomad-registered consul services [[GH-4537](https://github.com/hashicorp/nomad/issues/4537)]
|
||||
* client: Canary Promotion no longer causes services registered in Consul to become unhealthy [[GH-4566](https://github.com/hashicorp/nomad/issues/4566)]
|
||||
* telemetry: Add `client.allocs.memory.allocated` metric to expose allocated task memory in bytes. [[GH-5492](https://github.com/hashicorp/nomad/issues/5492)]
|
||||
|
||||
BUG FIXES:
|
||||
|
||||
|
||||
@@ -1132,6 +1132,13 @@ func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) {
|
||||
|
||||
//TODO Remove Backwardscompat or use tr.Alloc()?
|
||||
func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
|
||||
alloc := tr.Alloc()
|
||||
var allocatedMem float32
|
||||
if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
|
||||
// Convert to bytes to match other memory metrics
|
||||
allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024
|
||||
}
|
||||
|
||||
if !tr.clientConfig.DisableTaggedMetrics {
|
||||
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"},
|
||||
float32(ru.ResourceUsage.MemoryStats.RSS), tr.baseLabels)
|
||||
@@ -1147,16 +1154,23 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
|
||||
float32(ru.ResourceUsage.MemoryStats.KernelUsage), tr.baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_max_usage"},
|
||||
float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage), tr.baseLabels)
|
||||
if allocatedMem > 0 {
|
||||
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
|
||||
allocatedMem, tr.baseLabels)
|
||||
}
|
||||
}
|
||||
|
||||
if tr.clientConfig.BackwardsCompatibleMetrics {
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "usage"}, float32(ru.ResourceUsage.MemoryStats.Usage))
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", tr.alloc.Job.Name, tr.alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "usage"}, float32(ru.ResourceUsage.MemoryStats.Usage))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
|
||||
if allocatedMem > 0 {
|
||||
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "allocated"}, allocatedMem)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user