metrics: report task memory_max value (#17938)

Add a new `nomad.client.allocs.memory.max_allocated` metric to report the
task's `memory_max` resource value.
This commit is contained in:
Luiz Aoqui
2023-07-19 16:50:12 -04:00
committed by GitHub
parent e664f1439a
commit ce0f60fb68
3 changed files with 10 additions and 0 deletions

3
.changelog/17938.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
metrics: Add `allocs.memory.max_allocated` to report tasks' `memory_max` resource value
```

View File

@@ -1482,9 +1482,11 @@ func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) {
func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
alloc := tr.Alloc()
var allocatedMem float32
var allocatedMemMax float32
if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
// Convert to bytes to match other memory metrics
allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024
allocatedMemMax = float32(taskRes.Memory.MemoryMaxMB) * 1024 * 1024
}
ms := ru.ResourceUsage.MemoryStats
@@ -1508,6 +1510,10 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
allocatedMem, tr.baseLabels)
}
if allocatedMemMax > 0 {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_allocated"},
allocatedMemMax, tr.baseLabels)
}
}
// TODO Remove Backwardscompat or use tr.Alloc()?

View File

@@ -205,6 +205,7 @@ task driver; not all task drivers can provide all metrics.
| `nomad.client.allocs.memory.cache` | Amount of memory cached by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.kernel_max_usage` | Maximum amount of memory ever used by the kernel for this task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.kernel_usage` | Amount of memory used by the kernel for this task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.max_allocated` | Maximum amount of memory allocated to the task when memory oversubscription is used (the task's `memory_max` value) | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.max_usage` | Maximum amount of memory ever used by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.rss` | Amount of RSS memory consumed by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.swap` | Amount of memory swapped by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |