mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
metrics: introduce client config to include alloc metadata as part of the base labels (#23964)
This commit is contained in:
3
.changelog/23964.txt
Normal file
3
.changelog/23964.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:improvement
|
||||
metrics: introduce client config to include alloc metadata as part of the base labels
|
||||
```
|
||||
@@ -510,6 +510,20 @@ func (tr *TaskRunner) initLabels() {
|
||||
},
|
||||
}
|
||||
|
||||
if tr.clientConfig.IncludeAllocMetadataInMetrics {
|
||||
combined := alloc.Job.CombinedTaskMeta(alloc.TaskGroup, tr.taskName)
|
||||
for meta, metaValue := range combined {
|
||||
if len(tr.clientConfig.AllowedMetadataKeysInMetrics) > 0 && !slices.Contains(tr.clientConfig.AllowedMetadataKeysInMetrics, meta) {
|
||||
continue
|
||||
}
|
||||
|
||||
tr.baseLabels = append(tr.baseLabels, metrics.Label{
|
||||
Name: strings.ReplaceAll(meta, "-", "_"),
|
||||
Value: metaValue,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if tr.alloc.Job.ParentID != "" {
|
||||
tr.baseLabels = append(tr.baseLabels, metrics.Label{
|
||||
Name: "parent_id",
|
||||
|
||||
@@ -2866,6 +2866,40 @@ func TestTaskRunner_BaseLabels(t *testing.T) {
|
||||
require.Equal(alloc.Namespace, labels["namespace"])
|
||||
}
|
||||
|
||||
// TestTaskRunner_BaseLabels_IncludesAllocMetadata tests that the base labels include
|
||||
// the allocation metadata fields using the provided allowed list of keys: allow-listed
// keys are expected as labels (with "-" replaced by "_" in the label name) and keys
// that are not on the list are expected to be absent.
|
||||
func TestTaskRunner_BaseLabels_IncludesAllocMetadata(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
alloc := mock.BatchAlloc()
|
||||
alloc.Namespace = "not-default"
|
||||
job := alloc.Job
|
||||
// Job-level meta with three keys; only two of them are allow-listed below.
job.Meta = map[string]string{"owner": "HashiCorp", "my-key": "my-value", "some_dynamic_value": "now()"}
|
||||
task := job.TaskGroups[0].Tasks[0]
|
||||
task.Driver = "raw_exec"
|
||||
task.Config = map[string]interface{}{
|
||||
"command": "whoami",
|
||||
}
|
||||
|
||||
trConfig, cleanup := testTaskRunnerConfig(t, alloc, task.Name, nil)
|
||||
defer cleanup()
|
||||
|
||||
// Turn the feature on and allow only "owner" and "my-key".
trConfig.ClientConfig.IncludeAllocMetadataInMetrics = true
|
||||
trConfig.ClientConfig.AllowedMetadataKeysInMetrics = []string{"owner", "my-key"}
|
||||
|
||||
tr, err := NewTaskRunner(trConfig)
|
||||
must.NoError(t, err)
|
||||
|
||||
// Flatten the base labels into a map so individual label names can be looked up.
labels := map[string]string{}
|
||||
for _, e := range tr.baseLabels {
|
||||
labels[e.Name] = e.Value
|
||||
}
|
||||
|
||||
// "my-key" is looked up under the label name "my_key"; "some_dynamic_value" is not
// allow-listed and must not appear at all.
must.Eq(t, "HashiCorp", labels["owner"])
|
||||
must.Eq(t, "my-value", labels["my_key"])
|
||||
must.MapNotContainsKey(t, labels, "some_dynamic_value")
|
||||
}
|
||||
|
||||
// TestTaskRunner_IdentityHook_Enabled asserts that the identity hook exposes a
|
||||
// workload identity to a task.
|
||||
func TestTaskRunner_IdentityHook_Enabled(t *testing.T) {
|
||||
|
||||
@@ -203,6 +203,14 @@ type Config struct {
|
||||
// allocation metrics to remote Telemetry sinks
|
||||
PublishAllocationMetrics bool
|
||||
|
||||
// IncludeAllocMetadataInMetrics determines whether nomad should include the
|
||||
// allocation metadata as labels in the metrics to remote Telemetry sinks
|
||||
IncludeAllocMetadataInMetrics bool
|
||||
|
||||
// AllowedMetadataKeysInMetrics, when provided, restricts nomad to including only the
|
||||
// configured metadata keys as part of the metrics to remote Telemetry sinks
|
||||
AllowedMetadataKeysInMetrics []string
|
||||
|
||||
// TLSConfig holds various TLS related configurations
|
||||
TLSConfig *structsc.TLSConfig
|
||||
|
||||
|
||||
@@ -861,6 +861,8 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
|
||||
conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
|
||||
conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
|
||||
conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
|
||||
conf.IncludeAllocMetadataInMetrics = agentConfig.Telemetry.IncludeAllocMetadataInMetrics
|
||||
conf.AllowedMetadataKeysInMetrics = agentConfig.Telemetry.AllowedMetadataKeysInMetrics
|
||||
|
||||
// Set the TLS related configs
|
||||
conf.TLSConfig = agentConfig.TLSConfig
|
||||
|
||||
@@ -969,17 +969,19 @@ type Telemetry struct {
|
||||
InMemoryRetentionPeriod string `hcl:"in_memory_retention_period"`
|
||||
inMemoryRetentionPeriod time.Duration `hcl:"-"`
|
||||
|
||||
StatsiteAddr string `hcl:"statsite_address"`
|
||||
StatsdAddr string `hcl:"statsd_address"`
|
||||
DataDogAddr string `hcl:"datadog_address"`
|
||||
DataDogTags []string `hcl:"datadog_tags"`
|
||||
PrometheusMetrics bool `hcl:"prometheus_metrics"`
|
||||
DisableHostname bool `hcl:"disable_hostname"`
|
||||
UseNodeName bool `hcl:"use_node_name"`
|
||||
CollectionInterval string `hcl:"collection_interval"`
|
||||
collectionInterval time.Duration `hcl:"-"`
|
||||
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
|
||||
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
|
||||
StatsiteAddr string `hcl:"statsite_address"`
|
||||
StatsdAddr string `hcl:"statsd_address"`
|
||||
DataDogAddr string `hcl:"datadog_address"`
|
||||
DataDogTags []string `hcl:"datadog_tags"`
|
||||
PrometheusMetrics bool `hcl:"prometheus_metrics"`
|
||||
DisableHostname bool `hcl:"disable_hostname"`
|
||||
UseNodeName bool `hcl:"use_node_name"`
|
||||
CollectionInterval string `hcl:"collection_interval"`
|
||||
collectionInterval time.Duration `hcl:"-"`
|
||||
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
|
||||
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
|
||||
IncludeAllocMetadataInMetrics bool `hcl:"include_alloc_metadata_in_metrics"`
|
||||
AllowedMetadataKeysInMetrics []string `hcl:"allowed_metadata_keys_in_metrics"`
|
||||
|
||||
// PrefixFilter allows for filtering out metrics from being collected
|
||||
PrefixFilter []string `hcl:"prefix_filter"`
|
||||
@@ -1343,6 +1345,8 @@ func DevConfig(mode *devModeConfig) *Config {
|
||||
conf.Telemetry.PrometheusMetrics = true
|
||||
conf.Telemetry.PublishAllocationMetrics = true
|
||||
conf.Telemetry.PublishNodeMetrics = true
|
||||
conf.Telemetry.IncludeAllocMetadataInMetrics = true
|
||||
conf.Telemetry.AllowedMetadataKeysInMetrics = []string{}
|
||||
|
||||
if mode.consulMode {
|
||||
conf.Consuls[0].ServiceIdentity = &config.WorkloadIdentityConfig{
|
||||
@@ -2524,6 +2528,10 @@ func (t *Telemetry) Merge(b *Telemetry) *Telemetry {
|
||||
if b.PublishAllocationMetrics {
|
||||
result.PublishAllocationMetrics = true
|
||||
}
|
||||
if b.IncludeAllocMetadataInMetrics {
|
||||
result.IncludeAllocMetadataInMetrics = true
|
||||
}
|
||||
result.AllowedMetadataKeysInMetrics = append(result.AllowedMetadataKeysInMetrics, b.AllowedMetadataKeysInMetrics...)
|
||||
if b.CirconusAPIToken != "" {
|
||||
result.CirconusAPIToken = b.CirconusAPIToken
|
||||
}
|
||||
|
||||
@@ -58,6 +58,14 @@ The following options are available on all telemetry configurations.
|
||||
- `publish_allocation_metrics` `(bool: false)` - Specifies if Nomad should
|
||||
publish runtime metrics of allocations.
|
||||
|
||||
- `include_alloc_metadata_in_metrics` `(bool: false)` - This controls whether
|
||||
allocation metadata is included in metric labels. Enabling this option may result in
|
||||
high cardinality labels. You should also configure [allowed_metadata_keys_in_metrics](#allowed_metadata_keys_in_metrics).
|
||||
|
||||
- `allowed_metadata_keys_in_metrics` `(list: [])` - This filters the metadata
|
||||
keys to be included in the published metrics. By default it does not filter
|
||||
out any keys and thus includes all metadata.
|
||||
|
||||
- `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish
|
||||
runtime metrics of nodes.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user