mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
metrics: introduce client config to include alloc metadata as part of the base labels (#23964)
This commit is contained in:
3
.changelog/23964.txt
Normal file
3
.changelog/23964.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
```release-note:improvement
|
||||||
|
metrics: introduce client config to include alloc metadata as part of the base labels
|
||||||
|
```
|
||||||
@@ -510,6 +510,20 @@ func (tr *TaskRunner) initLabels() {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if tr.clientConfig.IncludeAllocMetadataInMetrics {
|
||||||
|
combined := alloc.Job.CombinedTaskMeta(alloc.TaskGroup, tr.taskName)
|
||||||
|
for meta, metaValue := range combined {
|
||||||
|
if len(tr.clientConfig.AllowedMetadataKeysInMetrics) > 0 && !slices.Contains(tr.clientConfig.AllowedMetadataKeysInMetrics, meta) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
tr.baseLabels = append(tr.baseLabels, metrics.Label{
|
||||||
|
Name: strings.ReplaceAll(meta, "-", "_"),
|
||||||
|
Value: metaValue,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if tr.alloc.Job.ParentID != "" {
|
if tr.alloc.Job.ParentID != "" {
|
||||||
tr.baseLabels = append(tr.baseLabels, metrics.Label{
|
tr.baseLabels = append(tr.baseLabels, metrics.Label{
|
||||||
Name: "parent_id",
|
Name: "parent_id",
|
||||||
|
|||||||
@@ -2866,6 +2866,40 @@ func TestTaskRunner_BaseLabels(t *testing.T) {
|
|||||||
require.Equal(alloc.Namespace, labels["namespace"])
|
require.Equal(alloc.Namespace, labels["namespace"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestTaskRunner_BaseLabels_IncludesAllocMetadata tests that the base labels include
|
||||||
|
// the allocation metadata fields using the provided allowed list of keys
|
||||||
|
func TestTaskRunner_BaseLabels_IncludesAllocMetadata(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
alloc := mock.BatchAlloc()
|
||||||
|
alloc.Namespace = "not-default"
|
||||||
|
job := alloc.Job
|
||||||
|
job.Meta = map[string]string{"owner": "HashiCorp", "my-key": "my-value", "some_dynamic_value": "now()"}
|
||||||
|
task := job.TaskGroups[0].Tasks[0]
|
||||||
|
task.Driver = "raw_exec"
|
||||||
|
task.Config = map[string]interface{}{
|
||||||
|
"command": "whoami",
|
||||||
|
}
|
||||||
|
|
||||||
|
trConfig, cleanup := testTaskRunnerConfig(t, alloc, task.Name, nil)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
trConfig.ClientConfig.IncludeAllocMetadataInMetrics = true
|
||||||
|
trConfig.ClientConfig.AllowedMetadataKeysInMetrics = []string{"owner", "my-key"}
|
||||||
|
|
||||||
|
tr, err := NewTaskRunner(trConfig)
|
||||||
|
must.NoError(t, err)
|
||||||
|
|
||||||
|
labels := map[string]string{}
|
||||||
|
for _, e := range tr.baseLabels {
|
||||||
|
labels[e.Name] = e.Value
|
||||||
|
}
|
||||||
|
|
||||||
|
must.Eq(t, "HashiCorp", labels["owner"])
|
||||||
|
must.Eq(t, "my-value", labels["my_key"])
|
||||||
|
must.MapNotContainsKey(t, labels, "some_dynamic_value")
|
||||||
|
}
|
||||||
|
|
||||||
// TestTaskRunner_IdentityHook_Enabled asserts that the identity hook exposes a
|
// TestTaskRunner_IdentityHook_Enabled asserts that the identity hook exposes a
|
||||||
// workload identity to a task.
|
// workload identity to a task.
|
||||||
func TestTaskRunner_IdentityHook_Enabled(t *testing.T) {
|
func TestTaskRunner_IdentityHook_Enabled(t *testing.T) {
|
||||||
|
|||||||
@@ -203,6 +203,14 @@ type Config struct {
|
|||||||
// allocation metrics to remote Telemetry sinks
|
// allocation metrics to remote Telemetry sinks
|
||||||
PublishAllocationMetrics bool
|
PublishAllocationMetrics bool
|
||||||
|
|
||||||
|
// IncludeAllocMetadataInMetrics determines whether nomad should include the
|
||||||
|
// allocation metadata as labels in the metrics to remote Telemetry sinks
|
||||||
|
IncludeAllocMetadataInMetrics bool
|
||||||
|
|
||||||
|
// AllowedMetadataKeysInMetrics, when provided, makes nomad include only the
|
||||||
|
// configured metadata keys as part of the metrics to remote Telemetry sinks
|
||||||
|
AllowedMetadataKeysInMetrics []string
|
||||||
|
|
||||||
// TLSConfig holds various TLS related configurations
|
// TLSConfig holds various TLS related configurations
|
||||||
TLSConfig *structsc.TLSConfig
|
TLSConfig *structsc.TLSConfig
|
||||||
|
|
||||||
|
|||||||
@@ -861,6 +861,8 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
|
|||||||
conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
|
conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
|
||||||
conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
|
conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
|
||||||
conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
|
conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
|
||||||
|
conf.IncludeAllocMetadataInMetrics = agentConfig.Telemetry.IncludeAllocMetadataInMetrics
|
||||||
|
conf.AllowedMetadataKeysInMetrics = agentConfig.Telemetry.AllowedMetadataKeysInMetrics
|
||||||
|
|
||||||
// Set the TLS related configs
|
// Set the TLS related configs
|
||||||
conf.TLSConfig = agentConfig.TLSConfig
|
conf.TLSConfig = agentConfig.TLSConfig
|
||||||
|
|||||||
@@ -969,17 +969,19 @@ type Telemetry struct {
|
|||||||
InMemoryRetentionPeriod string `hcl:"in_memory_retention_period"`
|
InMemoryRetentionPeriod string `hcl:"in_memory_retention_period"`
|
||||||
inMemoryRetentionPeriod time.Duration `hcl:"-"`
|
inMemoryRetentionPeriod time.Duration `hcl:"-"`
|
||||||
|
|
||||||
StatsiteAddr string `hcl:"statsite_address"`
|
StatsiteAddr string `hcl:"statsite_address"`
|
||||||
StatsdAddr string `hcl:"statsd_address"`
|
StatsdAddr string `hcl:"statsd_address"`
|
||||||
DataDogAddr string `hcl:"datadog_address"`
|
DataDogAddr string `hcl:"datadog_address"`
|
||||||
DataDogTags []string `hcl:"datadog_tags"`
|
DataDogTags []string `hcl:"datadog_tags"`
|
||||||
PrometheusMetrics bool `hcl:"prometheus_metrics"`
|
PrometheusMetrics bool `hcl:"prometheus_metrics"`
|
||||||
DisableHostname bool `hcl:"disable_hostname"`
|
DisableHostname bool `hcl:"disable_hostname"`
|
||||||
UseNodeName bool `hcl:"use_node_name"`
|
UseNodeName bool `hcl:"use_node_name"`
|
||||||
CollectionInterval string `hcl:"collection_interval"`
|
CollectionInterval string `hcl:"collection_interval"`
|
||||||
collectionInterval time.Duration `hcl:"-"`
|
collectionInterval time.Duration `hcl:"-"`
|
||||||
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
|
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
|
||||||
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
|
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
|
||||||
|
IncludeAllocMetadataInMetrics bool `hcl:"include_alloc_metadata_in_metrics"`
|
||||||
|
AllowedMetadataKeysInMetrics []string `hcl:"allowed_metadata_keys_in_metrics"`
|
||||||
|
|
||||||
// PrefixFilter allows for filtering out metrics from being collected
|
// PrefixFilter allows for filtering out metrics from being collected
|
||||||
PrefixFilter []string `hcl:"prefix_filter"`
|
PrefixFilter []string `hcl:"prefix_filter"`
|
||||||
@@ -1343,6 +1345,8 @@ func DevConfig(mode *devModeConfig) *Config {
|
|||||||
conf.Telemetry.PrometheusMetrics = true
|
conf.Telemetry.PrometheusMetrics = true
|
||||||
conf.Telemetry.PublishAllocationMetrics = true
|
conf.Telemetry.PublishAllocationMetrics = true
|
||||||
conf.Telemetry.PublishNodeMetrics = true
|
conf.Telemetry.PublishNodeMetrics = true
|
||||||
|
conf.Telemetry.IncludeAllocMetadataInMetrics = true
|
||||||
|
conf.Telemetry.AllowedMetadataKeysInMetrics = []string{}
|
||||||
|
|
||||||
if mode.consulMode {
|
if mode.consulMode {
|
||||||
conf.Consuls[0].ServiceIdentity = &config.WorkloadIdentityConfig{
|
conf.Consuls[0].ServiceIdentity = &config.WorkloadIdentityConfig{
|
||||||
@@ -2524,6 +2528,10 @@ func (t *Telemetry) Merge(b *Telemetry) *Telemetry {
|
|||||||
if b.PublishAllocationMetrics {
|
if b.PublishAllocationMetrics {
|
||||||
result.PublishAllocationMetrics = true
|
result.PublishAllocationMetrics = true
|
||||||
}
|
}
|
||||||
|
if b.IncludeAllocMetadataInMetrics {
|
||||||
|
result.IncludeAllocMetadataInMetrics = true
|
||||||
|
}
|
||||||
|
result.AllowedMetadataKeysInMetrics = append(result.AllowedMetadataKeysInMetrics, b.AllowedMetadataKeysInMetrics...)
|
||||||
if b.CirconusAPIToken != "" {
|
if b.CirconusAPIToken != "" {
|
||||||
result.CirconusAPIToken = b.CirconusAPIToken
|
result.CirconusAPIToken = b.CirconusAPIToken
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,6 +58,14 @@ The following options are available on all telemetry configurations.
|
|||||||
- `publish_allocation_metrics` `(bool: false)` - Specifies if Nomad should
|
- `publish_allocation_metrics` `(bool: false)` - Specifies if Nomad should
|
||||||
publish runtime metrics of allocations.
|
publish runtime metrics of allocations.
|
||||||
|
|
||||||
|
- `include_alloc_metadata_in_metrics` `(bool: false)` - This controls whether
|
||||||
|
allocation metadata is included in metric labels. Enabling this option may result in
|
||||||
|
high-cardinality labels. You should also configure [allowed_metadata_keys_in_metrics](#allowed_metadata_keys_in_metrics).
|
||||||
|
|
||||||
|
- `allowed_metadata_keys_in_metrics` `(list: [])` - This filters the metadata
|
||||||
|
keys to be included in the metric publishing. By default it does not filter
|
||||||
|
out any keys and thus includes all metadata.
|
||||||
|
|
||||||
- `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish
|
- `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish
|
||||||
runtime metrics of nodes.
|
runtime metrics of nodes.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user