From ac86225e0958d86be4fb1ed1a4933244871bdaf2 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 12 Sep 2025 08:29:46 -0400 Subject: [PATCH] metrics: reduce heap usage of eval broker metrics (#26737) The metrics on the eval broker include labels for the job ID, but under a high volume of dispatch workloads, this results in excessive heap usage on the leader. Dispatch workloads should use their parent ID rather than their child ID for any metrics we collect. Also, eliminate an extra copy of the labels. And remove the extremely high cardinality `"eval_id"` label from the `nomad.broker.eval_waiting` metric. Fixes: https://github.com/hashicorp/nomad/issues/26657 --- .changelog/26737.txt | 7 ++++ nomad/eval_broker.go | 22 +++++------- nomad/structs/funcs.go | 14 ++++++++ nomad/structs/funcs_test.go | 35 +++++++++++++++++-- .../content/docs/upgrade/upgrade-specific.mdx | 10 ++++++ 5 files changed, 73 insertions(+), 15 deletions(-) create mode 100644 .changelog/26737.txt diff --git a/.changelog/26737.txt b/.changelog/26737.txt new file mode 100644 index 000000000..a8ebe2a8b --- /dev/null +++ b/.changelog/26737.txt @@ -0,0 +1,7 @@ +```release-note:breaking-change +metrics: Eval broker metrics that previously used the job ID as a label will now use the parent ID of dispatch and periodic jobs +``` + +```release-note:improvement +metrics: Reduce memory usage on the Nomad leader for collecting eval broker metrics. +``` diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index 047cb0739..b2d01cee3 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -218,7 +218,7 @@ func (b *EvalBroker) Enqueue(eval *structs.Evaluation) { } // Restore is used to restore an evaluation that was previously enqueued. It -// works like enqueue exceot that it does not track enqueueTime of the restored +// works like enqueue except that it does not track enqueueTime of the restored // evaluation. func (b *EvalBroker) Restore(eval *structs.Evaluation) { b.l.Lock() @@ -406,7 +406,7 @@ SCAN: b.dequeuedTime[eval.ID] = time.Now() } metrics.MeasureSinceWithLabels([]string{"nomad", "broker", "wait_time"}, t, []metrics.Label{ - {Name: "job", Value: eval.JobID}, + {Name: "job", Value: structs.ParentIDFromJobID(eval.JobID)}, {Name: "namespace", Value: eval.Namespace}, {Name: "eval_type", Value: eval.Type}, {Name: "triggered_by", Value: eval.TriggeredBy}, @@ -788,18 +788,15 @@ func (b *EvalBroker) handleAckNackLocked(eval *structs.Evaluation) { return } - metrics.MeasureSinceWithLabels([]string{"nomad", "broker", "process_time"}, tDeq, []metrics.Label{ - {Name: "job", Value: eval.JobID}, + labels := []metrics.Label{ + {Name: "job", Value: structs.ParentIDFromJobID(eval.JobID)}, {Name: "namespace", Value: eval.Namespace}, {Name: "eval_type", Value: eval.Type}, {Name: "triggered_by", Value: eval.TriggeredBy}, - }) - metrics.MeasureSinceWithLabels([]string{"nomad", "broker", "response_time"}, tEnq, []metrics.Label{ - {Name: "job", Value: eval.JobID}, - {Name: "namespace", Value: eval.Namespace}, - {Name: "eval_type", Value: eval.Type}, - {Name: "triggered_by", Value: eval.TriggeredBy}, - }) + } + + metrics.MeasureSinceWithLabels([]string{"nomad", "broker", "process_time"}, tDeq, labels) + metrics.MeasureSinceWithLabels([]string{"nomad", "broker", "response_time"}, tEnq, labels) delete(b.enqueuedTime, eval.ID) delete(b.dequeuedTime, eval.ID) } @@ -1003,8 +1000,7 @@ func (b *EvalBroker) EmitStats(period time.Duration, stopCh <-chan struct{}) { metrics.SetGaugeWithLabels([]string{"nomad", "broker", "eval_waiting"}, float32(time.Until(eval.WaitUntil).Seconds()), []metrics.Label{ - {Name: "eval_id", Value: eval.ID}, - {Name: "job", Value: eval.JobID}, + {Name: "job", Value: structs.ParentIDFromJobID(eval.JobID)}, {Name: "namespace", Value: eval.Namespace}, }) } diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go index 47c453d8e..459be3910 100644 --- a/nomad/structs/funcs.go +++ b/nomad/structs/funcs.go @@ -562,3 +562,17 @@ func ParsePortRanges(spec string) ([]uint64, error) { return ports, nil } + +// ParentIDFromJobID returns the parent job ID of a given dispatch or periodic +// job. Generally you should use the child job's Job.ParentID field instead, but +// this is useful for contexts where the Job struct isn't present. +func ParentIDFromJobID(jobID string) string { + if strings.Index(jobID, "/") == 0 { + // do a cheap O(n) check first before we do the more expensive Cut + // method + return jobID + } + jobID, _, _ = strings.Cut(jobID, DispatchLaunchSuffix) + jobID, _, _ = strings.Cut(jobID, PeriodicLaunchSuffix) + return jobID +} diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go index dcecd1684..79db2d1bf 100644 --- a/nomad/structs/funcs_test.go +++ b/nomad/structs/funcs_test.go @@ -7,6 +7,7 @@ import ( "encoding/base64" "fmt" "testing" + "time" "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/ci" @@ -14,6 +15,7 @@ import ( "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/helper/uuid" + "github.com/shoenig/test" "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -1024,8 +1026,7 @@ func TestParsePortRanges(t *testing.T) { }, } - for i := range cases { - tc := cases[i] + for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { results, err := ParsePortRanges(tc.spec) if tc.err == "" { @@ -1038,3 +1039,33 @@ func TestParsePortRanges(t *testing.T) { }) } } + +func TestParentIDFromJobID(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + jobID string + expect string + }{ + { + jobID: DispatchedID("example", "", time.Now()), + expect: "example", + }, + { + jobID: fmt.Sprintf( + "example/whatever%s%d", PeriodicLaunchSuffix, time.Now().Unix()), + expect: "example/whatever", + }, + { + jobID: "example/whatever", + expect: "example/whatever", + }, + { + jobID: "example", + expect: "example", + }, + } + for _, tc := range cases { + test.Eq(t, tc.expect, ParentIDFromJobID(tc.jobID)) + } +} diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index c48abce70..ae6308fa4 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -20,6 +20,16 @@ In Nomad 1.11.0, submitting a sysbatch job with a `reschedule` block returns an error instead of being silently ignored, as it was in previous versions. The same behavior applies to system jobs. +#### Eval broker metrics for dispatch and periodic jobs + +The leader records metrics for the eval broker. In Nomad 1.11.0 the `job` label +on the `nomad.nomad.broker.wait_time`, `nomad.nomad.broker.process_time`, +`nomad.nomad.broker.response_time`, and `nomad.nomad.broker.eval_waiting` +metrics refers to the parent job ID for dispatch and periodic jobs. The +`nomad.nomad.broker.eval_waiting` no longer has an `eval_id` label. For clusters +running high volume dispatch workloads, this change significantly reduces +metrics cardinality and memory usage on the leader. + ## Nomad 1.10.2 #### Clients respect `telemetry.publish_allocation_metrics`