From 09cf00f3304e2e5d3b189c5a59121cb516c87734 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Tue, 9 Mar 2021 15:28:58 +0100 Subject: [PATCH 1/4] agent: return req error if prometheus metrics are disabled. If the user has disabled Prometheus metrics and a request is sent to the metrics endpoint requesting Prometheus formatted metrics, then the request should fail. --- api/internal/testutil/server.go | 6 +++++ api/operator_metrics_test.go | 5 +++- command/agent/metrics_endpoint.go | 6 +++++ command/agent/metrics_endpoint_test.go | 34 ++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/api/internal/testutil/server.go b/api/internal/testutil/server.go index 7d7cdf844..b0c1f5ef0 100644 --- a/api/internal/testutil/server.go +++ b/api/internal/testutil/server.go @@ -42,6 +42,7 @@ type TestServerConfig struct { Client *ClientConfig `json:"client,omitempty"` Vault *VaultConfig `json:"vault,omitempty"` ACL *ACLConfig `json:"acl,omitempty"` + Telemetry *Telemetry `json:"telemetry,omitempty"` DevMode bool `json:"-"` Stdout, Stderr io.Writer `json:"-"` } @@ -90,6 +91,11 @@ type ACLConfig struct { Enabled bool `json:"enabled"` } +// Telemetry is used to configure the Nomad telemetry setup. +type Telemetry struct { + PrometheusMetrics bool `json:"prometheus_metrics"` +} + // ServerConfigCallback is a function interface which can be // passed to NewTestServerConfig to modify the server config. type ServerConfigCallback func(c *TestServerConfig) diff --git a/api/operator_metrics_test.go b/api/operator_metrics_test.go index e21a6cecc..a53e9d13a 100644 --- a/api/operator_metrics_test.go +++ b/api/operator_metrics_test.go @@ -3,6 +3,7 @@ package api import ( "testing" + "github.com/hashicorp/nomad/api/internal/testutil" "github.com/stretchr/testify/require" ) @@ -31,7 +32,9 @@ func TestOperator_MetricsSummary(t *testing.T) { func TestOperator_Metrics_Prometheus(t *testing.T) { t.Parallel() - c, s := makeClient(t, nil, nil) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.Telemetry = &testutil.Telemetry{PrometheusMetrics: true} + }) defer s.Stop() operator := c.Operator() diff --git a/command/agent/metrics_endpoint.go b/command/agent/metrics_endpoint.go index 9e4f2259e..7233492ae 100644 --- a/command/agent/metrics_endpoint.go +++ b/command/agent/metrics_endpoint.go @@ -22,6 +22,12 @@ func (s *HTTPServer) MetricsRequest(resp http.ResponseWriter, req *http.Request) } if format := req.URL.Query().Get("format"); format == "prometheus" { + + // Only return Prometheus formatted metrics if the user has enabled + // this functionality. + if !s.agent.config.Telemetry.PrometheusMetrics { + return nil, CodedError(http.StatusUnsupportedMediaType, "Prometheus is not enabled") + } s.prometheusHandler().ServeHTTP(resp, req) return nil, nil } diff --git a/command/agent/metrics_endpoint_test.go b/command/agent/metrics_endpoint_test.go index 0cdc8cfb0..712fdb5d8 100644 --- a/command/agent/metrics_endpoint_test.go +++ b/command/agent/metrics_endpoint_test.go @@ -29,6 +29,40 @@ func TestHTTP_MetricsWithIllegalMethod(t *testing.T) { }) } +func TestHTTP_MetricsPrometheusDisabled(t *testing.T) { + assert := assert.New(t) + + t.Parallel() + httpTest(t, func(c *Config) { c.Telemetry.PrometheusMetrics = false }, func(s *TestAgent) { + req, err := http.NewRequest("GET", "/v1/metrics?format=prometheus", nil) + assert.Nil(err) + + resp, err := s.Server.MetricsRequest(nil, req) + assert.Nil(resp) + assert.Error(err, "Prometheus is not enabled") + }) +} + +func TestHTTP_MetricsPrometheusEnabled(t *testing.T) { + assert := assert.New(t) + + t.Parallel() + httpTest(t, nil, func(s *TestAgent) { + req, err := http.NewRequest("GET", "/v1/metrics?format=prometheus", nil) + assert.Nil(err) + respW := httptest.NewRecorder() + + resp, err := s.Server.MetricsRequest(respW, req) + assert.Nil(resp) + assert.Nil(err) + + // Ensure the response body is not empty and that it contains something + // that looks like a metric we expect. + assert.NotNil(respW.Body) + assert.Contains(respW.Body.String(), "HELP go_gc_duration_seconds") + }) +} + func TestHTTP_Metrics(t *testing.T) { assert := assert.New(t) From 1fd4fbd1d1755f0dd93b870651257b657efb68f8 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Tue, 9 Mar 2021 15:29:25 +0100 Subject: [PATCH 2/4] changelog: add entry for #10140 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e6928d18..133731a09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 1.1.0 (Unreleased) BUG FIXES: + * agent: Only allow querying Prometheus formatted metrics if Prometheus is enabled within the config [[GH-10140](https://github.com/hashicorp/nomad/pull/10140)] * api: Added missing devices block to AllocatedTaskResources [[GH-10064](https://github.com/hashicorp/nomad/pull/10064)] * cli: Fixed a bug where non-int proxy port would panic CLI [[GH-10072](https://github.com/hashicorp/nomad/issues/10072)] * cli: Fixed a bug where `nomad operator debug` incorrectly parsed https Consul API URLs. [[GH-10082](https://github.com/hashicorp/nomad/pull/10082)] From 65c826d8878a3e11cdcbbe6adcf2960abb37e70b Mon Sep 17 00:00:00 2001 From: James Rasell Date: Tue, 9 Mar 2021 15:40:08 +0100 Subject: [PATCH 3/4] docs: add upgrade guide for change to metrics API. --- website/content/docs/upgrade/upgrade-specific.mdx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index ce9a94b0a..e9fd2a10b 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -14,6 +14,15 @@ upgrade. However, specific versions of Nomad may have more details provided for their upgrades as a result of new features or changed behavior. This page is used to document those details separately from the standard upgrade flow. +## Nomad 1.1.0 + +#### Agent Metrics API + +The Nomad agent metrics API now respects the +[prometheus_metrics](https://www.nomadproject.io/docs/configuration/telemetry#prometheus_metrics) +configuration value. If this value is set to `false`, which is the default value, +calling `/v1/metrics?format=prometheus` will now result in a response error. + ## Nomad 1.0.3, 0.12.10 Nomad versions 1.0.3 and 0.12.10 change the behavior of the `exec` and `java` drivers so that From 9263f4052a91fb5b7e8b7c36692597589ea63d95 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Wed, 10 Mar 2021 15:08:17 +0100 Subject: [PATCH 4/4] correctly format variable name within upgrade doc Co-authored-by: Tim Gross --- website/content/docs/upgrade/upgrade-specific.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index e9fd2a10b..fee5a538a 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -19,7 +19,7 @@ used to document those details separately from the standard upgrade flow. #### Agent Metrics API The Nomad agent metrics API now respects the -[prometheus_metrics](https://www.nomadproject.io/docs/configuration/telemetry#prometheus_metrics) +[`prometheus_metrics`](https://www.nomadproject.io/docs/configuration/telemetry#prometheus_metrics) configuration value. If this value is set to `false`, which is the default value, calling `/v1/metrics?format=prometheus` will now result in a response error.