From 8a5a057d88ac34d72ede80d6ba558907d4ba0b73 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 14 May 2025 09:51:13 -0400 Subject: [PATCH] offline license utilization reporting (#25844) Nomad Enterprise users operating in air-gapped or otherwise secured environments don't want to send license reporting metrics directly from their servers. Implement manual/offline reporting by periodically recording usage metrics snapshots in the state store, and providing an API and CLI by which cluster administrators can download the snapshot for review and out-of-band transmission to HashiCorp. This is the CE portion of the work required for implementation in the Enterprise product. Nomad CE does not perform utilization reporting. Ref: https://github.com/hashicorp/nomad-enterprise/pull/2673 Ref: https://hashicorp.atlassian.net/browse/NMD-68 Ref: https://go.hashi.co/rfc/nmd-210 --- .changelog/25844.txt | 3 + api/operator.go | 26 ++++ command/agent/config_parse.go | 2 + command/agent/config_parse_test.go | 8 +- command/agent/http.go | 1 + command/agent/operator_endpoint_ce.go | 8 ++ command/agent/testdata/basic.hcl | 5 +- command/agent/testdata/basic.json | 1 + command/commands.go | 5 + command/operator_utilization.go | 112 ++++++++++++++++++ nomad/event_endpoint.go | 6 +- nomad/state/events.go | 12 +- nomad/state/events_ce.go | 22 ++++ nomad/structs/config/reporting.go | 11 ++ nomad/structs/event.go | 2 + website/content/api-docs/events.mdx | 3 + .../content/api-docs/operator/utilization.mdx | 59 +++++++++ .../docs/commands/operator/utilization.mdx | 84 +++++++++++++ .../content/docs/configuration/reporting.mdx | 14 ++- website/data/api-docs-nav-data.json | 4 + website/data/docs-nav-data.json | 6 +- 21 files changed, 380 insertions(+), 14 deletions(-) create mode 100644 .changelog/25844.txt create mode 100644 command/operator_utilization.go create mode 100644 nomad/state/events_ce.go create mode 100644 website/content/api-docs/operator/utilization.mdx create mode 100644 
website/content/docs/commands/operator/utilization.mdx diff --git a/.changelog/25844.txt b/.changelog/25844.txt new file mode 100644 index 000000000..02214cebc --- /dev/null +++ b/.changelog/25844.txt @@ -0,0 +1,3 @@ +```release-note:improvement +reporting (Enterprise): Added support for offline utilization reporting +``` diff --git a/api/operator.go b/api/operator.go index 0b29e37fd..ec5e7d415 100644 --- a/api/operator.go +++ b/api/operator.go @@ -8,6 +8,7 @@ import ( "errors" "io" "net/http" + "net/url" "strconv" "strings" "time" @@ -475,3 +476,28 @@ func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWo } return &resp, qm, nil } + +type OperatorUtilizationOptions struct { + TodayOnly bool +} + +type OperatorUtilizationSnapshotResponse struct { + // Bundle is the JSON serialized utilization reporting bundle. + Bundle []byte + WriteMeta +} + +// Utilization retrieves a utilization reporting bundle (Nomad Enterprise only). +func (op *Operator) Utilization(opts *OperatorUtilizationOptions, w *WriteOptions) (*OperatorUtilizationSnapshotResponse, *WriteMeta, error) { + resp := &OperatorUtilizationSnapshotResponse{} + v := url.Values{} + if opts.TodayOnly { + v.Add("today", "true") + } + + wm, err := op.c.post("/v1/operator/utilization?"+v.Encode(), nil, resp, w) + if err != nil { + return nil, nil, err + } + return resp, wm, nil +} diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index a6448c35e..494a6d6b9 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -182,6 +182,8 @@ func ParseConfigFile(path string) (*Config, error) { }, {"reporting.export_interval", &c.Reporting.ExportInterval, &c.Reporting.ExportIntervalHCL, nil}, + {"reporting.snapshot_retention_time", + &c.Reporting.SnapshotRetentionTime, &c.Reporting.SnapshotRetentionTimeHCL, nil}, {"rpc.keep_alive_interval", &c.RPC.KeepAliveInterval, &c.RPC.KeepAliveIntervalHCL, nil}, {"rpc.connection_write_timeout", 
&c.RPC.ConnectionWriteTimeout, &c.RPC.ConnectionWriteTimeoutHCL, nil}, {"rpc.stream_open_timeout", &c.RPC.StreamOpenTimeout, &c.RPC.StreamOpenTimeoutHCL, nil}, diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 16988efe9..fda422997 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -343,9 +343,11 @@ var basicConfig = &Config{ }, }, Reporting: &config.ReportingConfig{ - ExportAddress: "http://localhost:8080", - ExportIntervalHCL: "15m", - ExportInterval: time.Minute * 15, + ExportAddress: "http://localhost:8080", + ExportIntervalHCL: "15m", + ExportInterval: time.Minute * 15, + SnapshotRetentionTime: time.Hour * 24, + SnapshotRetentionTimeHCL: "24h", License: &config.LicenseReportingConfig{ Enabled: pointer.Of(true), }, diff --git a/command/agent/http.go b/command/agent/http.go index 2bd712396..52c552677 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -496,6 +496,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) { s.mux.HandleFunc("/v1/operator/autopilot/health", s.wrap(s.OperatorServerHealth)) s.mux.HandleFunc("/v1/operator/snapshot", s.wrap(s.SnapshotRequest)) s.mux.HandleFunc("/v1/operator/upgrade-check/", s.wrap(s.UpgradeCheckRequest)) + s.mux.HandleFunc("/v1/operator/utilization", s.wrap(s.OperatorUtilizationRequest)) s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest)) s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries)) diff --git a/command/agent/operator_endpoint_ce.go b/command/agent/operator_endpoint_ce.go index 5916b573f..ece148d5b 100644 --- a/command/agent/operator_endpoint_ce.go +++ b/command/agent/operator_endpoint_ce.go @@ -25,6 +25,14 @@ func (s *HTTPServer) LicenseRequest(resp http.ResponseWriter, req *http.Request) } } +// OperatorUtilizationRequest is used to get a utilization reporting bundle. 
+func (s *HTTPServer) OperatorUtilizationRequest(resp http.ResponseWriter, req *http.Request) (any, error) { + if req.Method != http.MethodPost { + return nil, CodedError(405, ErrInvalidMethod) + } + return nil, CodedError(501, ErrEntOnly) +} + func autopilotToAPIEntState(_ structs.OperatorHealthReply, _ *api.OperatorHealthReply) interface{} { return nil } diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index ec7f60eca..d0e20d0bd 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -348,8 +348,9 @@ reporting { enabled = true } - address = "http://localhost:8080" - export_interval = "15m" + address = "http://localhost:8080" + export_interval = "15m" + snapshot_retention_time = "24h" } keyring "awskms" { diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index 2b75a36cb..7394e7b05 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -425,6 +425,7 @@ "reporting": { "address": "http://localhost:8080", "export_interval": "15m", + "snapshot_retention_time": "24h", "license": { "enabled": "true" } diff --git a/command/commands.go b/command/commands.go index 983f0299a..94b444214 100644 --- a/command/commands.go +++ b/command/commands.go @@ -882,6 +882,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "operator utilization": func() (cli.Command, error) { + return &OperatorUtilizationCommand{ + Meta: meta, + }, nil + }, "plan": func() (cli.Command, error) { return &JobPlanCommand{ diff --git a/command/operator_utilization.go b/command/operator_utilization.go new file mode 100644 index 000000000..f5392373e --- /dev/null +++ b/command/operator_utilization.go @@ -0,0 +1,112 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "time" + + "github.com/hashicorp/nomad/api" + "github.com/posener/complete" +) + +type OperatorUtilizationCommand struct { + Meta +} + +func (c *OperatorUtilizationCommand) Help() string { + helpText := ` +Usage: nomad operator utilization [options] + + This command allows Nomad Enterprise users to generate utilization reporting + bundles. If you have disabled automated reporting, use this command to + manually generate the report and send it to HashiCorp. If no snapshots were + persisted in the last 24 hrs, Nomad takes a new snapshot. + + If ACLs are enabled, this command requires a token with the 'operator:write' + capability. + + -message + Provide context about the conditions under which the report was generated + and submitted. This message is not included in the utilization bundle but + will be included in the Nomad server logs. + + -output + Specifies the output path for the bundle. Defaults to a time-based generated + file name in the current working directory. + + -today-only + Include snapshots from the previous 24 hours, not historical snapshots. 
+ +` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + + return strings.TrimSpace(helpText) +} + +func (c *OperatorUtilizationCommand) Synopsis() string { + return "Generate a utilization reporting bundle" +} + +func (c *OperatorUtilizationCommand) Name() string { return "operator utilization" } + +func (c *OperatorUtilizationCommand) AutocompleteFlags() complete.Flags { + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-message": complete.PredictNothing, + "-today-only": complete.PredictNothing, + "-output": complete.PredictFiles(""), + }) +} + +func (c *OperatorUtilizationCommand) Run(args []string) int { + var todayOnly bool + var message, outputPath string + + flags := c.Meta.FlagSet("operator utilization", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + flags.BoolVar(&todayOnly, "today-only", false, "only today's snapshot") + flags.StringVar(&outputPath, "output", "", "output path for the bundle") + flags.StringVar(&message, "message", "", "provided context for logs") + + if err := flags.Parse(args); err != nil { + return 1 + } + + args = flags.Args() + if len(args) != 0 { + c.Ui.Error("This command requires no arguments.") + c.Ui.Error(commandErrorText(c)) + return 1 + } + + client, err := c.Meta.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf("Error creating nomad API client: %s", err)) + return 1 + } + + resp, _, err := client.Operator().Utilization( + &api.OperatorUtilizationOptions{TodayOnly: todayOnly}, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error generating bundle: %s", err)) + return 1 + } + + if outputPath == "" { + t := time.Now().Unix() + outputPath = fmt.Sprintf("nomad-utilization-%v.json", t) + } + + err = os.WriteFile(outputPath, resp.Bundle, 0600) + if err != nil { + c.Ui.Error(fmt.Sprintf("Could not write bundle to file: %s", err)) + return 1 + } + + c.Ui.Output(fmt.Sprintf( + "Success! 
Utilization reporting bundle written to: %s", outputPath)) + return 0 +} diff --git a/nomad/event_endpoint.go b/nomad/event_endpoint.go index 13146ff95..74860fe3a 100644 --- a/nomad/event_endpoint.go +++ b/nomad/event_endpoint.go @@ -293,7 +293,11 @@ func validateNsOp(namespace string, topics map[structs.Topic][]string, aclObj *a if ok := aclObj.IsManagement(); !ok { return structs.ErrPermissionDenied } - default: + case structs.TopicOperator: + if ok := aclObj.AllowOperatorRead(); !ok { + return structs.ErrPermissionDenied + } + default: // including TopicAll if ok := aclObj.IsManagement(); !ok { return structs.ErrPermissionDenied } diff --git a/nomad/state/events.go b/nomad/state/events.go index 67e3fd93a..385eb0ffe 100644 --- a/nomad/state/events.go +++ b/nomad/state/events.go @@ -51,7 +51,10 @@ var MsgTypeEvents = map[structs.MessageType]string{ func eventsFromChanges(tx ReadTxn, changes Changes) *structs.Events { eventType, ok := MsgTypeEvents[changes.MsgType] if !ok { - return nil + eventType, ok = EnterpriseMsgTypeEvents[changes.MsgType] + if !ok { + return nil + } } var events []structs.Event @@ -238,8 +241,9 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) { Plugin: before, }, }, true + default: + return enterpriseEventFromChangeDeleted(change) } - return structs.Event{}, false } switch change.Table { @@ -468,7 +472,7 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) { Plugin: after, }, }, true + default: + return enterpriseEventFromChange(change) } - - return structs.Event{}, false } diff --git a/nomad/state/events_ce.go b/nomad/state/events_ce.go new file mode 100644 index 000000000..308f16b2d --- /dev/null +++ b/nomad/state/events_ce.go @@ -0,0 +1,22 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +//go:build !ent +// +build !ent + +package state + +import ( + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/structs" +) + +var EnterpriseMsgTypeEvents = map[structs.MessageType]string{} + +func enterpriseEventFromChangeDeleted(_ memdb.Change) (structs.Event, bool) { + return structs.Event{}, false +} + +func enterpriseEventFromChange(_ memdb.Change) (structs.Event, bool) { + return structs.Event{}, false +} diff --git a/nomad/structs/config/reporting.go b/nomad/structs/config/reporting.go index 24396f184..a8b0ecb24 100644 --- a/nomad/structs/config/reporting.go +++ b/nomad/structs/config/reporting.go @@ -53,6 +53,11 @@ type ReportingConfig struct { // for testing and should not be configured by end-users. ExportInterval time.Duration ExportIntervalHCL string `hcl:"export_interval" json:"-"` + + // SnapshotRetentionTime overrides the default time we retain utilization + // snapshots in Raft. + SnapshotRetentionTime time.Duration + SnapshotRetentionTimeHCL string `hcl:"snapshot_retention_time"` } func (r *ReportingConfig) Copy() *ReportingConfig { @@ -91,6 +96,12 @@ func (r *ReportingConfig) Merge(b *ReportingConfig) *ReportingConfig { if r.ExportInterval == 0 { result.ExportInterval = b.ExportInterval } + if r.SnapshotRetentionTime == 0 { + result.SnapshotRetentionTime = b.SnapshotRetentionTime + } + if r.SnapshotRetentionTimeHCL == "" { + result.SnapshotRetentionTimeHCL = b.SnapshotRetentionTimeHCL + } return &result } diff --git a/nomad/structs/event.go b/nomad/structs/event.go index 7a2e98fd0..92fc60d4b 100644 --- a/nomad/structs/event.go +++ b/nomad/structs/event.go @@ -34,6 +34,7 @@ const ( TopicHostVolume Topic = "HostVolume" TopicCSIVolume Topic = "CSIVolume" TopicCSIPlugin Topic = "CSIPlugin" + TopicOperator Topic = "Operator" TopicAll Topic = "*" TypeNodeRegistration = "NodeRegistration" @@ -71,6 +72,7 @@ const ( TypeCSIVolumeRegistered = "CSIVolumeRegistered" TypeCSIVolumeDeregistered = 
"CSIVolumeDeregistered" TypeCSIVolumeClaim = "CSIVolumeClaim" + TypeUtilizationSnapshotUpserted = "UtilizationSnapshotUpserted" ) // Event represents a change in Nomads state. diff --git a/website/content/api-docs/events.mdx b/website/content/api-docs/events.mdx index 2ec3eeedd..57e44bdff 100644 --- a/website/content/api-docs/events.mdx +++ b/website/content/api-docs/events.mdx @@ -44,6 +44,7 @@ by default, requiring a management token. | `Job` | `namespace:read-job` | | `NodePool` | `management` | | `Node` | `node:read` | +| `Operator` | `operator:read` | | `Service` | `namespace:read-job` | ### Parameters @@ -84,6 +85,7 @@ by default, requiring a management token. | Node | Node | | NodeDrain | Node | | NodePool | NodePool | +| Operator | UtilizationSnapshot (Enterprise only) | | Service | Service Registrations | ### Event Types @@ -120,6 +122,7 @@ by default, requiring a management token. | PlanResult | | ServiceDeregistration | | ServiceRegistration | +| UtilizationSnapshotUpserted | ### Sample Request diff --git a/website/content/api-docs/operator/utilization.mdx b/website/content/api-docs/operator/utilization.mdx new file mode 100644 index 000000000..19dfe10e0 --- /dev/null +++ b/website/content/api-docs/operator/utilization.mdx @@ -0,0 +1,59 @@ +--- +layout: api +page_title: Utilization - Operator - HTTP API +description: |- + The /operator/utilization endpoints provide tools for generating utilization reporting bundles for Nomad Enterprise. +--- + +# Operator Utilization HTTP API + +The `/operator/utilization` endpoints provide tools for generating utilization +reporting bundles for Nomad Enterprise. + + + +## Generate Nomad Enterprise Utilization Report Bundle + +This endpoint generates a utilization report. If Nomad did not record a +utilization snapshot in the previous 24 hours, Nomad records a utilization +snapshot first. 
+ +| Method | Path | Produces | +|--------|----------------------------|--------------------| +| `POST` | `/v1/operator/utilization` | `application/json` | + +This table shows this endpoint's support for [blocking queries][] and +[required ACLs][]. + +| Blocking Queries | ACL Required | +|------------------|------------------| +| `NO` | `operator:write` | + +### Parameters + +- `today` `(bool: false)` - Specifies to return a bundle that only includes + utilization snapshots from the previous 24 hours. + +### Sample Request + +```shell-session +$ curl -XPOST \ + https://localhost:4646/v1/operator/utilization?today=true +``` + +### Sample Response + +The `Bundle` field in the response body is a base64-encoded blob. The exact +format of this blob is not part of the Nomad API and is subject to change +between versions. The [`nomad operator utilization`][] command, which uses this +API, decodes this to a human-readable file in the current working directory. + +```json +{ + "Bundle": "eyJ2ZXJzaW9uIjoiMiIsIm1vZGUiOiJtYW51Y..." +} +``` + +[blocking queries]: /nomad/api-docs#blocking-queries +[required ACLs]: /nomad/api-docs#acls +[`nomad operator utilization`]: /nomad/docs/commands/operator/utilization diff --git a/website/content/docs/commands/operator/utilization.mdx b/website/content/docs/commands/operator/utilization.mdx new file mode 100644 index 000000000..b48f2f6cf --- /dev/null +++ b/website/content/docs/commands/operator/utilization.mdx @@ -0,0 +1,84 @@ +--- +layout: docs +page_title: nomad operator utilization command reference +description: |- + The "operator utilization" command generates utilization reporting bundles for Nomad Enterprise users. +--- + +# `nomad operator utilization` command reference + +The `operator utilization` command allows Nomad Enterprise users to generate +utilization reporting bundles. If you have disabled automated reporting, use +this command to manually generate the report and send it to HashiCorp. 
Nomad +takes a new snapshot when there are no snapshots from the past twenty-four +hours. + +If ACLs are enabled, this command requires a token with the `operator:write` +capability. + + + +Refer to the [manual license utilization +reporting](/nomad/docs/enterprise/license/manual-reporting) page to learn more +about reporting your Nomad Enterprise license utilization. + + + +## Command Options + +- `-message` `(string: "")` - Provide context about the conditions under which + the report was generated and submitted. This message is not included in the + utilization bundle but is included in the Nomad server logs. + +- `-output` `(string: "")` - Specifies the output path for the bundle. Defaults + to a time-based generated file name in the current working directory. + +- `-today-only` `(bool: false)` - Include snapshots from the previous 24 hours, + not historical snapshots. + +## General options + +@include 'general_options.mdx' + +## Examples + +Create a utilization reporting bundle that includes all persisted historical +snapshots and has the default bundle name +(`nomad-utilization-<TIMESTAMP>.json`). + +```shell-session +$ nomad operator utilization +Success! Utilization reporting bundle written to: nomad-utilization-1708122205.json +``` + +Create a utilization reporting bundle with a message about the bundle, and +output the file at the specified path, `/utilization/reports/latest.json`. + +```shell-session +$ nomad operator utilization \ + -message="Change Control 654987" \ + -output="/utilization/reports/latest.json" +``` + +**Example output** + + + +```plaintext +Success! Utilization reporting bundle written to: /utilization/reports/latest.json +``` + + + +The message, `Change Control 654987`, is included in the _server log_, but not +in the bundle. 
+ +**Example entry in the server log** + + + +```plaintext +[INFO] reporting: reporting bundle generated: message="Change Control 654987" +``` + + diff --git a/website/content/docs/configuration/reporting.mdx b/website/content/docs/configuration/reporting.mdx index d00fdaaa4..1ef8e8cd6 100644 --- a/website/content/docs/configuration/reporting.mdx +++ b/website/content/docs/configuration/reporting.mdx @@ -22,9 +22,11 @@ data is shared. ```hcl reporting { - license { - enabled = true - } + license { + enabled = true + } + + snapshot_retention_time = "9600h" } ``` @@ -33,6 +35,11 @@ reporting { - `license` ([license](#license-block): default) - Configures automated license utilization reporting. +- `snapshot_retention_time` `(string: "9600h")` - Configures the maximum amount + of time that Nomad retains a utilization reporting snapshot in the Nomad + state store. You can export these snapshots with the [`nomad operator + utilization`][] command. + ## `license` Block - `enabled` `(bool: true)` - Specifies whether automated license utilization @@ -40,3 +47,4 @@ reporting { [server_mode_enabled]: /nomad/docs/configuration/server#enabled [automated_license_utilization_reporting]: /nomad/docs/enterprise/license/utilization-reporting +[`nomad operator utilization`]: /nomad/docs/commands/operator/utilization diff --git a/website/data/api-docs-nav-data.json b/website/data/api-docs-nav-data.json index e89bec40a..689540408 100644 --- a/website/data/api-docs-nav-data.json +++ b/website/data/api-docs-nav-data.json @@ -136,6 +136,10 @@ { "title": "Upgrade Check", "path": "operator/upgrade-check" + }, + { + "title": "Utilization Reporting", + "path": "operator/utilization" } ] }, diff --git a/website/data/docs-nav-data.json b/website/data/docs-nav-data.json index c4e6a7a20..682231679 100644 --- a/website/data/docs-nav-data.json +++ b/website/data/docs-nav-data.json @@ -1014,7 +1014,11 @@ "path": "commands/operator/snapshot/state" } ] - } + }, + { + "title": "utilization", + "path": 
"commands/operator/utilization" + } ] }, {