offline license utilization reporting (#25844)

Nomad Enterprise users operating in air-gapped or otherwise secured environments
don't want to send license reporting metrics directly from their
servers. Implement manual/offline reporting by periodically recording usage
metrics snapshots in the state store, and providing an API and CLI by which
cluster administrators can download the snapshot for review and out-of-band
transmission to HashiCorp.

This is the CE portion of the work required for implementation in the Enterprise
product. Nomad CE does not perform utilization reporting.

Ref: https://github.com/hashicorp/nomad-enterprise/pull/2673
Ref: https://hashicorp.atlassian.net/browse/NMD-68
Ref: https://go.hashi.co/rfc/nmd-210
This commit is contained in:
Tim Gross
2025-05-14 09:51:13 -04:00
committed by GitHub
parent 79d35f072a
commit 8a5a057d88
21 changed files with 380 additions and 14 deletions

3
.changelog/25844.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
reporting (Enterprise): Added support for offline utilization reporting
```

View File

@@ -8,6 +8,7 @@ import (
"errors"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"
@@ -475,3 +476,28 @@ func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWo
}
return &resp, qm, nil
}
// OperatorUtilizationOptions holds the optional settings for
// Operator.Utilization.
type OperatorUtilizationOptions struct {
// TodayOnly, when true, adds "today=true" to the request query string.
TodayOnly bool
}
// OperatorUtilizationSnapshotResponse is the response decoded from the
// utilization endpoint by Operator.Utilization.
type OperatorUtilizationSnapshotResponse struct {
// Bundle is the JSON serialized utilization reporting bundle.
Bundle []byte
WriteMeta
}
// Utilization retrieves a utilization reporting bundle (Nomad Enterprise only).
//
// opts may be nil, in which case the request is sent with default options
// (no "today" query parameter). When opts.TodayOnly is set, "today=true" is
// added to the query string. w carries the usual write options and may be nil.
//
// On success it returns the decoded response together with the write
// metadata; on failure both are nil and the error from the POST is returned.
func (op *Operator) Utilization(opts *OperatorUtilizationOptions, w *WriteOptions) (*OperatorUtilizationSnapshotResponse, *WriteMeta, error) {
	resp := &OperatorUtilizationSnapshotResponse{}
	v := url.Values{}
	// Guard against a nil options pointer so callers that want the defaults
	// do not trigger a nil-pointer dereference.
	if opts != nil && opts.TodayOnly {
		v.Add("today", "true")
	}

	wm, err := op.c.post("/v1/operator/utilization?"+v.Encode(), nil, resp, w)
	if err != nil {
		return nil, nil, err
	}
	return resp, wm, nil
}

View File

@@ -182,6 +182,8 @@ func ParseConfigFile(path string) (*Config, error) {
},
{"reporting.export_interval",
&c.Reporting.ExportInterval, &c.Reporting.ExportIntervalHCL, nil},
{"reporting.snapshot_retention_time",
&c.Reporting.SnapshotRetentionTime, &c.Reporting.SnapshotRetentionTimeHCL, nil},
{"rpc.keep_alive_interval", &c.RPC.KeepAliveInterval, &c.RPC.KeepAliveIntervalHCL, nil},
{"rpc.connection_write_timeout", &c.RPC.ConnectionWriteTimeout, &c.RPC.ConnectionWriteTimeoutHCL, nil},
{"rpc.stream_open_timeout", &c.RPC.StreamOpenTimeout, &c.RPC.StreamOpenTimeoutHCL, nil},

View File

@@ -343,9 +343,11 @@ var basicConfig = &Config{
},
},
Reporting: &config.ReportingConfig{
ExportAddress: "http://localhost:8080",
ExportIntervalHCL: "15m",
ExportInterval: time.Minute * 15,
ExportAddress: "http://localhost:8080",
ExportIntervalHCL: "15m",
ExportInterval: time.Minute * 15,
SnapshotRetentionTime: time.Hour * 24,
SnapshotRetentionTimeHCL: "24h",
License: &config.LicenseReportingConfig{
Enabled: pointer.Of(true),
},

View File

@@ -496,6 +496,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
s.mux.HandleFunc("/v1/operator/autopilot/health", s.wrap(s.OperatorServerHealth))
s.mux.HandleFunc("/v1/operator/snapshot", s.wrap(s.SnapshotRequest))
s.mux.HandleFunc("/v1/operator/upgrade-check/", s.wrap(s.UpgradeCheckRequest))
s.mux.HandleFunc("/v1/operator/utilization", s.wrap(s.OperatorUtilizationRequest))
s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest))
s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries))

View File

@@ -25,6 +25,14 @@ func (s *HTTPServer) LicenseRequest(resp http.ResponseWriter, req *http.Request)
}
}
// OperatorUtilizationRequest is used to get a utilization reporting bundle.
// Only POST is accepted; every other method is rejected with a 405. In this
// implementation a POST always fails with a 501 carrying ErrEntOnly.
func (s *HTTPServer) OperatorUtilizationRequest(resp http.ResponseWriter, req *http.Request) (any, error) {
	switch req.Method {
	case http.MethodPost:
		return nil, CodedError(http.StatusNotImplemented, ErrEntOnly)
	default:
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}
}
// autopilotToAPIEntState ignores both of its arguments and always returns nil.
func autopilotToAPIEntState(_ structs.OperatorHealthReply, _ *api.OperatorHealthReply) any {
	return nil
}

View File

@@ -348,8 +348,9 @@ reporting {
enabled = true
}
address = "http://localhost:8080"
export_interval = "15m"
address = "http://localhost:8080"
export_interval = "15m"
snapshot_retention_time = "24h"
}
keyring "awskms" {

View File

@@ -425,6 +425,7 @@
"reporting": {
"address": "http://localhost:8080",
"export_interval": "15m",
"snapshot_retention_time": "24h",
"license": {
"enabled": "true"
}

View File

@@ -882,6 +882,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"operator utilization": func() (cli.Command, error) {
return &OperatorUtilizationCommand{
Meta: meta,
}, nil
},
"plan": func() (cli.Command, error) {
return &JobPlanCommand{

View File

@@ -0,0 +1,112 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package command
import (
"fmt"
"os"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/posener/complete"
)
// OperatorUtilizationCommand implements the "operator utilization" CLI
// command, which generates a utilization reporting bundle.
type OperatorUtilizationCommand struct {
Meta
}
// Help returns the long-form usage text for the command: the command-specific
// flags followed by the general options usage, with surrounding whitespace
// trimmed.
func (c *OperatorUtilizationCommand) Help() string {
helpText := `
Usage: nomad operator utilization [options]
This command allows Nomad Enterprise users to generate utilization reporting
bundles. If you have disabled automated reporting, use this command to
manually generate the report and send it to HashiCorp. If no snapshots were
persisted in the last 24 hrs, Nomad takes a new snapshot.
If ACLs are enabled, this command requires a token with the 'operator:write'
capability.
-message
Provide context about the conditions under which the report was generated
and submitted. This message is not included in the utilization bundle but
will be included in the Nomad server logs.
-output
Specifies the output path for the bundle. Defaults to a time-based generated
file name in the current working directory.
-today-only
Include snapshots from the previous 24 hours, not historical snapshots.
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace)
return strings.TrimSpace(helpText)
}
// Synopsis returns the one-line description of the command.
func (c *OperatorUtilizationCommand) Synopsis() string {
return "Generate a utilization reporting bundle"
}
// Name returns the full name of the command.
func (c *OperatorUtilizationCommand) Name() string { return "operator utilization" }
// AutocompleteFlags returns the shell-completion flag set for the command:
// the client flags from Meta merged with the flags specific to this command.
func (c *OperatorUtilizationCommand) AutocompleteFlags() complete.Flags {
	commandFlags := complete.Flags{
		"-message":    complete.PredictNothing,
		"-today-only": complete.PredictNothing,
		"-output":     complete.PredictFiles(""),
	}
	clientFlags := c.Meta.AutocompleteFlags(FlagSetClient)
	return mergeAutocompleteFlags(clientFlags, commandFlags)
}
// Run parses the command-line flags, requests a utilization reporting bundle
// through the API client, and writes the bundle to a file. It returns 0 on
// success and 1 on any failure (bad flags, unexpected arguments, client or
// API errors, or a failed file write).
func (c *OperatorUtilizationCommand) Run(args []string) int {
	var (
		todayOnly  bool
		outputPath string
		// NOTE(review): message is parsed from -message but is not forwarded
		// anywhere in this function — confirm whether it should be sent with
		// the request.
		message string
	)

	flags := c.Meta.FlagSet("operator utilization", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&todayOnly, "today-only", false, "only today's snapshot")
	flags.StringVar(&outputPath, "output", "", "output path for the bundle")
	flags.StringVar(&message, "message", "", "provided context for logs")
	if parseErr := flags.Parse(args); parseErr != nil {
		return 1
	}

	// The command takes flags only; any positional argument is an error.
	if extra := flags.Args(); len(extra) != 0 {
		c.Ui.Error("This command requires no arguments.")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error creating nomad API client: %s", err))
		return 1
	}

	opts := &api.OperatorUtilizationOptions{TodayOnly: todayOnly}
	resp, _, err := client.Operator().Utilization(opts, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error generating bundle: %s", err))
		return 1
	}

	// With no -output given, fall back to a file name in the current working
	// directory built from the current Unix timestamp.
	if outputPath == "" {
		outputPath = fmt.Sprintf("nomad-utilization-%v.json", time.Now().Unix())
	}

	// The bundle is written readable and writable by the owner only.
	if writeErr := os.WriteFile(outputPath, resp.Bundle, 0600); writeErr != nil {
		c.Ui.Error(fmt.Sprintf("Could not write bundle to file: %s", writeErr))
		return 1
	}

	c.Ui.Output(fmt.Sprintf(
		"Success! Utilization reporting bundle written to: %s", outputPath))
	return 0
}

View File

@@ -293,7 +293,11 @@ func validateNsOp(namespace string, topics map[structs.Topic][]string, aclObj *a
if ok := aclObj.IsManagement(); !ok {
return structs.ErrPermissionDenied
}
default:
case structs.TopicOperator:
if ok := aclObj.AllowOperatorRead(); !ok {
return structs.ErrPermissionDenied
}
default: // including TopicAll
if ok := aclObj.IsManagement(); !ok {
return structs.ErrPermissionDenied
}

View File

@@ -51,7 +51,10 @@ var MsgTypeEvents = map[structs.MessageType]string{
func eventsFromChanges(tx ReadTxn, changes Changes) *structs.Events {
eventType, ok := MsgTypeEvents[changes.MsgType]
if !ok {
return nil
eventType, ok = EnterpriseMsgTypeEvents[changes.MsgType]
if !ok {
return nil
}
}
var events []structs.Event
@@ -238,8 +241,9 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) {
Plugin: before,
},
}, true
default:
return enterpriseEventFromChangeDeleted(change)
}
return structs.Event{}, false
}
switch change.Table {
@@ -468,7 +472,7 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) {
Plugin: after,
},
}, true
default:
return enterpriseEventFromChange(change)
}
return structs.Event{}, false
}

22
nomad/state/events_ce.go Normal file
View File

@@ -0,0 +1,22 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
//go:build !ent
// +build !ent
package state
import (
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/structs"
)
// EnterpriseMsgTypeEvents maps message types to event type names. It is
// declared empty in this file, which is built only without the "ent" tag.
var EnterpriseMsgTypeEvents = map[structs.MessageType]string{}
// enterpriseEventFromChangeDeleted is the stub used in builds without the
// "ent" tag. It ignores the change and reports that no event was produced.
func enterpriseEventFromChangeDeleted(_ memdb.Change) (structs.Event, bool) {
	var none structs.Event
	return none, false
}
// enterpriseEventFromChange is the stub used in builds without the "ent" tag.
// It ignores the change and reports that no event was produced.
func enterpriseEventFromChange(_ memdb.Change) (structs.Event, bool) {
	var none structs.Event
	return none, false
}

View File

@@ -53,6 +53,11 @@ type ReportingConfig struct {
// for testing and should not be configured by end-users.
ExportInterval time.Duration
ExportIntervalHCL string `hcl:"export_interval" json:"-"`
// SnapshotRetentionTime overrides the default time we retain utilization
// snapshots in Raft.
SnapshotRetentionTime time.Duration
SnapshotRetentionTimeHCL string `hcl:"snapshot_retention_time"`
}
func (r *ReportingConfig) Copy() *ReportingConfig {
@@ -91,6 +96,12 @@ func (r *ReportingConfig) Merge(b *ReportingConfig) *ReportingConfig {
if r.ExportInterval == 0 {
result.ExportInterval = b.ExportInterval
}
if r.SnapshotRetentionTime == 0 {
result.SnapshotRetentionTime = b.SnapshotRetentionTime
}
if r.SnapshotRetentionTimeHCL == "" {
result.SnapshotRetentionTimeHCL = b.SnapshotRetentionTimeHCL
}
return &result
}

View File

@@ -34,6 +34,7 @@ const (
TopicHostVolume Topic = "HostVolume"
TopicCSIVolume Topic = "CSIVolume"
TopicCSIPlugin Topic = "CSIPlugin"
TopicOperator Topic = "Operator"
TopicAll Topic = "*"
TypeNodeRegistration = "NodeRegistration"
@@ -71,6 +72,7 @@ const (
TypeCSIVolumeRegistered = "CSIVolumeRegistered"
TypeCSIVolumeDeregistered = "CSIVolumeDeregistered"
TypeCSIVolumeClaim = "CSIVolumeClaim"
TypeUtilizationSnapshotUpserted = "UtilizationSnapshotUpserted"
)
// Event represents a change in Nomads state.

View File

@@ -44,6 +44,7 @@ by default, requiring a management token.
| `Job` | `namespace:read-job` |
| `NodePool` | `management` |
| `Node` | `node:read` |
| `Operator` | `operator:read` |
| `Service` | `namespace:read-job` |
### Parameters
@@ -84,6 +85,7 @@ by default, requiring a management token.
| Node | Node |
| NodeDrain | Node |
| NodePool | NodePool |
| Operator | UtilizationSnapshot (Enterprise only) |
| Service | Service Registrations |
### Event Types
@@ -120,6 +122,7 @@ by default, requiring a management token.
| PlanResult |
| ServiceDeregistration |
| ServiceRegistration |
| UtilizationSnapshotUpserted |
### Sample Request

View File

@@ -0,0 +1,59 @@
---
layout: api
page_title: Utilization - Operator - HTTP API
description: |-
The /operator/utilization endpoints provide tools for generating utilization reporting bundles for Nomad Enterprise.
---
# Operator Utilization HTTP API
The `/operator/utilization` endpoints provide tools for generating utilization
reporting bundles for Nomad Enterprise.
<EnterpriseAlert />
## Generate Nomad Enterprise Utilization Report Bundle
This endpoint generates a utilization report. If Nomad did not record a
utilization snapshot in the previous 24 hours, Nomad records a utilization
snapshot first.
| Method | Path | Produces |
|--------|----------------------------|--------------------|
| `POST` | `/v1/operator/utilization` | `application/json` |
This table shows this endpoint's support for [blocking queries][] and
[required ACLs][].
| Blocking Queries | ACL Required |
|------------------|------------------|
| `NO` | `operator:write` |
### Parameters
- `today` `(bool: false)` - Specifies to return a bundle that only includes
utilization snapshots from the previous 24 hours.
### Sample Request
```shell-session
$ curl -XPOST \
https://localhost:4646/v1/operator/utilization?today=true
```
### Sample Response
The `Bundle` field in the response body is a base64-encoded blob. The exact
format of this blob is not part of the Nomad API and is subject to change
between versions. The [`nomad operator utilization`][] command, which uses this
API, decodes this to a human-readable file in the current working directory.
```json
{
"Bundle": "eyJ2ZXJzaW9uIjoiMiIsIm1vZGUiOiJtYW51Y..."
}
```
[blocking queries]: /nomad/api-docs#blocking-queries
[required ACLs]: /nomad/api-docs#acls
[`nomad operator utilization`]: /nomad/docs/commands/operator/utilization

View File

@@ -0,0 +1,84 @@
---
layout: docs
page_title: nomad operator utilization command reference
description: |-
The "operator utilization" command generates utilization reporting bundles for Nomad Enterprise users.
---
# `nomad operator utilization` command reference
The `operator utilization` command allows Nomad Enterprise users to generate
utilization reporting bundles. If you have disabled automated reporting, use
this command to manually generate the report and send it to HashiCorp. Nomad
takes a new snapshot when there are no snapshots from the past twenty-four
hours.
If ACLs are enabled, this command requires a token with the `operator:write`
capability.
<Note title="Nomad Enterprise">
Refer to the [manual license utilization
reporting](/nomad/docs/enterprise/license/manual-reporting) page to learn more
about reporting your Nomad Enterprise license utilization.
</Note>
## Command Options
- `-message` `(string: "")` - Provide context about the conditions under which
the report was generated and submitted. This message is not included in the
utilization bundle but is included in the Nomad server logs.
- `-output` `(string: "")` - Specifies the output path for the bundle. Defaults
to a time-based generated file name in the current working directory.
- `-today-only` `(bool: false)` - Include snapshots from the previous 24 hours,
not historical snapshots.
## General options
@include 'general_options.mdx'
## Examples
Create a utilization reporting bundle that includes all persisted historical
snapshots and has the default bundle name
(`nomad-utilization-<time_stamp>.json`).
```shell-session
$ nomad operator utilization
Success! Utilization reporting bundle written to: nomad-utilization-1708122205.json
```
Create a utilization reporting bundle with a message about the bundle, and
output the file at the specified path, `/utilization/reports/latest.json`.
```shell-session
$ nomad operator utilization \
-message="Change Control 654987" \
-output="/utilization/reports/latest.json"
```
**Example output**
<CodeBlockConfig hideClipboard>
```plaintext
Success! Utilization reporting bundle written to: /utilization/reports/latest.json
```
</CodeBlockConfig>
The message, `Change Control 654987`, is included in the _server log_, but not
in the bundle.
**Example entry in the server log**
<CodeBlockConfig hideClipboard>
```plaintext
[INFO] reporting: reporting bundle generated: message="Change Control 654987"
```
</CodeBlockConfig>

View File

@@ -22,9 +22,11 @@ data is shared.
```hcl
reporting {
license {
enabled = true
}
license {
enabled = true
}
snapshot_retention_time = "9600h"
}
```
@@ -33,6 +35,11 @@ reporting {
- `license` <code>([license](#license-block): default)</code> - Configures
automated license utilization reporting.
- `snapshot_retention_time` `(string: "9600h")` - Configures the maximum amount
of time that Nomad retains a utilization reporting snapshot in the Nomad
state store. You can export these snapshots with the [`nomad operator
utilization`][] command.
## `license` Block
- `enabled` `(bool: true)` - Specifies whether automated license utilization
@@ -40,3 +47,4 @@ reporting {
[server_mode_enabled]: /nomad/docs/configuration/server#enabled
[automated_license_utilization_reporting]: /nomad/docs/enterprise/license/utilization-reporting
[`nomad operator utilization`]: /nomad/docs/commands/operator/utilization

View File

@@ -136,6 +136,10 @@
{
"title": "Upgrade Check",
"path": "operator/upgrade-check"
},
{
"title": "Utilization Reporting",
"path": "operator/utilization"
}
]
},

View File

@@ -1014,7 +1014,11 @@
"path": "commands/operator/snapshot/state"
}
]
}
},
{
"title": "utilization",
"path": "commands/operator/utilization"
}
]
},
{