Files
nomad/command/operator_snapshot_save.go
Tim Gross a7f2cb879e command line tools for redacting keyring from snapshots (#24023)
In #23977 we moved the keyring into Raft, which can expose key material in Raft
snapshots when using the less-secure AEAD keyring instead of KMS. This changeset
adds tools for redacting this material from snapshots:

* The `operator snapshot state` command gains the ability to display key
  metadata (only), which respects the `-filter` option.
* The `operator snapshot save` command gains a `-redact` option that removes key
  material from the snapshot after it's downloaded.
* A new `operator snapshot redact` command allows removing key material from an
  existing snapshot.
2024-09-20 15:30:14 -04:00

171 lines
4.5 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package command
import (
"fmt"
"io"
"os"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/helper/raftutil"
"github.com/posener/complete"
)
// OperatorSnapshotSaveCommand implements the "operator snapshot save" CLI
// command. It embeds Meta, which supplies the flag set, the UI, and the API
// client used by the methods below.
type OperatorSnapshotSaveCommand struct {
Meta
}
// Help returns the long-form usage text for "operator snapshot save". The
// text is assembled from a usage/examples preamble, the shared general
// options, and the options specific to this command, then trimmed of
// surrounding whitespace.
func (c *OperatorSnapshotSaveCommand) Help() string {
	var out strings.Builder

	// Usage line, description, examples, and the heading for shared options.
	out.WriteString(`
Usage: nomad operator snapshot save [options] <file>
Retrieves an atomic, point-in-time snapshot of the state of the Nomad servers
which includes jobs, nodes, allocations, periodic jobs, and ACLs.
If ACLs are enabled, a management token must be supplied in order to perform
snapshot operations.
To create a snapshot from the leader server and save it to "backup.snap":
$ nomad operator snapshot save backup.snap
To create a potentially stale snapshot from any available server (useful if no
leader is available):
$ nomad operator snapshot save -stale backup.snap
This is useful for situations where a cluster is in a degraded state and no
leader is available. To target a specific server for a snapshot, you can run
the 'nomad operator snapshot save' command on that specific server.
General Options:
`)

	// Options shared across commands; namespace flags are excluded.
	out.WriteString(generalOptionsUsage(usageOptsDefault | usageOptsNoNamespace))

	// Options that only apply to this command.
	out.WriteString(`
Snapshot Save Options:
-redact
The -redact option will locally edit the snapshot to remove any cleartext key
material from the root keyring. Only the AEAD keyring provider has cleartext
key material in Raft. Note that this operation requires loading the snapshot
into memory locally.
-stale
The -stale option defaults to "false" which means the leader provides the
result. If the cluster is in an outage state without a leader, you may need
to set -stale to "true" to get the configuration from a non-leader server.
`)

	return strings.TrimSpace(out.String())
}
// AutocompleteFlags returns the flags offered for shell completion: the
// client flag set from Meta merged with the command-specific -redact and
// -stale flags that Run registers.
func (c *OperatorSnapshotSaveCommand) AutocompleteFlags() complete.Flags {
	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
		complete.Flags{
			// -redact is a boolean switch, so there is no value to predict.
			"-redact": complete.PredictNothing,
			"-stale":  complete.PredictAnything,
		})
}
// AutocompleteArgs returns the predictor for positional arguments. It returns
// complete.PredictNothing, so no completion is offered for the destination
// filename.
func (c *OperatorSnapshotSaveCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictNothing
}
// Synopsis returns the one-line description of the command.
func (c *OperatorSnapshotSaveCommand) Synopsis() string {
	const synopsis = "Saves snapshot of Nomad server state"
	return synopsis
}
// Name returns the full command name. Run also passes it to Meta.FlagSet as
// the name of the flag set.
func (c *OperatorSnapshotSaveCommand) Name() string {
	const name = "operator snapshot save"
	return name
}
// Run executes "operator snapshot save".
//
// It parses the -stale and -redact flags, accepts at most one positional
// argument (the destination filename), and refuses to overwrite an existing
// file. When no filename is given, one is generated from the current date and
// Unix time. The snapshot is downloaded into "<filename>.tmp", optionally
// redacted with raftutil.RedactSnapshot, and then renamed to the destination.
// The temporary file is closed and removed on every failure path.
//
// It returns 0 on success and 1 on any error, after reporting the error on
// the command's UI.
func (c *OperatorSnapshotSaveCommand) Run(args []string) int {
	var stale, redact bool

	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&stale, "stale", false, "")
	flags.BoolVar(&redact, "redact", false, "")
	if err := flags.Parse(args); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
		return 1
	}

	// Check for misuse
	// Check that we either got no filename or exactly one.
	args = flags.Args()
	if len(args) > 1 {
		c.Ui.Error("This command takes either no arguments or one: <filename>")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	// Default filename is nomad-state-YYYYMMDD-<unix>.snap. Every date
	// component is zero-padded so the name has a fixed width and sorts
	// chronologically.
	now := time.Now()
	filename := fmt.Sprintf("nomad-state-%04d%02d%02d-%d.snap", now.Year(), now.Month(), now.Day(), now.Unix())
	if len(args) == 1 {
		filename = args[0]
	}

	// Never clobber an existing file; any Lstat failure other than "does not
	// exist" is reported as unexpected.
	if _, err := os.Lstat(filename); err == nil {
		c.Ui.Error(fmt.Sprintf("Destination file already exists: %q", filename))
		c.Ui.Error(commandErrorText(c))
		return 1
	} else if !os.IsNotExist(err) {
		c.Ui.Error(fmt.Sprintf("Unexpected failure checking %q: %v", filename, err))
		return 1
	}

	// Set up a client.
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	tmpFile, err := os.Create(filename + ".tmp")
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to create file: %v", err))
		return 1
	}

	// finalized is set once the temp file has been renamed into place. Until
	// then, every return path must release the handle and remove the partial
	// file so failed runs leave nothing behind.
	finalized := false
	defer func() {
		// Close is a harmless no-op error if the file was already closed
		// below; both errors are ignored because this is best-effort cleanup.
		_ = tmpFile.Close()
		if !finalized {
			_ = os.Remove(tmpFile.Name())
		}
	}()

	// Fetch the current configuration.
	q := &api.QueryOptions{
		AllowStale: stale,
	}
	snapIn, err := client.Operator().Snapshot(q)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to get snapshot file: %v", err))
		return 1
	}
	defer snapIn.Close()

	if _, err := io.Copy(tmpFile, snapIn); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to download snapshot file: %v", err))
		return 1
	}

	if redact {
		c.Ui.Info("Redacting key material from snapshot")
		// NOTE(review): assumes RedactSnapshot rewrites the file in place and
		// leaves the handle open for the caller to close — confirm.
		if err := raftutil.RedactSnapshot(tmpFile); err != nil {
			c.Ui.Error(fmt.Sprintf("Could not redact snapshot: %v", err))
			return 1
		}
	}

	// Close before renaming: a failed close can indicate lost writes, and
	// renaming a file that is still open fails on Windows.
	if err := tmpFile.Close(); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to write snapshot file: %v", err))
		return 1
	}

	if err := os.Rename(tmpFile.Name(), filename); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to finalize snapshot file: %v", err))
		return 1
	}
	finalized = true

	c.Ui.Output(fmt.Sprintf("State file written to %v", filename))
	return 0
}