Merge pull request #8214 from hashicorp/docs-snapshot-update

Update changelog and snapshot docs
This commit is contained in:
Mahmood Ali
2020-06-19 14:27:12 -04:00
committed by GitHub
16 changed files with 1898 additions and 427 deletions

View File

@@ -4,13 +4,16 @@ FEATURES:
* **Preemption**: Preemption is now an open source feature
* **Licensing (Enterprise)**: Nomad Enterprise now requires a license [[GH-8076](https://github.com/hashicorp/nomad/issues/8076)]
* **Multiregion Deployments (Enterprise)**: Nomad Enterprise now allows running deployments to multiple regions. [[GH-8184](https://github.com/hashicorp/nomad/issues/8184)]
* **Snapshot Backup and Restore**: Nomad eases disaster recovery with new endpoints and commands for point-in-time snapshots.
IMPROVEMENTS:
* core: support for persisting previous task group counts when updating a job [[GH-8168](https://github.com/hashicorp/nomad/issues/8168)]
* core: block Job.Scale actions when the job is under active deployment [[GH-8187](https://github.com/hashicorp/nomad/issues/8187)]
* core: Support for persisting previous task group counts when updating a job [[GH-8168](https://github.com/hashicorp/nomad/issues/8168)]
* core: Block Job.Scale actions when the job is under active deployment [[GH-8187](https://github.com/hashicorp/nomad/issues/8187)]
* api: Persist previous count with scaling events [[GH-8167](https://github.com/hashicorp/nomad/issues/8167)]
* api: Support querying for jobs and allocations across all namespaces [[GH-8192](https://github.com/hashicorp/nomad/issues/8192)]
* build: Updated to Go 1.14.4 [[GH-8172](https://github.com/hashicorp/nomad/issues/8172)]
* server: Added `raft_multiplier` config to tweak Raft related timeouts [[GH-8082](https://github.com/hashicorp/nomad/issues/8082)]
BUG FIXES:

View File

@@ -15,7 +15,7 @@ type OperatorSnapshotInspectCommand struct {
func (c *OperatorSnapshotInspectCommand) Help() string {
helpText := `
Usage: nomad operator snapshot inspect [options] FILE
Usage: nomad operator snapshot inspect [options] <file>
Displays information about a snapshot file on disk.

View File

@@ -15,7 +15,7 @@ type OperatorSnapshotRestoreCommand struct {
func (c *OperatorSnapshotRestoreCommand) Help() string {
helpText := `
Usage: nomad operator snapshot restore [options] FILE
Usage: nomad operator snapshot restore [options] <file>
Restores an atomic, point-in-time snapshot of the state of the Nomad servers
which includes jobs, nodes, allocations, periodic jobs, and ACLs.

View File

@@ -17,7 +17,7 @@ type OperatorSnapshotSaveCommand struct {
func (c *OperatorSnapshotSaveCommand) Help() string {
helpText := `
Usage: nomad operator snapshot save [options] <filename>
Usage: nomad operator snapshot save [options] <file>
Retrieves an atomic, point-in-time snapshot of the state of the Nomad servers
which includes jobs, nodes, allocations, periodic jobs, and ACLs.
@@ -32,6 +32,13 @@ Usage: nomad operator snapshot save [options] <filename>
To create a potentially stale snapshot from any available server (useful if no
leader is available):
$ nomad operator snapshot save -stale backup.snap
This is useful for situations where a cluster is in a degraded state and no
leader is available. To target a specific server for a snapshot, you can run
the 'nomad operator snapshot save' command on that specific server.
General Options:
` + generalOptionsUsage() + `

4
website/.stylelintrc.js Normal file
View File

@@ -0,0 +1,4 @@
// Stylelint configuration for the website. Spreads in the configuration
// exported by `@hashicorp/nextjs-scripts/.stylelintrc.js`; keys added after
// the spread take precedence over the spread-in values.
module.exports = {
...require('@hashicorp/nextjs-scripts/.stylelintrc.js'),
/* Specify overrides here */
}

View File

@@ -129,7 +129,11 @@ export default [
'keygen',
'keyring',
'raft-list-peers',
'raft-remove-peer'
'raft-remove-peer',
'snapshot-agent',
'snapshot-inspect',
'snapshot-restore',
'snapshot-save'
]
},
{ category: 'plugin', content: ['status'] },

View File

@@ -1,17 +1,20 @@
import DocsPage from '@hashicorp/react-docs-page'
import order from '../data/docs-navigation.js'
import { frontMatter } from '../pages/docs/**/*.mdx'
import { MDXProvider } from '@mdx-js/react'
import Placement from '../components/placement-table'
import Head from 'next/head'
import Link from 'next/link'
import DocsPage from '@hashicorp/react-docs-page'
import { createMdxProvider } from '@hashicorp/nextjs-scripts/lib/providers/docs'
import order from '../data/docs-navigation.js'
import { frontMatter } from '../pages/docs/**/*.mdx'
import Placement from '../components/placement-table'
const DEFAULT_COMPONENTS = { Placement }
const MDXProvider = createMdxProvider({
product: 'nomad',
additionalComponents: { Placement },
})
function DocsLayoutWrapper(pageMeta) {
function DocsLayout(props) {
return (
<MDXProvider components={DEFAULT_COMPONENTS}>
<MDXProvider>
<DocsPage
{...props}
product="nomad"
@@ -19,14 +22,14 @@ function DocsLayoutWrapper(pageMeta) {
is: Head,
title: `${pageMeta.page_title} | Nomad by HashiCorp`,
description: pageMeta.description,
siteName: 'Nomad by HashiCorp'
siteName: 'Nomad by HashiCorp',
}}
sidenav={{
Link,
category: 'docs',
currentPage: props.path,
data: frontMatter,
order
order,
}}
resourceURL={`https://github.com/hashicorp/nomad/blob/master/website/pages/${pageMeta.__resourcePath}`}
/>

1868
website/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@
"version": "0.0.1",
"author": "HashiCorp",
"dependencies": {
"@hashicorp/nextjs-scripts": "^7.1.2",
"@hashicorp/nextjs-scripts": "^10.0.2",
"@hashicorp/react-alert-banner": "^3.1.0",
"@hashicorp/react-button": "^2.2.0",
"@hashicorp/react-call-to-action": "^0.2.0",

View File

@@ -577,3 +577,83 @@ $ curl \
```
[`default_scheduler_config`]: /docs/configuration/server#default_scheduler_config
## Generate Snapshot
This endpoint generates and returns an atomic, point-in-time snapshot of the
Nomad server state for disaster recovery. Snapshots include all state managed by Nomad's
Raft [consensus protocol](/docs/internals/consensus).
Snapshots are exposed as gzipped tar archives which internally contain the Raft
metadata required to restore, as well as a binary serialized version of the
Nomad server state. The contents are covered internally by SHA-256 hashes.
These hashes are verified during snapshot restore operations. The structure of
the archive is internal to Nomad and not intended to be used other than for
restore operations. The archives are not designed to be modified before a
restore.
| Method | Path | Produces |
| :----- | :---------------------- | ------------------------ |
| `GET` | `/v1/operator/snapshot` | `200 application/x-gzip` |
The table below shows this endpoint's support for
[blocking queries](/api-docs#blocking-queries) and
[required ACLs](/api-docs#acls).
| Blocking Queries | ACL Required |
| ---------------- | ------------ |
| `NO` | `management` |
### Parameters
- `stale` - Specifies if the cluster should respond without an active leader.
This is specified as a query string parameter.
### Sample Request
```shell-session
$ curl \
-o snapshot.tgz \
http://127.0.0.1:4646/v1/operator/snapshot
```
The above example results in a tarball named `snapshot.tgz` in the current working directory.
## Restore Snapshot
This endpoint restores a point-in-time snapshot of the Nomad server state.
Restores involve a potentially dangerous low-level Raft operation that is not
designed to handle server failures during a restore. This operation is primarily
intended to be used when recovering from a disaster, restoring into a fresh
cluster of Nomad servers.
The body of the request should be a snapshot archive returned from a previous
call to the `GET` method.
| Method | Path | Produces |
| :----- | :---------------------- | ----------------------------- |
| `PUT` | `/v1/operator/snapshot` | `200 text/plain (empty body)` |
The table below shows this endpoint's support for
[blocking queries](/api-docs#blocking-queries) and
[required ACLs](/api-docs#acls).
| Blocking Queries | ACL Required |
| ---------------- | ------------ |
| `NO` | `management` |
### Sample Request
```shell-session
$ curl \
--request PUT \
--data-binary @snapshot.tgz \
http://127.0.0.1:4646/v1/operator/snapshot
```
~> Some tools default to form-encoded (`application/x-www-form-urlencoded`) uploads. Nomad expects the snapshot to be
in pure binary form.

View File

@@ -41,6 +41,14 @@ The following subcommands are available:
- [`operator raft remove-peer`][remove] - Remove a Nomad server from the Raft
configuration
- [`operator snapshot agent`][snapshot-agent] <EnterpriseAlert inline /> - Periodically saves snapshots of the Nomad server state
- [`operator snapshot save`][snapshot-save] - Saves a snapshot of the Nomad server state
- [`operator snapshot restore`][snapshot-restore] - Restores a snapshot of the Nomad server state
- [`operator snapshot inspect`][snapshot-inspect] - Inspects a snapshot of the Nomad server state
[get-config]: /docs/commands/operator/autopilot-get-config 'Autopilot Get Config command'
[keygen]: /docs/commands/operator/keygen 'Generates a new encryption key'
[keyring]: /docs/commands/operator/keyring 'Manages gossip layer encryption keys'
@@ -49,3 +57,7 @@ The following subcommands are available:
[outage recovery guide]: https://learn.hashicorp.com/nomad/operating-nomad/outage
[remove]: /docs/commands/operator/raft-remove-peer 'Raft Remove Peer command'
[set-config]: /docs/commands/operator/autopilot-set-config 'Autopilot Set Config command'
[snapshot-save]: /docs/commands/operator/snapshot-save 'Snapshot Save command'
[snapshot-restore]: /docs/commands/operator/snapshot-restore 'Snapshot Restore command'
[snapshot-inspect]: /docs/commands/operator/snapshot-inspect 'Snapshot Inspect command'
[snapshot-agent]: /docs/commands/operator/snapshot-agent 'Snapshot Agent command'

View File

@@ -0,0 +1,149 @@
---
layout: docs
page_title: 'Commands: operator snapshot agent'
sidebar_title: snapshot agent
description: |
Periodically saves snapshots of Nomad server state
---
# Command: operator snapshot agent
<EnterpriseAlert />
The snapshot agent takes snapshots of the state of the Nomad servers and
saves them locally, or pushes them to an optional remote storage service.
The agent can be run as a long-running daemon process or in a one-shot mode
from a batch job. As a long-running daemon, the agent will perform a leader
election so multiple processes can be run in a highly available fashion with
automatic failover. In daemon mode, the agent will also register itself with
Consul as a service, along with health checks that show the agent is alive
and able to take snapshots.
If ACLs are enabled, a management token must be supplied in order to perform
snapshot operations.
The config file has the following format (shown populated with default values):
```hcl
nomad {
http_addr = "127.0.0.1:4646"
token = ""
region = ""
ca_file = ""
ca_path = ""
cert_file = ""
key_file = ""
tls_server_name = ""
}
snapshot {
interval = "1h"
retain = 30
stale = false
service = "nomad-snapshot"
deregister_after = "72h"
lock_key = "nomad-snapshot/lock"
max_failures = 3
name_prefix = "nomad"
}
log {
level = "INFO"
enable_syslog = false
syslog_facility = "LOCAL0"
}
consul {
enabled = true
http_addr = "127.0.0.1:8500"
token = ""
datacenter = ""
ca_file = ""
ca_path = ""
cert_file = ""
key_file = ""
tls_server_name = ""
}
# one storage block is required
local_storage {
path = "."
}
aws_storage {
access_key_id = ""
secret_access_key = ""
s3_region = ""
s3_endpoint = ""
s3_bucket = ""
s3_key_prefix = "nomad-snapshot"
}
azure_blob_storage {
account_name = ""
account_key = ""
container_name = ""
}
google_storage {
bucket = ""
}
```
## Usage
```plaintext
nomad operator snapshot agent [options] <config_file>
```
## General Options
@include 'general_options.mdx'
## Snapshot Agent Options
### Snapshot Options
- `-interval`: Interval at which to perform snapshots as a time with a unit suffix, which can be "s", "m", "h" for seconds, minutes, or hours. If 0 is provided, the agent will take a single snapshot and then exit, which is useful for running snapshots via batch jobs. Defaults to "1h".
- `-lock-key`: A prefix in Consul's key-value store used to coordinate between different instances of the snapshot agent in order to only have one active instance at a time. For highly available operation of the snapshot agent, simply run multiple instances. All instances must be configured with the same lock key in order to properly coordinate. Defaults to "nomad-snapshot/lock".
- `-max-failures`: Number of snapshot failures after which the snapshot agent will give up leadership. In a highly available operation with multiple snapshot agents available, this gives another agent a chance to take over if an agent is experiencing issues, such as running out of disk space for snapshots. Defaults to 3.
- `-retain`: Number of snapshots to retain. After each snapshot is taken, the oldest snapshots will start to be deleted in order to retain at most this many snapshots. If this is set to 0, the agent will not perform this and snapshots will accumulate forever. Defaults to 30.
### Agent Options
- `-deregister-after`: An interval after which, if the agent is unhealthy, it will be automatically deregistered from Consul service discovery. This is a time with a unit suffix, which can be "s", "m", "h" for seconds, minutes, or hours. If 0 is provided, this will be disabled. Defaults to "72h".
- `-log-level`: Controls verbosity of snapshot agent logs. Valid options are "TRACE", "DEBUG", "INFO", "WARN", "ERR". Defaults to "INFO".
- `-log-json`: Output logs in JSON format. Defaults to false.
- `-service`: The service name to use when registering the agent with Consul. Registering helps monitor running agents and the leader registers an additional health check to monitor that snapshots are taking place. Defaults to "nomad-snapshot".
- `-syslog`: This enables forwarding logs to syslog. Defaults to false.
- `-syslog-facility`: Sets the facility to use for forwarding logs to syslog. Defaults to "LOCAL0".
### Local Storage Options
- `-local-path`: Location to store snapshots locally. The default behavior of the snapshot agent is to store snapshots locally in this directory. Defaults to "." to use the current working directory. If an alternate storage option is configured, then local storage will be disabled and this option will be ignored.
### S3 Storage Options
Note that despite the AWS references, any S3-compatible endpoint can be specified with '-aws-s3-endpoint'.
- `-aws-access-key-id`, `-aws-secret-access-key`: These arguments supply authentication information for connecting to S3. These may also be supplied using the following alternative methods:
- AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables
- A credentials file (~/.aws/credentials or the file at the path specified by the AWS_SHARED_CREDENTIALS_FILE environment variable)
- ECS task role metadata (container-specific)
- EC2 instance role metadata
- `-aws-s3-bucket`: S3 bucket to use. Required for S3 storage, and setting this disables local storage.
- `-aws-s3-key-prefix`: Prefix to use for snapshot files in S3. Defaults to "nomad-snapshot".
- `-aws-s3-region`: S3 region to use. Required for S3 storage.
- `-aws-s3-endpoint`: Optional S3 endpoint to use. Can also be specified using the AWS_S3_ENDPOINT environment variable.
- `-aws-s3-server-side-encryption`: Enables server side encryption with AES-256, when storing snapshots to S3. Defaults to false.
- `-aws-s3-static-snapshot-name`: Static file name to use for snapshot files. If this is set, snapshots are always saved with the same name, and are not versioned or rotated.
- `-aws-s3-enable-kms`: Enables using Amazon KMS for encrypting snapshots.
- `-aws-s3-kms-key`: Optional KMS key to use. If this is not set, the default KMS key will be used.
### Azure Blob Storage Options
(Note: Non-Solaris platforms only)
- `-azure-blob-account-name`, `-azure-blob-account-key`: These arguments supply authentication information for connecting to Azure Blob storage.
- `-azure-blob-container-name`: Container to use. Required for Azure blob storage, and setting this disables local storage.
- `-azure-blob-environment`: Environment to use. Defaults to AZUREPUBLICCLOUD. Other valid environments are AZURECHINACLOUD, AZUREGERMANCLOUD and AZUREUSGOVERNMENTCLOUD.

View File

@@ -0,0 +1,30 @@
---
layout: docs
page_title: 'Commands: operator snapshot inspect'
sidebar_title: snapshot inspect
description: |
Displays information about a snapshot file on disk.
---
# Command: operator snapshot inspect
Displays information about a snapshot file on disk.
To inspect the file "backup.snap":
```shell-session
$ nomad operator snapshot inspect backup.snap
ID 2-19-1592495928936
Size 3902
Index 19
Term 2
Version 1
```
## Usage
```plaintext
nomad operator snapshot inspect [options] <file>
```
[outage recovery]: https://learn.hashicorp.com/nomad/operating-nomad/outage

View File

@@ -0,0 +1,38 @@
---
layout: docs
page_title: 'Commands: operator snapshot restore'
sidebar_title: snapshot restore
description: |
Restore snapshot of Nomad server state
---
# Command: operator snapshot restore
Restores an atomic, point-in-time snapshot of the state of the Nomad servers
which includes jobs, nodes, allocations, periodic jobs, and ACLs.
Restores involve a potentially dangerous low-level Raft operation that is not
designed to handle server failures during a restore. This command is primarily
intended to be used when recovering from a disaster, restoring into a fresh
cluster of Nomad servers.
If ACLs are enabled, a management token must be supplied in order to perform
snapshot operations.
To restore a snapshot from the file "backup.snap":
```shell-session
$ nomad operator snapshot restore backup.snap
```
## Usage
```plaintext
nomad operator snapshot restore [options] <file>
```
## General Options
@include 'general_options.mdx'
[outage recovery]: https://learn.hashicorp.com/nomad/operating-nomad/outage

View File

@@ -0,0 +1,48 @@
---
layout: docs
page_title: 'Commands: operator snapshot save'
sidebar_title: snapshot save
description: |
Saves snapshot of Nomad server state
---
# Command: operator snapshot save
Retrieves an atomic, point-in-time snapshot of the state of the Nomad servers
which includes jobs, nodes, allocations, periodic jobs, and ACLs for [outage
recovery].
If ACLs are enabled, a management token must be supplied in order to perform
snapshot operations.
To create a snapshot from the leader server and save it to "backup.snap":
```shell-session
$ nomad operator snapshot save backup.snap
```
To create a potentially stale snapshot from any available server (useful if no
leader is available):
```shell-session
$ nomad operator snapshot save -stale backup.snap
```
## Usage
```plaintext
nomad operator snapshot save [options] <file>
```
## General Options
@include 'general_options.mdx'
## Snapshot Save Options
- `-stale`: The stale argument defaults to "false" which means the leader
provides the result. If the cluster is in an outage state without a leader, you
may need to set `-stale` to "true" to get the snapshot from a non-leader
server.
[outage recovery]: https://learn.hashicorp.com/nomad/operating-nomad/outage

View File

@@ -12,28 +12,29 @@
}
/* Global Transpiled Components */
@import '~@hashicorp/react-mega-nav/style.css';
@import '~@hashicorp/react-text-input/dist/style.css';
@import '~@hashicorp/react-alert/dist/style.css';
@import '~@hashicorp/react-button/dist/style.css';
@import '~@hashicorp/react-use-cases/dist/style.css';
@import '~@hashicorp/react-consent-manager/dist/style.css';
@import '~@hashicorp/react-hero/dist/style.css';
@import '~@hashicorp/react-section-header/dist/style.css';
@import '~@hashicorp/react-logo-grid/dist/style.css';
@import '~@hashicorp/react-product-downloader/dist/style.css';
@import '~@hashicorp/react-vertical-text-block-list/dist/style.css';
@import '~@hashicorp/react-docs-sidenav/dist/style.css';
@import '~@hashicorp/react-content/dist/style.css';
@import '~@hashicorp/react-subnav/dist/style.css';
@import '~@hashicorp/react-text-and-content/dist/style.css';
@import '~@hashicorp/react-call-to-action/dist/style.css';
@import '~@hashicorp/react-code-block/dist/style.css';
@import '~@hashicorp/react-consent-manager/dist/style.css';
@import '~@hashicorp/react-toggle/dist/style.css';
@import '~@hashicorp/react-alert/dist/style.css';
@import '~@hashicorp/react-call-to-action/dist/style.css';
@import '~@hashicorp/react-text-split/dist/style.css';
@import '~@hashicorp/react-text-split-with-code/dist/style.css';
@import '~@hashicorp/react-content/dist/style.css';
@import '~@hashicorp/react-docs-page/dist/style.css';
@import '~@hashicorp/react-docs-sidenav/dist/style.css';
@import '~@hashicorp/react-enterprise-alert/dist/style.css';
@import '~@hashicorp/react-hero/dist/style.css';
@import '~@hashicorp/react-logo-grid/dist/style.css';
@import '~@hashicorp/react-mega-nav/style.css';
@import '~@hashicorp/react-product-downloader/dist/style.css';
@import '~@hashicorp/react-section-header/dist/style.css';
@import '~@hashicorp/react-subnav/dist/style.css';
@import '~@hashicorp/react-tabs/dist/style.css';
@import '~@hashicorp/react-text-and-content/dist/style.css';
@import '~@hashicorp/react-text-input/dist/style.css';
@import '~@hashicorp/react-text-split-with-code/dist/style.css';
@import '~@hashicorp/react-text-split/dist/style.css';
@import '~@hashicorp/react-toggle/dist/style.css';
@import '~@hashicorp/react-use-cases/dist/style.css';
@import '~@hashicorp/react-vertical-text-block-list/dist/style.css';
/* Local Components */
@import '../components/placement-table/style.css';
@@ -71,17 +72,17 @@
.g-section-block section {
padding-top: 96px;
padding-bottom: 96px;
}
.g-section-block section > .g-section-header + *,
.g-section-block section > .g-container > .g-section-header + * {
& > .g-section-header + *,
& > .g-container > .g-section-header + * {
margin-top: 72px;
}
.g-section-block section > * + *,
.g-section-block section > .g-container > * + * {
& > * + *,
& > .g-container > * + * {
margin-top: 96px;
}
}
.g-section-block .button-container {
display: -webkit-box;