Merge pull request #8869 from hashicorp/docs-move-ea-guides

Sunset older instances of EA Guides
Charlie Voiselle (committed by GitHub)
2020-09-15 12:49:35 -04:00
5 changed files with 93 additions and 432 deletions

View File

@@ -432,11 +432,7 @@
/guides/security/sentinel-policy https://learn.hashicorp.com/nomad/governance-and-policy/sentinel 301!
/guides/operations/install/index.html /docs/install 301!
/guides/operations/install/index /docs/install 301!
/guides/operations/deployment-guide.html /docs/install/production/deployment-guide 301!
/guides/operations/deployment-guide /docs/install/production/deployment-guide 301!
/guides/operations/agent/index.html /docs/install/production/nomad-agent 301!
/guides/operations/reference-architecture.html /docs/install/production/reference-architecture 301!
/guides/operations/reference-architecture /docs/install/production/reference-architecture 301!
/guides/operations/requirements.html /docs/install/production/requirements 301!
/guides/operations/requirements /docs/install/production/requirements 301!
/guides/operations/consul-integration/index.html /docs/integrations/consul-integration 301!
@@ -455,7 +451,12 @@
/guides/upgrade/upgrade-specific.html /docs/upgrade/upgrade-specific 301!
/guides/upgrade/upgrade-specific /docs/upgrade/upgrade-specific 301!
/guides/operations/deployment-guide.html https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul 301!
/guides/operations/deployment-guide https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul 301!
/guides/operations/reference-architecture.html https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul 301!
/guides/operations/reference-architecture https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul 301!
/docs/install/production/deployment-guide https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul 301!
/docs/install/production/reference-architecture https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul 301!
# Enterprise

View File

@@ -15,32 +15,43 @@ export default [
content: [
'requirements',
'nomad-agent',
'reference-architecture',
'deployment-guide'
]
{
title: 'Reference Architecture',
href:
'https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul',
},
{
title: 'Deployment Guide',
href:
'https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul',
},
],
},
'windows-service'
]
'windows-service',
],
},
{ category: 'upgrade', content: ['upgrade-specific'] },
{
category: 'integrations',
content: ['consul-integration', 'consul-connect', 'vault-integration']
content: ['consul-integration', 'consul-connect', 'vault-integration'],
},
'-----------',
{
category: 'internals',
content: [
'architecture',
{ category: 'plugins', content: ['base', 'task-drivers', 'devices', 'csi'] },
{
category: 'plugins',
content: ['base', 'task-drivers', 'devices', 'csi'],
},
{
category: 'scheduling',
content: ['scheduling', 'preemption']
content: ['scheduling', 'preemption'],
},
'consensus',
'gossip',
'security'
]
'security',
],
},
{
category: 'configuration',
@@ -56,8 +67,8 @@ export default [
'server_join',
'telemetry',
'tls',
'vault'
]
'vault',
],
},
{
category: 'commands',
@@ -75,18 +86,26 @@ export default [
'token-info',
'token-list',
'token-self',
'token-update'
]
'token-update',
],
},
'agent',
'agent-info',
{
category: 'alloc',
content: ['exec', 'fs', 'logs', 'restart', 'signal', 'status', 'stop']
content: ['exec', 'fs', 'logs', 'restart', 'signal', 'status', 'stop'],
},
{
category: 'deployment',
content: ['fail', 'list', 'pause', 'promote', 'resume', 'status', 'unblock']
content: [
'fail',
'list',
'pause',
'promote',
'resume',
'status',
'unblock',
],
},
'eval-status',
{
@@ -105,21 +124,21 @@ export default [
'run',
'status',
'stop',
'validate'
]
'validate',
],
},
{
category: 'license',
content: ['get', 'put']
content: ['get', 'put'],
},
'monitor',
{
category: 'namespace',
content: ['apply', 'delete', 'inspect', 'list', 'status']
content: ['apply', 'delete', 'inspect', 'list', 'status'],
},
{
category: 'node',
content: ['config', 'drain', 'eligibility', 'status']
content: ['config', 'drain', 'eligibility', 'status'],
},
{
category: 'operator',
@@ -134,13 +153,13 @@ export default [
'snapshot-agent',
'snapshot-inspect',
'snapshot-restore',
'snapshot-save'
]
'snapshot-save',
],
},
{ category: 'plugin', content: ['status'] },
{
category: 'quota',
content: ['apply', 'delete', 'init', 'inspect', 'list', 'status']
content: ['apply', 'delete', 'init', 'inspect', 'list', 'status'],
},
{ category: 'sentinel', content: ['apply', 'delete', 'list', 'read'] },
{ category: 'server', content: ['force-leave', 'join', 'members'] },
@@ -148,8 +167,11 @@ export default [
{ category: 'system', content: ['gc', 'reconcile-summaries'] },
'ui',
'version',
{ category: 'volume', content: ['deregister', 'detach', 'status', 'register'] }
]
{
category: 'volume',
content: ['deregister', 'detach', 'status', 'register'],
},
],
},
'----------',
{
@@ -192,8 +214,8 @@ export default [
'upstreams',
'vault',
'volume',
'volume_mount'
]
'volume_mount',
],
},
{
category: 'drivers',
@@ -215,14 +237,14 @@ export default [
'rkt',
'singularity',
'nspawn',
'iis'
]
}
]
'iis',
],
},
],
},
{
category: 'devices',
content: ['nvidia', 'community']
content: ['nvidia', 'community'],
},
'schedulers',
{ category: 'runtime', content: ['environment', 'interpolation'] },
@@ -236,22 +258,16 @@ export default [
'telemetry',
{
category: 'plugins',
content: [
'apm',
'strategy',
'target'
]
content: ['apm', 'strategy', 'target'],
},
{
category: 'internals',
content: [
'checks'
]
}
]
content: ['checks'],
},
],
},
{ category: 'telemetry', content: ['metrics'] },
'------------',
{ category: 'enterprise' },
'faq'
'faq',
]

View File

@@ -1,229 +0,0 @@
---
layout: docs
page_title: Deployment Guide
sidebar_title: Reference Install Guide
description: |-
This deployment guide covers the steps required to install and
configure a single HashiCorp Nomad cluster as defined in the
Nomad Reference Architecture
ea_version: 0.9
---
# Nomad Reference Install Guide
This deployment guide covers the steps required to install and configure a single HashiCorp Nomad cluster as defined in the [Nomad Reference Architecture](/docs/install/production/reference-architecture).
These instructions are for installing and configuring Nomad on Linux hosts running the systemd system and service manager.
## Reference Material
This deployment guide is designed to work in combination with the [Nomad Reference Architecture](/docs/install/production/reference-architecture) and [Consul Deployment Guide](https://www.consul.io/docs/guides/deployment-guide.html). Although it is not a strict requirement to follow the Nomad Reference Architecture, please ensure you are familiar with the overall architecture design. For example, installing Nomad server agents on multiple physical or virtual (with correct anti-affinity) hosts for high-availability.
## Overview
To provide a highly-available single cluster architecture, we recommend Nomad server agents be deployed to more than one host, as shown in the [Nomad Reference Architecture](/docs/install/production/reference-architecture).
![Reference diagram](/img/nomad_reference_diagram.png)
These setup steps should be completed on all Nomad hosts:
- [Download Nomad](#download-nomad)
- [Install Nomad](#install-nomad)
- [Configure systemd](#configure-systemd)
- [Configure Nomad](#configure-nomad)
- [Start Nomad](#start-nomad)
## Download Nomad
Precompiled Nomad binaries are available for download at [https://releases.hashicorp.com/nomad/](https://releases.hashicorp.com/nomad/) and Nomad Enterprise binaries are available for download by following the instructions made available to HashiCorp Enterprise customers.
```text
export NOMAD_VERSION="0.9.0"
curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip
```
You may perform checksum verification of the zip packages using the SHA256SUMS and SHA256SUMS.sig files available for the specific release version. HashiCorp provides [a guide on checksum verification](https://www.hashicorp.com/security) for precompiled binaries.
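As a sketch of that verification (assuming GNU coreutils' `sha256sum` and that HashiCorp's PGP public key has already been imported into your keyring), the checksum files can be fetched and checked alongside the zip package:
```text
curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS
curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS.sig
# Verify the signature on the checksum file, then the checksum of the downloaded zip
gpg --verify nomad_${NOMAD_VERSION}_SHA256SUMS.sig nomad_${NOMAD_VERSION}_SHA256SUMS
sha256sum --check --ignore-missing nomad_${NOMAD_VERSION}_SHA256SUMS
```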
## Install Nomad
Unzip the downloaded package and move the `nomad` binary to `/usr/local/bin/`. Check that `nomad` is available on the system path.
```text
unzip nomad_${NOMAD_VERSION}_linux_amd64.zip
sudo chown root:root nomad
sudo mv nomad /usr/local/bin/
nomad version
```
The `nomad` command features opt-in autocompletion for flags, subcommands, and arguments (where supported). Enable autocompletion.
```text
nomad -autocomplete-install
complete -C /usr/local/bin/nomad nomad
```
Create a data directory for Nomad.
```text
sudo mkdir --parents /opt/nomad
```
## Configure systemd
Systemd uses [documented sane defaults](https://www.freedesktop.org/software/systemd/man/systemd.directives.html), so only non-default values need to be set in the configuration file.
Create a Nomad service file at `/etc/systemd/system/nomad.service`.
```text
sudo touch /etc/systemd/system/nomad.service
```
Add this configuration to the Nomad service file:
```text
[Unit]
Description=Nomad
Documentation=https://nomadproject.io/docs/
Wants=network-online.target
After=network-online.target
[Service]
ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d
KillMode=process
KillSignal=SIGINT
LimitNOFILE=infinity
LimitNPROC=infinity
Restart=on-failure
RestartSec=2
StartLimitBurst=3
StartLimitIntervalSec=10
TasksMax=infinity
[Install]
WantedBy=multi-user.target
```
The following parameters are set for the `[Unit]` stanza:
- [`Description`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Description=) - Free-form string describing the nomad service
- [`Documentation`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Documentation=) - Link to the nomad documentation
- [`Wants`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants=) - Configure a dependency on the network service
- [`After`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#After=) - Configure an ordering dependency on the network service being started before the nomad service
The following parameters are set for the `[Service]` stanza:
- [`ExecReload`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecReload=) - Send Nomad a `SIGHUP` signal to trigger a configuration reload
- [`ExecStart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart=) - Start Nomad with the `agent` argument and path to a directory of configuration files
- [`KillMode`](https://www.freedesktop.org/software/systemd/man/systemd.kill.html#KillMode=) - Treat nomad as a single process
- [`LimitNOFILE`, `LimitNPROC`](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Properties) - Disable limits for file descriptors and processes
- [`RestartSec`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#RestartSec=) - Restart nomad after 2 seconds of it being considered 'failed'
- [`Restart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#Restart=) - Restart nomad unless it returned a clean exit code
- [`StartLimitBurst`, `StartLimitIntervalSec`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#StartLimitIntervalSec=interval) - Configure unit start rate limiting
- [`TasksMax`](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#TasksMax=N) - Disable task limits (only available in systemd >= 226)
The following parameters are set for the `[Install]` stanza:
- [`WantedBy`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#WantedBy=) - Creates a weak dependency on nomad being started by the multi-user run level
## Configure Nomad
Nomad uses [documented sane defaults](/docs/configuration), so only non-default values need to be set in the configuration file. Configuration can be read from multiple files and is loaded in lexical order. See the [full description](/docs/configuration) for more information about configuration loading and merge semantics.
Some configuration settings are common to both server and client Nomad agents, while some configuration settings must only exist on one or the other. Follow the [common configuration](#common-configuration) guidance on all hosts and then the specific guidance depending on whether you are configuring a Nomad [server](#server-configuration) or [client](#client-configuration).
- [Common Nomad configuration](#common-configuration)
- [Configure a Nomad server](#server-configuration)
- [Configure a Nomad client](#client-configuration)
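The steps below result in a small set of files under `/etc/nomad.d`, loaded in lexical order and merged; a sketch of the resulting layout:
```text
/etc/nomad.d/client.hcl   # client-only settings (client hosts only)
/etc/nomad.d/nomad.hcl    # common settings (all hosts)
/etc/nomad.d/server.hcl   # server-only settings (server hosts only)
```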
### Common configuration
Create a configuration file at `/etc/nomad.d/nomad.hcl`:
```text
sudo mkdir --parents /etc/nomad.d
sudo chmod 700 /etc/nomad.d
sudo touch /etc/nomad.d/nomad.hcl
```
Add this configuration to the `nomad.hcl` configuration file:
~> **Note:** Replace the `datacenter` parameter value with the identifier you will use for the datacenter this Nomad cluster is deployed in.
```hcl
datacenter = "dc1"
data_dir = "/opt/nomad"
```
- [`datacenter`](/docs/configuration#datacenter) - The datacenter in which the agent is running.
- [`data_dir`](/docs/configuration#data_dir) - The data directory for the agent to store state.
### Server configuration
Create a configuration file at `/etc/nomad.d/server.hcl`:
```text
sudo touch /etc/nomad.d/server.hcl
```
Add this configuration to the `server.hcl` configuration file:
~> **NOTE** Replace the `bootstrap_expect` value with the number of Nomad servers you will use; three or five [is recommended](/docs/internals/consensus#deployment-table).
```hcl
server {
enabled = true
bootstrap_expect = 3
}
```
- [`server`](/docs/configuration/server#enabled) - Specifies if this agent should run in server mode. All other server options depend on this value being set.
- [`bootstrap_expect`](/docs/configuration/server#bootstrap_expect) - The number of expected servers in the cluster. Either this value should not be provided or the value must agree with other servers in the cluster.
### Client configuration
Create a configuration file at `/etc/nomad.d/client.hcl`:
```text
sudo touch /etc/nomad.d/client.hcl
```
Add this configuration to the `client.hcl` configuration file:
```hcl
client {
enabled = true
}
```
- [`client`](/docs/configuration/client#enabled) - Specifies if this agent should run in client mode. All other client options depend on this value being set.
~> **NOTE** The [`options`](/docs/configuration/client#options-parameters) parameter can be used to enable or disable specific configurations on Nomad clients to suit your use case.
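For example (a hypothetical addition, not part of this guide's baseline configuration), a client-side driver toggle can be set through `options`:
```hcl
client {
  enabled = true

  # Example only: enable the raw_exec task driver on this client
  options {
    "driver.raw_exec.enable" = "1"
  }
}
```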
### ACL configuration
The [Access Control](https://learn.hashicorp.com/collections/nomad/access-control) guide provides instructions on configuring and enabling ACLs.
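At a minimum, enabling the ACL system adds an `acl` stanza to the agent configuration; the guide above covers bootstrapping and token management. A sketch:
```hcl
acl {
  enabled = true
}
```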
### TLS configuration
Securing Nomad's cluster communication with mutual TLS (mTLS) is recommended for production deployments and can even ease operations by preventing mistakes and misconfigurations. Nomad clients and servers should not be publicly accessible without mTLS enabled.
The [Securing Nomad with TLS](https://learn.hashicorp.com/nomad/transport-security/enable-tls) guide provides instructions on configuring and enabling TLS.
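The resulting agent configuration typically gains a `tls` stanza along these lines (a sketch with hypothetical certificate paths; follow the guide above to generate the certificates):
```hcl
tls {
  http = true
  rpc  = true

  # Hypothetical paths; create these certificates by following the TLS guide
  ca_file   = "/etc/nomad.d/tls/nomad-ca.pem"
  cert_file = "/etc/nomad.d/tls/server.pem"
  key_file  = "/etc/nomad.d/tls/server-key.pem"

  verify_server_hostname = true
  verify_https_client    = true
}
```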
## Start Nomad
Enable and start Nomad using the systemctl command, which controls systemd-managed services. Check the status of the nomad service with systemctl.
```text
sudo systemctl enable nomad
sudo systemctl start nomad
sudo systemctl status nomad
```
## Next Steps
- Read [Outage Recovery](https://learn.hashicorp.com/nomad/operating-nomad/outage) to learn
the steps required to recover from a Nomad cluster outage.
- Read [Autopilot](https://learn.hashicorp.com/nomad/operating-nomad/autopilot) to learn about
features introduced in Nomad 0.8 that allow for automatic, operator-friendly
management of Nomad servers.

View File

@@ -7,34 +7,41 @@ description: Learn how to install Nomad for Production.
# Installing Nomad for Production
This section covers how to install Nomad for production.
While HashiCorp Nomad provides a low-friction practitioner experience out of
the box, there are a few critical steps to take for a successful production
Nomad deployment.
There are multiple steps to cover for a successful Nomad deployment:
## Explore the Reference Architecture and Installation Guide
## Installing Nomad
Learn more about recommended practices and explore a reference architecture for
deploying HashiCorp Nomad in production.
This page lists the two primary methods of installing Nomad and how to verify a successful installation.
- [Nomad Reference Architecture][] - Learn recommended practices and a reference
architecture for Nomad production deployments. This reference architecture
conveys a general architecture. Adapt it to accommodate the specific needs
of your implementation.
Please refer to [Installing Nomad](/docs/install) sub-section.
- [Nomad Deployment Guide][] - Follow along with an end-to-end outline of the
steps required to install a single production-ready Nomad cluster as defined
in the Reference Architecture section.
## Hardware Requirements
## Verify Hardware Requirements
This page details the recommended machine resources (instances), port requirements, and network topology for Nomad.
Review the recommended machine resources (instances), port requirements, and
network topology for Nomad in the [Hardware Requirements](/docs/install/production/requirements).
Please refer to [Hardware Requirements](/docs/install/production/requirements) sub-section.
## Install Nomad
## Setting Nodes with Nomad Agent
These pages explain the Nomad agent process and how to set up the server and client nodes in the cluster.
Visit the [Installing Nomad](/docs/install) page to learn the options
available for installing Nomad and how to verify a successful
installation.
Please refer to [Set Server & Client Nodes](/docs/install/production/nomad-agent) and [Nomad Agent documentation](/docs/commands/agent) pages.
## Configure your Nomad Servers and Clients
## Reference Architecture
Refer to the [Set Server & Client Nodes](/docs/install/production/nomad-agent)
and [Nomad Agent documentation](/docs/commands/agent) pages to learn about the
Nomad agent process and how to configure the server and client nodes in your
cluster.
This document provides recommended practices and a reference architecture for HashiCorp Nomad production deployments. This reference architecture conveys a general architecture that should be adapted to accommodate the specific needs of each implementation.
Please refer to [Reference Architecture](/docs/install/production/reference-architecture) sub-section.
## Install Guide Based on Reference Architecture
This guide provides an end-to-end walkthrough of the steps required to install a single production-ready Nomad cluster as defined in the Reference Architecture section.
Please refer to [Reference Install Guide](/docs/install/production/deployment-guide) sub-section.
[Nomad Reference Architecture]: https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul
[Nomad Deployment Guide]: https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul

View File

@@ -1,134 +0,0 @@
---
layout: docs
page_title: Nomad Reference Architecture
sidebar_title: Reference Architecture
description: |-
This document provides recommended practices and a reference
architecture for HashiCorp Nomad production deployments.
ea_version: 0.9
---
# Nomad Reference Architecture
This document provides recommended practices and a reference architecture for HashiCorp Nomad production deployments. This reference architecture conveys a general architecture that should be adapted to accommodate the specific needs of each implementation.
The following topics are addressed:
- [Reference Architecture](#ra)
- [Deployment Topology within a Single Region](#one-region)
- [Deployment Topology across Multiple Regions](#multi-region)
- [Network Connectivity Details](#net)
- [Deployment System Requirements](#system-reqs)
- [High Availability](#high-availability)
- [Failure Scenarios](#failure-scenarios)
This document describes deploying a Nomad cluster in combination with, or with access to, a [Consul cluster](/docs/integrations/consul-integration). We recommend the use of Consul with Nomad to provide automatic clustering, service discovery, health checking and dynamic configuration.
## Reference Architecture ((#ra))
A Nomad cluster typically comprises three or five servers (but no more than seven) and a number of client agents. Nomad differs slightly from Consul in that it divides infrastructure into regions, each served by one Nomad server cluster but able to manage multiple datacenters or availability zones. For example, a _US Region_ can include datacenters _us-east-1_ and _us-west-2_.
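To illustrate that split (a sketch, not part of this document), the region and datacenter are set independently in each agent's configuration:
```hcl
# Example agent in the "us" region, placed in the us-east-1 datacenter
region     = "us"
datacenter = "us-east-1"
```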
In a Nomad multi-region architecture, communication happens via [WAN gossip](/docs/internals/gossip). Additionally, Nomad integrates easily with Consul to provide features such as automatic clustering, service discovery, and dynamic configuration, so we recommend using Consul with Nomad to simplify the deployment.
In cloud environments, a single cluster may be deployed across multiple availability zones. For example, in AWS each Nomad server can be deployed to an associated EC2 instance, and those EC2 instances distributed across multiple AZs. Similarly, Nomad server clusters can be deployed to multiple cloud regions to allow for region level HA scenarios.
For more information on Nomad server cluster design, see the [cluster requirements documentation](/docs/install/production/requirements).
The design shared in this document is the recommended architecture for production environments, as it provides flexibility and resilience. Nomad utilizes an existing Consul server cluster; however, the deployment design of the Consul server cluster is outside the scope of this document.
Nomad to Consul connectivity is over HTTP and should be secured with TLS as well as a Consul token to provide encryption of all traffic. This is done using Nomad's [Automatic Clustering with Consul](https://learn.hashicorp.com/nomad/operating-nomad/clustering).
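In practice this is expressed through a `consul` stanza in the Nomad agent configuration (a sketch with placeholder values; the Automatic Clustering guide above covers the details):
```hcl
consul {
  # Placeholder values; point these at the local Consul agent's TLS port
  address = "127.0.0.1:8501"
  ssl     = true
  token   = "REPLACE_WITH_CONSUL_ACL_TOKEN"

  ca_file   = "/etc/nomad.d/tls/consul-ca.pem"
  cert_file = "/etc/nomad.d/tls/consul-client.pem"
  key_file  = "/etc/nomad.d/tls/consul-client-key.pem"
}
```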
### Deployment Topology within a Single Region ((#one-region))
A single Nomad cluster is recommended for applications deployed in the same region.
Each cluster is expected to have either three or five servers. This strikes a balance between availability in the case of failure and performance, as [Raft](https://raft.github.io/) consensus gets progressively slower as more servers are added.
The time taken by a new server to join an existing large cluster may increase as the size of the cluster increases.
#### Reference Diagram
![Reference diagram](/img/nomad_reference_diagram.png)
### Deployment Topology across Multiple Regions ((#multi-region))
By deploying Nomad server clusters in multiple regions, the user is able to interact with the Nomad servers by targeting any region from any Nomad server even if that server resides in a separate region. However, most data is not replicated between regions as they are fully independent clusters. The exceptions are [ACL tokens and policies][acl], as well as [Sentinel policies in Nomad Enterprise][sentinel], which _are_ replicated between regions.
Nomad server clusters in different datacenters can be federated using WAN links. The server clusters can be joined to communicate over the WAN on port `4648`. This same port is used for single datacenter deployments over LAN as well.
Additional documentation is available to learn more about [Nomad server federation](https://learn.hashicorp.com/nomad/operating-nomad/federation).
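As a sketch of the federation step (hypothetical address), a server in one region can be joined to a remote region's server over that port:
```text
nomad server join <remote-region-server>:4648
```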
## Network Connectivity Details ((#net))
![Nomad network diagram](/img/nomad_network_arch.png)
Nomad servers are expected to communicate over high-bandwidth, low-latency networks, with latencies below 10 milliseconds between cluster members. Nomad servers can be spread across cloud regions or datacenters if they satisfy these latency requirements.
Nomad client clusters require the ability to receive traffic as noted above in the Network Connectivity Details; however, clients can run on any type of infrastructure (multi-cloud, on-prem, virtual, bare metal, etc.) as long as they are reachable and can receive job requests from the Nomad servers.
Additional documentation is available to learn more about [Nomad networking](/docs/install/production/requirements#network-topology).
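For reference, the default ports involved are listed below (see the requirements page linked above for full details):
```text
4646 (TCP)        # HTTP API and web UI
4647 (TCP)        # RPC between clients and servers
4648 (TCP + UDP)  # Serf gossip, LAN and WAN
```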
## Deployment System Requirements ((#system-reqs))
Nomad server agents are responsible for maintaining the cluster state, responding to RPC queries (read operations), and for processing all write operations. Given that Nomad server agents do most of the heavy lifting, server sizing is critical for the overall performance efficiency and health of the Nomad cluster.
### Nomad Servers
| Size | CPU | Memory | Disk | Typical Cloud Instance Types |
| ----- | -------- | ------------ | ------ | ----------------------------------------- |
| Small | 2 core | 8-16 GB RAM | 50 GB | **AWS:** m5.large, m5.xlarge |
| | | | | **Azure:** Standard_D2_v3, Standard_D4_v3 |
| | | | | **GCE:** n1-standard-8, n1-standard-16 |
| Large | 4-8 core | 32-64 GB RAM | 100 GB | **AWS:** m5.2xlarge, m5.4xlarge |
| | | | | **Azure:** Standard_D4_v3, Standard_D8_v3 |
| | | | | **GCE:** n1-standard-16, n1-standard-32 |
#### Hardware Sizing Considerations
- The small size would be appropriate for most initial production
deployments, or for development/testing environments.
- The large size is for production environments where there is a
consistently high workload.
~> **NOTE** For large workloads, ensure that the disks support a high number of IOPS to keep up with the rapid Raft log update rate.
Nomad clients can be set up for specialized workloads as well. For example, if workloads require GPU processing, a Nomad datacenter can be created to serve those GPU-specific jobs and joined to a Nomad server cluster. For more information on specialized workloads, see the documentation on [job constraints](/docs/job-specification/constraint) to target specific client nodes.
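For example (a hypothetical job-spec fragment), placement can be restricted to GPU-class client nodes with a constraint:
```hcl
# Hypothetical fragment: restrict placement to clients whose node class is "gpu"
constraint {
  attribute = "${node.class}"
  value     = "gpu"
}
```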
## High Availability
A Nomad server cluster is the highly-available unit of deployment within a single datacenter. A recommended approach is to deploy a three or five node Nomad server cluster. With this configuration, during a Nomad server outage, failover is handled immediately without human intervention.
When setting up high availability across regions, multiple Nomad server clusters are deployed and connected via WAN gossip. Nomad clusters in regions are fully independent from each other and do not share jobs, clients, or state. Data residing in a single region-specific cluster is not replicated to other clusters in other regions.
## Failure Scenarios
Typical distribution in a cloud environment is to spread Nomad server nodes into separate Availability Zones (AZs) within a high bandwidth, low latency network, such as an AWS Region. The diagram below shows Nomad servers deployed in multiple AZs promoting a single voting member per AZ and providing both AZ-level and node-level failure protection.
![Nomad fault tolerance](/img/nomad_fault_tolerance.png)
Additional documentation is available to learn more about [cluster sizing and failure tolerances](/docs/internals/consensus#deployment-table) as well as [outage recovery](https://learn.hashicorp.com/nomad/operating-nomad/outage).
### Availability Zone Failure
In the event of a single AZ failure, only a single Nomad server will be affected, which would not impact job scheduling as long as there is still a Raft quorum (i.e., 2 available servers in a 3-server cluster, 3 available servers in a 5-server cluster, etc.). There are two scenarios that could occur should an AZ fail in a multiple-AZ setup: leader loss or follower loss.
#### Leader Server Loss
If the AZ containing the Nomad leader server fails, the remaining quorum members would elect a new leader. The new leader then begins to accept new log entries and replicates these entries to the remaining followers.
#### Follower Server Loss
If the AZ containing a Nomad follower server fails, there is no immediate impact to the Nomad leader server or cluster operations. However, there still must be a Raft quorum in order to properly manage a future failure of the Nomad leader server.
### Region Failure
In the event of a region-level failure (which would contain an entire Nomad server cluster), clients will still be able to submit jobs to another region that is properly federated. However, there will likely be data loss as Nomad server clusters do not replicate their data to other region clusters. See [Multi-region Federation](https://learn.hashicorp.com/nomad/operating-nomad/federation) for more setup information.
## Next Steps
- Read [Deployment Guide](/docs/install/production/deployment-guide) to learn
the steps required to install and configure a single HashiCorp Nomad cluster.
[acl]: https://learn.hashicorp.com/nomad?track=acls#operations-and-development
[sentinel]: https://learn.hashicorp.com/nomad/governance-and-policy/sentinel