From 475bcaafd6a50c6164338f00391d915349ec11f9 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 19 Mar 2019 15:18:12 -0700 Subject: [PATCH 1/3] docs: sync systemd unit files; update deploy guide The systemd configs spread across our repo were fairly out of sync. This should get them on our best practices. The deployment guide also had some strange things like running Nomad as a non-root user. It would be fine for servers but completely breaks clients. For simplicity I simply removed the non-root user references. --- dist/systemd/nomad.service | 12 +++-- e2e/terraform/shared/config/nomad.service | 15 ++++-- terraform/shared/config/nomad.service | 25 ++++++--- .../operations/deployment-guide.html.md | 54 ++++++++----------- 4 files changed, 57 insertions(+), 49 deletions(-) diff --git a/dist/systemd/nomad.service b/dist/systemd/nomad.service index 771641978..7e7f74e65 100644 --- a/dist/systemd/nomad.service +++ b/dist/systemd/nomad.service @@ -4,21 +4,23 @@ Documentation=https://nomadproject.io/docs/ Wants=network-online.target After=network-online.target -# If you are running Consul, please uncomment following Wants/After configs. -# Assuming your Consul service unit name is "consul" +# When using Nomad with Consul it is not necessary to start Consul first. These +# lines start Consul before Nomad as an optimization to avoid Nomad logging +# that Consul is unavailable at startup. 
#Wants=consul.service #After=consul.service [Service] +ExecReload=/bin/kill -HUP $MAINPID +ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d KillMode=process KillSignal=SIGINT -ExecStart=/usr/bin/nomad agent -config /etc/nomad -ExecReload=/bin/kill -HUP $MAINPID +LimitNOFILE=infinity +LimitNPROC=infinity Restart=on-failure RestartSec=2 StartLimitBurst=3 StartLimitIntervalSec=10 -LimitNOFILE=65536 TasksMax=infinity [Install] diff --git a/e2e/terraform/shared/config/nomad.service b/e2e/terraform/shared/config/nomad.service index a7f977494..995d055e7 100644 --- a/e2e/terraform/shared/config/nomad.service +++ b/e2e/terraform/shared/config/nomad.service @@ -4,12 +4,17 @@ Requires=network-online.target After=network-online.target [Service] -Restart=on-failure -ExecStart=/usr/local/bin/nomad agent -config="/etc/nomad.d/nomad.hcl" ExecReload=/bin/kill -HUP $MAINPID -KillSignal=SIGTERM -User=root -Group=root +ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d +KillMode=process +KillSignal=SIGINT +LimitNOFILE=infinity +LimitNPROC=infinity +Restart=on-failure +RestartSec=2 +StartLimitBurst=3 +StartLimitIntervalSec=10 +TasksMax=infinity [Install] WantedBy=multi-user.target diff --git a/terraform/shared/config/nomad.service b/terraform/shared/config/nomad.service index a7f977494..0857c5a0d 100644 --- a/terraform/shared/config/nomad.service +++ b/terraform/shared/config/nomad.service @@ -1,15 +1,26 @@ [Unit] -Description=Nomad Agent -Requires=network-online.target +Description=Nomad +Documentation=https://nomadproject.io/docs/ +Wants=network-online.target After=network-online.target +# If you are running Consul, please uncomment following Wants/After configs. 
+# Assuming your Consul service unit name is "consul" +#Wants=consul.service +#After=consul.service + [Service] -Restart=on-failure -ExecStart=/usr/local/bin/nomad agent -config="/etc/nomad.d/nomad.hcl" ExecReload=/bin/kill -HUP $MAINPID -KillSignal=SIGTERM -User=root -Group=root +ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d +KillMode=process +KillSignal=SIGINT +LimitNOFILE=infinity +LimitNPROC=infinity +Restart=on-failure +RestartSec=2 +StartLimitBurst=3 +StartLimitIntervalSec=10 +TasksMax=infinity [Install] WantedBy=multi-user.target diff --git a/website/source/guides/operations/deployment-guide.html.md b/website/source/guides/operations/deployment-guide.html.md index 6000e3b85..639885f32 100644 --- a/website/source/guides/operations/deployment-guide.html.md +++ b/website/source/guides/operations/deployment-guide.html.md @@ -40,7 +40,7 @@ Precompiled Nomad binaries are available for download at [https://releases.hashi You should perform checksum verification of the zip packages using the SHA256SUMS and SHA256SUMS.sig files available for the specific release version. HashiCorp provides [a guide on checksum verification](https://www.hashicorp.com/security.html) for precompiled binaries. ```text -NOMAD_VERSION="0.8.4" +export NOMAD_VERSION="0.8.7" curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS.sig @@ -54,7 +54,7 @@ Unzip the downloaded package and move the `nomad` binary to `/usr/local/bin/`. C unzip nomad_${NOMAD_VERSION}_linux_amd64.zip sudo chown root:root nomad sudo mv nomad /usr/local/bin/ -nomad --version +nomad version ``` The `nomad` command features opt-in autocompletion for flags, subcommands, and arguments (where supported). 
Enable autocompletion. @@ -64,19 +64,17 @@ nomad -autocomplete-install complete -C /usr/local/bin/nomad nomad ``` -Create a unique, non-privileged system user to run Nomad and create its data directory. +Create a data directory for Nomad. ```text -sudo useradd --system --home /etc/nomad.d --shell /bin/false nomad sudo mkdir --parents /opt/nomad -sudo chown --recursive nomad:nomad /opt/nomad ``` ## Configure systemd Systemd uses [documented sane defaults](https://www.freedesktop.org/software/systemd/man/systemd.directives.html) so only non-default values must be set in the configuration file. -Create a Nomad service file at /etc/systemd/system/nomad.service. +Create a Nomad service file at `/etc/systemd/system/nomad.service`. ```text sudo touch /etc/systemd/system/nomad.service @@ -86,23 +84,23 @@ Add this configuration to the Nomad service file: ```text [Unit] -Description="HashiCorp Nomad - An application and service scheduler" -Documentation=https://www.nomad.io/docs/ -Requires=network-online.target +Description=Nomad +Documentation=https://nomadproject.io/docs/ +Wants=network-online.target After=network-online.target -ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl [Service] -User=nomad -Group=nomad -ExecStart=/usr/local/bin/nomad agent -config=/etc/nomad.d/ -ExecReload=/bin/kill --signal HUP $MAINPID +ExecReload=/bin/kill -HUP $MAINPID +ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d KillMode=process +KillSignal=SIGINT +LimitNOFILE=infinity +LimitNPROC=infinity Restart=on-failure RestartSec=2 StartLimitBurst=3 StartLimitIntervalSec=10 -LimitNOFILE=65536 +TasksMax=infinity [Install] WantedBy=multi-user.target @@ -112,20 +110,19 @@ The following parameters are set for the `[Unit]` stanza: - [`Description`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Description=) - Free-form string describing the nomad service - [`Documentation`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Documentation=) - Link to the nomad 
documentation -- [`Requires`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requires=) - Configure a requirement dependency on the network service -- [`After`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Before=) - Configure an ordering dependency on the network service being started before the nomad service -- [`ConditionFileNotEmpty`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#ConditionArchitecture=) - Check for a non-zero sized configuration file before nomad is started +- [`Wants`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants=) - Configure a dependency on the network service +- [`After`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#After=) - Configure an ordering dependency on the network service being started before the nomad service The following parameters are set for the `[Service]` stanza: -- [`User`, `Group`](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#User=) - Run nomad as the nomad user -- [`ExecStart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart=) - Start nomad with the `agent` argument and path to the configuration file -- [`ExecReload`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecReload=) - Send nomad a SIGHUP signal to trigger a configuration reload in nomad +- [`ExecReload`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecReload=) - Send Nomad a `SIGHUP` signal to trigger a configuration reload +- [`ExecStart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart=) - Start Nomad with the `agent` argument and path to a directory of configuration files - [`KillMode`](https://www.freedesktop.org/software/systemd/man/systemd.kill.html#KillMode=) - Treat nomad as a single process -- [`Restart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#Restart=) - Restart nomad unless it 
returned a clean exit code +- [`LimitNOFILE`, `LimitNPROC`](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Properties) - Disable limits for file descriptors and processes - [`RestartSec`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#RestartSec=) - Restart nomad after 2 seconds of it being considered 'failed' +- [`Restart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#Restart=) - Restart nomad unless it returned a clean exit code - [`StartLimitBurst`, `StartLimitIntervalSec`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#StartLimitIntervalSec=interval) - Configure unit start rate limiting -- [`LimitNOFILE`](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Properties) - Set an increased Limit for File Descriptors +- [`TasksMax`](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#TasksMax=N) - Disable task limits (only available in systemd >= 226) The following parameters are set for the `[Install]` stanza: @@ -147,9 +144,8 @@ Create a configuration file at `/etc/nomad.d/nomad.hcl`: ```text sudo mkdir --parents /etc/nomad.d +sudo chmod 700 /etc/nomad.d sudo touch /etc/nomad.d/nomad.hcl -sudo chown --recursive nomad:nomad /etc/nomad.d -sudo chmod 640 /etc/nomad.d/nomad.hcl ``` Add this configuration to the `nomad.hcl` configuration file: @@ -169,10 +165,7 @@ data_dir = "/opt/nomad" Create a configuration file at `/etc/nomad.d/server.hcl`: ```text -sudo mkdir --parents /etc/nomad.d sudo touch /etc/nomad.d/server.hcl -sudo chown --recursive nomad:nomad /etc/nomad.d -sudo chmod 640 /etc/nomad.d/server.hcl ``` Add this configuration to the `server.hcl` configuration file: @@ -187,17 +180,14 @@ server { ``` - [`server`](/docs/configuration/server.html#enabled) - Specifies if this agent should run in server mode. All other server options depend on this value being set. 
-- [`bootstrap-expect`](/docs/configuration/server.html#bootstrap_expect) - This flag provides the number of expected servers in the datacenter. Either this value should not be provided or the value must agree with other servers in the cluster. +- [`bootstrap_expect`](/docs/configuration/server.html#bootstrap_expect) - The number of expected servers in the datacenter. Either this value should not be provided or the value must agree with other servers in the cluster. ### Client configuration Create a configuration file at `/etc/nomad.d/client.hcl`: ```text -sudo mkdir --parents /etc/nomad.d sudo touch /etc/nomad.d/client.hcl -sudo chown --recursive nomad:nomad /etc/nomad.d -sudo chmod 640 /etc/nomad.d/client.hcl ``` Add this configuration to the `client.hcl` configuration file: From 608f17e7c999a4a32a7443d45820de58f7a0de68 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Wed, 20 Mar 2019 08:08:37 -0700 Subject: [PATCH 2/3] Don't use "datacenter" in a confusing way Co-Authored-By: schmichael --- website/source/guides/operations/deployment-guide.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/guides/operations/deployment-guide.html.md b/website/source/guides/operations/deployment-guide.html.md index 639885f32..8f51ded98 100644 --- a/website/source/guides/operations/deployment-guide.html.md +++ b/website/source/guides/operations/deployment-guide.html.md @@ -180,7 +180,7 @@ server { ``` - [`server`](/docs/configuration/server.html#enabled) - Specifies if this agent should run in server mode. All other server options depend on this value being set. -- [`bootstrap_expect`](/docs/configuration/server.html#bootstrap_expect) - The number of expected servers in the datacenter. Either this value should not be provided or the value must agree with other servers in the cluster. +- [`bootstrap_expect`](/docs/configuration/server.html#bootstrap_expect) - The number of expected servers in the cluster. 
Either this value should not be provided or the value must agree with other servers in the cluster. ### Client configuration From 673a168b7258b4ced3442b6128998e5b9391bc0b Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 20 Mar 2019 09:36:29 -0700 Subject: [PATCH 3/3] docs: remove partial sig/checksum verification Point users to security doc instead. Right now it takes a lot of explaining to describe to users exactly how to validate the binary and what the output of the tools used means. For example, this is the output when validating according to the instructions in this guide and the linked doc: ``` vagrant@linux:/tmp$ gpg --verify nomad_0.8.7_SHA256SUMS.sig nomad_0.8.7_SHA256SUMS gpg: Signature made Fri 11 Jan 2019 09:47:56 PM UTC using RSA key ID 348FFC4C gpg: Good signature from "HashiCorp Security <security@hashicorp.com>" gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. Primary key fingerprint: 91A6 E7F8 5D05 C656 30BE F189 5185 2D87 348F FC4C vagrant@linux:/tmp$ shasum -a 256 -c nomad_0.8.7_SHA256SUMS shasum: ./nomad_0.8.7_darwin_amd64.zip: No such file or directory ./nomad_0.8.7_darwin_amd64.zip: FAILED open or read shasum: ./nomad_0.8.7_linux_386.zip: No such file or directory ./nomad_0.8.7_linux_386.zip: FAILED open or read shasum: ./nomad_0.8.7_linux_amd64-lxc.zip: No such file or directory ./nomad_0.8.7_linux_amd64-lxc.zip: FAILED open or read ./nomad_0.8.7_linux_amd64.zip: OK shasum: ./nomad_0.8.7_linux_arm64.zip: No such file or directory ./nomad_0.8.7_linux_arm64.zip: FAILED open or read shasum: ./nomad_0.8.7_linux_arm.zip: No such file or directory ./nomad_0.8.7_linux_arm.zip: FAILED open or read shasum: ./nomad_0.8.7_windows_386.zip: No such file or directory ./nomad_0.8.7_windows_386.zip: FAILED open or read shasum: ./nomad_0.8.7_windows_amd64.zip: No such file or directory ./nomad_0.8.7_windows_amd64.zip: FAILED open or read shasum: WARNING: 7 listed files could not be read ``` There are only two 
lines that matter in all of that output: ``` ... gpg: Good signature from "HashiCorp Security <security@hashicorp.com>" ... ./nomad_0.8.7_linux_amd64.zip: OK ... ``` I feel like trying to teach users how to use and interpret these tools in our deployment guide may be as likely to reduce confidence as increase it. --- website/source/guides/operations/deployment-guide.html.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/website/source/guides/operations/deployment-guide.html.md b/website/source/guides/operations/deployment-guide.html.md index 8f51ded98..1c6b460ac 100644 --- a/website/source/guides/operations/deployment-guide.html.md +++ b/website/source/guides/operations/deployment-guide.html.md @@ -37,15 +37,13 @@ These setup steps should be completed on all Nomad hosts: Precompiled Nomad binaries are available for download at [https://releases.hashicorp.com/nomad/](https://releases.hashicorp.com/nomad/) and Nomad Enterprise binaries are available for download by following the instructions made available to HashiCorp Enterprise customers. -You should perform checksum verification of the zip packages using the SHA256SUMS and SHA256SUMS.sig files available for the specific release version. HashiCorp provides [a guide on checksum verification](https://www.hashicorp.com/security.html) for precompiled binaries. - ```text export NOMAD_VERSION="0.8.7" curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS -curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS.sig ``` +You may perform checksum verification of the zip packages using the SHA256SUMS and SHA256SUMS.sig files available for the specific release version. 
HashiCorp provides [a guide on checksum verification](https://www.hashicorp.com/security.html) for precompiled binaries. + ## Install Nomad Unzip the downloaded package and move the `nomad` binary to `/usr/local/bin/`. Check `nomad` is available on the system path.