func: add initial enos skeleton (#24787)

* func: add initial enos skeleton

* style: add headers

* func: change the variables input to a map of objects to simplify the workloads creation

* style: formatting

* Add tests for servers and clients

* style: separate the tests in different scripts

* style: add missing headers

* func: add tests for allocs

* style: improve output

* func: add step to copy remote upgrade version

* style: hcl formatting

* fix: remove the terraform nomad provider

* fix: Add clean token to remove extra new line added in provision

* fix: Add clean token to remove extra new line added in provision

* fix: Add clean token to remove extra new line added in provision

* fix: add missing license headers

* style: hcl fmt

* style: rename variables and fix format

* func: remove the template step on the workloads module and chomp the nomad token output on the provision module

* fix: correct the jobspec path on the workloads module

* fix: add missing variable definitions on job specs for workloads

* style: formatting

* fix: rename variable in health test
This commit is contained in:
Juana De La Cuesta
2025-01-30 16:37:55 +01:00
committed by GitHub
parent 0d57e91282
commit 3861c40220
25 changed files with 1011 additions and 1 deletions

View File

@@ -86,6 +86,6 @@ output "ssh_key_file" {
}
output "nomad_token" {
value = "${data.local_sensitive_file.nomad_token.content}"
value = chomp(data.local_sensitive_file.nomad_token.content)
sensitive = true
}

2
enos/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# enos scenarios
.enos/

20
enos/enos-modules.hcl Normal file
View File

@@ -0,0 +1,20 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

// Fetch a released Nomad artifact from Artifactory. Requires the version,
// edition, os, and architecture (see modules/fetch_artifactory/variables.tf).
module "build_artifactory" {
  source = "./modules/fetch_artifactory"
}

// Provision the AWS cluster infrastructure, reusing the e2e Terraform code.
module "provision_cluster" {
  source = "../e2e/terraform/provision-infra"
}

// Register test workloads (Nomad jobs) against the provisioned cluster.
module "run_workloads" {
  source = "./modules/run_workloads"
}

// Verify servers, clients, jobs and allocations are healthy.
module "test_cluster_health" {
  source = "./modules/test_cluster_health"
}

6
enos/enos-providers.hcl Normal file
View File

@@ -0,0 +1,6 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Default AWS provider used by the scenarios; the region is selected through
# var.aws_region (see enos-vars.hcl).
provider "aws" "default" {
  region = var.aws_region
}

47
enos/enos-quality.hcl Normal file
View File

@@ -0,0 +1,47 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Quality requirements verified by the scenario steps; scenarios reference
# these via `verifies = [quality.<name>, ...]`.

quality "nomad_agent_info" {
  description = "A GET call to /v1/agent/members returns the correct number of running servers and they are all alive"
}

quality "nomad_agent_info_self" {
  description = "A GET call to /v1/agent/self against every server returns the same last_log_index for all of them"
}

quality "nomad_nodes_status" {
  description = "A GET call to /v1/nodes returns the correct number of clients and they are all eligible and ready"
}

quality "nomad_node_eligibility" {
  description = "A GET call to /v1/node/:node-id returns the same node.SchedulingEligibility before and after a server upgrade"
}

quality "nomad_node_metadata" {
  description = "A GET call to /v1/node/:node-id returns the same node.Meta for each server before and after a server upgrade"
}

quality "nomad_job_status" {
  description = "A GET call to /v1/jobs returns the correct number of jobs and they are all running"
}

quality "nomad_register_job" {
  description = "A POST call to /v1/jobs results in a new job running and allocations being started accordingly"
}

quality "nomad_reschedule_alloc" {
  description = "A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled"
}

quality "nomad_restore_snapshot" {
  description = "A node can be restored from a snapshot built on a previous version"
}

quality "nomad_allocs_status" {
  description = "A GET call to /v1/allocs returns the correct number of allocations and they are all running"
}

# NOTE(review): "reconect" looks like a typo for "reconnect"; renaming the
# quality requires updating every scenario that references it.
quality "nomad_alloc_reconect" {
  description = "A GET call to /v1/alloc/:alloc_id will return the same alloc.CreateTime for each allocation before and after a client upgrade"
}

View File

@@ -0,0 +1,284 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# In-place upgrade scenario: provision a cluster on the initial version, run
# workloads, verify health, then (upgrade steps still commented out) upgrade
# servers and clients to the candidate build and re-verify.
scenario "upgrade" {
  description = <<-EOF
    The upgrade scenario verifies in-place upgrades between previously released versions of Nomad
    against another candidate build.
  EOF

  matrix {
    arch    = ["amd64"]
    edition = ["ce"]
    os      = ["linux"]
    //service_discovery = ["consul", "nomad"]
    //arch    = ["amd64", "arm64"]
    //edition = ["ce", "ent"]
    //os      = ["linux", "windows"]

    # No windows/arm64 artifact exists (see fetch_artifactory locals).
    exclude {
      os   = ["windows"]
      arch = ["arm64"]
    }
  }

  providers = [
    provider.aws.default,
  ]

  locals {
    cluster_name = "mcj-${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}"
    # Clients are only provisioned for the OS selected by the matrix.
    linux_count   = matrix.os == "linux" ? "4" : "0"
    windows_count = matrix.os == "windows" ? "4" : "0"
    arch          = matrix.arch
  }

  step "copy_initial_binary" {
    description = <<-EOF
      Determine which Nomad artifact we want to use for the scenario, depending on the
      'arch', 'edition' and 'os' and bring it from the artifactory to a local instance.
    EOF

    module = module.build_artifactory

    variables {
      artifactory_username = var.artifactory_username
      artifactory_token    = var.artifactory_token
      arch                 = local.arch
      edition              = matrix.edition
      product_version      = var.product_version
      os                   = matrix.os
      binary_path          = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}"
    }
  }

  step "provision_cluster" {
    depends_on = [step.copy_initial_binary]

    description = <<-EOF
      Using the binary from the previous step, provision a Nomad cluster using the e2e
    EOF

    module = module.provision_cluster

    variables {
      name                      = local.cluster_name
      nomad_local_binary        = step.copy_initial_binary.nomad_local_binary
      server_count              = var.server_count
      client_count_linux        = local.linux_count
      client_count_windows_2016 = local.windows_count
      nomad_license             = var.nomad_license
      consul_license            = var.consul_license
      volumes                   = false
      region                    = var.aws_region
      instance_arch             = matrix.arch
    }
  }

  step "run_initial_workloads" {
    depends_on = [step.provision_cluster]

    description = <<-EOF
      Verify the health of the cluster by running new workloads
    EOF

    module = module.run_workloads

    variables {
      nomad_addr  = step.provision_cluster.nomad_addr
      ca_file     = step.provision_cluster.ca_file
      cert_file   = step.provision_cluster.cert_file
      key_file    = step.provision_cluster.key_file
      nomad_token = step.provision_cluster.nomad_token
    }

    verifies = [
      quality.nomad_register_job,
    ]
  }

  step "initial_test_cluster_health" {
    depends_on = [step.run_initial_workloads]

    # Fix: the original description ended with a stray '"' character.
    description = <<-EOF
      Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules
    EOF

    module = module.test_cluster_health

    variables {
      nomad_addr   = step.provision_cluster.nomad_addr
      ca_file      = step.provision_cluster.ca_file
      cert_file    = step.provision_cluster.cert_file
      key_file     = step.provision_cluster.key_file
      nomad_token  = step.provision_cluster.nomad_token
      server_count = var.server_count
      client_count = local.linux_count + local.windows_count
      jobs_count   = step.run_initial_workloads.jobs_count
      alloc_count  = step.run_initial_workloads.allocs_count
    }

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_nodes_status,
      quality.nomad_job_status,
      quality.nomad_allocs_status,
      quality.nomad_reschedule_alloc,
    ]
  }

  step "copy_upgrade_binary" {
    depends_on = [step.provision_cluster]

    description = <<-EOF
      Bring the new upgraded binary from the artifactory
    EOF

    module = module.build_artifactory

    variables {
      artifactory_username = var.artifactory_username
      artifactory_token    = var.artifactory_token
      arch                 = local.arch
      edition              = matrix.edition
      product_version      = var.upgrade_version
      os                   = matrix.os
      binary_path          = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.upgrade_version}"
    }
  }

  /*
  step "upgrade_servers" {
    description = <<-EOF
      Upgrade the cluster's servers by invoking nomad-cc ...
    EOF

    module = module.run_cc_nomad

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_restore_snapshot
    ]

    variables {
      cc_update_type        = "server"
      // NOTE(review): should this reference step.copy_upgrade_binary instead?
      nomad_upgraded_binary = step.copy_initial_binary.nomad_local_binary
      // ...
    }
  }

  step "run_servers_workloads" {
    // ...
  }

  step "server_upgrade_test_cluster_health" {
    depends_on = [step.run_initial_workloads]

    description = <<-EOF
      Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules
    EOF

    module = module.test_cluster_health

    variables {
      nomad_addr   = step.provision_cluster.nomad_addr
      ca_file      = step.provision_cluster.ca_file
      cert_file    = step.provision_cluster.cert_file
      key_file     = step.provision_cluster.key_file
      nomad_token  = step.provision_cluster.nomad_token
      server_count = var.server_count
      client_count = local.linux_count + local.windows_count
      jobs_count   = step.run_initial_workloads.jobs_count
      alloc_count  = step.run_initial_workloads.allocs_count
    }

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_nodes_status,
      quality.nomad_job_status,
      quality.nomad_allocs_status,
      quality.nomad_reschedule_alloc,
    ]
  }

  step "upgrade_client" {
    description = <<-EOF
      Upgrade the cluster's clients by invoking nomad-cc ...
    EOF

    module = module.run_cc_nomad

    verifies = [
      quality.nomad_nodes_status,
      quality.nomad_job_status
    ]

    variables {
      cc_update_type        = "client"
      nomad_upgraded_binary = step.copy_initial_binary.nomad_local_binary
      // ...
    }
  }

  step "run_clients_workloads" {
    // ...
  }

  step "client_upgrade_test_cluster_health" {
    depends_on = [step.run_initial_workloads]

    description = <<-EOF
      Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules
    EOF

    module = module.test_cluster_health

    variables {
      nomad_addr   = step.provision_cluster.nomad_addr
      ca_file      = step.provision_cluster.ca_file
      cert_file    = step.provision_cluster.cert_file
      key_file     = step.provision_cluster.key_file
      nomad_token  = step.provision_cluster.nomad_token
      server_count = var.server_count
      client_count = local.linux_count + local.windows_count
      jobs_count   = step.run_initial_workloads.jobs_count
      alloc_count  = step.run_initial_workloads.allocs_count
    }

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_nodes_status,
      quality.nomad_job_status,
      quality.nomad_allocs_status,
      quality.nomad_reschedule_alloc,
    ]
  }
  */

  # Cluster connection details surfaced for manual debugging of a scenario run.
  output "servers" {
    value = step.provision_cluster.servers
  }

  output "linux_clients" {
    value = step.provision_cluster.linux_clients
  }

  output "windows_clients" {
    value = step.provision_cluster.windows_clients
  }

  output "message" {
    value = step.provision_cluster.message
  }

  output "nomad_addr" {
    value = step.provision_cluster.nomad_addr
  }

  output "ca_file" {
    value = step.provision_cluster.ca_file
  }

  output "cert_file" {
    value = step.provision_cluster.cert_file
  }

  output "key_file" {
    value = step.provision_cluster.key_file
  }

  output "nomad_token" {
    value     = step.provision_cluster.nomad_token
    sensitive = true
  }
}

17
enos/enos-terraform.hcl Normal file
View File

@@ -0,0 +1,17 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Terraform settings shared by the enos scenarios: minimum Terraform version
# and the AWS and enos providers.
terraform "default" {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source = "hashicorp/aws"
    }

    enos = {
      source  = "registry.terraform.io/hashicorp-forge/enos"
      version = ">= 0.4.0"
    }
  }
}

65
enos/enos-vars.hcl Normal file
View File

@@ -0,0 +1,65 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Variables for the fetch_artifactory module

variable "artifactory_username" {
  type        = string
  description = "The username to use when connecting to artifactory"
  default     = null
}

variable "artifactory_token" {
  type        = string
  description = "The token to use when connecting to artifactory"
  default     = null
  sensitive   = true
}

variable "product_version" {
  description = "The version of Nomad we are testing"
  type        = string
  default     = null
}

variable "upgrade_version" {
  description = "The version of Nomad we want to upgrade the cluster to"
  type        = string
  default     = null
}

variable "binary_local_path" {
  # Fix: "donwload" typo in the original description.
  description = "The path to download and unzip the binary"
  type        = string
}

# Variables for the provision_cluster module

variable "nomad_local_binary" {
  description = "The path to a local binary to provision"
  # Fix: type was missing; every other path variable in this file is a string.
  type        = string
}

variable "nomad_license" {
  type        = string
  description = "If nomad_license is set, deploy a license"
  default     = ""
}

variable "consul_license" {
  type        = string
  description = "If consul_license is set, deploy a license"
  default     = ""
}

variable "nomad_region" {
  # NOTE(review): description copied from aws_region — presumably this should
  # describe the Nomad region, not an AWS region; confirm with the provision
  # module before changing it.
  description = "The AWS region to deploy to."
  default     = "us-east-1"
}

variable "server_count" {
  description = "The number of servers to provision."
  default     = "3"
}

variable "aws_region" {
  description = "The AWS region to deploy to."
  default     = "us-east-1"
}

View File

@@ -0,0 +1,23 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

locals {
  # Artifactory search path and version string differ between CE and ENT.
  path             = var.edition == "ce" ? "nomad/*" : "nomad-enterprise/*"
  artifact_version = var.edition == "ce" ? "${var.product_version}" : "${var.product_version}+ent"

  # arch -> os -> artifact suffix; there is no windows/arm64 artifact.
  package_extensions = {
    amd64 = {
      linux   = "_linux_amd64.zip"
      windows = "_windows_amd64.zip"
    }

    arm64 = {
      linux = "_linux_arm64.zip"
    }
  }

  # NOTE(review): the extension above already ends in ".zip", so artifact_zip
  # resolves to "<name>.zip.zip" (the local download name used by
  # scripts/install.sh) — confirm the double extension is intentional.
  artifact_name = "nomad_${local.artifact_version}${local.package_extensions[var.arch][var.os]}"
  artifact_zip  = "${local.artifact_name}.zip"
}

View File

@@ -0,0 +1,34 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

terraform {
  required_providers {
    enos = {
      source = "registry.terraform.io/hashicorp-forge/enos"
    }
  }
}

# Locate the Nomad artifact in Artifactory matching the requested
# version/edition/os/arch (path and name are built in locals).
data "enos_artifactory_item" "nomad" {
  username = var.artifactory_username
  token    = var.artifactory_token
  host     = var.artifactory_host
  repo     = var.artifactory_repo
  path     = local.path
  name     = local.artifact_name

  properties = tomap({
    "product-name" = var.edition == "ce" ? "nomad" : "nomad-enterprise"
  })
}

# Download and unzip the artifact locally; scripts/install.sh consumes the
# environment variables below.
resource "enos_local_exec" "install_binary" {
  environment = {
    URL         = data.enos_artifactory_item.nomad.results[0].url
    BINARY_PATH = var.binary_path
    TOKEN       = var.artifactory_token
    LOCAL_ZIP   = local.artifact_zip
  }

  scripts = [abspath("${path.module}/scripts/install.sh")]
}

View File

@@ -0,0 +1,7 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

output "nomad_local_binary" {
  description = "Path where the binary will be placed"
  # Windows artifacts ship nomad.exe; every other OS ships a bare "nomad".
  value       = var.os == "windows" ? "${var.binary_path}/nomad.exe" : "${var.binary_path}/nomad"
}

View File

@@ -0,0 +1,26 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Downloads the Nomad artifact from Artifactory and unzips it.
# Inputs (set by enos_local_exec): URL, TOKEN, LOCAL_ZIP, BINARY_PATH.

set -xeuo pipefail

# Fix: under `set -e` a failing command aborts the script before any `$?`
# check runs, so the original else branches were unreachable; test the
# commands directly instead.
if wget --header="X-JFrog-Art-Api:$TOKEN" -O "$LOCAL_ZIP" "$URL"; then
    echo "File downloaded successfully: $LOCAL_ZIP"
else
    echo "Error downloading file." >&2
    exit 1
fi

mkdir -p "$BINARY_PATH"

if unzip -o "$LOCAL_ZIP" -d "$BINARY_PATH"; then
    echo "File unzipped successfully to $BINARY_PATH"
else
    echo "Error unzipping file." >&2
    exit 1
fi

rm "$LOCAL_ZIP"

View File

@@ -0,0 +1,55 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

variable "artifactory_username" {
  type        = string
  description = "The username to use when connecting to artifactory"
  default     = null
}

variable "artifactory_token" {
  type        = string
  description = "The token to use when connecting to artifactory"
  default     = null
  sensitive   = true
}

variable "artifactory_host" {
  type        = string
  description = "The artifactory host to search for Nomad artifacts"
  default     = "https://artifactory.hashicorp.engineering/artifactory"
}

variable "artifactory_repo" {
  type        = string
  description = "The artifactory repo to search for Nomad artifacts"
  default     = "hashicorp-crt-staging-local*"
}

variable "edition" {
  type        = string
  description = "The edition of the binary to search, it can be either CE or ENT"
}

variable "os" {
  type        = string
  # Fix: "operative system" typo.
  description = "The operating system the binary is needed for"
  default     = "linux"
}

variable "product_version" {
  description = "The version of Nomad we are testing"
  type        = string
  default     = null
}

variable "arch" {
  # Fix: the original description was copy-pasted from the artifactory path
  # variable; this is the CPU architecture of the artifact (see the
  # package_extensions keys in locals: amd64 or arm64).
  description = "The CPU architecture of the Nomad artifact (amd64 or arm64)"
  type        = string
}

variable "binary_path" {
  # Fix: "donwload" typo.
  description = "The path to download and unzip the binary"
  type        = string
  default     = "/home/ubuntu/nomad"
}

View File

@@ -0,0 +1,28 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Minimal Docker service workload used by the enos test scenarios: runs
# var.alloc_count alpine containers that sleep forever.

variable "alloc_count" {
  type    = number
  default = 1
}

job "service-docker" {
  group "service-docker" {
    count = var.alloc_count

    task "alpine" {
      driver = "docker"

      config {
        image   = "alpine:latest"
        command = "sh"
        args    = ["-c", "while true; do sleep 300; done"]
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}

View File

@@ -0,0 +1,40 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Minimal raw_exec service workload used by the enos test scenarios: runs a
# templated shell script that sleeps forever.

variable "alloc_count" {
  type    = number
  default = 1
}

job "service-raw" {
  group "service-raw" {
    count = var.alloc_count

    task "raw" {
      driver = "raw_exec"

      config {
        command = "bash"
        args    = ["-c", "./local/runme.sh"]
      }

      # NOTE(review): the templated script defines sigkill_handler but never
      # installs it with `trap` (and SIGKILL cannot be trapped anyway) — the
      # function is dead code; confirm whether a SIGTERM trap was intended.
      template {
        data        = <<EOH
#!/bin/bash

sigkill_handler() {
  echo "Received SIGKILL signal. Exiting..."
  exit 0
}

echo "Sleeping until SIGKILL signal is received..."

while true; do
  sleep 300
done
EOH
        destination = "local/runme.sh"
        perms       = "755"
      }
    }
  }
}

View File

@@ -0,0 +1,36 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

terraform {
  required_providers {
    enos = {
      source = "hashicorp-forge/enos"
    }
  }
}

# Block until the Nomad API answers before trying to register any jobs.
resource "enos_local_exec" "wait_for_nomad_api" {
  environment = {
    NOMAD_ADDR        = var.nomad_addr
    NOMAD_CACERT      = var.ca_file
    NOMAD_CLIENT_CERT = var.cert_file
    NOMAD_CLIENT_KEY  = var.key_file
    NOMAD_TOKEN       = var.nomad_token
  }

  scripts = [abspath("${path.module}/scripts/wait_for_nomad_api.sh")]
}

# Register one Nomad job per entry in var.workloads.
resource "enos_local_exec" "workloads" {
  # Fix: nothing in this resource references wait_for_nomad_api, so without an
  # explicit dependency the jobs could be registered before the API is up.
  depends_on = [enos_local_exec.wait_for_nomad_api]

  for_each = var.workloads

  environment = {
    NOMAD_ADDR        = var.nomad_addr
    NOMAD_CACERT      = var.ca_file
    NOMAD_CLIENT_CERT = var.cert_file
    NOMAD_CLIENT_KEY  = var.key_file
    NOMAD_TOKEN       = var.nomad_token
  }

  inline = ["nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}"]
}

View File

@@ -0,0 +1,16 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Fix: removed the dead commented-out jobs_count output (it referenced a
# local.job_names that no longer exists) and the "thar" typo.

output "jobs_count" {
  description = "The number of jobs that should be running in the cluster"
  value       = length(var.workloads)
}

output "allocs_count" {
  description = "The number of allocs that should be running in the cluster"
  value       = sum([for wl in var.workloads : wl.alloc_count])
}

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Polls the Nomad API (via `nomad server members`) until it responds, giving
# up after TIMEOUT seconds. Connection details come from the NOMAD_*
# environment variables set by the caller.

set -xeuo pipefail

TIMEOUT=20
INTERVAL=5

deadline=$(( $(date +%s) + TIMEOUT ))

until nomad server members > /dev/null 2>&1; do
    echo "Waiting for Nomad API..."
    if [ "$(date +%s)" -ge "$deadline" ]; then
        echo "Error: Nomad API did not become available within $TIMEOUT seconds."
        exit 1
    fi
    sleep "$INTERVAL"
done

echo "Nomad API is available!"

View File

@@ -0,0 +1,43 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

variable "nomad_addr" {
  description = "The Nomad API HTTP address."
  type        = string
  default     = "http://localhost:4646"
}

variable "ca_file" {
  description = "A local file path to a PEM-encoded certificate authority used to verify the remote agent's certificate"
  type        = string
}

variable "cert_file" {
  description = "A local file path to a PEM-encoded certificate provided to the remote agent. If this is specified, key_file or key_pem is also required"
  type        = string
}

variable "key_file" {
  description = "A local file path to a PEM-encoded private key. This is required if cert_file or cert_pem is specified."
  type        = string
}

variable "nomad_token" {
  description = "The Secret ID of an ACL token to make requests with, for ACL-enabled clusters."
  type        = string
  sensitive   = true
}

# Each entry maps to one `nomad job run` invocation (see main.tf): job_spec is
# a path relative to this module, alloc_count is passed to the jobspec as -var.
variable "workloads" {
  description = "A map of workloads to provision"

  type = map(object({
    job_spec    = string
    alloc_count = number
  }))

  default = {
    service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3 }
    service_docker   = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3 }
  }
}

View File

@@ -0,0 +1,35 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

terraform {
  required_providers {
    enos = {
      source = "registry.terraform.io/hashicorp-forge/enos"
    }
  }
}

locals {
  clean_token = trimspace(var.nomad_token) #Somewhere in the process, a newline is added to the token.
}

# Runs the health-check scripts in order (servers, clients, jobs, allocs).
# Each script reads the cluster connection details and expected counts from
# the environment below and exits non-zero on failure, failing this resource.
resource "enos_local_exec" "run_tests" {
  environment = {
    NOMAD_ADDR        = var.nomad_addr
    NOMAD_CACERT      = var.ca_file
    NOMAD_CLIENT_CERT = var.cert_file
    NOMAD_CLIENT_KEY  = var.key_file
    NOMAD_TOKEN       = local.clean_token
    SERVER_COUNT      = var.server_count
    CLIENT_COUNT      = var.client_count
    JOB_COUNT         = var.jobs_count
    ALLOC_COUNT       = var.alloc_count
  }

  scripts = [
    abspath("${path.module}/scripts/servers.sh"),
    abspath("${path.module}/scripts/clients.sh"),
    abspath("${path.module}/scripts/jobs.sh"),
    abspath("${path.module}/scripts/allocs.sh")
  ]
}

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Verifies allocation health: all $ALLOC_COUNT allocations are running, then
# stops one at random and waits for it to be rescheduled.

set -euo pipefail

error_exit() {
    printf 'Error: %s' "${1}"
    exit 1
}

# Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running

allocs=$(nomad alloc status -json)
running_allocs=$(echo "$allocs" | jq '[.[] | select(.ClientStatus == "running")]')
allocs_length=$(echo "$running_allocs" | jq 'length')

if [ -z "$allocs_length" ]; then
    error_exit "No allocs found"
fi

if [ "$allocs_length" -ne "$ALLOC_COUNT" ]; then
    error_exit "Some allocs are not running:\n$(nomad alloc status -json | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
fi

echo "All allocs are running."

# Quality: nomad_reschedule_alloc: A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled

MAX_WAIT_TIME=30 # Maximum wait time in seconds
POLL_INTERVAL=2  # Interval between status checks

# Fix: the modulo must be over the list length, not length + 1 — the original
# could index one past the end of the array and produce a null alloc ID.
random_alloc_id=$(echo "$running_allocs" | jq -r ".[$((RANDOM % allocs_length))].ID")

echo "about to stop alloc $random_alloc_id"
nomad alloc stop -detach "$random_alloc_id" || error_exit "Failed to stop allocation $random_alloc_id."

echo "Waiting for allocation $random_alloc_id to reach 'complete' status..."
elapsed_time=0

while alloc_status=$(nomad alloc status -json "$random_alloc_id" | jq -r '.ClientStatus'); [ "$alloc_status" != "complete" ]; do
    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
        echo "Error: Allocation $random_alloc_id did not reach 'complete' status within $MAX_WAIT_TIME seconds."
        exit 1
    fi
    echo "Current status: $alloc_status. Retrying in $POLL_INTERVAL seconds..."
    sleep $POLL_INTERVAL
    elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Waiting for all the allocations to be running again"
elapsed_time=0

# Fix: the original loop echoed the stale status of the stopped alloc and
# reused the 'complete' error message; report the running count instead.
while running_count=$(nomad alloc status -json | jq '[.[] | select(.ClientStatus == "running")] | length'); [ "$running_count" != "$ALLOC_COUNT" ]; do
    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
        echo "Error: only $running_count of $ALLOC_COUNT allocations are running after $MAX_WAIT_TIME seconds."
        exit 1
    fi
    echo "Currently running allocs: $running_count. Retrying in $POLL_INTERVAL seconds..."
    sleep $POLL_INTERVAL
    elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Alloc successfully restarted"

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Verifies that $CLIENT_COUNT clients are ready and that every ready client is
# scheduling-eligible.

set -euo pipefail

error_exit() {
    printf 'Error: %s' "${1}"
    exit 1
}

# Quality: "nomad_CLIENTS_status: A GET call to /v1/nodes returns the correct number of clients and they are all eligible and ready"

clients=$(nomad node status -json)
running_clients=$(echo "$clients" | jq '[.[] | select(.Status == "ready")]')
clients_length=$(echo "$running_clients" | jq 'length')

if [ -z "$clients_length" ]; then
    error_exit "No clients found"
fi

if [ "$clients_length" -ne "$CLIENT_COUNT" ]; then
    error_exit "Unexpected number of clients are ready: $clients_length\n $(echo "$clients" | jq '.[] | select(.Status != "ready") | .Name')"
fi

# Every node in running_clients is already "ready" (filtered above), so only
# eligibility remains to be checked. Fix: the original also extracted .Status
# into a variable it never used; quoted all expansions.
echo "$running_clients" | jq -c '.[]' | while read -r node; do
    eligibility=$(echo "$node" | jq -r '.SchedulingEligibility')
    if [ "$eligibility" != "eligible" ]; then
        error_exit "Client not eligible: $(echo "$node" | jq -r '.Name')"
    fi
done

echo "All CLIENTS are eligible and running."

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Verifies that exactly $JOB_COUNT jobs are in the "running" state.

set -euo pipefail

error_exit() {
    printf 'Error: %s' "${1}"
    exit 1
}

# Quality: nomad_job_status: A GET call to /v1/jobs returns the correct number of jobs and they are all running.

# Column 4 of `nomad job status` table output is the job status.
jobs_length=$(nomad job status | awk '$4 == "running" {count++} END {print count+0}')

# Fix: `print count+0` always emits a number, so the original `-z` check was
# unreachable; test for zero running jobs instead.
if [ "$jobs_length" -eq 0 ]; then
    error_exit "No jobs found"
fi

if [ "$jobs_length" -ne "$JOB_COUNT" ]; then
    error_exit "The number of running jobs ($jobs_length) does not match the expected count ($JOB_COUNT)\n$(nomad job status | awk 'NR > 1 && $4 != "running" {print $4}')"
fi

echo "All JOBS are running."

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Verifies that $SERVER_COUNT servers are alive and share the same raft log
# index.

set -euo pipefail

error_exit() {
    printf 'Error: %s' "${1}"
    exit 1
}

# Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all alive

servers=$(nomad server members -json )
running_servers=$(echo $servers | jq '[.[] | select(.Status == "alive")]')
servers_length=$(echo "$running_servers" | jq 'length' )

if [ -z "$servers_length" ]; then
    error_exit "No servers found"
fi

if [ "$servers_length" -ne "$SERVER_COUNT" ]; then
    error_exit "Unexpected number of servers are alive: $servers_length\n$(echo $servers | jq '.[] | select(.Status != "alive") | .Name')"
fi

# NOTE(review): assumes `nomad server members -json` exposes a
# `last_log_index` field on each member — confirm against the CLI output of
# the Nomad versions under test.
if [ $(echo "$running_servers" | jq -r "map(.last_log_index ) | unique | length == 1") != "true" ]; then
    error_exit "Servers not up to date"
fi

echo "All SERVERS are alive and up to date."

View File

@@ -0,0 +1,47 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

variable "nomad_addr" {
  description = "The Nomad API HTTP address."
  type        = string
  default     = "http://localhost:4646"
}

variable "ca_file" {
  description = "A local file path to a PEM-encoded certificate authority used to verify the remote agent's certificate"
  type        = string
}

variable "cert_file" {
  description = "A local file path to a PEM-encoded certificate provided to the remote agent. If this is specified, key_file or key_pem is also required"
  type        = string
}

variable "key_file" {
  description = "A local file path to a PEM-encoded private key. This is required if cert_file or cert_pem is specified."
  type        = string
}

variable "nomad_token" {
  description = "The Secret ID of an ACL token to make requests with, for ACL-enabled clusters."
  type        = string
  # Fix: marked sensitive for consistency with the run_workloads module so the
  # token is not echoed in plans/logs.
  sensitive   = true
}

variable "server_count" {
  description = "The expected number of servers."
  type        = number
}

variable "client_count" {
  # Fix: the scenario passes linux + windows counts, so this is the total
  # number of clients, not only Ubuntu ones.
  description = "The expected number of clients."
  type        = number
}

variable "jobs_count" {
  description = "The number of jobs that should be running in the cluster"
  type        = number
}

variable "alloc_count" {
  # Fix: added the missing type and the "allocation(s)" typo.
  description = "Number of allocations that should be running in the cluster"
  type        = number
}