mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
func: add initial enos skeleton (#24787)
* func: add initial enos skeleton * style: add headers * func: change the variables input to a map of objects to simplify the workloads creation * style: formatting * Add tests for servers and clients * style: separate the tests in different scripts * style: add missing headers * func: add tests for allocs * style: improve output * func: add step to copy remote upgrade version * style: hcl formatting * fix: remove the terraform nomad provider * fix: Add clean token to remove extra new line added in provision * fix: Add clean token to remove extra new line added in provision * fix: Add clean token to remove extra new line added in provision * fix: add missing license headers * style: hcl fmt * style: rename variables and fix format * func: remove the template step on the workloads module and chop the nomad token output on the provision module * fix: correct the jobspec path on the workloads module * fix: add missing variable definitions on job specs for workloads * style: formatting * fix: rename variable in health test
This commit is contained in:
committed by
GitHub
parent
0d57e91282
commit
3861c40220
@@ -86,6 +86,6 @@ output "ssh_key_file" {
|
||||
}
|
||||
|
||||
output "nomad_token" {
|
||||
value = "${data.local_sensitive_file.nomad_token.content}"
|
||||
value = chomp(data.local_sensitive_file.nomad_token.content)
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
2
enos/.gitignore
vendored
Normal file
2
enos/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# enos scenarios
|
||||
.enos/
|
||||
20
enos/enos-modules.hcl
Normal file
20
enos/enos-modules.hcl
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
// Find any released RPM or Deb in Artifactory. Requires the version, edition, distro, and distro
|
||||
// version.
|
||||
module "build_artifactory" {
|
||||
source = "./modules/fetch_artifactory"
|
||||
}
|
||||
|
||||
module "provision_cluster" {
|
||||
source = "../e2e/terraform/provision-infra"
|
||||
}
|
||||
|
||||
module "run_workloads" {
|
||||
source = "./modules/run_workloads"
|
||||
}
|
||||
|
||||
module "test_cluster_health" {
|
||||
source = "./modules/test_cluster_health"
|
||||
}
|
||||
6
enos/enos-providers.hcl
Normal file
6
enos/enos-providers.hcl
Normal file
@@ -0,0 +1,6 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
provider "aws" "default" {
|
||||
region = var.aws_region
|
||||
}
|
||||
47
enos/enos-quality.hcl
Normal file
47
enos/enos-quality.hcl
Normal file
@@ -0,0 +1,47 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
quality "nomad_agent_info" {
|
||||
description = "A GET call to /v1/agent/members returns the correct number of running servers and they are all alive"
|
||||
}
|
||||
|
||||
quality "nomad_agent_info_self" {
|
||||
description = "A GET call to /v1/agent/self against every server returns the same last_log_index for all of them"
|
||||
}
|
||||
|
||||
quality "nomad_nodes_status" {
|
||||
description = "A GET call to /v1/nodes returns the correct number of clients and they are all eligible and ready"
|
||||
}
|
||||
|
||||
quality "nomad_node_eligibility" {
|
||||
description = "A GET call to /v1/node/:node-id returns the same node.SchedulingEligibility before and after a server upgrade"
|
||||
}
|
||||
|
||||
quality "nomad_node_metadata" {
|
||||
description = "A GET call to /v1/node/:node-id returns the same node.Meta for each server before and after a server upgrade"
|
||||
}
|
||||
|
||||
quality "nomad_job_status" {
|
||||
description = "A GET call to /v1/jobs returns the correct number of jobs and they are all running"
|
||||
}
|
||||
|
||||
quality "nomad_register_job" {
|
||||
description = "A POST call to /v1/jobs results in a new job running and allocations being started accordingly"
|
||||
}
|
||||
|
||||
quality "nomad_reschedule_alloc" {
|
||||
description = "A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled"
|
||||
}
|
||||
|
||||
quality "nomad_restore_snapshot" {
|
||||
description = "A node can be restored from a snapshot built on a previous version"
|
||||
}
|
||||
|
||||
quality "nomad_allocs_status" {
|
||||
description = "A GET call to /v1/allocs returns the correct number of allocations and they are all running"
|
||||
}
|
||||
|
||||
quality "nomad_alloc_reconect" {
|
||||
description = "A GET call to /v1/alloc/:alloc_id will return the same alloc.CreateTime for each allocation before and after a client upgrade"
|
||||
}
|
||||
|
||||
284
enos/enos-scenario-upgrade.hcl
Normal file
284
enos/enos-scenario-upgrade.hcl
Normal file
@@ -0,0 +1,284 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
scenario "upgrade" {
|
||||
description = <<-EOF
|
||||
The upgrade scenario verifies in-place upgrades between previously released versions of Nomad
|
||||
against another candidate build.
|
||||
EOF
|
||||
|
||||
matrix {
|
||||
arch = ["amd64"]
|
||||
edition = ["ce"]
|
||||
os = ["linux"]
|
||||
//service_discovery = ["consul", "nomad"]
|
||||
//arch = ["amd64", "arm64"]
|
||||
//edition = ["ce", "ent"]
|
||||
//os = ["linux", "windows"]
|
||||
exclude {
|
||||
os = ["windows"]
|
||||
arch = ["arm64"]
|
||||
}
|
||||
}
|
||||
|
||||
providers = [
|
||||
provider.aws.default,
|
||||
]
|
||||
|
||||
locals {
|
||||
cluster_name = "mcj-${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}"
|
||||
linux_count = matrix.os == "linux" ? "4" : "0"
|
||||
windows_count = matrix.os == "windows" ? "4" : "0"
|
||||
arch = matrix.arch
|
||||
}
|
||||
|
||||
step "copy_initial_binary" {
|
||||
description = <<-EOF
|
||||
Determine which Nomad artifact we want to use for the scenario, depending on the
|
||||
'arch', 'edition' and 'os' and bring it from the artifactory to a local instance.
|
||||
EOF
|
||||
|
||||
module = module.build_artifactory
|
||||
|
||||
variables {
|
||||
artifactory_username = var.artifactory_username
|
||||
artifactory_token = var.artifactory_token
|
||||
arch = local.arch
|
||||
edition = matrix.edition
|
||||
product_version = var.product_version
|
||||
os = matrix.os
|
||||
binary_path = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}"
|
||||
}
|
||||
}
|
||||
|
||||
step "provision_cluster" {
|
||||
depends_on = [step.copy_initial_binary]
|
||||
description = <<-EOF
|
||||
Using the binary from the previous step, provision a Nomad cluster using the e2e
|
||||
EOF
|
||||
|
||||
module = module.provision_cluster
|
||||
variables {
|
||||
name = local.cluster_name
|
||||
nomad_local_binary = step.copy_initial_binary.nomad_local_binary
|
||||
server_count = var.server_count
|
||||
client_count_linux = local.linux_count
|
||||
client_count_windows_2016 = local.windows_count
|
||||
nomad_license = var.nomad_license
|
||||
consul_license = var.consul_license
|
||||
volumes = false
|
||||
region = var.aws_region
|
||||
instance_arch = matrix.arch
|
||||
}
|
||||
}
|
||||
|
||||
step "run_initial_workloads" {
|
||||
depends_on = [step.provision_cluster]
|
||||
description = <<-EOF
|
||||
Verify the health of the cluster by running new workloads
|
||||
EOF
|
||||
|
||||
module = module.run_workloads
|
||||
variables {
|
||||
nomad_addr = step.provision_cluster.nomad_addr
|
||||
ca_file = step.provision_cluster.ca_file
|
||||
cert_file = step.provision_cluster.cert_file
|
||||
key_file = step.provision_cluster.key_file
|
||||
nomad_token = step.provision_cluster.nomad_token
|
||||
}
|
||||
verifies = [
|
||||
quality.nomad_register_job,
|
||||
]
|
||||
}
|
||||
|
||||
step "initial_test_cluster_health" {
|
||||
depends_on = [step.run_initial_workloads]
|
||||
description = <<-EOF
|
||||
Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules"
|
||||
EOF
|
||||
|
||||
module = module.test_cluster_health
|
||||
variables {
|
||||
nomad_addr = step.provision_cluster.nomad_addr
|
||||
ca_file = step.provision_cluster.ca_file
|
||||
cert_file = step.provision_cluster.cert_file
|
||||
key_file = step.provision_cluster.key_file
|
||||
nomad_token = step.provision_cluster.nomad_token
|
||||
server_count = var.server_count
|
||||
client_count = local.linux_count + local.windows_count
|
||||
jobs_count = step.run_initial_workloads.jobs_count
|
||||
alloc_count = step.run_initial_workloads.allocs_count
|
||||
}
|
||||
|
||||
verifies = [
|
||||
quality.nomad_agent_info,
|
||||
quality.nomad_agent_info_self,
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_allocs_status,
|
||||
quality.nomad_reschedule_alloc,
|
||||
]
|
||||
}
|
||||
|
||||
step "copy_upgrade_binary" {
|
||||
depends_on = [step.provision_cluster]
|
||||
description = <<-EOF
|
||||
Bring the new upgraded binary from the artifactory
|
||||
EOF
|
||||
|
||||
module = module.build_artifactory
|
||||
|
||||
variables {
|
||||
artifactory_username = var.artifactory_username
|
||||
artifactory_token = var.artifactory_token
|
||||
arch = local.arch
|
||||
edition = matrix.edition
|
||||
product_version = var.upgrade_version
|
||||
os = matrix.os
|
||||
binary_path = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.upgrade_version}"
|
||||
}
|
||||
}
|
||||
/*
|
||||
step "upgrade_servers" {
|
||||
description = <<-EOF
|
||||
Upgrade the cluster's servers by invoking nomad-cc ...
|
||||
EOF
|
||||
|
||||
module = module.run_cc_nomad
|
||||
|
||||
verifies = [
|
||||
quality.nomad_agent_info,
|
||||
quality.nomad_agent_info_self,
|
||||
nomad_restore_snapshot
|
||||
]
|
||||
|
||||
variables {
|
||||
cc_update_type = "server"
|
||||
nomad_upgraded_binary = step.copy_initial_binary.nomad_local_binary
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
step "run_servers_workloads" {
|
||||
// ...
|
||||
}
|
||||
|
||||
step "server_upgrade_test_cluster_health" {
|
||||
depends_on = [step.run_initial_workloads]
|
||||
description = <<-EOF
|
||||
Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules"
|
||||
EOF
|
||||
|
||||
module = module.test_cluster_health
|
||||
variables {
|
||||
nomad_addr = step.provision_cluster.nomad_addr
|
||||
ca_file = step.provision_cluster.ca_file
|
||||
cert_file = step.provision_cluster.cert_file
|
||||
key_file = step.provision_cluster.key_file
|
||||
nomad_token = step.provision_cluster.nomad_token
|
||||
server_count = var.server_count
|
||||
client_count = local.linux_count + local.windows_count
|
||||
jobs_count = step.run_initial_workloads.jobs_count
|
||||
alloc_count = step.run_initial_workloads.allocs_count
|
||||
}
|
||||
|
||||
verifies = [
|
||||
quality.nomad_agent_info,
|
||||
quality.nomad_agent_info_self,
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_allocs_status,
|
||||
quality.nomad_reschedule_alloc,
|
||||
]
|
||||
}
|
||||
|
||||
step "upgrade_client" {
|
||||
description = <<-EOF
|
||||
Upgrade the cluster's clients by invoking nomad-cc ...
|
||||
EOF
|
||||
|
||||
module = module.run_cc_nomad
|
||||
|
||||
verifies = [
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status
|
||||
]
|
||||
|
||||
variables {
|
||||
cc_update_type = "client"
|
||||
nomad_upgraded_binary = step.copy_initial_binary.nomad_local_binary
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
step "run_clients_workloads" {
|
||||
// ...
|
||||
}
|
||||
|
||||
step "client_upgrade_test_cluster_health" {
|
||||
depends_on = [step.run_initial_workloads]
|
||||
description = <<-EOF
|
||||
Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules"
|
||||
EOF
|
||||
|
||||
module = module.test_cluster_health
|
||||
variables {
|
||||
nomad_addr = step.provision_cluster.nomad_addr
|
||||
ca_file = step.provision_cluster.ca_file
|
||||
cert_file = step.provision_cluster.cert_file
|
||||
key_file = step.provision_cluster.key_file
|
||||
nomad_token = step.provision_cluster.nomad_token
|
||||
server_count = var.server_count
|
||||
client_count = local.linux_count + local.windows_count
|
||||
jobs_count = step.run_initial_workloads.jobs_count
|
||||
alloc_count = step.run_initial_workloads.allocs_count
|
||||
}
|
||||
|
||||
verifies = [
|
||||
quality.nomad_agent_info,
|
||||
quality.nomad_agent_info_self,
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_allocs_status,
|
||||
quality.nomad_reschedule_alloc,
|
||||
]
|
||||
}
|
||||
*/
|
||||
output "servers" {
|
||||
value = step.provision_cluster.servers
|
||||
}
|
||||
|
||||
output "linux_clients" {
|
||||
value = step.provision_cluster.linux_clients
|
||||
}
|
||||
|
||||
output "windows_clients" {
|
||||
value = step.provision_cluster.windows_clients
|
||||
}
|
||||
|
||||
output "message" {
|
||||
value = step.provision_cluster.message
|
||||
}
|
||||
|
||||
output "nomad_addr" {
|
||||
value = step.provision_cluster.nomad_addr
|
||||
}
|
||||
|
||||
output "ca_file" {
|
||||
value = step.provision_cluster.ca_file
|
||||
}
|
||||
|
||||
output "cert_file" {
|
||||
value = step.provision_cluster.cert_file
|
||||
}
|
||||
|
||||
output "key_file" {
|
||||
value = step.provision_cluster.key_file
|
||||
}
|
||||
|
||||
output "nomad_token" {
|
||||
value = step.provision_cluster.nomad_token
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
}
|
||||
17
enos/enos-terraform.hcl
Normal file
17
enos/enos-terraform.hcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
terraform "default" {
|
||||
required_version = ">= 1.2.0"
|
||||
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
}
|
||||
|
||||
enos = {
|
||||
source = "registry.terraform.io/hashicorp-forge/enos"
|
||||
version = ">= 0.4.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
65
enos/enos-vars.hcl
Normal file
65
enos/enos-vars.hcl
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
# Variables for the fetch_artifactory module
|
||||
variable "artifactory_username" {
|
||||
type = string
|
||||
description = "The username to use when connecting to artifactory"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "artifactory_token" {
|
||||
type = string
|
||||
description = "The token to use when connecting to artifactory"
|
||||
default = null
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "product_version" {
|
||||
description = "The version of Nomad we are testing"
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "upgrade_version" {
|
||||
description = "The version of Nomad we want to upgrade the cluster to"
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
# Local filesystem path where the fetched Nomad artifact is downloaded and
# unzipped (consumed by the fetch_artifactory module).
variable "binary_local_path" {
  description = "The path to download and unzip the binary"
  type        = string
}
|
||||
|
||||
# Variables for the provision_cluster module
|
||||
variable "nomad_local_binary" {
|
||||
description = "The path to a local binary to provision"
|
||||
}
|
||||
|
||||
variable "nomad_license" {
|
||||
type = string
|
||||
description = "If nomad_license is set, deploy a license"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "consul_license" {
|
||||
type = string
|
||||
description = "If consul_license is set, deploy a license"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "nomad_region" {
|
||||
description = "The AWS region to deploy to."
|
||||
default = "us-east-1"
|
||||
}
|
||||
|
||||
variable "server_count" {
|
||||
description = "The number of servers to provision."
|
||||
default = "3"
|
||||
}
|
||||
|
||||
variable "aws_region" {
|
||||
description = "The AWS region to deploy to."
|
||||
default = "us-east-1"
|
||||
}
|
||||
23
enos/modules/fetch_artifactory/locals.tf
Normal file
23
enos/modules/fetch_artifactory/locals.tf
Normal file
@@ -0,0 +1,23 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
locals {

  # Artifactory search path: CE artifacts live under "nomad", enterprise
  # artifacts under "nomad-enterprise".
  path = var.edition == "ce" ? "nomad/*" : "nomad-enterprise/*"

  # Enterprise artifact versions carry a "+ent" build-metadata suffix.
  artifact_version = var.edition == "ce" ? "${var.product_version}" : "${var.product_version}+ent"

  # Per-arch / per-OS artifact filename suffixes. Note there is no
  # windows/arm64 entry, so that combination fails the lookup below.
  package_extensions = {
    amd64 = {
      linux   = "_linux_amd64.zip"
      windows = "_windows_amd64.zip"
    }

    arm64 = {
      linux = "_linux_arm64.zip"
    }
  }

  artifact_name = "nomad_${local.artifact_version}${local.package_extensions[var.arch][var.os]}"
  # NOTE(review): artifact_name already ends in ".zip" (see package_extensions),
  # so artifact_zip resolves to "<name>.zip.zip" — confirm this matches the
  # filename actually written by scripts/install.sh (LOCAL_ZIP).
  artifact_zip = "${local.artifact_name}.zip"
}
|
||||
34
enos/modules/fetch_artifactory/main.tf
Normal file
34
enos/modules/fetch_artifactory/main.tf
Normal file
@@ -0,0 +1,34 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
terraform {
  required_providers {
    enos = {
      source = "registry.terraform.io/hashicorp-forge/enos"
    }
  }
}

# Locate the Nomad artifact in Artifactory matching the requested
# edition/version/arch/os (path and name are computed in locals.tf).
data "enos_artifactory_item" "nomad" {
  username = var.artifactory_username
  token    = var.artifactory_token
  host     = var.artifactory_host
  repo     = var.artifactory_repo
  path     = local.path
  name     = local.artifact_name

  properties = tomap({
    # Enterprise artifacts are published under a distinct product name.
    "product-name" = var.edition == "ce" ? "nomad" : "nomad-enterprise"
  })
}

# Download and unpack the located artifact on the local machine.
# scripts/install.sh reads its inputs from the environment map below.
resource "enos_local_exec" "install_binary" {
  environment = {
    URL         = data.enos_artifactory_item.nomad.results[0].url
    BINARY_PATH = var.binary_path
    TOKEN       = var.artifactory_token
    LOCAL_ZIP   = local.artifact_zip
  }

  scripts = [abspath("${path.module}/scripts/install.sh")]
}
|
||||
7
enos/modules/fetch_artifactory/outputs.tf
Normal file
7
enos/modules/fetch_artifactory/outputs.tf
Normal file
@@ -0,0 +1,7 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
output "nomad_local_binary" {
|
||||
description = "Path where the binary will be placed"
|
||||
value = var.os == "windows" ? "${var.binary_path}/nomad.exe" : "${var.binary_path}/nomad"
|
||||
}
|
||||
26
enos/modules/fetch_artifactory/scripts/install.sh
Executable file
26
enos/modules/fetch_artifactory/scripts/install.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Download a Nomad release zip from Artifactory and unpack it into BINARY_PATH.
# Required environment variables: URL, TOKEN, LOCAL_ZIP, BINARY_PATH.
#
# NOTE: `-x` removed — xtrace would print the wget command line, leaking the
# Artifactory token carried in the X-JFrog-Art-Api header into the logs.
set -euo pipefail

# Under `set -e` a failing command aborts the script before any `$?` test can
# run, so the original `if [ $? -eq 0 ]` checks were dead code. Handle
# failures explicitly with `if ! ...` instead.
if ! wget --header="X-JFrog-Art-Api:$TOKEN" -O "$LOCAL_ZIP" "$URL"; then
    echo "Error downloading file." >&2
    exit 1
fi
echo "File downloaded successfully: $LOCAL_ZIP"

mkdir -p "$BINARY_PATH"
if ! unzip -o "$LOCAL_ZIP" -d "$BINARY_PATH"; then
    echo "Error unzipping file." >&2
    exit 1
fi
echo "File unzipped successfully to $BINARY_PATH"

# Clean up the downloaded archive; only the unpacked binary is needed.
rm "$LOCAL_ZIP"
|
||||
55
enos/modules/fetch_artifactory/variables.tf
Normal file
55
enos/modules/fetch_artifactory/variables.tf
Normal file
@@ -0,0 +1,55 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
variable "artifactory_username" {
|
||||
type = string
|
||||
description = "The username to use when connecting to artifactory"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "artifactory_token" {
|
||||
type = string
|
||||
description = "The token to use when connecting to artifactory"
|
||||
default = null
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "artifactory_host" {
|
||||
type = string
|
||||
description = "The artifactory host to search for Nomad artifacts"
|
||||
default = "https://artifactory.hashicorp.engineering/artifactory"
|
||||
}
|
||||
|
||||
variable "artifactory_repo" {
|
||||
type = string
|
||||
description = "The artifactory repo to search for Nomad artifacts"
|
||||
default = "hashicorp-crt-staging-local*"
|
||||
}
|
||||
|
||||
variable "edition" {
|
||||
type = string
|
||||
description = "The edition of the binary to search, it can be either CE or ENT"
|
||||
}
|
||||
|
||||
variable "os" {
|
||||
type = string
|
||||
description = "The operative system the binary is needed for"
|
||||
default = "linux"
|
||||
}
|
||||
|
||||
variable "product_version" {
|
||||
description = "The version of Nomad we are testing"
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
# CPU architecture of the Nomad artifact to fetch; used as the first key into
# local.package_extensions (amd64 or arm64). The previous description was a
# copy-paste of the artifactory path description.
variable "arch" {
  description = "The CPU architecture of the Nomad artifact to fetch (amd64 or arm64)"
  type        = string
}
|
||||
|
||||
# Destination directory on the local machine where install.sh downloads and
# unzips the artifact.
variable "binary_path" {
  description = "The path to download and unzip the binary"
  type        = string
  default     = "/home/ubuntu/nomad"
}
|
||||
28
enos/modules/run_workloads/jobs/docker-service.nomad.hcl
Normal file
28
enos/modules/run_workloads/jobs/docker-service.nomad.hcl
Normal file
@@ -0,0 +1,28 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
variable "alloc_count" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
|
||||
job "service-docker" {
|
||||
|
||||
group "service-docker" {
|
||||
count = var.alloc_count
|
||||
task "alpine" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "alpine:latest"
|
||||
command = "sh"
|
||||
args = ["-c", "while true; do sleep 300; done"]
|
||||
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 100
|
||||
memory = 128
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
40
enos/modules/run_workloads/jobs/raw-exec-service.nomad.hcl
Normal file
40
enos/modules/run_workloads/jobs/raw-exec-service.nomad.hcl
Normal file
@@ -0,0 +1,40 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
variable "alloc_count" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
|
||||
job "service-raw" {
|
||||
|
||||
group "service-raw" {
|
||||
count = var.alloc_count
|
||||
task "raw" {
|
||||
driver = "raw_exec"
|
||||
|
||||
config {
|
||||
command = "bash"
|
||||
args = ["-c", "./local/runme.sh"]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOH
|
||||
#!/bin/bash
|
||||
|
||||
sigkill_handler() {
|
||||
echo "Received SIGKILL signal. Exiting..."
|
||||
exit 0
|
||||
}
|
||||
|
||||
echo "Sleeping until SIGKILL signal is received..."
|
||||
while true; do
|
||||
sleep 300
|
||||
done
|
||||
EOH
|
||||
destination = "local/runme.sh"
|
||||
perms = "755"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
36
enos/modules/run_workloads/main.tf
Normal file
36
enos/modules/run_workloads/main.tf
Normal file
@@ -0,0 +1,36 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
terraform {
|
||||
required_providers {
|
||||
enos = {
|
||||
source = "hashicorp-forge/enos"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "enos_local_exec" "wait_for_nomad_api" {
|
||||
environment = {
|
||||
NOMAD_ADDR = var.nomad_addr
|
||||
NOMAD_CACERT = var.ca_file
|
||||
NOMAD_CLIENT_CERT = var.cert_file
|
||||
NOMAD_CLIENT_KEY = var.key_file
|
||||
NOMAD_TOKEN = var.nomad_token
|
||||
}
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/wait_for_nomad_api.sh")]
|
||||
}
|
||||
|
||||
resource "enos_local_exec" "workloads" {
|
||||
for_each = var.workloads
|
||||
|
||||
environment = {
|
||||
NOMAD_ADDR = var.nomad_addr
|
||||
NOMAD_CACERT = var.ca_file
|
||||
NOMAD_CLIENT_CERT = var.cert_file
|
||||
NOMAD_CLIENT_KEY = var.key_file
|
||||
NOMAD_TOKEN = var.nomad_token
|
||||
}
|
||||
|
||||
inline = ["nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}"]
|
||||
}
|
||||
16
enos/modules/run_workloads/outputs.tf
Normal file
16
enos/modules/run_workloads/outputs.tf
Normal file
@@ -0,0 +1,16 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
/* output "jobs_count" {
|
||||
value = length(local.job_names)
|
||||
} */
|
||||
|
||||
# One job is registered per entry in var.workloads, so the expected job count
# equals the size of the workloads map.
output "jobs_count" {
  description = "The number of jobs that should be running in the cluster"
  value       = length(var.workloads)
}
|
||||
|
||||
output "allocs_count" {
|
||||
description = "The number of allocs that should be running in the cluster"
|
||||
value = sum([for wl in var.workloads : wl.alloc_count])
|
||||
}
|
||||
25
enos/modules/run_workloads/scripts/wait_for_nomad_api.sh
Normal file
25
enos/modules/run_workloads/scripts/wait_for_nomad_api.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Block until the Nomad API answers `nomad server members`, giving up after
# TIMEOUT seconds. Connection settings (NOMAD_ADDR, certs, token) come from
# the environment set by the calling enos_local_exec resource.
set -xeuo pipefail

TIMEOUT=20
INTERVAL=5

start_time=$(date +%s)

until nomad server members > /dev/null 2>&1; do
    echo "Waiting for Nomad API..."

    # Abort once more than TIMEOUT seconds have passed since the first probe.
    now=$(date +%s)
    if [ "$((now - start_time))" -ge "$TIMEOUT" ]; then
        echo "Error: Nomad API did not become available within $TIMEOUT seconds."
        exit 1
    fi

    sleep "$INTERVAL"
done

echo "Nomad API is available!"
|
||||
43
enos/modules/run_workloads/variables.tf
Normal file
43
enos/modules/run_workloads/variables.tf
Normal file
@@ -0,0 +1,43 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
variable "nomad_addr" {
|
||||
description = "The Nomad API HTTP address."
|
||||
type = string
|
||||
default = "http://localhost:4646"
|
||||
}
|
||||
|
||||
variable "ca_file" {
|
||||
description = "A local file path to a PEM-encoded certificate authority used to verify the remote agent's certificate"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "cert_file" {
|
||||
description = "A local file path to a PEM-encoded certificate provided to the remote agent. If this is specified, key_file or key_pem is also required"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "key_file" {
|
||||
description = "A local file path to a PEM-encoded private key. This is required if cert_file or cert_pem is specified."
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "nomad_token" {
|
||||
description = "The Secret ID of an ACL token to make requests with, for ACL-enabled clusters."
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "workloads" {
|
||||
description = "A map of workloads to provision"
|
||||
|
||||
type = map(object({
|
||||
job_spec = string
|
||||
alloc_count = number
|
||||
}))
|
||||
|
||||
default = {
|
||||
service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3 }
|
||||
service_docker = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3 }
|
||||
}
|
||||
}
|
||||
35
enos/modules/test_cluster_health/main.tf
Normal file
35
enos/modules/test_cluster_health/main.tf
Normal file
@@ -0,0 +1,35 @@
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
terraform {
|
||||
required_providers {
|
||||
enos = {
|
||||
source = "registry.terraform.io/hashicorp-forge/enos"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
locals {
|
||||
clean_token = trimspace(var.nomad_token) #Somewhere in the process, a newline is added to the token.
|
||||
}
|
||||
|
||||
resource "enos_local_exec" "run_tests" {
|
||||
environment = {
|
||||
NOMAD_ADDR = var.nomad_addr
|
||||
NOMAD_CACERT = var.ca_file
|
||||
NOMAD_CLIENT_CERT = var.cert_file
|
||||
NOMAD_CLIENT_KEY = var.key_file
|
||||
NOMAD_TOKEN = local.clean_token
|
||||
SERVER_COUNT = var.server_count
|
||||
CLIENT_COUNT = var.client_count
|
||||
JOB_COUNT = var.jobs_count
|
||||
ALLOC_COUNT = var.alloc_count
|
||||
}
|
||||
|
||||
scripts = [
|
||||
abspath("${path.module}/scripts/servers.sh"),
|
||||
abspath("${path.module}/scripts/clients.sh"),
|
||||
abspath("${path.module}/scripts/jobs.sh"),
|
||||
abspath("${path.module}/scripts/allocs.sh")
|
||||
]
|
||||
}
|
||||
63
enos/modules/test_cluster_health/scripts/allocs.sh
Executable file
63
enos/modules/test_cluster_health/scripts/allocs.sh
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Cluster-health checks for allocations. Expects ALLOC_COUNT in the
# environment plus the usual NOMAD_* connection variables.
set -euo pipefail

# Print an error message and abort the script with a non-zero status.
error_exit() {
    printf 'Error: %s' "${1}"
    exit 1
}

# Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running

allocs=$(nomad alloc status -json)
running_allocs=$(echo "$allocs" | jq '[.[] | select(.ClientStatus == "running")]')
allocs_length=$(echo "$running_allocs" | jq 'length' )

if [ -z "$allocs_length" ]; then
    error_exit "No allocs found"
fi

if [ "$allocs_length" -ne "$ALLOC_COUNT" ]; then
    error_exit "Some allocs are not running:\n$(nomad alloc status -json | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
fi

echo "All allocs are running."

# Quality: nomad_reschedule_alloc: A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled

MAX_WAIT_TIME=30 # Maximum wait time in seconds
POLL_INTERVAL=2  # Interval between status checks

# BUG FIX: valid indices are [0, allocs_length). The original used
# RANDOM % (allocs_length + 1), which could select index == length and
# produce a null alloc ID.
random_alloc_id=$(echo "$running_allocs" | jq -r ".[$((RANDOM % allocs_length))].ID")
echo "about to stop alloc $random_alloc_id"
nomad alloc stop -detach "$random_alloc_id" || error_exit "Failed to stop allocation $random_alloc_id."

echo "Waiting for allocation $random_alloc_id to reach 'complete' status..."
elapsed_time=0
while alloc_status=$(nomad alloc status -json "$random_alloc_id" | jq -r '.ClientStatus'); [ "$alloc_status" != "complete" ]; do
    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
        echo "Error: Allocation $random_alloc_id did not reach 'complete' status within $MAX_WAIT_TIME seconds."
        exit 1
    fi

    echo "Current status: $alloc_status. Retrying in $POLL_INTERVAL seconds..."
    sleep $POLL_INTERVAL
    elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

# Wait for the scheduler to replace the stopped allocation so the cluster is
# back at the expected running-alloc count.
echo "Waiting for all the allocations to be running again"
elapsed_time=0
while running_count=$(nomad alloc status -json | jq '[.[] | select(.ClientStatus == "running")] | length'); [ "$running_count" != "$ALLOC_COUNT" ]; do
    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
        # BUG FIX: the original reused the "did not reach 'complete'" message
        # and echoed the stale $alloc_status from the previous loop.
        echo "Error: expected $ALLOC_COUNT running allocations but found $running_count after $MAX_WAIT_TIME seconds."
        exit 1
    fi

    echo "Currently running allocs: $running_count. Retrying in $POLL_INTERVAL seconds..."
    sleep $POLL_INTERVAL
    elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Alloc successfully restarted"
|
||||
37
enos/modules/test_cluster_health/scripts/clients.sh
Executable file
37
enos/modules/test_cluster_health/scripts/clients.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

set -euo pipefail

# Print an error message and abort the test. %b interprets backslash escapes
# (the callers embed "\n" in their messages, which %s printed literally) and
# the trailing newline keeps the message from running into the next line.
error_exit() {
    printf 'Error: %b\n' "${1}"
    exit 1
}

# Quality: "nomad_CLIENTS_status: A GET call to /v1/nodes returns the correct number of clients and they are all eligible and ready"

clients=$(nomad node status -json)
running_clients=$(echo "$clients" | jq '[.[] | select(.Status == "ready")]')
clients_length=$(echo "$running_clients" | jq 'length')

if [ -z "$clients_length" ]; then
    error_exit "No clients found"
fi

if [ "$clients_length" -ne "$CLIENT_COUNT" ]; then
    error_exit "Unexpected number of clients are ready: $clients_length\n $(echo "$clients" | jq '.[] | select(.Status != "ready") | .Name')"
fi

# Every ready client must also be eligible for scheduling; an error_exit in
# the subshell fails the pipeline, and set -e/-o pipefail aborts the script.
echo "$running_clients" | jq -c '.[]' | while read -r node; do
    eligibility=$(echo "$node" | jq -r '.SchedulingEligibility')

    if [ "$eligibility" != "eligible" ]; then
        error_exit "Client not eligible: $(echo "$node" | jq -r '.Name')"
    fi
done

echo "All CLIENTS are eligible and running."
|
||||
24
enos/modules/test_cluster_health/scripts/jobs.sh
Executable file
24
enos/modules/test_cluster_health/scripts/jobs.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

set -euo pipefail

# Print an error message and abort the test. %b interprets backslash escapes
# (the caller embeds "\n" in its message, which %s printed literally) and
# the trailing newline keeps the message from running into the next line.
error_exit() {
    printf 'Error: %b\n' "${1}"
    exit 1
}

# Quality: nomad_job_status: A GET call to /v1/jobs returns the correct number of jobs and they are all running.

# Column 4 of `nomad job status` is the job status; count the "running" ones.
# The "count+0" forces a numeric 0 when no row matches.
jobs_length=$(nomad job status | awk '$4 == "running" {count++} END {print count+0}')

# Defensive: awk should always emit a number, but fail loudly if it did not.
if [ -z "$jobs_length" ]; then
    error_exit "No jobs found"
fi

if [ "$jobs_length" -ne "$JOB_COUNT" ]; then
    error_exit "The number of running jobs ($jobs_length) does not match the expected count ($JOB_COUNT)\n$(nomad job status | awk 'NR > 1 && $4 != "running" {print $4}')"
fi

echo "All JOBS are running."
|
||||
30
enos/modules/test_cluster_health/scripts/servers.sh
Executable file
30
enos/modules/test_cluster_health/scripts/servers.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

set -euo pipefail

# Print an error message and abort the test. %b interprets backslash escapes
# (the caller embeds "\n" in its message, which %s printed literally) and
# the trailing newline keeps the message from running into the next line.
error_exit() {
    printf 'Error: %b\n' "${1}"
    exit 1
}

# Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all alive

servers=$(nomad server members -json)
running_servers=$(echo "$servers" | jq '[.[] | select(.Status == "alive")]')
servers_length=$(echo "$running_servers" | jq 'length')

if [ -z "$servers_length" ]; then
    error_exit "No servers found"
fi

if [ "$servers_length" -ne "$SERVER_COUNT" ]; then
    error_exit "Unexpected number of servers are alive: $servers_length\n$(echo "$servers" | jq '.[] | select(.Status != "alive") | .Name')"
fi

# All alive servers should report the same raft log index, i.e. be in sync.
# The command substitution is quoted so an empty jq result cannot collapse
# the [ ] expression into a syntax error.
# NOTE(review): ".last_log_index" is lower_snake_case while the other fields
# used here (.Status, .Name) are CamelCase — if the key does not exist, jq
# maps every server to null and this check passes vacuously; confirm the key
# name against `nomad server members -json` output.
if [ "$(echo "$running_servers" | jq -r "map(.last_log_index ) | unique | length == 1")" != "true" ]; then
    error_exit "Servers not up to date"
fi

echo "All SERVERS are alive and up to date."
|
||||
47
enos/modules/test_cluster_health/variables.tf
Normal file
47
enos/modules/test_cluster_health/variables.tf
Normal file
@@ -0,0 +1,47 @@
|
||||
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

variable "nomad_addr" {
  description = "The Nomad API HTTP address."
  type        = string
  default     = "http://localhost:4646"
}

variable "ca_file" {
  description = "A local file path to a PEM-encoded certificate authority used to verify the remote agent's certificate"
  type        = string
}

variable "cert_file" {
  description = "A local file path to a PEM-encoded certificate provided to the remote agent. If this is specified, key_file or key_pem is also required"
  type        = string
}

variable "key_file" {
  description = "A local file path to a PEM-encoded private key. This is required if cert_file or cert_pem is specified."
  type        = string
}

variable "nomad_token" {
  description = "The Secret ID of an ACL token to make requests with, for ACL-enabled clusters."
  type        = string
}

variable "server_count" {
  description = "The expected number of servers."
  type        = number
}

variable "client_count" {
  description = "The expected number of Ubuntu clients."
  type        = number
}

variable "jobs_count" {
  description = "The number of jobs that should be running in the cluster"
  type        = number
}

variable "alloc_count" {
  # Type constraint added for consistency with the other count variables.
  description = "Number of allocations that should be running in the cluster"
  type        = number
}
|
||||
Reference in New Issue
Block a user