Files
nomad/enos/enos-scenario-upgrade.hcl
Juana De La Cuesta 3861c40220 func: add initial enos skeleton (#24787)
* func: add initial enos skeleton

* style: add headers

* func: change the variables input to a map of objects to simplify the workloads creation

* style: formatting

* Add tests for servers and clients

* style: separate the tests in different scripts

* style: add missing headers

* func: add tests for allocs

* style: improve output

* func: add step to copy remote upgrade version

* style: hcl formatting

* fix: remove the terraform nomad provider

* fix: Add clean token to remove extra new line added in provision

* fix: Add clean token to remove extra new line added in provision

* fix: Add clean token to remove extra new line added in provision

* fix: add missing license headers

* style: hcl fmt

* style: rename variables and fix format

* func: remove the template step on the workloads module and chop the nomad token output on the provide module

* fix: correct the jobspec path on the workloads module

* fix: add missing variable definitions on job specs for workloads

* style: formatting

* fix: rename variable in health test
2025-01-30 16:37:55 +01:00

285 lines
8.2 KiB
HCL

# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
scenario "upgrade" {
  # Flow of the active steps: fetch the initial Nomad binary, provision a
  # cluster with it, run workloads, check cluster health, then fetch the
  # upgrade binary. The steps that would perform the upgrade itself are kept
  # as a commented-out draft further down.
  description = <<-EOF
    The upgrade scenario verifies in-place upgrades between previously released versions of Nomad
    against another candidate build.
    EOF

  # Only the linux/amd64/ce variant is active. The commented-out lines list
  # wider candidate values that are currently disabled.
  matrix {
    arch    = ["amd64"]
    edition = ["ce"]
    os      = ["linux"]
    //service_discovery = ["consul", "nomad"]
    //arch = ["amd64", "arm64"]
    //edition = ["ce", "ent"]
    //os = ["linux", "windows"]

    # Removes the windows + arm64 combination from the matrix.
    exclude {
      os   = ["windows"]
      arch = ["arm64"]
    }
  }

  providers = [
    provider.aws.default,
  ]

  locals {
    # Name handed to the provision_cluster step; it encodes the matrix variant
    # and the initial product version.
    # NOTE(review): the "mcj-" prefix is hard-coded - confirm it is intended.
    cluster_name = "mcj-${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}"

    # Four clients for the OS selected by the matrix, zero for the other one.
    # NOTE(review): the counts are strings and are later summed with `+` in
    # initial_test_cluster_health; this relies on HCL converting numeric
    # strings for arithmetic - confirm.
    linux_count   = matrix.os == "linux" ? "4" : "0"
    windows_count = matrix.os == "windows" ? "4" : "0"
    arch          = matrix.arch
  }

  # Fetches the binary for var.product_version (the version the cluster
  # starts on) into a per-variant path under var.nomad_local_binary.
  step "copy_initial_binary" {
    description = <<-EOF
      Determine which Nomad artifact we want to use for the scenario, depending on the
      'arch', 'edition' and 'os' and bring it from the artifactory to a local instance.
      EOF

    module = module.build_artifactory

    variables {
      artifactory_username = var.artifactory_username
      artifactory_token    = var.artifactory_token
      arch                 = local.arch
      edition              = matrix.edition
      product_version      = var.product_version
      os                   = matrix.os
      binary_path          = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}"
    }
  }

  # Provisions the cluster from the binary produced by copy_initial_binary.
  # Its outputs (address, TLS files, token, node lists) feed the later steps
  # and the scenario outputs.
  step "provision_cluster" {
    depends_on = [step.copy_initial_binary]

    # The original sentence was cut off after "using the e2e"; it now names the
    # module used below. TODO confirm the intended wording.
    description = <<-EOF
      Using the binary from the previous step, provision a Nomad cluster with the
      provision_cluster module.
      EOF

    module = module.provision_cluster

    variables {
      name                      = local.cluster_name
      nomad_local_binary        = step.copy_initial_binary.nomad_local_binary
      server_count              = var.server_count
      client_count_linux        = local.linux_count
      client_count_windows_2016 = local.windows_count
      nomad_license             = var.nomad_license
      consul_license            = var.consul_license
      volumes                   = false
      region                    = var.aws_region
      instance_arch             = matrix.arch
    }
  }

  # Runs workloads against the freshly provisioned cluster, using the
  # connection details exported by provision_cluster.
  step "run_initial_workloads" {
    depends_on = [step.provision_cluster]

    description = <<-EOF
      Verify the health of the cluster by running new workloads
      EOF

    module = module.run_workloads

    variables {
      nomad_addr  = step.provision_cluster.nomad_addr
      ca_file     = step.provision_cluster.ca_file
      cert_file   = step.provision_cluster.cert_file
      key_file    = step.provision_cluster.key_file
      nomad_token = step.provision_cluster.nomad_token
    }

    verifies = [
      quality.nomad_register_job,
    ]
  }

  # Health check of the cluster before any upgrade. The expected job and
  # alloc counts come from run_initial_workloads.
  step "initial_test_cluster_health" {
    depends_on = [step.run_initial_workloads]

    description = <<-EOF
      Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules
      EOF

    module = module.test_cluster_health

    variables {
      nomad_addr   = step.provision_cluster.nomad_addr
      ca_file      = step.provision_cluster.ca_file
      cert_file    = step.provision_cluster.cert_file
      key_file     = step.provision_cluster.key_file
      nomad_token  = step.provision_cluster.nomad_token
      server_count = var.server_count
      # Total expected clients: the linux and windows counts from locals.
      client_count = local.linux_count + local.windows_count
      jobs_count   = step.run_initial_workloads.jobs_count
      alloc_count  = step.run_initial_workloads.allocs_count
    }

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_nodes_status,
      quality.nomad_job_status,
      quality.nomad_allocs_status,
      quality.nomad_reschedule_alloc,
    ]
  }

  # Same module as copy_initial_binary, but fetches var.upgrade_version into
  # its own per-variant path.
  step "copy_upgrade_binary" {
    depends_on = [step.provision_cluster]

    description = <<-EOF
      Bring the new upgraded binary from the artifactory
      EOF

    module = module.build_artifactory

    variables {
      artifactory_username = var.artifactory_username
      artifactory_token    = var.artifactory_token
      arch                 = local.arch
      edition              = matrix.edition
      product_version      = var.upgrade_version
      os                   = matrix.os
      binary_path          = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.upgrade_version}"
    }
  }

  # Draft of the remaining upgrade steps, kept disabled. The `// ...` markers
  # are placeholders from the original draft. The upgrade steps take their
  # binary from copy_upgrade_binary (the step that fetches var.upgrade_version).
  /*
  step "upgrade_servers" {
    description = <<-EOF
      Upgrade the cluster's servers by invoking nomad-cc ...
      EOF

    module = module.run_cc_nomad

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_restore_snapshot
    ]

    variables {
      cc_update_type        = "server"
      nomad_upgraded_binary = step.copy_upgrade_binary.nomad_local_binary
      // ...
    }
  }

  step "run_servers_workloads" {
    // ...
  }

  step "server_upgrade_test_cluster_health" {
    depends_on = [step.run_initial_workloads]

    description = <<-EOF
      Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules
      EOF

    module = module.test_cluster_health

    variables {
      nomad_addr   = step.provision_cluster.nomad_addr
      ca_file      = step.provision_cluster.ca_file
      cert_file    = step.provision_cluster.cert_file
      key_file     = step.provision_cluster.key_file
      nomad_token  = step.provision_cluster.nomad_token
      server_count = var.server_count
      client_count = local.linux_count + local.windows_count
      jobs_count   = step.run_initial_workloads.jobs_count
      alloc_count  = step.run_initial_workloads.allocs_count
    }

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_nodes_status,
      quality.nomad_job_status,
      quality.nomad_allocs_status,
      quality.nomad_reschedule_alloc,
    ]
  }

  step "upgrade_client" {
    description = <<-EOF
      Upgrade the cluster's clients by invoking nomad-cc ...
      EOF

    module = module.run_cc_nomad

    verifies = [
      quality.nomad_nodes_status,
      quality.nomad_job_status
    ]

    variables {
      cc_update_type        = "client"
      nomad_upgraded_binary = step.copy_upgrade_binary.nomad_local_binary
      // ...
    }
  }

  step "run_clients_workloads" {
    // ...
  }

  step "client_upgrade_test_cluster_health" {
    depends_on = [step.run_initial_workloads]

    description = <<-EOF
      Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules
      EOF

    module = module.test_cluster_health

    variables {
      nomad_addr   = step.provision_cluster.nomad_addr
      ca_file      = step.provision_cluster.ca_file
      cert_file    = step.provision_cluster.cert_file
      key_file     = step.provision_cluster.key_file
      nomad_token  = step.provision_cluster.nomad_token
      server_count = var.server_count
      client_count = local.linux_count + local.windows_count
      jobs_count   = step.run_initial_workloads.jobs_count
      alloc_count  = step.run_initial_workloads.allocs_count
    }

    verifies = [
      quality.nomad_agent_info,
      quality.nomad_agent_info_self,
      quality.nomad_nodes_status,
      quality.nomad_job_status,
      quality.nomad_allocs_status,
      quality.nomad_reschedule_alloc,
    ]
  }
  */

  # Scenario outputs: everything below is re-exported from provision_cluster.
  output "servers" {
    value = step.provision_cluster.servers
  }

  output "linux_clients" {
    value = step.provision_cluster.linux_clients
  }

  output "windows_clients" {
    value = step.provision_cluster.windows_clients
  }

  output "message" {
    value = step.provision_cluster.message
  }

  output "nomad_addr" {
    value = step.provision_cluster.nomad_addr
  }

  output "ca_file" {
    value = step.provision_cluster.ca_file
  }

  output "cert_file" {
    value = step.provision_cluster.cert_file
  }

  output "key_file" {
    value = step.provision_cluster.key_file
  }

  # Marked sensitive because the value is the cluster's Nomad token.
  output "nomad_token" {
    value     = step.provision_cluster.nomad_token
    sensitive = true
  }
}