diff --git a/enos/enos-quality.hcl b/enos/enos-quality.hcl
index 5f423045e..fae9c85d3 100644
--- a/enos/enos-quality.hcl
+++ b/enos/enos-quality.hcl
@@ -41,7 +41,7 @@ quality "nomad_allocs_status" {
   description = "A GET call to /v1/allocs returns the correct number of allocations and they are all running"
 }
 
-quality "nomad_alloc_reconect" {
-  description = "A GET call to /v1/alloc/:alloc_id will return the same alloc.CreateTime for each allocation before and after a client upgrade"
+quality "nomad_alloc_reconnect" {
+  description = "A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"
 }
 
diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl
index 0621bc7c0..d9fcb205f 100644
--- a/enos/enos-scenario-upgrade.hcl
+++ b/enos/enos-scenario-upgrade.hcl
@@ -371,7 +371,8 @@ scenario "upgrade" {
     verifies = [
       quality.nomad_nodes_status,
       quality.nomad_job_status,
-      quality.nomad_node_metadata
+      quality.nomad_node_metadata,
+      quality.nomad_alloc_reconnect
     ]
 
     variables {
@@ -412,7 +413,8 @@ scenario "upgrade" {
     verifies = [
       quality.nomad_nodes_status,
       quality.nomad_job_status,
-      quality.nomad_node_metadata
+      quality.nomad_node_metadata,
+      quality.nomad_alloc_reconnect
     ]
 
     variables {
@@ -473,7 +475,8 @@ scenario "upgrade" {
     verifies = [
       quality.nomad_nodes_status,
       quality.nomad_job_status,
-      quality.nomad_node_metadata
+      quality.nomad_node_metadata,
+      quality.nomad_alloc_reconnect
     ]
 
     variables {
@@ -514,7 +517,8 @@ scenario "upgrade" {
     verifies = [
       quality.nomad_nodes_status,
       quality.nomad_job_status,
-      quality.nomad_node_metadata
+      quality.nomad_node_metadata,
+      quality.nomad_alloc_reconnect
     ]
 
     variables {
diff --git a/enos/modules/upgrade_client/main.tf b/enos/modules/upgrade_client/main.tf
index 050a999ce..5da47ec0b 100644
--- a/enos/modules/upgrade_client/main.tf
+++ b/enos/modules/upgrade_client/main.tf
@@ -51,8 +51,25 @@ resource "enos_local_exec" "set_metadata" {
   scripts = [abspath("${path.module}/scripts/set_metadata.sh")]
 }
 
+# Capture the IDs of the allocs running on this client before the upgrade.
+resource "enos_local_exec" "get_alloc_ids" {
+  environment = merge(
+    local.nomad_env,
+    {
+      CLIENT_IP = var.client
+    }
+  )
+
+  inline = [
+    "nomad alloc status -json | jq -r --arg NODE_ID \"$(nomad node status -allocs -address https://$CLIENT_IP:4646 -self -json | jq -r '.ID')\" '[.[] | select(.ClientStatus == \"running\" and .NodeID == $NODE_ID) | .ID] | join(\" \")'"
+  ]
+}
+
 module "upgrade_client" {
-  depends_on = [enos_local_exec.set_metadata]
+  depends_on = [
+    enos_local_exec.set_metadata,
+    enos_local_exec.get_alloc_ids,
+  ]
 
   source = "../upgrade_instance"
 
@@ -83,3 +100,17 @@ resource "enos_local_exec" "verify_metadata" {
 
   scripts = [abspath("${path.module}/scripts/verify_metadata.sh")]
 }
+
+# Compare the allocs running after the upgrade with the IDs captured by get_alloc_ids.
+resource "enos_local_exec" "verify_allocs" {
+  depends_on = [enos_local_exec.wait_for_nomad_api_post_update]
+
+  environment = merge(
+    local.nomad_env,
+    {
+      CLIENT_IP = var.client
+      ALLOCS    = enos_local_exec.get_alloc_ids.stdout
+  })
+
+  scripts = [abspath("${path.module}/scripts/verify_allocs.sh")]
+}
diff --git a/enos/modules/upgrade_client/scripts/verify_allocs.sh b/enos/modules/upgrade_client/scripts/verify_allocs.sh
new file mode 100755
index 000000000..4a17bdccb
--- /dev/null
+++ b/enos/modules/upgrade_client/scripts/verify_allocs.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+# Waits for the Nomad client at CLIENT_IP to report "ready", then checks that
+# the allocs running on it match the IDs passed in through ALLOCS.
+#
+# Environment:
+#   CLIENT_IP - address of the client to verify (queried on port 4646)
+#   ALLOCS    - space-separated alloc IDs expected to be running on the client
+
+set -euo pipefail
+
+# Print the given message as an error and abort the script.
+error_exit() {
+    printf 'Error: %s\n' "${1}"
+    exit 1
+}
+
+MAX_WAIT_TIME=60  # Maximum wait time in seconds
+POLL_INTERVAL=2   # Interval between status checks
+
+elapsed_time=0
+last_error=
+client_id=
+
+# Query the client's own node status. On success, store its ID in client_id
+# and return 0; otherwise describe the failure in last_error and return 1.
+checkClientReady() {
+    local client client_status
+    echo "Checking client health for $CLIENT_IP"
+
+    client=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json) || {
+        last_error="Unable to get info for node at $CLIENT_IP"
+        return 1
+    }
+    client_status=$(echo "$client" | jq -r '.Status')
+    if [ "$client_status" == "ready" ]; then
+        client_id=$(echo "$client" | jq '.ID' | tr -d '"')
+        last_error=
+        return 0
+    fi
+
+    last_error="Node at $CLIENT_IP is ${client_status}, not ready"
+    return 1
+}
+
+while true; do
+    checkClientReady && break
+    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
+        error_exit "$last_error within $elapsed_time seconds."
+    fi
+
+    echo "$last_error within $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
+    sleep "$POLL_INTERVAL"
+    elapsed_time=$((elapsed_time + POLL_INTERVAL))
+done
+
+echo "Client $client_id at $CLIENT_IP is ready"
+
+# Quality: "nomad_alloc_reconnect: A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"
+echo "Reading allocs for client at $CLIENT_IP"
+
+current_allocs=$(nomad alloc status -json | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id) | .ID] | join(" ")')
+if [ -z "$current_allocs" ]; then
+    error_exit "Failed to read allocs for node: $client_id"
+fi
+
+IFS=' ' read -r -a INPUT_ARRAY <<< "${ALLOCS[*]}"
+IFS=' ' read -r -a RUNNING_ARRAY <<< "$current_allocs"
+
+# Sort both lists so the comparison does not depend on the order of the IDs.
+sorted_input=($(printf "%s\n" "${INPUT_ARRAY[@]}" | sort))
+sorted_running=($(printf "%s\n" "${RUNNING_ARRAY[@]}" | sort))
+
+if [[ "${sorted_input[*]}" != "${sorted_running[*]}" ]]; then
+    error_exit "Different allocs found, expected: ${sorted_input[*]} found: ${sorted_running[*]}"
+fi
+
+echo "All allocs reattached correctly for node at $CLIENT_IP"
diff --git a/enos/modules/upgrade_client/scripts/verify_metadata.sh b/enos/modules/upgrade_client/scripts/verify_metadata.sh
index 3ab78a91a..7a18bd1aa 100755
--- a/enos/modules/upgrade_client/scripts/verify_metadata.sh
+++ b/enos/modules/upgrade_client/scripts/verify_metadata.sh
@@ -39,7 +39,6 @@ while true; do
     checkClientReady && break
     if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
         error_exit "$last_error within $elapsed_time seconds."
-        exit 1
     fi
 
     echo "$last_error within $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
@@ -52,20 +51,17 @@ echo "Client $client_id at $CLIENT_IP is ready"
 # Quality: "nomad_node_metadata: A GET call to /v1/node/:node-id returns the same node.Meta for each node before and after a node upgrade"
 echo "Reading metadata for client at $CLIENT_IP"
 if ! client_meta=$(nomad node meta read -json -node-id "$client_id"); then
-    echo "Failed to read metadata for node: $client_id"
-    exit 1
+    error_exit "Failed to read metadata for node: $client_id"
 fi
 
 meta_node_ip=$(echo "$client_meta" | jq -r '.Dynamic.node_ip' )
 if [ "$meta_node_ip" != "$CLIENT_IP" ]; then
-    echo "Wrong value returned for node_ip: $meta_node_ip"
-    exit 1
+    error_exit "Wrong value returned for node_ip: $meta_node_ip"
 fi
 
 meta_nomad_addr=$(echo "$client_meta" | jq -r '.Dynamic.nomad_addr' )
 if [ "$meta_nomad_addr" != "$NOMAD_ADDR" ]; then
-    echo "Wrong value returned for nomad_addr: $meta_nomad_addr"
-    exit 1
+    error_exit "Wrong value returned for nomad_addr: $meta_nomad_addr"
 fi
 
 echo "Metadata correct in $client_id at $CLIENT_IP"