From 8c95f5f17ee90a8b7a23b5ca75bdf1d831bcddcc Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 24 Feb 2025 09:37:17 -0500 Subject: [PATCH] upgrade testing: make sure we capture last error if not exiting (#25186) While testing #25172 I found a few spots where #25152 wasn't capturing the errors from transient failures correctly or was exiting early instead of retrying. Ref: https://hashicorp.atlassian.net/browse/NET-11546 --- enos/modules/test_cluster_health/scripts/servers.sh | 5 ++++- enos/modules/upgrade_clients/scripts/verify_metadata.sh | 7 ++++--- .../upgrade_servers/scripts/wait_for_stable_cluster.sh | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/enos/modules/test_cluster_health/scripts/servers.sh b/enos/modules/test_cluster_health/scripts/servers.sh index 39d695389..7c5bcc7ba 100755 --- a/enos/modules/test_cluster_health/scripts/servers.sh +++ b/enos/modules/test_cluster_health/scripts/servers.sh @@ -21,7 +21,10 @@ leader_last_term= checkAutopilotHealth() { local autopilotHealth servers_healthy leader - autopilotHealth=$(nomad operator autopilot health -json) || return 1 + autopilotHealth=$(nomad operator autopilot health -json) || { + last_error="Could not read autopilot health" + return 1 + } servers_healthy=$(echo "$autopilotHealth" | jq -r '[.Servers[] | select(.Healthy == true) | .ID] | length') diff --git a/enos/modules/upgrade_clients/scripts/verify_metadata.sh b/enos/modules/upgrade_clients/scripts/verify_metadata.sh index 898718b69..ab992e5e8 100755 --- a/enos/modules/upgrade_clients/scripts/verify_metadata.sh +++ b/enos/modules/upgrade_clients/scripts/verify_metadata.sh @@ -20,9 +20,10 @@ checkClientReady() { local client client_status echo "Checking client health for $CLIENT_IP" - client=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json) || - error_exit "Unable to get info for node at $CLIENT_IP" - + client=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json) || { + last_error="Unable to get
info for node at $CLIENT_IP" + return 1 + } client_status=$(echo "$client" | jq -r '.Status') if [ "$client_status" == "ready" ]; then client_id=$(echo "$client" | jq '.ID' | tr -d '"') diff --git a/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh b/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh index 0cd4f35c9..3343ab597 100755 --- a/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh +++ b/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh @@ -19,9 +19,11 @@ leader_last_term= checkAutopilotHealth() { local autopilotHealth leader - autopilotHealth=$(nomad operator autopilot health -json) || return 1 + autopilotHealth=$(nomad operator autopilot health -json) || { + last_error="Could not read autopilot health" + return 1 + } leader=$(echo "$autopilotHealth" | jq -r '[.Servers[] | select(.Leader == true)]') - if [ "$(echo "$leader" | jq 'length')" -eq 1 ]; then leader_last_index=$(echo "$leader" | jq -r '.[0].LastIndex') leader_last_term=$(echo "$leader" | jq -r '.[0].LastTerm')