mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
Merge pull request #25479 from hashicorp/NET-11546-enos-same-allocs
Add a test for reattaching allocs after client restart
This commit is contained in:
@@ -41,7 +41,7 @@ quality "nomad_allocs_status" {
|
||||
description = "A GET call to /v1/allocs returns the correct number of allocations and they are all running"
|
||||
}
|
||||
|
||||
quality "nomad_alloc_reconect" {
|
||||
description = "A GET call to /v1/alloc/:alloc_id will return the same alloc.CreateTime for each allocation before and after a client upgrade"
|
||||
quality "nomad_alloc_reconnect" {
|
||||
description = "A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"
|
||||
}
|
||||
|
||||
|
||||
@@ -371,7 +371,8 @@ scenario "upgrade" {
|
||||
verifies = [
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_node_metadata
|
||||
quality.nomad_node_metadata,
|
||||
quality.nomad_alloc_reconnect
|
||||
]
|
||||
|
||||
variables {
|
||||
@@ -412,7 +413,8 @@ scenario "upgrade" {
|
||||
verifies = [
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_node_metadata
|
||||
quality.nomad_node_metadata,
|
||||
quality.nomad_alloc_reconnect
|
||||
]
|
||||
|
||||
variables {
|
||||
@@ -473,7 +475,8 @@ scenario "upgrade" {
|
||||
verifies = [
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_node_metadata
|
||||
quality.nomad_node_metadata,
|
||||
quality.nomad_alloc_reconnect
|
||||
]
|
||||
|
||||
variables {
|
||||
@@ -514,7 +517,8 @@ scenario "upgrade" {
|
||||
verifies = [
|
||||
quality.nomad_nodes_status,
|
||||
quality.nomad_job_status,
|
||||
quality.nomad_node_metadata
|
||||
quality.nomad_node_metadata,
|
||||
quality.nomad_alloc_reconnect
|
||||
]
|
||||
|
||||
variables {
|
||||
|
||||
@@ -51,8 +51,25 @@ resource "enos_local_exec" "set_metadata" {
|
||||
scripts = [abspath("${path.module}/scripts/set_metadata.sh")]
|
||||
}
|
||||
|
||||
resource "enos_local_exec" "get_alloc_ids" {
|
||||
|
||||
environment = merge(
|
||||
local.nomad_env,
|
||||
{
|
||||
CLIENT_IP = var.client
|
||||
}
|
||||
)
|
||||
|
||||
inline = [
|
||||
"nomad alloc status -json | jq -r --arg NODE_ID \"$(nomad node status -allocs -address https://$CLIENT_IP:4646 -self -json | jq -r '.ID')\" '[.[] | select(.ClientStatus == \"running\" and .NodeID == $NODE_ID) | .ID] | join(\" \")'"
|
||||
]
|
||||
}
|
||||
|
||||
module "upgrade_client" {
|
||||
depends_on = [enos_local_exec.set_metadata]
|
||||
depends_on = [
|
||||
enos_local_exec.set_metadata,
|
||||
enos_local_exec.get_alloc_ids,
|
||||
]
|
||||
|
||||
source = "../upgrade_instance"
|
||||
|
||||
@@ -83,3 +100,16 @@ resource "enos_local_exec" "verify_metadata" {
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/verify_metadata.sh")]
|
||||
}
|
||||
|
||||
resource "enos_local_exec" "verify_allocs" {
|
||||
depends_on = [enos_local_exec.wait_for_nomad_api_post_update]
|
||||
|
||||
environment = merge(
|
||||
local.nomad_env,
|
||||
{
|
||||
CLIENT_IP = var.client
|
||||
ALLOCS = enos_local_exec.get_alloc_ids.stdout
|
||||
})
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/verify_allocs.sh")]
|
||||
}
|
||||
|
||||
69
enos/modules/upgrade_client/scripts/verify_allocs.sh
Normal file
69
enos/modules/upgrade_client/scripts/verify_allocs.sh
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
#######################################
# Report a fatal error and abort the script.
# Arguments: $1 - human-readable description of the failure
# Outputs:   writes "Error: <message>" plus a newline to stderr
# Returns:   does not return; exits the shell with status 1
#######################################
error_exit() {
  # Newline-terminated and on stderr so the message is not glued to whatever
  # is printed next and is not mixed into captured stdout.
  printf 'Error: %s\n' "${1}" >&2
  exit 1
}
|
||||
|
||||
# Polling budget for the client readiness check.
MAX_WAIT_TIME=60 # seconds to keep retrying before giving up
POLL_INTERVAL=2  # seconds to sleep between two attempts

# Mutable state shared by checkClientReady and the polling loop.
elapsed_time=0
last_error=""
client_id=""
|
||||
|
||||
checkClientReady() {
|
||||
local client client_status
|
||||
echo "Checking client health for $CLIENT_IP"
|
||||
|
||||
client=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json) || {
|
||||
last_error="Unable to get info for node at $CLIENT_IP"
|
||||
return 1
|
||||
}
|
||||
client_status=$(echo "$client" | jq -r '.Status')
|
||||
if [ "$client_status" == "ready" ]; then
|
||||
client_id=$(echo "$client" | jq '.ID' | tr -d '"')
|
||||
last_error=
|
||||
return 0
|
||||
fi
|
||||
|
||||
last_error="Node at $CLIENT_IP is ${client_status}, not ready"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Poll checkClientReady until it succeeds. The budget check runs before each
# sleep, so the script aborts once the accumulated sleep time has reached
# MAX_WAIT_TIME.
until checkClientReady; do
  if (( elapsed_time >= MAX_WAIT_TIME )); then
    error_exit "$last_error within $elapsed_time seconds."
  fi

  echo "$last_error within $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
  sleep "$POLL_INTERVAL"
  elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Client $client_id at $CLIENT_IP is ready"
|
||||
|
||||
# Quality: "nomad_alloc_reconnect: A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"
echo "Reading allocs for client at $CLIENT_IP"

# Space-separated IDs of the allocations that are currently running on this
# client (matched by NodeID).
current_allocs=$(nomad alloc status -json | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id) | .ID] | join(" ")')
if [ -z "$current_allocs" ]; then
  error_exit "Failed to read allocs for node: $client_id"
fi

# ALLOCS is the space-separated list of expected allocation IDs supplied by
# the caller through the environment.
IFS=' ' read -r -a INPUT_ARRAY <<< "${ALLOCS[*]}"
IFS=' ' read -r -a RUNNING_ARRAY <<< "$current_allocs"

# Sort both lists so the comparison does not depend on the order in which the
# IDs were reported. Word splitting of the sort output is intentional; the
# IDs are assumed to contain no whitespace or glob characters.
# shellcheck disable=SC2207
sorted_input=($(printf "%s\n" "${INPUT_ARRAY[@]}" | sort))
# shellcheck disable=SC2207
sorted_running=($(printf "%s\n" "${RUNNING_ARRAY[@]}" | sort))

if [[ "${sorted_input[*]}" != "${sorted_running[*]}" ]]; then
  error_exit "Different allocs found, expected: ${sorted_input[*]} found: ${sorted_running[*]}"
fi

echo "All allocs reattached correctly for node at $CLIENT_IP"
|
||||
@@ -39,7 +39,6 @@ while true; do
|
||||
checkClientReady && break
|
||||
if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
|
||||
error_exit "$last_error within $elapsed_time seconds."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$last_error within $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
|
||||
@@ -52,20 +51,17 @@ echo "Client $client_id at $CLIENT_IP is ready"
|
||||
# Quality: "nomad_node_metadata: A GET call to /v1/node/:node-id returns the same node.Meta for each node before and after a node upgrade"
|
||||
echo "Reading metadata for client at $CLIENT_IP"
|
||||
if ! client_meta=$(nomad node meta read -json -node-id "$client_id"); then
|
||||
echo "Failed to read metadata for node: $client_id"
|
||||
exit 1
|
||||
error_exit "Failed to read metadata for node: $client_id"
|
||||
fi
|
||||
|
||||
meta_node_ip=$(echo "$client_meta" | jq -r '.Dynamic.node_ip' )
|
||||
if [ "$meta_node_ip" != "$CLIENT_IP" ]; then
|
||||
echo "Wrong value returned for node_ip: $meta_node_ip"
|
||||
exit 1
|
||||
error_exit "Wrong value returned for node_ip: $meta_node_ip"
|
||||
fi
|
||||
|
||||
meta_nomad_addr=$(echo "$client_meta" | jq -r '.Dynamic.nomad_addr' )
|
||||
if [ "$meta_nomad_addr" != "$NOMAD_ADDR" ]; then
|
||||
echo "Wrong value returned for nomad_addr: $meta_nomad_addr"
|
||||
exit 1
|
||||
error_exit "Wrong value returned for nomad_addr: $meta_nomad_addr"
|
||||
fi
|
||||
|
||||
echo "Metadata correct in $client_id at $CLIENT_IP"
|
||||
|
||||
Reference in New Issue
Block a user