Check for allocs running before checking for IDs after a client upgrade (#25790)

* fix: wait for all allocs to be running before checking for their IDs after client upgrade

* style: linter fix

* fix: filter running allocs per client ID when checking for allocs after upgrade
This commit is contained in:
Juana De La Cuesta
2025-05-06 16:22:45 +02:00
committed by GitHub
parent 242ee16c81
commit 22921418b6
2 changed files with 36 additions and 5 deletions

View File

@@ -61,8 +61,7 @@ resource "enos_local_exec" "get_alloc_info" {
)
inline = [
"nomad alloc status -json | jq -r --arg NODE_ID \"$(nomad node status -allocs -address https://$CLIENT_IP:4646 -self -json | jq -r '.ID')\" '[ .[] | select(.ClientStatus == \"running\" and .NodeID == $NODE_ID) | {ID: .ID, Name: .Name, ClientStatus: .ClientStatus}]'"
]
"nomad alloc status -json | jq -r --arg NODE_ID \"$(nomad node status -allocs -address https://$CLIENT_IP:4646 -self -json | jq -r '.ID')\" '[ .[] | select(.NodeID == $NODE_ID) | {ID: .ID, Name: .Name, ClientStatus: .ClientStatus, TaskStates: .TaskStates}]'"]
}
module "upgrade_client" {

View File

@@ -48,17 +48,49 @@ done
echo "Client $client_id at $CLIENT_IP is ready"

# Quality: "nomad_alloc_reconnect: A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"
# ALLOCS holds the JSON alloc list captured before the upgrade. It must be fed
# to jq as data: expanding it bare inside $( ... ) would make the shell try to
# execute the JSON text as a command.
allocs_count=$(jq '[ .[] | select(.ClientStatus == "running") ] | length' <<< "$ALLOCS") \
  || error_exit "Invalid ALLOCS JSON: $ALLOCS"
echo "$allocs_count allocs found before upgrade $ALLOCS"
# Checks whether the number of allocs currently running on this client matches
# the expected count.
# Globals:
#   client_id     (read)    - node ID used to filter the alloc list
#   allocs_count  (read)    - expected number of running allocs
#   allocs_length (written) - number of running allocs found; intentionally
#                             left global because the caller prints it
# Returns:
#   0 when the counts match, 1 otherwise. Calls error_exit when the alloc
#   list cannot be fetched or parsed.
checkAllocsCount() {
  # Declared separately from the assignment so the pipeline's exit status is
  # not masked by `local`.
  local running_allocs

  running_allocs=$(nomad alloc status -json | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id)]') \
    || error_exit "Failed to check alloc status"

  allocs_length=$(echo "$running_allocs" | jq 'length') \
    || error_exit "Invalid alloc status -json output"

  if [ "$allocs_length" -eq "$allocs_count" ]; then
    return 0
  fi

  return 1
}
echo "Reading allocs for client at $CLIENT_IP"

# Poll until the client reports the expected number of running allocs, giving
# up once MAX_WAIT_TIME has elapsed. checkAllocsCount refreshes allocs_length
# on every attempt, which is what the progress messages below print.
elapsed_time=0
until checkAllocsCount; do
  if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
    # Only list the allocs placed on this client; allocs on other nodes are
    # irrelevant to this check.
    error_exit "Some allocs are not running: $(nomad alloc status -json | jq -r --arg client_id "$client_id" '.[] | select(.NodeID == $client_id) | "\(.ID) \(.Name) \(.ClientStatus)"')"
  fi

  echo "Running allocs: $allocs_length, expected $allocs_count. Waited $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
  sleep "$POLL_INTERVAL"
  elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Correct number of allocs found running: $allocs_length"
# IDs of the allocs currently running on this client, space separated.
current_allocs=$(nomad alloc status -json | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id) | .ID] | join(" ")')
if [ -z "$current_allocs" ]; then
  error_exit "Failed to read allocs for node: $client_id"
fi

# IDs of the allocs that were running before the upgrade, space separated.
# ALLOCS is passed to jq as input (not executed), and -r strips the JSON
# quotes so the result splits cleanly into array elements below.
IDs=$(jq -r '[ .[] | select(.ClientStatus == "running") | .ID ] | join(" ")' <<< "$ALLOCS") \
  || error_exit "Invalid ALLOCS JSON: $ALLOCS"

IFS=' ' read -r -a INPUT_ARRAY <<< "$IDs"
IFS=' ' read -r -a RUNNING_ARRAY <<< "$current_allocs"