mirror of
https://github.com/kemko/nomad.git
synced 2026-01-04 17:35:43 +03:00
107 lines
3.6 KiB
Bash
Executable File
107 lines
3.6 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Copyright (c) HashiCorp, Inc.
|
|
# SPDX-License-Identifier: BUSL-1.1
|
|
|
|
# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
|
|
|
|
#######################################
# Report a fatal error and terminate the script.
# Arguments: $1 - description of what went wrong
# Outputs:   writes "Error: <description>" and a newline to stdout
# Returns:   never returns; exits the shell with status 1
#######################################
error_exit() {
  # The trailing newline keeps the message from running into whatever is
  # printed next (shell prompt, test harness output).
  printf 'Error: %s\n' "${1}"
  exit 1
}
|
|
|
|
# Polling budget, in seconds: give up after MAX_WAIT_TIME, checking every
# POLL_INTERVAL.
MAX_WAIT_TIME=60
POLL_INTERVAL=2

# Mutable state shared between the polling loops and the check functions.
elapsed_time=0 # seconds spent sleeping in the current polling loop
last_error=""  # most recent failure description set by checkClientReady
client_id=""   # node ID of the client, filled in once it reports ready
|
|
|
|
#######################################
# Ask the Nomad agent at CLIENT_IP for its own node status and check whether
# it reports "ready".
# Globals:   CLIENT_IP (read), last_error (written), client_id (written)
# Outputs:   a progress line on stdout
# Returns:   0 if the node status is "ready", 1 otherwise
#######################################
checkClientReady() {
  local node_json node_state

  echo "Checking client health for $CLIENT_IP"

  if ! node_json=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json); then
    last_error="Unable to get info for node at $CLIENT_IP"
    return 1
  fi

  node_state=$(jq -r '.Status' <<< "$node_json")
  if [ "$node_state" != "ready" ]; then
    last_error="Node at $CLIENT_IP is ${node_state}, not ready"
    return 1
  fi

  # Node is ready: remember its ID and clear any error left by earlier attempts.
  client_id=$(jq '.ID' <<< "$node_json" | tr -d '"')
  last_error=
  return 0
}
|
|
|
|
# Poll the client until it reports ready. The deadline is checked after each
# failed attempt, so the script gives up once at least MAX_WAIT_TIME seconds
# have been spent sleeping between attempts.
until checkClientReady; do
  if (( elapsed_time >= MAX_WAIT_TIME )); then
    error_exit "$last_error within $elapsed_time seconds."
  fi

  echo "$last_error within $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
  sleep "$POLL_INTERVAL"
  elapsed_time=$(( elapsed_time + POLL_INTERVAL ))
done

echo "Client $client_id at $CLIENT_IP is ready"
|
|
|
|
# Number of allocs that were running before the upgrade, taken from the JSON
# array supplied in the ALLOCS environment variable.
# ALLOCS must be quoted: unquoted, the JSON is word-split and glob-expanded, and
# a compact array such as [{"ID":"a"}] is itself a bracket-expression pattern
# that a one-character filename in the working directory could replace.
allocs_count=$(jq '[ .[] | select(.ClientStatus == "running")] | length' <<< "$ALLOCS")
echo "$allocs_count allocs found before upgrade $ALLOCS"
|
|
|
|
# Quality: "nomad_alloc_reconnect: A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"

#######################################
# Compare the number of allocs currently running on this client with the
# number that was running before the upgrade.
# Globals:   client_id (read), allocs_count (read),
#            allocs_length (written; the polling loop reads it for its
#            progress and success messages, so it must stay global)
# Returns:   0 if the counts match, 1 otherwise. Exits through error_exit if
#            the nomad/jq pipeline or the length computation fails.
#######################################
checkAllocsCount() {
  # Only needed inside this function, so keep it out of the global scope.
  local running_allocs

  running_allocs=$(nomad alloc status -json \
    | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id)]') \
    || error_exit "Failed to check alloc status"

  allocs_length=$(echo "$running_allocs" | jq 'length') \
    || error_exit "Invalid alloc status -json output"

  if [ "$allocs_length" -eq "$allocs_count" ]; then
    return 0
  fi

  return 1
}
|
|
|
|
echo "Reading allocs for client at $CLIENT_IP"

# Restart the clock: elapsed_time still holds the time spent waiting for the
# client to become ready.
elapsed_time=0
while true; do
  checkAllocsCount && break

  if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
    # Include ID, name and client status of every alloc nomad reports to help
    # diagnose which ones did not come back.
    error_exit "Some allocs are not running: $(nomad alloc status -json | jq -r '.[] | "\(.ID) \(.Name) \(.ClientStatus)"')"
  fi

  echo "Running allocs: $allocs_length, expected $allocs_count. Waiting for $elapsed_time Retrying in $POLL_INTERVAL seconds..."
  # Quoted, matching the client-readiness loop earlier in the script.
  sleep "$POLL_INTERVAL"
  elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Correct number of allocs found running: $allocs_length"
|
|
|
|
# IDs of the allocs currently running on this client, space separated.
current_allocs=$(nomad alloc status -json \
  | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id) | .ID] | join(" ")')
if [ -z "$current_allocs" ]; then
  error_exit "Failed to read allocs for node: $client_id"
fi

# IDs of the allocs that were running before the upgrade, space separated.
# -r is required: without it jq prints a JSON string, and the surrounding
# double quotes end up glued to the first and last ID, so the lists could never
# compare equal. ALLOCS is quoted to avoid word splitting and globbing.
IDs=$(jq -r '[ .[] | select(.ClientStatus == "running")] | [.[].ID] | join(" ")' <<< "$ALLOCS")

IFS=' ' read -r -a INPUT_ARRAY <<< "$IDs"
IFS=' ' read -r -a RUNNING_ARRAY <<< "$current_allocs"

# Sort both lists so the comparison does not depend on the order nomad returns
# them in. The elements are alloc IDs without whitespace or glob characters, so
# splitting the sorted output back into an array is safe here.
# shellcheck disable=SC2207
sorted_input=($(printf '%s\n' "${INPUT_ARRAY[@]}" | sort))
# shellcheck disable=SC2207
sorted_running=($(printf '%s\n' "${RUNNING_ARRAY[@]}" | sort))

if [[ "${sorted_input[*]}" != "${sorted_running[*]}" ]]; then
  # Gather every alloc on this node, whatever its status, for the error report.
  full_current_allocs=$(nomad alloc status -json \
    | jq -r --arg client_id "$client_id" '[.[] | select(.NodeID == $client_id) | { ID: .ID, Name: .Name, ClientStatus: .ClientStatus}]')
  error_exit "Different allocs found, expected: ${sorted_input[*]} found: ${sorted_running[*]}. Current allocs info: $full_current_allocs"
fi

echo "All allocs reattached correctly for node at $CLIENT_IP"
|