mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
func: expand on some logs to get more info in case of a failure
This commit is contained in:
@@ -38,7 +38,7 @@ while true; do
|
||||
checkAllocsCount && break
|
||||
|
||||
if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
|
||||
error_exit "Some allocs are not running:\n$(nomad alloc status -json | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
|
||||
error_exit "Some allocs are not running: $(nomad alloc status -json | jq -r '.[] | "\(.ID) \(.Name) \(.ClientStatus)"')"
|
||||
fi
|
||||
|
||||
echo "Running allocs: $allocs_length, expected $ALLOC_COUNT. Waiting for $elapsed_time Retrying in $POLL_INTERVAL seconds..."
|
||||
|
||||
@@ -51,7 +51,7 @@ resource "enos_local_exec" "set_metadata" {
|
||||
scripts = [abspath("${path.module}/scripts/set_metadata.sh")]
|
||||
}
|
||||
|
||||
resource "enos_local_exec" "get_alloc_ids" {
|
||||
resource "enos_local_exec" "get_alloc_info" {
|
||||
|
||||
environment = merge(
|
||||
local.nomad_env,
|
||||
@@ -61,14 +61,14 @@ resource "enos_local_exec" "get_alloc_ids" {
|
||||
)
|
||||
|
||||
inline = [
|
||||
"nomad alloc status -json | jq -r --arg NODE_ID \"$(nomad node status -allocs -address https://$CLIENT_IP:4646 -self -json | jq -r '.ID')\" '[.[] | select(.ClientStatus == \"running\" and .NodeID == $NODE_ID) | .ID] | join(\" \")'"
|
||||
"nomad alloc status -json | jq -r --arg NODE_ID \"$(nomad node status -allocs -address https://$CLIENT_IP:4646 -self -json | jq -r '.ID')\" '[ .[] | select(.ClientStatus == \"running\" and .NodeID == $NODE_ID) | {ID: .ID, Name: .Name, ClientStatus: .ClientStatus}]'"
|
||||
]
|
||||
}
|
||||
|
||||
module "upgrade_client" {
|
||||
depends_on = [
|
||||
enos_local_exec.set_metadata,
|
||||
enos_local_exec.get_alloc_ids,
|
||||
enos_local_exec.get_alloc_info,
|
||||
]
|
||||
|
||||
source = "../upgrade_instance"
|
||||
@@ -108,7 +108,7 @@ resource "enos_local_exec" "verify_allocs" {
|
||||
local.nomad_env,
|
||||
{
|
||||
CLIENT_IP = var.client
|
||||
ALLOCS = enos_local_exec.get_alloc_ids.stdout
|
||||
ALLOCS = enos_local_exec.get_alloc_info.stdout
|
||||
})
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/verify_allocs.sh")]
|
||||
|
||||
9
enos/modules/upgrade_client/scripts/verify_allocs.sh
Normal file → Executable file
9
enos/modules/upgrade_client/scripts/verify_allocs.sh
Normal file → Executable file
@@ -49,6 +49,8 @@ done
|
||||
echo "Client $client_id at $CLIENT_IP is ready"
|
||||
|
||||
# Quality: "nomad_alloc_reconnect: A GET call to /v1/allocs will return the same IDs for running allocs before and after a client upgrade on each client"
|
||||
echo "Allocs found before upgrade $ALLOCS"
|
||||
|
||||
echo "Reading allocs for client at $CLIENT_IP"
|
||||
|
||||
current_allocs=$(nomad alloc status -json | jq -r --arg client_id "$client_id" '[.[] | select(.ClientStatus == "running" and .NodeID == $client_id) | .ID] | join(" ")')
|
||||
@@ -56,14 +58,17 @@ if [ -z "$current_allocs" ]; then
|
||||
error_exit "Failed to read allocs for node: $client_id"
|
||||
fi
|
||||
|
||||
IFS=' ' read -r -a INPUT_ARRAY <<< "${ALLOCS[*]}"
|
||||
IDs=$(echo $ALLOCS | jq -r '[.[].ID] | join(" ")')
|
||||
|
||||
IFS=' ' read -r -a INPUT_ARRAY <<< "${IDs[*]}"
|
||||
IFS=' ' read -r -a RUNNING_ARRAY <<< "$current_allocs"
|
||||
|
||||
sorted_input=($(printf "%s\n" "${INPUT_ARRAY[@]}" | sort))
|
||||
sorted_running=($(printf "%s\n" "${RUNNING_ARRAY[@]}" | sort))
|
||||
|
||||
if [[ "${sorted_input[*]}" != "${sorted_running[*]}" ]]; then
|
||||
error_exit "Different allocs found, expected: ${sorted_input[*]} found: ${sorted_running[*]}"
|
||||
full_current_allocs=$(nomad alloc status -json | jq -r --arg client_id "$client_id" '[.[] | select(.NodeID == $client_id) | { ID: .ID, Name: .Name, ClientStatus: .ClientStatus}]')
|
||||
error_exit "Different allocs found, expected: ${sorted_input[*]} found: ${sorted_running[*]}. Current allocs info: $full_current_allocs"
|
||||
fi
|
||||
|
||||
echo "All allocs reattached correctly for node at $CLIENT_IP"
|
||||
|
||||
@@ -36,19 +36,27 @@ locals {
|
||||
resource "random_pet" "upgrade" {
|
||||
}
|
||||
|
||||
|
||||
|
||||
resource "enos_local_exec" "wait_for_leader" {
|
||||
environment = local.nomad_env
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/wait_for_stable_cluster.sh")]
|
||||
}
|
||||
|
||||
resource "time_sleep" "wait_20_seconds" {
|
||||
depends_on = [enos_local_exec.wait_for_leader]
|
||||
|
||||
create_duration = "20s"
|
||||
}
|
||||
|
||||
// Forcing a snapshot from the leader drives the cluster to store the most recent
|
||||
// state and exercise the snapshot restore at least once when upgrading.
|
||||
// The resulting file won't be used.
|
||||
// The stale flag defaults to "false" but it is included to reinforce the fact
|
||||
// that it has to be taken from the leader for future readers.
|
||||
resource "enos_local_exec" "take_cluster_snapshot" {
|
||||
depends_on = [enos_local_exec.wait_for_leader]
|
||||
depends_on = [time_sleep.wait_20_seconds]
|
||||
|
||||
environment = local.nomad_env
|
||||
|
||||
|
||||
Reference in New Issue
Block a user