func: add a step to drain a node as part of the upgrade process

This commit is contained in:
Juanadelacuesta
2025-03-14 17:43:36 +01:00
parent e3f21166af
commit fba2efa728
5 changed files with 115 additions and 1 deletions

View File

@@ -24,3 +24,7 @@ module "upgrade_servers" {
module "upgrade_client" {
source = "./modules/upgrade_client"
}
# Module used by the upgrade scenario's "drain_client" step.
# NOTE(review): the module label says "client" while the source directory is
# named "drain_nodes" — confirm the path matches the directory on disk.
module "drain_client" {
source = "./modules/drain_nodes"
}

View File

@@ -414,9 +414,20 @@ scenario "upgrade" {
}
}
step "upgrade_third_client" {
# Runs between the second and third client upgrades.
# NOTE(review): this step has no `variables` block. If module.drain_client is
# the module whose inputs are ca_file, cert_file, key_file and nomad_token
# (declared without defaults), those values must be passed here — presumably
# from the step that provisions the cluster; confirm and wire them in.
step "drain_client" {
depends_on = [step.upgrade_second_client]
description = <<-EOF
Selects one client to drain, waits for all allocs to be rescheduled and
brings back the node eligibility
EOF
module = module.drain_client
}
step "upgrade_third_client" {
depends_on = [step.drain_client]
description = <<-EOF
Takes a client, writes some dynamic metadata to it,
updates the binary with the new one previously fetched and restarts it.

View File

@@ -0,0 +1,31 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
# Declares the enos provider, which supplies the enos_local_exec resource
# used by this module.
terraform {
required_providers {
enos = {
source = "hashicorp-forge/enos"
}
}
}
# Environment variables handed to the drain script so that the nomad CLI it
# invokes picks up the address, TLS material and ACL token from the module
# inputs.
locals {
nomad_env = {
NOMAD_ADDR = var.nomad_addr
NOMAD_CACERT = var.ca_file
NOMAD_CLIENT_CERT = var.cert_file
NOMAD_CLIENT_KEY = var.key_file
NOMAD_TOKEN = var.nomad_token
}
}
# Executes scripts/drain.sh through the enos provider. The script receives the
# Nomad connection settings from local.nomad_env plus NODES_TO_DRAIN, the
# number of nodes it should drain.
resource "enos_local_exec" "run_tests" {
  scripts = [abspath("${path.module}/scripts/drain.sh")]

  environment = merge(local.nomad_env, { NODES_TO_DRAIN = var.nodes_to_drain })
}

View File

@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Drains the first NODES_TO_DRAIN ready nodes (ordered by ID), checks that no
# allocation is still running on each drained node, then disables the drain
# and marks the node eligible for scheduling again.
#
# Environment:
#   NODES_TO_DRAIN  Number of ready nodes to drain (required; the script
#                   aborts under `set -u` when it is unset).
#   The nomad CLI reads its own connection settings from the environment.
#
# Exit status: 0 on success, non-zero as soon as any step fails.

set -euo pipefail

# Prints the given message to stderr and aborts the script.
error_exit() {
    printf 'Error: %s\n' "${1}" >&2
    exit 1
}

DRAIN_DEADLINE="5s"

# The count is passed with --argjson instead of being spliced into the jq
# program text, so a malformed value is rejected by jq rather than parsed as
# part of the filter.
nodes=$(nomad node status -json \
    | jq -r --argjson count "$NODES_TO_DRAIN" \
        '[.[] | select(.Status == "ready") | .ID] | sort | .[:$count] | join(" ")') \
    || error_exit "Failed to list ready nodes"

# Asking for at least one node and finding none would otherwise let the step
# pass without having drained anything.
if [ -z "$nodes" ] && [ "$NODES_TO_DRAIN" -gt 0 ]; then
    error_exit "No ready nodes found to drain"
fi

for node in $nodes; do
    echo "Draining the node $node"

    nomad node drain --enable --deadline "$DRAIN_DEADLINE" "$node" \
        || error_exit "Failed to drain node $node"

    # Count the allocations that still report as running on the drained node.
    allocs=$(nomad alloc status -json \
        | jq --arg node "$node" '[.[] | select(.NodeID == $node and .ClientStatus == "running")] | length') \
        || error_exit "Failed to list allocations for node $node"

    # Compare the count itself: under `set -e` a failed pipeline never reaches
    # this point, so testing `$?` here could never detect leftover allocs.
    if [ "$allocs" -ne 0 ]; then
        error_exit "Allocs still running on $node"
    fi

    nomad node drain --disable "$node" \
        || error_exit "Failed to disable drain for node $node"

    # `eligibility` is a subcommand of `nomad node`.
    nomad node eligibility -enable "$node" \
        || error_exit "Failed to set node $node back to eligible"
done

View File

@@ -0,0 +1,36 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
# How many nodes the drain script should process; exported to the script as
# NODES_TO_DRAIN.
variable "nodes_to_drain" {
  type        = number
  default     = 1
  description = "Number of clients to drain"
}

# Exported to the script as NOMAD_ADDR.
variable "nomad_addr" {
  type        = string
  default     = "http://localhost:4646"
  description = "The Nomad API HTTP address."
}

# Exported to the script as NOMAD_CACERT.
variable "ca_file" {
  type        = string
  description = "A local file path to a PEM-encoded certificate authority used to verify the remote agent's certificate"
}

# Exported to the script as NOMAD_CLIENT_CERT.
variable "cert_file" {
  type        = string
  description = "A local file path to a PEM-encoded certificate provided to the remote agent. If this is specified, key_file or key_pem is also required"
}

# Exported to the script as NOMAD_CLIENT_KEY.
variable "key_file" {
  type        = string
  description = "A local file path to a PEM-encoded private key. This is required if cert_file or cert_pem is specified."
}

# Exported to the script as NOMAD_TOKEN; marked sensitive so it is redacted
# from plan output.
variable "nomad_token" {
  type        = string
  sensitive   = true
  description = "The Secret ID of an ACL token to make requests with, for ACL-enabled clusters."
}