From 5cc1b4e6063b61121a9e9d915f60eb3f2bfdaa56 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 7 Mar 2025 15:25:26 -0500 Subject: [PATCH] upgrade tests: add transparent proxy workload (#25176) Add an upgrade test workload for Consul service mesh with transparent proxy. Note that this departs from the "countdash" demo. The dashboard application can only verify the backend is up by making a websocket connection, which we can't do as a health check, and the health check it exposes for that purpose only passes once the websocket connection has been made. So replace the dashboard with a minimal nginx reverse proxy to the count-api instead. Ref: https://hashicorp.atlassian.net/browse/NET-12217 --- e2e/terraform/outputs.tf | 9 + e2e/terraform/provision-infra/main.tf | 2 +- e2e/terraform/provision-infra/outputs.tf | 11 +- enos/enos-scenario-upgrade.hcl | 9 + .../run_workloads/jobs/tproxy.nomad.hcl | 155 ++++++++++++++++++ enos/modules/run_workloads/main.tf | 3 + .../scripts/create-consul-intention.sh | 8 + .../run_workloads/scripts/intention.hcl | 11 ++ enos/modules/run_workloads/variables.tf | 12 ++ 9 files changed, 218 insertions(+), 2 deletions(-) create mode 100644 enos/modules/run_workloads/jobs/tproxy.nomad.hcl create mode 100755 enos/modules/run_workloads/scripts/create-consul-intention.sh create mode 100644 enos/modules/run_workloads/scripts/intention.hcl diff --git a/e2e/terraform/outputs.tf b/e2e/terraform/outputs.tf index a6539a7d7..5550e76eb 100644 --- a/e2e/terraform/outputs.tf +++ b/e2e/terraform/outputs.tf @@ -38,6 +38,15 @@ output "nomad_token" { sensitive = true } +output "consul_token" { + value = module.provision-infra.consul_token + sensitive = true +} + +output "consul_addr" { + value = module.provision-infra.consul_addr +} + output "cluster_unique_identifier" { value = module.provision-infra.cluster_unique_identifier } diff --git a/e2e/terraform/provision-infra/main.tf b/e2e/terraform/provision-infra/main.tf index 342691a64..630a2460d 100644 --- 
a/e2e/terraform/provision-infra/main.tf +++ b/e2e/terraform/provision-infra/main.tf @@ -23,7 +23,7 @@ locals { module "keys" { depends_on = [random_pet.e2e] name = local.random_name - path = "${local.keys_dir}" + path = local.keys_dir source = "mitchellh/dynamic-keys/aws" version = "v2.0.0" } diff --git a/e2e/terraform/provision-infra/outputs.tf b/e2e/terraform/provision-infra/outputs.tf index 5a60d84d6..cbbf76f01 100644 --- a/e2e/terraform/provision-infra/outputs.tf +++ b/e2e/terraform/provision-infra/outputs.tf @@ -66,7 +66,7 @@ EOM } output "cluster_unique_identifier" { - value = "${local.random_name}" + value = local.random_name } output "nomad_addr" { @@ -93,3 +93,12 @@ output "nomad_token" { value = chomp(data.local_sensitive_file.nomad_token.content) sensitive = true } + +output "consul_addr" { + value = "https://${aws_instance.consul_server.public_ip}:8501" +} + +output "consul_token" { + value = chomp(local_sensitive_file.consul_initial_management_token.content) + sensitive = true +} diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl index 16be45e01..bf74c0345 100644 --- a/enos/enos-scenario-upgrade.hcl +++ b/enos/enos-scenario-upgrade.hcl @@ -133,6 +133,8 @@ scenario "upgrade" { key_file = step.provision_cluster.key_file nomad_token = step.provision_cluster.nomad_token availability_zone = var.availability_zone + consul_addr = step.provision_cluster.consul_addr + consul_token = step.provision_cluster.consul_token workloads = { service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3, type = "service" } @@ -167,6 +169,13 @@ scenario "upgrade" { pre_script = "scripts/wait_for_nfs_volume.sh" } + tproxy = { + job_spec = "jobs/tproxy.nomad.hcl" + alloc_count = 2 + type = "service" + pre_script = "scripts/create-consul-intention.sh" + } + } } diff --git a/enos/modules/run_workloads/jobs/tproxy.nomad.hcl b/enos/modules/run_workloads/jobs/tproxy.nomad.hcl new file mode 100644 index 000000000..a5db6e5e2 --- 
/dev/null +++ b/enos/modules/run_workloads/jobs/tproxy.nomad.hcl @@ -0,0 +1,155 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +# this variable is not used but required by runner +variable "alloc_count" { + type = number + default = 2 +} + +job "countdash" { + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "api" { + network { + mode = "bridge" + } + + service { + name = "count-api" + port = "9001" + + check { + type = "http" + path = "/health" + expose = true + interval = "3s" + timeout = "1s" + + check_restart { + limit = 0 # don't restart on failure + } + } + + connect { + sidecar_service { + proxy { + transparent_proxy {} + } + } + } + } + + task "web" { + driver = "docker" + + config { + image = "hashicorpdev/counter-api:v3" + auth_soft_fail = true + } + } + } + + group "dashboard" { + network { + mode = "bridge" + + port "http" { + # TODO: for some reason without a static port the health check never + # succeeds, even though we have expose=true on the check + static = 9002 + to = 9002 + } + } + + service { + name = "count-dashboard" + port = "9002" + + # this check will fail if connectivity between the dashboard and the API + # fails, and restart the task. we poll frequently but also allow it to + # fail temporarily so we can account for allocations being rescheduled + # during tests + check { + type = "http" + path = "/health" + expose = true + task = "dashboard" + interval = "3s" + timeout = "1s" + + # note it seems to take an extremely long time for this API to return ok + check_restart { + limit = 30 + } + } + + connect { + sidecar_service { + proxy { + transparent_proxy {} + } + } + } + } + + # note: this is not the usual countdash frontend because that only sets the + # health check that tests the backend as healthy once a browser connection + # has been made. So serve a reverse proxy to the count API instead. 
+ task "dashboard" { + driver = "docker" + + env { + COUNTING_SERVICE_URL = "http://count-api.virtual.consul" + } + + config { + image = "nginx:latest" + command = "nginx" + args = ["-c", "/local/default.conf"] + auth_soft_fail = true + } + + template { + destination = "local/default.conf" + data = < v if v.type == "system" }) diff --git a/enos/modules/run_workloads/scripts/create-consul-intention.sh b/enos/modules/run_workloads/scripts/create-consul-intention.sh new file mode 100755 index 000000000..053631c8f --- /dev/null +++ b/enos/modules/run_workloads/scripts/create-consul-intention.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -euo pipefail + +dir=$(dirname "${BASH_SOURCE[0]}") +consul config write "${dir}/intention.hcl" diff --git a/enos/modules/run_workloads/scripts/intention.hcl b/enos/modules/run_workloads/scripts/intention.hcl new file mode 100644 index 000000000..6156d4186 --- /dev/null +++ b/enos/modules/run_workloads/scripts/intention.hcl @@ -0,0 +1,11 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +Kind = "service-intentions" +Name = "count-api" +Sources = [ + { + Name = "count-dashboard" + Action = "allow" + } +] diff --git a/enos/modules/run_workloads/variables.tf b/enos/modules/run_workloads/variables.tf index f21ec5e4c..0a1c5f920 100644 --- a/enos/modules/run_workloads/variables.tf +++ b/enos/modules/run_workloads/variables.tf @@ -28,6 +28,18 @@ variable "nomad_token" { sensitive = true } +variable "consul_addr" { + description = "The Consul API HTTP address." + type = string + default = "http://localhost:8500" +} + +variable "consul_token" { + description = "The Secret ID of an ACL token to make requests to Consul with" + type = string + sensitive = true +} + variable "availability_zone" { description = "The AZ where the cluster is being run" type = string