upgrade tests: add transparent proxy workload (#25176)
Add an upgrade test workload for Consul service mesh with transparent proxy. Note this breaks from the "countdash" demo: the dashboard application can only verify that the backend is up by making a websocket connection, which we can't do as a health check, and the health check it exposes for that purpose only passes once the websocket connection has been made. So the dashboard is replaced with a minimal nginx reverse proxy in front of the count-api instead.

Ref: https://hashicorp.atlassian.net/browse/NET-12217
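For context, the connectivity the new health check exercises can also be probed by hand once the job is running. This is a sketch only: the alloc ID is a placeholder, and it assumes curl is available inside the nginx image.

# GET /health on nginx (port 9002) is proxied through the Envoy sidecar
# to count-api via its Consul virtual address, so a 200 here means the
# whole transparent-proxy path is healthy.
nomad alloc exec -task dashboard <alloc-id> \
  curl -sf http://localhost:9002/health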
@@ -38,6 +38,15 @@ output "nomad_token" {
   sensitive = true
 }
+
+output "consul_token" {
+  value     = module.provision-infra.consul_token
+  sensitive = true
+}
+
+output "consul_addr" {
+  value = module.provision-infra.consul_addr
+}
 
 output "cluster_unique_identifier" {
   value = module.provision-infra.cluster_unique_identifier
 }
@@ -23,7 +23,7 @@ locals {
 module "keys" {
   depends_on = [random_pet.e2e]
   name       = local.random_name
-  path       = "${local.keys_dir}"
+  path       = local.keys_dir
   source     = "mitchellh/dynamic-keys/aws"
   version    = "v2.0.0"
 }
@@ -66,7 +66,7 @@ EOM
 }
 
 output "cluster_unique_identifier" {
-  value = "${local.random_name}"
+  value = local.random_name
 }
 
 output "nomad_addr" {
@@ -93,3 +93,12 @@ output "nomad_token" {
   value     = chomp(data.local_sensitive_file.nomad_token.content)
   sensitive = true
 }
+
+output "consul_addr" {
+  value = "https://${aws_instance.consul_server.public_ip}:8501"
+}
+
+output "consul_token" {
+  value     = chomp(local_sensitive_file.consul_initial_management_token.content)
+  sensitive = true
+}
@@ -133,6 +133,8 @@ scenario "upgrade" {
       key_file          = step.provision_cluster.key_file
       nomad_token       = step.provision_cluster.nomad_token
       availability_zone = var.availability_zone
+      consul_addr       = step.provision_cluster.consul_addr
+      consul_token      = step.provision_cluster.consul_token
 
       workloads = {
         service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3, type = "service" }
@@ -167,6 +169,13 @@ scenario "upgrade" {
           pre_script  = "scripts/wait_for_nfs_volume.sh"
         }
+
+        tproxy = {
+          job_spec    = "jobs/tproxy.nomad.hcl"
+          alloc_count = 2
+          type        = "service"
+          pre_script  = "scripts/create-consul-intention.sh"
+        }
 
       }
     }
enos/modules/run_workloads/jobs/tproxy.nomad.hcl (new file, 155 lines)
@@ -0,0 +1,155 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# this variable is not used, but is required by the runner
variable "alloc_count" {
  type    = number
  default = 2
}

job "countdash" {

  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "api" {
    network {
      mode = "bridge"
    }

    service {
      name = "count-api"
      port = "9001"

      check {
        type     = "http"
        path     = "/health"
        expose   = true
        interval = "3s"
        timeout  = "1s"

        check_restart {
          limit = 0 # don't restart on failure
        }
      }

      connect {
        sidecar_service {
          proxy {
            transparent_proxy {}
          }
        }
      }
    }

    task "web" {
      driver = "docker"

      config {
        image          = "hashicorpdev/counter-api:v3"
        auth_soft_fail = true
      }
    }
  }

  group "dashboard" {
    network {
      mode = "bridge"

      port "http" {
        # TODO: for some reason, without a static port the health check never
        # succeeds, even though we have expose=true on the check
        static = 9002
        to     = 9002
      }
    }

    service {
      name = "count-dashboard"
      port = "9002"

      # this check will fail if connectivity between the dashboard and the API
      # fails, and will restart the task. we poll frequently but also allow it
      # to fail temporarily, so we can account for allocations being
      # rescheduled during tests
      check {
        type     = "http"
        path     = "/health"
        expose   = true
        task     = "dashboard"
        interval = "3s"
        timeout  = "1s"

        # note: it seems to take an extremely long time for this API to return ok
        check_restart {
          limit = 30
        }
      }

      connect {
        sidecar_service {
          proxy {
            transparent_proxy {}
          }
        }
      }
    }

    # note: this is not the usual countdash frontend, because that only sets
    # the health check that tests the backend as healthy once a browser
    # connection has been made. So serve a reverse proxy to the count API
    # instead.
    task "dashboard" {
      driver = "docker"

      env {
        COUNTING_SERVICE_URL = "http://count-api.virtual.consul"
      }

      config {
        image          = "nginx:latest"
        command        = "nginx"
        args           = ["-c", "/local/default.conf"]
        auth_soft_fail = true
      }

      template {
        destination = "local/default.conf"
        data        = <<EOT
daemon off;
worker_processes 1;
user www-data;
error_log /var/log/error.log info;

events {
  use epoll;
  worker_connections 128;
}

http {
  include /etc/nginx/mime.types;
  charset utf-8;
  access_log /var/log/access.log combined;
  server {
    listen 9002;
    location / {
      proxy_pass http://count-api.virtual.consul;
    }
  }
}
EOT

      }

      # restart only once, because we're using this task's service to detect
      # tproxy connectivity failures in this test
      restart {
        delay    = "5s"
        attempts = 1
        mode     = "fail"
      }
    }

  }
}
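As a standalone smoke test outside the enos runner, the job can be exercised directly against a cluster that already has Consul service mesh configured. A sketch, assuming the repo paths above:

# register the intention first so the mesh allows dashboard -> api traffic
consul config write enos/modules/run_workloads/scripts/intention.hcl
nomad job run enos/modules/run_workloads/jobs/tproxy.nomad.hcl
nomad job status countdash   # both groups should reach "running"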
@@ -16,6 +16,9 @@ locals {
     NOMAD_CLIENT_CERT = var.cert_file
     NOMAD_CLIENT_KEY  = var.key_file
     NOMAD_TOKEN       = var.nomad_token
+    CONSUL_HTTP_TOKEN = var.consul_token
+    CONSUL_CACERT     = var.ca_file
+    CONSUL_HTTP_ADDR  = var.consul_addr
   }
 
   system_job_count = length({ for k, v in var.workloads : k => v if v.type == "system" })
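These additions mirror the standard Consul CLI environment variables, presumably so pre_scripts such as create-consul-intention.sh (below) can reach the TLS-enabled Consul server. Roughly, the environment a script sees would look like this (illustrative values only; the real ones come from the provision step's outputs):

export CONSUL_HTTP_ADDR="https://<consul-server-ip>:8501"  # consul_addr output
export CONSUL_HTTP_TOKEN="<initial management token>"      # consul_token output
export CONSUL_CACERT="/path/to/ca.pem"                     # cluster CA certificate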
enos/modules/run_workloads/scripts/create-consul-intention.sh (new executable file, 8 lines)
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

set -euo pipefail

dir=$(dirname "${BASH_SOURCE[0]}")
consul config write "${dir}/intention.hcl"
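Run by hand, the script only needs the Consul environment shown above; for example (hypothetical address, token, and CA path):

CONSUL_HTTP_ADDR=https://10.0.0.5:8501 \
CONSUL_HTTP_TOKEN=<management-token> \
CONSUL_CACERT=./consul-ca.pem \
  ./enos/modules/run_workloads/scripts/create-consul-intention.sh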
enos/modules/run_workloads/scripts/intention.hcl (new file, 11 lines)
@@ -0,0 +1,11 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

Kind = "service-intentions"
Name = "count-api"
Sources = [
  {
    Name   = "count-dashboard"
    Action = "allow"
  }
]
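With intentions in a default-deny posture (the usual setup when ACLs are enabled), this entry is what permits the dashboard to reach the API. Whether it took effect can be verified with the Consul CLI:

# prints "Allowed" when the intention permits the connection
consul intention check count-dashboard count-api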
@@ -28,6 +28,18 @@ variable "nomad_token" {
   sensitive   = true
 }
+
+variable "consul_addr" {
+  description = "The Consul API HTTP address."
+  type        = string
+  default     = "http://localhost:8500"
+}
+
+variable "consul_token" {
+  description = "The Secret ID of an ACL token to make requests to Consul with"
+  type        = string
+  sensitive   = true
+}
 
 variable "availability_zone" {
   description = "The AZ where the cluster is being run"
   type        = string