E2E: remove dnsmasq and references to ECS plugin (#25892)

The DNS configuration for our E2E cluster uses dnsmasq to pass all DNS through
Consul. But there's a circular reference in the systemd configurations that
sometimes causes the Docker service to fail. This causes test flakes during
upgrade testing because we count the number of nodes and expect `system` jobs
using Docker to run on all nodes.

We no longer have any tests that require Consul DNS, so remove the complication
of dnsmasq to break the reference cycle. Also, while I was looking at this I
noticed we still had setup that would configure the ECS remote task driver
plugin, which is archived. Remove this as well.

Ref: https://hashicorp.atlassian.net/browse/NMD-162
This commit is contained in:
Tim Gross
2025-05-20 08:26:22 -04:00
committed by GitHub
parent 2d63abd80f
commit 0e728b87db
5 changed files with 1 additions and 140 deletions

View File

@@ -1,55 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Boot-time DNS setup. From instance-specific data this script writes:
#   /etc/hosts                    - a 127.0.0.1 entry for the local hostname
#   /etc/resolv.conf              - dnsmasq first, then the docker bridge gateway
#   /etc/dnsmasq.d/default        - dnsmasq config; "consul" goes to 127.0.0.1#8600
#   /var/run/dnsmasq/resolv.conf  - upstream resolver derived from the VPC CIDR
# and finally asks dnsmasq to validate the resulting configuration.
#
# Every file is written in place; nothing is staged under a fixed name in /tmp.

set -euo pipefail

# These tasks can't be executed during AMI builds because they rely on
# instance-specific data.

mkdir -p /var/run/dnsmasq
mkdir -p /etc/dnsmasq.d

# Add hostname to /etc/hosts. Only append when the exact line is missing so
# that repeated runs don't accumulate duplicate entries.
hosts_entry="127.0.0.1 $(hostname)"
if ! grep -qxF -- "$hosts_entry" /etc/hosts; then
  printf '%s\n' "$hosts_entry" >> /etc/hosts
fi

# this script should run after docker.service but we can't guarantee
# it's created docker0 yet, so wait to make sure. Query the device directly
# instead of piping into `grep -q`, which can exit early and make the
# pipeline report failure under pipefail.
until ip link show docker0 > /dev/null 2>&1; do
  sleep 1
done

# Use dnsmasq first and then docker bridge network for DNS resolution
DOCKER_BRIDGE_IP_ADDRESS=$(docker network inspect bridge --format='{{(index .IPAM.Config 0).Gateway}}')

cat <<EOF > /etc/resolv.conf
nameserver 127.0.0.1
nameserver $DOCKER_BRIDGE_IP_ADDRESS
EOF

# need to get the interface for dnsmasq config so that we can
# accommodate both "predictable" and old-style interface names.
# awk does the filtering itself so that "no default route" still yields an
# empty value rather than a failed pipeline.
IFACE=$(ip route | awk '/default/ {print "interface="$5}')

cat <<EOF > /etc/dnsmasq.d/default
port=53
resolv-file=/var/run/dnsmasq/resolv.conf
bind-interfaces
interface=docker0
interface=lo
$IFACE
listen-address=127.0.0.1
server=/consul/127.0.0.1#8600
EOF

# need to get the AWS DNS address from the VPC...
# this is pretty hacky but will work for any typical case: take the first
# three octets of the VPC CIDR block and use ".2" as the last octet.
MAC=$(curl -s --fail http://169.254.169.254/latest/meta-data/mac)
CIDR_BLOCK=$(curl -s --fail "http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-block")
VPC_DNS_ROOT=$(cut -d'.' -f1-3 <<< "$CIDR_BLOCK")

echo "nameserver ${VPC_DNS_ROOT}.2" > /var/run/dnsmasq/resolv.conf

# Syntax-check the generated configuration; the script's exit status is the
# result of this check.
/usr/sbin/dnsmasq --test

View File

@@ -1,8 +0,0 @@
# Default dnsmasq configuration: serve DNS on port 53 for the listed
# interfaces and send lookups for the "consul" domain to 127.0.0.1 port 8600.
port=53
# upstream nameservers for everything else are read from this file
resolv-file=/var/run/dnsmasq/resolv.conf
# bind only to the interfaces/addresses named below
bind-interfaces
interface=docker0
interface=lo
# NOTE(review): dnsconfig.sh appears to regenerate /etc/dnsmasq.d/default at
# boot using the default-route interface instead of a fixed eth0 - confirm
# that this file is only the build-time placeholder.
interface=eth0
listen-address=127.0.0.1
# domain-specific upstream: /consul/ queries go to 127.0.0.1, port 8600
server=/consul/127.0.0.1#8600

View File

@@ -1,37 +0,0 @@
# systemd unit for dnsmasq. Before starting the daemon it runs dnsconfig.sh to
# generate host-specific resolver configuration, and after the daemon is up it
# restarts docker.
[Unit]
Description=dnsmasq - A lightweight DHCP and caching DNS server
Requires=network.target
Wants=nss-lookup.target
Before=nss-lookup.target
After=network.target
# Ordered after docker because the ExecStartPre script waits for the docker0
# interface and inspects the docker bridge network.
After=docker.service
[Service]
# The daemon forks; systemd tracks the main process through the PID file.
Type=forking
PIDFile=/run/dnsmasq/dnsmasq.pid
# Configure our hosts and resolver file with info from the host,
# then test the resulting config file before starting
ExecStartPre=/usr/local/bin/dnsconfig.sh
# (from upstream)
# We run dnsmasq via the /etc/init.d/dnsmasq script which acts as a
# wrapper picking up extra configuration files and then execs dnsmasq
# itself, when called with the "systemd-exec" function.
ExecStart=/etc/init.d/dnsmasq systemd-exec
# (from upstream)
# The systemd-*-resolvconf functions configure (and deconfigure)
# resolvconf to work with the dnsmasq DNS server. They're called like
# this to get correct error handling (i.e. don't start-resolvconf if the
# dnsmasq daemon fails to start).
ExecStartPost=/etc/init.d/dnsmasq systemd-start-resolvconf
# We need to tell docker to pick up the changes
# NOTE(review): this unit is ordered After=docker.service and then restarts
# docker from within its own start sequence; presumably this is the
# circular dependency that intermittently breaks the docker service -
# confirm before reusing this unit.
ExecStartPost=/bin/systemctl restart docker
ExecStop=/etc/init.d/dnsmasq systemd-stop-resolvconf
# Reload by sending SIGHUP to the main dnsmasq process.
ExecReload=/bin/kill -HUP $MAINPID
[Install]
WantedBy=multi-user.target

View File

@@ -29,7 +29,7 @@ sudo apt-get update
sudo apt-get upgrade -y
sudo apt-get install -y \
software-properties-common \
dnsmasq unzip tree redis-tools jq curl tmux awscli nfs-common \
unzip tree redis-tools jq curl tmux awscli nfs-common \
apt-transport-https ca-certificates gnupg2 stress
# Install hc-install
@@ -135,37 +135,6 @@ echo "Installing Envoy"
sudo curl -s -S -L -o /opt/bin/envoy https://github.com/envoyproxy/envoy/releases/download/v1.29.4/envoy-1.29.4-linux-x86_64
sudo chmod +x /opt/bin/envoy
# ECS
# The driver binary is optional: install it into the Nomad plugin directory
# only when it is present under /tmp/linux, otherwise just report the skip.
if [[ ! -e "/tmp/linux/nomad-driver-ecs" ]]; then
  echo "nomad-driver-ecs not found: skipping install"
else
  echo "Installing nomad-driver-ecs"
  sudo install --mode=0755 --owner=ubuntu \
    /tmp/linux/nomad-driver-ecs "$NOMAD_PLUGIN_DIR"
fi
# Build-time dnsmasq setup: turn off systemd-resolved, install the dnsmasq
# configuration, unit file and boot-time helper script, and leave a temporary
# resolv.conf in place so name resolution keeps working during provisioning.
echo "Configuring dnsmasq"
# disable systemd stub resolver
sudo sed -i 's|#DNSStubListener=yes|DNSStubListener=no|g' /etc/systemd/resolved.conf
# disable systemd-resolved and configure dnsmasq to forward local requests to
# consul. the resolver files need dynamic configuration based on the VPC
# address and docker bridge IP, so those will be rewritten at boot time.
sudo systemctl disable systemd-resolved.service
sudo systemctl stop systemd-resolved.service
# install the default dnsmasq configuration and make it root-owned
sudo mv /tmp/linux/dnsmasq /etc/dnsmasq.d/default
sudo chown root:root /etc/dnsmasq.d/default
# this is going to be overwritten at provisioning time, but we need something
# here or we can't fetch binaries to do the provisioning
echo 'nameserver 8.8.8.8' > /tmp/resolv.conf
sudo mv /tmp/resolv.conf /etc/resolv.conf
# install the dnsmasq unit file and the dnsconfig.sh helper, make the helper
# executable, then have systemd re-read its unit files
sudo mv /tmp/linux/dnsmasq.service /etc/systemd/system/dnsmasq.service
sudo mv /tmp/linux/dnsconfig.sh /usr/local/bin/dnsconfig.sh
sudo chmod +x /usr/local/bin/dnsconfig.sh
sudo systemctl daemon-reload
echo "Updating boot parameters"
# enable cgroup_memory and swap

View File

@@ -22,14 +22,6 @@ plugin "nomad-driver-podman" {
}
}
# Enables the nomad-driver-ecs task driver plugin, configured with cluster
# "nomad-rtd-e2e" in region us-east-1.
plugin "nomad-driver-ecs" {
config {
enabled = true
cluster = "nomad-rtd-e2e"
region = "us-east-1"
}
}
plugin "raw_exec" {
config {
enabled = true