From 0f2d9ea91507ce7369b7d3a356306332ef5d85ce Mon Sep 17 00:00:00 2001
From: Seth Hoenig
Date: Tue, 4 Feb 2020 10:51:03 -0600
Subject: [PATCH] e2e: wait 2m rather than 10s after disabling consul acls

Pretty sure Consul / Nomad clients are often not ready yet, after the
ConsulACLs test disables ACLs, by the time the next test starts running.
Running locally things tend to work, but in TeamCity this seems to be a
recurring problem.

However, when running locally I do sometimes see in the "show status"
step after disabling ACLs that some nodes are still initializing,
suggesting we're right on the border of not waiting long enough:

    nomad node status
    ID        DC   Name              Class  Drain  Eligibility  Status
    0e4dfce2  dc1  EC2AMAZ-JB3NF9P          false  eligible     ready
    6b90aa06  dc2  ip-172-31-16-225         false  eligible     ready
    7068558a  dc2  ip-172-31-20-143         false  eligible     ready
    e0ae3c5c  dc1  ip-172-31-25-165         false  eligible     ready
    15b59ed6  dc1  ip-172-31-23-199         false  eligible     initializing

Going to try waiting a full 2 minutes after disabling ACLs, hopefully
that will help things Just Work. In the future, we should probably be
parsing the output of the status checks and actually confirming all
nodes are ready. Even better, maybe that's something shipyard will have
built-in.
---
 e2e/consulacls/consul-acls-manage.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/e2e/consulacls/consul-acls-manage.sh b/e2e/consulacls/consul-acls-manage.sh
index 367381937..8caa97edb 100755
--- a/e2e/consulacls/consul-acls-manage.sh
+++ b/e2e/consulacls/consul-acls-manage.sh
@@ -379,9 +379,9 @@ function doDeactivateACLs {
     doSSH "${agent}" "sudo systemctl restart consul"
   done
 
-  # Wait 10s before moving on, Consul needs a second to calm down.
-  echo " deactivate: sleep 10s ..."
-  sleep 10
+  # Wait 120s before moving on, Consul / Nomad need time to settle down.
+  echo " deactivate: sleep 2m ..."
+  sleep 120
 }
 
 function doStatus {
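
Follow-up note (not part of the patch): the commit message suggests
eventually parsing the status output and confirming all nodes are ready
instead of sleeping for a fixed interval. Below is a minimal sketch of
that idea, assuming the same bash conventions as consul-acls-manage.sh;
the function name waitForNomadNodesReady, the attempt count, and the poll
interval are hypothetical placeholders, not an existing helper.

    # Hypothetical sketch, not part of this patch: poll "nomad node status"
    # until every node reports "ready", rather than sleeping blindly.
    function waitForNomadNodesReady {
      local attempts=24  # 24 * 5s = 2 minutes, matching the sleep above
      local interval=5
      local not_ready
      for ((i = 1; i <= attempts; i++)); do
        # Count rows whose last column (Status) is anything other than "ready".
        not_ready="$(nomad node status | awk 'NR > 1 && $NF != "ready"' | wc -l)"
        if [ "${not_ready}" -eq 0 ]; then
          echo " deactivate: all nomad nodes are ready"
          return 0
        fi
        echo " deactivate: ${not_ready} node(s) not ready, retrying in ${interval}s ..."
        sleep "${interval}"
      done
      echo " deactivate: nomad nodes did not become ready in time"
      return 1
    }

A similar check could be made against the Consul agents before moving on;
the fixed 2m sleep is kept in this patch as the simpler change.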