mirror of
https://github.com/kemko/nomad.git
synced 2026-01-05 01:45:44 +03:00
e2e: deflake nodedrain test
The nodedrain deadline test asserts that all allocations are migrated by the deadline. However, when the deadline is short (e.g. 10s), the test may fail because of scheduler/client-propagation delays. In one failing test, it took ~15s from the RPC call to the moment the scheduler issued the migration update, and then 3 seconds for the alloc to be stopped. Here, I increase the timeouts to avoid such false positives.
This commit is contained in:
@@ -11,7 +11,7 @@ job "drain_deadline" {
|
||||
task "task" {
|
||||
driver = "docker"
|
||||
|
||||
kill_timeout = "30s"
|
||||
kill_timeout = "2m"
|
||||
|
||||
config {
|
||||
image = "busybox:1"
|
||||
|
||||
@@ -258,16 +258,18 @@ func (tc *NodeDrainE2ETest) TestNodeDrainDeadline(f *framework.F) {
|
||||
f.Len(nodes, 1, "could not get nodes for job")
|
||||
nodeID := nodes[0]
|
||||
|
||||
f.T().Logf("draining node %v", nodeID)
|
||||
out, err := e2e.Command(
|
||||
"nomad", "node", "drain",
|
||||
"-deadline", "5s",
|
||||
"-enable", "-yes", "-detach", nodeID)
|
||||
f.NoError(err, fmt.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
|
||||
f.NoError(err, fmt.Sprintf("'nomad node drain %v' failed: %v\n%v", nodeID, err, out))
|
||||
tc.nodeIDs = append(tc.nodeIDs, nodeID)
|
||||
|
||||
// the deadline is 5s but we can't guarantee its instantly terminated at
|
||||
// that point, so we give it 10s which is well under the 30s kill_timeout in
|
||||
// the job
|
||||
// the deadline is 40s but we can't guarantee its instantly terminated at
|
||||
// that point, so we give it 30s which is well under the 2m kill_timeout in
|
||||
// the job.
|
||||
// deadline here needs to account for scheduling and propagation delays.
|
||||
f.NoError(waitForNodeDrain(nodeID,
|
||||
func(got []map[string]string) bool {
|
||||
for _, alloc := range got {
|
||||
@@ -276,7 +278,7 @@ func (tc *NodeDrainE2ETest) TestNodeDrainDeadline(f *framework.F) {
|
||||
}
|
||||
}
|
||||
return false
|
||||
}, &e2e.WaitConfig{Interval: time.Millisecond * 100, Retries: 100},
|
||||
}, &e2e.WaitConfig{Interval: time.Second, Retries: 40},
|
||||
), "node did not drain immediately following deadline")
|
||||
}
|
||||
|
||||
@@ -304,7 +306,7 @@ func (tc *NodeDrainE2ETest) TestNodeDrainForce(f *framework.F) {
|
||||
tc.nodeIDs = append(tc.nodeIDs, nodeID)
|
||||
|
||||
// we've passed -force but we can't guarantee its instantly terminated at
|
||||
// that point, so we give it 20s which is under the 30s kill_timeout in
|
||||
// that point, so we give it 30s which is under the 2m kill_timeout in
|
||||
// the job
|
||||
f.NoError(waitForNodeDrain(nodeID,
|
||||
func(got []map[string]string) bool {
|
||||
@@ -314,7 +316,7 @@ func (tc *NodeDrainE2ETest) TestNodeDrainForce(f *framework.F) {
|
||||
}
|
||||
}
|
||||
return false
|
||||
}, &e2e.WaitConfig{Interval: time.Millisecond * 100, Retries: 200},
|
||||
}, &e2e.WaitConfig{Interval: time.Second, Retries: 40},
|
||||
), "node did not drain immediately when forced")
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user