Merge pull request #7072 from hashicorp/system-sched-e2e

System sched e2e
Drew Bailey
2020-02-04 14:16:21 -05:00
committed by GitHub
5 changed files with 242 additions and 0 deletions


@@ -20,6 +20,7 @@ import (
_ "github.com/hashicorp/nomad/e2e/nomad09upgrade"
_ "github.com/hashicorp/nomad/e2e/nomadexec"
_ "github.com/hashicorp/nomad/e2e/spread"
_ "github.com/hashicorp/nomad/e2e/systemsched"
_ "github.com/hashicorp/nomad/e2e/taskevents"
)
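
The blank imports above exist only for their init() side effects: each e2e package registers its suite with framework.AddSuites, exactly as the new systemsched package does further down in this commit, and the e2e package's single top-level test then hands control to the framework. A minimal sketch of that entry point, assuming the framework exposes a Run driver (the entry file itself is not part of this diff):

package e2e

import (
    "testing"

    "github.com/hashicorp/nomad/e2e/framework"

    // Blank imports register their suites via init().
    _ "github.com/hashicorp/nomad/e2e/systemsched"
)

// TestE2E is a sketch of the package's single entry point (assumed, not part
// of this diff); the framework expands it into one subtest per registered
// suite and test case.
func TestE2E(t *testing.T) {
    framework.Run(t)
}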


@@ -133,6 +133,26 @@ func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []stri
}
}

// WaitForAllocsNotPending blocks until every allocation in allocIDs has left
// the pending client state.
func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
    for _, allocID := range allocIDs {
        WaitForAllocNotPending(t, nomadClient, allocID)
    }
}

// WaitForAllocNotPending polls the allocation until its client status is
// anything other than pending; the allocation may be running, complete,
// failed, or lost by the time this returns.
func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID string) {
    testutil.WaitForResultRetries(retries, func() (bool, error) {
        time.Sleep(time.Millisecond * 100)
        alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
        if err != nil {
            return false, err
        }
        return alloc.ClientStatus != structs.AllocClientStatusPending, fmt.Errorf("expected status not pending, but was: %s", alloc.ClientStatus)
    }, func(err error) {
        t.Fatalf("failed to wait on alloc: %v", err)
    })
}

func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
    allocIDs := make([]string, 0, len(allocs))
    for _, alloc := range allocs {

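Note that WaitForAllocNotPending is satisfied by any client status other than pending (running, complete, failed, and lost all qualify). That looser condition is what lets the system-scheduler test below keep making progress even when an allocation on the soon-to-be-ineligible node has already failed and been replaced. A small usage sketch that combines it with the existing AllocIDsFromAllocationListStubs helper; the wrapper name and job ID are illustrative and not part of this diff, and the imports mirror the test file below (testing, api, e2eutil, require):

// Illustrative only: wait until every allocation for a job has left the
// pending client state, whatever state it lands in afterwards.
func waitForJobAllocsNotPending(t *testing.T, nomadClient *api.Client, jobID string) {
    allocStubs, _, err := nomadClient.Jobs().Allocations(jobID, true, nil)
    require.NoError(t, err)
    e2eutil.WaitForAllocsNotPending(t, nomadClient,
        e2eutil.AllocIDsFromAllocationListStubs(allocStubs))
}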

@@ -0,0 +1,37 @@
job "system_job" {
datacenters = ["dc1"]
type = "system"
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}
group "system_job_group" {
count = 1
restart {
attempts = 10
interval = "1m"
delay = "2s"
mode = "delay"
}
task "system_task" {
driver = "docker"
config {
image = "bash:latest"
command = "bash"
args = ["-c", "sleep 15000"]
}
env {
version = "1"
}
}
}
}


@@ -0,0 +1,37 @@
job "system_job" {
datacenters = ["dc1"]
type = "system"
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}
group "system_job_group" {
count = 1
restart {
attempts = 10
interval = "1m"
delay = "2s"
mode = "delay"
}
task "system_task" {
driver = "docker"
config {
image = "bash:latest"
command = "bash"
args = ["-c", "sleep 15000"]
}
env {
version = "2"
}
}
}
}
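
The spec above is identical to system_job0.nomad except for the env block's version value ("2" instead of "1"). That single change is enough to produce a new job version when the job is re-registered, which is what the update step in the test below relies on. A quick sanity check against the api package would look roughly like the following; it is illustrative rather than part of this diff, reusing the job ID from the test:

// Illustrative only: after re-registering the updated spec, the job's
// version should have advanced from 0 to 1.
job, _, err := nomadClient.Jobs().Info("system_deployment", nil)
require.NoError(t, err)
require.NotNil(t, job.Version)
require.Equal(t, uint64(1), *job.Version)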


@@ -0,0 +1,147 @@
package systemsched

import (
    "github.com/hashicorp/nomad/api"
    "github.com/hashicorp/nomad/e2e/e2eutil"
    "github.com/hashicorp/nomad/e2e/framework"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/stretchr/testify/require"
)

type SystemSchedTest struct {
    framework.TC
    jobIDs []string
}

func init() {
    framework.AddSuites(&framework.TestSuite{
        Component:   "SystemScheduler",
        CanRunLocal: true,
        Cases: []framework.TestCase{
            new(SystemSchedTest),
        },
    })
}

func (tc *SystemSchedTest) BeforeAll(f *framework.F) {
    // Ensure the cluster has a leader and enough ready clients before running tests
    e2eutil.WaitForLeader(f.T(), tc.Nomad())
    e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
}

func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
    t := f.T()
    nomadClient := tc.Nomad()

    jobID := "system_deployment"
    tc.jobIDs = append(tc.jobIDs, jobID)
    e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job0.nomad", jobID, "")

    jobs := nomadClient.Jobs()
    allocs, _, err := jobs.Allocations(jobID, true, nil)
    require.NoError(t, err)

    var allocIDs []string
    for _, alloc := range allocs {
        allocIDs = append(allocIDs, alloc.ID)
    }

    // Wait for allocations to get past the initial pending state
    e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

    // Mark one node as ineligible
    nodesAPI := tc.Nomad().Nodes()
    disabledNodeID := allocs[0].NodeID
    _, err = nodesAPI.ToggleEligibility(disabledNodeID, false, nil)
    require.NoError(t, err)

    // Re-fetch allocations now that one node is ineligible; the existing
    // allocs should keep running
    jobs = nomadClient.Jobs()
    allocs, _, err = jobs.Allocations(jobID, true, nil)
    require.NoError(t, err)

    allocIDs = nil
    for _, alloc := range allocs {
        allocIDs = append(allocIDs, alloc.ID)
    }

    allocForDisabledNode := make(map[string]*api.AllocationListStub)

    // Wait for allocs to run and collect the allocs on the ineligible node.
    // An allocation could have failed, so ensure there is one that's running
    // and that it is the correct version (0).
    e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
    for _, alloc := range allocs {
        if alloc.NodeID == disabledNodeID {
            allocForDisabledNode[alloc.ID] = alloc
        }
    }

    // Filter down to only our latest running alloc
    for _, alloc := range allocForDisabledNode {
        require.Equal(t, uint64(0), alloc.JobVersion)
        if alloc.ClientStatus == structs.AllocClientStatusComplete {
            // remove the old complete alloc from the map
            delete(allocForDisabledNode, alloc.ID)
        }
    }
    require.NotEmpty(t, allocForDisabledNode)
    require.Len(t, allocForDisabledNode, 1)

    // Update the job
    e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job1.nomad", jobID, "")

    // Get updated allocations
    jobs = nomadClient.Jobs()
    allocs, _, err = jobs.Allocations(jobID, false, nil)
    require.NoError(t, err)

    allocIDs = nil
    for _, alloc := range allocs {
        allocIDs = append(allocIDs, alloc.ID)
    }

    // Wait for allocs to start
    e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

    // Get the latest alloc status now that they are no longer pending
    allocs, _, err = jobs.Allocations(jobID, false, nil)
    require.NoError(t, err)

    var foundPreviousAlloc bool
    for _, dAlloc := range allocForDisabledNode {
        for _, alloc := range allocs {
            if alloc.ID == dAlloc.ID {
                foundPreviousAlloc = true
                require.Equal(t, uint64(0), alloc.JobVersion)
            } else {
                // Ensure allocs running on a non-disabled node are
                // the newer version
                if alloc.ClientStatus == structs.AllocClientStatusRunning {
                    require.Equal(t, uint64(1), alloc.JobVersion)
                }
            }
        }
    }
    require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node")
}

func (tc *SystemSchedTest) AfterEach(f *framework.F) {
    nomadClient := tc.Nomad()

    // Mark all nodes eligible again
    nodesAPI := tc.Nomad().Nodes()
    nodes, _, _ := nodesAPI.List(nil)
    for _, node := range nodes {
        nodesAPI.ToggleEligibility(node.ID, true, nil)
    }

    jobs := nomadClient.Jobs()

    // Stop all jobs registered by the test
    for _, id := range tc.jobIDs {
        jobs.Deregister(id, true, nil)
    }
    tc.jobIDs = []string{}

    // Garbage collect
    nomadClient.System().GarbageCollect()
}
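
AfterEach restores node eligibility, deregisters every job the test registered, and forces a garbage collection so later suites start from a clean cluster. If stricter verification of the cleanup were wanted, an assertion along these lines could follow the eligibility loop; it is a sketch only, not part of this commit, and it assumes the node list stubs expose a SchedulingEligibility field:

// Sketch only: confirm every node reports as eligible again after cleanup.
nodes, _, err := nodesAPI.List(nil)
require.NoError(f.T(), err)
for _, node := range nodes {
    require.Equal(f.T(), structs.NodeSchedulingEligible, node.SchedulingEligibility)
}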