Merge pull request #3868 from hashicorp/f-server-side-restarts

server side rescheduling
This commit is contained in:
Preetha
2018-02-13 20:09:51 -06:00
committed by GitHub
49 changed files with 3201 additions and 286 deletions

View File

@@ -4,6 +4,9 @@ __BACKWARDS INCOMPATIBILITIES:__
* discovery: Prevent absolute URLs in check paths. The documentation indicated
that absolute URLs are not allowed, but it was not enforced. Absolute URLs
in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)]
* jobspec: The default values for the restart policy have changed. The restart policy mode now defaults to "fail", and
the attempts/interval values have been changed to enable faster server-side rescheduling. See the
[restart stanza](https://www.nomadproject.io/docs/job-specification/restart.html) for more information.
IMPROVEMENTS:
* core: Allow upgrading/downgrading TLS via SIGHUP on both servers and clients [[GH-3492](https://github.com/hashicorp/nomad/issues/3492)]

View File

@@ -92,6 +92,7 @@ type Allocation struct {
DeploymentStatus *AllocDeploymentStatus
PreviousAllocation string
NextAllocation string
RescheduleTracker *RescheduleTracker
CreateIndex uint64
ModifyIndex uint64
AllocModifyIndex uint64
@@ -131,6 +132,7 @@ type AllocationListStub struct {
ClientDescription string
TaskStates map[string]*TaskState
DeploymentStatus *AllocDeploymentStatus
RescheduleTracker *RescheduleTracker
CreateIndex uint64
ModifyIndex uint64
CreateTime int64
@@ -159,3 +161,49 @@ func (a AllocIndexSort) Less(i, j int) bool {
func (a AllocIndexSort) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
// RescheduleInfo returns the number of reschedule attempts consumed within the policy interval
// at the given time, along with the total attempts allowed by the task group's reschedule policy
func (a Allocation) RescheduleInfo(t time.Time) (int, int) {
var reschedulePolicy *ReschedulePolicy
for _, tg := range a.Job.TaskGroups {
if *tg.Name == a.TaskGroup {
reschedulePolicy = tg.ReschedulePolicy
}
}
if reschedulePolicy == nil {
return 0, 0
}
availableAttempts := *reschedulePolicy.Attempts
interval := *reschedulePolicy.Interval
attempted := 0
// Loop over the reschedule tracker to count attempts within the reschedule policy's interval
if a.RescheduleTracker != nil && availableAttempts > 0 && interval > 0 {
for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
timeDiff := t.UTC().UnixNano() - lastAttempt
if timeDiff < interval.Nanoseconds() {
attempted += 1
}
}
}
return attempted, availableAttempts
}
// RescheduleTracker encapsulates previous reschedule events
type RescheduleTracker struct {
Events []*RescheduleEvent
}
// RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation
type RescheduleEvent struct {
// RescheduleTime is the timestamp of a reschedule attempt
RescheduleTime int64
// PrevAllocID is the ID of the previous allocation being rescheduled
PrevAllocID string
// PrevNodeID is the node ID of the previous allocation
PrevNodeID string
}
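
As a usage sketch (not part of this diff; it assumes a configured api.Client and an existing allocation ID), a consumer of the api package could read the reschedule budget like this:

// Hypothetical sketch: query an allocation and print its reschedule budget.
client, err := api.NewClient(api.DefaultConfig())
if err != nil {
    log.Fatal(err)
}
alloc, _, err := client.Allocations().Info(allocID, nil) // allocID is assumed to exist
if err != nil {
    log.Fatal(err)
}
attempted, total := alloc.RescheduleInfo(time.Now())
fmt.Printf("reschedule attempts used: %d of %d within the policy interval\n", attempted, total)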

View File

@@ -4,6 +4,12 @@ import (
"reflect"
"sort"
"testing"
"time"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/stretchr/testify/require"
)
func TestAllocations_List(t *testing.T) {
@@ -119,3 +125,117 @@ func TestAllocations_CreateIndexSort(t *testing.T) {
t.Fatalf("\n\n%#v\n\n%#v", allocs, expect)
}
}
func TestAllocations_RescheduleInfo(t *testing.T) {
t.Parallel()
// Create a job, task group and alloc
job := &Job{
Name: helper.StringToPtr("foo"),
Namespace: helper.StringToPtr(DefaultNamespace),
ID: helper.StringToPtr("bar"),
ParentID: helper.StringToPtr("lol"),
TaskGroups: []*TaskGroup{
{
Name: helper.StringToPtr("bar"),
Tasks: []*Task{
{
Name: "task1",
},
},
},
},
}
job.Canonicalize()
alloc := &Allocation{
ID: uuid.Generate(),
Namespace: DefaultNamespace,
EvalID: uuid.Generate(),
Name: "foo-bar[1]",
NodeID: uuid.Generate(),
TaskGroup: *job.TaskGroups[0].Name,
JobID: *job.ID,
Job: job,
}
type testCase struct {
desc string
reschedulePolicy *ReschedulePolicy
rescheduleTracker *RescheduleTracker
time time.Time
expAttempted int
expTotal int
}
testCases := []testCase{
{
desc: "no reschedule policy",
expAttempted: 0,
expTotal: 0,
},
{
desc: "no reschedule events",
reschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(3),
Interval: helper.TimeToPtr(15 * time.Minute),
},
expAttempted: 0,
expTotal: 3,
},
{
desc: "all reschedule events within interval",
reschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(3),
Interval: helper.TimeToPtr(15 * time.Minute),
},
time: time.Now(),
rescheduleTracker: &RescheduleTracker{
Events: []*RescheduleEvent{
{
RescheduleTime: time.Now().Add(-5 * time.Minute).UTC().UnixNano(),
},
},
},
expAttempted: 1,
expTotal: 3,
},
{
desc: "some reschedule events outside interval",
reschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(3),
Interval: helper.TimeToPtr(15 * time.Minute),
},
time: time.Now(),
rescheduleTracker: &RescheduleTracker{
Events: []*RescheduleEvent{
{
RescheduleTime: time.Now().Add(-45 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: time.Now().Add(-30 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: time.Now().Add(-10 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: time.Now().Add(-5 * time.Minute).UTC().UnixNano(),
},
},
},
expAttempted: 2,
expTotal: 3,
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
require := require.New(t)
alloc.RescheduleTracker = tc.rescheduleTracker
job.TaskGroups[0].ReschedulePolicy = tc.reschedulePolicy
attempted, total := alloc.RescheduleInfo(tc.time)
require.Equal(tc.expAttempted, attempted)
require.Equal(tc.expTotal, total)
})
}
}

View File

@@ -558,6 +558,7 @@ type Job struct {
Periodic *PeriodicConfig
ParameterizedJob *ParameterizedJobConfig
Payload []byte
Reschedule *ReschedulePolicy
Meta map[string]string
VaultToken *string `mapstructure:"vault_token"`
Status *string
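
A minimal sketch (hypothetical values, using the helper functions seen elsewhere in this diff) of setting the new job-level policy through the api package; task groups that do not define their own reschedule block inherit it during canonicalization:

// Hypothetical sketch: job-level reschedule policy via the api package.
job := &api.Job{
    ID:   helper.StringToPtr("example"),
    Name: helper.StringToPtr("example"),
    Type: helper.StringToPtr("batch"),
    Reschedule: &api.ReschedulePolicy{
        Attempts: helper.IntToPtr(3),
        Interval: helper.TimeToPtr(1 * time.Hour),
    },
}
job.Canonicalize()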

View File

@@ -132,8 +132,12 @@ func TestJobs_Canonicalize(t *testing.T) {
RestartPolicy: &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second),
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Minute),
Mode: helper.StringToPtr("delay"),
Interval: helper.TimeToPtr(30 * time.Minute),
Mode: helper.StringToPtr("fail"),
},
ReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Hour),
},
Tasks: []*Task{
{
@@ -194,8 +198,12 @@ func TestJobs_Canonicalize(t *testing.T) {
RestartPolicy: &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second),
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Minute),
Mode: helper.StringToPtr("delay"),
Interval: helper.TimeToPtr(30 * time.Minute),
Mode: helper.StringToPtr("fail"),
},
ReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Hour),
},
Tasks: []*Task{
{
@@ -326,6 +334,10 @@ func TestJobs_Canonicalize(t *testing.T) {
Delay: helper.TimeToPtr(25 * time.Second),
Mode: helper.StringToPtr("delay"),
},
ReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Hour),
},
EphemeralDisk: &EphemeralDisk{
Sticky: helper.BoolToPtr(false),
Migrate: helper.BoolToPtr(false),
@@ -534,8 +546,12 @@ func TestJobs_Canonicalize(t *testing.T) {
RestartPolicy: &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second),
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Minute),
Mode: helper.StringToPtr("delay"),
Interval: helper.TimeToPtr(30 * time.Minute),
Mode: helper.StringToPtr("fail"),
},
ReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Hour),
},
Update: &UpdateStrategy{
Stagger: helper.TimeToPtr(2 * time.Second),
@@ -566,8 +582,12 @@ func TestJobs_Canonicalize(t *testing.T) {
RestartPolicy: &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second),
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Minute),
Mode: helper.StringToPtr("delay"),
Interval: helper.TimeToPtr(30 * time.Minute),
Mode: helper.StringToPtr("fail"),
},
ReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Hour),
},
Update: &UpdateStrategy{
Stagger: helper.TimeToPtr(1 * time.Second),

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
// MemoryStats holds memory usage related stats
@@ -78,6 +79,33 @@ func (r *RestartPolicy) Merge(rp *RestartPolicy) {
}
}
// ReschedulePolicy configures how Tasks are rescheduled when they crash or fail.
type ReschedulePolicy struct {
// Attempts limits the number of rescheduling attempts that can occur in an interval.
Attempts *int `mapstructure:"attempts"`
// Interval is a duration in which we can limit the number of reschedule attempts.
Interval *time.Duration `mapstructure:"interval"`
}
func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) {
if rp.Interval != nil {
r.Interval = rp.Interval
}
if rp.Attempts != nil {
r.Attempts = rp.Attempts
}
}
func (r *ReschedulePolicy) Copy() *ReschedulePolicy {
if r == nil {
return nil
}
nrp := new(ReschedulePolicy)
*nrp = *r
return nrp
}
// CheckRestart describes if and when a task should be restarted based on
// failing health checks.
type CheckRestart struct {
@@ -222,14 +250,15 @@ func (e *EphemeralDisk) Canonicalize() {
// TaskGroup is the unit of scheduling.
type TaskGroup struct {
Name *string
Count *int
Constraints []*Constraint
Tasks []*Task
RestartPolicy *RestartPolicy
EphemeralDisk *EphemeralDisk
Update *UpdateStrategy
Meta map[string]string
Name *string
Count *int
Constraints []*Constraint
Tasks []*Task
RestartPolicy *RestartPolicy
ReschedulePolicy *ReschedulePolicy
EphemeralDisk *EphemeralDisk
Update *UpdateStrategy
Meta map[string]string
}
// NewTaskGroup creates a new TaskGroup.
@@ -272,21 +301,56 @@ func (g *TaskGroup) Canonicalize(job *Job) {
g.Update.Canonicalize()
}
// Merge the reschedule policy from the job
if jr, tr := job.Reschedule != nil, g.ReschedulePolicy != nil; jr && tr {
jobReschedule := job.Reschedule.Copy()
jobReschedule.Merge(g.ReschedulePolicy)
g.ReschedulePolicy = jobReschedule
} else if jr {
jobReschedule := job.Reschedule.Copy()
g.ReschedulePolicy = jobReschedule
}
// Merge with default reschedule policy
var defaultReschedulePolicy *ReschedulePolicy
switch *job.Type {
case "service":
defaultReschedulePolicy = &ReschedulePolicy{
Attempts: helper.IntToPtr(structs.DefaultServiceJobReschedulePolicy.Attempts),
Interval: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Interval),
}
case "batch":
defaultReschedulePolicy = &ReschedulePolicy{
Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts),
Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval),
}
default:
defaultReschedulePolicy = &ReschedulePolicy{
Attempts: helper.IntToPtr(0),
Interval: helper.TimeToPtr(0 * time.Second),
}
}
if g.ReschedulePolicy != nil {
defaultReschedulePolicy.Merge(g.ReschedulePolicy)
}
g.ReschedulePolicy = defaultReschedulePolicy
var defaultRestartPolicy *RestartPolicy
switch *job.Type {
case "service", "system":
defaultRestartPolicy = &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second),
Attempts: helper.IntToPtr(2),
Interval: helper.TimeToPtr(1 * time.Minute),
Mode: helper.StringToPtr("delay"),
Delay: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Delay),
Attempts: helper.IntToPtr(structs.DefaultServiceJobRestartPolicy.Attempts),
Interval: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Interval),
Mode: helper.StringToPtr(structs.DefaultServiceJobRestartPolicy.Mode),
}
default:
defaultRestartPolicy = &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second),
Attempts: helper.IntToPtr(15),
Interval: helper.TimeToPtr(7 * 24 * time.Hour),
Mode: helper.StringToPtr("delay"),
Delay: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Delay),
Attempts: helper.IntToPtr(structs.DefaultBatchJobRestartPolicy.Attempts),
Interval: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Interval),
Mode: helper.StringToPtr(structs.DefaultBatchJobRestartPolicy.Mode),
}
}
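
To make the precedence concrete, here is a small sketch (hypothetical values, mirroring the "Merge from job" test case below): the type defaults are overlaid by the job-level policy, which is overlaid in turn by the group-level policy.

// Hypothetical sketch of the merge order in TaskGroup.Canonicalize:
// defaults (by job type) <- job.Reschedule <- group.ReschedulePolicy
job := &api.Job{
    ID:         helper.StringToPtr("example"),
    Type:       helper.StringToPtr("batch"),
    Reschedule: &api.ReschedulePolicy{Attempts: helper.IntToPtr(1)},
}
job.Canonicalize()

tg := &api.TaskGroup{
    Name:             helper.StringToPtr("web"),
    ReschedulePolicy: &api.ReschedulePolicy{Interval: helper.TimeToPtr(5 * time.Minute)},
}
tg.Canonicalize(job)
// Result: Attempts=1 (from the job), Interval=5m (from the group); any field
// left unset falls back to DefaultBatchJobReschedulePolicy.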

View File

@@ -6,6 +6,7 @@ import (
"time"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/assert"
)
@@ -268,6 +269,118 @@ func TestTaskGroup_Canonicalize_Update(t *testing.T) {
assert.Nil(t, tg.Update)
}
// Verifies that reschedule policy is merged correctly
func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) {
type testCase struct {
desc string
jobReschedulePolicy *ReschedulePolicy
taskReschedulePolicy *ReschedulePolicy
expected *ReschedulePolicy
}
testCases := []testCase{
{
desc: "Default",
jobReschedulePolicy: nil,
taskReschedulePolicy: nil,
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts),
Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval),
},
},
{
desc: "Empty job reschedule policy",
jobReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(0),
Interval: helper.TimeToPtr(0),
},
taskReschedulePolicy: nil,
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(0),
Interval: helper.TimeToPtr(0),
},
},
{
desc: "Inherit from job",
jobReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
Interval: helper.TimeToPtr(20 * time.Second),
},
taskReschedulePolicy: nil,
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
Interval: helper.TimeToPtr(20 * time.Second),
},
},
{
desc: "Set in task",
jobReschedulePolicy: nil,
taskReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(5),
Interval: helper.TimeToPtr(2 * time.Minute),
},
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(5),
Interval: helper.TimeToPtr(2 * time.Minute),
},
},
{
desc: "Merge from job",
jobReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
},
taskReschedulePolicy: &ReschedulePolicy{
Interval: helper.TimeToPtr(5 * time.Minute),
},
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
Interval: helper.TimeToPtr(5 * time.Minute),
},
},
{
desc: "Override from group",
jobReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
},
taskReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(5),
},
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(5),
Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval),
},
},
{
desc: "Attempts from job, default interval",
jobReschedulePolicy: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
},
taskReschedulePolicy: nil,
expected: &ReschedulePolicy{
Attempts: helper.IntToPtr(1),
Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval),
},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
job := &Job{
ID: helper.StringToPtr("test"),
Reschedule: tc.jobReschedulePolicy,
Type: helper.StringToPtr(JobTypeBatch),
}
job.Canonicalize()
tg := &TaskGroup{
Name: helper.StringToPtr("foo"),
ReschedulePolicy: tc.taskReschedulePolicy,
}
tg.Canonicalize(job)
assert.Equal(t, tc.expected, tg.ReschedulePolicy)
})
}
}
// TestService_CheckRestart asserts Service.CheckRestart settings are properly
// inherited by Checks.
func TestService_CheckRestart(t *testing.T) {

View File

@@ -638,6 +638,11 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) {
Mode: *taskGroup.RestartPolicy.Mode,
}
tg.ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: *taskGroup.ReschedulePolicy.Attempts,
Interval: *taskGroup.ReschedulePolicy.Interval,
}
tg.EphemeralDisk = &structs.EphemeralDisk{
Sticky: *taskGroup.EphemeralDisk.Sticky,
SizeMB: *taskGroup.EphemeralDisk.SizeMB,

View File

@@ -1171,6 +1171,10 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
Delay: helper.TimeToPtr(10 * time.Second),
Mode: helper.StringToPtr("delay"),
},
ReschedulePolicy: &api.ReschedulePolicy{
Interval: helper.TimeToPtr(12 * time.Hour),
Attempts: helper.IntToPtr(5),
},
EphemeralDisk: &api.EphemeralDisk{
SizeMB: helper.IntToPtr(100),
Sticky: helper.BoolToPtr(true),
@@ -1379,6 +1383,10 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
Delay: 10 * time.Second,
Mode: "delay",
},
ReschedulePolicy: &structs.ReschedulePolicy{
Interval: 12 * time.Hour,
Attempts: 5,
},
EphemeralDisk: &structs.EphemeralDisk{
SizeMB: 100,
Sticky: true,

View File

@@ -274,6 +274,16 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength
}
}
if alloc.RescheduleTracker != nil && len(alloc.RescheduleTracker.Events) > 0 {
attempts, total := alloc.RescheduleInfo(time.Unix(0, alloc.ModifyTime))
reschedInfo := fmt.Sprintf("Reschedule Attempts|%d/%d", attempts, total)
basic = append(basic, reschedInfo)
}
if alloc.NextAllocation != "" {
basic = append(basic,
fmt.Sprintf("Replacement Alloc ID|%s", limit(alloc.NextAllocation, uuidLength)))
}
if verbose {
basic = append(basic,
fmt.Sprintf("Evaluated Nodes|%d", alloc.Metrics.NodesEvaluated),

View File

@@ -2,15 +2,19 @@ package command
import (
"fmt"
"regexp"
"strings"
"testing"
"time"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/mitchellh/cli"
"github.com/posener/complete"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestAllocStatusCommand_Implements(t *testing.T) {
@@ -168,6 +172,56 @@ func TestAllocStatusCommand_Run(t *testing.T) {
t.Fatal("expected to find alloc id in output")
}
ui.OutputWriter.Reset()
}
func TestAllocStatusCommand_RescheduleInfo(t *testing.T) {
t.Parallel()
srv, client, url := testServer(t, true, nil)
defer srv.Shutdown()
// Wait for a node to be ready
testutil.WaitForResult(func() (bool, error) {
nodes, _, err := client.Nodes().List(nil)
if err != nil {
return false, err
}
for _, node := range nodes {
if node.Status == structs.NodeStatusReady {
return true, nil
}
}
return false, fmt.Errorf("no ready nodes")
}, func(err error) {
t.Fatalf("err: %v", err)
})
ui := new(cli.MockUi)
cmd := &AllocStatusCommand{Meta: Meta{Ui: ui}}
// Test reschedule attempt info
require := require.New(t)
state := srv.Agent.Server().State()
a := mock.Alloc()
a.Metrics = &structs.AllocMetric{}
nextAllocId := uuid.Generate()
a.NextAllocation = nextAllocId
a.RescheduleTracker = &structs.RescheduleTracker{
Events: []*structs.RescheduleEvent{
{
RescheduleTime: time.Now().Add(-2 * time.Minute).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
},
}
require.Nil(state.UpsertAllocs(1000, []*structs.Allocation{a}))
if code := cmd.Run([]string{"-address=" + url, a.ID}); code != 0 {
t.Fatalf("expected exit 0, got: %d", code)
}
out := ui.OutputWriter.String()
require.Contains(out, "Replacement Alloc ID")
require.Regexp(regexp.MustCompile(".*Reschedule Attempts\\s*=\\s*1/2"), out)
}
func TestAllocStatusCommand_AutocompleteArgs(t *testing.T) {

View File

@@ -183,18 +183,18 @@ job "example" {
#
restart {
# The number of attempts to run the job within the specified interval.
attempts = 10
interval = "5m"
attempts = 2
interval = "30m"
# The "delay" parameter specifies the duration to wait before restarting
# a task after it has failed.
delay = "25s"
delay = "15s"
# The "mode" parameter controls what happens when a task has restarted
# "attempts" times within the interval. "delay" mode delays the next
# restart until the next interval. "fail" mode does not restart the task
# if "attempts" has been hit within the interval.
mode = "delay"
mode = "fail"
}
# The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk

View File

@@ -108,6 +108,7 @@ func parseJob(result *api.Job, list *ast.ObjectList) error {
delete(m, "periodic")
delete(m, "vault")
delete(m, "parameterized")
delete(m, "reschedule")
// Set the ID and name to the object key
result.ID = helper.StringToPtr(obj.Keys[0].Token.Value().(string))
@@ -143,6 +144,7 @@ func parseJob(result *api.Job, list *ast.ObjectList) error {
"task",
"type",
"update",
"reschedule",
"vault",
"vault_token",
}
@@ -178,6 +180,13 @@ func parseJob(result *api.Job, list *ast.ObjectList) error {
}
}
// If we have a reschedule stanza, then parse that
if o := listVal.Filter("reschedule"); len(o.Items) > 0 {
if err := parseReschedulePolicy(&result.Reschedule, o); err != nil {
return multierror.Prefix(err, "reschedule ->")
}
}
// Parse out meta fields. These are in HCL as a list so we need
// to iterate over them and merge them.
if metaO := listVal.Filter("meta"); len(metaO.Items) > 0 {
@@ -274,6 +283,7 @@ func parseGroups(result *api.Job, list *ast.ObjectList) error {
"task",
"ephemeral_disk",
"update",
"reschedule",
"vault",
}
if err := helper.CheckHCLKeys(listVal, valid); err != nil {
@@ -313,6 +323,12 @@ func parseGroups(result *api.Job, list *ast.ObjectList) error {
}
}
// Parse reschedule policy
if o := listVal.Filter("reschedule"); len(o.Items) > 0 {
if err := parseReschedulePolicy(&g.ReschedulePolicy, o); err != nil {
return multierror.Prefix(err, fmt.Sprintf("'%s', reschedule ->", n))
}
}
// Parse ephemeral disk
if o := listVal.Filter("ephemeral_disk"); len(o.Items) > 0 {
g.EphemeralDisk = &api.EphemeralDisk{}
@@ -417,6 +433,46 @@ func parseRestartPolicy(final **api.RestartPolicy, list *ast.ObjectList) error {
return nil
}
func parseReschedulePolicy(final **api.ReschedulePolicy, list *ast.ObjectList) error {
list = list.Elem()
if len(list.Items) > 1 {
return fmt.Errorf("only one 'reschedule' block allowed")
}
// Get our job object
obj := list.Items[0]
// Check for invalid keys
valid := []string{
"attempts",
"interval",
}
if err := helper.CheckHCLKeys(obj.Val, valid); err != nil {
return err
}
var m map[string]interface{}
if err := hcl.DecodeObject(&m, obj.Val); err != nil {
return err
}
var result api.ReschedulePolicy
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
WeaklyTypedInput: true,
Result: &result,
})
if err != nil {
return err
}
if err := dec.Decode(m); err != nil {
return err
}
*final = &result
return nil
}
func parseConstraints(result *[]*api.Constraint, list *ast.ObjectList) error {
for _, o := range list.Elem().Items {
// Check for invalid keys

View File

@@ -94,6 +94,10 @@ func TestParse(t *testing.T) {
Delay: helper.TimeToPtr(15 * time.Second),
Mode: helper.StringToPtr("delay"),
},
ReschedulePolicy: &api.ReschedulePolicy{
Interval: helper.TimeToPtr(12 * time.Hour),
Attempts: helper.IntToPtr(5),
},
EphemeralDisk: &api.EphemeralDisk{
Sticky: helper.BoolToPtr(true),
SizeMB: helper.IntToPtr(150),
@@ -667,6 +671,36 @@ func TestParse(t *testing.T) {
},
false,
},
{
"reschedule-job.hcl",
&api.Job{
ID: helper.StringToPtr("foo"),
Name: helper.StringToPtr("foo"),
Type: helper.StringToPtr("batch"),
Datacenters: []string{"dc1"},
Reschedule: &api.ReschedulePolicy{
Attempts: helper.IntToPtr(15),
Interval: helper.TimeToPtr(30 * time.Minute),
},
TaskGroups: []*api.TaskGroup{
{
Name: helper.StringToPtr("bar"),
Count: helper.IntToPtr(3),
Tasks: []*api.Task{
{
Name: "bar",
Driver: "raw_exec",
Config: map[string]interface{}{
"command": "bash",
"args": []interface{}{"-c", "echo hi"},
},
},
},
},
},
},
false,
},
}
for _, tc := range cases {

View File

@@ -48,6 +48,11 @@ job "binstore-storagelocker" {
mode = "delay"
}
reschedule {
attempts = 5
interval = "12h"
}
ephemeral_disk {
sticky = true
size = 150

View File

@@ -0,0 +1,18 @@
job "foo" {
datacenters = ["dc1"]
type = "batch"
reschedule {
attempts = 15
interval = "30m"
}
group "bar" {
count = 3
task "bar" {
driver = "raw_exec"
config {
command = "bash"
args = ["-c", "echo hi"]
}
}
}
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
@@ -218,7 +219,13 @@ func TestAllocEndpoint_GetAlloc(t *testing.T) {
testutil.WaitForLeader(t, s1.RPC)
// Create the register request
prevAllocID := uuid.Generate()
alloc := mock.Alloc()
alloc.RescheduleTracker = &structs.RescheduleTracker{
Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().UTC().UnixNano(), PrevNodeID: "boom", PrevAllocID: prevAllocID},
},
}
state := s1.fsm.State()
state.UpsertJobSummary(999, mock.JobSummary(alloc.JobID))
err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})

View File

@@ -241,16 +241,18 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
// Create a watchset
ws := memdb.NewWatchSet()
// Look up the job
job, err := c.snap.JobByID(ws, eval.Namespace, eval.JobID)
if err != nil {
return false, nil, err
}
// If the eval is from a running "batch" job we don't want to garbage
// collect its allocations. If there is a long running batch job and its
// terminal allocations get GC'd the scheduler would re-run the
// allocations.
if eval.Type == structs.JobTypeBatch {
// Check if the job is running
job, err := c.snap.JobByID(ws, eval.Namespace, eval.JobID)
if err != nil {
return false, nil, err
}
// Can collect if:
// Job doesn't exist
@@ -286,7 +288,7 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
gcEval := true
var gcAllocIDs []string
for _, alloc := range allocs {
if !alloc.TerminalStatus() || alloc.ModifyIndex > thresholdIndex {
if !allocGCEligible(alloc, job, time.Now(), thresholdIndex) {
// Can't GC the evaluation since not all of the allocations are
// terminal
gcEval = false
@@ -559,3 +561,43 @@ func (c *CoreScheduler) partitionDeploymentReap(deployments []string) []*structs
return requests
}
// allocGCEligible returns whether the allocation is eligible to be garbage collected
// based on its terminal status and its reschedule tracker
func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, thresholdIndex uint64) bool {
// Not eligible unless the alloc is terminal and older than the GC threshold index
if !a.TerminalStatus() || a.ModifyIndex > thresholdIndex {
return false
}
if job == nil || job.Stop || job.Status == structs.JobStatusDead {
return true
}
var reschedulePolicy *structs.ReschedulePolicy
tg := job.LookupTaskGroup(a.TaskGroup)
if tg != nil {
reschedulePolicy = tg.ReschedulePolicy
}
// No reschedule policy, or rescheduling is disabled
if reschedulePolicy == nil || reschedulePolicy.Attempts == 0 || reschedulePolicy.Interval == 0 {
return true
}
// Reschedule tracking information has already been carried forward to a replacement alloc
if a.NextAllocation != "" {
return true
}
// Rescheduling is enabled but no attempts have been made yet, so keep the alloc around
if a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0 {
return false
}
// Only GC if the most recent reschedule attempt falls outside the policy interval
interval := reschedulePolicy.Interval
lastIndex := len(a.RescheduleTracker.Events)
lastRescheduleEvent := a.RescheduleTracker.Events[lastIndex-1]
timeDiff := gcTime.UTC().UnixNano() - lastRescheduleEvent.RescheduleTime
return timeDiff > interval.Nanoseconds()
}
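
A concrete illustration (hypothetical values, in the spirit of the table-driven test below): with a policy of 2 attempts per 30 minutes, a failed allocation whose most recent reschedule event is 45 minutes old is eligible for collection, while one rescheduled 5 minutes ago is retained so its attempt history can still be consulted.

// Hypothetical sketch: the last reschedule event is outside the policy
// interval, so the terminal alloc is eligible for GC.
job := mock.Job()
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
    Attempts: 2,
    Interval: 30 * time.Minute,
}
alloc := mock.Alloc()
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.RescheduleTracker = &structs.RescheduleTracker{
    Events: []*structs.RescheduleEvent{
        {RescheduleTime: time.Now().Add(-45 * time.Minute).UTC().UnixNano()},
    },
}
eligible := allocGCEligible(alloc, job, time.Now(), 2000) // true in this sketch
_ = eligible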

View File

@@ -6,10 +6,12 @@ import (
"time"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestCoreScheduler_EvalGC(t *testing.T) {
@@ -17,6 +19,7 @@ func TestCoreScheduler_EvalGC(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
@@ -27,15 +30,24 @@ func TestCoreScheduler_EvalGC(t *testing.T) {
eval.Status = structs.EvalStatusFailed
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
if err != nil {
t.Fatalf("err: %v", err)
require.Nil(err)
// Insert mock job with rescheduling disabled
job := mock.Job()
job.ID = eval.JobID
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second,
}
err = state.UpsertJob(1001, job)
require.Nil(err)
// Insert "dead" alloc
alloc := mock.Alloc()
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusStop
alloc.JobID = eval.JobID
alloc.TaskGroup = job.TaskGroups[0].Name
// Insert "lost" alloc
alloc2 := mock.Alloc()
@@ -43,6 +55,7 @@ func TestCoreScheduler_EvalGC(t *testing.T) {
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
alloc2.ClientStatus = structs.AllocClientStatusLost
alloc2.JobID = eval.JobID
alloc2.TaskGroup = job.TaskGroups[0].Name
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
if err != nil {
t.Fatalf("err: %v", err)
@@ -93,6 +106,182 @@ func TestCoreScheduler_EvalGC(t *testing.T) {
}
}
// Tests GC behavior on allocations being rescheduled
func TestCoreScheduler_EvalGC_ReschedulingAllocs(t *testing.T) {
t.Parallel()
s1 := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
// Insert "dead" eval
state := s1.fsm.State()
eval := mock.Eval()
eval.Status = structs.EvalStatusFailed
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
require.Nil(err)
// Insert "pending" eval for same job
eval2 := mock.Eval()
eval2.JobID = eval.JobID
state.UpsertJobSummary(999, mock.JobSummary(eval2.JobID))
err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
require.Nil(err)
// Insert mock job with default reschedule policy of 2 in 10 minutes
job := mock.Job()
job.ID = eval.JobID
err = state.UpsertJob(1001, job)
require.Nil(err)
// Insert failed alloc with an old reschedule attempt, can be GCed
alloc := mock.Alloc()
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusRun
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.JobID = eval.JobID
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.RescheduleTracker = &structs.RescheduleTracker{
Events: []*structs.RescheduleEvent{
{
RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevNodeID: uuid.Generate(),
PrevAllocID: uuid.Generate(),
},
},
}
// Insert another failed alloc with a recent reschedule attempt, can't be GCed
alloc2 := mock.Alloc()
alloc2.EvalID = eval.ID
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
alloc2.ClientStatus = structs.AllocClientStatusLost
alloc2.JobID = eval.JobID
alloc2.TaskGroup = job.TaskGroups[0].Name
alloc2.RescheduleTracker = &structs.RescheduleTracker{
Events: []*structs.RescheduleEvent{
{
RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
PrevNodeID: uuid.Generate(),
PrevAllocID: uuid.Generate(),
},
},
}
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
require.Nil(err)
// Update the time tables to make this work
tt := s1.fsm.TimeTable()
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
// Create a core scheduler
snap, err := state.Snapshot()
if err != nil {
t.Fatalf("err: %v", err)
}
core := NewCoreScheduler(s1, snap)
// Attempt the GC, job has all terminal allocs and one pending eval
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
err = core.Process(gc)
require.Nil(err)
// Eval should still exist
ws := memdb.NewWatchSet()
out, err := state.EvalByID(ws, eval.ID)
require.Nil(err)
require.Equal(eval.ID, out.ID)
outA, err := state.AllocByID(ws, alloc.ID)
require.Nil(err)
require.Nil(outA)
outA2, err := state.AllocByID(ws, alloc2.ID)
require.Nil(err)
require.Equal(alloc2.ID, outA2.ID)
}
// Tests GC behavior on stopped job with reschedulable allocs
func TestCoreScheduler_EvalGC_StoppedJob_Reschedulable(t *testing.T) {
t.Parallel()
s1 := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
// Insert "dead" eval
state := s1.fsm.State()
eval := mock.Eval()
eval.Status = structs.EvalStatusFailed
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
require.Nil(err)
// Insert mock stopped job with default reschedule policy of 2 in 10 minutes
job := mock.Job()
job.ID = eval.JobID
job.Stop = true
err = state.UpsertJob(1001, job)
require.Nil(err)
// Insert failed alloc with a recent reschedule attempt
alloc := mock.Alloc()
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusRun
alloc.ClientStatus = structs.AllocClientStatusLost
alloc.JobID = eval.JobID
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.RescheduleTracker = &structs.RescheduleTracker{
Events: []*structs.RescheduleEvent{
{
RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
PrevNodeID: uuid.Generate(),
PrevAllocID: uuid.Generate(),
},
},
}
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc})
require.Nil(err)
// Update the time tables to make this work
tt := s1.fsm.TimeTable()
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
// Create a core scheduler
snap, err := state.Snapshot()
if err != nil {
t.Fatalf("err: %v", err)
}
core := NewCoreScheduler(s1, snap)
// Attempt the GC
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
err = core.Process(gc)
require.Nil(err)
// Eval should not exist
ws := memdb.NewWatchSet()
out, err := state.EvalByID(ws, eval.ID)
require.Nil(err)
require.Nil(out)
// Alloc should not exist
outA, err := state.AllocByID(ws, alloc.ID)
require.Nil(err)
require.Nil(outA)
}
// An EvalGC should never reap a batch job that has not been stopped
func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
t.Parallel()
@@ -201,6 +390,7 @@ func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) {
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
@@ -209,21 +399,27 @@ func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) {
job := mock.Job()
job.Type = structs.JobTypeBatch
job.Status = structs.JobStatusDead
job.Stop = true
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second,
}
err := state.UpsertJob(1001, job)
require.Nil(err)
// Insert "complete" eval
eval := mock.Eval()
eval.Status = structs.EvalStatusComplete
eval.Type = structs.JobTypeBatch
eval.JobID = job.ID
err := state.UpsertEvals(1001, []*structs.Evaluation{eval})
if err != nil {
t.Fatalf("err: %v", err)
}
err = state.UpsertEvals(1002, []*structs.Evaluation{eval})
require.Nil(err)
// Insert "failed" alloc
alloc := mock.Alloc()
alloc.JobID = job.ID
alloc.EvalID = eval.ID
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DesiredStatus = structs.AllocDesiredStatusStop
// Insert "lost" alloc
@@ -232,8 +428,9 @@ func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) {
alloc2.EvalID = eval.ID
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
alloc2.ClientStatus = structs.AllocClientStatusLost
alloc2.TaskGroup = job.TaskGroups[0].Name
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
err = state.UpsertAllocs(1003, []*structs.Allocation{alloc, alloc2})
if err != nil {
t.Fatalf("err: %v", err)
}
@@ -288,7 +485,7 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
s1 := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
@@ -302,16 +499,23 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
t.Fatalf("err: %v", err)
}
// Create mock job with id same as eval
job := mock.Job()
job.ID = eval.JobID
// Insert "dead" alloc
alloc := mock.Alloc()
alloc.JobID = job.ID
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusStop
alloc.TaskGroup = job.TaskGroups[0].Name
state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
// Insert "lost" alloc
alloc2 := mock.Alloc()
alloc2.JobID = alloc.JobID
alloc2.JobID = job.ID
alloc2.EvalID = eval.ID
alloc2.TaskGroup = job.TaskGroups[0].Name
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
alloc2.ClientStatus = structs.AllocClientStatusLost
@@ -323,12 +527,21 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
// Insert "running" alloc
alloc3 := mock.Alloc()
alloc3.EvalID = eval.ID
alloc3.JobID = job.ID
state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID))
err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3})
if err != nil {
t.Fatalf("err: %v", err)
}
// Insert mock job with rescheduling disabled
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second,
}
err = state.UpsertJob(1001, job)
require.Nil(err)
// Update the time tables to make this work
tt := s1.fsm.TimeTable()
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
@@ -387,6 +600,7 @@ func TestCoreScheduler_EvalGC_Force(t *testing.T) {
t.Parallel()
for _, withAcl := range []bool{false, true} {
t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
require := require.New(t)
var server *Server
if withAcl {
server, _ = testACLServer(t, nil)
@@ -409,10 +623,21 @@ func TestCoreScheduler_EvalGC_Force(t *testing.T) {
t.Fatalf("err: %v", err)
}
// Insert mock job with rescheduling disabled
job := mock.Job()
job.ID = eval.JobID
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second,
}
err = state.UpsertJob(1001, job)
require.Nil(err)
// Insert "dead" alloc
alloc := mock.Alloc()
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusStop
alloc.TaskGroup = job.TaskGroups[0].Name
state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
if err != nil {
@@ -802,6 +1027,10 @@ func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) {
job := mock.Job()
job.Type = structs.JobTypeBatch
job.Status = structs.JobStatusDead
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second,
}
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -822,12 +1051,14 @@ func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) {
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusRun
alloc.ClientStatus = structs.AllocClientStatusComplete
alloc.TaskGroup = job.TaskGroups[0].Name
alloc2 := mock.Alloc()
alloc2.JobID = job.ID
alloc2.EvalID = eval.ID
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
alloc2.ClientStatus = structs.AllocClientStatusRunning
alloc2.TaskGroup = job.TaskGroups[0].Name
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
if err != nil {
@@ -1051,8 +1282,11 @@ func TestCoreScheduler_JobGC_Stopped(t *testing.T) {
// Insert job.
state := s1.fsm.State()
job := mock.Job()
//job.Status = structs.JobStatusDead
job.Stop = true
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second,
}
err := state.UpsertJob(1000, job)
if err != nil {
t.Fatalf("err: %v", err)
@@ -1077,7 +1311,7 @@ func TestCoreScheduler_JobGC_Stopped(t *testing.T) {
alloc.JobID = job.ID
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusStop
alloc.TaskGroup = job.TaskGroups[0].Name
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
@@ -1532,3 +1766,197 @@ func TestCoreScheduler_PartitionDeploymentReap(t *testing.T) {
t.Fatalf("Unexpected second request: %v", second)
}
}
// Tests various scenarios when allocations are eligible to be GCed
func TestAllocation_GCEligible(t *testing.T) {
type testCase struct {
Desc string
GCTime time.Time
ClientStatus string
DesiredStatus string
JobStatus string
JobStop bool
ModifyIndex uint64
NextAllocID string
ReschedulePolicy *structs.ReschedulePolicy
RescheduleTrackers []*structs.RescheduleEvent
ThresholdIndex uint64
ShouldGC bool
}
fail := time.Now()
harness := []testCase{
{
Desc: "GC when non terminal",
ClientStatus: structs.AllocClientStatusPending,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when non terminal and job stopped",
ClientStatus: structs.AllocClientStatusPending,
DesiredStatus: structs.AllocDesiredStatusRun,
JobStop: true,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when non terminal and job dead",
ClientStatus: structs.AllocClientStatusPending,
DesiredStatus: structs.AllocDesiredStatusRun,
JobStatus: structs.JobStatusDead,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when threshold not met",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusStop,
GCTime: fail,
ModifyIndex: 100,
ThresholdIndex: 90,
ReschedulePolicy: nil,
ShouldGC: false,
},
{
Desc: "GC when no reschedule policy",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: nil,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC when empty policy",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{0, 0 * time.Minute},
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC with no previous attempts",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ReschedulePolicy: &structs.ReschedulePolicy{1, 1 * time.Minute},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt within interval",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
ReschedulePolicy: &structs.ReschedulePolicy{2, 30 * time.Minute},
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt outside interval",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: true,
},
{
Desc: "GC when next alloc id is set",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
},
},
NextAllocID: uuid.Generate(),
ShouldGC: true,
},
{
Desc: "GC when job is stopped",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
},
},
JobStop: true,
ShouldGC: true,
},
{
Desc: "GC when job status is dead",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
},
},
JobStatus: structs.JobStatusDead,
ShouldGC: true,
},
}
for _, tc := range harness {
alloc := &structs.Allocation{}
alloc.ModifyIndex = tc.ModifyIndex
alloc.DesiredStatus = tc.DesiredStatus
alloc.ClientStatus = tc.ClientStatus
alloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.RescheduleTrackers}
alloc.NextAllocation = tc.NextAllocID
job := mock.Job()
alloc.TaskGroup = job.TaskGroups[0].Name
job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy
if tc.JobStatus != "" {
job.Status = tc.JobStatus
}
job.Stop = tc.JobStop
t.Run(tc.Desc, func(t *testing.T) {
if got := allocGCEligible(alloc, job, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
}
})
}
// Verify nil job
require := require.New(t)
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusComplete
require.True(allocGCEligible(alloc, nil, time.Now(), 1000))
}

View File

@@ -476,13 +476,16 @@ func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} {
if err := structs.Decode(buf, &req); err != nil {
panic(fmt.Errorf("failed to decode request: %v", err))
}
return n.upsertEvals(index, req.Evals)
}
if err := n.state.UpsertEvals(index, req.Evals); err != nil {
func (n *nomadFSM) upsertEvals(index uint64, evals []*structs.Evaluation) error {
if err := n.state.UpsertEvals(index, evals); err != nil {
n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err)
return err
}
for _, eval := range req.Evals {
for _, eval := range evals {
if eval.ShouldEnqueue() {
n.evalBroker.Enqueue(eval)
} else if eval.ShouldBlock() {
@@ -582,6 +585,14 @@ func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{}
return err
}
// Update any evals
if len(req.Evals) > 0 {
if err := n.upsertEvals(index, req.Evals); err != nil {
n.logger.Printf("[ERR] nomad.fsm: applyAllocClientUpdate failed to update evaluations: %v", err)
return err
}
}
// Unblock evals for the nodes computed node class if the client has
// finished running an allocation.
for _, alloc := range req.Alloc {
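
For context, a rough sketch (hypothetical allocations and evaluations) of the Raft payload this path now handles — the client-reported status update and the server-created follow-up evaluation travel together in one AllocUpdateRequest:

// Rough sketch of the message decoded by applyAllocClientUpdate; failedAlloc
// and retryEval are hypothetical values created by the Node.UpdateAlloc RPC.
req := structs.AllocUpdateRequest{
    Alloc:        []*structs.Allocation{failedAlloc},
    Evals:        []*structs.Evaluation{retryEval},
    WriteRequest: structs.WriteRequest{Region: "global"},
}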

View File

@@ -19,6 +19,7 @@ import (
"github.com/hashicorp/raft"
"github.com/kr/pretty"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type MockSink struct {
@@ -1074,6 +1075,7 @@ func TestFSM_UpdateAllocFromClient(t *testing.T) {
t.Parallel()
fsm := testFSM(t)
state := fsm.State()
require := require.New(t)
alloc := mock.Alloc()
state.UpsertJobSummary(9, mock.JobSummary(alloc.JobID))
@@ -1083,30 +1085,38 @@ func TestFSM_UpdateAllocFromClient(t *testing.T) {
*clientAlloc = *alloc
clientAlloc.ClientStatus = structs.AllocClientStatusFailed
eval := mock.Eval()
eval.JobID = alloc.JobID
eval.TriggeredBy = structs.EvalTriggerRetryFailedAlloc
eval.Type = alloc.Job.Type
req := structs.AllocUpdateRequest{
Alloc: []*structs.Allocation{clientAlloc},
Evals: []*structs.Evaluation{eval},
}
buf, err := structs.Encode(structs.AllocClientUpdateRequestType, req)
if err != nil {
t.Fatalf("err: %v", err)
}
require.Nil(err)
resp := fsm.Apply(makeLog(buf))
if resp != nil {
t.Fatalf("resp: %v", resp)
}
require.Nil(resp)
// Verify we are registered
ws := memdb.NewWatchSet()
out, err := fsm.State().AllocByID(ws, alloc.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
require.Nil(err)
clientAlloc.CreateIndex = out.CreateIndex
clientAlloc.ModifyIndex = out.ModifyIndex
if !reflect.DeepEqual(clientAlloc, out) {
t.Fatalf("err: %#v,%#v", clientAlloc, out)
}
require.Equal(clientAlloc, out)
// Verify eval was inserted
ws = memdb.NewWatchSet()
evals, err := fsm.State().EvalsByJob(ws, eval.Namespace, eval.JobID)
require.Nil(err)
require.Equal(1, len(evals))
res := evals[0]
eval.CreateIndex = res.CreateIndex
eval.ModifyIndex = res.ModifyIndex
require.Equal(eval, res)
}
func TestFSM_UpsertVaultAccessor(t *testing.T) {

View File

@@ -91,6 +91,10 @@ func Job() *structs.Job {
Delay: 1 * time.Minute,
Mode: structs.RestartPolicyModeDelay,
},
ReschedulePolicy: &structs.ReschedulePolicy{
Attempts: 2,
Interval: 10 * time.Minute,
},
Tasks: []*structs.Task{
{
Name: "web",

View File

@@ -820,10 +820,51 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
return fmt.Errorf("must update at least one allocation")
}
// Ensure that evals aren't set from client RPCs
// We create them here on the server, just before the Raft update
if len(args.Evals) != 0 {
return fmt.Errorf("evals field must not be set")
}
// Update modified timestamp for client initiated allocation updates
now := time.Now().UTC().UnixNano()
now := time.Now()
var evals []*structs.Evaluation
for _, alloc := range args.Alloc {
alloc.ModifyTime = now
alloc.ModifyTime = now.UTC().UnixNano()
// Add an evaluation if this is a failed alloc that is eligible for rescheduling
if alloc.ClientStatus == structs.AllocClientStatusFailed {
// Only create evaluations if this is an existing alloc,
// and eligible as per its task group's ReschedulePolicy
if existingAlloc, _ := n.srv.State().AllocByID(nil, alloc.ID); existingAlloc != nil {
job, err := n.srv.State().JobByID(nil, existingAlloc.Namespace, existingAlloc.JobID)
if err != nil {
n.srv.logger.Printf("[ERR] nomad.client: UpdateAlloc unable to find job ID %q :%v", existingAlloc.JobID, err)
continue
}
if job == nil {
n.srv.logger.Printf("[DEBUG] nomad.client: UpdateAlloc unable to find job ID %q", existingAlloc.JobID)
continue
}
taskGroup := job.LookupTaskGroup(existingAlloc.TaskGroup)
if taskGroup != nil && existingAlloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) {
eval := &structs.Evaluation{
ID: uuid.Generate(),
Namespace: existingAlloc.Namespace,
TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
JobID: existingAlloc.JobID,
Type: job.Type,
Priority: job.Priority,
Status: structs.EvalStatusPending,
}
evals = append(evals, eval)
}
}
}
}
if len(evals) > 0 {
n.srv.logger.Printf("[DEBUG] nomad.client: Adding %v evaluations for rescheduling failed allocations", len(evals))
}
// Add this to the batch
n.updatesLock.Lock()
@@ -845,7 +886,7 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
n.updatesLock.Unlock()
// Perform the batch update
n.batchUpdate(future, updates)
n.batchUpdate(future, updates, evals)
})
}
n.updatesLock.Unlock()
@@ -861,10 +902,11 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
}
// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) {
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) {
// Prepare the batch update
batch := &structs.AllocUpdateRequest{
Alloc: updates,
Evals: evals,
WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
}
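
RescheduleEligible is added to structs.Allocation elsewhere in this change and is not shown in this excerpt; a rough, non-authoritative sketch of the check it performs, mirroring the RescheduleInfo helper in the api package above, would be:

// Rough sketch only; the real method lives in nomad/structs and may differ.
// An alloc is eligible for rescheduling when attempts are enabled and fewer
// attempts than the policy allows have occurred within the policy interval.
func rescheduleEligible(a *structs.Allocation, policy *structs.ReschedulePolicy, t time.Time) bool {
    if policy == nil || policy.Attempts == 0 || policy.Interval == 0 {
        return false
    }
    attempted := 0
    if a.RescheduleTracker != nil {
        for _, event := range a.RescheduleTracker.Events {
            if t.UTC().UnixNano()-event.RescheduleTime < policy.Interval.Nanoseconds() {
                attempted++
            }
        }
    }
    return attempted < policy.Attempts
}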

View File

@@ -16,6 +16,7 @@ import (
"github.com/hashicorp/nomad/testutil"
vapi "github.com/hashicorp/vault/api"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestClientEndpoint_Register(t *testing.T) {
@@ -1648,6 +1649,7 @@ func TestClientEndpoint_UpdateAlloc(t *testing.T) {
defer s1.Shutdown()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// Create the register request
node := mock.Node()
@@ -1662,15 +1664,21 @@ func TestClientEndpoint_UpdateAlloc(t *testing.T) {
t.Fatalf("err: %v", err)
}
// Inject fake evaluations
alloc := mock.Alloc()
alloc.NodeID = node.ID
state := s1.fsm.State()
state.UpsertJobSummary(99, mock.JobSummary(alloc.JobID))
err := state.UpsertAllocs(100, []*structs.Allocation{alloc})
if err != nil {
t.Fatalf("err: %v", err)
}
// Inject mock job
job := mock.Job()
err := state.UpsertJob(101, job)
require.Nil(err)
// Inject fake allocations
alloc := mock.Alloc()
alloc.JobID = job.ID
alloc.NodeID = node.ID
err = state.UpsertJobSummary(99, mock.JobSummary(alloc.JobID))
require.Nil(err)
alloc.TaskGroup = job.TaskGroups[0].Name
err = state.UpsertAllocs(100, []*structs.Allocation{alloc})
require.Nil(err)
// Attempt update
clientAlloc := new(structs.Allocation)
@@ -1684,12 +1692,10 @@ func TestClientEndpoint_UpdateAlloc(t *testing.T) {
}
var resp2 structs.NodeAllocsResponse
start := time.Now()
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", update, &resp2); err != nil {
t.Fatalf("err: %v", err)
}
if resp2.Index == 0 {
t.Fatalf("Bad index: %d", resp2.Index)
}
err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", update, &resp2)
require.Nil(err)
require.NotEqual(0, resp2.Index)
if diff := time.Since(start); diff < batchUpdateInterval {
t.Fatalf("too fast: %v", diff)
}
@@ -1697,16 +1703,22 @@ func TestClientEndpoint_UpdateAlloc(t *testing.T) {
// Lookup the alloc
ws := memdb.NewWatchSet()
out, err := state.AllocByID(ws, alloc.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if out.ClientStatus != structs.AllocClientStatusFailed {
t.Fatalf("Bad: %#v", out)
}
require.Nil(err)
require.Equal(structs.AllocClientStatusFailed, out.ClientStatus)
require.True(out.ModifyTime > 0)
if out.ModifyTime <= 0 {
t.Fatalf("must have valid modify time but was %v", out.ModifyTime)
// Assert that one eval with TriggeredBy EvalTriggerRetryFailedAlloc exists
evaluations, err := state.EvalsByJob(ws, job.Namespace, job.ID)
require.Nil(err)
require.True(len(evaluations) != 0)
found := false
for _, resultEval := range evaluations {
if resultEval.TriggeredBy == structs.EvalTriggerRetryFailedAlloc {
found = true
}
}
require.True(found, "Should create an eval for failed alloc")
}
func TestClientEndpoint_BatchUpdate(t *testing.T) {
@@ -1747,7 +1759,7 @@ func TestClientEndpoint_BatchUpdate(t *testing.T) {
// Call to do the batch update
bf := NewBatchFuture()
endpoint := s1.endpoints.Node
endpoint.batchUpdate(bf, []*structs.Allocation{clientAlloc})
endpoint.batchUpdate(bf, []*structs.Allocation{clientAlloc}, nil)
if err := bf.Wait(); err != nil {
t.Fatalf("err: %v", err)
}
@@ -1806,6 +1818,14 @@ func TestClientEndpoint_UpdateAlloc_Vault(t *testing.T) {
t.Fatalf("err: %v", err)
}
// Inject mock job
job := mock.Job()
job.ID = alloc.JobID
err := state.UpsertJob(101, job)
if err != nil {
t.Fatalf("err: %v", err)
}
// Attempt update
clientAlloc := new(structs.Allocation)
*clientAlloc = *alloc

View File

@@ -393,7 +393,7 @@ func correctDeploymentCanaries(result *structs.PlanResult) {
}
}
// evaluateNodePlan is used to evalute the plan for a single node,
// evaluateNodePlan is used to evaluate the plan for a single node,
// returning if the plan is valid or if an error is encountered
func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID string) (bool, string, error) {
// If this is an evict-only plan, it always 'fits' since we are removing things.

View File

@@ -234,6 +234,12 @@ func (tg *TaskGroup) Diff(other *TaskGroup, contextual bool) (*TaskGroupDiff, er
diff.Objects = append(diff.Objects, rDiff)
}
// Reschedule policy diff
reschedDiff := primitiveObjectDiff(tg.ReschedulePolicy, other.ReschedulePolicy, nil, "ReschedulePolicy", contextual)
if reschedDiff != nil {
diff.Objects = append(diff.Objects, reschedDiff)
}
// EphemeralDisk diff
diskDiff := primitiveObjectDiff(tg.EphemeralDisk, other.EphemeralDisk, nil, "EphemeralDisk", contextual)
if diskDiff != nil {

View File

@@ -1494,6 +1494,148 @@ func TestTaskGroupDiff(t *testing.T) {
},
},
},
{
// ReschedulePolicy added
Old: &TaskGroup{},
New: &TaskGroup{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 15 * time.Second,
},
},
Expected: &TaskGroupDiff{
Type: DiffTypeEdited,
Objects: []*ObjectDiff{
{
Type: DiffTypeAdded,
Name: "ReschedulePolicy",
Fields: []*FieldDiff{
{
Type: DiffTypeAdded,
Name: "Attempts",
Old: "",
New: "1",
},
{
Type: DiffTypeAdded,
Name: "Interval",
Old: "",
New: "15000000000",
},
},
},
},
},
},
{
// ReschedulePolicy deleted
Old: &TaskGroup{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 15 * time.Second,
},
},
New: &TaskGroup{},
Expected: &TaskGroupDiff{
Type: DiffTypeEdited,
Objects: []*ObjectDiff{
{
Type: DiffTypeDeleted,
Name: "ReschedulePolicy",
Fields: []*FieldDiff{
{
Type: DiffTypeDeleted,
Name: "Attempts",
Old: "1",
New: "",
},
{
Type: DiffTypeDeleted,
Name: "Interval",
Old: "15000000000",
New: "",
},
},
},
},
},
},
{
// ReschedulePolicy edited
Old: &TaskGroup{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 1 * time.Second,
},
},
New: &TaskGroup{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 2,
Interval: 2 * time.Second,
},
},
Expected: &TaskGroupDiff{
Type: DiffTypeEdited,
Objects: []*ObjectDiff{
{
Type: DiffTypeEdited,
Name: "ReschedulePolicy",
Fields: []*FieldDiff{
{
Type: DiffTypeEdited,
Name: "Attempts",
Old: "1",
New: "2",
},
{
Type: DiffTypeEdited,
Name: "Interval",
Old: "1000000000",
New: "2000000000",
},
},
},
},
},
}, {
// ReschedulePolicy edited with context
Contextual: true,
Old: &TaskGroup{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 1 * time.Second,
},
},
New: &TaskGroup{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 2 * time.Second,
},
},
Expected: &TaskGroupDiff{
Type: DiffTypeEdited,
Objects: []*ObjectDiff{
{
Type: DiffTypeEdited,
Name: "ReschedulePolicy",
Fields: []*FieldDiff{
{
Type: DiffTypeNone,
Name: "Attempts",
Old: "1",
New: "1",
},
{
Type: DiffTypeEdited,
Name: "Interval",
Old: "1000000000",
New: "2000000000",
},
},
},
},
},
},
{
// Update strategy deleted
Old: &TaskGroup{

View File

@@ -533,6 +533,10 @@ type AllocUpdateRequest struct {
// Alloc is the list of new allocations to assign
Alloc []*Allocation
// Evals is the list of new evaluations to create
// Evals are valid only when used in the Raft RPC
Evals []*Evaluation
// Job is the shared parent job of the allocations.
// It is pulled out since it is common to reduce payload size.
Job *Job
@@ -2506,17 +2510,28 @@ func (d *DispatchPayloadConfig) Validate() error {
}
var (
defaultServiceJobRestartPolicy = RestartPolicy{
DefaultServiceJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second,
Attempts: 2,
Interval: 1 * time.Minute,
Mode: RestartPolicyModeDelay,
Interval: 30 * time.Minute,
Mode: RestartPolicyModeFail,
}
defaultBatchJobRestartPolicy = RestartPolicy{
DefaultBatchJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second,
Attempts: 15,
Interval: 7 * 24 * time.Hour,
Mode: RestartPolicyModeDelay,
Attempts: 3,
Interval: 24 * time.Hour,
Mode: RestartPolicyModeFail,
}
)
var (
DefaultServiceJobReschedulePolicy = ReschedulePolicy{
Attempts: 2,
Interval: 1 * time.Hour,
}
DefaultBatchJobReschedulePolicy = ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
}
)
@@ -2589,10 +2604,57 @@ func (r *RestartPolicy) Validate() error {
func NewRestartPolicy(jobType string) *RestartPolicy {
switch jobType {
case JobTypeService, JobTypeSystem:
rp := defaultServiceJobRestartPolicy
rp := DefaultServiceJobRestartPolicy
return &rp
case JobTypeBatch:
rp := defaultBatchJobRestartPolicy
rp := DefaultBatchJobRestartPolicy
return &rp
}
return nil
}
const ReschedulePolicyMinInterval = 15 * time.Second
// ReschedulePolicy configures how allocations are rescheduled when they crash or fail.
type ReschedulePolicy struct {
// Attempts limits the number of rescheduling attempts that can occur in an interval.
Attempts int
// Interval is the sliding time window within which Attempts is enforced.
Interval time.Duration
// TODO: delay
}
func (r *ReschedulePolicy) Copy() *ReschedulePolicy {
if r == nil {
return nil
}
nrp := new(ReschedulePolicy)
*nrp = *r
return nrp
}
func (r *ReschedulePolicy) Validate() error {
if r != nil && r.Attempts > 0 {
var mErr multierror.Error
// Check for ambiguous/confusing settings
if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() {
multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval))
}
return mErr.ErrorOrNil()
}
return nil
}
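// Illustrative sketch (not part of this change): the Validate contract above
// in action. Attempts <= 0 disables rescheduling and skips validation; an
// enabled policy must use an interval of at least ReschedulePolicyMinInterval.
func exampleReschedulePolicyValidate() {
	ok := &ReschedulePolicy{Attempts: 2, Interval: 1 * time.Hour}
	fmt.Println(ok.Validate()) // <nil>: interval meets the 15s minimum
	disabled := &ReschedulePolicy{Attempts: 0, Interval: 0}
	fmt.Println(disabled.Validate()) // <nil>: rescheduling is disabled, nothing to check
	tooShort := &ReschedulePolicy{Attempts: 2, Interval: 5 * time.Second}
	fmt.Println(tooShort.Validate()) // error: interval below the 15s minimum
}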
func NewReschedulePolicy(jobType string) *ReschedulePolicy {
switch jobType {
case JobTypeService:
rp := DefaultServiceJobReschedulePolicy
return &rp
case JobTypeBatch:
rp := DefaultBatchJobReschedulePolicy
return &rp
}
return nil
@@ -2628,6 +2690,10 @@ type TaskGroup struct {
// Meta is used to associate arbitrary metadata with this
// task group. This is opaque to Nomad.
Meta map[string]string
// ReschedulePolicy is used to configure how the scheduler should
// retry failed allocations.
ReschedulePolicy *ReschedulePolicy
}
func (tg *TaskGroup) Copy() *TaskGroup {
@@ -2639,6 +2705,7 @@ func (tg *TaskGroup) Copy() *TaskGroup {
ntg.Update = ntg.Update.Copy()
ntg.Constraints = CopySliceConstraints(ntg.Constraints)
ntg.RestartPolicy = ntg.RestartPolicy.Copy()
ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy()
if tg.Tasks != nil {
tasks := make([]*Task, len(ntg.Tasks))
@@ -2669,6 +2736,10 @@ func (tg *TaskGroup) Canonicalize(job *Job) {
tg.RestartPolicy = NewRestartPolicy(job.Type)
}
if tg.ReschedulePolicy == nil {
tg.ReschedulePolicy = NewReschedulePolicy(job.Type)
}
// Set a default ephemeral disk object if the user has not requested for one
if tg.EphemeralDisk == nil {
tg.EphemeralDisk = DefaultEphemeralDisk()
@@ -2719,6 +2790,14 @@ func (tg *TaskGroup) Validate(j *Job) error {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
}
if tg.ReschedulePolicy != nil {
if err := tg.ReschedulePolicy.Validate(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
} else {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name))
}
if tg.EphemeralDisk != nil {
if err := tg.EphemeralDisk.Validate(); err != nil {
mErr.Errors = append(mErr.Errors, err)
@@ -4842,6 +4921,52 @@ type DeploymentStatusUpdate struct {
StatusDescription string
}
// RescheduleTracker encapsulates previous reschedule events
type RescheduleTracker struct {
Events []*RescheduleEvent
}
func (rt *RescheduleTracker) Copy() *RescheduleTracker {
if rt == nil {
return nil
}
nt := &RescheduleTracker{}
*nt = *rt
rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events))
for _, tracker := range rt.Events {
rescheduleEvents = append(rescheduleEvents, tracker.Copy())
}
nt.Events = rescheduleEvents
return nt
}
// RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation
type RescheduleEvent struct {
// RescheduleTime is the timestamp of a reschedule attempt
RescheduleTime int64
// PrevAllocID is the ID of the previous allocation being restarted
PrevAllocID string
// PrevNodeID is the node ID of the previous allocation
PrevNodeID string
}
func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string) *RescheduleEvent {
return &RescheduleEvent{RescheduleTime: rescheduleTime,
PrevAllocID: prevAllocID,
PrevNodeID: prevNodeID}
}
func (re *RescheduleEvent) Copy() *RescheduleEvent {
if re == nil {
return nil
}
ne := new(RescheduleEvent)
*ne = *re
return ne
}
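// Illustrative sketch (not part of this change): Copy yields an independent
// tracker, so a replacement allocation can append its own event without
// mutating the history stored on the failed allocation. The IDs are placeholders.
func exampleRescheduleTrackerCopy(now int64) (*RescheduleTracker, *RescheduleTracker) {
	prev := &RescheduleTracker{Events: []*RescheduleEvent{
		NewRescheduleEvent(now, "alloc-1", "node-1"),
	}}
	next := prev.Copy()
	next.Events = append(next.Events, NewRescheduleEvent(now, "alloc-2", "node-2"))
	// len(prev.Events) == 1 and len(next.Events) == 2
	return prev, next
}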
const (
AllocDesiredStatusRun = "run" // Allocation should run
AllocDesiredStatusStop = "stop" // Allocation should stop
@@ -4940,6 +5065,9 @@ type Allocation struct {
// ModifyTime is the time the allocation was last updated.
ModifyTime int64
// RescheduleTrackers captures details of previous reschedule attempts of the allocation
RescheduleTracker *RescheduleTracker
}
// Index returns the index of the allocation. If the allocation is from a task
@@ -4997,6 +5125,8 @@ func (a *Allocation) copyImpl(job bool) *Allocation {
}
na.TaskStates = ts
}
na.RescheduleTracker = a.RescheduleTracker.Copy()
return na
}
@@ -5019,6 +5149,49 @@ func (a *Allocation) TerminalStatus() bool {
}
}
// ShouldReschedule returns whether the allocation is eligible to be rescheduled according
// to its status and ReschedulePolicy given its failure time
func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
// First check the desired state
switch a.DesiredStatus {
case AllocDesiredStatusStop, AllocDesiredStatusEvict:
return false
default:
}
switch a.ClientStatus {
case AllocClientStatusFailed:
return a.RescheduleEligible(reschedulePolicy, failTime)
default:
return false
}
}
// RescheduleEligible returns whether the allocation is eligible to be rescheduled according
// to its ReschedulePolicy and the current state of its reschedule trackers
func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
if reschedulePolicy == nil {
return false
}
attempts := reschedulePolicy.Attempts
interval := reschedulePolicy.Interval
if attempts == 0 {
return false
}
if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 {
return true
}
attempted := 0
for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
timeDiff := failTime.UTC().UnixNano() - lastAttempt
if timeDiff < interval.Nanoseconds() {
attempted += 1
}
}
return attempted < attempts
}
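// Illustrative sketch (not part of this change): only tracker events that fall
// inside the policy interval count against Attempts, so old attempts age out.
// With Attempts=1 and Interval=5m, an attempt made six minutes before the
// failure no longer blocks a new reschedule.
func exampleRescheduleEligible(now time.Time) bool {
	alloc := &Allocation{
		ClientStatus: AllocClientStatusFailed,
		RescheduleTracker: &RescheduleTracker{Events: []*RescheduleEvent{
			{RescheduleTime: now.Add(-6 * time.Minute).UTC().UnixNano()},
		}},
	}
	policy := &ReschedulePolicy{Attempts: 1, Interval: 5 * time.Minute}
	return alloc.RescheduleEligible(policy, now) // true: the only attempt aged out
}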
// Terminated returns if the allocation is in a terminal state on a client.
func (a *Allocation) Terminated() bool {
if a.ClientStatus == AllocClientStatusFailed ||
@@ -5042,7 +5215,7 @@ func (a *Allocation) RanSuccessfully() bool {
return false
}
// Check to see if all the tasks finised successfully in the allocation
// Check to see if all the tasks finished successfully in the allocation
allSuccess := true
for _, state := range a.TaskStates {
allSuccess = allSuccess && state.Successful()
@@ -5328,6 +5501,7 @@ const (
EvalTriggerDeploymentWatcher = "deployment-watcher"
EvalTriggerFailedFollowUp = "failed-follow-up"
EvalTriggerMaxPlans = "max-plan-attempts"
EvalTriggerRetryFailedAlloc = "alloc-failure"
)
const (

View File

@@ -189,10 +189,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
},
TaskGroups: []*TaskGroup{
{
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
ReschedulePolicy: NewReschedulePolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Update: &UpdateStrategy{
Stagger: 30 * time.Second,
MaxParallel: 2,
@@ -229,10 +230,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
Update: UpdateStrategy{},
TaskGroups: []*TaskGroup{
{
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeBatch),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeBatch),
ReschedulePolicy: NewReschedulePolicy(JobTypeBatch),
EphemeralDisk: DefaultEphemeralDisk(),
},
},
},
@@ -272,10 +274,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
Update: UpdateStrategy{},
TaskGroups: []*TaskGroup{
{
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeBatch),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeBatch),
ReschedulePolicy: NewReschedulePolicy(JobTypeBatch),
EphemeralDisk: DefaultEphemeralDisk(),
},
},
},
@@ -321,10 +324,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
},
TaskGroups: []*TaskGroup{
{
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
ReschedulePolicy: NewReschedulePolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Update: &UpdateStrategy{
Stagger: 2 * time.Second,
MaxParallel: 2,
@@ -363,10 +367,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
},
TaskGroups: []*TaskGroup{
{
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
ReschedulePolicy: NewReschedulePolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Update: &UpdateStrategy{
Stagger: 30 * time.Second,
MaxParallel: 2,
@@ -414,10 +419,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
},
TaskGroups: []*TaskGroup{
{
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "foo",
Count: 2,
RestartPolicy: NewRestartPolicy(JobTypeService),
ReschedulePolicy: NewReschedulePolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Update: &UpdateStrategy{
Stagger: 30 * time.Second,
MaxParallel: 1,
@@ -429,10 +435,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
},
},
{
Name: "bar",
Count: 14,
RestartPolicy: NewRestartPolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Name: "bar",
Count: 14,
RestartPolicy: NewRestartPolicy(JobTypeService),
ReschedulePolicy: NewReschedulePolicy(JobTypeService),
EphemeralDisk: DefaultEphemeralDisk(),
Update: &UpdateStrategy{
Stagger: 30 * time.Second,
MaxParallel: 1,
@@ -444,10 +451,11 @@ func TestJob_Canonicalize_Update(t *testing.T) {
},
},
{
Name: "foo",
Count: 26,
EphemeralDisk: DefaultEphemeralDisk(),
RestartPolicy: NewRestartPolicy(JobTypeService),
Name: "foo",
Count: 26,
EphemeralDisk: DefaultEphemeralDisk(),
RestartPolicy: NewRestartPolicy(JobTypeService),
ReschedulePolicy: NewReschedulePolicy(JobTypeService),
Update: &UpdateStrategy{
Stagger: 30 * time.Second,
MaxParallel: 3,
@@ -560,6 +568,10 @@ func testJob() *Job {
Interval: 10 * time.Minute,
Delay: 1 * time.Minute,
},
ReschedulePolicy: &ReschedulePolicy{
Interval: 5 * time.Minute,
Attempts: 10,
},
Tasks: []*Task{
{
Name: "web",
@@ -914,6 +926,10 @@ func TestTaskGroup_Validate(t *testing.T) {
Attempts: 10,
Mode: RestartPolicyModeDelay,
},
ReschedulePolicy: &ReschedulePolicy{
Interval: 5 * time.Minute,
Attempts: 5,
},
}
err := tg.Validate(j)
mErr := err.(*multierror.Error)
@@ -994,6 +1010,10 @@ func TestTaskGroup_Validate(t *testing.T) {
Attempts: 10,
Mode: RestartPolicyModeDelay,
},
ReschedulePolicy: &ReschedulePolicy{
Interval: 5 * time.Minute,
Attempts: 10,
},
}
err = tg.Validate(j)
@@ -2401,6 +2421,50 @@ func TestRestartPolicy_Validate(t *testing.T) {
}
}
func TestReschedulePolicy_Validate(t *testing.T) {
type testCase struct {
ReschedulePolicy *ReschedulePolicy
err error
}
testCases := []testCase{
{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 0,
Interval: 0 * time.Second},
err: nil,
},
{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 5 * time.Minute},
err: nil,
},
{
ReschedulePolicy: &ReschedulePolicy{
Attempts: -1,
Interval: 5 * time.Minute},
err: nil,
},
{
ReschedulePolicy: &ReschedulePolicy{
Attempts: 1,
Interval: 1 * time.Second},
err: fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, time.Second),
},
}
assert := assert.New(t)
for _, tc := range testCases {
if tc.err != nil {
assert.Contains(tc.ReschedulePolicy.Validate().Error(), tc.err.Error())
} else {
assert.Nil(tc.ReschedulePolicy.Validate())
}
}
}
func TestAllocation_Index(t *testing.T) {
a1 := Allocation{
Name: "example.cache[1]",
@@ -2627,6 +2691,157 @@ func TestAllocation_Terminated(t *testing.T) {
}
}
func TestAllocation_ShouldReschedule(t *testing.T) {
type testCase struct {
Desc string
FailTime time.Time
ClientStatus string
DesiredStatus string
ReschedulePolicy *ReschedulePolicy
RescheduleTrackers []*RescheduleEvent
ShouldReschedule bool
}
fail := time.Now()
harness := []testCase{
{
Desc: "Reschedule when desired state is stop",
ClientStatus: AllocClientStatusPending,
DesiredStatus: AllocDesiredStatusStop,
FailTime: fail,
ReschedulePolicy: nil,
ShouldReschedule: false,
},
{
Desc: "Disabled rescheduling",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: &ReschedulePolicy{0, 1 * time.Minute},
ShouldReschedule: false,
},
{
Desc: "Reschedule when client status is complete",
ClientStatus: AllocClientStatusComplete,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: nil,
ShouldReschedule: false,
},
{
Desc: "Reschedule with nil reschedule policy",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: nil,
ShouldReschedule: false,
},
{
Desc: "Reschedule with policy when client status complete",
ClientStatus: AllocClientStatusComplete,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute},
ShouldReschedule: false,
},
{
Desc: "Reschedule with no previous attempts",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute},
ShouldReschedule: true,
},
{
Desc: "Reschedule with leftover attempts",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
ReschedulePolicy: &ReschedulePolicy{2, 5 * time.Minute},
FailTime: fail,
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-1 * time.Minute).UTC().UnixNano(),
},
},
ShouldReschedule: true,
},
{
Desc: "Reschedule with too old previous attempts",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: &ReschedulePolicy{1, 5 * time.Minute},
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-6 * time.Minute).UTC().UnixNano(),
},
},
ShouldReschedule: true,
},
{
Desc: "Reschedule with no leftover attempts",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
FailTime: fail,
ReschedulePolicy: &ReschedulePolicy{2, 5 * time.Minute},
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: fail.Add(-4 * time.Minute).UTC().UnixNano(),
},
},
ShouldReschedule: false,
},
}
for _, state := range harness {
alloc := Allocation{}
alloc.DesiredStatus = state.DesiredStatus
alloc.ClientStatus = state.ClientStatus
alloc.RescheduleTracker = &RescheduleTracker{state.RescheduleTrackers}
t.Run(state.Desc, func(t *testing.T) {
if got := alloc.ShouldReschedule(state.ReschedulePolicy, state.FailTime); got != state.ShouldReschedule {
t.Fatalf("expected %v but got %v", state.ShouldReschedule, got)
}
})
}
}
func TestRescheduleTracker_Copy(t *testing.T) {
type testCase struct {
original *RescheduleTracker
expected *RescheduleTracker
}
cases := []testCase{
{nil, nil},
{&RescheduleTracker{Events: []*RescheduleEvent{
{2, "12", "12"},
}}, &RescheduleTracker{Events: []*RescheduleEvent{
{2, "12", "12"},
}}},
}
for _, tc := range cases {
if got := tc.original.Copy(); !reflect.DeepEqual(got, tc.expected) {
t.Fatalf("expected %v but got %v", *tc.expected, *got)
}
}
}
func TestVault_Validate(t *testing.T) {
v := &Vault{
Env: true,

View File

@@ -114,7 +114,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,
structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate,
structs.EvalTriggerPeriodicJob, structs.EvalTriggerMaxPlans,
structs.EvalTriggerDeploymentWatcher:
structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerRetryFailedAlloc:
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
@@ -294,46 +294,6 @@ func (s *GenericScheduler) process() (bool, error) {
return true, nil
}
// filterCompleteAllocs filters allocations that are terminal and should be
// re-placed.
func (s *GenericScheduler) filterCompleteAllocs(allocs []*structs.Allocation) []*structs.Allocation {
filter := func(a *structs.Allocation) bool {
if s.batch {
// Allocs from batch jobs should be filtered when the desired status
// is terminal and the client did not finish or when the client
// status is failed so that they will be replaced. If they are
// complete but not failed, they shouldn't be replaced.
switch a.DesiredStatus {
case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
return !a.RanSuccessfully()
default:
}
switch a.ClientStatus {
case structs.AllocClientStatusFailed:
return true
default:
return false
}
}
// Filter terminal, non batch allocations
return a.TerminalStatus()
}
n := len(allocs)
for i := 0; i < n; i++ {
if filter(allocs[i]) {
// Remove the allocation
allocs[i], allocs[n-1] = allocs[n-1], nil
i--
n--
}
}
return allocs[:n]
}
// computeJobAllocs is used to reconcile differences between the job,
// existing allocations and node status to update the allocations.
func (s *GenericScheduler) computeJobAllocs() error {
@@ -356,9 +316,6 @@ func (s *GenericScheduler) computeJobAllocs() error {
// nodes to lost
updateNonTerminalAllocsToLost(s.plan, tainted, allocs)
// Filter out the allocations in a terminal state
allocs = s.filterCompleteAllocs(allocs)
reconciler := NewAllocReconciler(s.ctx.Logger(),
genericAllocUpdateFn(s.ctx, s.stack, s.eval.ID),
s.batch, s.eval.JobID, s.job, s.deployment, allocs, tainted)
@@ -471,17 +428,14 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
// stop the allocation before trying to find a replacement because this
// frees the resources currently used by the previous allocation.
stopPrevAlloc, stopPrevAllocDesc := missing.StopPreviousAlloc()
prevAllocation := missing.PreviousAllocation()
if stopPrevAlloc {
s.plan.AppendUpdate(missing.PreviousAllocation(), structs.AllocDesiredStatusStop, stopPrevAllocDesc, "")
s.plan.AppendUpdate(prevAllocation, structs.AllocDesiredStatusStop, stopPrevAllocDesc, "")
}
// Attempt to match the task group
var option *RankedNode
if preferredNode != nil {
option, _ = s.stack.SelectPreferringNodes(tg, []*structs.Node{preferredNode})
} else {
option, _ = s.stack.Select(tg)
}
// Compute penalty nodes for rescheduled allocs
selectOptions := getSelectOptions(prevAllocation, preferredNode)
option, _ := s.stack.Select(tg, selectOptions)
// Store the available nodes by datacenter
s.ctx.Metrics().NodesAvailable = byDC
@@ -510,8 +464,11 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
// If the new allocation is replacing an older allocation then we record the
// older allocation id so that they are chained
if prev := missing.PreviousAllocation(); prev != nil {
alloc.PreviousAllocation = prev.ID
if prevAllocation != nil {
alloc.PreviousAllocation = prevAllocation.ID
if missing.IsRescheduling() {
updateRescheduleTracker(alloc, prevAllocation)
}
}
// If we are placing a canary and we found a match, add the canary
@@ -537,15 +494,48 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
// If we weren't able to find a replacement for the allocation, back
// out the fact that we asked to stop the allocation.
if stopPrevAlloc {
s.plan.PopUpdate(missing.PreviousAllocation())
s.plan.PopUpdate(prevAllocation)
}
}
}
}
return nil
}
// getSelectOptions sets up preferred nodes and penalty nodes
func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs.Node) *SelectOptions {
selectOptions := &SelectOptions{}
if prevAllocation != nil {
penaltyNodes := make(map[string]struct{})
penaltyNodes[prevAllocation.NodeID] = struct{}{}
if prevAllocation.RescheduleTracker != nil {
for _, reschedEvent := range prevAllocation.RescheduleTracker.Events {
penaltyNodes[reschedEvent.PrevNodeID] = struct{}{}
}
}
selectOptions.PenaltyNodeIDs = penaltyNodes
}
if preferredNode != nil {
selectOptions.PreferredNodes = []*structs.Node{preferredNode}
}
return selectOptions
}
// updateRescheduleTracker carries over previous reschedule attempts and adds the most recent one
func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation) {
var rescheduleEvents []*structs.RescheduleEvent
if prev.RescheduleTracker != nil {
for _, reschedEvent := range prev.RescheduleTracker.Events {
rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy())
}
}
rescheduleEvent := structs.NewRescheduleEvent(time.Now().UTC().UnixNano(), prev.ID, prev.NodeID)
rescheduleEvents = append(rescheduleEvents, rescheduleEvent)
alloc.RescheduleTracker = &structs.RescheduleTracker{Events: rescheduleEvents}
}
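// Illustrative sketch (not part of this change): how the two helpers above
// combine across consecutive reschedules. getSelectOptions penalizes the node
// the allocation just failed on plus every node recorded in its tracker, and
// updateRescheduleTracker then stamps that history onto the replacement.
func exampleRescheduleChain(failed *structs.Allocation) (*SelectOptions, *structs.Allocation) {
	// Penalize failed.NodeID and each tracker event's PrevNodeID.
	opts := getSelectOptions(failed, nil)
	// The replacement carries the chain forward with one new event appended.
	replacement := new(structs.Allocation)
	replacement.PreviousAllocation = failed.ID
	updateRescheduleTracker(replacement, failed)
	return opts, replacement
}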
// findPreferredNode finds the preferred node for an allocation
func (s *GenericScheduler) findPreferredNode(place placementResult) (node *structs.Node, err error) {
if prev := place.PreviousAllocation(); prev != nil && place.TaskGroup().EphemeralDisk.Sticky == true {

View File

@@ -2467,6 +2467,16 @@ func TestServiceSched_NodeDrain_Down(t *testing.T) {
var complete []*structs.Allocation
for i := 6; i < 10; i++ {
newAlloc := stop[i].Copy()
newAlloc.TaskStates = make(map[string]*structs.TaskState)
newAlloc.TaskStates["web"] = &structs.TaskState{
State: structs.TaskStateDead,
Events: []*structs.TaskEvent{
{
Type: structs.TaskTerminated,
ExitCode: 0,
},
},
}
newAlloc.ClientStatus = structs.AllocClientStatusComplete
complete = append(complete, newAlloc)
}
@@ -2705,6 +2715,300 @@ func TestServiceSched_RetryLimit(t *testing.T) {
h.AssertEvalStatus(t, structs.EvalStatusFailed)
}
func TestServiceSched_Reschedule_Once(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations and a reschedule policy.
job := mock.Job()
job.TaskGroups[0].Count = 2
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 15 * time.Minute,
}
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
var allocs []*structs.Allocation
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = nodes[i].ID
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
allocs = append(allocs, alloc)
}
// Mark one of the allocations as failed
allocs[1].ClientStatus = structs.AllocClientStatusFailed
failedAllocID := allocs[1].ID
successAllocID := allocs[0].ID
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Create a mock evaluation
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
// Process the evaluation
err := h.Process(NewServiceScheduler, eval)
if err != nil {
t.Fatalf("err: %v", err)
}
// Ensure at least one plan was created
if len(h.Plans) == 0 {
t.Fatalf("bad: %#v", h.Plans)
}
// Lookup the allocations by JobID
ws := memdb.NewWatchSet()
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
noErr(t, err)
// Verify that one new allocation got created with its reschedule tracker info
assert := assert.New(t)
assert.Equal(3, len(out))
var newAlloc *structs.Allocation
for _, alloc := range out {
if alloc.ID != successAllocID && alloc.ID != failedAllocID {
newAlloc = alloc
}
}
assert.Equal(failedAllocID, newAlloc.PreviousAllocation)
assert.Equal(1, len(newAlloc.RescheduleTracker.Events))
assert.Equal(failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID)
// Mark this alloc as failed again, should not get rescheduled
newAlloc.ClientStatus = structs.AllocClientStatusFailed
noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{newAlloc}))
// Create another mock evaluation
eval = &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
// Process the evaluation
err = h.Process(NewServiceScheduler, eval)
assert.Nil(err)
// Verify no new allocs were created this time
out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
noErr(t, err)
assert.Equal(3, len(out))
}
func TestServiceSched_Reschedule_Multiple(t *testing.T) {
h := NewHarness(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
maxRestartAttempts := 3
// Generate a fake job with allocations and a reschedule policy.
job := mock.Job()
job.TaskGroups[0].Count = 2
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: maxRestartAttempts,
Interval: 30 * time.Minute,
}
noErr(t, h.State.UpsertJob(h.NextIndex(), job))
var allocs []*structs.Allocation
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = nodes[i].ID
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
allocs = append(allocs, alloc)
}
// Mark one of the allocations as failed
allocs[1].ClientStatus = structs.AllocClientStatusFailed
noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
// Create a mock evaluation
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
expectedNumAllocs := 3
expectedNumReschedTrackers := 1
failedAllocId := allocs[1].ID
failedNodeID := allocs[1].NodeID
assert := assert.New(t)
for i := 0; i < maxRestartAttempts; i++ {
// Process the evaluation
err := h.Process(NewServiceScheduler, eval)
noErr(t, err)
// Ensure at least one plan was created
if len(h.Plans) == 0 {
t.Fatalf("bad: %#v", h.Plans)
}
// Lookup the allocations by JobID
ws := memdb.NewWatchSet()
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
noErr(t, err)
// Verify that a new allocation got created with its reschedule tracker info
assert.Equal(expectedNumAllocs, len(out))
// Find the new alloc with ClientStatusPending
var pendingAllocs []*structs.Allocation
var prevFailedAlloc *structs.Allocation
for _, alloc := range out {
if alloc.ClientStatus == structs.AllocClientStatusPending {
pendingAllocs = append(pendingAllocs, alloc)
}
if alloc.ID == failedAllocId {
prevFailedAlloc = alloc
}
}
assert.Equal(1, len(pendingAllocs))
newAlloc := pendingAllocs[0]
assert.Equal(expectedNumReschedTrackers, len(newAlloc.RescheduleTracker.Events))
// Verify the previous NodeID in the most recent reschedule event
reschedEvents := newAlloc.RescheduleTracker.Events
assert.Equal(failedAllocId, reschedEvents[len(reschedEvents)-1].PrevAllocID)
assert.Equal(failedNodeID, reschedEvents[len(reschedEvents)-1].PrevNodeID)
// Verify that the next alloc of the failed alloc is the newly rescheduled alloc
assert.Equal(newAlloc.ID, prevFailedAlloc.NextAllocation)
// Mark this alloc as failed again
newAlloc.ClientStatus = structs.AllocClientStatusFailed
failedAllocId = newAlloc.ID
failedNodeID = newAlloc.NodeID
noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{newAlloc}))
// Create another mock evaluation
eval = &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
expectedNumAllocs += 1
expectedNumReschedTrackers += 1
}
// Process last eval again, should not reschedule
err := h.Process(NewServiceScheduler, eval)
assert.Nil(err)
// Verify no new allocs were created because reschedule attempts were exhausted
ws := memdb.NewWatchSet()
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
noErr(t, err)
assert.Equal(5, len(out)) // 2 original, plus 3 reschedule attempts
}
// Tests that deployments with failed allocs don't result in placements
func TestDeployment_FailedAllocs_NoReschedule(t *testing.T) {
h := NewHarness(t)
require := require.New(t)
// Create some nodes
var nodes []*structs.Node
for i := 0; i < 10; i++ {
node := mock.Node()
nodes = append(nodes, node)
noErr(t, h.State.UpsertNode(h.NextIndex(), node))
}
// Generate a fake job with allocations and a reschedule policy.
job := mock.Job()
job.TaskGroups[0].Count = 2
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 15 * time.Minute,
}
jobIndex := h.NextIndex()
require.Nil(h.State.UpsertJob(jobIndex, job))
deployment := mock.Deployment()
deployment.JobID = job.ID
deployment.JobCreateIndex = jobIndex
deployment.JobVersion = job.Version
require.Nil(h.State.UpsertDeployment(h.NextIndex(), deployment))
var allocs []*structs.Allocation
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = nodes[i].ID
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
alloc.DeploymentID = deployment.ID
allocs = append(allocs, alloc)
}
// Mark one of the allocations as failed
allocs[1].ClientStatus = structs.AllocClientStatusFailed
require.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs))
// Create a mock evaluation
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
require.Nil(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
// Process the evaluation
require.Nil(h.Process(NewServiceScheduler, eval))
// Verify no plan created
require.Equal(0, len(h.Plans))
}
func TestBatchSched_Run_CompleteAlloc(t *testing.T) {
h := NewHarness(t)

View File

@@ -304,3 +304,49 @@ func (iter *JobAntiAffinityIterator) Next() *RankedNode {
func (iter *JobAntiAffinityIterator) Reset() {
iter.source.Reset()
}
// NodeAntiAffinityIterator is used to apply a penalty to
// a node that had a previous failed allocation for the same job.
// This is used when attempting to reschedule a failed alloc
type NodeAntiAffinityIterator struct {
ctx Context
source RankIterator
penalty float64
penaltyNodes map[string]struct{}
}
// NewNodeAntiAffinityIterator is used to create a NodeAntiAffinityIterator that
// applies the given penalty for placement onto nodes in penaltyNodes
func NewNodeAntiAffinityIterator(ctx Context, source RankIterator, penalty float64) *NodeAntiAffinityIterator {
iter := &NodeAntiAffinityIterator{
ctx: ctx,
source: source,
penalty: penalty,
}
return iter
}
func (iter *NodeAntiAffinityIterator) SetPenaltyNodes(penaltyNodes map[string]struct{}) {
iter.penaltyNodes = penaltyNodes
}
func (iter *NodeAntiAffinityIterator) Next() *RankedNode {
for {
option := iter.source.Next()
if option == nil {
return nil
}
_, ok := iter.penaltyNodes[option.Node.ID]
if ok {
option.Score -= iter.penalty
iter.ctx.Metrics().ScoreNode(option.Node, "node-anti-affinity", -iter.penalty)
}
return option
}
}
func (iter *NodeAntiAffinityIterator) Reset() {
iter.penaltyNodes = make(map[string]struct{})
iter.source.Reset()
}
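// Illustrative sketch (not part of this change): wiring the iterator into a
// rank pipeline. Nodes named in penaltyNodes emerge with their score reduced
// by the penalty (50.0 in the generic stack), steering a rescheduled
// placement elsewhere without hard-excluding those nodes.
func exampleNodeAntiAffinity(ctx Context, source RankIterator, failedNodes map[string]struct{}) RankIterator {
	iter := NewNodeAntiAffinityIterator(ctx, source, 50.0)
	iter.SetPenaltyNodes(failedNodes)
	return iter
}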

View File

@@ -6,6 +6,7 @@ import (
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
require "github.com/stretchr/testify/require"
)
func TestFeasibleRankIterator(t *testing.T) {
@@ -429,3 +430,37 @@ func collectRanked(iter RankIterator) (out []*RankedNode) {
}
return
}
func TestNodeAntiAffinity_PenaltyNodes(t *testing.T) {
_, ctx := testContext(t)
node1 := &structs.Node{
ID: uuid.Generate(),
}
node2 := &structs.Node{
ID: uuid.Generate(),
}
nodes := []*RankedNode{
{
Node: node1,
},
{
Node: node2,
},
}
static := NewStaticRankIterator(ctx, nodes)
nodeAntiAffIter := NewNodeAntiAffinityIterator(ctx, static, 50.0)
nodeAntiAffIter.SetPenaltyNodes(map[string]struct{}{node1.ID: {}})
out := collectRanked(nodeAntiAffIter)
require := require.New(t)
require.Equal(2, len(out))
require.Equal(node1.ID, out[0].Node.ID)
require.Equal(-50.0, out[0].Score)
require.Equal(node2.ID, out[1].Node.ID)
require.Equal(0.0, out[1].Score)
}

View File

@@ -159,8 +159,20 @@ func (a *allocReconciler) Compute() *reconcileResults {
// Detect if the deployment is paused
if a.deployment != nil {
// Detect if any allocs associated with this deploy have failed
// Failed allocations could edge trigger an evaluation before the deployment watcher
// runs and marks the deploy as failed. This block makes sure such a deploy is
// still considered failed
failedAllocsInDeploy := false
for _, as := range m {
for _, alloc := range as {
if alloc.DeploymentID == a.deployment.ID && alloc.ClientStatus == structs.AllocClientStatusFailed {
failedAllocsInDeploy = true
}
}
}
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed || failedAllocsInDeploy
}
// Reconcile each group
@@ -305,9 +317,12 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
// Determine what set of allocations are on tainted nodes
untainted, migrate, lost := all.filterByTainted(a.taintedNodes)
// Determine what set of terminal allocations need to be rescheduled
untainted, reschedule := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy)
// Create a structure for choosing names. Seed with the taken names which is
// the union of untainted and migrating nodes (includes canaries)
nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate))
nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, reschedule))
// Stop any unneeded allocations and update the untainted set to not
// included stopped allocations.
@@ -364,7 +379,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
// * The deployment is not paused or failed
// * Not placing any canaries
// * If there are any canaries that they have been promoted
place := a.computePlacements(tg, nameIndex, untainted, migrate)
place := a.computePlacements(tg, nameIndex, untainted, migrate, reschedule)
if !existingDeployment {
dstate.DesiredTotal += len(place)
}
@@ -608,22 +623,38 @@ func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, dest
}
// computePlacements returns the set of allocations to place given the group
// definition, the set of untainted and migrating allocations for the group.
// definition, the set of untainted, migrating and reschedule allocations for the group.
func (a *allocReconciler) computePlacements(group *structs.TaskGroup,
nameIndex *allocNameIndex, untainted, migrate allocSet) []allocPlaceResult {
nameIndex *allocNameIndex, untainted, migrate allocSet, reschedule allocSet) []allocPlaceResult {
// Hot path the nothing to do case
existing := len(untainted) + len(migrate)
if existing >= group.Count {
return nil
}
var place []allocPlaceResult
for _, name := range nameIndex.Next(uint(group.Count - existing)) {
// Add rescheduled placement results
// Any allocations being rescheduled will remain at DesiredStatusRun and ClientStatusFailed
for _, alloc := range reschedule {
place = append(place, allocPlaceResult{
name: name,
taskGroup: group,
name: alloc.Name,
taskGroup: group,
previousAlloc: alloc,
reschedule: true,
})
existing += 1
if existing == group.Count {
break
}
}
// Add remaining placement results
if existing < group.Count {
for _, name := range nameIndex.Next(uint(group.Count - existing)) {
place = append(place, allocPlaceResult{
name: name,
taskGroup: group,
})
}
}
return place
@@ -652,6 +683,10 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc
return stop
}
// Filter out any terminal allocations from the untainted set
// This is so that we don't try to mark them as stopped redundantly
untainted = filterByTerminal(untainted)
// Prefer stopping any alloc that has the same name as the canaries if we
// are promoted
if !canaryState && len(canaries) != 0 {

View File

@@ -38,6 +38,8 @@ Basic Tests:
√ Handle task group being removed
√ Handle job being stopped both as .Stopped and nil
√ Place more than one group
√ Handle rescheduling failed allocs for batch jobs
√ Handle rescheduling failed allocs for service jobs
Update stanza Tests:
√ Stopped job cancels any active deployment
@@ -71,6 +73,8 @@ Update stanza Tests:
√ The stagger is correctly calculated when it is applied across multiple task groups.
√ Handle job change while scaling up
√ Update the job when all allocations from the previous job haven't been placed yet.
√ Paused or failed deployment doesn't do any rescheduling of failed allocs
√ Running deployment with failed allocs doesn't do any rescheduling of failed allocs
*/
var (
@@ -219,6 +223,30 @@ func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place [
}
}
func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
t.Helper()
names := make(map[string]struct{}, numRescheduled)
found := 0
for _, p := range place {
if _, ok := names[p.name]; ok {
t.Fatalf("Name %q already placed", p.name)
}
names[p.name] = struct{}{}
if p.previousAlloc == nil {
continue
}
if p.reschedule {
found++
}
}
if numRescheduled != found {
t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
}
}
func intRange(pairs ...int) []int {
if len(pairs)%2 != 0 {
return nil
@@ -919,6 +947,8 @@ func TestReconciler_DrainNode(t *testing.T) {
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
// These should not have the reschedule field set
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles draining nodes with allocations while
@@ -970,6 +1000,8 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
// These should not have the reschedule field set
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles draining nodes with allocations while
@@ -1021,6 +1053,8 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
// These should not have the reschedule field set
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles a task group being removed
@@ -1168,6 +1202,131 @@ func TestReconciler_MultiTG(t *testing.T) {
assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}
// Tests rescheduling failed batch allocations
func TestReconciler_Reschedule_Batch(t *testing.T) {
// Set desired 4
job := mock.Job()
job.TaskGroups[0].Count = 4
// Set up reschedule policy
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour}
// Create 6 existing allocations - 2 running, 1 complete and 3 failed
var allocs []*structs.Allocation
for i := 0; i < 6; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark 3 as failed with reschedule tracking info
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
}}
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[1].ID,
PrevNodeID: uuid.Generate(),
},
}}
// Mark one as complete
allocs[5].ClientStatus = structs.AllocClientStatusComplete
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil)
r := reconciler.Compute()
// Two reschedule attempts were made, one more can be made
// Alloc 5 should not be replaced because it is terminal
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Ignore: 3,
},
},
})
assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_Reschedule_Service(t *testing.T) {
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Set up reschedule policy
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour}
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark two as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[1].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
// Mark one as desired state stop
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil)
r := reconciler.Compute()
// Should place 2: one is a rescheduled alloc, one is a new placement; the alloc past its reschedule limit is ignored
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Ignore: 3,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 0, 4, 4), placeResultsToNames(r.place))
// 1 rescheduled alloc should have a previous alloc
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests the reconciler cancels an old deployment when the job is being stopped
func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
job := mock.Job()
@@ -3148,3 +3307,92 @@ func TestReconciler_Batch_Rerun(t *testing.T) {
assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}
// Test that a failed deployment will not result in rescheduling failed allocations
func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create an existing failed deployment that has some placed allocs
d := structs.NewDeployment(job)
d.Status = structs.DeploymentStatusFailed
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 5,
PlacedAllocs: 4,
}
// Create 4 allocations and mark two as failed
var allocs []*structs.Allocation
for i := 0; i < 4; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[3].ClientStatus = structs.AllocClientStatusFailed
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil)
r := reconciler.Compute()
// Assert that no rescheduled placements were created
assertResults(t, r, &resultExpectation{
place: 0,
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 2,
},
},
})
}
// Test that a running deployment with failed allocs will not result in rescheduling failed allocations
func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
d := structs.NewDeployment(job)
d.Status = structs.DeploymentStatusRunning
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 5,
PlacedAllocs: 4,
}
// Create 4 allocations and mark two as failed
var allocs []*structs.Allocation
for i := 0; i < 4; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
allocs = append(allocs, alloc)
}
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[3].ClientStatus = structs.AllocClientStatusFailed
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil)
r := reconciler.Compute()
// Assert that no rescheduled placements were created
assertResults(t, r, &resultExpectation{
place: 0,
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 2,
},
},
})
}

View File

@@ -5,6 +5,8 @@ import (
"sort"
"strings"
"time"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -26,6 +28,9 @@ type placementResult interface {
// PreviousAllocation returns the previous allocation
PreviousAllocation() *structs.Allocation
// IsRescheduling returns whether the placement was rescheduling a failed allocation
IsRescheduling() bool
// StopPreviousAlloc returns whether the previous allocation should be
// stopped and if so the status description.
StopPreviousAlloc() (bool, string)
@@ -45,12 +50,14 @@ type allocPlaceResult struct {
canary bool
taskGroup *structs.TaskGroup
previousAlloc *structs.Allocation
reschedule bool
}
func (a allocPlaceResult) TaskGroup() *structs.TaskGroup { return a.taskGroup }
func (a allocPlaceResult) Name() string { return a.name }
func (a allocPlaceResult) Canary() bool { return a.canary }
func (a allocPlaceResult) PreviousAllocation() *structs.Allocation { return a.previousAlloc }
func (a allocPlaceResult) IsRescheduling() bool { return a.reschedule }
func (a allocPlaceResult) StopPreviousAlloc() (bool, string) { return false, "" }
// allocDestructiveResult contains the information required to do a destructive
@@ -67,6 +74,7 @@ func (a allocDestructiveResult) TaskGroup() *structs.TaskGroup { retur
func (a allocDestructiveResult) Name() string { return a.placeName }
func (a allocDestructiveResult) Canary() bool { return false }
func (a allocDestructiveResult) PreviousAllocation() *structs.Allocation { return a.stopAlloc }
func (a allocDestructiveResult) IsRescheduling() bool { return false }
func (a allocDestructiveResult) StopPreviousAlloc() (bool, string) {
return true, a.stopStatusDescription
}
@@ -206,11 +214,80 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi
untainted[alloc.ID] = alloc
continue
}
if n == nil || n.TerminalStatus() {
lost[alloc.ID] = alloc
if !alloc.TerminalStatus() {
if n == nil || n.TerminalStatus() {
lost[alloc.ID] = alloc
} else {
migrate[alloc.ID] = alloc
}
} else {
migrate[alloc.ID] = alloc
untainted[alloc.ID] = alloc
}
}
return
}
// filterByRescheduleable splits the allocation set into the set of allocations
// to keep as-is and the set of failed allocations that must be rescheduled
func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs.ReschedulePolicy) (untainted, reschedule allocSet) {
untainted = make(map[string]*structs.Allocation)
reschedule = make(map[string]*structs.Allocation)
rescheduledPrevAllocs := make(map[string]struct{}) // Track previous allocs from any restart trackers
now := time.Now()
for _, alloc := range a {
if isBatch {
// Allocs from batch jobs should be filtered when the desired status
// is terminal and the client did not finish or when the client
// status is failed so that they will be replaced. If they are
// complete but not failed, they shouldn't be replaced.
switch alloc.DesiredStatus {
case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
if alloc.RanSuccessfully() {
untainted[alloc.ID] = alloc
}
continue
default:
}
if alloc.ShouldReschedule(reschedulePolicy, now) {
reschedule[alloc.ID] = alloc
} else {
untainted[alloc.ID] = alloc
}
} else {
// ignore allocs whose desired state is stop/evict
// everything else is either rescheduleable or untainted
if alloc.ShouldReschedule(reschedulePolicy, now) {
reschedule[alloc.ID] = alloc
} else if alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict {
untainted[alloc.ID] = alloc
}
}
}
// Find allocs that exist in reschedule events from other allocs
// This needs another pass through allocs we marked as reschedulable
for _, alloc := range reschedule {
if alloc.RescheduleTracker != nil {
for _, rescheduleEvent := range alloc.RescheduleTracker.Events {
rescheduledPrevAllocs[rescheduleEvent.PrevAllocID] = struct{}{}
}
}
}
// Delete these from rescheduleable allocs
for allocId := range rescheduledPrevAllocs {
delete(reschedule, allocId)
}
return
}
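// Illustrative sketch (not part of this change): why the second pass above
// matters. If alloc b was already rescheduled from alloc a, b's tracker names
// a as a previous attempt, so a is dropped and only the newest link in the
// chain is replaced. Assumes both allocs are failed and eligible under policy.
func exampleRescheduleDedup(a, b *structs.Allocation, policy *structs.ReschedulePolicy) allocSet {
	set := allocSet{a.ID: a, b.ID: b}
	_, reschedule := set.filterByRescheduleable(false, policy)
	return reschedule // contains b only
}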
// filterByTerminal filters out terminal allocs
func filterByTerminal(untainted allocSet) (nonTerminal allocSet) {
nonTerminal = make(map[string]*structs.Allocation)
for id, alloc := range untainted {
if !alloc.TerminalStatus() {
nonTerminal[id] = alloc
}
}
return

View File

@@ -3,18 +3,27 @@ package scheduler
// LimitIterator is a RankIterator used to limit the number of options
// that are returned before we artificially end the stream.
type LimitIterator struct {
ctx Context
source RankIterator
limit int
seen int
ctx Context
source RankIterator
limit int
maxSkip int
scoreThreshold float64
seen int
skippedNodes []*RankedNode
skippedNodeIndex int
}
// NewLimitIterator is returns a LimitIterator with a fixed limit of returned options
func NewLimitIterator(ctx Context, source RankIterator, limit int) *LimitIterator {
// NewLimitIterator returns a LimitIterator with a fixed limit of returned options.
// Up to maxSkip options whose score is below scoreThreshold are skipped
// if there are additional options available in the source iterator
func NewLimitIterator(ctx Context, source RankIterator, limit int, scoreThreshold float64, maxSkip int) *LimitIterator {
iter := &LimitIterator{
ctx: ctx,
source: source,
limit: limit,
ctx: ctx,
source: source,
limit: limit,
maxSkip: maxSkip,
scoreThreshold: scoreThreshold,
skippedNodes: make([]*RankedNode, 0, maxSkip),
}
return iter
}
@@ -27,19 +36,41 @@ func (iter *LimitIterator) Next() *RankedNode {
if iter.seen == iter.limit {
return nil
}
option := iter.source.Next()
option := iter.nextOption()
if option == nil {
return nil
}
if len(iter.skippedNodes) < iter.maxSkip {
// Try skipping ahead up to maxSkip to find an option whose score is above the threshold
for option != nil && option.Score <= iter.scoreThreshold && len(iter.skippedNodes) < iter.maxSkip {
iter.skippedNodes = append(iter.skippedNodes, option)
option = iter.source.Next()
}
}
iter.seen += 1
if option == nil { // Didn't find anything, so use the skipped nodes instead
return iter.nextOption()
}
return option
}
// nextOption uses the iterator's list of skipped nodes if the source iterator is exhausted
func (iter *LimitIterator) nextOption() *RankedNode {
sourceOption := iter.source.Next()
if sourceOption == nil && iter.skippedNodeIndex < len(iter.skippedNodes) {
skippedOption := iter.skippedNodes[iter.skippedNodeIndex]
iter.skippedNodeIndex += 1
return skippedOption
}
return sourceOption
}
func (iter *LimitIterator) Reset() {
iter.source.Reset()
iter.seen = 0
iter.skippedNodes = make([]*RankedNode, 0, iter.maxSkip)
iter.skippedNodeIndex = 0
}
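// Illustrative sketch (not part of this change): the skip behavior in numbers.
// With limit 2, threshold -10 and maxSkip 3 (the generic stack's settings), up
// to three options scoring <= -10, such as nodes carrying the reschedule
// penalty, are passed over in favor of better ones and are only handed back
// if the source iterator runs dry.
func exampleLimitIterator(ctx Context, source RankIterator) *LimitIterator {
	return NewLimitIterator(ctx, source, 2, -10.0, 3)
}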
// MaxScoreIterator is a RankIterator used to return only a single result

View File

@@ -4,6 +4,8 @@ import (
"testing"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)
func TestLimitIterator(t *testing.T) {
@@ -24,7 +26,7 @@ func TestLimitIterator(t *testing.T) {
}
static := NewStaticRankIterator(ctx, nodes)
limit := NewLimitIterator(ctx, static, 1)
limit := NewLimitIterator(ctx, static, 1, 0, 2)
limit.SetLimit(2)
out := collectRanked(limit)
@@ -50,6 +52,270 @@ func TestLimitIterator(t *testing.T) {
}
}
func TestLimitIterator_ScoreThreshold(t *testing.T) {
_, ctx := testContext(t)
type testCase struct {
desc string
nodes []*RankedNode
expectedOut []*RankedNode
threshold float64
limit int
maxSkip int
}
var nodes []*structs.Node
for i := 0; i < 5; i++ {
nodes = append(nodes, mock.Node())
}
testCases := []testCase{
{
desc: "Skips one low scoring node",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: 2,
},
{
Node: nodes[2],
Score: 3,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[1],
Score: 2,
},
{
Node: nodes[2],
Score: 3,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
},
{
desc: "Skips maxSkip low scoring nodes",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: -2,
},
{
Node: nodes[2],
Score: 3,
},
{
Node: nodes[3],
Score: 4,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[2],
Score: 3,
},
{
Node: nodes[3],
Score: 4,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
},
{
desc: "maxSkip limit reached",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: -6,
},
{
Node: nodes[2],
Score: -3,
},
{
Node: nodes[3],
Score: -4,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[2],
Score: -3,
},
{
Node: nodes[3],
Score: -4,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
},
{
desc: "draw both from skipped nodes",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: -6,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: -6,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
}, {
desc: "one node above threshold, one skipped node",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: 5,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[1],
Score: 5,
},
{
Node: nodes[0],
Score: -1,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
},
{
desc: "low scoring nodes interspersed",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
{
Node: nodes[1],
Score: 5,
},
{
Node: nodes[2],
Score: -2,
},
{
Node: nodes[3],
Score: 2,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[1],
Score: 5,
},
{
Node: nodes[3],
Score: 2,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
},
{
desc: "only one node, score below threshold",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[0],
Score: -1,
},
},
threshold: -1,
limit: 2,
maxSkip: 2,
},
{
desc: "maxSkip is more than available nodes",
nodes: []*RankedNode{
{
Node: nodes[0],
Score: -2,
},
{
Node: nodes[1],
Score: 1,
},
},
expectedOut: []*RankedNode{
{
Node: nodes[1],
Score: 1,
},
{
Node: nodes[0],
Score: -2,
},
},
threshold: -1,
limit: 2,
maxSkip: 10,
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
static := NewStaticRankIterator(ctx, tc.nodes)
limit := NewLimitIterator(ctx, static, 1, 0, 2)
limit.SetLimit(2)
out := collectRanked(limit)
require := require.New(t)
require.Equal(tc.expectedOut, out)
limit.Reset()
require.Equal(0, limit.skippedNodeIndex)
require.Equal(0, len(limit.skippedNodes))
})
}
}
func TestMaxScoreIterator(t *testing.T) {
_, ctx := testContext(t)
nodes := []*RankedNode{

View File

@@ -16,6 +16,18 @@ const (
// batchJobAntiAffinityPenalty is the same as the
// serviceJobAntiAffinityPenalty but for batch type jobs.
batchJobAntiAffinityPenalty = 10.0
// previousFailedAllocNodePenalty is a scoring penalty for nodes
// that a failed allocation was previously run on
previousFailedAllocNodePenalty = 50.0
// skipScoreThreshold is a threshold used in the limit iterator to skip nodes
// that have a score lower than this. -10 is the highest possible score for a
// node with penalty (based on batchJobAntiAffinityPenalty)
skipScoreThreshold = -10.0
// maxSkip limits the number of nodes that can be skipped in the limit iterator
maxSkip = 3
)
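To make the effect of these constants concrete, here is a minimal sketch, assuming it sits alongside these constants and using a simplified stand-in type (this is not the actual `NodeAntiAffinityIterator`): a node that previously ran the failed allocation has `previousFailedAllocNodePenalty` subtracted from its score, which pushes an otherwise neutral score below `skipScoreThreshold`, so the limit iterator skips it while better options remain.

```go
// Sketch only; rankedNode stands in for the package's RankedNode type.
type rankedNode struct {
	NodeID string
	Score  float64
}

// applyPreviousNodePenalty subtracts the anti-affinity penalty from every
// option whose node is in the penalty set (nodes the failed allocation
// previously ran on).
func applyPreviousNodePenalty(options []rankedNode, penaltyNodeIDs map[string]struct{}) {
	for i := range options {
		if _, ok := penaltyNodeIDs[options[i].NodeID]; ok {
			options[i].Score -= previousFailedAllocNodePenalty // 50.0
		}
	}
}
```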
// Stack is a chained collection of iterators. The stack is used to
@@ -29,7 +41,12 @@ type Stack interface {
SetJob(job *structs.Job)
// Select is used to select a node for the task group
Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources)
Select(tg *structs.TaskGroup, options *SelectOptions) (*RankedNode, *structs.Resources)
}
type SelectOptions struct {
PenaltyNodeIDs map[string]struct{}
PreferredNodes []*structs.Node
}
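A hedged sketch of an intended call site, assuming it lives in this package — the helper name and its arguments are illustrative, not part of the PR. Penalty nodes steer placement away from where the failed allocation ran, and preferred nodes are tried first (for example, to reuse a previous allocation's ephemeral disk):

```go
// selectForReschedule is an illustrative helper, not part of the scheduler.
func selectForReschedule(stack Stack, tg *structs.TaskGroup, prevNodeID string,
	preferred []*structs.Node) (*RankedNode, *structs.Resources) {

	opts := &SelectOptions{
		// Penalize the node the failed allocation previously ran on.
		PenaltyNodeIDs: map[string]struct{}{prevNodeID: {}},
		// Try these nodes before falling back to the full node set.
		PreferredNodes: preferred,
	}
	return stack.Select(tg, opts)
}
```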
// GenericStack is the Stack used for the Generic scheduler. It is
@@ -49,6 +66,7 @@ type GenericStack struct {
distinctPropertyConstraint *DistinctPropertyIterator
binPack *BinPackIterator
jobAntiAff *JobAntiAffinityIterator
nodeAntiAff *NodeAntiAffinityIterator
limit *LimitIterator
maxScore *MaxScoreIterator
}
@@ -111,8 +129,10 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
}
s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")
s.nodeAntiAff = NewNodeAntiAffinityIterator(ctx, s.jobAntiAff, previousFailedAllocNodePenalty)
// Apply a limit function. This is to avoid scanning *every* possible node.
s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)
s.limit = NewLimitIterator(ctx, s.nodeAntiAff, 2, skipScoreThreshold, maxSkip)
// Select the node with the maximum score for placement
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
@@ -154,7 +174,23 @@ func (s *GenericStack) SetJob(job *structs.Job) {
}
}
func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) (*RankedNode, *structs.Resources) {
// If the options specify preferred nodes, try to select from them first.
// The source node set is restored to the original nodes afterwards.
if options != nil && len(options.PreferredNodes) > 0 {
originalNodes := s.source.nodes
s.source.SetNodes(options.PreferredNodes)
optionsNew := *options
optionsNew.PreferredNodes = nil
if option, resources := s.Select(tg, &optionsNew); option != nil {
s.source.SetNodes(originalNodes)
return option, resources
}
s.source.SetNodes(originalNodes)
return s.Select(tg, &optionsNew)
}
// Reset the max selector and context
s.maxScore.Reset()
s.ctx.Reset()
@@ -170,6 +206,9 @@ func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Reso
s.distinctPropertyConstraint.SetTaskGroup(tg)
s.wrappedChecks.SetTaskGroup(tg.Name)
s.binPack.SetTaskGroup(tg)
if options != nil {
s.nodeAntiAff.SetPenaltyNodes(options.PenaltyNodeIDs)
}
if contextual, ok := s.quota.(ContextualIterator); ok {
contextual.SetTaskGroup(tg)
@@ -190,19 +229,6 @@ func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Reso
return option, tgConstr.size
}
// SelectPreferredNode returns a node where an allocation of the task group can
// be placed, the node passed to it is preferred over the other available nodes
func (s *GenericStack) SelectPreferringNodes(tg *structs.TaskGroup, nodes []*structs.Node) (*RankedNode, *structs.Resources) {
originalNodes := s.source.nodes
s.source.SetNodes(nodes)
if option, resources := s.Select(tg); option != nil {
s.source.SetNodes(originalNodes)
return option, resources
}
s.source.SetNodes(originalNodes)
return s.Select(tg)
}
// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
type SystemStack struct {
@@ -276,7 +302,7 @@ func (s *SystemStack) SetJob(job *structs.Job) {
}
}
func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
func (s *SystemStack) Select(tg *structs.TaskGroup, options *SelectOptions) (*RankedNode, *structs.Resources) {
// Reset the binpack selector and context
s.binPack.Reset()
s.ctx.Reset()

View File

@@ -8,6 +8,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)
func BenchmarkServiceStack_With_ComputedClass(b *testing.B) {
@@ -47,8 +48,9 @@ func benchmarkServiceStack_MetaKeyConstraint(b *testing.B, key string, numNodes,
stack.SetJob(job)
b.ResetTimer()
selectOptions := &SelectOptions{}
for i := 0; i < b.N; i++ {
stack.Select(job.TaskGroups[0])
stack.Select(job.TaskGroups[0], selectOptions)
}
}
@@ -104,7 +106,8 @@ func TestServiceStack_Select_Size(t *testing.T) {
job := mock.Job()
stack.SetJob(job)
node, size := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, size := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -138,7 +141,9 @@ func TestServiceStack_Select_PreferringNodes(t *testing.T) {
// Create a preferred node
preferredNode := mock.Node()
option, _ := stack.SelectPreferringNodes(job.TaskGroups[0], []*structs.Node{preferredNode})
prefNodes := []*structs.Node{preferredNode}
selectOptions := &SelectOptions{PreferredNodes: prefNodes}
option, _ := stack.Select(job.TaskGroups[0], selectOptions)
if option == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -146,12 +151,17 @@ func TestServiceStack_Select_PreferringNodes(t *testing.T) {
t.Fatalf("expected: %v, actual: %v", option.Node.ID, preferredNode.ID)
}
// Make sure select doesn't have a side effect on preferred nodes
require.Equal(t, prefNodes, selectOptions.PreferredNodes)
// Change the preferred node's kernel to windows and ensure the allocations
// are placed elsewhere
preferredNode1 := preferredNode.Copy()
preferredNode1.Attributes["kernel.name"] = "windows"
preferredNode1.ComputeClass()
option, _ = stack.SelectPreferringNodes(job.TaskGroups[0], []*structs.Node{preferredNode1})
prefNodes1 := []*structs.Node{preferredNode1}
selectOptions = &SelectOptions{PreferredNodes: prefNodes1}
option, _ = stack.Select(job.TaskGroups[0], selectOptions)
if option == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -159,6 +169,7 @@ func TestServiceStack_Select_PreferringNodes(t *testing.T) {
if option.Node.ID != nodes[0].ID {
t.Fatalf("expected: %#v, actual: %#v", nodes[0], option.Node)
}
require.Equal(t, prefNodes1, selectOptions.PreferredNodes)
}
func TestServiceStack_Select_MetricsReset(t *testing.T) {
@@ -174,7 +185,8 @@ func TestServiceStack_Select_MetricsReset(t *testing.T) {
job := mock.Job()
stack.SetJob(job)
n1, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
n1, _ := stack.Select(job.TaskGroups[0], selectOptions)
m1 := ctx.Metrics()
if n1 == nil {
t.Fatalf("missing node %#v", m1)
@@ -184,7 +196,7 @@ func TestServiceStack_Select_MetricsReset(t *testing.T) {
t.Fatalf("should only be 2")
}
n2, _ := stack.Select(job.TaskGroups[0])
n2, _ := stack.Select(job.TaskGroups[0], selectOptions)
m2 := ctx.Metrics()
if n2 == nil {
t.Fatalf("missing node %#v", m2)
@@ -215,7 +227,8 @@ func TestServiceStack_Select_DriverFilter(t *testing.T) {
job.TaskGroups[0].Tasks[0].Driver = "foo"
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, _ := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -243,8 +256,8 @@ func TestServiceStack_Select_ConstraintFilter(t *testing.T) {
job := mock.Job()
job.Constraints[0].RTarget = "freebsd"
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, _ := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -280,8 +293,8 @@ func TestServiceStack_Select_BinPack_Overflow(t *testing.T) {
job := mock.Job()
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, _ := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -347,7 +360,8 @@ func TestSystemStack_Select_Size(t *testing.T) {
job := mock.Job()
stack.SetJob(job)
node, size := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, size := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -381,7 +395,8 @@ func TestSystemStack_Select_MetricsReset(t *testing.T) {
job := mock.Job()
stack.SetJob(job)
n1, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
n1, _ := stack.Select(job.TaskGroups[0], selectOptions)
m1 := ctx.Metrics()
if n1 == nil {
t.Fatalf("missing node %#v", m1)
@@ -391,7 +406,7 @@ func TestSystemStack_Select_MetricsReset(t *testing.T) {
t.Fatalf("should only be 1")
}
n2, _ := stack.Select(job.TaskGroups[0])
n2, _ := stack.Select(job.TaskGroups[0], selectOptions)
m2 := ctx.Metrics()
if n2 == nil {
t.Fatalf("missing node %#v", m2)
@@ -418,7 +433,8 @@ func TestSystemStack_Select_DriverFilter(t *testing.T) {
job.TaskGroups[0].Tasks[0].Driver = "foo"
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, _ := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -435,7 +451,7 @@ func TestSystemStack_Select_DriverFilter(t *testing.T) {
stack = NewSystemStack(ctx)
stack.SetNodes(nodes)
stack.SetJob(job)
node, _ = stack.Select(job.TaskGroups[0])
node, _ = stack.Select(job.TaskGroups[0], selectOptions)
if node != nil {
t.Fatalf("node not filtered %#v", node)
}
@@ -460,7 +476,8 @@ func TestSystemStack_Select_ConstraintFilter(t *testing.T) {
job.Constraints[0].RTarget = "freebsd"
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, _ := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}
@@ -497,7 +514,8 @@ func TestSystemStack_Select_BinPack_Overflow(t *testing.T) {
job := mock.Job()
stack.SetJob(job)
node, _ := stack.Select(job.TaskGroups[0])
selectOptions := &SelectOptions{}
node, _ := stack.Select(job.TaskGroups[0], selectOptions)
if node == nil {
t.Fatalf("missing node %#v", ctx.Metrics())
}

View File

@@ -275,7 +275,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
s.stack.SetNodes(nodes)
// Attempt to match the task group
option, _ := s.stack.Select(missing.TaskGroup)
option, _ := s.stack.Select(missing.TaskGroup, nil)
if option == nil {
// If nodes were filtered because of constraint mismatches and we

View File

@@ -511,7 +511,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
allocInPlace, "")
// Attempt to match the task group
option, _ := stack.Select(update.TaskGroup)
option, _ := stack.Select(update.TaskGroup, nil) // This select only looks at one node so we don't pass selectOptions
// Pop the allocation
ctx.Plan().PopUpdate(update.Alloc)
@@ -722,7 +722,7 @@ func updateNonTerminalAllocsToLost(plan *structs.Plan, tainted map[string]*struc
// genericAllocUpdateFn is a factory for the scheduler to create an allocUpdateType
// function to be passed into the reconciler. The factory takes objects that
// exist only in the scheduler context and returns a function that can be used
// by the reconciler to make decsions about how to update an allocation. The
// by the reconciler to make decisions about how to update an allocation. The
// factory allows the reconciler to be unaware of how to determine the type of
// update necessary and can minimize the set of objects it is exposed to.
func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateType {
@@ -767,7 +767,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
ctx.Plan().AppendUpdate(existing, structs.AllocDesiredStatusStop, allocInPlace, "")
// Attempt to match the task group
option, _ := stack.Select(newTG)
option, _ := stack.Select(newTG, nil) // This select only looks at one node so we don't pass selectOptions
// Pop the allocation
ctx.Plan().PopUpdate(existing)

View File

@@ -52,6 +52,17 @@ $ curl \
"EvalID": "5456bd7a-9fc0-c0dd-6131-cbee77f57577",
"Name": "example.cache[0]",
"NodeID": "fb2170a8-257d-3c64-b14d-bc06cc94e34c",
"PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
"NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b",
"RescheduleTracker": {
"Events": [
{
"PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
"PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e",
"RescheduleTime": 1517434161192946200
}
]
},
"JobID": "example",
"TaskGroup": "cache",
"DesiredStatus": "run",
@@ -184,6 +195,17 @@ $ curl \
"EvalID": "5456bd7a-9fc0-c0dd-6131-cbee77f57577",
"Name": "example.cache[0]",
"NodeID": "fb2170a8-257d-3c64-b14d-bc06cc94e34c",
"PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
"NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b",
"RescheduleTracker": {
"Events": [
{
"PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
"PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e",
"RescheduleTime": 1517434161192946200
}
]
},
"JobID": "example",
"Job": {
"Region": "global",

View File

@@ -185,6 +185,10 @@ The table below shows this endpoint's support for
"Delay": 25000000000,
"Mode": "delay"
},
"ReschedulePolicy": {
"Interval": 300000000000,
"Attempts": 10,
},
"EphemeralDisk": {
"SizeMB": 300
}
@@ -651,6 +655,17 @@ $ curl \
"EvalID": "a9c5effc-2242-51b2-f1fe-054ee11ab189",
"Name": "example.cache[0]",
"NodeID": "cb1f6030-a220-4f92-57dc-7baaabdc3823",
"PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
"NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b",
"RescheduleTracker": {
"Events": [
{
"PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
"PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e",
"RescheduleTime": 1517434161192946200
}
]
},
"JobID": "example",
"TaskGroup": "cache",
"DesiredStatus": "run",

View File

@@ -91,10 +91,14 @@ Below is the JSON representation of the job outputted by `$ nomad init`:
"Leader": false
}],
"RestartPolicy": {
"Interval": 1800000000000,
"Attempts": 2,
"Delay": 15000000000,
"Mode": "fail"
},
"ReschedulePolicy": {
"Interval": 300000000000,
"Attempts": 10,
"Delay": 25000000000,
"Mode": "delay"
},
"EphemeralDisk": {
"SizeMB": 300
@@ -231,6 +235,11 @@ The `Job` object supports the following keys:
}
```
- `ReschedulePolicy` - Specifies a reschedule policy to be applied to all task groups
within the job. When specified both at the job level and the task group level,
the reschedule blocks are merged, with the task group's taking precedence. For more
details on `ReschedulePolicy`, please see below.
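As a rough illustration of this merge behavior — a hypothetical Go helper, not Nomad's implementation, using a simplified policy struct — group-level values override the corresponding job-level values when set:

```go
// Hypothetical sketch of reschedule policy merging; the struct and the
// zero-value override rule are illustrative only.
type reschedulePolicy struct {
	Attempts int
	Interval int64 // nanoseconds, as in the JSON examples above
}

func mergedPolicy(job, group *reschedulePolicy) reschedulePolicy {
	var merged reschedulePolicy
	if job != nil {
		merged = *job
	}
	if group != nil {
		// Group-level values take precedence when present.
		if group.Attempts != 0 {
			merged.Attempts = group.Attempts
		}
		if group.Interval != 0 {
			merged.Interval = group.Interval
		}
	}
	return merged
}
```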
### Task Group
`TaskGroups` is a list of `TaskGroup` objects, each supports the following
@@ -250,6 +259,10 @@ attributes:
If omitted, a default policy for batch and non-batch jobs is used based on the
job type. See the [restart policy reference](#restart_policy) for more details.
- `ReschedulePolicy` - Specifies the reschedule policy to be applied to tasks in this group.
If omitted, a default policy is used for batch and service jobs. System jobs are not eligible
for rescheduling. See the [reschedule policy reference](#reschedule_policy) for more details.
- `EphemeralDisk` - Specifies the group's ephemeral disk requirements. See the
[ephemeral disk reference](#ephemeral_disk) for more details.
@@ -497,6 +510,19 @@ The `EphemeralDisk` object supports the following keys:
`alloc/data` directories to the new allocation. Value is a boolean and the
default is false.
<a id="reschedule_policy"></a>
### Reschedule Policy
The `ReschedulePolicy` object supports the following keys:
- `Attempts` - The number of reschedule attempts allowed within an `Interval`.
- `Interval` - A time duration, specified in nanoseconds. The `Interval` is a
sliding window within which at most `Attempts` reschedule attempts are
permitted.
<a id="restart_policy"></a>
### Restart Policy

View File

@@ -12,7 +12,8 @@ The `alloc-status` command displays status information and metadata about an
existing allocation and its tasks. It can be useful while debugging to reveal
the underlying reasons for scheduling decisions or failures, as well as the
current state of its tasks. As of Nomad 0.7.1, alloc status also shows allocation
modification time in addition to create time.
modification time in addition to create time. As of Nomad 0.8, alloc status shows
information about reschedule attempts.
## Usage
@@ -65,20 +66,22 @@ Full status of an alloc, which shows one of the tasks dying and then being resta
```
$ nomad alloc-status 0af996ed
ID = 0af996ed
Eval ID = be9bde98
Name = example.cache[0]
Node ID = 43c0b14e
Job ID = example
Job Version = 0
Client Status = running
Client Description = <none>
Desired Status = run
Desired Description = <none>
Created = 5m ago
Modified = 5m ago
Deployment ID = 0c83a3b1
Deployment Health = healthy
ID = 0af996ed
Eval ID = be9bde98
Name = example.cache[0]
Node ID = 43c0b14e
Job ID = example
Job Version = 0
Client Status = running
Client Description = <none>
Desired Status = run
Desired Description = <none>
Created = 5m ago
Modified = 5m ago
Deployment ID = 0c83a3b1
Deployment Health = healthy
Replacement Alloc ID = 0bc894ca
Reschedule Attempts = 1/3
Task "redis" is "running"
Task Resources
@@ -119,25 +122,27 @@ Verbose status can also be accessed:
```
$ nomad alloc-status -verbose 0af996ed
ID = 0af996ed-aff4-8ddb-a566-e55ebf8969c9
Eval ID = be9bde98-0490-1beb-ced0-012d10ddf22e
Name = example.cache[0]
Node ID = 43c0b14e-7f96-e432-a7da-06605257ce0c
Job ID = example
Job Version = 0
Client Status = running
Client Description = <none>
Desired Status = run
Desired Description = <none>
Created = 07/25/17 16:12:48 UTC
Modified = 07/25/17 16:12:48 UTC
Deployment ID = 0c83a3b1-8a7b-136b-0e11-8383dc6c9276
Deployment Health = healthy
Evaluated Nodes = 1
Filtered Nodes = 0
Exhausted Nodes = 0
Allocation Time = 38.474µs
Failures = 0
ID = 0af996ed-aff4-8ddb-a566-e55ebf8969c9
Eval ID = be9bde98-0490-1beb-ced0-012d10ddf22e
Name = example.cache[0]
Node ID = 43c0b14e-7f96-e432-a7da-06605257ce0c
Job ID = example
Job Version = 0
Client Status = running
Client Description = <none>
Desired Status = run
Desired Description = <none>
Created = 07/25/17 16:12:48 UTC
Modified = 07/25/17 16:12:48 UTC
Deployment ID = 0c83a3b1-8a7b-136b-0e11-8383dc6c9276
Deployment Health = healthy
Replacement Alloc ID = 0bc894ca
Reschedule Attempts = 1/3
Evaluated Nodes = 1
Filtered Nodes = 0
Exhausted Nodes = 0
Allocation Time = 38.474µs
Failures = 0
Task "redis" is "running"
Task Resources

View File

@@ -0,0 +1,107 @@
---
layout: "docs"
page_title: "reschedule Stanza - Job Specification"
sidebar_current: "docs-job-specification-reschedule"
description: |-
The "reschedule" stanza specifies the group's rescheduling strategy upon
allocation failures. The reschedule strategy can be configured with a number
of attempts and a time interval. Nomad will attempt to reschedule failed
allocations onto another node only after any local [restarts](docs/job-specification/restart.html)
have been exhausted.
---
# `reschedule` Stanza
<table class="table table-bordered table-striped">
<tr>
<th width="120">Placement</th>
<td>
<code>job -> **reschedule**</code>
</td>
<td>
<code>job -> group -> **reschedule**</code>
</td>
</tr>
</table>
The `reschedule` stanza specifies the group's rescheduling strategy. It can be
configured with a number of attempts and a time interval. If specified at the job
level, the configuration applies to all groups within the job. If the
reschedule stanza is present on both the job and the group, the two are merged,
with the group stanza taking precedence over the job stanza.
Nomad will attempt to schedule the task on another node if its allocation
status becomes "failed". It prefers to create the replacement allocation on a
node that the allocation has not previously run on.
```hcl
job "docs" {
group "example" {
reschedule {
attempts = 3
interval = "15m"
}
}
}
```
~> The reschedule stanza does not apply to `system` jobs because they run on
every node.
## `reschedule` Parameters
- `attempts` `(int: <varies>)` - Specifies the number of reschedule attempts
allowed in the configured interval. Defaults vary by job type; see below
for more information.
- `interval` `(string: <varies>)` - Specifies the sliding window that begins
when the first reschedule attempt starts. At most `attempts` reschedule
attempts are permitted within it; once that many failures have occurred
within the interval, Nomad will not reschedule any further (see the sketch
below).
Information about reschedule attempts is displayed in the CLI and API for
allocations. Rescheduling is enabled by default for service and batch jobs
with the options shown below.
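To make the sliding window described above concrete, here is a minimal, self-contained Go sketch — an illustration of the semantics, not Nomad's implementation — that counts prior reschedule attempts within the interval and decides whether another attempt is allowed:

```go
package main

import (
	"fmt"
	"time"
)

// allowReschedule reports whether another reschedule attempt fits in the
// sliding window: at most `attempts` attempts within the trailing `interval`.
func allowReschedule(now time.Time, past []time.Time, attempts int, interval time.Duration) bool {
	used := 0
	for _, t := range past {
		if now.Sub(t) < interval {
			used++
		}
	}
	return used < attempts
}

func main() {
	now := time.Now()
	past := []time.Time{now.Add(-10 * time.Minute), now.Add(-2 * time.Hour)}
	// With the service defaults shown below (attempts = 2, interval = "1h"),
	// only the attempt from 10 minutes ago still counts, so one more attempt
	// is allowed.
	fmt.Println(allowReschedule(now, past, 2, time.Hour)) // true
}
```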
### `reschedule` Parameter Defaults
The values for the `reschedule` parameters vary by job type. Below are the
defaults by job type:
- The Default Batch Reschedule Policy is:
```hcl
reschedule {
attempts = 1
interval = "24h"
}
```
- The Default Service Reschedule Policy is:
```hcl
reschedule {
interval = "1h"
attempts = 2
}
```
### Rescheduling during deployments
The [update stanza](docs/job-specification/update.html) controls rolling updates and canary deployments. A task
group's reschedule stanza does not take effect during a deployment. For example, if a new version of the job
is rolled out and the deployment fails due to a failing allocation, Nomad will not reschedule it.
### Disabling rescheduling
To disable rescheduling, set the `attempts` parameter to zero.
```hcl
job "docs" {
group "example" {
reschedule {
attempts = 0
}
}
}
```

View File

@@ -17,7 +17,8 @@ description: |-
</tr>
</table>
The `restart` stanza configures a group's behavior on task failure.
The `restart` stanza configures a group's behavior on task failure. Restarts
happen on the client that is running the task.
```hcl
job "docs" {
@@ -62,7 +63,7 @@ defaults by job type:
attempts = 15
delay = "15s"
interval = "168h"
mode = "delay"
mode = "fail"
}
```
@@ -73,7 +74,7 @@ defaults by job type:
interval = "1m"
attempts = 2
delay = "15s"
mode = "delay"
mode = "fail"
}
```

View File

@@ -62,6 +62,9 @@
<li<%= sidebar_current("docs-job-specification-periodic")%>>
<a href="/docs/job-specification/periodic.html">periodic</a>
</li>
<li<%= sidebar_current("docs-job-specification-reschedule")%>>
<a href="/docs/job-specification/reschedule.html">reschedule</a>
</li>
<li<%= sidebar_current("docs-job-specification-resources")%>>
<a href="/docs/job-specification/resources.html">resources</a>
</li>