From 532c106b632e0750224311b1e8821c7677a7c732 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 11 Apr 2018 14:56:20 -0500 Subject: [PATCH 1/6] Make system jobs fail validation if they contain a reschedule stanza --- api/tasks.go | 4 +--- command/agent/job_endpoint.go | 36 +++++++++++++++++++++++++++++------ nomad/structs/structs.go | 6 +++++- nomad/structs/structs_test.go | 20 +++++++++++++++++++ 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 3f72038f7..e66ed9557 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -430,14 +430,12 @@ func (g *TaskGroup) Canonicalize(job *Job) { MaxDelay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.MaxDelay), Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), } - default: - defaultReschedulePolicy = nil } if defaultReschedulePolicy != nil && g.ReschedulePolicy != nil { defaultReschedulePolicy.Merge(g.ReschedulePolicy) + g.ReschedulePolicy = defaultReschedulePolicy } - g.ReschedulePolicy = defaultReschedulePolicy // Merge the migrate strategy from the job if jm, tm := job.Migrate != nil, g.Migrate != nil; jm && tm { diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index ce1605728..cc8c9e114 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -6,6 +6,8 @@ import ( "strconv" "strings" + "time" + "github.com/golang/snappy" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/nomad/structs" @@ -639,13 +641,35 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) { } if taskGroup.ReschedulePolicy != nil { + attempts := 0 + if taskGroup.ReschedulePolicy.Attempts != nil { + attempts = *taskGroup.ReschedulePolicy.Attempts + } + var interval, delay, maxDelay time.Duration + if taskGroup.ReschedulePolicy.Interval != nil { + interval = *taskGroup.ReschedulePolicy.Interval + } + if taskGroup.ReschedulePolicy.Delay != nil { + delay = *taskGroup.ReschedulePolicy.Delay + } + if taskGroup.ReschedulePolicy.MaxDelay != nil { + maxDelay = *taskGroup.ReschedulePolicy.MaxDelay + } + delayFunction := "" + if taskGroup.ReschedulePolicy.DelayFunction != nil { + delayFunction = *taskGroup.ReschedulePolicy.DelayFunction + } + unlimited := false + if taskGroup.ReschedulePolicy.Unlimited != nil { + unlimited = *taskGroup.ReschedulePolicy.Unlimited + } tg.ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: *taskGroup.ReschedulePolicy.Attempts, - Interval: *taskGroup.ReschedulePolicy.Interval, - Delay: *taskGroup.ReschedulePolicy.Delay, - DelayFunction: *taskGroup.ReschedulePolicy.DelayFunction, - MaxDelay: *taskGroup.ReschedulePolicy.MaxDelay, - Unlimited: *taskGroup.ReschedulePolicy.Unlimited, + Attempts: attempts, + Interval: interval, + Delay: delay, + DelayFunction: delayFunction, + MaxDelay: maxDelay, + Unlimited: unlimited, } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e8f76b81d..39483ec38 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3301,7 +3301,11 @@ func (tg *TaskGroup) Validate(j *Job) error { mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) } - if j.Type != JobTypeSystem { + if j.Type == JobTypeSystem { + if tg.ReschedulePolicy != nil { + mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy but got %+v", tg.ReschedulePolicy)) + } + } else { if tg.ReschedulePolicy != nil { if err := tg.ReschedulePolicy.Validate(); err != nil { mErr.Errors = append(mErr.Errors, err) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 17a2a6329..4c9d51d2e 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -713,6 +713,26 @@ func TestTaskGroup_Validate(t *testing.T) { if !strings.Contains(err.Error(), "does not allow update block") { t.Fatalf("err: %s", err) } + + tg = &TaskGroup{ + Count: -1, + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + Mode: RestartPolicyModeDelay, + }, + ReschedulePolicy: &ReschedulePolicy{ + Interval: 5 * time.Minute, + Attempts: 5, + Delay: 5 * time.Second, + }, + } + j.Type = JobTypeSystem + err = tg.Validate(j) + if !strings.Contains(err.Error(), "System jobs should not have a reschedule policy") { + t.Fatalf("err: %s", err) + } } func TestTask_Validate(t *testing.T) { From beb1a013db3b31498f479854c9bbfe6fc4a9b98f Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 11 Apr 2018 15:26:01 -0500 Subject: [PATCH 2/6] Always merge with default reschedule policy if its not nil --- api/tasks.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/tasks.go b/api/tasks.go index e66ed9557..7a74a6355 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -103,6 +103,9 @@ type ReschedulePolicy struct { } func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { + if rp == nil { + return + } if rp.Interval != nil { r.Interval = rp.Interval } @@ -432,7 +435,7 @@ func (g *TaskGroup) Canonicalize(job *Job) { } } - if defaultReschedulePolicy != nil && g.ReschedulePolicy != nil { + if defaultReschedulePolicy != nil { defaultReschedulePolicy.Merge(g.ReschedulePolicy) g.ReschedulePolicy = defaultReschedulePolicy } From a181137991fc144a8751c4a3059c70fac11295d0 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 11 Apr 2018 15:49:23 -0500 Subject: [PATCH 3/6] Fix one more failing test --- nomad/structs/structs_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 4c9d51d2e..8883e7a0b 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -367,6 +367,7 @@ func TestJob_IsPeriodicActive(t *testing.T) { func TestJob_SystemJob_Validate(t *testing.T) { j := testJob() j.Type = JobTypeSystem + j.TaskGroups[0].ReschedulePolicy = nil j.Canonicalize() err := j.Validate() From b1b08a536bede679a88691cbeb2d0d5532e4248a Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 11 Apr 2018 15:51:24 -0500 Subject: [PATCH 4/6] Fix more tests --- nomad/mock/mock.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index c037289dd..1d39384a8 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -263,10 +263,6 @@ func SystemJob() *structs.Job { Delay: 1 * time.Minute, Mode: structs.RestartPolicyModeDelay, }, - ReschedulePolicy: &structs.ReschedulePolicy{ - Attempts: 2, - Interval: 10 * time.Minute, - }, EphemeralDisk: structs.DefaultEphemeralDisk(), Tasks: []*structs.Task{ { From ae74aa965474a6c33da0428ba99f6ec57065161b Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 11 Apr 2018 17:07:14 -0500 Subject: [PATCH 5/6] dont print reschedule policy in error message --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 39483ec38..1fd30e322 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3303,7 +3303,7 @@ func (tg *TaskGroup) Validate(j *Job) error { if j.Type == JobTypeSystem { if tg.ReschedulePolicy != nil { - mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy but got %+v", tg.ReschedulePolicy)) + mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy")) } } else { if tg.ReschedulePolicy != nil { From 20a029adaa035ef6164ff1dbc6a36606d5b5cfca Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 11 Apr 2018 18:47:27 -0500 Subject: [PATCH 6/6] add canonicalize for reschedulepolicy to simplify validation logic --- api/tasks.go | 89 ++++++++++++++++++++++++----------- command/agent/job_endpoint.go | 36 +++----------- 2 files changed, 68 insertions(+), 57 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 7a74a6355..4895312ef 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -126,6 +126,63 @@ func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { } } +func (r *ReschedulePolicy) Canonicalize(jobType string) { + dp := NewDefaultReschedulePolicy(jobType) + if r.Interval == nil { + r.Interval = dp.Interval + } + if r.Attempts == nil { + r.Attempts = dp.Attempts + } + if r.Delay == nil { + r.Delay = dp.Delay + } + if r.DelayFunction == nil { + r.DelayFunction = dp.DelayFunction + } + if r.MaxDelay == nil { + r.MaxDelay = dp.MaxDelay + } + if r.Unlimited == nil { + r.Unlimited = dp.Unlimited + } +} + +func NewDefaultReschedulePolicy(jobType string) *ReschedulePolicy { + var dp *ReschedulePolicy + switch jobType { + case "service": + dp = &ReschedulePolicy{ + Attempts: helper.IntToPtr(structs.DefaultServiceJobReschedulePolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Interval), + Delay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Delay), + DelayFunction: helper.StringToPtr(structs.DefaultServiceJobReschedulePolicy.DelayFunction), + MaxDelay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.MaxDelay), + Unlimited: helper.BoolToPtr(structs.DefaultServiceJobReschedulePolicy.Unlimited), + } + case "batch": + dp = &ReschedulePolicy{ + Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), + DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), + MaxDelay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.MaxDelay), + Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), + } + + case "system": + dp = &ReschedulePolicy{ + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + Delay: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr(""), + MaxDelay: helper.TimeToPtr(0), + Unlimited: helper.BoolToPtr(false), + } + } + return dp +} + func (r *ReschedulePolicy) Copy() *ReschedulePolicy { if r == nil { return nil @@ -411,35 +468,13 @@ func (g *TaskGroup) Canonicalize(job *Job) { jobReschedule := job.Reschedule.Copy() g.ReschedulePolicy = jobReschedule } - - // Merge with default reschedule policy - var defaultReschedulePolicy *ReschedulePolicy - switch *job.Type { - case "service": - defaultReschedulePolicy = &ReschedulePolicy{ - Attempts: helper.IntToPtr(structs.DefaultServiceJobReschedulePolicy.Attempts), - Interval: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Interval), - Delay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Delay), - DelayFunction: helper.StringToPtr(structs.DefaultServiceJobReschedulePolicy.DelayFunction), - MaxDelay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.MaxDelay), - Unlimited: helper.BoolToPtr(structs.DefaultServiceJobReschedulePolicy.Unlimited), - } - case "batch": - defaultReschedulePolicy = &ReschedulePolicy{ - Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts), - Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), - Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), - DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), - MaxDelay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.MaxDelay), - Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), - } + // Only use default reschedule policy for non system jobs + if g.ReschedulePolicy == nil && *job.Type != "system" { + g.ReschedulePolicy = NewDefaultReschedulePolicy(*job.Type) } - - if defaultReschedulePolicy != nil { - defaultReschedulePolicy.Merge(g.ReschedulePolicy) - g.ReschedulePolicy = defaultReschedulePolicy + if g.ReschedulePolicy != nil { + g.ReschedulePolicy.Canonicalize(*job.Type) } - // Merge the migrate strategy from the job if jm, tm := job.Migrate != nil, g.Migrate != nil; jm && tm { jobMigrate := job.Migrate.Copy() diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index cc8c9e114..ce1605728 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -6,8 +6,6 @@ import ( "strconv" "strings" - "time" - "github.com/golang/snappy" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/nomad/structs" @@ -641,35 +639,13 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) { } if taskGroup.ReschedulePolicy != nil { - attempts := 0 - if taskGroup.ReschedulePolicy.Attempts != nil { - attempts = *taskGroup.ReschedulePolicy.Attempts - } - var interval, delay, maxDelay time.Duration - if taskGroup.ReschedulePolicy.Interval != nil { - interval = *taskGroup.ReschedulePolicy.Interval - } - if taskGroup.ReschedulePolicy.Delay != nil { - delay = *taskGroup.ReschedulePolicy.Delay - } - if taskGroup.ReschedulePolicy.MaxDelay != nil { - maxDelay = *taskGroup.ReschedulePolicy.MaxDelay - } - delayFunction := "" - if taskGroup.ReschedulePolicy.DelayFunction != nil { - delayFunction = *taskGroup.ReschedulePolicy.DelayFunction - } - unlimited := false - if taskGroup.ReschedulePolicy.Unlimited != nil { - unlimited = *taskGroup.ReschedulePolicy.Unlimited - } tg.ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: attempts, - Interval: interval, - Delay: delay, - DelayFunction: delayFunction, - MaxDelay: maxDelay, - Unlimited: unlimited, + Attempts: *taskGroup.ReschedulePolicy.Attempts, + Interval: *taskGroup.ReschedulePolicy.Interval, + Delay: *taskGroup.ReschedulePolicy.Delay, + DelayFunction: *taskGroup.ReschedulePolicy.DelayFunction, + MaxDelay: *taskGroup.ReschedulePolicy.MaxDelay, + Unlimited: *taskGroup.ReschedulePolicy.Unlimited, } }