From 83695cb5d10c09acc1e335081815b79d86e59e4f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 15:51:39 -0700 Subject: [PATCH 01/12] Added support for parsing restart blocks --- command/init.go | 11 +++++ jobspec/parse.go | 83 +++++++++++++++++++++++++++++++-- jobspec/parse_test.go | 5 ++ jobspec/test-fixtures/basic.hcl | 5 ++ nomad/structs/structs.go | 39 ++++++++++++++++ 5 files changed, 139 insertions(+), 4 deletions(-) diff --git a/command/init.go b/command/init.go index 0b9be934b..8827f5e9d 100644 --- a/command/init.go +++ b/command/init.go @@ -104,6 +104,17 @@ job "example" { # Defaults to 1 # count = 1 + # Restart Policy - This block defines the restart policy for TaskGroups + # attempts defines the number of restarts Nomad will do if Tasks + # in this TaskGroup fails in a rolling window of interval duration + # The delay value makes Nomad wait for that duration to restart after a Task + # fails or crashes. + restart { + interval = 5m + attempts = 10 + delay = 25s + } + # Define a task to run task "redis" { # Use Docker to run the task. 
diff --git a/jobspec/parse.go b/jobspec/parse.go index f63ac5294..c3c71ac9c 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -30,6 +30,7 @@ func Parse(r io.Reader) (*structs.Job, error) { // Parse the buffer obj, err := hcl.Parse(buf.String()) + if err != nil { return nil, fmt.Errorf("error parsing: %s", err) } @@ -124,7 +125,7 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { } } - // If we have tasks outside, do those + // If we have tasks outside, create TaskGroups for them if o := obj.Get("task", false); o != nil { var tasks []*structs.Task if err := parseTasks(&tasks, o); err != nil { @@ -134,9 +135,10 @@ func parseJob(result *structs.Job, obj *hclobj.Object) error { result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2) for i, t := range tasks { result.TaskGroups[i] = &structs.TaskGroup{ - Name: t.Name, - Count: 1, - Tasks: []*structs.Task{t}, + Name: t.Name, + Count: 1, + Tasks: []*structs.Task{t}, + RestartPolicy: structs.NewRestartPolicy(result.Type), } } } @@ -180,6 +182,7 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { delete(m, "constraint") delete(m, "meta") delete(m, "task") + delete(m, "restart") // Default count to 1 if not specified if _, ok := m["count"]; !ok { @@ -200,6 +203,10 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { } } + if err := parseRestartPolicy(structs.NewRestartPolicy(result.Type), o); err != nil { + return err + } + // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. 
if metaO := o.Get("meta", false); metaO != nil { @@ -228,6 +235,42 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return nil } +func parseRestartPolicy(result *structs.RestartPolicy, obj *hclobj.Object) error { + var restartHclObj *hclobj.Object + var m map[string]interface{} + if restartHclObj = obj.Get("restart", false); restartHclObj == nil { + return nil + } + if err := hcl.DecodeObject(&m, restartHclObj); err != nil { + return err + } + + if delay, ok := m["delay"]; ok { + d, err := toDuration(delay) + if err != nil { + return fmt.Errorf("Invalid Delay time in restart policy: %v", err) + } + result.Delay = d + } + + if interval, ok := m["interval"]; ok { + i, err := toDuration(interval) + if err != nil { + return fmt.Errorf("Invalid Interval time in restart policy: %v", err) + } + result.Interval = i + } + + if attempts, ok := m["attempts"]; ok { + a, err := toInteger(attempts) + if err != nil { + return fmt.Errorf("Invalid value in attempts: %v", err) + } + result.Attempts = a + } + return nil +} + func parseConstraints(result *[]*structs.Constraint, obj *hclobj.Object) error { for _, o := range obj.Elem(false) { var m map[string]interface{} @@ -477,3 +520,35 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } return nil } + +func toDuration(value interface{}) (time.Duration, error) { + var dur time.Duration + var err error + switch v := value.(type) { + case string: + dur, err = time.ParseDuration(v) + case int: + dur = time.Duration(v) * time.Second + default: + err = fmt.Errorf("Invalid time %s", value) + } + + return dur, err +} + +func toInteger(value interface{}) (int, error) { + var integer int + var err error + switch v := value.(type) { + case string: + var i int64 + i, err = strconv.ParseInt(v, 10, 32) + integer = int(i) + case int: + integer = v + default: + err = fmt.Errorf("Value: %v can't be parsed into int", value) + } + + return integer, err +} diff --git a/jobspec/parse_test.go 
b/jobspec/parse_test.go index f91789ddb..c3b91e785 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -48,6 +48,11 @@ func TestParse(t *testing.T) { &structs.TaskGroup{ Name: "outside", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 2, + Interval: 1 * time.Minute, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "outside", diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 941272b2d..bf81a6ae7 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -31,6 +31,11 @@ job "binstore-storagelocker" { group "binsl" { count = 5 + restart { + attempts = 5 + interval = "10m" + delay = "15s" + } task "binstore" { driver = "docker" config { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f5d20552a..8afe1c452 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -898,6 +898,37 @@ func (u *UpdateStrategy) Rolling() bool { return u.Stagger > 0 && u.MaxParallel > 0 } +// RestartPolicy influences how Nomad restarts Tasks when they +// crash or fail. +type RestartPolicy struct { + Attempts int + Interval time.Duration + Delay time.Duration +} + +func (r *RestartPolicy) Validate() error { + if time.Duration(r.Attempts)*r.Delay > r.Interval { + return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) + } + return nil +} + +func NewRestartPolicy(jobType string) *RestartPolicy { + defaultDelayBetweenRestarts := 15 * time.Second + defaultAttempts := 15 + var defaultRestartInterval time.Duration + + if jobType == "service" { + defaultRestartInterval = 1 * time.Minute + defaultAttempts = 2 + } + return &RestartPolicy{ + Attempts: defaultAttempts, + Interval: defaultRestartInterval, + Delay: defaultDelayBetweenRestarts, + } +} + // TaskGroup is an atomic unit of placement. Each task group belongs to // a job and may contain any number of tasks. 
A task group support running // in many replicas using the same configuration.. @@ -913,6 +944,9 @@ type TaskGroup struct { // all the tasks contained. Constraints []*Constraint + //RestartPolicy of a TaskGroup + RestartPolicy *RestartPolicy + // Tasks are the collection of tasks that this task group needs to run Tasks []*Task @@ -940,6 +974,10 @@ func (tg *TaskGroup) Validate() error { } } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + // Check for duplicate tasks tasks := make(map[string]int) for idx, task := range tg.Tasks { @@ -954,6 +992,7 @@ func (tg *TaskGroup) Validate() error { // Validate the tasks for idx, task := range tg.Tasks { + if err := task.Validate(); err != nil { outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) mErr.Errors = append(mErr.Errors, outer) From e2f61e25e9529f439d3a9b7fc836004816d30f6a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 16:32:05 -0700 Subject: [PATCH 02/12] Sending restart policies to the Nomad API --- api/tasks.go | 23 ++++++++++++++++++----- jobspec/parse.go | 3 ++- jobspec/parse_test.go | 5 +++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index c1d5bf2ff..b2516e706 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -1,12 +1,25 @@ package api +import ( + "time" +) + +//RestartPolicy defines how the Nomad client restarts +//tasks in a taskgroup when they fail +type RestartPolicy struct { + Interval time.Duration + Attempts int + Delay time.Duration +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { - Name string - Count int - Constraints []*Constraint - Tasks []*Task - Meta map[string]string + Name string + Count int + Constraints []*Constraint + Tasks []*Task + RestartPolicy *RestartPolicy + Meta map[string]string } // NewTaskGroup creates a new TaskGroup. 
diff --git a/jobspec/parse.go b/jobspec/parse.go index c3c71ac9c..548632239 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -202,8 +202,9 @@ func parseGroups(result *structs.Job, obj *hclobj.Object) error { return err } } + g.RestartPolicy = structs.NewRestartPolicy(result.Type) - if err := parseRestartPolicy(structs.NewRestartPolicy(result.Type), o); err != nil { + if err := parseRestartPolicy(g.RestartPolicy, o); err != nil { return err } diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index c3b91e785..e785443b7 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -82,6 +82,11 @@ func TestParse(t *testing.T) { "elb_interval": "10", "elb_checks": "3", }, + RestartPolicy: &structs.RestartPolicy{ + Interval: 10 * time.Minute, + Attempts: 5, + Delay: 15 * time.Second, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "binstore", From a035dcf2c0d96439bc3fe9bafd26f7039bae0381 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 16:49:08 -0700 Subject: [PATCH 03/12] Re-using toDuration while figuring out staggertime --- jobspec/parse.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/jobspec/parse.go b/jobspec/parse.go index 548632239..1c28d59ee 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -499,19 +499,11 @@ func parseUpdate(result *structs.UpdateStrategy, obj *hclobj.Object) error { } for _, key := range []string{"stagger", "Stagger"} { if raw, ok := m[key]; ok { - switch v := raw.(type) { - case string: - dur, err := time.ParseDuration(v) - if err != nil { - return fmt.Errorf("invalid stagger time '%s'", raw) - } - m[key] = dur - case int: - m[key] = time.Duration(v) * time.Second - default: - return fmt.Errorf("invalid type for stagger time '%s'", - raw) + staggerTime, err := toDuration(raw) + if err != nil { + return fmt.Errorf("Invalid stagger time: %v", err) } + m[key] = staggerTime } } From 93cdcb5ac24d8430bd0fc83ad97c0616b087c65f Mon Sep 17 00:00:00 2001 From: 
Diptanu Choudhury Date: Fri, 30 Oct 2015 18:34:23 -0700 Subject: [PATCH 04/12] Added the restart policies to mocks --- nomad/mock/mock.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 87c426dce..329ecd872 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -1,6 +1,9 @@ package mock -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "github.com/hashicorp/nomad/nomad/structs" + "time" +) func Node() *structs.Node { node := &structs.Node{ @@ -71,6 +74,11 @@ func Job() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 10, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", @@ -131,6 +139,11 @@ func SystemJob() *structs.Job { &structs.TaskGroup{ Name: "web", Count: 1, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, Tasks: []*structs.Task{ &structs.Task{ Name: "web", From 0d17430306ecf6a535f5786c681e87aa7688ef44 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:06:56 -0700 Subject: [PATCH 05/12] Fixed grammar of comment --- command/init.go | 4 ++-- jobspec/parse.go | 1 - nomad/structs/structs.go | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/command/init.go b/command/init.go index 8827f5e9d..851f366be 100644 --- a/command/init.go +++ b/command/init.go @@ -104,8 +104,8 @@ job "example" { # Defaults to 1 # count = 1 - # Restart Policy - This block defines the restart policy for TaskGroups - # attempts defines the number of restarts Nomad will do if Tasks + # Restart Policy - This block defines the restart policy for TaskGroups, + # the attempts value defines the number of restarts Nomad will do if Tasks # in this TaskGroup fails in a rolling window of interval duration # The delay value makes Nomad wait for that duration to restart after a 
Task # fails or crashes. diff --git a/jobspec/parse.go b/jobspec/parse.go index 1c28d59ee..77f9b819f 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -30,7 +30,6 @@ func Parse(r io.Reader) (*structs.Job, error) { // Parse the buffer obj, err := hcl.Parse(buf.String()) - if err != nil { return nil, fmt.Errorf("error parsing: %s", err) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8afe1c452..a42a8f822 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -992,7 +992,6 @@ func (tg *TaskGroup) Validate() error { // Validate the tasks for idx, task := range tg.Tasks { - if err := task.Validate(); err != nil { outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) mErr.Errors = append(mErr.Errors, outer) From 67c21e4b31b1d4ab6e4bef7f2bbad572cf4cbeef Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:28:56 -0700 Subject: [PATCH 06/12] Added a RestartPolicy to some mocks --- api/compose_test.go | 1 + api/tasks.go | 14 ++++++++++++-- api/tasks_test.go | 5 +++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/api/compose_test.go b/api/compose_test.go index 68801519f..2a509bc55 100644 --- a/api/compose_test.go +++ b/api/compose_test.go @@ -69,6 +69,7 @@ func TestCompose(t *testing.T) { Operand: "=", }, }, + RestartPolicy: NewRestartPolicy(), Tasks: []*Task{ &Task{ Name: "task1", diff --git a/api/tasks.go b/api/tasks.go index b2516e706..3ef918850 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -12,6 +12,14 @@ type RestartPolicy struct { Delay time.Duration } +func NewRestartPolicy() *RestartPolicy { + return &RestartPolicy{ + Attempts: 10, + Interval: 3 * time.Minute, + Delay: 5 * time.Second, + } +} + // TaskGroup is the unit of scheduling. type TaskGroup struct { Name string @@ -24,9 +32,11 @@ type TaskGroup struct { // NewTaskGroup creates a new TaskGroup. 
func NewTaskGroup(name string, count int) *TaskGroup { + restartPolicy := NewRestartPolicy() return &TaskGroup{ - Name: name, - Count: count, + Name: name, + Count: count, + RestartPolicy: restartPolicy, } } diff --git a/api/tasks_test.go b/api/tasks_test.go index 877f84d5c..945fdf9bf 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -8,8 +8,9 @@ import ( func TestTaskGroup_NewTaskGroup(t *testing.T) { grp := NewTaskGroup("grp1", 2) expect := &TaskGroup{ - Name: "grp1", - Count: 2, + Name: "grp1", + Count: 2, + RestartPolicy: NewRestartPolicy(), } if !reflect.DeepEqual(grp, expect) { t.Fatalf("expect: %#v, got: %#v", expect, grp) From 96f946b88e901641ad15ee1d93c6a1c36a6e4f83 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Fri, 30 Oct 2015 21:43:00 -0700 Subject: [PATCH 07/12] Not validating task groups if it's nil in a job --- nomad/structs/structs.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a42a8f822..cf81c6afb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -974,8 +974,10 @@ func (tg *TaskGroup) Validate() error { } } - if err := tg.RestartPolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) + if tg.RestartPolicy != nil { + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } } // Check for duplicate tasks From ec819f9761acc65dad3f1df9153e6c5cdbcc7d7a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 13:24:59 -0800 Subject: [PATCH 08/12] Fixing tests to not create a TG without restart policies --- api/tasks.go | 4 ++-- nomad/structs/structs.go | 6 ++---- nomad/structs/structs_test.go | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 3ef918850..2535d5ec5 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -4,8 +4,8 @@ import ( "time" ) -//RestartPolicy defines how the 
Nomad client restarts -//tasks in a taskgroup when they fail +// RestartPolicy defines how the Nomad client restarts +// tasks in a taskgroup when they fail type RestartPolicy struct { Interval time.Duration Attempts int diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index cf81c6afb..a42a8f822 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -974,10 +974,8 @@ func (tg *TaskGroup) Validate() error { } } - if tg.RestartPolicy != nil { - if err := tg.RestartPolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) - } + if err := tg.RestartPolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) } // Check for duplicate tasks diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index cabf83dfa..1f107b095 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1,11 +1,11 @@ package structs import ( + "github.com/hashicorp/go-multierror" "reflect" "strings" "testing" - - "github.com/hashicorp/go-multierror" + "time" ) func TestJob_Validate(t *testing.T) { @@ -44,11 +44,27 @@ func TestJob_Validate(t *testing.T) { TaskGroups: []*TaskGroup{ &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, &TaskGroup{ Name: "web", + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + }, + &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, }, - &TaskGroup{}, }, } err = j.Validate() @@ -65,7 +81,13 @@ func TestJob_Validate(t *testing.T) { } func TestTaskGroup_Validate(t *testing.T) { - tg := &TaskGroup{} + tg := &TaskGroup{ + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, + } err := tg.Validate() mErr := err.(*multierror.Error) if !strings.Contains(mErr.Errors[0].Error(), "group name") { @@ -86,6 
+108,11 @@ func TestTaskGroup_Validate(t *testing.T) { &Task{Name: "web"}, &Task{}, }, + RestartPolicy: &RestartPolicy{ + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + }, } err = tg.Validate() mErr = err.(*multierror.Error) From c7d31e56839f30c95756052029a6e3925cec1d56 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 13:35:51 -0800 Subject: [PATCH 09/12] Declaring Batch and Service default restart policies --- nomad/structs/structs.go | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a42a8f822..f6feaa3de 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,8 +14,17 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") + BatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } + ServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } ) type MessageType uint8 @@ -914,18 +923,13 @@ func (r *RestartPolicy) Validate() error { } func NewRestartPolicy(jobType string) *RestartPolicy { - defaultDelayBetweenRestarts := 15 * time.Second - defaultAttempts := 15 - var defaultRestartInterval time.Duration - - if jobType == "service" { - defaultRestartInterval = 1 * time.Minute - defaultAttempts = 2 - } - return &RestartPolicy{ - Attempts: defaultAttempts, - Interval: defaultRestartInterval, - Delay: defaultDelayBetweenRestarts, + switch jobType { + case JobTypeService: + return &ServiceJobRestartPolicy + case JobTypeBatch: + return &BatchJobRestartPolicy + default: + return nil } } From 795c786ca51bd6b79e6ea967ba269ebfc865b95e Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 15:04:04 -0800 Subject: [PATCH 10/12] Fixed the tests 
--- nomad/structs/structs.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f6feaa3de..589781580 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -14,17 +14,8 @@ import ( ) var ( - ErrNoLeader = fmt.Errorf("No cluster leader") - ErrNoRegionPath = fmt.Errorf("No path to region") - BatchJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } - ServiceJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + ErrNoLeader = fmt.Errorf("No cluster leader") + ErrNoRegionPath = fmt.Errorf("No path to region") ) type MessageType uint8 @@ -925,9 +916,16 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService: - return &ServiceJobRestartPolicy + return &RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } case JobTypeBatch: - return &BatchJobRestartPolicy + return &RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 15, + } default: return nil } From ef841d5e89f638a7de184643ef5c05e956473f92 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 17:00:17 -0800 Subject: [PATCH 11/12] Introducing vars to create default batch and service restart policies --- nomad/structs/structs.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 589781580..15e58d333 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -16,6 +16,15 @@ import ( var ( ErrNoLeader = fmt.Errorf("No cluster leader") ErrNoRegionPath = fmt.Errorf("No path to region") + defaultServiceJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + } + defaultBatchJobRestartPolicy = RestartPolicy{ + Delay: 15 * time.Second, 
+ Attempts: 15, + } ) type MessageType uint8 @@ -916,19 +925,13 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService: - return &RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - } + rp := defaultServiceJobRestartPolicy + return &rp case JobTypeBatch: - return &RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - } - default: - return nil + rp := defaultBatchJobRestartPolicy + return &rp } + return nil } // TaskGroup is an atomic unit of placement. Each task group belongs to From 6a56218fb79e7be603d81d27ba43936cb3a81bec Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 2 Nov 2015 17:30:41 -0800 Subject: [PATCH 12/12] Fixed the restart policy syntax --- command/init.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/init.go b/command/init.go index 851f366be..356337ae8 100644 --- a/command/init.go +++ b/command/init.go @@ -110,9 +110,9 @@ job "example" { # The delay value makes Nomad wait for that duration to restart after a Task # fails or crashes. restart { - interval = 5m + interval = "5m" attempts = 10 - delay = 25s + delay = "25s" } # Define a task to run