From a2132950a563e4edc5ddfea2240a78211d926980 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 2 Feb 2016 15:08:07 -0800 Subject: [PATCH 1/4] Restart on-success shouldn't be user specifiable --- api/tasks.go | 9 ++++----- client/alloc_runner.go | 4 ++-- client/alloc_runner_test.go | 2 +- client/restarts.go | 14 ++++++++++---- client/restarts_test.go | 17 ++++++++--------- command/init.go | 3 --- jobspec/parse_test.go | 9 ++++----- jobspec/test-fixtures/basic.hcl | 1 - nomad/mock/mock.go | 18 ++++++++---------- nomad/structs/structs.go | 22 ++++++++-------------- nomad/structs/structs_test.go | 18 ++++++++---------- website/source/docs/jobspec/index.html.md | 5 ----- 12 files changed, 53 insertions(+), 69 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 122cd804e..7fa476b72 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -7,11 +7,10 @@ import ( // RestartPolicy defines how the Nomad client restarts // tasks in a taskgroup when they fail type RestartPolicy struct { - Interval time.Duration - Attempts int - Delay time.Duration - RestartOnSuccess bool - Mode string + Interval time.Duration + Attempts int + Delay time.Duration + Mode string } // The ServiceCheck data model represents the consul health check that diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 6b559ce91..69cdb1d59 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -115,7 +115,7 @@ func (r *AllocRunner) RestoreState() error { r.restored[name] = struct{}{} task := &structs.Task{Name: name} - restartTracker := newRestartTracker(r.RestartPolicy) + restartTracker := newRestartTracker(r.RestartPolicy, r.alloc.Job.Type) tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.alloc, task, restartTracker, r.consulService) r.tasks[name] = tr @@ -370,7 +370,7 @@ func (r *AllocRunner) Run() { // Merge in the task resources task.Resources = alloc.TaskResources[task.Name] - restartTracker := newRestartTracker(r.RestartPolicy) + restartTracker := newRestartTracker(r.RestartPolicy, r.alloc.Job.Type) tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.alloc, task, restartTracker, r.consulService) r.tasks[task.Name] = tr diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index 80a966859..0d83ed4ca 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -33,7 +33,7 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) { alloc := mock.Alloc() consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}}) if !restarts { - *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0, RestartOnSuccess: false} + *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0} } ar := NewAllocRunner(logger, conf, upd.Update, alloc, consulClient) diff --git a/client/restarts.go b/client/restarts.go index 79aff54f3..a7ac78f34 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -10,9 +10,14 @@ import ( // jitter is the percent of jitter added to restart delays. const jitter = 0.25 -func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker { +func newRestartTracker(policy *structs.RestartPolicy, jobType string) *RestartTracker { + onSuccess := true + if jobType == structs.JobTypeBatch { + onSuccess = false + } return &RestartTracker{ startTime: time.Now(), + onSuccess: onSuccess, policy: policy, rand: rand.New(rand.NewSource(time.Now().Unix())), } @@ -20,6 +25,7 @@ func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker { type RestartTracker struct { count int // Current number of attempts. + onSuccess bool // Whether to restart on successful exit code. startTime time.Time // When the interval began policy *structs.RestartPolicy rand *rand.Rand @@ -57,9 +63,9 @@ func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) { } // shouldRestart returns whether a restart should occur based on the exit code -// and the RestartOnSuccess configuration. +// and job type. func (r *RestartTracker) shouldRestart(exitCode int) bool { - return exitCode != 0 || r.policy.RestartOnSuccess + return exitCode != 0 || r.onSuccess } // jitter returns the delay time plus a jitter. @@ -77,5 +83,5 @@ func (r *RestartTracker) jitter() time.Duration { // Returns a tracker that never restarts. func noRestartsTracker() *RestartTracker { policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail} - return newRestartTracker(policy) + return newRestartTracker(policy, structs.JobTypeBatch) } diff --git a/client/restarts_test.go b/client/restarts_test.go index 719cbd088..89dd18c1a 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -9,11 +9,10 @@ import ( func testPolicy(success bool, mode string) *structs.RestartPolicy { return &structs.RestartPolicy{ - Interval: 2 * time.Minute, - Delay: 1 * time.Second, - Attempts: 3, - Mode: mode, - RestartOnSuccess: success, + Interval: 2 * time.Minute, + Delay: 1 * time.Second, + Attempts: 3, + Mode: mode, } } @@ -27,7 +26,7 @@ func withinJitter(expected, actual time.Duration) bool { func TestClient_RestartTracker_ModeDelay(t *testing.T) { t.Parallel() p := testPolicy(true, structs.RestartPolicyModeDelay) - rt := newRestartTracker(p) + rt := newRestartTracker(p, structs.JobTypeService) for i := 0; i < p.Attempts; i++ { actual, when := rt.NextRestart(127) if !actual { @@ -53,7 +52,7 @@ func TestClient_RestartTracker_ModeDelay(t *testing.T) { func TestClient_RestartTracker_ModeFail(t *testing.T) { t.Parallel() p := testPolicy(true, structs.RestartPolicyModeFail) - rt := newRestartTracker(p) + rt := newRestartTracker(p, structs.JobTypeSystem) for i := 0; i < p.Attempts; i++ { actual, when := rt.NextRestart(127) if !actual { @@ -73,7 +72,7 @@ func TestClient_RestartTracker_ModeFail(t *testing.T) { func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) { t.Parallel() p := testPolicy(false, structs.RestartPolicyModeDelay) - rt := newRestartTracker(p) + rt := newRestartTracker(p, structs.JobTypeBatch) if shouldRestart, _ := rt.NextRestart(0); shouldRestart { t.Fatalf("NextRestart() returned %v, expected: %v", shouldRestart, false) } @@ -83,7 +82,7 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) { t.Parallel() p := testPolicy(true, structs.RestartPolicyModeFail) p.Attempts = 0 - rt := newRestartTracker(p) + rt := newRestartTracker(p, structs.JobTypeService) if actual, when := rt.NextRestart(1); actual { t.Fatalf("expect no restart, got restart/delay: %v", when) } diff --git a/command/init.go b/command/init.go index 61f6c822b..f968e034a 100644 --- a/command/init.go +++ b/command/init.go @@ -114,9 +114,6 @@ job "example" { # A delay between a task failing and a restart occuring. delay = "25s" - # Whether the tasks should be restarted if the exit successfully. - on_success = true - # Mode controls what happens when a task has restarted "attempts" # times within the interval. "delay" mode delays the next restart # till the next interval. "fail" mode does not restart the task if diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index e45203757..63e23b542 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -78,11 +78,10 @@ func TestParse(t *testing.T) { "elb_checks": "3", }, RestartPolicy: &structs.RestartPolicy{ - Interval: 10 * time.Minute, - Attempts: 5, - Delay: 15 * time.Second, - RestartOnSuccess: true, - Mode: "delay", + Interval: 10 * time.Minute, + Attempts: 5, + Delay: 15 * time.Second, + Mode: "delay", }, Tasks: []*structs.Task{ &structs.Task{ diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index d7a313a33..352aeb7cd 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -35,7 +35,6 @@ job "binstore-storagelocker" { attempts = 5 interval = "10m" delay = "15s" - on_success = true mode = "delay" } task "binstore" { diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index f6fa7b1be..7aa718c4c 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -77,11 +77,10 @@ func Job() *structs.Job { Name: "web", Count: 10, RestartPolicy: &structs.RestartPolicy{ - Attempts: 3, - Interval: 10 * time.Minute, - Delay: 1 * time.Minute, - RestartOnSuccess: true, - Mode: structs.RestartPolicyModeDelay, + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + Mode: structs.RestartPolicyModeDelay, }, Tasks: []*structs.Task{ &structs.Task{ @@ -156,11 +155,10 @@ func SystemJob() *structs.Job { Name: "web", Count: 1, RestartPolicy: &structs.RestartPolicy{ - Attempts: 3, - Interval: 10 * time.Minute, - Delay: 1 * time.Minute, - RestartOnSuccess: true, - Mode: structs.RestartPolicyModeDelay, + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + Mode: structs.RestartPolicyModeDelay, }, Tasks: []*structs.Task{ &structs.Task{ diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 9742b47fc..dc236d618 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1146,18 +1146,16 @@ type PeriodicLaunch struct { var ( defaultServiceJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 2, - Interval: 1 * time.Minute, - RestartOnSuccess: true, - Mode: RestartPolicyModeDelay, + Delay: 15 * time.Second, + Attempts: 2, + Interval: 1 * time.Minute, + Mode: RestartPolicyModeDelay, } defaultBatchJobRestartPolicy = RestartPolicy{ - Delay: 15 * time.Second, - Attempts: 15, - Interval: 7 * 24 * time.Hour, - RestartOnSuccess: false, - Mode: RestartPolicyModeDelay, + Delay: 15 * time.Second, + Attempts: 15, + Interval: 7 * 24 * time.Hour, + Mode: RestartPolicyModeDelay, } ) @@ -1183,10 +1181,6 @@ type RestartPolicy struct { // Delay is the time between a failure and a restart. Delay time.Duration - // RestartOnSuccess determines whether a task should be restarted if it - // exited successfully. - RestartOnSuccess bool `mapstructure:"on_success"` - // Mode controls what happens when the task restarts more than attempt times // in an interval. Mode string diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 58e288c57..076456c08 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -189,11 +189,10 @@ func TestJob_IsPeriodic(t *testing.T) { func TestTaskGroup_Validate(t *testing.T) { tg := &TaskGroup{ RestartPolicy: &RestartPolicy{ - Interval: 5 * time.Minute, - Delay: 10 * time.Second, - Attempts: 10, - RestartOnSuccess: true, - Mode: RestartPolicyModeDelay, + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + Mode: RestartPolicyModeDelay, }, } err := tg.Validate() @@ -217,11 +216,10 @@ func TestTaskGroup_Validate(t *testing.T) { &Task{}, }, RestartPolicy: &RestartPolicy{ - Interval: 5 * time.Minute, - Delay: 10 * time.Second, - Attempts: 10, - RestartOnSuccess: true, - Mode: RestartPolicyModeDelay, + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + Attempts: 10, + Mode: RestartPolicyModeDelay, }, } err = tg.Validate() diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md index 139a00835..7c5999181 100644 --- a/website/source/docs/jobspec/index.html.md +++ b/website/source/docs/jobspec/index.html.md @@ -309,9 +309,6 @@ The `restart` object supports the following keys: time duration using the `s`, `m`, and `h` suffixes, such as `30s`. A random jitter of up to 25% is added to the the delay. -* `on_success` - `on_success` controls whether a task is restarted when the - task exits successfully. - * `mode` - Controls the behavior when the task fails more than `attempts` times in an interval. Possible values are listed below: @@ -327,7 +324,6 @@ restart { attempts = 15 delay = "15s" interval = "168h" # 7 days - on_success = false mode = "delay" } ``` @@ -339,7 +335,6 @@ restart { interval = "1m" attempts = 2 delay = "15s" - on_success = true mode = "delay" } ``` From d04fc2647cd14c35d99a797a1afc569c00517d30 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 2 Feb 2016 15:18:35 -0800 Subject: [PATCH 2/4] Fix test --- client/task_runner_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/task_runner_test.go b/client/task_runner_test.go index 52c8c73b3..f0f6c92e2 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -49,7 +49,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) { ctx := driver.NewExecContext(allocDir, alloc.ID) rp := structs.NewRestartPolicy(structs.JobTypeService) - restartTracker := newRestartTracker(rp) + restartTracker := newRestartTracker(rp, alloc.Job.Type) if !restarts { restartTracker = noRestartsTracker() } From de6e1e791e66605558ac8003b30d92bcbc4d73e4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 2 Feb 2016 15:35:25 -0800 Subject: [PATCH 3/4] Another test fix --- client/alloc_runner_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index 0d83ed4ca..6b808b0e6 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -34,6 +34,7 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) { consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}}) if !restarts { *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0} + alloc.Job.Type = structs.JobTypeBatch } ar := NewAllocRunner(logger, conf, upd.Update, alloc, consulClient) From 2b43ddfa7e91f7aa61e684a610692fdca87c5bc1 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 2 Feb 2016 17:39:01 -0800 Subject: [PATCH 4/4] Fix test --- jobspec/parse_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 63e23b542..399bb4a32 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -113,7 +113,7 @@ func TestParse(t *testing.T) { CPU: 500, MemoryMB: 128, DiskMB: 10, - IOPS: 1, + IOPS: 0, Networks: []*structs.NetworkResource{ &structs.NetworkResource{ MBits: 100,