diff --git a/api/tasks.go b/api/tasks.go index 82986085b..821ab8091 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -431,6 +431,7 @@ type TaskGroup struct { Services []*Service `hcl:"service,block"` ShutdownDelay *time.Duration `mapstructure:"shutdown_delay" hcl:"shutdown_delay,optional"` StopAfterClientDisconnect *time.Duration `mapstructure:"stop_after_client_disconnect" hcl:"stop_after_client_disconnect,optional"` + MaxClientDisconnect *time.Duration `mapstructure:"max_client_disconnect" hcl:"max_client_disconnect,optional"` Scaling *ScalingPolicy `hcl:"scaling,block"` Consul *Consul `hcl:"consul,block"` } diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 1e07c8768..fc2c6d5da 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1012,6 +1012,10 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta tg.StopAfterClientDisconnect = taskGroup.StopAfterClientDisconnect } + if taskGroup.MaxClientDisconnect != nil { + tg.MaxClientDisconnect = taskGroup.MaxClientDisconnect + } + if taskGroup.ReschedulePolicy != nil { tg.ReschedulePolicy = &structs.ReschedulePolicy{ Attempts: *taskGroup.ReschedulePolicy.Attempts, diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 6678b2a29..3c7cdb603 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -2558,6 +2558,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { }, }, }, + MaxClientDisconnect: helper.TimeToPtr(30 * time.Second), Tasks: []*api.Task{ { Name: "task1", @@ -2955,6 +2956,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { }, }, }, + MaxClientDisconnect: helper.TimeToPtr(30 * time.Second), Tasks: []*structs.Task{ { Name: "task1", diff --git a/nomad/structs/diff.go b/nomad/structs/diff.go index 91985a237..d64ec05a1 100644 --- a/nomad/structs/diff.go +++ b/nomad/structs/diff.go @@ -265,6 +265,20 @@ func (tg *TaskGroup) Diff(other *TaskGroup, contextual bool) (*TaskGroupDiff, er } } + // MaxClientDisconnect diff + if oldPrimitiveFlat != nil && newPrimitiveFlat != nil { + if tg.MaxClientDisconnect == nil { + oldPrimitiveFlat["MaxClientDisconnect"] = "" + } else { + oldPrimitiveFlat["MaxClientDisconnect"] = fmt.Sprintf("%d", *tg.MaxClientDisconnect) + } + if other.MaxClientDisconnect == nil { + newPrimitiveFlat["MaxClientDisconnect"] = "" + } else { + newPrimitiveFlat["MaxClientDisconnect"] = fmt.Sprintf("%d", *other.MaxClientDisconnect) + } + } + // Diff the primitive fields. diff.Fields = fieldDiffs(oldPrimitiveFlat, newPrimitiveFlat, false) diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index b7a8336d3..946e350e3 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -3910,6 +3910,75 @@ func TestTaskGroupDiff(t *testing.T) { }, }, }, + { + TestCase: "MaxClientDisconnect added", + Old: &TaskGroup{ + Name: "foo", + MaxClientDisconnect: nil, + }, + New: &TaskGroup{ + Name: "foo", + MaxClientDisconnect: helper.TimeToPtr(20 * time.Second), + }, + Expected: &TaskGroupDiff{ + Type: DiffTypeEdited, + Name: "foo", + Fields: []*FieldDiff{ + { + Type: DiffTypeAdded, + Name: "MaxClientDisconnect", + Old: "", + New: "20000000000", + }, + }, + }, + }, + { + TestCase: "MaxClientDisconnect updated", + Old: &TaskGroup{ + Name: "foo", + MaxClientDisconnect: helper.TimeToPtr(10 * time.Second), + }, + New: &TaskGroup{ + Name: "foo", + MaxClientDisconnect: helper.TimeToPtr(20 * time.Second), + }, + Expected: &TaskGroupDiff{ + Type: DiffTypeEdited, + Name: "foo", + Fields: []*FieldDiff{ + { + Type: DiffTypeEdited, + Name: "MaxClientDisconnect", + Old: "10000000000", + New: "20000000000", + }, + }, + }, + }, + { + TestCase: "MaxClientDisconnect deleted", + Old: &TaskGroup{ + Name: "foo", + MaxClientDisconnect: helper.TimeToPtr(10 * time.Second), + }, + New: &TaskGroup{ + Name: "foo", + MaxClientDisconnect: nil, + }, + Expected: &TaskGroupDiff{ + Type: DiffTypeEdited, + Name: "foo", + Fields: []*FieldDiff{ + { + Type: DiffTypeDeleted, + Name: "MaxClientDisconnect", + Old: "10000000000", + New: "", + }, + }, + }, + }, } for i, c := range cases { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 2b13b00c0..3572eaf3c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6227,6 +6227,10 @@ func (tg *TaskGroup) Copy() *TaskGroup { ntg.StopAfterClientDisconnect = tg.StopAfterClientDisconnect } + if tg.MaxClientDisconnect != nil { + ntg.MaxClientDisconnect = tg.MaxClientDisconnect + } + return ntg } @@ -6290,6 +6294,10 @@ func (tg *TaskGroup) Validate(j *Job) error { mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) } + if tg.MaxClientDisconnect != nil && *tg.MaxClientDisconnect < 0 { + mErr.Errors = append(mErr.Errors, errors.New("max_client_disconnect cannot be negative")) + } + for idx, constr := range tg.Constraints { if err := constr.Validate(); err != nil { outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 39c288b9b..5634921ea 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -415,6 +415,7 @@ func testJob() *Job { "elb_check_interval": "30s", "elb_check_min": "3", }, + MaxClientDisconnect: helper.TimeToPtr(1 * time.Hour), }, }, Meta: map[string]string{ @@ -5365,6 +5366,51 @@ func TestAllocation_WaitClientStop(t *testing.T) { } } +func TestAllocation_DisconnectTimeout(t *testing.T) { + type testCase struct { + desc string + maxDisconnect *time.Duration + } + + testCases := []testCase{ + { + desc: "no max_client_disconnect", + maxDisconnect: nil, + }, + { + desc: "has max_client_disconnect", + maxDisconnect: helper.TimeToPtr(30 * time.Second), + }, + { + desc: "zero max_client_disconnect", + maxDisconnect: helper.TimeToPtr(0 * time.Second), + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + j := testJob() + a := &Allocation{ + Job: j, + } + + j.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect + a.TaskGroup = j.TaskGroups[0].Name + + now := time.Now() + + reschedTime := a.DisconnectTimeout(now) + + if tc.maxDisconnect == nil { + require.Equal(t, now, reschedTime, "expected to be now") + } else { + difference := reschedTime.Sub(now) + require.Equal(t, *tc.maxDisconnect, difference, "expected durations to be equal") + } + + }) + } +} + func TestAllocation_Canonicalize_Old(t *testing.T) { ci.Parallel(t) @@ -5580,6 +5626,23 @@ func TestJobConfig_Validate_StopAferClientDisconnect(t *testing.T) { require.NoError(t, err) } +func TestJobConfig_Validate_MaxClientDisconnect(t *testing.T) { + // Set up a job with an invalid max_client_disconnect value + job := testJob() + timeout := -1 * time.Minute + job.TaskGroups[0].MaxClientDisconnect = &timeout + + err := job.Validate() + require.Error(t, err) + require.Contains(t, err.Error(), "max_client_disconnect cannot be negative") + + // Modify the job with a valid max_client_disconnect value + timeout = 1 * time.Minute + job.TaskGroups[0].MaxClientDisconnect = &timeout + err = job.Validate() + require.NoError(t, err) +} + func TestParameterizedJobConfig_Canonicalize(t *testing.T) { ci.Parallel(t) diff --git a/website/content/api-docs/jobs.mdx b/website/content/api-docs/jobs.mdx index 7cf1d3604..34669e185 100644 --- a/website/content/api-docs/jobs.mdx +++ b/website/content/api-docs/jobs.mdx @@ -614,6 +614,7 @@ $ curl \ "SizeMB": 300, "Sticky": false }, + "MaxClientDisconnect": 300000000000, "Meta": null, "Migrate": { "HealthCheck": "checks", @@ -827,6 +828,7 @@ $ curl \ "SizeMB": 300, "Sticky": false }, + "MaxClientDisconnect": null, "Meta": null, "Migrate": { "HealthCheck": "checks", @@ -994,6 +996,7 @@ $ curl \ "SizeMB": 300, "Sticky": false }, + "MaxClientDisconnect": null, "Meta": null, "Migrate": { "HealthCheck": "checks",