From 279775082c3aa43828faa7e0d22b15561d36ce68 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 15 Jul 2025 10:47:02 -0400 Subject: [PATCH] sysbatch: correctly validate that reschedule policy is not allowed (#26279) System and sysbatch jobs don't support the reschedule block, because we'd always replace allocations back onto the same node. The job validation for system jobs asserts that the user hasn't set a `reschedule` block so that users aren't submitting jobs expecting it to be supported. But this validation was missing for sysbatch jobs. Validate that sysbatch jobs don't have a reschedule block. --- .changelog/26279.txt | 3 +++ api/tasks.go | 20 +++++-------------- nomad/structs/structs.go | 4 ++-- nomad/structs/structs_test.go | 2 +- .../content/docs/upgrade/upgrade-specific.mdx | 8 ++++++++ 5 files changed, 19 insertions(+), 18 deletions(-) create mode 100644 .changelog/26279.txt diff --git a/.changelog/26279.txt b/.changelog/26279.txt new file mode 100644 index 000000000..5551a6f52 --- /dev/null +++ b/.changelog/26279.txt @@ -0,0 +1,3 @@ +```release-note:breaking-change +sysbatch: Submitting a sysbatch job with a `reschedule` block will now return an error instead of being silently ignored +``` diff --git a/api/tasks.go b/api/tasks.go index 3c8c8554e..3f3ea4f07 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -202,6 +202,9 @@ func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { } func (r *ReschedulePolicy) Canonicalize(jobType string) { + if r == nil || jobType == JobTypeSystem || jobType == JobTypeSysbatch { + return + } dp := NewDefaultReschedulePolicy(jobType) if r.Interval == nil { r.Interval = dp.Interval @@ -282,16 +285,6 @@ func NewDefaultReschedulePolicy(jobType string) *ReschedulePolicy { Unlimited: pointerOf(false), } - case "system": - dp = &ReschedulePolicy{ - Attempts: pointerOf(0), - Interval: pointerOf(time.Duration(0)), - Delay: pointerOf(time.Duration(0)), - DelayFunction: pointerOf(""), - MaxDelay: pointerOf(time.Duration(0)), - 
Unlimited: pointerOf(false), - } - default: // GH-7203: it is possible an unknown job type is passed to this // function and we need to ensure a non-nil object is returned so that @@ -583,13 +576,10 @@ func (g *TaskGroup) Canonicalize(job *Job) { jobReschedule := job.Reschedule.Copy() g.ReschedulePolicy = jobReschedule } - // Only use default reschedule policy for non system jobs - if g.ReschedulePolicy == nil && *job.Type != "system" { + if g.ReschedulePolicy == nil && *job.Type != JobTypeSysbatch && *job.Type != JobTypeSystem { g.ReschedulePolicy = NewDefaultReschedulePolicy(*job.Type) } - if g.ReschedulePolicy != nil { - g.ReschedulePolicy.Canonicalize(*job.Type) - } + g.ReschedulePolicy.Canonicalize(*job.Type) // Merge the migrate strategy from the job if jm, tm := job.Migrate != nil, g.Migrate != nil; jm && tm { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 008649d80..184b4b632 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -7112,9 +7112,9 @@ func (tg *TaskGroup) Validate(j *Job) error { } } - if j.Type == JobTypeSystem { + if j.Type == JobTypeSystem || j.Type == JobTypeSysBatch { if tg.ReschedulePolicy != nil { - mErr = multierror.Append(mErr, fmt.Errorf("System jobs should not have a reschedule policy")) + mErr = multierror.Append(mErr, fmt.Errorf("System or sysbatch jobs should not have a reschedule policy")) } } else { if tg.ReschedulePolicy != nil { diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index eab6d433a..d534df968 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1512,7 +1512,7 @@ func TestTaskGroup_Validate(t *testing.T) { }, }, expErr: []string{ - "System jobs should not have a reschedule policy", + "System or sysbatch jobs should not have a reschedule policy", }, jobType: JobTypeSystem, }, diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index d3f5fb282..9254888f9 100644 
--- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -12,6 +12,14 @@ upgrade. However, specific versions of Nomad may have more details provided for their upgrades as a result of new features or changed behavior. This page is used to document those details separately from the standard upgrade flow. +## Nomad 1.11.0 + +#### Sysbatch jobs will no longer accept `reschedule` blocks + +In Nomad 1.11.0, submitting a sysbatch job with a `reschedule` block returns +an error instead of being silently ignored, as it was in previous versions. +System jobs already rejected `reschedule` blocks in earlier versions. + ## Nomad 1.10.2 #### Clients respect `telemetry.publish_allocation_metrics`