From 2d8bfb8d11ba1b7a579cb418ffd106025ed75583 Mon Sep 17 00:00:00 2001 From: Lang Martin Date: Wed, 19 Jun 2019 15:10:57 -0400 Subject: [PATCH] system_sched submits failed evals as blocked --- scheduler/generic_sched.go | 3 ++- scheduler/system_sched.go | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 9dacb0d73..23e0745be 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -257,7 +257,8 @@ func (s *GenericScheduler) process() (bool, error) { // If there are failed allocations, we need to create a blocked evaluation // to place the failed allocations when resources become available. If the - // current evaluation is already a blocked eval, we reuse it. + // current evaluation is already a blocked eval, we reuse it by submitting + // a new eval to the planner in createBlockedEval if s.eval.Status != structs.EvalStatusBlocked && len(s.failedTGAllocs) != 0 && s.blocked == nil { if err := s.createBlockedEval(false); err != nil { s.logger.Error("failed to make blocked eval", "error", err) diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index 7918bbcc0..b0fab7756 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -18,6 +18,7 @@ const ( // SystemScheduler is used for 'system' jobs. This scheduler is // designed for services that should be run on every client. 
+// One for each job, containing an allocation for each node. type SystemScheduler struct { logger log.Logger state State @@ -61,7 +62,8 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error { switch eval.TriggeredBy { case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate, structs.EvalTriggerFailedFollowUp, structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate, structs.EvalTriggerPreemption, - structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain, structs.EvalTriggerAllocStop: + structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain, structs.EvalTriggerAllocStop, + structs.EvalTriggerQueuedAllocs: default: desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) @@ -324,6 +326,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { // Actual failure to start this task on this candidate node, report it individually s.failedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics() + s.addBlocked(node) continue } @@ -390,3 +393,22 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { return nil } + +// addBlocked creates a new blocked eval for this job, sets its NodeID to this node's ID, +// and submits it to the planner (worker.go), which keeps the eval for execution later; the error from CreateEval is returned. +func (s *SystemScheduler) addBlocked(node *structs.Node) error { + e := s.ctx.Eligibility() + escaped := e.HasEscaped() + + // Only store the eligible classes if the eval hasn't escaped. + var classEligibility map[string]bool + if !escaped { + classEligibility = e.GetClasses() + } + + blocked := s.eval.CreateBlockedEval(classEligibility, escaped, e.QuotaLimitReached()) + blocked.StatusDescription = blockedEvalFailedPlacements + blocked.NodeID = node.ID + + return s.planner.CreateEval(blocked) +}