system_sched submits failed evals as blocked

This commit is contained in:
Lang Martin
2019-06-19 15:10:57 -04:00
parent 4b93a08a21
commit 2d8bfb8d11
2 changed files with 25 additions and 2 deletions

View File

@@ -257,7 +257,8 @@ func (s *GenericScheduler) process() (bool, error) {
// If there are failed allocations, we need to create a blocked evaluation
// to place the failed allocations when resources become available. If the
// current evaluation is already a blocked eval, we reuse it.
// current evaluation is already a blocked eval, we reuse it by submitting
// a new eval to the planner in createBlockedEval
if s.eval.Status != structs.EvalStatusBlocked && len(s.failedTGAllocs) != 0 && s.blocked == nil {
if err := s.createBlockedEval(false); err != nil {
s.logger.Error("failed to make blocked eval", "error", err)

View File

@@ -18,6 +18,7 @@ const (
// SystemScheduler is used for 'system' jobs. This scheduler is
// designed for services that should be run on every client.
// One for each job, containing an allocation for each node
type SystemScheduler struct {
logger log.Logger
state State
@@ -61,7 +62,8 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
switch eval.TriggeredBy {
case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate, structs.EvalTriggerFailedFollowUp,
structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate, structs.EvalTriggerPreemption,
structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain, structs.EvalTriggerAllocStop:
structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain, structs.EvalTriggerAllocStop,
structs.EvalTriggerQueuedAllocs:
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
@@ -324,6 +326,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
// Actual failure to start this task on this candidate node, report it individually
s.failedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
s.addBlocked(node)
continue
}
@@ -390,3 +393,22 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
return nil
}
// addBlocked derives a blocked evaluation from the scheduler's current eval,
// tags it with the ID of the given node, and submits it to the planner
// (worker.go), which keeps the eval for execution later. The error returned
// by the planner's CreateEval is passed straight back to the caller.
func (s *SystemScheduler) addBlocked(node *structs.Node) error {
	elig := s.ctx.Eligibility()

	// Class eligibility is only recorded when the eval has not escaped;
	// otherwise the map is left nil.
	hasEscaped := elig.HasEscaped()
	var classes map[string]bool
	if !hasEscaped {
		classes = elig.GetClasses()
	}
	quotaReached := elig.QuotaLimitReached()

	blockedEval := s.eval.CreateBlockedEval(classes, hasEscaped, quotaReached)
	blockedEval.StatusDescription = blockedEvalFailedPlacements
	blockedEval.NodeID = node.ID

	return s.planner.CreateEval(blockedEval)
}