mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 10:25:42 +03:00
Log reason a plan gets rejected per node.
This PR adds a log explaining why a plan gets rejected. Should help debugging.
This commit is contained in:
@@ -2,6 +2,7 @@ package nomad
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
@@ -80,7 +81,7 @@ func (s *Server) planApply() {
|
||||
}
|
||||
|
||||
// Evaluate the plan
|
||||
result, err := evaluatePlan(pool, snap, pending.plan)
|
||||
result, err := evaluatePlan(pool, snap, pending.plan, s.logger)
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to evaluate plan: %v", err)
|
||||
pending.respond(nil, err)
|
||||
@@ -196,7 +197,7 @@ func (s *Server) asyncPlanWait(waitCh chan struct{}, future raft.ApplyFuture,
|
||||
// evaluatePlan is used to determine what portions of a plan
|
||||
// can be applied if any. Returns if there should be a plan application
|
||||
// which may be partial or if there was an error
|
||||
func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.Plan) (*structs.PlanResult, error) {
|
||||
func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.Plan, logger *log.Logger) (*structs.PlanResult, error) {
|
||||
defer metrics.MeasureSince([]string{"nomad", "plan", "evaluate"}, time.Now())
|
||||
|
||||
// Create a result holder for the plan
|
||||
@@ -229,13 +230,17 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P
|
||||
partialCommit := false
|
||||
|
||||
// handleResult is used to process the result of evaluateNodePlan
|
||||
handleResult := func(nodeID string, fit bool, err error) (cancel bool) {
|
||||
handleResult := func(nodeID string, fit bool, reason string, err error) (cancel bool) {
|
||||
// Evaluate the plan for this node
|
||||
if err != nil {
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
return true
|
||||
}
|
||||
if !fit {
|
||||
// Log the reason why the node's allocations could not be made
|
||||
if reason != "" {
|
||||
logger.Printf("[DEBUG] nomad: plan for node %q rejected becasue: %v", nodeID, reason)
|
||||
}
|
||||
// Set that this is a partial commit
|
||||
partialCommit = true
|
||||
|
||||
@@ -283,7 +288,7 @@ OUTER:
|
||||
|
||||
// Handle a result that allows us to cancel evaluation,
|
||||
// which may save time processing additional entries.
|
||||
if cancel := handleResult(r.nodeID, r.fit, r.err); cancel {
|
||||
if cancel := handleResult(r.nodeID, r.fit, r.reason, r.err); cancel {
|
||||
didCancel = true
|
||||
break OUTER
|
||||
}
|
||||
@@ -294,7 +299,7 @@ OUTER:
|
||||
for outstanding > 0 {
|
||||
r := <-resp
|
||||
if !didCancel {
|
||||
if cancel := handleResult(r.nodeID, r.fit, r.err); cancel {
|
||||
if cancel := handleResult(r.nodeID, r.fit, r.reason, r.err); cancel {
|
||||
didCancel = true
|
||||
}
|
||||
}
|
||||
@@ -368,30 +373,34 @@ func correctDeploymentCanaries(result *structs.PlanResult) {
|
||||
|
||||
// evaluateNodePlan is used to evalute the plan for a single node,
|
||||
// returning if the plan is valid or if an error is encountered
|
||||
func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID string) (bool, error) {
|
||||
func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID string) (bool, string, error) {
|
||||
// If this is an evict-only plan, it always 'fits' since we are removing things.
|
||||
if len(plan.NodeAllocation[nodeID]) == 0 {
|
||||
return true, nil
|
||||
return true, "", nil
|
||||
}
|
||||
|
||||
// Get the node itself
|
||||
ws := memdb.NewWatchSet()
|
||||
node, err := snap.NodeByID(ws, nodeID)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to get node '%s': %v", nodeID, err)
|
||||
return false, "", fmt.Errorf("failed to get node '%s': %v", nodeID, err)
|
||||
}
|
||||
|
||||
// If the node does not exist or is not ready for schduling it is not fit
|
||||
// XXX: There is a potential race between when we do this check and when
|
||||
// the Raft commit happens.
|
||||
if node == nil || node.Status != structs.NodeStatusReady || node.Drain {
|
||||
return false, nil
|
||||
if node == nil {
|
||||
return false, "node does not exist", nil
|
||||
} else if node.Status != structs.NodeStatusReady {
|
||||
return false, "node is not ready for placements", nil
|
||||
} else if node.Drain {
|
||||
return false, "node is draining", nil
|
||||
}
|
||||
|
||||
// Get the existing allocations that are non-terminal
|
||||
existingAlloc, err := snap.AllocsByNodeTerminal(ws, nodeID, false)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to get existing allocations for '%s': %v", nodeID, err)
|
||||
return false, "", fmt.Errorf("failed to get existing allocations for '%s': %v", nodeID, err)
|
||||
}
|
||||
|
||||
// Determine the proposed allocation by first removing allocations
|
||||
@@ -410,6 +419,6 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri
|
||||
proposed = append(proposed, plan.NodeAllocation[nodeID]...)
|
||||
|
||||
// Check if these allocations fit
|
||||
fit, _, _, err := structs.AllocsFit(node, proposed, nil)
|
||||
return fit, err
|
||||
fit, reason, _, err := structs.AllocsFit(node, proposed, nil)
|
||||
return fit, reason, err
|
||||
}
|
||||
|
||||
@@ -31,6 +31,7 @@ type evaluateRequest struct {
|
||||
type evaluateResult struct {
|
||||
nodeID string
|
||||
fit bool
|
||||
reason string
|
||||
err error
|
||||
}
|
||||
|
||||
@@ -102,8 +103,8 @@ func (p *EvaluatePool) run(stopCh chan struct{}) {
|
||||
for {
|
||||
select {
|
||||
case req := <-p.req:
|
||||
fit, err := evaluateNodePlan(req.snap, req.plan, req.nodeID)
|
||||
p.res <- evaluateResult{req.nodeID, fit, err}
|
||||
fit, reason, err := evaluateNodePlan(req.snap, req.plan, req.nodeID)
|
||||
p.res <- evaluateResult{req.nodeID, fit, reason, err}
|
||||
|
||||
case <-stopCh:
|
||||
return
|
||||
|
||||
@@ -261,7 +261,7 @@ func TestPlanApply_EvalPlan_Simple(t *testing.T) {
|
||||
pool := NewEvaluatePool(workerPoolSize, workerPoolBufferSize)
|
||||
defer pool.Shutdown()
|
||||
|
||||
result, err := evaluatePlan(pool, snap, plan)
|
||||
result, err := evaluatePlan(pool, snap, plan, testLogger())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
@@ -306,7 +306,7 @@ func TestPlanApply_EvalPlan_Partial(t *testing.T) {
|
||||
pool := NewEvaluatePool(workerPoolSize, workerPoolBufferSize)
|
||||
defer pool.Shutdown()
|
||||
|
||||
result, err := evaluatePlan(pool, snap, plan)
|
||||
result, err := evaluatePlan(pool, snap, plan, testLogger())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
@@ -365,7 +365,7 @@ func TestPlanApply_EvalPlan_Partial_AllAtOnce(t *testing.T) {
|
||||
pool := NewEvaluatePool(workerPoolSize, workerPoolBufferSize)
|
||||
defer pool.Shutdown()
|
||||
|
||||
result, err := evaluatePlan(pool, snap, plan)
|
||||
result, err := evaluatePlan(pool, snap, plan, testLogger())
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
@@ -397,13 +397,16 @@ func TestPlanApply_EvalNodePlan_Simple(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason != "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeNotReady(t *testing.T) {
|
||||
@@ -420,13 +423,16 @@ func TestPlanApply_EvalNodePlan_NodeNotReady(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason == "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeDrain(t *testing.T) {
|
||||
@@ -443,13 +449,16 @@ func TestPlanApply_EvalNodePlan_NodeDrain(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason == "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeNotExist(t *testing.T) {
|
||||
@@ -464,13 +473,16 @@ func TestPlanApply_EvalNodePlan_NodeNotExist(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, nodeID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, nodeID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason == "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeFull(t *testing.T) {
|
||||
@@ -495,13 +507,16 @@ func TestPlanApply_EvalNodePlan_NodeFull(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason == "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_UpdateExisting(t *testing.T) {
|
||||
@@ -521,13 +536,16 @@ func TestPlanApply_EvalNodePlan_UpdateExisting(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason != "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeFull_Evict(t *testing.T) {
|
||||
@@ -554,13 +572,16 @@ func TestPlanApply_EvalNodePlan_NodeFull_Evict(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason != "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeFull_AllocEvict(t *testing.T) {
|
||||
@@ -582,13 +603,16 @@ func TestPlanApply_EvalNodePlan_NodeFull_AllocEvict(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason != "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanApply_EvalNodePlan_NodeDown_EvictOnly(t *testing.T) {
|
||||
@@ -612,11 +636,14 @@ func TestPlanApply_EvalNodePlan_NodeDown_EvictOnly(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
fit, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
fit, reason, err := evaluateNodePlan(snap, plan, node.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !fit {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
if reason != "" {
|
||||
t.Fatalf("bad")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,10 @@ func getPort() int {
|
||||
return int(atomic.AddUint32(&nextPort, 1))
|
||||
}
|
||||
|
||||
func testLogger() *log.Logger {
|
||||
return log.New(os.Stderr, "", log.LstdFlags)
|
||||
}
|
||||
|
||||
func tmpDir(t *testing.T) string {
|
||||
dir, err := ioutil.TempDir("", "nomad")
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user