From 42ba68a2012677880d7e56aca7fb6be55a553d45 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Wed, 13 Jul 2016 13:20:46 -0600 Subject: [PATCH] Removing the queued state of Job Summary and alloc desired status false --- command/monitor.go | 29 --------------- command/monitor_test.go | 62 --------------------------------- nomad/core_sched_test.go | 18 +++++----- nomad/state/state_store.go | 19 +++------- nomad/state/state_store_test.go | 35 ++++--------------- nomad/structs/structs.go | 15 +++----- scheduler/generic_sched.go | 2 +- scheduler/generic_sched_test.go | 4 +-- scheduler/system_sched_test.go | 2 +- 9 files changed, 29 insertions(+), 157 deletions(-) diff --git a/command/monitor.go b/command/monitor.go index 3ee6dcc12..7e99f24df 100644 --- a/command/monitor.go +++ b/command/monitor.go @@ -115,23 +115,6 @@ func (m *monitor) update(update *evalState) { for allocID, alloc := range update.allocs { if existing, ok := existing.allocs[allocID]; !ok { switch { - case alloc.desired == structs.AllocDesiredStatusFailed: - // New allocs with desired state failed indicate - // scheduling failure. - m.ui.Output(fmt.Sprintf("Scheduling error for group %q (%s)", - alloc.group, alloc.desiredDesc)) - - // Log the client status, if any provided - if alloc.clientDesc != "" { - m.ui.Output("Client reported status: " + alloc.clientDesc) - } - - // Generate a more descriptive error for why the allocation - // failed and dump it to the screen - if alloc.full != nil { - dumpAllocStatus(m.ui, alloc.full, m.length) - } - case alloc.index < update.index: // New alloc with create index lower than the eval // create index indicates modification @@ -275,18 +258,6 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int { clientDesc: alloc.ClientDescription, index: alloc.CreateIndex, } - - // If we have a scheduling error, query the full allocation - // to get the details. - if alloc.DesiredStatus == structs.AllocDesiredStatusFailed { - schedFailure = true - failed, _, err := m.client.Allocations().Info(alloc.ID, nil) - if err != nil { - m.ui.Error(fmt.Sprintf("Error querying allocation: %s", err)) - return 1 - } - state.allocs[alloc.ID].full = failed - } } // Update the state diff --git a/command/monitor_test.go b/command/monitor_test.go index f200eedcb..13c697ab0 100644 --- a/command/monitor_test.go +++ b/command/monitor_test.go @@ -133,68 +133,6 @@ func TestMonitor_Update_Allocs(t *testing.T) { } } -func TestMonitor_Update_SchedulingFailure(t *testing.T) { - ui := new(cli.MockUi) - mon := newMonitor(ui, nil, shortId) - - // New allocs with desired status failed warns - state := &evalState{ - allocs: map[string]*allocState{ - "alloc2": &allocState{ - id: "87654321-dcba-efab-cdef-123456789abc", - group: "group2", - desired: structs.AllocDesiredStatusFailed, - desiredDesc: "something failed", - client: structs.AllocClientStatusFailed, - clientDesc: "client failed", - index: 1, - - // Attach the full failed allocation - full: &api.Allocation{ - ID: "87654321-dcba-efab-cdef-123456789abc", - TaskGroup: "group2", - ClientStatus: structs.AllocClientStatusFailed, - DesiredStatus: structs.AllocDesiredStatusFailed, - Metrics: &api.AllocationMetric{ - NodesEvaluated: 3, - NodesFiltered: 3, - ConstraintFiltered: map[string]int{ - "$attr.kernel.name = linux": 3, - }, - }, - }, - }, - }, - } - mon.update(state) - - // Scheduling failure was logged - out := ui.OutputWriter.String() - if !strings.Contains(out, "group2") { - t.Fatalf("missing group\n\n%s", out) - } - if !strings.Contains(out, "Scheduling error") { - t.Fatalf("missing failure\n\n%s", out) - } - if !strings.Contains(out, "something failed") { - t.Fatalf("missing desired desc\n\n%s", out) - } - if !strings.Contains(out, "client failed") { - t.Fatalf("missing client desc\n\n%s", out) - } - - // Check that the allocation details were dumped - if !strings.Contains(out, "3/3") { - t.Fatalf("missing filter stats\n\n%s", out) - } - if !strings.Contains(out, structs.AllocDesiredStatusFailed) { - t.Fatalf("missing alloc status\n\n%s", out) - } - if !strings.Contains(out, "$attr.kernel.name = linux") { - t.Fatalf("missing constraint\n\n%s", out) - } -} - func TestMonitor_Update_AllocModification(t *testing.T) { ui := new(cli.MockUi) mon := newMonitor(ui, nil, fullId) diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index dca54da9d..527e94c75 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -26,7 +26,7 @@ func TestCoreScheduler_EvalGC(t *testing.T) { // Insert "dead" alloc alloc := mock.Alloc() alloc.EvalID = eval.ID - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop err = state.UpsertAllocs(1001, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -98,7 +98,7 @@ func TestCoreScheduler_EvalGC_Batch(t *testing.T) { alloc := mock.Alloc() alloc.JobID = job.ID alloc.EvalID = eval.ID - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop err = state.UpsertAllocs(1002, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -165,7 +165,7 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) { // Insert "dead" alloc alloc := mock.Alloc() alloc.EvalID = eval.ID - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop err = state.UpsertAllocs(1001, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -241,7 +241,7 @@ func TestCoreScheduler_EvalGC_Force(t *testing.T) { // Insert "dead" alloc alloc := mock.Alloc() alloc.EvalID = eval.ID - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop err = state.UpsertAllocs(1001, []*structs.Allocation{alloc}) if err != nil { t.Fatalf("err: %v", err) @@ -467,19 +467,19 @@ func TestCoreScheduler_JobGC(t *testing.T) { { test: "Terminal", evalStatus: structs.EvalStatusFailed, - allocStatus: structs.AllocDesiredStatusFailed, + allocStatus: structs.AllocDesiredStatusStop, shouldExist: false, }, { test: "Has Alloc", evalStatus: structs.EvalStatusFailed, - allocStatus: structs.AllocDesiredStatusRun, + allocStatus: structs.AllocDesiredStatusStop, shouldExist: true, }, { test: "Has Eval", evalStatus: structs.EvalStatusPending, - allocStatus: structs.AllocDesiredStatusFailed, + allocStatus: structs.AllocDesiredStatusStop, shouldExist: true, }, } @@ -678,7 +678,7 @@ func TestCoreScheduler_JobGC_Force(t *testing.T) { { test: "Terminal", evalStatus: structs.EvalStatusFailed, - allocStatus: structs.AllocDesiredStatusFailed, + allocStatus: structs.AllocDesiredStatusStop, shouldExist: false, }, { @@ -690,7 +690,7 @@ func TestCoreScheduler_JobGC_Force(t *testing.T) { { test: "Has Eval", evalStatus: structs.EvalStatusPending, - allocStatus: structs.AllocDesiredStatusFailed, + allocStatus: structs.AllocDesiredStatusStop, shouldExist: true, }, } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index d1062d475..2c6f11594 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1227,18 +1227,14 @@ func (s *StateStore) updateSummaryWithJob(job *structs.Job, txn *memdb.Txn) erro } } for _, tg := range job.TaskGroups { - if summary, ok := existing.Summary[tg.Name]; !ok { + if _, ok := existing.Summary[tg.Name]; !ok { newSummary := structs.TaskGroupSummary{ - Queued: tg.Count, Complete: 0, Failed: 0, Running: 0, Starting: 0, } existing.Summary[tg.Name] = newSummary - } else if summary.Queued > tg.Count { - summary.Queued = tg.Count - existing.Summary[tg.Name] = summary } } @@ -1274,19 +1270,14 @@ func (s *StateStore) updateSummaryWithAlloc(newAlloc *structs.Allocation, } if existingAlloc == nil { switch newAlloc.DesiredStatus { - case structs.AllocDesiredStatusFailed: - tgSummary.Failed += 1 - case structs.AllocDesiredStatusStop: - tgSummary.Complete += 1 + case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: + s.logger.Printf("[WARN]: new allocation inserted into state store with id: %v and state: %v", newAlloc.DesiredStatus) } switch newAlloc.ClientStatus { case structs.AllocClientStatusPending: tgSummary.Starting += 1 - case structs.AllocClientStatusRunning: - tgSummary.Running += 1 - } - if tgSummary.Queued > 0 { - tgSummary.Queued -= 1 + case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: + s.logger.Printf("[WARN]: new allocation inserted into state store with id: %v and state: %v", newAlloc.ClientStatus) } } else if existingAlloc.ClientStatus != newAlloc.ClientStatus { // Incrementing the clint of the bin of the current state diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 75e3e91c0..a6e2d59bb 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -402,13 +402,10 @@ func TestStateStore_UpsertJob_Job(t *testing.T) { if summary.JobID != job.ID { t.Fatalf("bad summary id: %v", summary.JobID) } - webTgSummary, ok := summary.Summary["web"] + _, ok := summary.Summary["web"] if !ok { t.Fatalf("nil summary for task group") } - if webTgSummary.Queued != 10 { - t.Fatalf("wrong summary: %#v", webTgSummary) - } notify.verify(t) } @@ -469,13 +466,10 @@ func TestStateStore_UpdateUpsertJob_Job(t *testing.T) { if summary.JobID != job.ID { t.Fatalf("bad summary id: %v", summary.JobID) } - webTgSummary, ok := summary.Summary["web"] + _, ok := summary.Summary["web"] if !ok { t.Fatalf("nil summary for task group") } - if webTgSummary.Queued != 10 { - t.Fatalf("wrong summary: %#v", webTgSummary) - } notify.verify(t) } @@ -1083,7 +1077,7 @@ func TestStateStore_RestoreJobSummary(t *testing.T) { JobID: job.ID, Summary: map[string]structs.TaskGroupSummary{ "web": structs.TaskGroupSummary{ - Queued: 10, + Starting: 10, }, }, } @@ -1623,9 +1617,6 @@ func TestStateStore_UpdateAllocsFromClient(t *testing.T) { if tgSummary.Failed != 1 { t.Fatalf("expected failed: %v, actual: %v, summary: %#v", 1, tgSummary.Failed, tgSummary) } - if tgSummary.Queued != 9 { - t.Fatalf("expected queued: %v, actual: %v", 9, tgSummary.Running) - } summary2, err := state.JobSummaryByID(alloc2.JobID) if err != nil { @@ -1635,9 +1626,6 @@ func TestStateStore_UpdateAllocsFromClient(t *testing.T) { if tgSummary2.Running != 1 { t.Fatalf("expected running: %v, actual: %v", 1, tgSummary2.Failed) } - if tgSummary2.Queued != 9 { - t.Fatalf("expected queued: %v, actual: %v", 9, tgSummary2.Running) - } notify.verify(t) } @@ -1689,9 +1677,6 @@ func TestStateStore_UpsertAlloc_Alloc(t *testing.T) { if !ok { t.Fatalf("no summary for task group web") } - if tgSummary.Queued != 9 { - t.Fatalf("expected queued: %v, actual: %v", 9, tgSummary.Queued) - } if tgSummary.Starting != 1 { t.Fatalf("expected queued: %v, actual: %v", 1, tgSummary.Starting) } @@ -1720,9 +1705,6 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { if tgSummary.Starting != 1 { t.Fatalf("expected starting: %v, actual: %v", 1, tgSummary.Starting) } - if tgSummary.Queued != 9 { - t.Fatalf("expected starting: %v, actual: %v", 9, tgSummary.Queued) - } alloc2 := mock.Alloc() alloc2.ID = alloc.ID @@ -1774,9 +1756,6 @@ func TestStateStore_UpdateAlloc_Alloc(t *testing.T) { if tgSummary.Starting != 1 { t.Fatalf("expected starting: %v, actual: %v", 1, tgSummary.Starting) } - if tgSummary.Queued != 9 { - t.Fatalf("expected starting: %v, actual: %v", 9, tgSummary.Queued) - } notify.verify(t) } @@ -2338,7 +2317,7 @@ func TestStateJobSummary_UpdateJobCount(t *testing.T) { t.Fatalf("err: %v", err) } summary, _ := state.JobSummaryByID(job.ID) - if summary.Summary["web"].Queued != 2 && summary.Summary["web"].Starting != 1 { + if summary.Summary["web"].Starting != 1 { t.Fatalf("bad job summary: %v", summary) } @@ -2427,7 +2406,7 @@ func TestJobSummary_UpdateClientStatus(t *testing.T) { t.Fatalf("err: %v", err) } summary, _ := state.JobSummaryByID(job.ID) - if summary.Summary["web"].Queued != 0 || summary.Summary["web"].Starting != 3 { + if summary.Summary["web"].Starting != 3 { t.Fatalf("bad job summary: %v", summary) } @@ -2453,7 +2432,7 @@ func TestJobSummary_UpdateClientStatus(t *testing.T) { t.Fatalf("err: %v", err) } summary, _ = state.JobSummaryByID(job.ID) - if summary.Summary["web"].Queued != 0 || summary.Summary["web"].Running != 1 || summary.Summary["web"].Failed != 1 || summary.Summary["web"].Complete != 1 { + if summary.Summary["web"].Running != 1 || summary.Summary["web"].Failed != 1 || summary.Summary["web"].Complete != 1 { t.Fatalf("bad job summary: %v", summary) } @@ -2465,7 +2444,7 @@ func TestJobSummary_UpdateClientStatus(t *testing.T) { t.Fatalf("err: %v", err) } summary, _ = state.JobSummaryByID(job.ID) - if summary.Summary["web"].Queued != 0 || summary.Summary["web"].Starting != 1 || summary.Summary["web"].Running != 1 || summary.Summary["web"].Failed != 1 || summary.Summary["web"].Complete != 1 { + if summary.Summary["web"].Starting != 1 || summary.Summary["web"].Running != 1 || summary.Summary["web"].Failed != 1 || summary.Summary["web"].Complete != 1 { t.Fatalf("bad job summary: %v", summary) } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 7bebe65c2..7865405f7 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -950,7 +950,6 @@ type JobSummary struct { // TaskGroup summarizes the state of all the allocations of a particular // TaskGroup type TaskGroupSummary struct { - Queued int Complete int Failed int Running int @@ -958,11 +957,6 @@ type TaskGroupSummary struct { Lost int } -// Total returns the total number of allocations for the task group. -func (s *TaskGroupSummary) Total() int { - return s.Queued + s.Complete + s.Failed + s.Running + s.Starting -} - // Job is the scope of a scheduling request to Nomad. It is the largest // scoped object, and is a named collection of task groups. Each task group // is further composed of tasks. A task group (TG) is the unit of scheduling @@ -2323,10 +2317,9 @@ func (c *Constraint) Validate() error { } const ( - AllocDesiredStatusRun = "run" // Allocation should run - AllocDesiredStatusStop = "stop" // Allocation should stop - AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted - AllocDesiredStatusFailed = "failed" // Allocation failed to be done + AllocDesiredStatusRun = "run" // Allocation should run + AllocDesiredStatusStop = "stop" // Allocation should stop + AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted ) const ( @@ -2435,7 +2428,7 @@ func (a *Allocation) TerminalStatus() bool { // First check the desired state and if that isn't terminal, check client // state. switch a.DesiredStatus { - case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed: + case AllocDesiredStatusStop, AllocDesiredStatusEvict: return true default: } diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 6649c2337..e6cf2465c 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -273,7 +273,7 @@ func (s *GenericScheduler) filterCompleteAllocs(allocs []*structs.Allocation) [] // status is failed so that they will be replaced. If they are // complete but not failed, they shouldn't be replaced. switch a.DesiredStatus { - case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict, structs.AllocDesiredStatusFailed: + case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: return !a.RanSuccessfully() default: } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 7798a6f84..daf85869e 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -644,7 +644,7 @@ func TestServiceSched_JobModify(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = nodes[i].ID alloc.Name = fmt.Sprintf("my-job.web[%d]", i) - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop terminal = append(terminal, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal)) @@ -833,7 +833,7 @@ func TestServiceSched_JobModify_CountZero(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = nodes[i].ID alloc.Name = fmt.Sprintf("my-job.web[%d]", i) - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop terminal = append(terminal, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal)) diff --git a/scheduler/system_sched_test.go b/scheduler/system_sched_test.go index 526b7cfee..1e3009826 100644 --- a/scheduler/system_sched_test.go +++ b/scheduler/system_sched_test.go @@ -304,7 +304,7 @@ func TestSystemSched_JobModify(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = nodes[i].ID alloc.Name = "my-job.web[0]" - alloc.DesiredStatus = structs.AllocDesiredStatusFailed + alloc.DesiredStatus = structs.AllocDesiredStatusStop terminal = append(terminal, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))