[ui, deployments] Restarted and Rescheduled panel cells (#16972)

* Status panel shows failed and lost, but probably doesn't have the condition quite right

* Rescheduled and Replaced cells instead of a general failed/lost one

* Tests moving to acceptance

* Fixed desiredTotal and added acceptance test for restarted

* moved integration test into acceptance test generally

* Now that we represent Lost in the graph, have to make our unplaced testcase as Unknown

* No need to declare new vars for immediately returned getters

* Literal restarts and reschedules add to the tallies, rather than 'would have but ran out of attempts' like before

* Test fixes now that we've redefined what restarts and reschedules are indicated by
This commit is contained in:
Phil Renaud
2023-05-01 15:24:21 -04:00
committed by GitHub
parent 13f293e6c9
commit 61f4d66dc7
10 changed files with 324 additions and 10 deletions

View File

@@ -0,0 +1,22 @@
{{!--
  Summary cell for the job status panel.
  Renders @title with an info-icon tooltip whose accessible label is
  @description, followed by the number of entries in @allocs. The count is
  wrapped in ConditionalLinkTo: when this.shouldLinkToAllocations is truthy it
  targets the job's allocations route, passing a query with status
  failed/lost/unknown and the latest deployment's version number.
--}}
<section class="failed-or-lost">
<h4>
{{@title}}
<span
class="tooltip multiline text-center"
role="tooltip"
aria-label={{@description}}
>
<FlightIcon @name="info" />
</span>
</h4>
<ConditionalLinkTo
@condition={{this.shouldLinkToAllocations}}
@route="jobs.job.allocations"
@model={{@job}}
@query={{hash status=(concat '["failed", "lost", "unknown"]') version=(concat '[' @job.latestDeployment.versionNumber ']')}}
@label="View Allocations"
@class="failed-or-lost-link"
>
{{@allocs.length}}
</ConditionalLinkTo>
</section>

View File

@@ -0,0 +1,7 @@
import Component from '@glimmer/component';
/**
 * Backing class for the `JobStatus::FailedOrLost` summary cell.
 *
 * Arguments read here: `@title` (string) and `@allocs` (array-like with a
 * `length`). The template additionally reads `@description` and `@job`.
 */
export default class JobStatusFailedOrLostComponent extends Component {
  /**
   * Whether the allocation count should be rendered as a link.
   *
   * The cell titled "Restarted" is never linked; any other cell links only
   * when it has at least one allocation to show.
   * NOTE(review): presumably "Restarted" is excluded because the linked
   * status/version query cannot select restarted allocations — confirm.
   *
   * @returns {boolean} always a strict boolean (previously the raw
   *   `allocs.length` number could leak through; truthiness is unchanged).
   */
  get shouldLinkToAllocations() {
    return this.args.title !== 'Restarted' && this.args.allocs.length > 0;
  }
}

View File

@@ -102,6 +102,21 @@
</span>
</legend>
<JobStatus::FailedOrLost
@allocs={{this.rescheduledAllocs}}
@job={{@job}}
@title="Rescheduled"
@description="Allocations that have been rescheduled, on another node if possible, due to failure during deployment"
/>
<JobStatus::FailedOrLost
@allocs={{this.restartedAllocs}}
@job={{@job}}
@title="Restarted"
@description="Allocations that have been restarted in-place due to a task failure during deployment"
/>
</div>
<div class="history-and-params">

View File

@@ -14,7 +14,7 @@ export default class JobStatusPanelDeployingComponent extends Component {
'pending',
'failed',
// 'unknown',
// 'lost',
'lost',
// 'queued',
// 'complete',
'unplaced',
@@ -61,7 +61,7 @@ export default class JobStatusPanelDeployingComponent extends Component {
fail;
@alias('job.latestDeployment') deployment;
@alias('deployment.desiredTotal') desiredTotal;
@alias('totalAllocs') desiredTotal;
get oldVersionAllocBlocks() {
return this.job.allocations
@@ -114,6 +114,14 @@ export default class JobStatusPanelDeployingComponent extends Component {
: 'unhealthy';
if (allocationCategories[status]) {
// If status is failed or lost, we only want to show it IF it's used up its restarts/rescheds.
// Otherwise, we'd be showing an alloc that had been replaced.
if (alloc.willNotRestart) {
if (!alloc.willNotReschedule) {
// Don't count it
continue;
}
}
allocationCategories[status][health][canary].push(alloc);
availableSlotsToFill--;
}
@@ -144,6 +152,22 @@ export default class JobStatusPanelDeployingComponent extends Component {
];
}
get rescheduledAllocs() {
return this.job.allocations.filter(
(a) =>
a.jobVersion === this.job.latestDeployment.get('versionNumber') &&
a.hasBeenRescheduled
);
}
get restartedAllocs() {
return this.job.allocations.filter(
(a) =>
a.jobVersion === this.job.latestDeployment.get('versionNumber') &&
a.hasBeenRestarted
);
}
// #region legend
get newAllocsByStatus() {
return Object.entries(this.newVersionAllocBlocks).reduce(

View File

@@ -41,6 +41,20 @@
{{/each}}
</legend>
<JobStatus::FailedOrLost
@allocs={{this.rescheduledAllocs}}
@job={{@job}}
@title="Rescheduled"
@description="Allocations that have been rescheduled, on another node if possible, due to failure"
/>
<JobStatus::FailedOrLost
@allocs={{this.restartedAllocs}}
@job={{@job}}
@title="Restarted"
@description="Allocations that have been restarted in-place due to a task failure"
/>
<section class="versions">
<h4>Versions</h4>
<ul>

View File

@@ -11,7 +11,7 @@ export default class JobStatusPanelSteadyComponent extends Component {
'pending',
'failed',
// 'unknown',
// 'lost',
'lost',
// 'queued',
// 'complete',
'unplaced',
@@ -25,10 +25,10 @@ export default class JobStatusPanelSteadyComponent extends Component {
let availableSlotsToFill = this.totalAllocs;
// Only fill up to 100% of totalAllocs. Once we've filled up, we can stop counting.
let allocationsOfShowableType = this.allocTypes.reduce((blocks, type) => {
const jobAllocsOfType = this.args.job.allocations.filterBy(
'clientStatus',
type.label
);
const jobAllocsOfType = this.args.job.allocations
.sortBy('jobVersion') // Try counting from latest deployment's allocs and work backwards if needed
.reverse()
.filterBy('clientStatus', type.label);
if (availableSlotsToFill > 0) {
blocks[type.label] = {
healthy: {
@@ -84,4 +84,20 @@ export default class JobStatusPanelSteadyComponent extends Component {
[]
);
}
get rescheduledAllocs() {
return this.job.allocations.filter(
(a) =>
a.jobVersion === this.job.latestDeployment.get('versionNumber') &&
a.hasBeenRescheduled
);
}
get restartedAllocs() {
return this.job.allocations.filter(
(a) =>
a.jobVersion === this.job.latestDeployment.get('versionNumber') &&
a.hasBeenRestarted
);
}
}

View File

@@ -46,6 +46,7 @@ export default class Allocation extends Model {
@attr('string') clientStatus;
@attr('string') desiredStatus;
@attr() desiredTransition;
@attr() deploymentStatus;
get isCanary() {
@@ -56,6 +57,29 @@ export default class Allocation extends Model {
return this.deploymentStatus?.Healthy;
}
get willNotRestart() {
return this.clientStatus === 'failed' || this.clientStatus === 'lost';
}
get willNotReschedule() {
return (
this.willNotRestart &&
!this.get('nextAllocation.content') &&
!this.get('followUpEvaluation.content')
);
}
get hasBeenRescheduled() {
return this.get('followUpEvaluation.content');
}
get hasBeenRestarted() {
return this.states
.map((s) => s.events.content)
.flat()
.find((e) => e.type === 'Restarting');
}
@attr healthChecks;
async getServiceHealth() {

View File

@@ -49,7 +49,12 @@
// TODO: may revisit this grid-area later, but is currently used in 2 competing ways
display: grid;
gap: 0.5rem;
grid-template-columns: 50% 50%;
grid-template-columns: 55% 15% 15% 15%;
& > section > h4,
& > legend > h4 {
margin-bottom: 0.5rem;
}
legend {
display: grid;
@@ -71,6 +76,14 @@
}
}
}
// Summary cells (section.failed-or-lost) rendered by JobStatus::FailedOrLost.
.failed-or-lost {
// The allocation count (linked or not) is shown as a large, bold block.
.failed-or-lost-link {
display: block;
font-size: 1.5rem;
font-weight: bold;
}
}
}
// #endregion layout

View File

@@ -8,6 +8,7 @@ import {
find,
findAll,
fillIn,
settled,
triggerEvent,
} from '@ember/test-helpers';
@@ -276,7 +277,7 @@ module('Acceptance | job status panel', function (hooks) {
running: 0.5,
failed: 0.3,
pending: 0.1,
lost: 0.1,
unknown: 0.1,
},
groupTaskCount,
shallow: true,
@@ -290,7 +291,7 @@ module('Acceptance | job status panel', function (hooks) {
// 25 running: 9 ungrouped, 17 grouped
// 15 failed: 5 ungrouped, 10 grouped
// 5 pending: 0 ungrouped, 5 grouped
// 5 lost: 0 ungrouped, 5 grouped. Represented as "Unplaced"
// 5 unknown: 0 ungrouped, 5 grouped. Represented as "Unplaced"
assert
.dom('.ungrouped-allocs .represented-allocation.running')
@@ -449,6 +450,122 @@ module('Acceptance | job status panel', function (hooks) {
);
});
// Verifies that the Rescheduled and Restarted summary cells start at zero as
// plain text, and that they update (and the Rescheduled cell becomes a link)
// once allocations in the store gain a "Restarting" task event or follow-up
// evaluation content.
test('Restarted/Rescheduled/Failed numbers reflected correctly', async function (assert) {
  this.store = this.owner.lookup('service:store');

  const groupTaskCount = 10;

  const job = server.create('job', {
    status: 'running',
    datacenters: ['*'],
    type: 'service',
    resourceSpec: ['M: 256, C: 500'], // a single group
    createAllocations: true,
    allocStatusDistribution: {
      running: 0.5,
      failed: 0.5,
      unknown: 0,
      lost: 0,
    },
    groupTaskCount,
    activeDeployment: true,
    shallow: true,
  });

  // Give every allocation of the job the same task state and job version 0.
  const state = server.create('task-state');
  state.events = server.schema.taskEvents.where({ taskStateId: state.id });
  server.schema.allocations.where({ jobId: job.id }).update({
    taskStateIds: [state.id],
    jobVersion: 0,
  });

  await visit(`/jobs/${job.id}`);
  assert.dom('.job-status-panel').exists();
  assert
    .dom('.failed-or-lost')
    .exists({ count: 2 }, 'Restarted and Rescheduled cells are both present');

  // Query the DOM once; the cells render in Rescheduled, Restarted order.
  const [rescheduledCell, restartedCell] = findAll('.failed-or-lost');

  // Looks up the nth allocation of the first job currently in the store.
  const allocationAt = (index) =>
    this.store.peekAll('job').objectAt(0).get('allocations').objectAt(index);

  // Check that the title in each cell has the right text
  assert.dom(rescheduledCell.querySelector('h4')).hasText('Rescheduled');
  assert.dom(restartedCell.querySelector('h4')).hasText('Restarted');

  // Check that both values are zero and non-links
  assert
    .dom(rescheduledCell.querySelector('a'))
    .doesNotExist('Rescheduled cell is not a link');
  assert
    .dom(rescheduledCell.querySelector('.failed-or-lost-link'))
    .hasText('0', 'Rescheduled cell has zero value');
  assert
    .dom(restartedCell.querySelector('a'))
    .doesNotExist('Restarted cell is not a link');
  assert
    .dom(restartedCell.querySelector('.failed-or-lost-link'))
    .hasText('0', 'Restarted cell has zero value');

  // A wild event appears! Change a recent task event to type "Restarting" in a task state:
  allocationAt(0)
    .get('states')
    .objectAt(0)
    .get('events')
    .objectAt(0)
    .set('type', 'Restarting');
  await settled();

  assert
    .dom(restartedCell.querySelector('.failed-or-lost-link'))
    .hasText(
      '1',
      'Restarted cell updates when a task event with type "Restarting" is added'
    );

  // Do the same on a second allocation.
  allocationAt(1)
    .get('states')
    .objectAt(0)
    .get('events')
    .objectAt(0)
    .set('type', 'Restarting');
  await settled();

  assert
    .dom(restartedCell.querySelector('.failed-or-lost-link'))
    .hasText(
      '2',
      'Restarted cell updates when a second task event with type "Restarting" is added'
    );

  // Trigger a reschedule! Give one of the allocations non-empty follow-up evaluation content.
  allocationAt(0)
    .get('followUpEvaluation')
    .set('content', { 'test-key': 'not-empty' });
  await settled();

  assert
    .dom(rescheduledCell.querySelector('.failed-or-lost-link'))
    .hasText(
      '1',
      'Rescheduled cell updates when a follow-up evaluation is set'
    );
  assert
    .dom(rescheduledCell.querySelector('a'))
    .exists('Rescheduled cell with a non-zero number is now a link');
});
module('deployment history', function () {
test('Deployment history can be searched', async function (assert) {
faker.seed(1);

View File

@@ -0,0 +1,62 @@
import { module, test } from 'qunit';
import { setupRenderingTest } from 'ember-qunit';
import { render } from '@ember/test-helpers';
import { hbs } from 'ember-cli-htmlbars';
import { componentA11yAudit } from 'nomad-ui/tests/helpers/a11y-audit';
// Rendering tests for the JobStatus::FailedOrLost summary cell.
module('Integration | Component | job-status/failed-or-lost', function (hooks) {
  setupRenderingTest(hooks);

  // Fresh pair of minimal stand-in allocation objects for each test.
  const buildAllocs = () => [
    { id: 1, name: 'alloc1' },
    { id: 2, name: 'alloc2' },
  ];

  test('it renders', async function (assert) {
    // Two DOM assertions plus the accessibility audit.
    assert.expect(3);

    this.set('allocs', buildAllocs());

    await render(hbs`<JobStatus::FailedOrLost
@title="Rescheduled"
@description="Rescheduled Allocations"
@allocs={{this.allocs}}
/>`);

    assert.dom('h4').hasText('Rescheduled');
    assert.dom('.failed-or-lost-link').hasText('2');
    await componentA11yAudit(this.element, assert);
  });

  test('it links or does not link appropriately', async function (assert) {
    this.set('allocs', buildAllocs());

    await render(hbs`<JobStatus::FailedOrLost
@title="Rescheduled"
@description="Rescheduled Allocations"
@allocs={{this.allocs}}
/>`);

    // With allocations present the count is rendered as an anchor.
    assert.dom('.failed-or-lost-link').hasTagName('a');

    // With no allocations the count is no longer an anchor.
    this.set('allocs', []);
    assert.dom('.failed-or-lost-link').doesNotHaveTagName('a');
  });
});