Compare commits

...

1 Commit

Author SHA1 Message Date
Vincent Koc
522b257955
fix(reports): split startup health latency 2026-05-03 23:27:50 -07:00
5 changed files with 132 additions and 14 deletions

View File

@ -92,7 +92,7 @@ export function evaluateRecord(record, scenario, options = {}) {
});
const finalGatewayState = record.finalMetrics?.service?.gatewayState ?? null;
const healthFailures = countHealthFailures(record);
const healthP95Ms = collectHealthP95(record);
const healthP95 = collectHealthP95Evidence(record);
const soakEvidence = collectSoakEvidence(allResults);
const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults);
const browserAutomationEvidence = collectBrowserAutomationEvidence(allResults);
@ -191,13 +191,15 @@ export function evaluateRecord(record, scenario, options = {}) {
});
}
if (typeof thresholds.healthP95Ms === "number" && healthP95Ms !== null && healthP95Ms > thresholds.healthP95Ms) {
if (typeof thresholds.healthP95Ms === "number" && healthP95.max !== null && healthP95.max > thresholds.healthP95Ms) {
const healthScope = healthP95.maxPhaseKind === "startup" ? "startup" : "post-ready";
const phaseSuffix = healthP95.maxPhaseId ? ` during ${healthP95.maxPhaseId}` : "";
violations.push({
kind: "health",
metric: "healthP95Ms",
expected: `<= ${thresholds.healthP95Ms}`,
actual: healthP95Ms,
message: `gateway health p95 ${healthP95Ms}ms exceeded threshold ${thresholds.healthP95Ms}ms`
actual: healthP95.max,
message: `gateway ${healthScope} health p95 ${healthP95.max}ms exceeded threshold ${thresholds.healthP95Ms}ms${phaseSuffix}`
});
}
@ -730,7 +732,11 @@ export function evaluateRecord(record, scenario, options = {}) {
missingDependencyErrors,
finalGatewayState,
healthFailures,
healthP95Ms,
healthP95Ms: healthP95.max,
startupHealthP95Ms: healthP95.startup,
postReadyHealthP95Ms: healthP95.postReady,
healthP95PhaseId: healthP95.maxPhaseId,
healthP95PhaseKind: healthP95.maxPhaseKind,
soakEvidence,
mcpBridgeEvidence,
mcpInitializeMs: mcpBridgeEvidence.initializeMs,
@ -1851,24 +1857,63 @@ function countGatewayRestarts(record) {
return commandRestarts + countLogMetric(record, "gatewayRestartMentions");
}
function collectHealthP95(record) {
const p95Values = [];
/**
 * Phase ids whose health samples are labelled "startup" by classifyHealthPhase.
 * Any phase id that is not a member of this set is labelled "post-ready".
 */
const STARTUP_HEALTH_PHASE_IDS = new Set([
  "baseline", "cold-start", "gateway", "gateway-start",
  "post-upgrade", "provision", "restart", "source-runtime",
  "start", "upgrade", "warm-restart"
]);
/**
 * Collects health p95 latencies from a run record and summarizes them by phase kind.
 *
 * Reads `metrics.healthSummary.p95Ms` from every element of `record.phases` and
 * `healthSummary.p95Ms` from `record.finalMetrics`; only values whose type is
 * "number" are recorded. Each recorded value is tagged with the phase id and the
 * kind returned by `classifyHealthPhase`; the final-metrics sample is tagged
 * with id "final" and kind "post-ready".
 *
 * NOTE(review): the statements below that use `p95Values`, the bare
 * `return null;`, and `return Math.max(...p95Values);` have no matching
 * declaration in this function as listed and leave the braces unbalanced —
 * they look like removed-side lines of the compare view. Confirm against the
 * committed file before treating this listing as the function body.
 *
 * @param {object} record - Run record; `phases` and `finalMetrics` are both optional.
 * @returns {{max: (number|null), startup: (number|null), postReady: (number|null), maxPhaseId: (string|null), maxPhaseKind: (string|null)}}
 *   Summary object; `max`, `maxPhaseId` and `maxPhaseKind` are null when no numeric
 *   p95 was found. `startup` / `postReady` come from `maxNullable` (defined
 *   elsewhere — presumably null when given no values; verify).
 */
function collectHealthP95Evidence(record) {
const entries = [];
// A missing `record.phases` is treated as an empty list.
for (const phase of record.phases ?? []) {
const p95 = phase.metrics?.healthSummary?.p95Ms;
if (typeof p95 === "number") {
p95Values.push(p95);
entries.push({
phaseId: phase.id ?? null,
phaseKind: classifyHealthPhase(phase.id),
p95
});
}
}
// The final metrics snapshot is recorded as its own entry, always kind "post-ready".
const finalP95 = record.finalMetrics?.healthSummary?.p95Ms;
if (typeof finalP95 === "number") {
p95Values.push(finalP95);
entries.push({
phaseId: "final",
phaseKind: "post-ready",
p95: finalP95
});
}
if (p95Values.length === 0) {
return null;
// No numeric p95 anywhere: return the summary shape with every field null.
if (entries.length === 0) {
return {
max: null,
startup: null,
postReady: null,
maxPhaseId: null,
maxPhaseKind: null
};
}
return Math.max(...p95Values);
// Strict `>` keeps the earliest entry when several share the highest p95.
const maxEntry = entries.reduce((max, entry) => (entry.p95 > max.p95 ? entry : max), entries[0]);
return {
max: maxEntry.p95,
startup: maxNullable(...entries.filter((entry) => entry.phaseKind === "startup").map((entry) => entry.p95)),
postReady: maxNullable(...entries.filter((entry) => entry.phaseKind === "post-ready").map((entry) => entry.p95)),
maxPhaseId: maxEntry.phaseId,
maxPhaseKind: maxEntry.phaseKind
};
}
/**
 * Labels a phase id as either "startup" or "post-ready".
 *
 * @param {*} phaseId - Phase identifier to look up in STARTUP_HEALTH_PHASE_IDS.
 * @returns {"startup"|"post-ready"} "startup" when the id is a member of the
 *   set; "post-ready" for every other value, including undefined or null.
 */
function classifyHealthPhase(phaseId) {
  if (STARTUP_HEALTH_PHASE_IDS.has(phaseId)) {
    return "startup";
  }
  return "post-ready";
}
function collectSoakEvidence(results) {

View File

@ -19,7 +19,9 @@ export const PERFORMANCE_METRICS = [
{ id: "agentCleanupMaxMs", title: "Agent Cleanup Max", unit: "ms", regressionKey: "agentLatencyRegressionPercent" },
{ id: "coldPreProviderMs", title: "Cold Pre-Provider", unit: "ms", regressionKey: "agentLatencyRegressionPercent" },
{ id: "warmPreProviderMs", title: "Warm Pre-Provider", unit: "ms", regressionKey: "agentLatencyRegressionPercent" },
{ id: "healthP95Ms", title: "Health p95", unit: "ms", regressionKey: "startupRegressionPercent" },
{ id: "healthP95Ms", title: "Phase Health p95", unit: "ms", regressionKey: "startupRegressionPercent" },
{ id: "startupHealthP95Ms", title: "Startup Health p95", unit: "ms", regressionKey: "startupRegressionPercent" },
{ id: "postReadyHealthP95Ms", title: "Post-Ready Health p95", unit: "ms", regressionKey: "eventLoopRegressionPercent" },
{ id: "runtimeDepsStagingMs", title: "Runtime Deps Staging", unit: "ms", regressionKey: "startupRegressionPercent" }
];

View File

@ -405,6 +405,8 @@ function metricDeltas(baseline, current) {
"timeToListeningMs",
"timeToHealthReadyMs",
"healthP95Ms",
"startupHealthP95Ms",
"postReadyHealthP95Ms",
"healthFailures",
"readinessFailures",
"missingDependencyErrors",

View File

@ -115,7 +115,12 @@ export function renderMarkdownReport(report) {
lines.push(`- Missing dependency errors: ${record.measurements.missingDependencyErrors ?? "unknown"}`);
lines.push(`- Final gateway state: ${record.measurements.finalGatewayState ?? "unknown"}`);
lines.push(`- Health failures: ${record.measurements.healthFailures ?? "unknown"}`);
lines.push(`- Health p95: ${record.measurements.healthP95Ms ?? "unknown"} ms`);
lines.push(`- Phase health p95: ${record.measurements.healthP95Ms ?? "unknown"} ms`);
lines.push(`- Startup health p95: ${record.measurements.startupHealthP95Ms ?? "unknown"} ms`);
lines.push(`- Post-ready health p95: ${record.measurements.postReadyHealthP95Ms ?? "unknown"} ms`);
if (record.measurements.healthP95PhaseId) {
lines.push(`- Slowest health phase: ${record.measurements.healthP95PhaseId} (${record.measurements.healthP95PhaseKind ?? "unknown"})`);
}
if (record.measurements.soakEvidence?.available) {
lines.push(`- Soak trend: duration ${record.measurements.soakDurationMs ?? "unknown"} ms; iterations ${record.measurements.soakIterations ?? "unknown"}; command p95 ${record.measurements.soakCommandP95Ms ?? "unknown"} ms; health p95 ${record.measurements.soakHealthP95Ms ?? "unknown"} ms; RSS growth ${record.measurements.rssGrowthMb ?? "unknown"} MB; gateway RSS growth ${record.measurements.gatewayRssGrowthMb ?? "unknown"} MB`);
}

View File

@ -338,6 +338,7 @@ export async function runSelfCheck(flags = {}) {
checks.push(markdownFailureCardsCheck());
checks.push(reportRecommendedNextScenarioCheck());
checks.push(readinessClassificationCheck());
checks.push(healthP95SplitEvaluationCheck());
checks.push(await resourceRoleAttributionCheck(tmp));
checks.push(await resourceRootCommandRoleBoundaryCheck());
checks.push(await resourceRolePollutionCheck());
@ -3648,6 +3649,69 @@ function readinessClassificationCheck() {
}
}
/**
 * Self-check for the startup / post-ready health p95 split.
 *
 * Builds a synthetic record with a "cold-start" phase (p95 30), an
 * "api-latency" phase (p95 220) and final metrics (p95 45), evaluates it with
 * a healthP95Ms threshold of 100, then asserts on the `measurements` and
 * `violations` that are read back from the record afterwards.
 *
 * @returns {{id: string, status: string, command: string, durationMs: number, message?: string}}
 *   A PASS result when every assertion holds; otherwise a FAIL result whose
 *   `message` is the caught error's message.
 */
function healthP95SplitEvaluationCheck() {
  // Both outcomes share the same identity fields; only status/message differ.
  const toResult = (status, extra = {}) => ({
    id: "health-p95-split",
    status,
    command: "evaluate health p95 startup/post-ready split",
    durationMs: 0,
    ...extra
  });
  // Three successful samples whose p95 and max are the same latency.
  const summaryFor = (latencyMs) => ({
    count: 3,
    okCount: 3,
    failureCount: 0,
    p95Ms: latencyMs,
    maxMs: latencyMs
  });
  const phaseFor = (id, title, latencyMs) => ({
    id,
    title,
    results: [],
    metrics: {
      logs: zeroLogMetrics(),
      health: { ok: true },
      healthSummary: summaryFor(latencyMs)
    }
  });

  try {
    const record = {
      scenario: "gateway-performance",
      title: "Gateway Performance",
      status: "PASS",
      phases: [
        phaseFor("cold-start", "Cold start", 30),
        phaseFor("api-latency", "API latency", 220)
      ],
      finalMetrics: {
        service: { gatewayState: "running" },
        logs: zeroLogMetrics(),
        healthSummary: summaryFor(45)
      }
    };
    const scenario = {
      id: "gateway-performance",
      thresholds: { healthP95Ms: 100 }
    };
    const options = { surface: { thresholds: {} }, targetPlan: { kind: "local-build" } };
    evaluateRecord(record, scenario, options);

    // [measurement key, expected value, assertion label]
    const expectedMeasurements = [
      ["healthP95Ms", 220, "phase health p95"],
      ["startupHealthP95Ms", 30, "startup health p95"],
      ["postReadyHealthP95Ms", 220, "post-ready health p95"],
      ["healthP95PhaseId", "api-latency", "health p95 phase id"],
      ["healthP95PhaseKind", "post-ready", "health p95 phase kind"]
    ];
    for (const [key, expected, label] of expectedMeasurements) {
      assertEqual(record.measurements[key], expected, label);
    }

    // The violation message must carry both the scope and the offending phase id.
    const namesPostReadyPhase = record.violations.some(
      (violation) => violation.message.includes("post-ready health p95") && violation.message.includes("api-latency")
    );
    assertEqual(namesPostReadyPhase, true, "health violation names post-ready phase");

    return toResult("PASS");
  } catch (error) {
    return toResult("FAIL", { message: error.message });
  }
}
async function resourceRoleAttributionCheck(tmp) {
const command = "node -e 'setTimeout(() => {}, 650)'";
const artifactPath = join(tmp, "resource-role-attribution.jsonl");