feat: baseline import-loop profile metrics
This commit is contained in:
parent
1ee105e29d
commit
cc89d7cea7
@ -2,7 +2,9 @@
|
||||
|
||||
## Unreleased
|
||||
|
||||
_No unreleased changes._
|
||||
### Changed
|
||||
|
||||
- Report import-loop RSS and CPU as baseline-adjusted plugin deltas alongside raw subprocess metrics so Crabpot dashboards do not treat harness import cost as plugin runtime cost.
|
||||
|
||||
## 0.3.5 - 2026-04-29
|
||||
|
||||
|
||||
@ -56,8 +56,9 @@ export async function buildCiSummary(options = {}) {
|
||||
loaderJitiCandidates: reports.platform?.summary?.jitiAlternativeCount ?? 0,
|
||||
importLoopP50Ms: reports.importLoop?.summary?.p50WallMs ?? 0,
|
||||
importLoopP95Ms: reports.importLoop?.summary?.p95WallMs ?? 0,
|
||||
importLoopMaxRssMb: reports.importLoop?.summary?.maxPeakRssMb ?? 0,
|
||||
importLoopMaxCpuMs: reports.importLoop?.summary?.maxCpuMsEstimate ?? 0,
|
||||
importLoopMetricBasis: reports.importLoop?.summary?.maxPluginPeakRssDeltaMb === undefined ? "raw" : "baseline-adjusted",
|
||||
importLoopMaxRssMb: reports.importLoop?.summary?.maxPluginPeakRssDeltaMb ?? reports.importLoop?.summary?.maxPeakRssMb ?? 0,
|
||||
importLoopMaxCpuMs: reports.importLoop?.summary?.maxPluginCpuDeltaMsEstimate ?? reports.importLoop?.summary?.maxCpuMsEstimate ?? 0,
|
||||
importLoopRssSampleCount: metricSampleCount(reports.importLoop, "rss", "maxPeakRssMb"),
|
||||
importLoopCpuSampleCount: metricSampleCount(reports.importLoop, "cpu", "maxCpuMsEstimate"),
|
||||
},
|
||||
@ -150,7 +151,7 @@ export function renderCiSummaryMarkdown(summary) {
|
||||
["Jiti loader candidates", summary.summary.loaderJitiCandidates],
|
||||
[
|
||||
"Import loop",
|
||||
`p50 ${summary.summary.importLoopP50Ms} ms / p95 ${summary.summary.importLoopP95Ms} ms / max RSS ${formatSampledMetric(summary.summary.importLoopMaxRssMb, summary.summary.importLoopRssSampleCount)} / CPU ${formatSampledMetric(summary.summary.importLoopMaxCpuMs, summary.summary.importLoopCpuSampleCount, "ms")}`,
|
||||
importLoopSummaryLabel(summary.summary),
|
||||
],
|
||||
],
|
||||
["Metric", "Value"],
|
||||
@ -249,6 +250,11 @@ function inferSampleCount(samples = [], kind) {
|
||||
}, 0);
|
||||
}
|
||||
|
||||
/**
 * Build the value shown in the "Import loop" row of the CI summary table.
 *
 * RSS and CPU figures are tagged "plugin delta" when
 * `summary.importLoopMetricBasis` is "baseline-adjusted"; any other basis is
 * tagged "raw".
 *
 * @param {object} summary - Flattened CI summary; only `importLoop*` fields are read.
 * @returns {string} Segments for p50, p95, RSS, and CPU joined by " / ".
 */
function importLoopSummaryLabel(summary) {
  let basisLabel = "raw";
  if (summary.importLoopMetricBasis === "baseline-adjusted") {
    basisLabel = "plugin delta";
  }

  const rssText = formatSampledMetric(summary.importLoopMaxRssMb, summary.importLoopRssSampleCount);
  const cpuText = formatSampledMetric(summary.importLoopMaxCpuMs, summary.importLoopCpuSampleCount, "ms");

  const segments = [
    `p50 ${summary.importLoopP50Ms} ms`,
    `p95 ${summary.importLoopP95Ms} ms`,
    `${basisLabel} RSS ${rssText}`,
    `${basisLabel} CPU ${cpuText}`,
  ];
  return segments.join(" / ");
}
|
||||
|
||||
function formatSampledMetric(value, count, unit = "MB") {
|
||||
if ((count ?? 0) <= 0) {
|
||||
return "n/a";
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { renderPaddedMarkdownTable, writeJsonMarkdownArtifacts } from "./artifacts.js";
|
||||
@ -24,25 +24,38 @@ export async function buildImportLoopProfile(options = {}) {
|
||||
const entrypoint = options.entrypoint ?? defaultImportLoopProfileOptions.entrypoint;
|
||||
assertRunCount(runs, 20);
|
||||
|
||||
const baseline = await buildBaselineProfile({ ...options, rootDir, runs });
|
||||
const samples = [];
|
||||
for (let index = 0; index < runs; index += 1) {
|
||||
samples.push(await runCaptureSample({ ...options, entrypoint, index, rootDir }));
|
||||
const sample = await runCaptureSample({ ...options, entrypoint, index, rootDir });
|
||||
samples.push(applyBaselineAdjustment(sample, baseline));
|
||||
}
|
||||
|
||||
const wallMs = samples.map((sample) => sample.wallMs).sort((left, right) => left - right);
|
||||
const pluginWallDeltaMs = samples.map((sample) => sample.pluginWallDeltaMs).sort((left, right) => left - right);
|
||||
const rssSampleCount = samples.reduce((sum, sample) => sum + (sample.rssSampleCount ?? (sample.peakRssMb > 0 ? 1 : 0)), 0);
|
||||
const cpuSampleCount = samples.reduce((sum, sample) => sum + (sample.cpuSampleCount ?? 0), 0);
|
||||
const statSampleCount = samples.reduce((sum, sample) => sum + (sample.statSampleCount ?? 0), 0);
|
||||
return {
|
||||
generatedAt: options.generatedAt ?? defaultImportLoopProfileOptions.generatedAt,
|
||||
mode: options.mode ?? "subprocess-cold-import-loop",
|
||||
mode: options.mode ?? "baseline-adjusted-cold-capture-loop",
|
||||
entrypoint,
|
||||
baseline,
|
||||
summary: {
|
||||
runs,
|
||||
baselineRuns: baseline.runs,
|
||||
baselineFailCount: baseline.failCount,
|
||||
p50WallMs: percentile(wallMs, 0.5),
|
||||
p95WallMs: percentile(wallMs, 0.95),
|
||||
p50PluginWallDeltaMs: percentile(pluginWallDeltaMs, 0.5),
|
||||
p95PluginWallDeltaMs: percentile(pluginWallDeltaMs, 0.95),
|
||||
maxPeakRssMb: Math.max(0, ...samples.map((sample) => sample.peakRssMb)),
|
||||
maxCpuMsEstimate: Math.max(0, ...samples.map((sample) => sample.cpuMsEstimate)),
|
||||
maxPluginPeakRssDeltaMb: Math.max(0, ...samples.map((sample) => sample.pluginPeakRssDeltaMb)),
|
||||
maxPluginCpuDeltaMsEstimate: Math.max(0, ...samples.map((sample) => sample.pluginCpuDeltaMsEstimate)),
|
||||
baselineReferenceWallMs: baseline.reference.wallMs,
|
||||
baselineReferencePeakRssMb: baseline.reference.peakRssMb,
|
||||
baselineReferenceCpuMsEstimate: baseline.reference.cpuMsEstimate,
|
||||
statSampleCount,
|
||||
rssSampleCount,
|
||||
cpuSampleCount,
|
||||
@ -58,6 +71,9 @@ export function validateImportLoopProfile(report) {
|
||||
if (report.summary.failCount > 0) {
|
||||
errors.push(`import loop has ${report.summary.failCount} failed sample(s)`);
|
||||
}
|
||||
if ((report.summary.baselineFailCount ?? report.baseline?.failCount ?? 0) > 0) {
|
||||
errors.push("import loop baseline capture failed");
|
||||
}
|
||||
if (report.summary.capturedCount < report.summary.runs) {
|
||||
errors.push("import loop did not capture at least one contract per run");
|
||||
}
|
||||
@ -93,6 +109,10 @@ export function renderImportLoopProfileMarkdown(report, options = {}) {
|
||||
"",
|
||||
markdownTable(summaryRows(report), ["Metric", "Value"]),
|
||||
"",
|
||||
"## Harness Baseline",
|
||||
"",
|
||||
markdownTable(baselineRows(report), ["Metric", "Value"]),
|
||||
"",
|
||||
"## Samples",
|
||||
"",
|
||||
markdownTable(
|
||||
@ -100,23 +120,128 @@ export function renderImportLoopProfileMarkdown(report, options = {}) {
|
||||
sample.index,
|
||||
sample.status,
|
||||
sample.capturedCount,
|
||||
formatOptionalMetric(sample.pluginWallDeltaMs, "ms"),
|
||||
formatSampledMetric(sample.pluginPeakRssDeltaMb, sample.rssSampleCount),
|
||||
formatSampledMetric(sample.pluginCpuDeltaMsEstimate, sample.cpuSampleCount, "ms"),
|
||||
`${sample.wallMs} ms`,
|
||||
formatSampledMetric(sample.peakRssMb, sample.rssSampleCount),
|
||||
formatSampledMetric(sample.cpuMsEstimate, sample.cpuSampleCount, "ms"),
|
||||
`${sample.rssSampleCount ?? 0}/${sample.cpuSampleCount ?? 0}`,
|
||||
sample.exitCode,
|
||||
]),
|
||||
["Run", "Status", "Captured", "Wall", "Peak RSS", "CPU Estimate", "RSS/CPU samples", "Exit"],
|
||||
[
|
||||
"Run",
|
||||
"Status",
|
||||
"Captured",
|
||||
"Plugin Wall Delta",
|
||||
"Plugin RSS Delta",
|
||||
"Plugin CPU Delta",
|
||||
"Raw Wall",
|
||||
"Raw Peak RSS",
|
||||
"Raw CPU Estimate",
|
||||
"RSS/CPU samples",
|
||||
"Exit",
|
||||
],
|
||||
),
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
/**
 * Capture a harness baseline by profiling a generated minimal plugin with the
 * same `runCaptureSample` routine used for real samples.
 *
 * The number of baseline runs is 0 when `options.baseline === false`;
 * otherwise it is `options.baselineRuns`, falling back to `min(options.runs, 3)`.
 * A non-positive count short-circuits to `emptyBaseline()` without writing or
 * running anything.
 *
 * @param {object} options - Profile options; reads `baseline`, `baselineRuns`,
 *   `runs`, and `rootDir`, and forwards everything to `runCaptureSample`.
 * @returns {Promise<object>} Baseline report with 50th-percentile `reference`
 *   values, `max` values, sample-count totals, `failCount`, and the raw samples.
 */
async function buildBaselineProfile(options) {
  let baselineRuns = 0;
  if (options.baseline !== false) {
    baselineRuns = options.baselineRuns ?? Math.min(options.runs, 3);
  }
  if (baselineRuns <= 0) {
    return emptyBaseline();
  }

  const entrypoint = await writeBaselineEntrypoint(options);

  // Runs are awaited one at a time on purpose: each sample is its own capture.
  const samples = [];
  let index = 0;
  while (index < baselineRuns) {
    const sample = await runCaptureSample({
      ...options,
      entrypoint,
      index,
      sampleName: "baseline",
      rootDir: options.rootDir,
    });
    samples.push(sample);
    index += 1;
  }

  // Sum an optional per-sample counter, treating a missing value as 0.
  const totalOf = (field) => samples.reduce((sum, sample) => sum + (sample[field] ?? 0), 0);
  // A sample counts as successful only when it exited cleanly AND was captured.
  const succeeded = (sample) => sample.exitCode === 0 && sample.status === "captured";

  const wallMs = sortedMetric(samples, "wallMs");
  const peakRssMb = sortedMetric(samples, "peakRssMb");
  const cpuMsEstimate = sortedMetric(samples, "cpuMsEstimate");

  const reference = {
    wallMs: percentile(wallMs, 0.5),
    peakRssMb: percentile(peakRssMb, 0.5),
    cpuMsEstimate: percentile(cpuMsEstimate, 0.5),
  };
  // The arrays are sorted ascending, so the last element is the maximum.
  const max = {
    wallMs: wallMs.at(-1) ?? 0,
    peakRssMb: peakRssMb.at(-1) ?? 0,
    cpuMsEstimate: cpuMsEstimate.at(-1) ?? 0,
  };

  return {
    mode: "minimal-plugin-capture",
    runs: baselineRuns,
    entrypoint: path.relative(options.rootDir, entrypoint),
    reference,
    max,
    statSampleCount: totalOf("statSampleCount"),
    rssSampleCount: totalOf("rssSampleCount"),
    cpuSampleCount: totalOf("cpuSampleCount"),
    failCount: samples.filter((sample) => !succeeded(sample)).length,
    samples,
  };
}
|
||||
|
||||
/**
 * Create the placeholder baseline used when baseline capture is disabled.
 *
 * Every metric and counter is zero, `entrypoint` is null, and `mode` is
 * "disabled". A fresh object graph is built on each call, so callers may
 * mutate the result freely.
 *
 * @returns {object} Baseline report with the same keys as a captured baseline.
 */
function emptyBaseline() {
  const zeroMetrics = () => ({ wallMs: 0, peakRssMb: 0, cpuMsEstimate: 0 });

  const baseline = {
    mode: "disabled",
    runs: 0,
    entrypoint: null,
    reference: zeroMetrics(),
    max: zeroMetrics(),
    statSampleCount: 0,
    rssSampleCount: 0,
    cpuSampleCount: 0,
    failCount: 0,
    samples: [],
  };
  return baseline;
}
|
||||
|
||||
/**
 * Write the generated baseline plugin module into the profile output directory.
 *
 * The output directory is `options.outputDir` (or the default from
 * `defaultImportLoopProfileOptions`) resolved against `options.rootDir`; it is
 * created recursively if missing. The module default-exports an object whose
 * `register(api)` registers a single no-op tool.
 *
 * @param {object} options - Reads `rootDir` and optional `outputDir`.
 * @returns {Promise<string>} Path of the written `baseline-plugin.mjs` file.
 */
async function writeBaselineEntrypoint(options) {
  const configuredDir = options.outputDir ?? defaultImportLoopProfileOptions.outputDir;
  const outputDir = resolveFromRoot(options.rootDir, configuredDir);
  const baselinePath = path.join(outputDir, "baseline-plugin.mjs");

  const moduleLines = [
    "export default {",
    " register(api) {",
    " api.registerTool({ name: 'baseline_tool', inputSchema: { type: 'object' }, run() {} });",
    " },",
    "};",
    "",
  ];
  const moduleSource = moduleLines.join("\n");

  await mkdir(path.dirname(baselinePath), { recursive: true });
  await writeFile(baselinePath, moduleSource, "utf8");
  return baselinePath;
}
|
||||
|
||||
async function runCaptureSample(options) {
|
||||
const outputDir = resolveFromRoot(
|
||||
options.rootDir,
|
||||
options.outputDir ?? defaultImportLoopProfileOptions.outputDir,
|
||||
);
|
||||
const outputPath = path.join(outputDir, `capture-${options.index}.json`);
|
||||
const outputPath = path.join(outputDir, `${options.sampleName ?? "capture"}-${options.index}.json`);
|
||||
await mkdir(path.dirname(outputPath), { recursive: true });
|
||||
|
||||
const command = buildCaptureCommand({ ...options, outputPath });
|
||||
@ -147,10 +272,33 @@ async function runCaptureSample(options) {
|
||||
function summaryRows(report) {
|
||||
return [
|
||||
["runs", report.summary.runs],
|
||||
["baselineRuns", report.summary.baselineRuns ?? report.baseline?.runs ?? 0],
|
||||
["baselineFailCount", report.summary.baselineFailCount ?? report.baseline?.failCount ?? 0],
|
||||
["p50WallMs", report.summary.p50WallMs],
|
||||
["p95WallMs", report.summary.p95WallMs],
|
||||
...(Number.isFinite(report.summary.p50PluginWallDeltaMs)
|
||||
? [
|
||||
["p50PluginWallDeltaMs", report.summary.p50PluginWallDeltaMs],
|
||||
["p95PluginWallDeltaMs", report.summary.p95PluginWallDeltaMs],
|
||||
["maxPluginPeakRssDeltaMb", formatSampledMetric(report.summary.maxPluginPeakRssDeltaMb, report.summary.rssSampleCount)],
|
||||
[
|
||||
"maxPluginCpuDeltaMsEstimate",
|
||||
formatSampledMetric(report.summary.maxPluginCpuDeltaMsEstimate, report.summary.cpuSampleCount, "ms"),
|
||||
],
|
||||
]
|
||||
: []),
|
||||
["maxPeakRssMb", formatSampledMetric(report.summary.maxPeakRssMb, report.summary.rssSampleCount)],
|
||||
["maxCpuMsEstimate", formatSampledMetric(report.summary.maxCpuMsEstimate, report.summary.cpuSampleCount, "ms")],
|
||||
...(Number.isFinite(report.summary.baselineReferenceWallMs)
|
||||
? [
|
||||
["baselineReferenceWallMs", `${report.summary.baselineReferenceWallMs} ms`],
|
||||
["baselineReferencePeakRssMb", formatSampledMetric(report.summary.baselineReferencePeakRssMb, report.baseline?.rssSampleCount ?? 0)],
|
||||
[
|
||||
"baselineReferenceCpuMsEstimate",
|
||||
formatSampledMetric(report.summary.baselineReferenceCpuMsEstimate, report.baseline?.cpuSampleCount ?? 0, "ms"),
|
||||
],
|
||||
]
|
||||
: []),
|
||||
["statSampleCount", report.summary.statSampleCount ?? 0],
|
||||
["rssSampleCount", report.summary.rssSampleCount ?? 0],
|
||||
["cpuSampleCount", report.summary.cpuSampleCount ?? 0],
|
||||
@ -159,6 +307,23 @@ function summaryRows(report) {
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Produce the [metric, value] rows for the "Harness Baseline" markdown table.
 *
 * Reports without a `baseline` section fall back to `emptyBaseline()`. RSS and
 * CPU values go through `formatSampledMetric` with the baseline's own sample
 * counts; wall-clock values are always printed in milliseconds.
 *
 * @param {object} report - Import-loop profile report.
 * @returns {Array<[string, unknown]>} Rows in display order.
 */
function baselineRows(report) {
  const baseline = report.baseline ?? emptyBaseline();
  const { reference, max, rssSampleCount, cpuSampleCount } = baseline;

  const wall = (value) => `${value ?? 0} ms`;
  const rss = (value) => formatSampledMetric(value ?? 0, rssSampleCount);
  const cpu = (value) => formatSampledMetric(value ?? 0, cpuSampleCount, "ms");

  const rows = [];
  rows.push(["mode", baseline.mode]);
  rows.push(["runs", baseline.runs]);
  rows.push(["entrypoint", baseline.entrypoint ?? "-"]);
  rows.push(["referenceWallMs", wall(reference?.wallMs)]);
  rows.push(["referencePeakRssMb", rss(reference?.peakRssMb)]);
  rows.push(["referenceCpuMsEstimate", cpu(reference?.cpuMsEstimate)]);
  rows.push(["maxWallMs", wall(max?.wallMs)]);
  rows.push(["maxPeakRssMb", rss(max?.peakRssMb)]);
  rows.push(["maxCpuMsEstimate", cpu(max?.cpuMsEstimate)]);
  rows.push(["statSampleCount", baseline.statSampleCount ?? 0]);
  rows.push(["failCount", baseline.failCount ?? 0]);
  return rows;
}
|
||||
|
||||
function formatSampledMetric(value, count, unit = "MB") {
|
||||
if ((count ?? 0) <= 0) {
|
||||
return "n/a";
|
||||
@ -166,6 +331,31 @@ function formatSampledMetric(value, count, unit = "MB") {
|
||||
return `${value} ${unit}`;
|
||||
}
|
||||
|
||||
/**
 * Format a metric that may be absent from the report.
 *
 * @param {unknown} value - Metric value; only finite numbers are printed.
 * @param {string} unit - Unit suffix appended after a space.
 * @returns {string} "<value> <unit>", or "n/a" when `value` is not a finite number.
 */
function formatOptionalMetric(value, unit) {
  return Number.isFinite(value) ? `${value} ${unit}` : "n/a";
}
|
||||
|
||||
/**
 * Return a copy of `sample` extended with baseline-adjusted plugin deltas.
 *
 * Each delta is the raw sample metric minus the matching
 * `baseline.reference` value, passed through `roundNonNegative`: wall and CPU
 * deltas use 0 decimal digits, the RSS delta uses 1. The input sample is not
 * mutated.
 *
 * @param {object} sample - Raw capture sample (`wallMs`, `peakRssMb`, `cpuMsEstimate`).
 * @param {object} baseline - Baseline report providing `reference` metrics.
 * @returns {object} New sample with `pluginWallDeltaMs`,
 *   `pluginPeakRssDeltaMb`, and `pluginCpuDeltaMsEstimate` added.
 */
function applyBaselineAdjustment(sample, baseline) {
  const { reference } = baseline;

  const pluginWallDeltaMs = roundNonNegative(sample.wallMs - reference.wallMs, 0);
  const pluginPeakRssDeltaMb = roundNonNegative(sample.peakRssMb - reference.peakRssMb, 1);
  const pluginCpuDeltaMsEstimate = roundNonNegative(sample.cpuMsEstimate - reference.cpuMsEstimate, 0);

  return { ...sample, pluginWallDeltaMs, pluginPeakRssDeltaMb, pluginCpuDeltaMsEstimate };
}
|
||||
|
||||
/**
 * Collect one field from every sample and return the values sorted ascending.
 *
 * @param {object[]} samples - Samples to read from; the array is not modified.
 * @param {string} field - Property name to extract from each sample.
 * @returns {number[]} New array of the extracted values in numeric ascending order.
 */
function sortedMetric(samples, field) {
  const values = samples.map((sample) => sample[field]);
  // An explicit numeric comparator avoids the default lexicographic ordering.
  values.sort((left, right) => left - right);
  return values;
}
|
||||
|
||||
/**
 * Round `value` to `digits` decimal places and clamp the result to be >= 0.
 *
 * Non-finite results (for example NaN from subtracting a missing metric) are
 * reported as 0. `Math.max(0, NaN)` is NaN, so without this guard a single
 * unmeasured sample would poison the summary maxima and the percentile sort,
 * and would serialise to `null` in the JSON report.
 *
 * @param {number} value - Raw delta; may be negative or non-finite.
 * @param {number} digits - Number of decimal places to keep.
 * @returns {number} Finite, non-negative rounded value.
 */
function roundNonNegative(value, digits) {
  const scale = 10 ** digits;
  const rounded = Math.round(value * scale) / scale;
  if (!Number.isFinite(rounded)) {
    return 0;
  }
  return Math.max(0, rounded);
}
|
||||
|
||||
function buildCaptureCommand(options) {
|
||||
if (typeof options.captureCommand === "function") {
|
||||
return options.captureCommand({
|
||||
|
||||
@ -97,6 +97,8 @@ test("ci summary rolls up compatibility, policy, ref diff, and profile findings"
|
||||
p95WallMs: 75,
|
||||
maxPeakRssMb: 40,
|
||||
maxCpuMsEstimate: 30,
|
||||
maxPluginPeakRssDeltaMb: 8,
|
||||
maxPluginCpuDeltaMsEstimate: 6,
|
||||
rssSampleCount: 2,
|
||||
cpuSampleCount: 2,
|
||||
},
|
||||
@ -110,9 +112,11 @@ test("ci summary rolls up compatibility, policy, ref diff, and profile findings"
|
||||
assert.equal(summary.summary.platformWindowsRisks, 3);
|
||||
assert.equal(summary.summary.loaderJitiCandidates, 1);
|
||||
assert.equal(summary.summary.importLoopP50Ms, 50);
|
||||
assert.equal(summary.summary.importLoopMetricBasis, "baseline-adjusted");
|
||||
assert.equal(summary.summary.importLoopMaxRssMb, 8);
|
||||
assert.match(renderCiSummaryMarkdown(summary), /Crabpot CI Summary/);
|
||||
assert.match(renderCiSummaryMarkdown(summary), /Windows portability risks/);
|
||||
assert.match(renderCiSummaryMarkdown(summary), /p50 50 ms \/ p95 75 ms \/ max RSS 40 MB \/ CPU 30 ms/);
|
||||
assert.match(renderCiSummaryMarkdown(summary), /p50 50 ms \/ p95 75 ms \/ plugin delta RSS 8 MB \/ plugin delta CPU 6 ms/);
|
||||
assert.match(renderCiSummaryMarkdown(summary), /\| P0 issues\s+\| 1\s+\|/);
|
||||
});
|
||||
|
||||
|
||||
@ -30,12 +30,19 @@ test("import loop profile measures repeated cold capture subprocesses", async ()
|
||||
|
||||
assert.deepEqual(validateImportLoopProfile(profile), []);
|
||||
assert.equal(profile.summary.runs, 2);
|
||||
assert.equal(profile.summary.baselineRuns, 2);
|
||||
assert.equal(profile.summary.baselineFailCount, 0);
|
||||
assert.equal(profile.summary.failCount, 0);
|
||||
assert.ok(profile.summary.capturedCount >= 2);
|
||||
assert.ok(profile.summary.p50WallMs > 0);
|
||||
assert.ok(profile.summary.p50PluginWallDeltaMs >= 0);
|
||||
assert.ok(profile.summary.maxPluginPeakRssDeltaMb >= 0);
|
||||
assert.ok(profile.baseline.reference.wallMs > 0);
|
||||
assert.ok(profile.samples.every((sample) => Number.isFinite(sample.pluginCpuDeltaMsEstimate)));
|
||||
assert.ok(profile.samples.every((sample) => sample.exitCode === 0));
|
||||
assert.match(renderImportLoopProfileMarkdown(profile), /Import Loop Profile/);
|
||||
assert.match(renderImportLoopProfileMarkdown(profile), /CPU Estimate/);
|
||||
assert.match(renderImportLoopProfileMarkdown(profile), /Harness Baseline/);
|
||||
assert.match(renderImportLoopProfileMarkdown(profile), /Plugin CPU Delta/);
|
||||
});
|
||||
|
||||
test("import loop profile can use a custom capture script and opt-in env", async () => {
|
||||
@ -64,6 +71,7 @@ test("import loop profile can use a custom capture script and opt-in env", async
|
||||
});
|
||||
|
||||
assert.equal(profile.summary.failCount, 0);
|
||||
assert.equal(profile.summary.baselineRuns, 1);
|
||||
assert.equal(profile.summary.capturedCount, 1);
|
||||
});
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user