feat: baseline import-loop profile metrics

This commit is contained in:
Vincent Koc 2026-04-29 19:26:46 -07:00
parent 1ee105e29d
commit cc89d7cea7
No known key found for this signature in database
5 changed files with 221 additions and 11 deletions

View File

@ -2,7 +2,9 @@
## Unreleased
_No unreleased changes._
### Changed
- Report import-loop RSS and CPU as baseline-adjusted plugin deltas alongside raw subprocess metrics so Crabpot dashboards do not treat harness import cost as plugin runtime cost.
## 0.3.5 - 2026-04-29

View File

@ -56,8 +56,9 @@ export async function buildCiSummary(options = {}) {
loaderJitiCandidates: reports.platform?.summary?.jitiAlternativeCount ?? 0,
importLoopP50Ms: reports.importLoop?.summary?.p50WallMs ?? 0,
importLoopP95Ms: reports.importLoop?.summary?.p95WallMs ?? 0,
importLoopMaxRssMb: reports.importLoop?.summary?.maxPeakRssMb ?? 0,
importLoopMaxCpuMs: reports.importLoop?.summary?.maxCpuMsEstimate ?? 0,
importLoopMetricBasis: reports.importLoop?.summary?.maxPluginPeakRssDeltaMb === undefined ? "raw" : "baseline-adjusted",
importLoopMaxRssMb: reports.importLoop?.summary?.maxPluginPeakRssDeltaMb ?? reports.importLoop?.summary?.maxPeakRssMb ?? 0,
importLoopMaxCpuMs: reports.importLoop?.summary?.maxPluginCpuDeltaMsEstimate ?? reports.importLoop?.summary?.maxCpuMsEstimate ?? 0,
importLoopRssSampleCount: metricSampleCount(reports.importLoop, "rss", "maxPeakRssMb"),
importLoopCpuSampleCount: metricSampleCount(reports.importLoop, "cpu", "maxCpuMsEstimate"),
},
@ -150,7 +151,7 @@ export function renderCiSummaryMarkdown(summary) {
["Jiti loader candidates", summary.summary.loaderJitiCandidates],
[
"Import loop",
`p50 ${summary.summary.importLoopP50Ms} ms / p95 ${summary.summary.importLoopP95Ms} ms / max RSS ${formatSampledMetric(summary.summary.importLoopMaxRssMb, summary.summary.importLoopRssSampleCount)} / CPU ${formatSampledMetric(summary.summary.importLoopMaxCpuMs, summary.summary.importLoopCpuSampleCount, "ms")}`,
importLoopSummaryLabel(summary.summary),
],
],
["Metric", "Value"],
@ -249,6 +250,11 @@ function inferSampleCount(samples = [], kind) {
}, 0);
}
/**
 * Builds the single-line "Import loop" value shown in the CI summary table.
 *
 * The RSS and CPU segments are prefixed with "plugin delta" when
 * `summary.importLoopMetricBasis` is "baseline-adjusted" and with "raw" for
 * any other basis, so the reader can tell which kind of number is shown.
 *
 * @param {object} summary - Flattened CI summary carrying the `importLoop*` fields.
 * @returns {string} The p50 / p95 / RSS / CPU segments joined by " / ".
 */
function importLoopSummaryLabel(summary) {
  const {
    importLoopMetricBasis: basis,
    importLoopP50Ms: p50,
    importLoopP95Ms: p95,
    importLoopMaxRssMb: maxRss,
    importLoopRssSampleCount: rssSamples,
    importLoopMaxCpuMs: maxCpu,
    importLoopCpuSampleCount: cpuSamples,
  } = summary;
  const prefix = basis === "baseline-adjusted" ? "plugin delta" : "raw";
  const rssText = formatSampledMetric(maxRss, rssSamples);
  const cpuText = formatSampledMetric(maxCpu, cpuSamples, "ms");
  const segments = [
    `p50 ${p50} ms`,
    `p95 ${p95} ms`,
    `${prefix} RSS ${rssText}`,
    `${prefix} CPU ${cpuText}`,
  ];
  return segments.join(" / ");
}
function formatSampledMetric(value, count, unit = "MB") {
if ((count ?? 0) <= 0) {
return "n/a";

View File

@ -1,4 +1,4 @@
import { mkdir } from "node:fs/promises";
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { renderPaddedMarkdownTable, writeJsonMarkdownArtifacts } from "./artifacts.js";
@ -24,25 +24,38 @@ export async function buildImportLoopProfile(options = {}) {
const entrypoint = options.entrypoint ?? defaultImportLoopProfileOptions.entrypoint;
assertRunCount(runs, 20);
const baseline = await buildBaselineProfile({ ...options, rootDir, runs });
const samples = [];
for (let index = 0; index < runs; index += 1) {
samples.push(await runCaptureSample({ ...options, entrypoint, index, rootDir }));
const sample = await runCaptureSample({ ...options, entrypoint, index, rootDir });
samples.push(applyBaselineAdjustment(sample, baseline));
}
const wallMs = samples.map((sample) => sample.wallMs).sort((left, right) => left - right);
const pluginWallDeltaMs = samples.map((sample) => sample.pluginWallDeltaMs).sort((left, right) => left - right);
const rssSampleCount = samples.reduce((sum, sample) => sum + (sample.rssSampleCount ?? (sample.peakRssMb > 0 ? 1 : 0)), 0);
const cpuSampleCount = samples.reduce((sum, sample) => sum + (sample.cpuSampleCount ?? 0), 0);
const statSampleCount = samples.reduce((sum, sample) => sum + (sample.statSampleCount ?? 0), 0);
return {
generatedAt: options.generatedAt ?? defaultImportLoopProfileOptions.generatedAt,
mode: options.mode ?? "subprocess-cold-import-loop",
mode: options.mode ?? "baseline-adjusted-cold-capture-loop",
entrypoint,
baseline,
summary: {
runs,
baselineRuns: baseline.runs,
baselineFailCount: baseline.failCount,
p50WallMs: percentile(wallMs, 0.5),
p95WallMs: percentile(wallMs, 0.95),
p50PluginWallDeltaMs: percentile(pluginWallDeltaMs, 0.5),
p95PluginWallDeltaMs: percentile(pluginWallDeltaMs, 0.95),
maxPeakRssMb: Math.max(0, ...samples.map((sample) => sample.peakRssMb)),
maxCpuMsEstimate: Math.max(0, ...samples.map((sample) => sample.cpuMsEstimate)),
maxPluginPeakRssDeltaMb: Math.max(0, ...samples.map((sample) => sample.pluginPeakRssDeltaMb)),
maxPluginCpuDeltaMsEstimate: Math.max(0, ...samples.map((sample) => sample.pluginCpuDeltaMsEstimate)),
baselineReferenceWallMs: baseline.reference.wallMs,
baselineReferencePeakRssMb: baseline.reference.peakRssMb,
baselineReferenceCpuMsEstimate: baseline.reference.cpuMsEstimate,
statSampleCount,
rssSampleCount,
cpuSampleCount,
@ -58,6 +71,9 @@ export function validateImportLoopProfile(report) {
if (report.summary.failCount > 0) {
errors.push(`import loop has ${report.summary.failCount} failed sample(s)`);
}
if ((report.summary.baselineFailCount ?? report.baseline?.failCount ?? 0) > 0) {
errors.push("import loop baseline capture failed");
}
if (report.summary.capturedCount < report.summary.runs) {
errors.push("import loop did not capture at least one contract per run");
}
@ -93,6 +109,10 @@ export function renderImportLoopProfileMarkdown(report, options = {}) {
"",
markdownTable(summaryRows(report), ["Metric", "Value"]),
"",
"## Harness Baseline",
"",
markdownTable(baselineRows(report), ["Metric", "Value"]),
"",
"## Samples",
"",
markdownTable(
@ -100,23 +120,128 @@ export function renderImportLoopProfileMarkdown(report, options = {}) {
sample.index,
sample.status,
sample.capturedCount,
formatOptionalMetric(sample.pluginWallDeltaMs, "ms"),
formatSampledMetric(sample.pluginPeakRssDeltaMb, sample.rssSampleCount),
formatSampledMetric(sample.pluginCpuDeltaMsEstimate, sample.cpuSampleCount, "ms"),
`${sample.wallMs} ms`,
formatSampledMetric(sample.peakRssMb, sample.rssSampleCount),
formatSampledMetric(sample.cpuMsEstimate, sample.cpuSampleCount, "ms"),
`${sample.rssSampleCount ?? 0}/${sample.cpuSampleCount ?? 0}`,
sample.exitCode,
]),
["Run", "Status", "Captured", "Wall", "Peak RSS", "CPU Estimate", "RSS/CPU samples", "Exit"],
[
"Run",
"Status",
"Captured",
"Plugin Wall Delta",
"Plugin RSS Delta",
"Plugin CPU Delta",
"Raw Wall",
"Raw Peak RSS",
"Raw CPU Estimate",
"RSS/CPU samples",
"Exit",
],
),
].join("\n");
}
/**
 * Captures the harness baseline by running the capture sampler against a
 * generated minimal plugin entrypoint.
 *
 * The number of baseline runs is 0 when `options.baseline === false`;
 * otherwise it is `options.baselineRuns`, falling back to
 * `Math.min(options.runs, 3)`. A run count of zero or less yields the
 * disabled baseline from `emptyBaseline()` without writing any entrypoint.
 *
 * Samples are captured one after another (never in parallel).
 *
 * @param {object} options - Profile options; must carry `rootDir` and `runs`.
 * @returns {Promise<object>} Baseline report with `reference` (0.5 percentile),
 *   `max`, aggregated sample counts, `failCount`, and the raw `samples`.
 */
async function buildBaselineProfile(options) {
  let runCount;
  if (options.baseline === false) {
    runCount = 0;
  } else {
    runCount = options.baselineRuns ?? Math.min(options.runs, 3);
  }
  if (runCount <= 0) {
    return emptyBaseline();
  }

  const baselineEntrypoint = await writeBaselineEntrypoint(options);

  const collected = [];
  let runIndex = 0;
  while (runIndex < runCount) {
    const captured = await runCaptureSample({
      ...options,
      entrypoint: baselineEntrypoint,
      index: runIndex,
      sampleName: "baseline",
      rootDir: options.rootDir,
    });
    collected.push(captured);
    runIndex += 1;
  }

  const wallSeries = sortedMetric(collected, "wallMs");
  const rssSeries = sortedMetric(collected, "peakRssMb");
  const cpuSeries = sortedMetric(collected, "cpuMsEstimate");

  // Sums an optional per-sample counter, treating a missing counter as zero.
  const totalOf = (field) => collected.reduce((total, item) => total + (item[field] ?? 0), 0);

  // A sample passes only when it exited with code 0 AND reports "captured".
  const failed = collected.reduce(
    (count, item) => (item.exitCode === 0 && item.status === "captured" ? count : count + 1),
    0,
  );

  const reference = {
    wallMs: percentile(wallSeries, 0.5),
    peakRssMb: percentile(rssSeries, 0.5),
    cpuMsEstimate: percentile(cpuSeries, 0.5),
  };
  const max = {
    wallMs: wallSeries.at(-1) ?? 0,
    peakRssMb: rssSeries.at(-1) ?? 0,
    cpuMsEstimate: cpuSeries.at(-1) ?? 0,
  };

  return {
    mode: "minimal-plugin-capture",
    runs: runCount,
    entrypoint: path.relative(options.rootDir, baselineEntrypoint),
    reference,
    max,
    statSampleCount: totalOf("statSampleCount"),
    rssSampleCount: totalOf("rssSampleCount"),
    cpuSampleCount: totalOf("cpuSampleCount"),
    failCount: failed,
    samples: collected,
  };
}
/**
 * Creates the placeholder baseline used when no baseline runs are performed.
 *
 * Every metric and counter is zero, `mode` is "disabled", `entrypoint` is
 * null, and `samples` is empty. Each call returns fresh objects, so callers
 * may mutate the result without affecting later calls.
 *
 * @returns {object} A baseline report with the same shape as a captured one.
 */
function emptyBaseline() {
  const zeroMetrics = () => ({ wallMs: 0, peakRssMb: 0, cpuMsEstimate: 0 });

  const baseline = { mode: "disabled", runs: 0, entrypoint: null };
  baseline.reference = zeroMetrics();
  baseline.max = zeroMetrics();
  baseline.statSampleCount = 0;
  baseline.rssSampleCount = 0;
  baseline.cpuSampleCount = 0;
  baseline.failCount = 0;
  baseline.samples = [];
  return baseline;
}
/**
 * Writes the generated baseline plugin module to disk and returns its path.
 *
 * The file is named "baseline-plugin.mjs" and is placed in the profile's
 * output directory (`options.outputDir`, or the default output directory
 * when that is not set), resolved against `options.rootDir`. The directory
 * is created recursively if needed. The module's default export has a
 * `register(api)` method that registers one tool named "baseline_tool".
 *
 * @param {object} options - Profile options; reads `rootDir` and `outputDir`.
 * @returns {Promise<string>} Path of the written module.
 */
async function writeBaselineEntrypoint(options) {
  const configuredDir = options.outputDir ?? defaultImportLoopProfileOptions.outputDir;
  const targetDir = resolveFromRoot(options.rootDir, configuredDir);
  const targetFile = path.join(targetDir, "baseline-plugin.mjs");

  // Trailing empty entry makes the joined text end with a newline.
  const moduleLines = [
    "export default {",
    " register(api) {",
    " api.registerTool({ name: 'baseline_tool', inputSchema: { type: 'object' }, run() {} });",
    " },",
    "};",
    "",
  ];
  const moduleText = moduleLines.join("\n");

  await mkdir(path.dirname(targetFile), { recursive: true });
  await writeFile(targetFile, moduleText, "utf8");
  return targetFile;
}
async function runCaptureSample(options) {
const outputDir = resolveFromRoot(
options.rootDir,
options.outputDir ?? defaultImportLoopProfileOptions.outputDir,
);
const outputPath = path.join(outputDir, `capture-${options.index}.json`);
const outputPath = path.join(outputDir, `${options.sampleName ?? "capture"}-${options.index}.json`);
await mkdir(path.dirname(outputPath), { recursive: true });
const command = buildCaptureCommand({ ...options, outputPath });
@ -147,10 +272,33 @@ async function runCaptureSample(options) {
function summaryRows(report) {
return [
["runs", report.summary.runs],
["baselineRuns", report.summary.baselineRuns ?? report.baseline?.runs ?? 0],
["baselineFailCount", report.summary.baselineFailCount ?? report.baseline?.failCount ?? 0],
["p50WallMs", report.summary.p50WallMs],
["p95WallMs", report.summary.p95WallMs],
...(Number.isFinite(report.summary.p50PluginWallDeltaMs)
? [
["p50PluginWallDeltaMs", report.summary.p50PluginWallDeltaMs],
["p95PluginWallDeltaMs", report.summary.p95PluginWallDeltaMs],
["maxPluginPeakRssDeltaMb", formatSampledMetric(report.summary.maxPluginPeakRssDeltaMb, report.summary.rssSampleCount)],
[
"maxPluginCpuDeltaMsEstimate",
formatSampledMetric(report.summary.maxPluginCpuDeltaMsEstimate, report.summary.cpuSampleCount, "ms"),
],
]
: []),
["maxPeakRssMb", formatSampledMetric(report.summary.maxPeakRssMb, report.summary.rssSampleCount)],
["maxCpuMsEstimate", formatSampledMetric(report.summary.maxCpuMsEstimate, report.summary.cpuSampleCount, "ms")],
...(Number.isFinite(report.summary.baselineReferenceWallMs)
? [
["baselineReferenceWallMs", `${report.summary.baselineReferenceWallMs} ms`],
["baselineReferencePeakRssMb", formatSampledMetric(report.summary.baselineReferencePeakRssMb, report.baseline?.rssSampleCount ?? 0)],
[
"baselineReferenceCpuMsEstimate",
formatSampledMetric(report.summary.baselineReferenceCpuMsEstimate, report.baseline?.cpuSampleCount ?? 0, "ms"),
],
]
: []),
["statSampleCount", report.summary.statSampleCount ?? 0],
["rssSampleCount", report.summary.rssSampleCount ?? 0],
["cpuSampleCount", report.summary.cpuSampleCount ?? 0],
@ -159,6 +307,23 @@ function summaryRows(report) {
];
}
/**
 * Produces the [metric, value] rows for the "Harness Baseline" markdown table.
 *
 * Reports without a `baseline` section are rendered from the disabled
 * placeholder returned by `emptyBaseline()`. Missing metrics and counters
 * default to 0 and a missing entrypoint is shown as "-".
 *
 * @param {object} report - Import-loop profile report.
 * @returns {Array<[string, string|number]>} Rows in display order.
 */
function baselineRows(report) {
  const source = report.baseline ?? emptyBaseline();
  const { reference, max, rssSampleCount, cpuSampleCount } = source;

  // Formatting helpers; RSS uses the formatter's default unit, CPU uses "ms".
  const wall = (metrics) => `${metrics?.wallMs ?? 0} ms`;
  const rss = (metrics) => formatSampledMetric(metrics?.peakRssMb ?? 0, rssSampleCount);
  const cpu = (metrics) => formatSampledMetric(metrics?.cpuMsEstimate ?? 0, cpuSampleCount, "ms");

  const rows = [];
  rows.push(["mode", source.mode]);
  rows.push(["runs", source.runs]);
  rows.push(["entrypoint", source.entrypoint ?? "-"]);
  rows.push(["referenceWallMs", wall(reference)]);
  rows.push(["referencePeakRssMb", rss(reference)]);
  rows.push(["referenceCpuMsEstimate", cpu(reference)]);
  rows.push(["maxWallMs", wall(max)]);
  rows.push(["maxPeakRssMb", rss(max)]);
  rows.push(["maxCpuMsEstimate", cpu(max)]);
  rows.push(["statSampleCount", source.statSampleCount ?? 0]);
  rows.push(["failCount", source.failCount ?? 0]);
  return rows;
}
function formatSampledMetric(value, count, unit = "MB") {
if ((count ?? 0) <= 0) {
return "n/a";
@ -166,6 +331,31 @@ function formatSampledMetric(value, count, unit = "MB") {
return `${value} ${unit}`;
}
/**
 * Formats a metric that may be absent.
 *
 * Only finite numbers are rendered; anything else (undefined, NaN, infinities,
 * or non-number values, which are not coerced) is shown as "n/a".
 *
 * @param {*} value - Metric value to render.
 * @param {string} unit - Unit suffix appended after a space.
 * @returns {string} "<value> <unit>" or "n/a".
 */
function formatOptionalMetric(value, unit) {
  return Number.isFinite(value) ? `${value} ${unit}` : "n/a";
}
/**
 * Returns a copy of a capture sample extended with baseline-adjusted deltas.
 *
 * Each delta is the sample's raw metric minus the matching
 * `baseline.reference` metric, passed through `roundNonNegative`: wall time
 * and CPU with 0 digits, peak RSS with 1 digit. The input sample is not
 * modified.
 *
 * @param {object} sample - Raw sample with `wallMs`, `peakRssMb`, `cpuMsEstimate`.
 * @param {object} baseline - Baseline report providing `reference` metrics.
 * @returns {object} Sample fields plus `pluginWallDeltaMs`,
 *   `pluginPeakRssDeltaMb`, and `pluginCpuDeltaMsEstimate`.
 */
function applyBaselineAdjustment(sample, baseline) {
  const overBaseline = (field, digits) =>
    roundNonNegative(sample[field] - baseline.reference[field], digits);

  const pluginWallDeltaMs = overBaseline("wallMs", 0);
  const pluginPeakRssDeltaMb = overBaseline("peakRssMb", 1);
  const pluginCpuDeltaMsEstimate = overBaseline("cpuMsEstimate", 0);

  return { ...sample, pluginWallDeltaMs, pluginPeakRssDeltaMb, pluginCpuDeltaMsEstimate };
}
/**
 * Extracts one metric from every sample and returns the values in ascending
 * numeric order.
 *
 * The values are collected into a new array before sorting, so `samples`
 * itself is left in its original order.
 *
 * @param {object[]} samples - Capture samples.
 * @param {string} field - Name of the numeric property to read from each sample.
 * @returns {number[]} Sorted copy of the metric values.
 */
function sortedMetric(samples, field) {
  const readField = (sample) => sample[field];
  const ascending = (left, right) => left - right;

  const values = samples.map(readField);
  values.sort(ascending);
  return values;
}
/**
 * Rounds a number to a fixed count of decimal digits and clamps negative
 * results to zero.
 *
 * NaN input yields NaN, because `Math.max(0, NaN)` is NaN.
 *
 * @param {number} value - Number to round.
 * @param {number} digits - Decimal digits to keep (0 rounds to an integer).
 * @returns {number} The rounded value, never below 0.
 */
function roundNonNegative(value, digits) {
  const factor = Math.pow(10, digits);
  const rounded = Math.round(value * factor) / factor;
  return Math.max(0, rounded);
}
function buildCaptureCommand(options) {
if (typeof options.captureCommand === "function") {
return options.captureCommand({

View File

@ -97,6 +97,8 @@ test("ci summary rolls up compatibility, policy, ref diff, and profile findings"
p95WallMs: 75,
maxPeakRssMb: 40,
maxCpuMsEstimate: 30,
maxPluginPeakRssDeltaMb: 8,
maxPluginCpuDeltaMsEstimate: 6,
rssSampleCount: 2,
cpuSampleCount: 2,
},
@ -110,9 +112,11 @@ test("ci summary rolls up compatibility, policy, ref diff, and profile findings"
assert.equal(summary.summary.platformWindowsRisks, 3);
assert.equal(summary.summary.loaderJitiCandidates, 1);
assert.equal(summary.summary.importLoopP50Ms, 50);
assert.equal(summary.summary.importLoopMetricBasis, "baseline-adjusted");
assert.equal(summary.summary.importLoopMaxRssMb, 8);
assert.match(renderCiSummaryMarkdown(summary), /Crabpot CI Summary/);
assert.match(renderCiSummaryMarkdown(summary), /Windows portability risks/);
assert.match(renderCiSummaryMarkdown(summary), /p50 50 ms \/ p95 75 ms \/ max RSS 40 MB \/ CPU 30 ms/);
assert.match(renderCiSummaryMarkdown(summary), /p50 50 ms \/ p95 75 ms \/ plugin delta RSS 8 MB \/ plugin delta CPU 6 ms/);
assert.match(renderCiSummaryMarkdown(summary), /\| P0 issues\s+\| 1\s+\|/);
});

View File

@ -30,12 +30,19 @@ test("import loop profile measures repeated cold capture subprocesses", async ()
assert.deepEqual(validateImportLoopProfile(profile), []);
assert.equal(profile.summary.runs, 2);
assert.equal(profile.summary.baselineRuns, 2);
assert.equal(profile.summary.baselineFailCount, 0);
assert.equal(profile.summary.failCount, 0);
assert.ok(profile.summary.capturedCount >= 2);
assert.ok(profile.summary.p50WallMs > 0);
assert.ok(profile.summary.p50PluginWallDeltaMs >= 0);
assert.ok(profile.summary.maxPluginPeakRssDeltaMb >= 0);
assert.ok(profile.baseline.reference.wallMs > 0);
assert.ok(profile.samples.every((sample) => Number.isFinite(sample.pluginCpuDeltaMsEstimate)));
assert.ok(profile.samples.every((sample) => sample.exitCode === 0));
assert.match(renderImportLoopProfileMarkdown(profile), /Import Loop Profile/);
assert.match(renderImportLoopProfileMarkdown(profile), /CPU Estimate/);
assert.match(renderImportLoopProfileMarkdown(profile), /Harness Baseline/);
assert.match(renderImportLoopProfileMarkdown(profile), /Plugin CPU Delta/);
});
test("import loop profile can use a custom capture script and opt-in env", async () => {
@ -64,6 +71,7 @@ test("import loop profile can use a custom capture script and opt-in env", async
});
assert.equal(profile.summary.failCount, 0);
assert.equal(profile.summary.baselineRuns, 1);
assert.equal(profile.summary.capturedCount, 1);
});