diff --git a/CHANGELOG.md b/CHANGELOG.md index be6239d..89a04c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,9 @@ ## Unreleased -_No unreleased changes._ +### Changed + +- Report import-loop RSS and CPU as baseline-adjusted plugin deltas alongside raw subprocess metrics so Crabpot dashboards do not treat harness import cost as plugin runtime cost. ## 0.3.5 - 2026-04-29 diff --git a/src/ci-summary.js b/src/ci-summary.js index 958502d..379e6d1 100644 --- a/src/ci-summary.js +++ b/src/ci-summary.js @@ -56,8 +56,9 @@ export async function buildCiSummary(options = {}) { loaderJitiCandidates: reports.platform?.summary?.jitiAlternativeCount ?? 0, importLoopP50Ms: reports.importLoop?.summary?.p50WallMs ?? 0, importLoopP95Ms: reports.importLoop?.summary?.p95WallMs ?? 0, - importLoopMaxRssMb: reports.importLoop?.summary?.maxPeakRssMb ?? 0, - importLoopMaxCpuMs: reports.importLoop?.summary?.maxCpuMsEstimate ?? 0, + importLoopMetricBasis: reports.importLoop?.summary?.maxPluginPeakRssDeltaMb === undefined ? "raw" : "baseline-adjusted", + importLoopMaxRssMb: reports.importLoop?.summary?.maxPluginPeakRssDeltaMb ?? reports.importLoop?.summary?.maxPeakRssMb ?? 0, + importLoopMaxCpuMs: reports.importLoop?.summary?.maxPluginCpuDeltaMsEstimate ?? reports.importLoop?.summary?.maxCpuMsEstimate ?? 0, importLoopRssSampleCount: metricSampleCount(reports.importLoop, "rss", "maxPeakRssMb"), importLoopCpuSampleCount: metricSampleCount(reports.importLoop, "cpu", "maxCpuMsEstimate"), }, @@ -150,7 +151,7 @@ export function renderCiSummaryMarkdown(summary) { ["Jiti loader candidates", summary.summary.loaderJitiCandidates], [ "Import loop", - `p50 ${summary.summary.importLoopP50Ms} ms / p95 ${summary.summary.importLoopP95Ms} ms / max RSS ${formatSampledMetric(summary.summary.importLoopMaxRssMb, summary.summary.importLoopRssSampleCount)} / CPU ${formatSampledMetric(summary.summary.importLoopMaxCpuMs, summary.summary.importLoopCpuSampleCount, "ms")}`, + importLoopSummaryLabel(summary.summary), ], ], ["Metric", "Value"], @@ -249,6 +250,11 @@ function inferSampleCount(samples = [], kind) { }, 0); } +function importLoopSummaryLabel(summary) { + const metricLabel = summary.importLoopMetricBasis === "baseline-adjusted" ? "plugin delta" : "raw"; + return `p50 ${summary.importLoopP50Ms} ms / p95 ${summary.importLoopP95Ms} ms / ${metricLabel} RSS ${formatSampledMetric(summary.importLoopMaxRssMb, summary.importLoopRssSampleCount)} / ${metricLabel} CPU ${formatSampledMetric(summary.importLoopMaxCpuMs, summary.importLoopCpuSampleCount, "ms")}`; +} + function formatSampledMetric(value, count, unit = "MB") { if ((count ?? 0) <= 0) { return "n/a"; diff --git a/src/import-loop-profile.js b/src/import-loop-profile.js index 69f6d31..389e675 100644 --- a/src/import-loop-profile.js +++ b/src/import-loop-profile.js @@ -1,4 +1,4 @@ -import { mkdir } from "node:fs/promises"; +import { mkdir, writeFile } from "node:fs/promises"; import path from "node:path"; import { fileURLToPath } from "node:url"; import { renderPaddedMarkdownTable, writeJsonMarkdownArtifacts } from "./artifacts.js"; @@ -24,25 +24,38 @@ export async function buildImportLoopProfile(options = {}) { const entrypoint = options.entrypoint ?? defaultImportLoopProfileOptions.entrypoint; assertRunCount(runs, 20); + const baseline = await buildBaselineProfile({ ...options, rootDir, runs }); const samples = []; for (let index = 0; index < runs; index += 1) { - samples.push(await runCaptureSample({ ...options, entrypoint, index, rootDir })); + const sample = await runCaptureSample({ ...options, entrypoint, index, rootDir }); + samples.push(applyBaselineAdjustment(sample, baseline)); } const wallMs = samples.map((sample) => sample.wallMs).sort((left, right) => left - right); + const pluginWallDeltaMs = samples.map((sample) => sample.pluginWallDeltaMs).sort((left, right) => left - right); const rssSampleCount = samples.reduce((sum, sample) => sum + (sample.rssSampleCount ?? (sample.peakRssMb > 0 ? 1 : 0)), 0); const cpuSampleCount = samples.reduce((sum, sample) => sum + (sample.cpuSampleCount ?? 0), 0); const statSampleCount = samples.reduce((sum, sample) => sum + (sample.statSampleCount ?? 0), 0); return { generatedAt: options.generatedAt ?? defaultImportLoopProfileOptions.generatedAt, - mode: options.mode ?? "subprocess-cold-import-loop", + mode: options.mode ?? "baseline-adjusted-cold-capture-loop", entrypoint, + baseline, summary: { runs, + baselineRuns: baseline.runs, + baselineFailCount: baseline.failCount, p50WallMs: percentile(wallMs, 0.5), p95WallMs: percentile(wallMs, 0.95), + p50PluginWallDeltaMs: percentile(pluginWallDeltaMs, 0.5), + p95PluginWallDeltaMs: percentile(pluginWallDeltaMs, 0.95), maxPeakRssMb: Math.max(0, ...samples.map((sample) => sample.peakRssMb)), maxCpuMsEstimate: Math.max(0, ...samples.map((sample) => sample.cpuMsEstimate)), + maxPluginPeakRssDeltaMb: Math.max(0, ...samples.map((sample) => sample.pluginPeakRssDeltaMb)), + maxPluginCpuDeltaMsEstimate: Math.max(0, ...samples.map((sample) => sample.pluginCpuDeltaMsEstimate)), + baselineReferenceWallMs: baseline.reference.wallMs, + baselineReferencePeakRssMb: baseline.reference.peakRssMb, + baselineReferenceCpuMsEstimate: baseline.reference.cpuMsEstimate, statSampleCount, rssSampleCount, cpuSampleCount, @@ -58,6 +71,9 @@ export function validateImportLoopProfile(report) { if (report.summary.failCount > 0) { errors.push(`import loop has ${report.summary.failCount} failed sample(s)`); } + if ((report.summary.baselineFailCount ?? report.baseline?.failCount ?? 0) > 0) { + errors.push("import loop baseline capture failed"); + } if (report.summary.capturedCount < report.summary.runs) { errors.push("import loop did not capture at least one contract per run"); } @@ -93,6 +109,10 @@ export function renderImportLoopProfileMarkdown(report, options = {}) { "", markdownTable(summaryRows(report), ["Metric", "Value"]), "", + "## Harness Baseline", + "", + markdownTable(baselineRows(report), ["Metric", "Value"]), + "", "## Samples", "", markdownTable( @@ -100,23 +120,128 @@ export function renderImportLoopProfileMarkdown(report, options = {}) { sample.index, sample.status, sample.capturedCount, + formatOptionalMetric(sample.pluginWallDeltaMs, "ms"), + formatSampledMetric(sample.pluginPeakRssDeltaMb, sample.rssSampleCount), + formatSampledMetric(sample.pluginCpuDeltaMsEstimate, sample.cpuSampleCount, "ms"), `${sample.wallMs} ms`, formatSampledMetric(sample.peakRssMb, sample.rssSampleCount), formatSampledMetric(sample.cpuMsEstimate, sample.cpuSampleCount, "ms"), `${sample.rssSampleCount ?? 0}/${sample.cpuSampleCount ?? 0}`, sample.exitCode, ]), - ["Run", "Status", "Captured", "Wall", "Peak RSS", "CPU Estimate", "RSS/CPU samples", "Exit"], + [ + "Run", + "Status", + "Captured", + "Plugin Wall Delta", + "Plugin RSS Delta", + "Plugin CPU Delta", + "Raw Wall", + "Raw Peak RSS", + "Raw CPU Estimate", + "RSS/CPU samples", + "Exit", + ], ), ].join("\n"); } +async function buildBaselineProfile(options) { + const baselineRuns = options.baseline === false ? 0 : options.baselineRuns ?? Math.min(options.runs, 3); + if (baselineRuns <= 0) { + return emptyBaseline(); + } + + const entrypoint = await writeBaselineEntrypoint(options); + const samples = []; + for (let index = 0; index < baselineRuns; index += 1) { + samples.push( + await runCaptureSample({ + ...options, + entrypoint, + index, + sampleName: "baseline", + rootDir: options.rootDir, + }), + ); + } + + const wallMs = sortedMetric(samples, "wallMs"); + const peakRssMb = sortedMetric(samples, "peakRssMb"); + const cpuMsEstimate = sortedMetric(samples, "cpuMsEstimate"); + return { + mode: "minimal-plugin-capture", + runs: baselineRuns, + entrypoint: path.relative(options.rootDir, entrypoint), + reference: { + wallMs: percentile(wallMs, 0.5), + peakRssMb: percentile(peakRssMb, 0.5), + cpuMsEstimate: percentile(cpuMsEstimate, 0.5), + }, + max: { + wallMs: wallMs.at(-1) ?? 0, + peakRssMb: peakRssMb.at(-1) ?? 0, + cpuMsEstimate: cpuMsEstimate.at(-1) ?? 0, + }, + statSampleCount: samples.reduce((sum, sample) => sum + (sample.statSampleCount ?? 0), 0), + rssSampleCount: samples.reduce((sum, sample) => sum + (sample.rssSampleCount ?? 0), 0), + cpuSampleCount: samples.reduce((sum, sample) => sum + (sample.cpuSampleCount ?? 0), 0), + failCount: samples.filter((sample) => sample.exitCode !== 0 || sample.status !== "captured").length, + samples, + }; +} + +function emptyBaseline() { + return { + mode: "disabled", + runs: 0, + entrypoint: null, + reference: { + wallMs: 0, + peakRssMb: 0, + cpuMsEstimate: 0, + }, + max: { + wallMs: 0, + peakRssMb: 0, + cpuMsEstimate: 0, + }, + statSampleCount: 0, + rssSampleCount: 0, + cpuSampleCount: 0, + failCount: 0, + samples: [], + }; +} + +async function writeBaselineEntrypoint(options) { + const outputDir = resolveFromRoot( + options.rootDir, + options.outputDir ?? defaultImportLoopProfileOptions.outputDir, + ); + const baselinePath = path.join(outputDir, "baseline-plugin.mjs"); + await mkdir(path.dirname(baselinePath), { recursive: true }); + await writeFile( + baselinePath, + [ + "export default {", + " register(api) {", + " api.registerTool({ name: 'baseline_tool', inputSchema: { type: 'object' }, run() {} });", + " },", + "};", + "", + ].join("\n"), + "utf8", + ); + return baselinePath; +} + async function runCaptureSample(options) { const outputDir = resolveFromRoot( options.rootDir, options.outputDir ?? defaultImportLoopProfileOptions.outputDir, ); - const outputPath = path.join(outputDir, `capture-${options.index}.json`); + const outputPath = path.join(outputDir, `${options.sampleName ?? "capture"}-${options.index}.json`); await mkdir(path.dirname(outputPath), { recursive: true }); const command = buildCaptureCommand({ ...options, outputPath }); @@ -147,10 +272,33 @@ async function runCaptureSample(options) { function summaryRows(report) { return [ ["runs", report.summary.runs], + ["baselineRuns", report.summary.baselineRuns ?? report.baseline?.runs ?? 0], + ["baselineFailCount", report.summary.baselineFailCount ?? report.baseline?.failCount ?? 0], ["p50WallMs", report.summary.p50WallMs], ["p95WallMs", report.summary.p95WallMs], + ...(Number.isFinite(report.summary.p50PluginWallDeltaMs) + ? [ + ["p50PluginWallDeltaMs", report.summary.p50PluginWallDeltaMs], + ["p95PluginWallDeltaMs", report.summary.p95PluginWallDeltaMs], + ["maxPluginPeakRssDeltaMb", formatSampledMetric(report.summary.maxPluginPeakRssDeltaMb, report.summary.rssSampleCount)], + [ + "maxPluginCpuDeltaMsEstimate", + formatSampledMetric(report.summary.maxPluginCpuDeltaMsEstimate, report.summary.cpuSampleCount, "ms"), + ], + ] + : []), ["maxPeakRssMb", formatSampledMetric(report.summary.maxPeakRssMb, report.summary.rssSampleCount)], ["maxCpuMsEstimate", formatSampledMetric(report.summary.maxCpuMsEstimate, report.summary.cpuSampleCount, "ms")], + ...(Number.isFinite(report.summary.baselineReferenceWallMs) + ? [ + ["baselineReferenceWallMs", `${report.summary.baselineReferenceWallMs} ms`], + ["baselineReferencePeakRssMb", formatSampledMetric(report.summary.baselineReferencePeakRssMb, report.baseline?.rssSampleCount ?? 0)], + [ + "baselineReferenceCpuMsEstimate", + formatSampledMetric(report.summary.baselineReferenceCpuMsEstimate, report.baseline?.cpuSampleCount ?? 0, "ms"), + ], + ] + : []), ["statSampleCount", report.summary.statSampleCount ?? 0], ["rssSampleCount", report.summary.rssSampleCount ?? 0], ["cpuSampleCount", report.summary.cpuSampleCount ?? 0], @@ -159,6 +307,23 @@ function summaryRows(report) { ]; } +function baselineRows(report) { + const baseline = report.baseline ?? emptyBaseline(); + return [ + ["mode", baseline.mode], + ["runs", baseline.runs], + ["entrypoint", baseline.entrypoint ?? "-"], + ["referenceWallMs", `${baseline.reference?.wallMs ?? 0} ms`], + ["referencePeakRssMb", formatSampledMetric(baseline.reference?.peakRssMb ?? 0, baseline.rssSampleCount)], + ["referenceCpuMsEstimate", formatSampledMetric(baseline.reference?.cpuMsEstimate ?? 0, baseline.cpuSampleCount, "ms")], + ["maxWallMs", `${baseline.max?.wallMs ?? 0} ms`], + ["maxPeakRssMb", formatSampledMetric(baseline.max?.peakRssMb ?? 0, baseline.rssSampleCount)], + ["maxCpuMsEstimate", formatSampledMetric(baseline.max?.cpuMsEstimate ?? 0, baseline.cpuSampleCount, "ms")], + ["statSampleCount", baseline.statSampleCount ?? 0], + ["failCount", baseline.failCount ?? 0], + ]; +} + function formatSampledMetric(value, count, unit = "MB") { if ((count ?? 0) <= 0) { return "n/a"; @@ -166,6 +331,31 @@ function formatSampledMetric(value, count, unit = "MB") { return `${value} ${unit}`; } +function formatOptionalMetric(value, unit) { + if (!Number.isFinite(value)) { + return "n/a"; + } + return `${value} ${unit}`; +} + +function applyBaselineAdjustment(sample, baseline) { + return { + ...sample, + pluginWallDeltaMs: roundNonNegative(sample.wallMs - baseline.reference.wallMs, 0), + pluginPeakRssDeltaMb: roundNonNegative(sample.peakRssMb - baseline.reference.peakRssMb, 1), + pluginCpuDeltaMsEstimate: roundNonNegative(sample.cpuMsEstimate - baseline.reference.cpuMsEstimate, 0), + }; +} + +function sortedMetric(samples, field) { + return samples.map((sample) => sample[field]).sort((left, right) => left - right); +} + +function roundNonNegative(value, digits) { + const scale = 10 ** digits; + return Math.max(0, Math.round(value * scale) / scale); +} + function buildCaptureCommand(options) { if (typeof options.captureCommand === "function") { return options.captureCommand({ diff --git a/test/ci-summary.test.js b/test/ci-summary.test.js index 4bd84c7..0875a2c 100644 --- a/test/ci-summary.test.js +++ b/test/ci-summary.test.js @@ -97,6 +97,8 @@ test("ci summary rolls up compatibility, policy, ref diff, and profile findings" p95WallMs: 75, maxPeakRssMb: 40, maxCpuMsEstimate: 30, + maxPluginPeakRssDeltaMb: 8, + maxPluginCpuDeltaMsEstimate: 6, rssSampleCount: 2, cpuSampleCount: 2, }, @@ -110,9 +112,11 @@ test("ci summary rolls up compatibility, policy, ref diff, and profile findings" assert.equal(summary.summary.platformWindowsRisks, 3); assert.equal(summary.summary.loaderJitiCandidates, 1); assert.equal(summary.summary.importLoopP50Ms, 50); + assert.equal(summary.summary.importLoopMetricBasis, "baseline-adjusted"); + assert.equal(summary.summary.importLoopMaxRssMb, 8); assert.match(renderCiSummaryMarkdown(summary), /Crabpot CI Summary/); assert.match(renderCiSummaryMarkdown(summary), /Windows portability risks/); - assert.match(renderCiSummaryMarkdown(summary), /p50 50 ms \/ p95 75 ms \/ max RSS 40 MB \/ CPU 30 ms/); + assert.match(renderCiSummaryMarkdown(summary), /p50 50 ms \/ p95 75 ms \/ plugin delta RSS 8 MB \/ plugin delta CPU 6 ms/); assert.match(renderCiSummaryMarkdown(summary), /\| P0 issues\s+\| 1\s+\|/); }); diff --git a/test/import-loop-profile.test.js b/test/import-loop-profile.test.js index 2ccb641..9be4cbe 100644 --- a/test/import-loop-profile.test.js +++ b/test/import-loop-profile.test.js @@ -30,12 +30,19 @@ test("import loop profile measures repeated cold capture subprocesses", async () assert.deepEqual(validateImportLoopProfile(profile), []); assert.equal(profile.summary.runs, 2); + assert.equal(profile.summary.baselineRuns, 2); + assert.equal(profile.summary.baselineFailCount, 0); assert.equal(profile.summary.failCount, 0); assert.ok(profile.summary.capturedCount >= 2); assert.ok(profile.summary.p50WallMs > 0); + assert.ok(profile.summary.p50PluginWallDeltaMs >= 0); + assert.ok(profile.summary.maxPluginPeakRssDeltaMb >= 0); + assert.ok(profile.baseline.reference.wallMs > 0); + assert.ok(profile.samples.every((sample) => Number.isFinite(sample.pluginCpuDeltaMsEstimate))); assert.ok(profile.samples.every((sample) => sample.exitCode === 0)); assert.match(renderImportLoopProfileMarkdown(profile), /Import Loop Profile/); - assert.match(renderImportLoopProfileMarkdown(profile), /CPU Estimate/); + assert.match(renderImportLoopProfileMarkdown(profile), /Harness Baseline/); + assert.match(renderImportLoopProfileMarkdown(profile), /Plugin CPU Delta/); }); test("import loop profile can use a custom capture script and opt-in env", async () => { @@ -64,6 +71,7 @@ test("import loop profile can use a custom capture script and opt-in env", async }); assert.equal(profile.summary.failCount, 0); + assert.equal(profile.summary.baselineRuns, 1); assert.equal(profile.summary.capturedCount, 1); });