feat: compare source and release diagnostics
This commit is contained in:
parent
b4bd818aae
commit
bf0da9ff29
143
src/compare.mjs
143
src/compare.mjs
@ -115,13 +115,16 @@ export function compareReports(baseline, current, options = {}) {
|
||||
}
|
||||
|
||||
const regressionCount = scenarios.reduce((count, scenario) => count + scenario.regressions.length, 0);
|
||||
const sourceRelease = compareSourceReleaseDiagnostics(baseline, current);
|
||||
const sourceReleaseBlockingCount = sourceRelease?.blockingCount ?? 0;
|
||||
return {
|
||||
schemaVersion: "kova.compare.v1",
|
||||
generatedAt: new Date().toISOString(),
|
||||
baseline: reportSummary(baseline),
|
||||
current: reportSummary(current),
|
||||
thresholds,
|
||||
ok: regressionCount === 0,
|
||||
sourceRelease,
|
||||
ok: regressionCount === 0 && sourceReleaseBlockingCount === 0,
|
||||
regressionCount,
|
||||
scenarios
|
||||
};
|
||||
@ -142,6 +145,14 @@ export function renderCompareFixerSummary(comparison) {
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
if (comparison.sourceRelease && comparison.sourceRelease.blockingCount > 0) {
|
||||
lines.push("Source/release diagnostic comparison:");
|
||||
for (const finding of comparison.sourceRelease.findings.filter((item) => item.severity === "blocking")) {
|
||||
lines.push(`- ${finding.message}`);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
for (const scenario of comparison.scenarios.filter((item) => item.regressions.length > 0)) {
|
||||
lines.push(`Scenario: ${scenario.key}`);
|
||||
lines.push(`Status: ${scenario.baselineStatus ?? "missing"} -> ${scenario.currentStatus ?? "missing"}`);
|
||||
@ -172,6 +183,17 @@ export function renderCompareSummary(comparison) {
|
||||
}
|
||||
}
|
||||
|
||||
if (comparison.sourceRelease) {
|
||||
lines.push("");
|
||||
lines.push("Source/release diagnostics:");
|
||||
lines.push(`- Status: ${comparison.sourceRelease.ok ? "OK" : "NEEDS_WORK"}`);
|
||||
lines.push(`- Pairs: ${comparison.sourceRelease.pairCount}`);
|
||||
lines.push(`- Blocking: ${comparison.sourceRelease.blockingCount}`);
|
||||
for (const finding of comparison.sourceRelease.findings.slice(0, 8)) {
|
||||
lines.push(`- ${finding.severity.toUpperCase()} ${finding.key ?? "comparison"}: ${finding.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
@ -193,11 +215,130 @@ function reportSummary(report) {
|
||||
mode: report.mode ?? null,
|
||||
profile: report.profile?.id ?? null,
|
||||
target: report.target ?? null,
|
||||
targetKind: targetKind(report.target),
|
||||
generatedAt: report.generatedAt ?? null,
|
||||
statuses: report.summary?.statuses ?? {}
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Compares diagnostics between a source-build report and a release-runtime report.
 *
 * The comparison only applies when the two reports resolve to different,
 * recognised lanes (see `targetLane`); otherwise `null` is returned.
 *
 * @param {object} leftReport - One of the two reports; its `target` decides its lane.
 * @param {object} rightReport - The other report.
 * @returns {object|null} A `kova.sourceReleaseComparison.v1` object with the paired
 *   record summaries, findings and per-severity counts, or `null` when the reports
 *   are not a source-build/release-runtime pair.
 */
function compareSourceReleaseDiagnostics(leftReport, rightReport) {
  const leftLane = targetLane(leftReport.target);
  const rightLane = targetLane(rightReport.target);
  const lanesDiffer = Boolean(leftLane) && Boolean(rightLane) && leftLane !== rightLane;
  if (!lanesDiffer) {
    return null;
  }

  // The lanes are distinct and there are only two of them, so knowing which
  // side is the source build also identifies the release side.
  const [sourceReport, releaseReport] = leftLane === "source-build"
    ? [leftReport, rightReport]
    : [rightReport, leftReport];

  // NOTE(review): indexRecords is defined elsewhere; it is used here as a
  // Map-like lookup (keys/has/get), presumably keyed by scenario/state — confirm.
  const sourceByKey = indexRecords(sourceReport.records ?? []);
  const releaseByKey = indexRecords(releaseReport.records ?? []);

  const sharedKeys = [];
  for (const candidate of sourceByKey.keys()) {
    if (releaseByKey.has(candidate)) {
      sharedKeys.push(candidate);
    }
  }
  sharedKeys.sort();

  const findings = [];
  const pairs = [];
  const severityTotals = { blocking: 0, warning: 0, info: 0 };

  // Records a finding and keeps the per-severity totals in step with it.
  const addFinding = (severity, key, message) => {
    findings.push({ severity, key, message });
    severityTotals[severity] += 1;
  };

  if (sharedKeys.length === 0) {
    addFinding(
      "blocking",
      null,
      "source-build and release-runtime reports have no shared scenario/state records, so diagnostic parity cannot be evaluated"
    );
  }

  for (const key of sharedKeys) {
    const pair = sourceReleasePair(key, sourceByKey.get(key), releaseByKey.get(key));
    pairs.push(pair);

    // A source build without timeline diagnostics blocks the comparison ...
    if (!pair.source.timelineAvailable) {
      addFinding("blocking", key, `${key} source-build report did not include OpenClaw timeline diagnostics`);
    }

    // ... whereas a release without one is only reported as information.
    if (!pair.release.timelineAvailable) {
      addFinding("info", key, `${key} release-runtime report has no timeline; use outside-in timings for released packages`);
    }

    const sourcePreProviderMs = pair.source.agentPreProviderMs;
    const releasePreProviderMs = pair.release.agentPreProviderMs;
    if (typeof sourcePreProviderMs !== "number" || typeof releasePreProviderMs !== "number") {
      continue;
    }

    const delta = releasePreProviderMs - sourcePreProviderMs;
    if (delta > defaultThresholds.coldPreProviderMs) {
      addFinding(
        "warning",
        key,
        `${key} release pre-provider latency exceeded source-build by ${delta}ms (${sourcePreProviderMs}ms -> ${releasePreProviderMs}ms)`
      );
    }
  }

  return {
    schemaVersion: "kova.sourceReleaseComparison.v1",
    sourceTarget: sourceReport.target ?? null,
    releaseTarget: releaseReport.target ?? null,
    ok: severityTotals.blocking === 0,
    pairCount: pairs.length,
    blockingCount: severityTotals.blocking,
    warningCount: severityTotals.warning,
    infoCount: severityTotals.info,
    pairs,
    findings
  };
}
|
||||
|
||||
/**
 * Builds the paired summary for one record key shared by both reports.
 *
 * Descriptive fields (scenario, state, surface) are taken from the source
 * record first and fall back to the release record, then to `null`.
 *
 * @param {string} key - Shared record key.
 * @param {object} source - Record from the source-build report.
 * @param {object} release - Record from the release-runtime report.
 * @returns {object} Pair with `key`, `scenario`, `state`, `surface` and the
 *   diagnostic summaries of both records under `source` and `release`.
 */
function sourceReleasePair(key, source, release) {
  const scenario = source.scenario ?? release.scenario ?? null;
  const state = source.state?.id ?? release.state?.id ?? null;

  // Record-level surface wins over the one nested under measurements.
  const surface =
    source.surface ??
    release.surface ??
    source.measurements?.surface ??
    release.measurements?.surface ??
    null;

  const sourceSummary = diagnosticRecordSummary(source);
  const releaseSummary = diagnosticRecordSummary(release);

  return {
    key,
    scenario,
    state,
    surface,
    source: sourceSummary,
    release: releaseSummary
  };
}
|
||||
|
||||
/**
 * Reduces a report record to the diagnostic fields used for source/release comparison.
 *
 * Missing values are normalised to `null`. The agent timing fields fall back
 * to their `cold*` counterparts when the primary measurement is absent.
 *
 * @param {object|null|undefined} record - Report record; may be missing.
 * @returns {object} Summary with status, timeline details, timings and peak RSS.
 */
function diagnosticRecordSummary(record) {
  const metrics = record?.measurements ?? {};

  // Returns the first named measurement that is neither null nor undefined, else null.
  const firstPresent = (...names) => {
    for (const name of names) {
      const value = metrics[name];
      if (value !== null && value !== undefined) {
        return value;
      }
    }
    return null;
  };

  return {
    status: record?.status ?? null,
    // Only a literal `true` counts; truthy stand-ins are treated as unavailable.
    timelineAvailable: metrics.openclawTimelineAvailable === true,
    timelineEventCount: firstPresent("openclawTimelineEventCount"),
    slowestSpanName: firstPresent("openclawSlowestSpanName"),
    slowestSpanMs: firstPresent("openclawSlowestSpanMs"),
    openRequiredSpanCount: firstPresent("openclawOpenRequiredSpanCount"),
    agentTurnMs: firstPresent("agentTurnMs", "coldAgentTurnMs"),
    agentPreProviderMs: firstPresent("agentPreProviderMs", "coldPreProviderMs"),
    providerFinalMs: firstPresent("agentProviderFinalMs", "coldProviderFinalMs"),
    runtimeDepsStagingMs: firstPresent("runtimeDepsStagingMs"),
    timeToHealthReadyMs: firstPresent("timeToHealthReadyMs"),
    peakRssMb: firstPresent("peakRssMb")
  };
}
|
||||
|
||||
/**
 * Maps a report target onto the comparison lane it belongs to.
 *
 * @param {unknown} target - Target identifier in `kind:value` form.
 * @returns {"source-build"|"release-runtime"|null} `source-build` for
 *   `local-build` targets, `release-runtime` for `npm`, `channel` and `runtime`
 *   targets, and `null` for anything else.
 */
function targetLane(target) {
  switch (targetKind(target)) {
    case "local-build":
      return "source-build";
    case "npm":
    case "channel":
    case "runtime":
      return "release-runtime";
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Extracts the kind prefix from a `kind:value` target string.
 *
 * @param {unknown} target - Target identifier such as `npm:2026.4.27`.
 * @returns {string|null} The text before the first colon, or `null` when
 *   `target` is not a string or contains no colon.
 */
function targetKind(target) {
  if (typeof target === "string") {
    const separatorIndex = target.indexOf(":");
    if (separatorIndex !== -1) {
      return target.slice(0, separatorIndex);
    }
  }
  return null;
}
|
||||
|
||||
function statusRank(status) {
|
||||
const ranks = {
|
||||
PASS: 0,
|
||||
|
||||
@ -30,6 +30,7 @@ import {
|
||||
} from "./collectors/provider.mjs";
|
||||
import { captureProcessSnapshot, diffProcessSnapshots } from "./collectors/resources.mjs";
|
||||
import { renderMarkdownReport, renderPasteSummary, renderReportSummary } from "./report.mjs";
|
||||
import { compareReports, renderCompareSummary } from "./compare.mjs";
|
||||
|
||||
export async function runSelfCheck(flags = {}) {
|
||||
const checks = [];
|
||||
@ -175,6 +176,7 @@ export async function runSelfCheck(flags = {}) {
|
||||
checks.push(await mockProviderBehaviorCheck(tmp));
|
||||
checks.push(providerFailureEvaluationCheck());
|
||||
checks.push(agentColdWarmEvaluationCheck());
|
||||
checks.push(sourceReleaseCompareCheck());
|
||||
checks.push(await concurrentAgentRunnerCheck(tmp));
|
||||
checks.push(providerConcurrentEvaluationCheck());
|
||||
checks.push(agentAuthFailureEvaluationCheck());
|
||||
@ -2107,6 +2109,92 @@ function agentColdWarmEvaluationCheck() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Self-check for the source-build versus release-runtime comparison.
 *
 * Runs `compareReports` on synthetic reports twice: once with a source build
 * that has timeline diagnostics (expected to pass, with the release's missing
 * timeline reported as informational) and once with a source build that lacks
 * them (expected to fail with one blocking finding that shows up in the
 * rendered summary).
 *
 * @returns {object} Check result with `id`, `status` (`PASS` or `FAIL`),
 *   `command`, `durationMs` and, on failure, the error `message`.
 */
function sourceReleaseCompareCheck() {
  const id = "source-release-compare";
  const command = "evaluate synthetic source-build versus release-runtime comparison";
  const localBuildTarget = "local-build:/tmp/openclaw";

  try {
    const releaseReport = syntheticCompareReport({
      runId: "release-run",
      target: "npm:2026.4.27",
      timelineAvailable: false,
      preProviderMs: 62000,
      slowestSpanMs: null
    });
    const sourceWithTimeline = syntheticCompareReport({
      runId: "source-run",
      target: localBuildTarget,
      timelineAvailable: true,
      preProviderMs: 4000,
      slowestSpanMs: 3200
    });

    // Case 1: the source build carries a timeline, so nothing is blocking.
    const healthy = compareReports(releaseReport, sourceWithTimeline);
    const healthyDiagnostics = healthy.sourceRelease;
    const firstPair = healthyDiagnostics?.pairs?.[0];
    assertEqual(healthy.ok, true, "source/release comparison with source timeline should pass");
    assertEqual(healthyDiagnostics?.pairCount, 1, "source/release pair count");
    assertEqual(healthyDiagnostics?.infoCount, 1, "release missing timeline should be informational");
    assertEqual(firstPair?.source?.timelineAvailable, true, "source timeline available");
    assertEqual(firstPair?.release?.timelineAvailable, false, "release timeline missing");

    // Case 2: the source build has no timeline, which must block the comparison.
    const sourceWithoutTimeline = syntheticCompareReport({
      runId: "source-no-timeline",
      target: localBuildTarget,
      timelineAvailable: false,
      preProviderMs: 4000,
      slowestSpanMs: null
    });
    const blocked = compareReports(releaseReport, sourceWithoutTimeline);
    assertEqual(blocked.ok, false, "source missing timeline should fail comparison");
    assertEqual(blocked.sourceRelease?.blockingCount, 1, "source missing timeline blocking count");

    const summaryMentionsBlocker = renderCompareSummary(blocked)
      .includes("source-build report did not include OpenClaw timeline diagnostics");
    assertEqual(summaryMentionsBlocker, true, "compare summary includes source timeline blocker");

    return { id, status: "PASS", command, durationMs: 0 };
  } catch (error) {
    return { id, status: "FAIL", command, durationMs: 0, message: error.message };
  }
}
|
||||
|
||||
/**
 * Builds a minimal single-record report fixture for comparison self-checks.
 *
 * @param {object} options - Fixture settings.
 * @param {string} options.runId - Run identifier copied onto the report.
 * @param {string} options.target - Report target, e.g. `npm:2026.4.27`.
 * @param {boolean} options.timelineAvailable - Whether the record claims timeline
 *   diagnostics; also selects the event count and slowest span name.
 * @param {number} options.preProviderMs - Pre-provider latency; the turn time is
 *   this value plus the fixed 800 ms provider time.
 * @param {number|null} options.slowestSpanMs - Slowest span duration, passed through as given.
 * @returns {object} Report with one passing `agent-cold-warm-message` record.
 */
function syntheticCompareReport({ runId, target, timelineAvailable, preProviderMs, slowestSpanMs }) {
  const providerFinalMs = 800;
  const turnMs = preProviderMs + providerFinalMs;

  let timelineEventCount = 0;
  let slowestSpanName = null;
  if (timelineAvailable) {
    timelineEventCount = 20;
    slowestSpanName = "agent.prepare";
  }

  // Cold and plain agent timings are deliberately identical in this fixture.
  const measurements = {
    openclawTimelineAvailable: timelineAvailable,
    openclawTimelineEventCount: timelineEventCount,
    openclawSlowestSpanName: slowestSpanName,
    openclawSlowestSpanMs: slowestSpanMs,
    coldAgentTurnMs: turnMs,
    coldPreProviderMs: preProviderMs,
    coldProviderFinalMs: providerFinalMs,
    agentTurnMs: turnMs,
    agentPreProviderMs: preProviderMs,
    agentProviderFinalMs: providerFinalMs,
    runtimeDepsStagingMs: 0,
    peakRssMb: 100
  };

  const record = {
    scenario: "agent-cold-warm-message",
    surface: "agent-message",
    state: { id: "mock-openai-provider" },
    status: "PASS",
    measurements
  };

  return {
    runId,
    mode: "execution",
    target,
    generatedAt: "2026-05-01T00:00:00.000Z",
    platform: { os: "darwin", arch: "arm64", release: "test", node: "test" },
    summary: { statuses: { PASS: 1 } },
    records: [record]
  };
}
|
||||
|
||||
async function diagnosticsTimelineCheck() {
|
||||
try {
|
||||
const text = await readFile("fixtures/diagnostics/timeline.jsonl", "utf8");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user