// Compares Kova run reports against a baseline and renders text summaries of the result.
import { measurementMetricValue } from "../health.mjs";
/**
 * Default per-metric tolerances used when comparing a current report against a
 * baseline. Each key is a measurement name and each value is the largest
 * increase (current minus baseline) that is still accepted; a larger increase
 * is reported as a regression. A tolerance of 0 means any increase regresses.
 *
 * Frozen so the shared defaults cannot be mutated by accident; resolveThresholds()
 * hands out a fresh copy with caller overrides applied.
 */
const defaultThresholds = Object.freeze({
  missingDependencyErrors: 0,
  pluginLoadFailures: 0,
  peakRssMb: 100,
  cpuPercentMax: 25,
  coldReadyMs: 5000,
  warmReadyMs: 3000,
  upgradeMs: 10000,
  statusMs: 1000,
  pluginsListMs: 1000,
  modelsListMs: 3000,
  agentTurnMs: 10000,
  coldAgentTurnMs: 10000,
  warmAgentTurnMs: 5000,
  agentColdWarmDeltaMs: 10000,
  coldPreProviderMs: 5000,
  warmPreProviderMs: 2500,
  agentMetadataScanCount: 5,
  agentMetadataScanTotalMs: 1000,
  agentEventLoopMaxMs: 250,
  agentSessionPollCount: 10,
  tcpConnectMaxMs: 250,
  readinessListeningMs: 3000,
  readinessHealthReadyMs: 5000,
  readinessFailures: 0,
  startupHealthFailures: 0,
  postReadyHealthFailures: 0,
  finalHealthFailures: 0,
  startupHealthP95Ms: 1000,
  postReadyHealthP95Ms: 1000,
  gatewayRestartCount: 0,
  providerTimeoutMentions: 0,
  eventLoopDelayMentions: 0,
  metadataScanMentions: 10,
  configNormalizationMentions: 10,
  pluginMetadataScanCount: 10,
  configNormalizationCount: 10,
  runtimeDepsStagingMs: 5000,
  eventLoopDelayMs: 250,
  providerModelTimingMs: 5000,
  diagnosticArtifactBytes: 25 * 1024 * 1024,
  heapSnapshotBytes: 50 * 1024 * 1024,
  resourcePeakCommandTreeRssMb: 100,
  resourcePeakGatewayRssMb: 100,
  openclawTimelineParseErrors: 0,
  openclawSlowestSpanMs: 5000,
  openclawEventLoopMaxMs: 250,
  openclawProviderRequestMaxMs: 5000,
  openclawChildProcessFailedCount: 0,
  nodeProfileArtifactBytes: 100 * 1024 * 1024,
  nodeProfileTopFunctionMs: 5000
});
/**
 * Compares a current run report against a baseline report, record by record.
 *
 * Records are matched on their scenario/state key (see recordKey). Each current
 * record yields one scenario entry with status "NEW" (no baseline counterpart),
 * "REGRESSED" (status rank got worse or a metric rose past its tolerance) or
 * "OK". Baseline records with no current counterpart yield a "MISSING" entry
 * carrying a single coverage regression.
 *
 * @param {object} baseline - Baseline report; `records` may be absent.
 * @param {object} current - Current report; `records` may be absent.
 * @param {object} [options]
 * @param {object} [options.thresholds] - Tolerance overrides, passed to resolveThresholds().
 * @returns {object} Comparison result with schemaVersion "kova.compare.v1"; `ok`
 *   is true only when there are no regressions and no blocking source/release findings.
 */
export function compareReports(baseline, current, options = {}) {
  const thresholds = resolveThresholds(options.thresholds);
  const baselineRecords = indexRecords(baseline.records ?? []);
  const currentRecords = current.records ?? [];
  // Keys present in the current report, collected once so the MISSING pass
  // below is a set lookup rather than a rescan of currentRecords per baseline entry.
  const currentKeys = new Set();
  const scenarios = [];

  for (const currentRecord of currentRecords) {
    const key = recordKey(currentRecord);
    currentKeys.add(key);
    const baselineRecord = baselineRecords.get(key);
    if (!baselineRecord) {
      scenarios.push({
        key,
        scenario: currentRecord.scenario,
        state: currentRecord.state?.id ?? null,
        status: "NEW",
        currentStatus: currentRecord.status,
        baselineStatus: null,
        regressions: [],
        metrics: metricDeltas(null, currentRecord.measurements ?? {})
      });
      continue;
    }

    const regressions = [];
    // A higher rank is a worse status; only a worsening counts as a regression.
    if (statusRank(currentRecord.status) > statusRank(baselineRecord.status)) {
      regressions.push({
        kind: "status",
        metric: "status",
        baseline: baselineRecord.status,
        current: currentRecord.status,
        message: `status regressed from ${baselineRecord.status} to ${currentRecord.status}`
      });
    }

    regressions.push(...metricRegressions(baselineRecord.measurements ?? {}, currentRecord.measurements ?? {}, thresholds));

    scenarios.push({
      key,
      scenario: currentRecord.scenario,
      state: currentRecord.state?.id ?? null,
      status: regressions.length > 0 ? "REGRESSED" : "OK",
      currentStatus: currentRecord.status,
      baselineStatus: baselineRecord.status,
      regressions,
      metrics: metricDeltas(baselineRecord.measurements ?? {}, currentRecord.measurements ?? {})
    });
  }

  // Baseline entries that the current report no longer covers.
  for (const [key, baselineRecord] of baselineRecords.entries()) {
    if (currentKeys.has(key)) {
      continue;
    }
    scenarios.push({
      key,
      scenario: baselineRecord.scenario,
      state: baselineRecord.state?.id ?? null,
      status: "MISSING",
      currentStatus: null,
      baselineStatus: baselineRecord.status,
      regressions: [{
        kind: "coverage",
        metric: "scenario",
        baseline: "present",
        current: "missing",
        message: "scenario/state entry missing from current report"
      }],
      metrics: {}
    });
  }

  const regressionCount = scenarios.reduce((count, scenario) => count + scenario.regressions.length, 0);
  // null unless one report is a source build and the other a release runtime.
  const sourceRelease = compareSourceReleaseDiagnostics(baseline, current);
  const sourceReleaseBlockingCount = sourceRelease?.blockingCount ?? 0;
  return {
    schemaVersion: "kova.compare.v1",
    generatedAt: new Date().toISOString(),
    baseline: reportSummary(baseline),
    current: reportSummary(current),
    thresholds,
    sourceRelease,
    ok: regressionCount === 0 && sourceReleaseBlockingCount === 0,
    regressionCount,
    scenarios
  };
}
/**
 * Renders a plain-text summary of a comparison that lists only what needs
 * fixing: blocking source/release findings and scenarios with regressions.
 *
 * @param {object} comparison - Result of compareReports().
 * @returns {string} Newline-joined summary. When `comparison.ok` is true the
 *   text ends with a single "no blocking regressions" line.
 */
export function renderCompareFixerSummary(comparison) {
  const lines = [
    "Kova OpenClaw Regression Summary",
    "",
    `Baseline: ${comparison.baseline.runId ?? "unknown"} (${comparison.baseline.target ?? "unknown"})`,
    `Current: ${comparison.current.runId ?? "unknown"} (${comparison.current.target ?? "unknown"})`,
    `Result: ${comparison.ok ? "OK" : "REGRESSED"}`,
    ""
  ];

  if (comparison.ok) {
    lines.push("No blocking regressions were detected.");
    return lines.join("\n");
  }

  if (comparison.sourceRelease && comparison.sourceRelease.blockingCount > 0) {
    lines.push("Source/release diagnostic comparison:");
    // Only blocking findings are listed; warnings and info are omitted here.
    for (const finding of comparison.sourceRelease.findings.filter((item) => item.severity === "blocking")) {
      lines.push(`- ${finding.message}`);
    }
    lines.push("");
  }

  for (const scenario of comparison.scenarios.filter((item) => item.regressions.length > 0)) {
    lines.push(`Scenario: ${scenario.key}`);
    lines.push(`Status: ${scenario.baselineStatus ?? "missing"} -> ${scenario.currentStatus ?? "missing"}`);
    lines.push("Fixer notes:");
    for (const regression of scenario.regressions) {
      lines.push(`- ${regression.message}`);
    }
    lines.push("");
  }

  // Each section pushes a trailing blank line; drop the final one.
  return lines.join("\n").trimEnd();
}
/**
 * Renders a plain-text overview of a comparison: header, every scenario with
 * its status and regression messages, and, when present, the source/release
 * diagnostics section.
 *
 * @param {object} comparison - Result of compareReports().
 * @returns {string} Newline-joined summary.
 */
export function renderCompareSummary(comparison) {
  // Upper bound on source/release findings printed; further findings are not shown.
  const maxFindingsShown = 8;
  const lines = [
    `Baseline: ${comparison.baseline.runId ?? "unknown"} (${comparison.baseline.target ?? "unknown"})`,
    `Current: ${comparison.current.runId ?? "unknown"} (${comparison.current.target ?? "unknown"})`,
    `Result: ${comparison.ok ? "OK" : "REGRESSED"}`,
    `Regressions: ${comparison.regressionCount}`,
    "",
    "Scenarios:"
  ];

  for (const scenario of comparison.scenarios) {
    lines.push(`- ${scenario.status} ${scenario.key}`);
    for (const regression of scenario.regressions) {
      lines.push(` ${regression.message}`);
    }
  }

  if (comparison.sourceRelease) {
    lines.push("");
    lines.push("Source/release diagnostics:");
    lines.push(`- Status: ${comparison.sourceRelease.ok ? "OK" : "NEEDS_WORK"}`);
    lines.push(`- Pairs: ${comparison.sourceRelease.pairCount}`);
    lines.push(`- Blocking: ${comparison.sourceRelease.blockingCount}`);
    for (const finding of comparison.sourceRelease.findings.slice(0, maxFindingsShown)) {
      lines.push(`- ${finding.severity.toUpperCase()} ${finding.key ?? "comparison"}: ${finding.message}`);
    }
  }

  return lines.join("\n");
}
/**
 * Builds a lookup of records by their scenario/state key.
 *
 * @param {Iterable<object>} records - Report records.
 * @returns {Map<string, object>} Map from recordKey(record) to the record; when
 *   several records share a key the last one wins.
 */
function indexRecords(records) {
  const index = new Map();
  for (const record of records) {
    index.set(recordKey(record), record);
  }
  return index;
}
/**
 * Returns the key used to match a record across reports.
 *
 * @param {object} record - Report record with `scenario` and optional `state.id`.
 * @returns {string} "<scenario>:<state id>", with "none" standing in for a
 *   missing or nullish state id.
 */
function recordKey(record) {
  return `${record.scenario}:${record.state?.id ?? "none"}`;
}
/**
 * Extracts the identifying fields of a report for inclusion in a comparison.
 *
 * @param {object} report - Run report.
 * @returns {object} Summary in which every missing field is null, except
 *   `statuses`, which falls back to an empty object.
 */
function reportSummary(report) {
  return {
    runId: report.runId ?? null,
    mode: report.mode ?? null,
    profile: report.profile?.id ?? null,
    target: report.target ?? null,
    targetKind: targetKind(report.target),
    generatedAt: report.generatedAt ?? null,
    statuses: report.summary?.statuses ?? {}
  };
}
/**
 * Compares diagnostics between a source-build report and a release-runtime
 * report. Applies only when the two reports fall in different lanes (see
 * targetLane); the argument order does not matter.
 *
 * @param {object} leftReport - One of the two reports.
 * @param {object} rightReport - The other report.
 * @returns {object|null} null when either lane is unknown or both lanes match;
 *   otherwise a result with schemaVersion "kova.sourceReleaseComparison.v1",
 *   per-key pairs, findings, and counts per severity. `ok` is true when there
 *   are no blocking findings.
 */
function compareSourceReleaseDiagnostics(leftReport, rightReport) {
  const leftLane = targetLane(leftReport.target);
  const rightLane = targetLane(rightReport.target);
  if (!leftLane || !rightLane || leftLane === rightLane) {
    return null;
  }

  // Lanes differ and only two lanes exist, so exactly one side is each kind.
  const sourceReport = leftLane === "source-build" ? leftReport : rightReport;
  const releaseReport = leftLane === "release-runtime" ? leftReport : rightReport;
  const sourceRecords = indexRecords(sourceReport.records ?? []);
  const releaseRecords = indexRecords(releaseReport.records ?? []);
  // Only keys present in both reports can be paired; sorted for stable output.
  const keys = [...sourceRecords.keys()].filter((key) => releaseRecords.has(key)).sort();
  const findings = [];
  const pairs = [];

  if (keys.length === 0) {
    findings.push({
      severity: "blocking",
      key: null,
      message: "source-build and release-runtime reports have no shared scenario/state records, so diagnostic parity cannot be evaluated"
    });
  }

  for (const key of keys) {
    const source = sourceRecords.get(key);
    const release = releaseRecords.get(key);
    const pair = sourceReleasePair(key, source, release);
    pairs.push(pair);
    // A missing timeline blocks on the source side but is only informational
    // on the release side.
    if (!pair.source.timelineAvailable) {
      findings.push({
        severity: "blocking",
        key,
        message: `${key} source-build report did not include OpenClaw timeline diagnostics`
      });
    }
    if (!pair.release.timelineAvailable) {
      findings.push({
        severity: "info",
        key,
        message: `${key} release-runtime report has no timeline; use outside-in timings for released packages`
      });
    }
    if (typeof pair.source.agentPreProviderMs === "number" && typeof pair.release.agentPreProviderMs === "number") {
      const delta = pair.release.agentPreProviderMs - pair.source.agentPreProviderMs;
      // NOTE(review): this reads defaultThresholds directly, so caller-supplied
      // threshold overrides do not affect this check — confirm that is intended.
      if (delta > defaultThresholds.coldPreProviderMs) {
        findings.push({
          severity: "warning",
          key,
          message: `${key} release pre-provider latency exceeded source-build by ${delta}ms (${pair.source.agentPreProviderMs}ms -> ${pair.release.agentPreProviderMs}ms)`
        });
      }
    }
  }

  const blockingCount = findings.filter((finding) => finding.severity === "blocking").length;
  const warningCount = findings.filter((finding) => finding.severity === "warning").length;
  const infoCount = findings.filter((finding) => finding.severity === "info").length;
  return {
    schemaVersion: "kova.sourceReleaseComparison.v1",
    sourceTarget: sourceReport.target ?? null,
    releaseTarget: releaseReport.target ?? null,
    ok: blockingCount === 0,
    pairCount: pairs.length,
    blockingCount,
    warningCount,
    infoCount,
    pairs,
    findings
  };
}
/**
 * Builds the paired view of one scenario/state key across the source-build and
 * release-runtime reports.
 *
 * @param {string} key - Shared record key.
 * @param {object} source - Record from the source-build report.
 * @param {object} release - Record from the release-runtime report.
 * @returns {object} Pair whose identifying fields prefer the source record and
 *   fall back to the release record, plus a diagnostic summary of each side.
 */
function sourceReleasePair(key, source, release) {
  return {
    key,
    scenario: source.scenario ?? release.scenario ?? null,
    state: source.state?.id ?? release.state?.id ?? null,
    // Record-level surface wins over the value stored under measurements.
    surface: source.surface ?? release.surface ?? source.measurements?.surface ?? release.measurements?.surface ?? null,
    source: diagnosticRecordSummary(source),
    release: diagnosticRecordSummary(release)
  };
}
/**
 * Condenses one record into the diagnostic fields used by the source/release
 * comparison.
 *
 * @param {object|null|undefined} record - Report record; a nullish record or a
 *   record without measurements is treated as having no measurements.
 * @returns {object} Summary in which fields read directly from measurements are
 *   null when absent. The three health metrics are whatever
 *   measurementMetricValue() returns for them.
 */
function diagnosticRecordSummary(record) {
  const measurements = record?.measurements ?? {};
  return {
    status: record?.status ?? null,
    // Strictly true only; any other value counts as "no timeline".
    timelineAvailable: measurements.openclawTimelineAvailable === true,
    timelineEventCount: measurements.openclawTimelineEventCount ?? null,
    slowestSpanName: measurements.openclawSlowestSpanName ?? null,
    slowestSpanMs: measurements.openclawSlowestSpanMs ?? null,
    openRequiredSpanCount: measurements.openclawOpenRequiredSpanCount ?? null,
    // The agent* timing fields fall back to their cold* counterparts.
    agentTurnMs: measurements.agentTurnMs ?? measurements.coldAgentTurnMs ?? null,
    agentPreProviderMs: measurements.agentPreProviderMs ?? measurements.coldPreProviderMs ?? null,
    providerFinalMs: measurements.agentProviderFinalMs ?? measurements.coldProviderFinalMs ?? null,
    agentMetadataScanCount: measurements.agentMetadataScanCount ?? null,
    agentMetadataScanTotalMs: measurements.agentMetadataScanTotalMs ?? null,
    agentEventLoopMaxMs: measurements.agentEventLoopMaxMs ?? null,
    agentSessionPollCount: measurements.agentSessionPollCount ?? null,
    runtimeDepsStagingMs: measurements.runtimeDepsStagingMs ?? null,
    readinessHealthReadyMs: measurementMetricValue(measurements, "readinessHealthReadyMs"),
    startupHealthP95Ms: measurementMetricValue(measurements, "startupHealthP95Ms"),
    postReadyHealthP95Ms: measurementMetricValue(measurements, "postReadyHealthP95Ms"),
    peakRssMb: measurements.peakRssMb ?? null
  };
}
/**
 * Classifies a report target into a comparison lane.
 *
 * @param {unknown} target - Report target string, e.g. "<kind>:<rest>".
 * @returns {"source-build"|"release-runtime"|null} "source-build" for the
 *   "local-build" kind, "release-runtime" for "npm", "channel" or "runtime",
 *   and null for anything else.
 */
function targetLane(target) {
  const kind = targetKind(target);
  if (kind === "local-build") {
    return "source-build";
  }
  if (["npm", "channel", "runtime"].includes(kind)) {
    return "release-runtime";
  }
  return null;
}
/**
 * Extracts the kind prefix of a target string (the text before the first ":").
 *
 * @param {unknown} target - Report target.
 * @returns {string|null} The prefix, or null when `target` is not a string or
 *   contains no ":".
 */
function targetKind(target) {
  if (typeof target !== "string" || !target.includes(":")) {
    return null;
  }
  // Limit of 1 keeps only the segment before the first colon.
  return target.split(":", 1)[0];
}
/**
 * Maps a record status to a severity rank; a higher rank is a worse status.
 *
 * @param {unknown} status - Record status such as "PASS" or "FAIL".
 * @returns {number} 0 for PASS and DRY-RUN, 1 for SKIPPED, 2 for FAIL, 3 for
 *   BLOCKED. Any other value is ranked 2, the same as FAIL.
 */
function statusRank(status) {
  // A Map rather than an object literal: looking up a status such as
  // "toString" or "__proto__" on a plain object would return an inherited
  // member instead of falling through to the default rank.
  const ranks = new Map([
    ["PASS", 0],
    ["DRY-RUN", 0],
    ["SKIPPED", 1],
    ["FAIL", 2],
    ["BLOCKED", 3]
  ]);
  return ranks.get(status) ?? 2;
}
/**
 * Collects metric regressions between two measurement sets.
 *
 * @param {object} baseline - Baseline measurements.
 * @param {object} current - Current measurements.
 * @param {object} thresholds - Map of metric name to allowed increase; every
 *   key in it is checked.
 * @returns {object[]} One regression entry per metric whose increase exceeds
 *   its tolerance, in threshold key order.
 */
function metricRegressions(baseline, current, thresholds) {
  const regressions = [];
  for (const [metric, tolerance] of Object.entries(thresholds)) {
    addIncreaseRegression(regressions, baseline, current, metric, tolerance);
  }
  return regressions;
}
/**
 * Appends a regression entry to `regressions` when `metric` rose by more than
 * `tolerance` between the baseline and current measurements.
 *
 * Nothing is appended when either value is not a number, when the difference
 * is NaN, or when the increase is within tolerance (including decreases).
 *
 * @param {object[]} regressions - Output array, mutated in place.
 * @param {object} baseline - Baseline measurements.
 * @param {object} current - Current measurements.
 * @param {string} metric - Metric name, resolved via measurementMetricValue().
 * @param {number} tolerance - Largest accepted increase.
 * @returns {void}
 */
function addIncreaseRegression(regressions, baseline, current, metric, tolerance) {
  const baselineValue = measurementMetricValue(baseline, metric);
  const currentValue = measurementMetricValue(current, metric);
  if (typeof baselineValue !== "number" || typeof currentValue !== "number") {
    return;
  }

  const delta = currentValue - baselineValue;
  // NaN passes the typeof check above and `NaN <= tolerance` is false, so
  // without this guard an uncomparable value would be reported as a regression.
  if (Number.isNaN(delta) || delta <= tolerance) {
    return;
  }

  regressions.push({
    kind: "metric",
    metric,
    baseline: baselineValue,
    current: currentValue,
    delta,
    tolerance,
    message: `${metric} increased by ${delta} (${baselineValue} -> ${currentValue}), over tolerance ${tolerance}`
  });
}
// Metrics reported in every scenario's `metrics` section, in output order.
// Kept at module level so the list is built once rather than on every call.
const deltaMetricNames = Object.freeze([
  "peakRssMb",
  "cpuPercentMax",
  "coldReadyMs",
  "warmReadyMs",
  "upgradeMs",
  "statusMs",
  "pluginsListMs",
  "modelsListMs",
  "agentTurnMs",
  "coldAgentTurnMs",
  "warmAgentTurnMs",
  "agentColdWarmDeltaMs",
  "coldPreProviderMs",
  "warmPreProviderMs",
  "agentColdWarmPreProviderDeltaMs",
  "coldPreProviderAttributedMs",
  "warmPreProviderAttributedMs",
  "coldPreProviderUnattributedMs",
  "warmPreProviderUnattributedMs",
  "coldPreProviderAttributionCoverage",
  "warmPreProviderAttributionCoverage",
  "coldProviderFinalMs",
  "warmProviderFinalMs",
  "agentMetadataScanCount",
  "agentMetadataScanTotalMs",
  "agentMetadataScanMaxMs",
  "agentEventLoopMaxMs",
  "agentEventLoopSampleCount",
  "agentSessionPollCount",
  "agentSessionPollErrorCount",
  "tcpConnectMaxMs",
  "readinessListeningMs",
  "readinessHealthReadyMs",
  "startupHealthP95Ms",
  "postReadyHealthP95Ms",
  "startupHealthFailures",
  "postReadyHealthFailures",
  "finalHealthFailures",
  "readinessFailures",
  "missingDependencyErrors",
  "pluginLoadFailures",
  "gatewayRestartCount",
  "metadataScanMentions",
  "configNormalizationMentions",
  "providerLoadMentions",
  "modelCatalogMentions",
  "providerTimeoutMentions",
  "eventLoopDelayMentions",
  "v8ReportCount",
  "heapSnapshotCount",
  "diagnosticArtifactBytes",
  "nodeCpuProfileCount",
  "nodeHeapProfileCount",
  "nodeTraceEventCount",
  "nodeProfileArtifactBytes",
  "nodeProfileTopFunctionMs",
  "heapSnapshotBytes",
  "resourceSampleCount",
  "resourcePeakCommandTreeRssMb",
  "resourcePeakGatewayRssMb",
  "openclawTimelineEventCount",
  "openclawTimelineParseErrors",
  "openclawSlowestSpanMs",
  "openclawRepeatedSpanCount",
  "openclawEventLoopMaxMs",
  "openclawProviderRequestMaxMs",
  "openclawChildProcessFailedCount",
  "pluginMetadataScanCount",
  "configNormalizationCount",
  "runtimeDepsStagingMs",
  "eventLoopDelayMs",
  "providerModelTimingMs"
]);

/**
 * Builds the baseline/current/delta table for every reported metric.
 *
 * @param {object|null} baseline - Baseline measurements. compareReports() passes
 *   null for records with no baseline; this relies on measurementMetricValue()
 *   accepting null — TODO confirm against ../health.mjs.
 * @param {object} current - Current measurements.
 * @returns {Object<string, {baseline: *, current: *, delta: number|null}>}
 *   One entry per metric; `delta` is current minus baseline when both values
 *   are numbers and null otherwise.
 */
function metricDeltas(baseline, current) {
  const metrics = {};
  for (const metric of deltaMetricNames) {
    const currentValue = measurementMetricValue(current, metric);
    const baselineValue = measurementMetricValue(baseline, metric);
    metrics[metric] = {
      baseline: baselineValue,
      current: currentValue,
      delta: typeof baselineValue === "number" && typeof currentValue === "number" ? currentValue - baselineValue : null
    };
  }
  return metrics;
}
/**
 * Produces the effective thresholds: the defaults with caller overrides applied.
 *
 * @param {object|null|undefined} raw - Either a flat map of metric name to
 *   tolerance, or an object whose `metrics` property is such a map. A falsy
 *   value yields the defaults.
 * @returns {object} A new object; defaultThresholds itself is never returned.
 *   Overrides that are not finite numbers are ignored, and override keys that
 *   are not in the defaults are added.
 */
function resolveThresholds(raw) {
  if (!raw) {
    return { ...defaultThresholds };
  }
  // Accept both `{ metrics: {...} }` and a bare `{...}` of overrides.
  const overrides = raw.metrics && typeof raw.metrics === "object" ? raw.metrics : raw;
  const thresholds = { ...defaultThresholds };
  for (const [key, value] of Object.entries(overrides)) {
    if (typeof value === "number" && Number.isFinite(value)) {
      thresholds[key] = value;
    }
  }
  return thresholds;
}