feat: add calibrated threshold policy

This commit is contained in:
Shakker 2026-05-01 09:54:47 +01:00
parent 4819803596
commit fba1ea8b6d
No known key found for this signature in database
9 changed files with 425 additions and 14 deletions

View File

@ -32,6 +32,8 @@
"maxCpuPercent",
"missingDependencyErrors",
"modelsListMs",
"openclawSlowestSpanMs",
"openclawTimelineParseErrors",
"peakRssMb",
"pluginIndexPresent",
"pluginLoadFailures",

View File

@ -18,6 +18,52 @@
"agent.cleanup"
]
},
"calibration": {
"roles": {
"gateway": { "peakRssMb": 1000, "maxCpuPercent": 350 },
"command-tree": { "peakRssMb": 1400, "maxCpuPercent": 450 },
"runtime-staging": { "peakRssMb": 900, "maxCpuPercent": 350 },
"package-manager": { "peakRssMb": 900, "maxCpuPercent": 350 },
"agent-cli": { "peakRssMb": 1200, "maxCpuPercent": 350 },
"mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 }
},
"surfaces": {
"release-runtime-startup": {
"thresholds": {
"gatewayReadyMs": 60000,
"gatewayReadyHardTimeoutMs": 180000,
"runtimeDepsStagingMs": 45000,
"openclawTimelineParseErrors": 0,
"openclawSlowestSpanMs": 45000
}
},
"agent-message": {
"thresholds": {
"coldAgentTurnMs": 60000,
"warmAgentTurnMs": 15000,
"coldPreProviderMs": 15000,
"providerFinalMs": 3000,
"agentCleanupMs": 5000,
"openclawSlowestSpanMs": 45000
}
},
"bundled-runtime-deps": {
"thresholds": {
"runtimeDepsStagingMs": 45000,
"warmRuntimeDepsRestageCount": 0,
"openclawSlowestSpanMs": 45000
}
},
"gateway-performance": {
"thresholds": {
"gatewayReadyMs": 60000,
"healthP95Ms": 1000,
"peakRssMb": 1000,
"openclawSlowestSpanMs": 45000
}
}
}
},
"gate": {
"id": "openclaw-diagnostic",
"coverage": {

View File

@ -2,6 +2,74 @@
"id": "release",
"title": "Release Matrix",
"objective": "Broad OpenClaw release confidence across install, upgrade, bundled plugins, model/provider, UI, failure, soak, and platform smoke scenarios.",
"calibration": {
"roles": {
"gateway": { "peakRssMb": 900, "maxCpuPercent": 300 },
"command-tree": { "peakRssMb": 1200, "maxCpuPercent": 400 },
"runtime-management": { "peakRssMb": 900, "maxCpuPercent": 350 },
"package-manager": { "peakRssMb": 900, "maxCpuPercent": 350 },
"agent-cli": { "peakRssMb": 1100, "maxCpuPercent": 350 },
"agent-process": { "peakRssMb": 900, "maxCpuPercent": 300 },
"plugin-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
"model-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
"doctor-cli": { "peakRssMb": 700, "maxCpuPercent": 300 },
"tui-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
"dashboard-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
"browser-sidecar": { "peakRssMb": 500, "maxCpuPercent": 250 },
"mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 }
},
"surfaces": {
"release-runtime-startup": {
"thresholds": {
"gatewayReadyMs": 45000,
"gatewayReadyHardTimeoutMs": 120000,
"runtimeDepsStagingMs": 30000,
"missingDependencyErrors": 0,
"pluginLoadFailures": 0,
"peakRssMb": 950
}
},
"upgrade-existing-user": {
"thresholds": {
"upgradeMs": 180000,
"gatewayReadyMs": 60000,
"statusMs": 10000,
"missingDependencyErrors": 0,
"pluginLoadFailures": 0
}
},
"agent-message": {
"thresholds": {
"coldAgentTurnMs": 45000,
"warmAgentTurnMs": 15000,
"coldPreProviderMs": 10000,
"warmPreProviderMs": 2500,
"providerFinalMs": 3000,
"agentCleanupMs": 5000,
"agentProcessLeaks": 0
}
},
"bundled-runtime-deps": {
"thresholds": {
"runtimeDepsStagingMs": 30000,
"warmRuntimeDepsRestageCount": 0,
"warmRuntimeDepsStagingMs": 1000,
"missingDependencyErrors": 0,
"pluginLoadFailures": 0
}
},
"soak": {
"thresholds": {
"soakMinDurationMs": 60000,
"soakCommandFailures": 0,
"soakHealthFailures": 0,
"soakCommandP95Ms": 10000,
"soakHealthP95Ms": 1000,
"gatewayRssGrowthMb": 250
}
}
}
},
"gate": {
"id": "openclaw-release",
"coverage": {

View File

@ -0,0 +1,83 @@
/**
 * Resolves the effective thresholds for one evaluation by layering the
 * surface defaults, the profile's calibration and the scenario overrides.
 *
 * Scalar thresholds are merged in the order: surface thresholds, the
 * profile's calibration for that surface, scenario thresholds. Per-role
 * thresholds are merged in the order: profile calibration roles, surface
 * role thresholds, the profile's per-surface role thresholds, scenario
 * role thresholds. In both cases later layers win.
 *
 * @param {object} [input]
 * @param {object|null} [input.profile] - Profile that may carry `calibration`.
 * @param {object|null} [input.surface] - Surface with `id`, `thresholds`, `roleThresholds`.
 * @param {object|null} [input.scenario] - Scenario with `id` and `thresholds`.
 * @returns {{thresholds: object, roleThresholds: object, report: object}}
 *   The merged maps plus a report describing where they came from. The
 *   report references the same `thresholds` / `roleThresholds` objects.
 */
export function resolveThresholdPolicy({ profile = null, surface = null, scenario = null } = {}) {
  const calibration = profile?.calibration;
  // Falls back to an empty object so the property reads below never throw.
  const surfaceCalibration = calibration?.surfaces?.[surface?.id] ?? {};

  const thresholds = mergeObjects(
    surface?.thresholds,
    surfaceCalibration.thresholds,
    scenario?.thresholds
  );
  const roleThresholds = mergeRoleThresholds(
    calibration?.roles,
    surface?.roleThresholds,
    surfaceCalibration.roleThresholds,
    scenario?.thresholds?.roleThresholds
  );
  const sources = thresholdSources({ profile, surface, surfaceCalibration, scenario });

  const report = {
    schemaVersion: "kova.thresholdPolicy.v1",
    profileId: profile?.id ?? null,
    surfaceId: surface?.id ?? null,
    scenarioId: scenario?.id ?? null,
    sources,
    thresholds,
    roleThresholds
  };

  return { thresholds, roleThresholds, report };
}
/**
 * Lists every layer that contributed thresholds to a resolved policy, for
 * inclusion in the threshold policy report.
 *
 * Threshold sources report the sorted metric names they define; role sources
 * report the sorted role ids they define. The `roleThresholds` container key
 * is never reported as a threshold name, because the threshold merge skips
 * that key. A scenario's `thresholds.roleThresholds` is reported separately
 * as a `scenario-role` source, since it is merged into the role thresholds.
 *
 * @param {object} input
 * @param {object|null|undefined} input.profile - Profile; `id` and `calibration.roles` are read.
 * @param {object|null|undefined} input.surface - Surface; `id`, `thresholds`, `roleThresholds` are read.
 * @param {object|null|undefined} input.surfaceCalibration - The profile's calibration entry for the surface.
 * @param {object|null|undefined} input.scenario - Scenario; `id` and `thresholds` are read.
 * @returns {Array<object>} Source descriptors; layers that define nothing are omitted.
 */
function thresholdSources({ profile, surface, surfaceCalibration, scenario }) {
  const isPlainObject = (candidate) =>
    Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  // Sorted metric names of a threshold map, without the nested role container.
  const thresholdKeys = (map) =>
    (isPlainObject(map) ? Object.keys(map) : []).filter((key) => key !== "roleThresholds").sort();
  // Sorted role ids of a role-threshold map.
  const roleKeys = (map) => (isPlainObject(map) ? Object.keys(map) : []).sort();

  const sources = [];
  const profileSurfaceId = `${profile?.id}:${surface?.id}`;

  const surfaceThresholds = thresholdKeys(surface?.thresholds);
  if (surfaceThresholds.length > 0) {
    sources.push({ kind: "surface", id: surface.id, thresholds: surfaceThresholds });
  }
  const surfaceRoles = roleKeys(surface?.roleThresholds);
  if (surfaceRoles.length > 0) {
    sources.push({ kind: "surface-role", id: surface.id, roles: surfaceRoles });
  }
  const calibratedThresholds = thresholdKeys(surfaceCalibration?.thresholds);
  if (calibratedThresholds.length > 0) {
    sources.push({ kind: "profile-surface", id: profileSurfaceId, thresholds: calibratedThresholds });
  }
  const calibratedRoles = roleKeys(surfaceCalibration?.roleThresholds);
  if (calibratedRoles.length > 0) {
    sources.push({ kind: "profile-surface-role", id: profileSurfaceId, roles: calibratedRoles });
  }
  const profileRoles = roleKeys(profile?.calibration?.roles);
  if (profileRoles.length > 0) {
    sources.push({ kind: "profile-role", id: profile.id, roles: profileRoles });
  }
  const scenarioThresholds = thresholdKeys(scenario?.thresholds);
  if (scenarioThresholds.length > 0) {
    sources.push({ kind: "scenario", id: scenario.id, thresholds: scenarioThresholds });
  }
  // Scenario role overrides are merged into the role thresholds, so report them too.
  const scenarioRoles = roleKeys(scenario?.thresholds?.roleThresholds);
  if (scenarioRoles.length > 0) {
    sources.push({ kind: "scenario-role", id: scenario.id, roles: scenarioRoles });
  }
  return sources;
}
/**
 * Shallow-merges threshold maps left to right, so later arguments override
 * earlier ones. Arguments that are not plain objects (null, undefined,
 * primitives, arrays) are ignored, and the `roleThresholds` key is never
 * copied because role thresholds are merged separately.
 *
 * @param {...(object|null|undefined)} objects - Threshold maps in ascending precedence.
 * @returns {object} A new object holding the merged entries.
 */
function mergeObjects(...objects) {
  const combined = {};
  const layers = objects.filter(
    (candidate) => Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate)
  );
  layers.forEach((layer) => {
    Object.entries(layer).forEach(([name, limit]) => {
      if (name !== "roleThresholds") {
        combined[name] = limit;
      }
    });
  });
  return combined;
}
/**
 * Merges per-role threshold maps left to right. For each role the threshold
 * objects are shallow-merged, so a later set overrides individual metrics of
 * an earlier set without discarding the metrics it does not mention.
 * Arguments that are not plain objects are ignored.
 *
 * @param {...(object|null|undefined)} sets - Maps of role id to threshold object, in ascending precedence.
 * @returns {object} A new map of role id to merged threshold object.
 */
function mergeRoleThresholds(...sets) {
  const combined = {};
  const usableSets = sets.filter(
    (candidate) => Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate)
  );
  usableSets.forEach((roleMap) => {
    Object.entries(roleMap).forEach(([role, thresholds]) => {
      const existing = combined[role] ?? {};
      const incoming = thresholds ?? {};
      combined[role] = { ...existing, ...incoming };
    });
  });
  return combined;
}

View File

@ -1,11 +1,17 @@
import { buildAgentTurnBreakdown } from "./collectors/agent-turns.mjs";
import { computeProviderTurnAttribution } from "./collectors/provider.mjs";
import { summarizeRuntimeDepsLogs } from "./collectors/logs.mjs";
import { resolveThresholdPolicy } from "./evaluation/thresholds.mjs";
export function evaluateRecord(record, scenario, options = {}) {
const originalStatus = record.status;
const thresholds = { ...(options.surface?.thresholds ?? {}), ...(scenario.thresholds ?? {}) };
const roleThresholds = mergeRoleThresholds(options.surface?.roleThresholds, scenario.thresholds?.roleThresholds);
const thresholdPolicy = resolveThresholdPolicy({
profile: options.profile,
surface: options.surface,
scenario
});
const thresholds = thresholdPolicy.thresholds;
const roleThresholds = thresholdPolicy.roleThresholds;
const violations = [];
const allResults = collectResults(record);
const resourceSummary = collectResourceSummary(allResults);
@ -580,6 +586,7 @@ export function evaluateRecord(record, scenario, options = {}) {
providerModelTimingMs
})
};
record.thresholdPolicy = thresholdPolicy.report;
if (violations.length > 0) {
if (originalStatus === "PASS") {
@ -1648,17 +1655,6 @@ function checkRoleThresholds(violations, byRole, roleThresholds) {
}
}
/**
 * Combines two per-role threshold maps. Every role in `base` is copied, then
 * each role in `override` is shallow-merged on top, so override metrics win
 * while untouched base metrics are kept.
 *
 * @param {object|null|undefined} base - Map of role id to threshold object.
 * @param {object|null|undefined} override - Map of role id to overriding threshold object.
 * @returns {object} A new map of role id to merged threshold object.
 */
function mergeRoleThresholds(base, override) {
  const combined = {};
  Object.entries(base ?? {}).forEach(([role, limits]) => {
    combined[role] = { ...limits };
  });
  Object.entries(override ?? {}).forEach(([role, limits]) => {
    const existing = combined[role] ?? {};
    combined[role] = { ...existing, ...limits };
  });
  return combined;
}
function collectResults(record) {
const results = [];
for (const phase of record.phases ?? []) {

View File

@ -712,6 +712,10 @@ function profileSummary(profile) {
entryCount: profile.entries.length,
targetKinds: profile.targetKinds ?? null,
diagnostics: profile.diagnostics ?? null,
calibration: profile.calibration ? {
surfaceCount: Object.keys(profile.calibration.surfaces ?? {}).length,
roleCount: Object.keys(profile.calibration.roles ?? {}).length
} : null,
gate: profile.gate ? {
id: profile.gate.id ?? `${profile.id}-gate`,
blockingCount: Array.isArray(profile.gate.blocking) ? profile.gate.blocking.length : profile.entries.length,

View File

@ -24,6 +24,7 @@ export function validateProfileShape(profile, sourceName = "profile") {
requireArray(profile, "entries", errors);
validateStringArray(profile.targetKinds, "targetKinds", errors, { optional: true });
validateDiagnostics(profile.diagnostics, "diagnostics", errors);
validateCalibration(profile.calibration, "calibration", errors);
validateEntries(profile.entries, errors);
if (profile.gate !== undefined) {
@ -33,6 +34,50 @@ export function validateProfileShape(profile, sourceName = "profile") {
assertNoShapeErrors(errors, sourceName);
}
/**
 * Shape-checks the optional `calibration` section of a profile and appends a
 * message to `errors` for every problem found. Nothing is thrown here.
 *
 * `calibration.roles` must map role ids to threshold maps. Each entry of
 * `calibration.surfaces` must be an object whose optional `thresholds` is a
 * threshold map and whose optional `roleThresholds` maps role ids to
 * threshold maps.
 *
 * @param {*} calibration - The value of `profile.calibration`; `undefined` is accepted.
 * @param {string} prefix - Path used at the start of every error message.
 * @param {string[]} errors - Collector that receives the error messages.
 */
function validateCalibration(calibration, prefix, errors) {
  if (calibration === undefined) {
    return;
  }
  const isRecord = (candidate) =>
    Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);

  if (!isRecord(calibration)) {
    errors.push(`${prefix} must be an object when set`);
    return;
  }

  validateThresholdMap(calibration.roles, `${prefix}.roles`, errors, { keyed: true });

  const { surfaces } = calibration;
  if (surfaces === undefined) {
    return;
  }
  if (!isRecord(surfaces)) {
    errors.push(`${prefix}.surfaces must be an object when set`);
    return;
  }

  Object.entries(surfaces).forEach(([surfaceId, surfaceCalibration]) => {
    const surfacePath = `${prefix}.surfaces.${surfaceId}`;
    if (!isRecord(surfaceCalibration)) {
      errors.push(`${surfacePath} must be an object`);
      return;
    }
    validateThresholdMap(surfaceCalibration.thresholds, `${surfacePath}.thresholds`, errors);
    validateThresholdMap(surfaceCalibration.roleThresholds, `${surfacePath}.roleThresholds`, errors, { keyed: true });
  });
}
/**
 * Validates a threshold map and appends a message to `errors` for each
 * problem. `undefined` is accepted as "not set".
 *
 * With `options.keyed` the map is treated as one level of grouping (for
 * example role id to threshold map) and each value is validated as a plain
 * threshold map. Otherwise every value must be a finite number.
 *
 * @param {*} map - Candidate threshold map.
 * @param {string} prefix - Path used at the start of every error message.
 * @param {string[]} errors - Collector that receives the error messages.
 * @param {{keyed?: boolean}} [options] - Set `keyed` when values are nested threshold maps.
 */
function validateThresholdMap(map, prefix, errors, options = {}) {
  if (map === undefined) {
    return;
  }
  const isRecord = map !== null && typeof map === "object" && !Array.isArray(map);
  if (!isRecord) {
    errors.push(`${prefix} must be an object when set`);
    return;
  }
  Object.entries(map).forEach(([key, value]) => {
    const path = `${prefix}.${key}`;
    if (options.keyed) {
      // Nested maps are validated as flat threshold maps (no further nesting).
      validateThresholdMap(value, path, errors);
      return;
    }
    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
    if (!isFiniteNumber) {
      errors.push(`${path} must be a finite number`);
    }
  });
}
function validateDiagnostics(diagnostics, prefix, errors) {
if (diagnostics === undefined) {
return;

View File

@ -83,7 +83,7 @@ export function validateRegistryReferences({ scenarios, states, profiles, surfac
}
for (const profile of profiles) {
validateProfileReferences(profile, { scenarioIds, stateIds, surfaceIds, traitIds, scenarioById, stateById, surfaceById }, errors);
validateProfileReferences(profile, { scenarioIds, stateIds, surfaceIds, processRoleIds, metricIds, traitIds, scenarioById, stateById, surfaceById }, errors);
}
if (errors.length > 0) {
@ -178,6 +178,35 @@ function validateProfileReferences(profile, refs, errors) {
validateCoverageRefs(profile, refs, errors, "traits", refs.traitIds);
validatePlatformCoverageRefs(profile, errors);
validateStateSurfaceCoverageRefs(profile, refs, errors);
validateCalibrationRefs(profile, refs, errors);
}
/**
 * Checks that a profile's calibration only refers to registered process
 * roles and surfaces, and hands every threshold map to
 * `validateThresholdMetrics` together with the known metric ids.
 *
 * An unknown role or surface is reported once and its thresholds are not
 * inspected further.
 *
 * @param {object} profile - Profile whose optional `calibration` is checked.
 * @param {object} refs - Lookup sets; `processRoleIds`, `surfaceIds` and `metricIds` are used.
 * @param {string[]} errors - Collector that receives the error messages.
 */
function validateCalibrationRefs(profile, refs, errors) {
  const { calibration } = profile;
  if (!calibration) {
    return;
  }
  const owner = `profile '${profile.id}'`;

  Object.entries(calibration.roles ?? {}).forEach(([role, roleThresholds]) => {
    if (refs.processRoleIds.has(role)) {
      validateThresholdMetrics(roleThresholds, refs.metricIds, errors, `${owner} calibration.roles.${role}`);
    } else {
      errors.push(`${owner} calibration.roles references unknown process role '${role}'`);
    }
  });

  Object.entries(calibration.surfaces ?? {}).forEach(([surfaceId, surfaceCalibration]) => {
    if (!refs.surfaceIds.has(surfaceId)) {
      errors.push(`${owner} calibration.surfaces references unknown surface '${surfaceId}'`);
      return;
    }
    const surfacePath = `${owner} calibration.surfaces.${surfaceId}`;
    validateThresholdMetrics(surfaceCalibration.thresholds ?? {}, refs.metricIds, errors, `${surfacePath}.thresholds`);

    Object.entries(surfaceCalibration.roleThresholds ?? {}).forEach(([role, thresholds]) => {
      if (refs.processRoleIds.has(role)) {
        validateThresholdMetrics(thresholds, refs.metricIds, errors, `${surfacePath}.roleThresholds.${role}`);
      } else {
        errors.push(`${surfacePath}.roleThresholds references unknown process role '${role}'`);
      }
    });
  });
}
function validatePlatformCoverageRefs(profile, errors) {

View File

@ -92,8 +92,11 @@ export async function runSelfCheck(flags = {}) {
assertEqual(data.coverage?.schemaVersion, "kova.coverage.v1", "coverage schema");
assertArrayNotEmpty(data.coverage?.scenarioSurfaceMap, "scenario surface map");
const releaseCoverage = data.coverage?.profiles?.find((profile) => profile.id === "release");
const releaseProfile = data.profiles?.find((profile) => profile.id === "release");
assertArrayNotEmpty(releaseCoverage?.required?.platforms, "release required platform coverage");
assertArrayNotEmpty(releaseCoverage?.currentPlatformKeys, "current platform coverage keys");
assertEqual((releaseProfile?.calibration?.surfaceCount ?? 0) > 0, true, "release profile calibrated surfaces");
assertEqual((releaseProfile?.calibration?.roleCount ?? 0) > 0, true, "release profile calibrated roles");
if (data.scenarios.some((scenario) => typeof scenario.surface !== "string" || scenario.surface.length === 0)) {
throw new Error("every scenario must expose a surface");
}
@ -167,6 +170,7 @@ export async function runSelfCheck(flags = {}) {
checks.push(await resourceRoleAttributionCheck(tmp));
checks.push(await processSnapshotCheck(tmp));
checks.push(roleThresholdEvaluationCheck());
checks.push(thresholdPolicyCalibrationCheck());
checks.push(stateRegistryValidationCheck());
checks.push(scenarioStateCompatibilityCheck());
checks.push(await cpuProfileParserCheck());
@ -2751,6 +2755,99 @@ function roleThresholdEvaluationCheck() {
}
}
/**
 * Self-check: evaluates a synthetic record against a profile whose
 * calibration is stricter than the surface defaults, and asserts that the
 * calibrated values both appear in the recorded threshold policy and produce
 * violations.
 *
 * The synthetic command takes 150 ms and the gateway role peaks at 250 MB,
 * while the profile calibrates `coldReadyMs` to 100 (surface default 1000)
 * and the gateway `peakRssMb` to 200.
 *
 * @returns {{id: string, status: string, command: string, durationMs: number, message?: string}}
 *   A PASS result, or a FAIL result carrying the failure message.
 */
function thresholdPolicyCalibrationCheck() {
  const id = "threshold-policy-calibration";
  const command = "evaluate synthetic profile threshold calibration";
  const outcome = (status, extra = {}) => ({ id, status, command, durationMs: 0, ...extra });
  // Fresh object per use so no two fixture fields share a reference.
  const gatewayPeak = () => ({ role: "gateway", peakRssMb: 250, maxCpuPercent: 80 });

  try {
    const resourceSamples = {
      schemaVersion: "kova.resourceSamples.v1",
      sampleCount: 1,
      peakTotalRssMb: 250,
      maxTotalCpuPercent: 80,
      byRole: {
        gateway: {
          peakRssMb: 250,
          maxCpuPercent: 80,
          peakRssAtMs: 10,
          peakCpuAtMs: 10,
          peakProcessCount: 1
        }
      },
      topRolesByRss: [gatewayPeak()],
      topRolesByCpu: [gatewayPeak()],
      topByRss: [],
      topByCpu: []
    };
    const record = {
      scenario: "synthetic-threshold-policy",
      title: "Synthetic Threshold Policy",
      status: "PASS",
      phases: [{
        id: "sample",
        results: [{
          command: "ocm start kova-threshold-test",
          status: 0,
          durationMs: 150,
          resourceSamples
        }],
        metrics: { logs: zeroLogMetrics() }
      }],
      finalMetrics: {
        service: { gatewayState: "running" },
        logs: zeroLogMetrics()
      }
    };
    const scenario = {
      id: "synthetic-threshold-policy",
      thresholds: {}
    };
    const profile = {
      id: "release",
      calibration: {
        roles: {
          gateway: { peakRssMb: 200 }
        },
        surfaces: {
          "release-runtime-startup": {
            thresholds: { coldReadyMs: 100 }
          }
        }
      }
    };
    const surface = {
      id: "release-runtime-startup",
      thresholds: { coldReadyMs: 1000 },
      roleThresholds: {}
    };

    evaluateRecord(record, scenario, { profile, surface });

    const hasViolation = (metric) => record.violations.some((violation) => violation.metric === metric);

    assertEqual(record.status, "FAIL", "profile calibration threshold should fail record");
    assertEqual(record.thresholdPolicy?.profileId, "release", "threshold policy profile id");
    assertEqual(record.thresholdPolicy?.thresholds?.coldReadyMs, 100, "profile surface threshold override");
    assertEqual(record.thresholdPolicy?.roleThresholds?.gateway?.peakRssMb, 200, "profile role threshold");
    assertEqual(hasViolation("coldReadyMs"), true, "profile calibrated duration violation");
    assertEqual(hasViolation("resourceByRole.gateway.peakRssMb"), true, "profile calibrated role violation");

    return outcome("PASS");
  } catch (error) {
    return outcome("FAIL", { message: error.message });
  }
}
function stateRegistryValidationCheck() {
try {
let rejectedTrait = false;
@ -2904,6 +3001,47 @@ function stateRegistryValidationCheck() {
}
assertEqual(rejectedMetric, true, "unknown scenario metric rejected");
let rejectedCalibration = false;
try {
validateRegistryReferences({
scenarios: [],
states: [],
profiles: [{
id: "profile",
entries: [],
calibration: {
roles: {
missingRole: { peakRssMb: 100 }
},
surfaces: {
missingSurface: {
thresholds: { peakRssMb: 100 }
},
knownSurface: {
thresholds: { madeUpMetric: 1 },
roleThresholds: {
knownRole: { peakRssMb: 100 }
}
}
}
}
}],
surfaces: [{
id: "knownSurface",
processRoles: [],
requiredStates: [],
targetKinds: []
}],
processRoles: [{ id: "knownRole" }],
metrics: [{ id: "peakRssMb" }]
});
} catch (error) {
rejectedCalibration = /calibration\.roles references unknown process role/.test(error.message) &&
/calibration\.surfaces references unknown surface/.test(error.message) &&
/unknown metric 'madeUpMetric'/.test(error.message);
}
assertEqual(rejectedCalibration, true, "invalid profile calibration rejected");
let rejectedPlatform = false;
try {
validateRegistryReferences({