feat: mark live provider evidence
This commit is contained in:
parent
d970889133
commit
eef4038545
@ -215,6 +215,8 @@ export function authDisplay(policy) {
|
||||
source: policy.source,
|
||||
externalCli: policy.externalCli ?? null,
|
||||
setup: policy.setup === true,
|
||||
deterministic: policy.mode === "mock",
|
||||
environmentDependent: policy.mode === "live",
|
||||
envVars: policy.envVars ?? [],
|
||||
mockProvider: policy.mockProvider ? mockProviderDisplay(policy.mockProvider) : null,
|
||||
secretValues: "redacted"
|
||||
@ -233,7 +235,8 @@ export function authReportSummary(authContext) {
|
||||
externalCli: authContext.live.externalCli ?? null,
|
||||
verification: authContext.live.verification ?? null,
|
||||
envVars: authContext.live.envVars,
|
||||
reason: authContext.live.reason
|
||||
reason: authContext.live.reason,
|
||||
environmentDependent: authContext.requestedMode === "live"
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@ -7,13 +7,20 @@ export const PROVIDER_EVIDENCE_SCHEMA = "kova.providerEvidence.v1";
|
||||
export async function collectProviderEvidence(artifactDir, options = {}) {
|
||||
const startedAt = Date.now();
|
||||
const requestLogPath = options.requestLogPath ?? join(artifactDir, "mock-openai", "requests.jsonl");
|
||||
const timelinePath = options.timelinePath ?? join(artifactDir, "openclaw", "timeline.jsonl");
|
||||
const authMode = options.authPolicy?.mode ?? options.authMode ?? null;
|
||||
const dirs = collectorArtifactDirs(artifactDir);
|
||||
const summaryPath = join(dirs.provider, "provider-evidence.json");
|
||||
const evidence = {
|
||||
schemaVersion: PROVIDER_EVIDENCE_SCHEMA,
|
||||
collectedAt: new Date().toISOString(),
|
||||
available: false,
|
||||
source: null,
|
||||
authMode,
|
||||
deterministic: authMode === "mock",
|
||||
environmentDependent: authMode === "live",
|
||||
requestLogPath,
|
||||
timelinePath,
|
||||
summaryPath,
|
||||
requestCount: 0,
|
||||
firstRequestStartAt: null,
|
||||
@ -38,14 +45,13 @@ export async function collectProviderEvidence(artifactDir, options = {}) {
|
||||
const text = await readFile(requestLogPath, "utf8");
|
||||
const parsed = parseProviderRequestLog(text);
|
||||
Object.assign(evidence, parsed, {
|
||||
source: "mock-provider-log",
|
||||
available: parsed.requestCount > 0,
|
||||
artifacts: [requestLogPath, summaryPath]
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.code === "ENOENT") {
|
||||
evidence.error = "provider request log not found";
|
||||
evidence.statusLabel = "INFO";
|
||||
evidence.artifacts = [];
|
||||
await applyTimelineProviderEvidence(evidence, timelinePath, summaryPath);
|
||||
} else {
|
||||
evidence.error = error.message;
|
||||
evidence.commandStatus = 1;
|
||||
@ -62,6 +68,32 @@ export async function collectProviderEvidence(artifactDir, options = {}) {
|
||||
return evidence;
|
||||
}
|
||||
|
||||
/**
 * Fallback evidence source: populates `evidence` from the OpenClaw timeline.
 *
 * `collectProviderEvidence` calls this after the provider request log turned
 * out to be missing, at which point it has already stored
 * "provider request log not found" in `evidence.error`. The function mutates
 * `evidence` in place and reports read problems through its fields instead of
 * throwing.
 *
 * Outcomes:
 * - timeline has provider.request events: evidence becomes available, the
 *   timeline and summary paths are listed as artifacts, and the stale
 *   "not found" error left by the caller is removed (`statusLabel` is not
 *   touched here);
 * - timeline readable but without provider.request events: evidence stays
 *   unavailable with a more specific INFO error;
 * - timeline file missing (ENOENT): the plain "not found" INFO state is kept;
 * - any other failure: the error message is recorded with commandStatus 1 and
 *   a WARN label.
 *
 * @param {object} evidence - Evidence record to update in place.
 * @param {string} timelinePath - Path of the timeline JSONL file to read.
 * @param {string} summaryPath - Path of the evidence summary, listed as an artifact on success.
 * @returns {Promise<void>}
 */
async function applyTimelineProviderEvidence(evidence, timelinePath, summaryPath) {
  try {
    const text = await readFile(timelinePath, "utf8");
    const parsed = parseTimelineProviderRequestLog(text);
    const hasRequests = parsed.requestCount > 0;
    Object.assign(evidence, parsed, {
      source: "openclaw-timeline",
      available: hasRequests,
      artifacts: hasRequests ? [timelinePath, summaryPath] : []
    });
    if (hasRequests) {
      // The fallback succeeded, so the "request log not found" error recorded
      // before this call no longer describes the evidence; keeping it would
      // pair `available: true` with an error message.
      delete evidence.error;
    } else if (parsed.requestCount === 0) {
      evidence.error = "provider request log not found and OpenClaw timeline contained no provider.request events";
      evidence.statusLabel = "INFO";
    }
  } catch (timelineError) {
    if (timelineError.code === "ENOENT") {
      // Neither the request log nor the timeline exists: informational only.
      evidence.error = "provider request log not found";
      evidence.statusLabel = "INFO";
      evidence.artifacts = [];
      return;
    }
    evidence.error = timelineError.message;
    evidence.commandStatus = 1;
    evidence.statusLabel = "WARN";
  }
}
|
||||
|
||||
export function parseProviderRequestLog(text) {
|
||||
const requests = [];
|
||||
const parseErrors = [];
|
||||
@ -119,6 +151,40 @@ export function parseProviderRequestLog(text) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Extracts provider request records from OpenClaw timeline JSONL text.
 *
 * Each non-blank line is parsed as JSON. Lines whose `type` or `name` equals
 * "provider.request" are normalized and collected; other object events are
 * ignored. Lines that fail to parse, parse to something other than a plain
 * object, or throw while being normalized are reported as `kind: "parse"`
 * errors carrying their 1-based line number.
 *
 * @param {string} text - Timeline contents; null/undefined is treated as empty.
 * @returns {object} The request summary, with `errors` (line errors followed
 *   by the summary's own errors) and the normalized `requests`.
 */
export function parseTimelineProviderRequestLog(text) {
  const providerRequests = [];
  const lineFailures = [];

  String(text ?? "").split(/\r?\n/).forEach((rawLine, offset) => {
    const lineNumber = offset + 1;
    const content = rawLine.trim();
    if (content.length === 0) {
      return;
    }
    try {
      const entry = JSON.parse(content);
      const isRecord = Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
      if (!isRecord) {
        lineFailures.push({ kind: "parse", line: lineNumber, error: "event is not an object" });
        return;
      }
      if (entry.type === "provider.request" || entry.name === "provider.request") {
        providerRequests.push(normalizeTimelineProviderRequest(entry, lineNumber));
      }
    } catch (failure) {
      lineFailures.push({ kind: "parse", line: lineNumber, error: failure.message });
    }
  });

  const totals = summarizeProviderRequests(providerRequests);
  return {
    ...totals,
    errors: [...lineFailures, ...totals.errors],
    requests: providerRequests
  };
}
|
||||
|
||||
export function computeProviderTurnAttribution(result, providerEvidence) {
|
||||
if (!result) {
|
||||
return null;
|
||||
@ -218,6 +284,84 @@ function requestsWithinCommand(requests, commandStartedAt, commandFinishedAt) {
|
||||
.toSorted((left, right) => left.receivedAtEpochMs - right.receivedAtEpochMs);
|
||||
}
|
||||
|
||||
/**
 * Converts one timeline `provider.request` event into the provider request
 * record shape used by the evidence summary.
 *
 * Field fallbacks, as read from the event:
 * - receive time: `receivedAtEpochMs`, else parsed from `receivedAt`,
 *   `timestamp` or `time`;
 * - duration: `durationMs`, `elapsedMs` or `ms`;
 * - response time: `respondedAtEpochMs` / `respondedAt`, else receive time
 *   plus duration when both are numbers;
 * - provider / operation / model: top-level field, else `attributes`;
 * - route: `route`, `path`, the operation, else "provider.request".
 *
 * A request counts as successful when `ok` is a boolean true, or when `ok` is
 * not a boolean and `status` is absent or coerces to a number below 400.
 * First-byte/first-chunk timings and body size are always null here.
 *
 * @param {object} event - Parsed timeline event.
 * @param {number} line - 1-based line number of the event in the timeline.
 * @returns {object} Normalized request record.
 */
function normalizeTimelineProviderRequest(event, line) {
  const receivedAtEpochMs = numberOrParsedTime(event.receivedAtEpochMs, event.receivedAt ?? event.timestamp ?? event.time);
  const durationMs = numberOrNull(event.durationMs ?? event.elapsedMs ?? event.ms);
  const respondedAtEpochMs = numberOrParsedTime(event.respondedAtEpochMs, event.respondedAt) ??
    (typeof receivedAtEpochMs === "number" && typeof durationMs === "number" ? receivedAtEpochMs + durationMs : null);
  const provider = event.provider ?? event.attributes?.provider ?? null;
  const operation = event.operation ?? event.attributes?.operation ?? null;
  const route = event.route ?? event.path ?? operation ?? "provider.request";
  const ok = typeof event.ok === "boolean" ? event.ok : event.status === undefined || Number(event.status) < 400;
  // Normalize the status once so `status` and `statusClass` cannot disagree
  // (e.g. a NaN status must not produce a "NaNxx" class next to a null status).
  const status = numberOrNull(event.status);
  return {
    schemaVersion: "kova.provider.request.fromTimeline.v1",
    line,
    requestId: event.requestId ?? event.spanId ?? `timeline-provider-${line}`,
    receivedAt: event.receivedAt ?? event.timestamp ?? isoOrNull(receivedAtEpochMs),
    receivedAtEpochMs,
    respondedAt: event.respondedAt ?? isoOrNull(respondedAtEpochMs),
    respondedAtEpochMs,
    durationMs: durationMs ?? durationBetween(receivedAtEpochMs, respondedAtEpochMs),
    firstByteAt: null,
    firstByteAtEpochMs: null,
    firstByteLatencyMs: null,
    firstChunkAt: null,
    firstChunkAtEpochMs: null,
    firstChunkLatencyMs: null,
    method: event.method ?? null,
    mode: event.mode ?? null,
    behavior: event.behavior ?? event.mode ?? null,
    outcome: event.outcome ?? (ok ? "completed" : "error"),
    errorClass: event.errorClass ?? (ok ? null : "provider-error"),
    providerCallIndex: numberOrNull(event.providerCallIndex),
    route,
    path: event.path ?? route,
    operation,
    provider,
    model: event.model ?? event.modelId ?? event.attributes?.model ?? null,
    stream: typeof event.stream === "boolean" ? event.stream : null,
    status,
    statusClass: Number.isFinite(status) ? `${Math.floor(status / 100)}xx` : null,
    bodyBytes: null,
    parseError: null
  };
}
|
||||
|
||||
/**
 * Builds the aggregate summary for a list of normalized provider requests.
 *
 * Timing fields only consider requests with a numeric `receivedAtEpochMs`;
 * `requestCount` and the grouped breakdowns cover every request passed in.
 *
 * @param {object[]} requests - Normalized provider request records.
 * @returns {object} Counts, first-request/last-response timestamps, overall
 *   provider duration (never negative), the lowest first-byte and first-chunk
 *   latencies, per-field breakdowns, and request errors.
 */
function summarizeProviderRequests(requests) {
  const isNumeric = (field) => (request) => typeof request[field] === "number";
  const ascending = (field) => (left, right) => left[field] - right[field];
  // Keeps entries with a numeric `field`, ordered from lowest to highest.
  const rankedBy = (pool, field) => pool.filter(isNumeric(field)).toSorted(ascending(field));

  const byReceipt = rankedBy(requests, "receivedAtEpochMs");
  const earliest = byReceipt[0] ?? null;
  const latest = rankedBy(byReceipt, "respondedAtEpochMs").at(-1) ?? null;
  const quickestByte = rankedBy(byReceipt, "firstByteLatencyMs")[0] ?? null;
  const quickestChunk = rankedBy(byReceipt, "firstChunkLatencyMs")[0] ?? null;

  // Span from the first request received to the last response finished.
  let providerDurationMs = null;
  if (earliest && latest) {
    providerDurationMs = Math.max(0, latest.respondedAtEpochMs - earliest.receivedAtEpochMs);
  }

  return {
    requestCount: requests.length,
    firstRequestStartAt: earliest?.receivedAt ?? null,
    firstRequestStartEpochMs: earliest?.receivedAtEpochMs ?? null,
    lastResponseEndAt: latest?.respondedAt ?? null,
    lastResponseEndEpochMs: latest?.respondedAtEpochMs ?? null,
    providerDurationMs,
    firstByteLatencyMs: quickestByte?.firstByteLatencyMs ?? null,
    firstChunkLatencyMs: quickestChunk?.firstChunkLatencyMs ?? null,
    routes: summarizeBy(requests, "route"),
    models: summarizeBy(requests, "model"),
    modes: summarizeBy(requests, "mode"),
    outcomes: summarizeBy(requests, "outcome"),
    errorClasses: summarizeBy(requests, "errorClass"),
    statuses: summarizeBy(requests, "status"),
    errors: requestErrors(requests)
  };
}
|
||||
|
||||
function normalizeProviderRequest(raw, line) {
|
||||
const receivedAtEpochMs = numberOrParsedTime(raw.receivedAtEpochMs, raw.receivedAt);
|
||||
const respondedAtEpochMs = numberOrParsedTime(raw.respondedAtEpochMs, raw.respondedAt);
|
||||
|
||||
@ -74,6 +74,9 @@ export function renderMarkdownReport(report) {
|
||||
}
|
||||
if (record.auth) {
|
||||
lines.push(`- Auth: ${record.auth.mode} (${record.auth.source}; provider ${record.auth.providerId ?? "none"})`);
|
||||
if (record.auth.environmentDependent) {
|
||||
lines.push("- Live provider lane: environment-dependent; compare separately from deterministic mock baselines.");
|
||||
}
|
||||
if (record.auth.mockProvider) {
|
||||
lines.push(`- Mock provider mode: ${record.auth.mockProvider.mode}`);
|
||||
}
|
||||
@ -143,6 +146,8 @@ export function renderMarkdownReport(report) {
|
||||
lines.push(`- Provider evidence: ${record.measurements.agentProviderRequestCount ?? 0} request(s); provider work ${record.measurements.agentProviderFinalMs ?? "unknown"} ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"} ms; post-provider ${record.measurements.agentPostProviderMs ?? "unknown"} ms`);
|
||||
} else if (record.providerEvidence?.available) {
|
||||
lines.push(`- Provider evidence: ${record.providerEvidence.requestCount ?? 0} request(s); provider duration ${record.providerEvidence.providerDurationMs ?? "unknown"} ms`);
|
||||
} else if (record.auth?.mode === "live") {
|
||||
lines.push(`- Provider evidence: unavailable for live lane (${record.providerEvidence?.error ?? "no provider events captured"})`);
|
||||
}
|
||||
if (record.measurements.agentLatencyDiagnosis) {
|
||||
lines.push(`- Agent latency diagnosis: ${record.measurements.agentLatencyDiagnosis.summary}`);
|
||||
|
||||
@ -179,7 +179,7 @@ export async function executeScenario(scenario, context) {
|
||||
} finally {
|
||||
record.finishedAt = new Date().toISOString();
|
||||
record.finalMetrics = await collectEnvMetrics(envName, metricOptions(context, scenario, null, artifactDir));
|
||||
record.providerEvidence = await collectProviderEvidence(artifactDir);
|
||||
record.providerEvidence = await collectProviderEvidence(artifactDir, { authPolicy });
|
||||
evaluateRecord(record, scenario, evaluatorContext(context, scenario));
|
||||
|
||||
if (shouldCaptureFailureDiagnostics(record, context)) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user