fix: tell contributors how to refresh proof reviews

This commit is contained in:
pashpashpash 2026-05-05 16:05:09 -07:00
parent 90dd661e0d
commit 37cbd19c19
4 changed files with 33 additions and 15 deletions

View File

@@ -9,7 +9,7 @@ checkpoint, and status-only commits are intentionally omitted.
### Added
- Added agent-led real behavior proof judgement so ClawSweeper can inspect linked screenshots, videos, logs, and terminal output with a read-only GitHub token, explain the proof verdict in the review comment, and sync `proof: sufficient` when the evidence is convincing.
- Added agent-led real behavior proof judgement so ClawSweeper can inspect linked screenshots, videos, logs, and terminal output with a read-only GitHub token, explain the proof verdict in the review comment, tell contributors how to trigger a fresh review after adding proof, and sync `proof: sufficient` when the evidence is convincing.
- Added a real behavior proof assessment to PR reviews so missing, mock-only, or insufficient contributor proof blocks pass/automerge markers and asks for screenshots, terminal output, redacted logs, recordings, linked artifacts, or copied live output instead.
- Added `config/automation-limits.json` plus docs and a drift check so review,
commit-review, repair, and issue-implementation capacity defaults have one

View File

@@ -325,8 +325,11 @@ nice-to-have. Missing, mock-only, or insufficient proof should appear near the
top of the public review as "needs real behavior proof before merge"; tell the
contributor that terminal screenshots, console output, copied live output,
linked artifacts, recordings, and redacted logs count. If the proof links to
public or GitHub-hosted media, inspect it when possible before deciding. Use
`evidenceKind: "none"` when proof is absent or mock-only, and set
public or GitHub-hosted media, inspect it when possible before deciding. Also
tell contributors that after they add proof, updating the PR body should trigger
a fresh ClawSweeper review automatically; if it does not, they can ask a
maintainer to comment `@clawsweeper re-review`. Use `evidenceKind: "none"` when
proof is absent or mock-only, and set
`needsContributorAction: false` only for `sufficient`, `override`, or
`not_applicable`.

View File

@@ -4166,6 +4166,18 @@ function publicSecurityReviewLine(review: SecurityReview): string {
return `${prefix}: ${sentence(review.summary)}`;
}
/**
 * Returns the fixed guidance sentence telling contributors how to obtain a
 * fresh review after adding proof: update the PR body, and if no re-review
 * follows, ask a maintainer to post the re-review command.
 *
 * @returns The guidance text, always the same string.
 */
function realBehaviorProofReReviewGuidance(): string {
  const guidance =
    "After adding proof, update the PR body; ClawSweeper should re-review automatically. If it does not, ask a maintainer to comment `@clawsweeper re-review`.";
  return guidance;
}
/**
 * Builds the text that follows a "needs real behavior proof" heading and makes
 * sure it tells the contributor how to get a fresh review.
 *
 * @param summary - Proof summary; it is passed through `sentence` (defined
 *   elsewhere in this file) before use.
 * @param fallback - Text used when the processed summary is empty.
 * @returns The body text, followed by the re-review guidance unless the body
 *   already mentions the re-review command, automatic re-review, or updating
 *   the PR body (case-insensitive).
 */
function realBehaviorProofBlockerSummary(summary: string, fallback: string): string {
  // `||` is intentional: an empty processed summary must fall through to the fallback.
  const body = sentence(summary) || fallback;
  // `@` is not a word character, so a `\b` placed directly before it only
  // matches when a word character precedes the `@`. In ordinary text (after a
  // space or a backtick) that would make the command alternative unmatchable
  // and the guidance would be appended twice. The leading boundary is therefore
  // applied only to the alternatives that start with a word character.
  const alreadyExplainsReReview =
    /@clawsweeper re-review\b|\b(?:re-review automatically|update the PR body)\b/i.test(body);
  if (alreadyExplainsReReview) {
    return body;
  }
  return `${body} ${realBehaviorProofReReviewGuidance()}`;
}
function publicRealBehaviorProofLine(proof: RealBehaviorProof): string {
const summary = sentence(proof.summary);
switch (proof.status) {
@@ -4174,20 +4186,20 @@ function publicRealBehaviorProofLine(proof: RealBehaviorProof): string {
case "override":
return `Override: ${summary || "A maintainer applied proof: override."}`;
case "missing":
return `Needs real behavior proof before merge: ${
summary ||
"the PR must include after-fix evidence from a real setup. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count."
}`;
return `Needs real behavior proof before merge: ${realBehaviorProofBlockerSummary(
summary,
"The PR must include after-fix evidence from a real setup. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count.",
)}`;
case "mock_only":
return `Needs real behavior proof before merge: ${
summary ||
"tests, mocks, snapshots, lint, typechecks, and CI are supplemental only. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count."
}`;
return `Needs real behavior proof before merge: ${realBehaviorProofBlockerSummary(
summary,
"Tests, mocks, snapshots, lint, typechecks, and CI are supplemental only. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count.",
)}`;
case "insufficient":
return `Needs stronger real behavior proof before merge: ${
summary ||
"include after-fix evidence from a real setup. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count."
}`;
return `Needs stronger real behavior proof before merge: ${realBehaviorProofBlockerSummary(
summary,
"Include after-fix evidence from a real setup. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count.",
)}`;
case "not_applicable":
return summary ? `Not applicable: ${summary}` : "";
}

View File

@@ -1876,6 +1876,8 @@ Full review comments:
assert.match(comment, /Codex review: needs real behavior proof before merge\./);
assert.match(comment, /\*\*Real behavior proof\*\*/);
assert.match(comment, /terminal screenshots, console output, copied live output/);
assert.match(comment, /update the PR body; ClawSweeper should re-review automatically/);
assert.match(comment, /@clawsweeper re-review/);
assert.match(markers, /clawsweeper-verdict:needs-human/);
assert.doesNotMatch(markers, /clawsweeper-verdict:pass/);
assert.doesNotMatch(markers, /clawsweeper-action:fix-required/);
@@ -2499,6 +2501,7 @@ test("review prompt requires real behavior proof for PR reviews", () => {
assert.match(prompt, /generate stills or contact sheets from videos/);
assert.match(prompt, /compare the proof against the PR diff/);
assert.match(prompt, /scratch directory/);
assert.match(prompt, /@clawsweeper re-review/);
assert.match(
prompt,
/Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental only/,