diff --git a/CHANGELOG.md b/CHANGELOG.md index 99c7c75b67..19a2762e10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ checkpoint, and status-only commits are intentionally omitted. ### Added -- Added agent-led real behavior proof judgement so ClawSweeper can inspect linked screenshots, videos, logs, and terminal output with a read-only GitHub token, explain the proof verdict in the review comment, and sync `proof: sufficient` when the evidence is convincing. +- Added agent-led real behavior proof judgement so ClawSweeper can inspect linked screenshots, videos, logs, and terminal output with a read-only GitHub token, explain the proof verdict in the review comment, tell contributors how to trigger a fresh review after adding proof, and sync `proof: sufficient` when the evidence is convincing. - Added a real behavior proof assessment to PR reviews so missing, mock-only, or insufficient contributor proof blocks pass/automerge markers and asks for screenshots, terminal output, redacted logs, recordings, linked artifacts, or copied live output instead. - Added `config/automation-limits.json` plus docs and a drift check so review, commit-review, repair, and issue-implementation capacity defaults have one diff --git a/prompts/review-item.md b/prompts/review-item.md index 88675403e2..36913d945f 100644 --- a/prompts/review-item.md +++ b/prompts/review-item.md @@ -325,8 +325,11 @@ nice-to-have. Missing, mock-only, or insufficient proof should appear near the top of the public review as "needs real behavior proof before merge"; tell the contributor that terminal screenshots, console output, copied live output, linked artifacts, recordings, and redacted logs count. If the proof links to -public or GitHub-hosted media, inspect it when possible before deciding. Use -`evidenceKind: "none"` when proof is absent or mock-only, and set +public or GitHub-hosted media, inspect it when possible before deciding. 
/**
 * Returns the fixed guidance sentence telling contributors how to get a fresh
 * review once they have added proof: update the PR body, and if that does not
 * trigger a review, ask a maintainer to comment `@clawsweeper re-review`.
 */
function realBehaviorProofReReviewGuidance(): string {
  return "After adding proof, update the PR body; ClawSweeper should re-review automatically. If it does not, ask a maintainer to comment `@clawsweeper re-review`.";
}

/**
 * Builds the text that follows a "needs real behavior proof" prefix.
 *
 * @param summary - Proof summary to show; it is passed through `sentence`
 *   before use (NOTE(review): `sentence` is defined elsewhere in this file —
 *   presumably it normalizes the text into a sentence; confirm).
 * @param fallback - Text used when the processed summary is empty/falsy.
 * @returns The chosen text, followed by the re-review guidance unless the text
 *   already mentions how to trigger a re-review.
 */
function realBehaviorProofBlockerSummary(summary: string, fallback: string): string {
  // `||` (not `??`) is deliberate: an empty processed summary must fall back too.
  const body = sentence(summary) || fallback;

  // The `@clawsweeper re-review` alternative must NOT be preceded by `\b`:
  // `@` is a non-word character, so `\b@` only matches when a word character
  // sits directly before the `@`. In real text the mention follows a space, a
  // backtick, or the start of the string, where `\b` fails, which previously
  // made this alternative dead and caused the guidance to be appended twice.
  const alreadyExplainsReReview =
    /@clawsweeper re-review\b|\b(?:re-review automatically|update the PR body)\b/i;

  if (alreadyExplainsReReview.test(body)) {
    return body;
  }
  return `${body} ${realBehaviorProofReReviewGuidance()}`;
}
Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count.", + )}`; case "mock_only": - return `Needs real behavior proof before merge: ${ - summary || - "tests, mocks, snapshots, lint, typechecks, and CI are supplemental only. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count." - }`; + return `Needs real behavior proof before merge: ${realBehaviorProofBlockerSummary( + summary, + "Tests, mocks, snapshots, lint, typechecks, and CI are supplemental only. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count.", + )}`; case "insufficient": - return `Needs stronger real behavior proof before merge: ${ - summary || - "include after-fix evidence from a real setup. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count." - }`; + return `Needs stronger real behavior proof before merge: ${realBehaviorProofBlockerSummary( + summary, + "Include after-fix evidence from a real setup. Terminal screenshots, console output, copied live output, linked artifacts, and redacted logs count.", + )}`; case "not_applicable": return summary ? 
`Not applicable: ${summary}` : ""; } diff --git a/test/clawsweeper.test.ts b/test/clawsweeper.test.ts index 1411a7bf20..4a2ec88152 100644 --- a/test/clawsweeper.test.ts +++ b/test/clawsweeper.test.ts @@ -1876,6 +1876,8 @@ Full review comments: assert.match(comment, /Codex review: needs real behavior proof before merge\./); assert.match(comment, /\*\*Real behavior proof\*\*/); assert.match(comment, /terminal screenshots, console output, copied live output/); + assert.match(comment, /update the PR body; ClawSweeper should re-review automatically/); + assert.match(comment, /@clawsweeper re-review/); assert.match(markers, /clawsweeper-verdict:needs-human/); assert.doesNotMatch(markers, /clawsweeper-verdict:pass/); assert.doesNotMatch(markers, /clawsweeper-action:fix-required/); @@ -2499,6 +2501,7 @@ test("review prompt requires real behavior proof for PR reviews", () => { assert.match(prompt, /generate stills or contact sheets from videos/); assert.match(prompt, /compare the proof against the PR diff/); assert.match(prompt, /scratch directory/); + assert.match(prompt, /@clawsweeper re-review/); assert.match( prompt, /Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental only/,