From ed5a07b3bb3511b6bb7433deb59248cbade45cd2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 29 Apr 2026 17:53:15 +0100 Subject: [PATCH] feat: add ClawSweeper review status sections --- .github/workflows/sweep.yml | 4 +- README.md | 11 +- docs/pr-review-comments.md | 8 + prompts/review-item.md | 9 +- schema/clawsweeper-decision.schema.json | 54 ++++ src/clawsweeper.ts | 312 +++++++++++++++++++++++- test/clawsweeper.test.mjs | 119 +++++++++ 7 files changed, 508 insertions(+), 9 deletions(-) diff --git a/.github/workflows/sweep.yml b/.github/workflows/sweep.yml index cee968a49f..499db5f7bc 100644 --- a/.github/workflows/sweep.yml +++ b/.github/workflows/sweep.yml @@ -640,7 +640,7 @@ jobs: fetch-depth: 0 persist-credentials: false - - name: Create target read token + - name: Create target review token id: target-read-token uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 with: @@ -649,7 +649,7 @@ jobs: owner: ${{ needs.plan.outputs.target_repo_owner }} repositories: ${{ needs.plan.outputs.target_repo_name }} permission-contents: read - permission-issues: read + permission-issues: write permission-pull-requests: read - uses: ./clawsweeper/.github/actions/setup-pnpm diff --git a/README.md b/README.md index 9c13c9770c..49b09bd9ab 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,14 @@ It has two independent lanes: maintainer-facing comment, runtime metadata, and GitHub snapshot hash. - **Durable review comments:** ClawSweeper syncs one marker-backed public review comment per item and edits it in place instead of posting repeated comments. - Pull request comments include hidden verdict markers, and actionable PR - follow-up includes a hidden `clawsweeper-action:fix-required` marker for the - trusted ClawSweeper repair loop. See + When a review starts and no ClawSweeper comment exists yet, it posts a short + crustacean-friendly status placeholder first, then replaces that same comment + with the completed review. Completed comments include a dedicated security + review section for supply-chain, permission, secret-handling, and code + execution concerns. Pull request comments include hidden verdict markers, and + actionable PR follow-up includes a hidden + `clawsweeper-action:fix-required` marker for the trusted ClawSweeper repair + loop. See [`docs/pr-review-comments.md`](docs/pr-review-comments.md). - **Guarded apply:** apply mode re-fetches live GitHub state, checks labels, maintainer authorship, paired issue/PR state, snapshot drift, and repository diff --git a/docs/pr-review-comments.md b/docs/pr-review-comments.md index 7e41ff6804..9fb29a7ed4 100644 --- a/docs/pr-review-comments.md +++ b/docs/pr-review-comments.md @@ -27,6 +27,11 @@ Each synced comment includes the durable identity marker: ClawSweeper edits that comment in place instead of posting repeated comments. Report front matter stores the synced comment id, URL, hash, and sync time. +When review starts and no ClawSweeper-owned comment exists yet, the review +shard posts a short status placeholder with the same durable identity marker. +The placeholder is intentionally light and crustacean-friendly, then the final +review sync edits that exact comment in place. + For a PR that needs work, the visible comment starts with: ```text @@ -40,6 +45,9 @@ report has: merge verdict or maintainer follow-up summary - `Required change before merge:` or `Maintainer follow-up before merge:` from the work-candidate reason or next action +- `Security review:` from the typed `securityReview` field, so supply-chain, + permission, secret-handling, and code-execution concerns have a dedicated + visible pass - `Review findings:` for Codex `/review`-style findings, using typed priority, confidence, file, and line-range data from the report - `Best possible solution:` only when it adds a distinct end-state that is not diff --git a/prompts/review-item.md b/prompts/review-item.md index fd86190b30..b9e6288489 100644 --- a/prompts/review-item.md +++ b/prompts/review-item.md @@ -56,7 +56,7 @@ ownership beyond this PR. If the PR author is only the proposer/reporter, you may mention that in evidence or summary when useful, but do not make them a likely owner. -For PRs, include a dedicated security review pass in addition to the functional review. Inspect whether the diff could introduce a security or supply-chain regression, especially when it touches CI workflows, GitHub Action refs, dependency sources, lockfiles, install/build/release scripts, package publishing metadata, secrets handling, permissions, downloaded artifacts, generated/vendor/minified files, or other code execution paths. Check whether those changes are consistent with the PR title, body, discussion, and stated purpose before deciding. Be cautious when a small or unrelated functional change also introduces new third-party code execution, broadens secret or permission access, changes package resolution, adds lifecycle hooks, downloads and executes artifacts, or mixes infrastructure changes into otherwise cosmetic work. Do not infer malicious intent without concrete evidence, but note unexplained security-sensitive changes in `risks` and `evidence` with the observable risk, relevant file/path, and why it matters. +For PRs, include a dedicated security review pass in addition to the functional review. Inspect whether the diff could introduce a security or supply-chain regression, especially when it touches CI workflows, GitHub Action refs, dependency sources, lockfiles, install/build/release scripts, package publishing metadata, secrets handling, permissions, downloaded artifacts, generated/vendor/minified files, or other code execution paths. Check whether those changes are consistent with the PR title, body, discussion, and stated purpose before deciding. Be cautious when a small or unrelated functional change also introduces new third-party code execution, broadens secret or permission access, changes package resolution, adds lifecycle hooks, downloads and executes artifacts, or mixes infrastructure changes into otherwise cosmetic work. Do not infer malicious intent without concrete evidence. Always summarize this pass in `securityReview`; set `status: "cleared"` when the diff has no concrete security or supply-chain concern, `status: "needs_attention"` when there is a concrete concern, and `status: "not_applicable"` for non-PR items without a security-sensitive report. Put concrete security concerns in `securityReview.concerns` with file/line when possible, and also include blocking concerns in `risks` and `evidence` when they affect the merge/close decision. For PRs, also emit Codex `/review`-style findings in `reviewFindings`. Review the diff as another engineer's proposed patch and list every discrete, @@ -231,6 +231,13 @@ For PRs, use these fields for the concise reviewer feedback that should appear near the top of the public ClawSweeper comment; the rest of the evidence can stay in the collapsed details. +Always fill `securityReview`. This is the dedicated public security section, +separate from functional findings. Use `cleared` with a concise summary when no +security or supply-chain issue was found, `needs_attention` with one or more +typed concerns when the patch or discussion raises a concrete security issue, +and `not_applicable` for ordinary non-PR issue triage where no patch security +review applies. + Always fill the work-lane fields too. For non-candidates, use `workCandidate: "none"`, low confidence/priority, an empty `workPrompt`, and empty arrays. For manual-review items, use `workCandidate: "manual_review"` and diff --git a/schema/clawsweeper-decision.schema.json b/schema/clawsweeper-decision.schema.json index c9c69f6250..4ae5bec558 100644 --- a/schema/clawsweeper-decision.schema.json +++ b/schema/clawsweeper-decision.schema.json @@ -13,6 +13,7 @@ "risks", "bestSolution", "reviewFindings", + "securityReview", "overallCorrectness", "overallConfidenceScore", "fixedRelease", @@ -185,6 +186,59 @@ } } }, + "securityReview": { + "type": "object", + "additionalProperties": false, + "description": "Dedicated security and supply-chain review for the item. For PRs, summarize whether the diff introduces security-sensitive concerns. For issues, use not_applicable unless the issue itself is security-sensitive.", + "required": ["status", "summary", "concerns"], + "properties": { + "status": { + "type": "string", + "enum": ["cleared", "needs_attention", "not_applicable"] + }, + "summary": { + "type": "string", + "description": "One concise sentence explaining the security review result." + }, + "concerns": { + "type": "array", + "description": "Concrete security or supply-chain concerns found during the dedicated pass. Empty when cleared or not applicable.", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["title", "body", "severity", "confidenceScore", "file", "line"], + "properties": { + "title": { + "type": "string", + "description": "Short concern title, at most 80 characters." + }, + "body": { + "type": "string", + "description": "Brief explanation of the observable security risk and why it matters." + }, + "severity": { + "type": "string", + "enum": ["high", "medium", "low"] + }, + "confidenceScore": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "file": { + "type": ["string", "null"], + "description": "Repository-relative file path when tied to a file; null for issue-wide or discussion-only concerns." + }, + "line": { + "type": ["integer", "null"], + "minimum": 1, + "description": "Relevant line when tied to a specific file; null when not applicable." + } + } + } + } + } + }, "overallCorrectness": { "type": "string", "enum": ["patch is correct", "patch is incorrect", "not a patch"], diff --git a/src/clawsweeper.ts b/src/clawsweeper.ts index 357a2d84e7..3357d0be91 100644 --- a/src/clawsweeper.ts +++ b/src/clawsweeper.ts @@ -64,6 +64,8 @@ type ApplyKind = ItemKind | "all"; type DecisionKind = "close" | "keep_open"; type WorkCandidateKind = "none" | "manual_review" | "queue_fix_pr"; type OverallCorrectness = "patch is correct" | "patch is incorrect" | "not a patch"; +type SecurityReviewStatus = "cleared" | "needs_attention" | "not_applicable"; +type SecurityConcernSeverity = "high" | "medium" | "low"; type CloseReason = | "implemented_on_main" | "cannot_reproduce" @@ -125,6 +127,16 @@ interface Item { activeLockReason?: string | null; } +export interface ReviewStartStatusCommentOptions { + number: number; + kind: string; + title: string; + position?: number; + total?: number; + shardIndex?: number; + shardCount?: number; +} + interface ExistingReview { path: string; markdown: string; @@ -176,6 +188,21 @@ interface ReviewFinding { lineEnd: number; } +interface SecurityConcern { + title: string; + body: string; + severity: SecurityConcernSeverity; + confidenceScore: number; + file: string | null; + line: number | null; +} + +interface SecurityReview { + status: SecurityReviewStatus; + summary: string; + concerns: SecurityConcern[]; +} + interface Decision { decision: DecisionKind; closeReason: CloseReason; @@ -187,6 +214,7 @@ interface Decision { risks: string[]; bestSolution: string; reviewFindings: ReviewFinding[]; + securityReview: SecurityReview; overallCorrectness: OverallCorrectness; overallConfidenceScore: number; fixedRelease?: string | null; @@ -498,6 +526,7 @@ const DEFAULT_REASONING_EFFORT = "high"; const DEFAULT_SERVICE_TIER = "fast"; const REVIEW_POLICY_VERSION = "2026-04-29-policy-v12"; const REVIEW_COMMENT_MARKER_PREFIX = "`; +} + +export function renderReviewStartStatusComment(options: ReviewStartStatusCommentOptions): string { + const subject = options.kind === "pull_request" ? "pull request" : "issue"; + const progress = + Number.isInteger(options.position) && Number.isInteger(options.total) + ? ` This is item ${options.position}/${options.total} in the current shard.` + : ""; + const shard = + Number.isInteger(options.shardIndex) && Number.isInteger(options.shardCount) + ? ` Shard ${options.shardIndex}/${options.shardCount}.` + : ""; + const title = options.title.trim(); + const heading = title + ? `I am starting a fresh review of this ${subject}: ${title}` + : `I am starting a fresh review of this ${subject}.`; + return markedReviewCommentBody( + options.number, + [ + "ClawSweeper status: review started.", + "", + `${heading}${progress}${shard}`, + "", + "This placeholder means the worker is alive and reading the current context. I will edit this same comment with the actual review when the claws are done clicking.", + "", + "Crustacean status: shell secured, claws on keyboard, evidence pebbles being sorted.", + "", + reviewStartStatusCommentMarker(options.number), + ].join("\n"), + ); +} + export function isCodexReviewCommentBody(body: string): boolean { return ( body.includes("Codex review:") || @@ -3709,9 +3923,12 @@ function commentBodyMatches(comment: Record | undefined, body: } const PATCHABLE_REVIEW_COMMENT_AUTHORS = new Set( - ["openclaw-clawsweeper[bot]", process.env.CLAWSWEEPER_COMMENT_AUTHOR_LOGIN].filter( - (login): login is string => typeof login === "string" && login.length > 0, - ), + [ + "clawsweeper", + "clawsweeper[bot]", + "openclaw-clawsweeper[bot]", + process.env.CLAWSWEEPER_COMMENT_AUTHOR_LOGIN, + ].filter((login): login is string => typeof login === "string" && login.length > 0), ); function commentAuthorLogin(comment: Record | undefined): string | undefined { @@ -3802,6 +4019,35 @@ function upsertReviewComment( return issueReviewComment(number, [markedBody]); } +function postReviewStartStatusComment(options: { + item: Item; + position: number; + total: number; + shardIndex: number; + shardCount: number; +}): "posted" | "existing" { + if (issueReviewComment(options.item.number)) return "existing"; + const body = renderReviewStartStatusComment({ + number: options.item.number, + kind: options.item.kind, + title: options.item.title, + position: options.position, + total: options.total, + shardIndex: options.shardIndex, + shardCount: options.shardCount, + }); + const payload = writeCommentPayload(options.item.number, body); + ghWithRetry([ + "api", + `repos/${targetRepo()}/issues/${options.item.number}/comments`, + "--method", + "POST", + "--input", + payload, + ]); + return "posted"; +} + function closeItem(options: { number: number; kind: ItemKind; reason: CloseReason }): void { if (options.kind === "pull_request") { ghWithRetry(["pr", "close", String(options.number)]); @@ -3904,6 +4150,43 @@ function renderReviewFindingsReportSection(decision: Decision): string { return lines.join("\n"); } +function securityConcernLocation(concern: SecurityConcern): string { + if (!concern.file) return "not tied to a single file"; + return `${concern.file}${concern.line ? `:${concern.line}` : ""}`; +} + +function renderSecurityReviewReportSection(decision: Decision): string { + const lines = [ + `Status: ${decision.securityReview.status}`, + "", + `Summary: ${sentence(decision.securityReview.summary)}`, + "", + "Concerns:", + "", + ]; + if (!decision.securityReview.concerns.length) { + lines.push("- none"); + return lines.join("\n"); + } + lines.push( + decision.securityReview.concerns + .map((concern) => { + const location = securityConcernLocation(concern); + const heading = + location === "not tied to a single file" + ? `- **[${concern.severity}] ${concern.title}:**` + : `- **[${concern.severity}] ${concern.title}:** \`${location}\``; + return [ + heading, + ` - body: ${sentence(concern.body)}`, + ` - confidence: ${confidenceText(concern.confidenceScore)}`, + ].join("\n"); + }) + .join("\n"), + ); + return lines.join("\n"); +} + function markdownFor(options: { item: Item; context: ItemContext; @@ -3950,6 +4233,7 @@ function markdownFor(options: { : "- none"; const bestSolution = options.decision.bestSolution.trim() || "_Not provided._"; const reviewFindings = renderReviewFindingsReportSection(options.decision); + const securityReview = renderSecurityReviewReportSection(options.decision); const workCandidateSection = renderWorkCandidateReportSection(options.decision); const repairWorkPromptSection = renderRepairWorkPromptReportSection(options.decision); return `--- @@ -4052,6 +4336,10 @@ ${bestSolution} ${reviewFindings} +## ${REVIEW_SECTIONS.securityReview} + +${securityReview} + ## ${REVIEW_SECTIONS.workCandidate} ${workCandidateSection}${repairWorkPromptSection} @@ -4175,6 +4463,24 @@ function reviewCommand(args: Args): void { ); const context = collectItemContext(item); const snapshotHash = itemSnapshotHash(item, context); + try { + const startComment = postReviewStartStatusComment({ + item, + position: completed + 1, + total: candidates.length, + shardIndex, + shardCount, + }); + console.error( + `[review] ${new Date().toISOString()} shard=${shardIndex}/${shardCount} start-comment=${startComment} #${item.number}`, + ); + } catch (error) { + console.error( + `[review] ${new Date().toISOString()} shard=${shardIndex}/${shardCount} start-comment=failed #${item.number}: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + } let decision: Decision; try { decision = runCodex({ diff --git a/test/clawsweeper.test.mjs b/test/clawsweeper.test.mjs index 4d1692d124..bf0eb08a4c 100644 --- a/test/clawsweeper.test.mjs +++ b/test/clawsweeper.test.mjs @@ -28,6 +28,7 @@ import { parseDecision, protectedLabels, relatedTitleSearchTerms, + renderReviewStartStatusComment, reviewArtifactDestination, reviewAutomationMarkersFromReport, reviewActionForDecision, @@ -123,6 +124,11 @@ function closeDecision(overrides = {}) { risks: [], bestSolution: "Keep the implementation as-is.", reviewFindings: [], + securityReview: { + status: "not_applicable", + summary: "No patch security review is needed for this issue cleanup decision.", + concerns: [], + }, overallCorrectness: "not a patch", overallConfidenceScore: 0.75, fixedRelease: null, @@ -843,11 +849,31 @@ test("comment matcher recognizes old and new Codex review comments", () => { }); test("review comment patching only targets ClawSweeper-owned comments", () => { + assert.equal(canPatchReviewComment({ user: { login: "clawsweeper" } }), true); + assert.equal(canPatchReviewComment({ user: { login: "clawsweeper[bot]" } }), true); assert.equal(canPatchReviewComment({ user: { login: "openclaw-clawsweeper[bot]" } }), true); assert.equal(canPatchReviewComment({ user: { login: "steipete" } }), false); assert.equal(canPatchReviewComment(undefined), false); }); +test("review start status comment is marker-backed and crustacean-friendly", () => { + const comment = renderReviewStartStatusComment({ + number: 74453, + kind: "pull_request", + title: "fix webhook limiter", + position: 1, + total: 3, + shardIndex: 0, + shardCount: 2, + }); + + assert.match(comment, /ClawSweeper status: review started\./); + assert.match(comment, /claws on keyboard/); + assert.match(comment, //); + assert.match(comment, //); + assert.doesNotMatch(comment, /Codex review:/); +}); + test("pull request keep-open review comments label the change summary", () => { const comment = renderReviewCommentFromReport( `${reportFrontMatter({ @@ -901,6 +927,86 @@ Reason: Maintainers should review the tests after the targeted lane is green. assert.match(comment, /