fix: recover issue implementation job races

This commit is contained in:
Peter Steinberger 2026-05-03 15:44:14 +01:00
parent b0b53c1219
commit 0dc8a96fbd
No known key found for this signature in database
5 changed files with 198 additions and 160 deletions

View File

@ -100,86 +100,7 @@ jobs:
id: check_job
env:
JOB_PATH: ${{ inputs.job }}
run: |
restore_automerge_job() {
case "$JOB_PATH" in
jobs/*/inbox/automerge-*.md) ;;
*) return 1 ;;
esac
filename="${JOB_PATH##*/}"
stem="${filename%.md}"
rest="${stem#automerge-}"
number="${rest##*-}"
repo_slug="${rest%-${number}}"
owner="${JOB_PATH#jobs/}"
owner="${owner%%/*}"
repo_name="${repo_slug#${owner}-}"
if [ -z "$number" ] || [ "$number" = "$rest" ] || [ -z "$repo_name" ] || [ "$repo_name" = "$repo_slug" ]; then
return 1
fi
repo="$owner/$repo_name"
ref="#$number"
branch="clawsweeper/automerge-$repo_slug-$number"
mkdir -p "$(dirname "$JOB_PATH")"
cat > "$JOB_PATH" <<EOF
---
repo: $repo
cluster_id: automerge-$repo_slug-$number
mode: autonomous
allowed_actions:
- comment
- label
- fix
- raise_pr
blocked_actions:
- close
- merge
require_human_for:
- close
- merge
canonical:
- $ref
candidates:
- $ref
cluster_refs:
- $ref
allow_instant_close: false
allow_fix_pr: true
allow_merge: false
allow_unmerged_fix_close: false
allow_post_merge_close: false
require_fix_before_close: true
security_policy: central_security_only
security_sensitive: false
target_branch: $branch
source: pr_automerge
---
# ClawSweeper adopted PR repair candidate
Maintainer opted $ref into ClawSweeper automerge.
Source PR: https://github.com/$repo/pull/$number
Title: PR $ref
ClawSweeper should use this job only for the bounded ClawSweeper review/fix loop:
- If ClawSweeper emits an explicit repair marker, requests changes, or finds failing checks/rebase work, and the PR branch is safe to update, emit a fix artifact with \`repair_strategy: "repair_contributor_branch"\` and \`source_prs: ["https://github.com/$repo/pull/$number"]\`.
- If the PR branch cannot be safely updated, emit a narrow credited replacement only when the artifact can preserve the original contributor credit; otherwise return \`needs_human\`.
- Do not merge, close, or bypass review gates from the worker. The comment router owns final merge only after a passing ClawSweeper verdict for the exact current head.
- Keep repair scope limited to actionable ClawSweeper findings, failing relevant checks, and required review feedback on this PR.
EOF
}
if [ -f "$JOB_PATH" ]; then
echo "job_exists=1" >> "$GITHUB_OUTPUT"
elif restore_automerge_job; then
echo "job_exists=1" >> "$GITHUB_OUTPUT"
echo "::notice title=Restored automerge repair job::Job file '$JOB_PATH' was missing from the state checkout; reconstructed it from the workflow input."
else
echo "job_exists=0" >> "$GITHUB_OUTPUT"
echo "::notice title=Stale repair dispatch::Job file '$JOB_PATH' no longer exists on the current state checkout; skipping this worker."
fi
run: scripts/restore-repair-job.sh "$JOB_PATH" "this worker"
- name: Capture execution gates
id: capture_gates
@ -331,86 +252,7 @@ jobs:
id: check_job
env:
JOB_PATH: ${{ inputs.job }}
run: |
restore_automerge_job() {
case "$JOB_PATH" in
jobs/*/inbox/automerge-*.md) ;;
*) return 1 ;;
esac
filename="${JOB_PATH##*/}"
stem="${filename%.md}"
rest="${stem#automerge-}"
number="${rest##*-}"
repo_slug="${rest%-${number}}"
owner="${JOB_PATH#jobs/}"
owner="${owner%%/*}"
repo_name="${repo_slug#${owner}-}"
if [ -z "$number" ] || [ "$number" = "$rest" ] || [ -z "$repo_name" ] || [ "$repo_name" = "$repo_slug" ]; then
return 1
fi
repo="$owner/$repo_name"
ref="#$number"
branch="clawsweeper/automerge-$repo_slug-$number"
mkdir -p "$(dirname "$JOB_PATH")"
cat > "$JOB_PATH" <<EOF
---
repo: $repo
cluster_id: automerge-$repo_slug-$number
mode: autonomous
allowed_actions:
- comment
- label
- fix
- raise_pr
blocked_actions:
- close
- merge
require_human_for:
- close
- merge
canonical:
- $ref
candidates:
- $ref
cluster_refs:
- $ref
allow_instant_close: false
allow_fix_pr: true
allow_merge: false
allow_unmerged_fix_close: false
allow_post_merge_close: false
require_fix_before_close: true
security_policy: central_security_only
security_sensitive: false
target_branch: $branch
source: pr_automerge
---
# ClawSweeper adopted PR repair candidate
Maintainer opted $ref into ClawSweeper automerge.
Source PR: https://github.com/$repo/pull/$number
Title: PR $ref
ClawSweeper should use this job only for the bounded ClawSweeper review/fix loop:
- If ClawSweeper emits an explicit repair marker, requests changes, or finds failing checks/rebase work, and the PR branch is safe to update, emit a fix artifact with \`repair_strategy: "repair_contributor_branch"\` and \`source_prs: ["https://github.com/$repo/pull/$number"]\`.
- If the PR branch cannot be safely updated, emit a narrow credited replacement only when the artifact can preserve the original contributor credit; otherwise return \`needs_human\`.
- Do not merge, close, or bypass review gates from the worker. The comment router owns final merge only after a passing ClawSweeper verdict for the exact current head.
- Keep repair scope limited to actionable ClawSweeper findings, failing relevant checks, and required review feedback on this PR.
EOF
}
if [ -f "$JOB_PATH" ]; then
echo "job_exists=1" >> "$GITHUB_OUTPUT"
elif restore_automerge_job; then
echo "job_exists=1" >> "$GITHUB_OUTPUT"
echo "::notice title=Restored automerge repair job::Job file '$JOB_PATH' was missing from the state checkout; reconstructed it from the workflow input."
else
echo "job_exists=0" >> "$GITHUB_OUTPUT"
echo "::notice title=Stale repair dispatch::Job file '$JOB_PATH' no longer exists on the current state checkout; skipping execute."
fi
run: scripts/restore-repair-job.sh "$JOB_PATH" "execute"
- uses: ./.github/actions/setup-pnpm
if: ${{ steps.check_job.outputs.job_exists == '1' }}

View File

@ -34,6 +34,9 @@ checkpoint, and status-only commits are intentionally omitted.
- Accepted spaced `auto merge` command aliases everywhere `automerge` and
`auto-merge` are accepted, including the top-level `/auto merge` shorthand.
- Recovered issue implementation workers from state propagation races by
reconstructing minimal `source: issue_implementation` jobs from the dispatched
job path instead of skipping the worker as stale.
- Made `/clawsweeper stop` revoke repair-loop labels and block older
automerge/autofix comments from continuing, so a trusted pass marker cannot
clear a human-review pause and merge after a maintainer stop.

View File

@ -282,6 +282,9 @@ dispatch the repair worker for one open issue and ask it to create or update a
single ClawSweeper implementation PR. The generated job uses
`source: issue_implementation`, `repair_strategy: new_fix_pr`, blocks merge and
close actions, and reuses `clawsweeper/issue-<repo>-<number>` on reruns.
Workers can reconstruct this minimal job from the requested `jobs/.../issue-*.md`
path when a dispatch races ahead of state propagation, so the request is not
silently skipped as stale.
When `CLAWSWEEPER_AUTO_IMPLEMENT_REPRO_BUGS=1`, review publish can also dispatch
the same lane automatically for strict bug reports only: `item_category: bug`,
`reproduction_status: reproduced`, `reproduction_confidence: high`, high

View File

@ -45,6 +45,11 @@ have an open PR reference or existing ClawSweeper implementation PR, writes the
normal `source: issue_implementation` job, commits the ledger, then dispatches
`repair-cluster-worker.yml` in autonomous mode.
Comment-triggered issue implementation uses the same durable job format. If a
worker starts before the new state commit is visible in its checkout, the worker
reconstructs the minimal `source: issue_implementation` job from the job path
and continues instead of treating the dispatch as stale.
PRs created from this path are labeled `clawsweeper` and
`clawsweeper:autogenerated`. The lane is PR-only: it does not merge or close the
source issue.

185
scripts/restore-repair-job.sh Executable file
View File

@ -0,0 +1,185 @@
#!/usr/bin/env bash
set -euo pipefail

# Resolve inputs.
#   $1 (or the JOB_PATH environment variable) - path of the job file to check.
#   $2 - wording used in the "skipping ..." notice; defaults to "this worker".
JOB_PATH="${1:-${JOB_PATH:-}}"
SKIP_TARGET="${2:-this worker}"

# A job path is mandatory; bail out early with a message on stderr.
[ -n "$JOB_PATH" ] || {
  echo "JOB_PATH is required" >&2
  exit 1
}
# Emit a `key=value` step output.
#   $1 - output name
#   $2 - output value
# Appends to the file named by GITHUB_OUTPUT when that variable is set and
# non-empty; otherwise prints the pair to stdout.
write_output() {
  local name="$1"
  local val="$2"
  local line="$name=$val"
  if [ -z "${GITHUB_OUTPUT:-}" ]; then
    echo "$line"
    return
  fi
  echo "$line" >> "$GITHUB_OUTPUT"
}
# Rebuild a minimal `source: pr_automerge` job file at $JOB_PATH from the path
# itself. The expected shape is
#   jobs/<owner>/inbox/automerge-<owner>-<repo>-<number>.md
#
# Globals:  reads JOB_PATH.
# Returns:  0 after the job file has been written;
#           1 when the path does not describe a well-formed automerge job
#           (nothing is written in that case) or the directory cannot be made.
restore_automerge_job() {
  case "$JOB_PATH" in
    jobs/*/inbox/automerge-*.md) ;;
    *) return 1 ;;
  esac
  # `*` in a case pattern also matches `/`, so the shape check above does not
  # stop `..` segments; refuse them so the write stays under jobs/.
  case "/$JOB_PATH/" in
    */../*) return 1 ;;
  esac
  local filename stem rest number repo_slug owner repo_name repo ref branch
  filename="${JOB_PATH##*/}"
  stem="${filename%.md}"
  rest="${stem#automerge-}"
  number="${rest##*-}"
  # Nested expansions are quoted so they are matched literally, not as globs.
  repo_slug="${rest%-"$number"}"
  owner="${JOB_PATH#jobs/}"
  owner="${owner%%/*}"
  repo_name="${repo_slug#"$owner"-}"
  if [ -z "$number" ] || [ "$number" = "$rest" ] || [ -z "$owner" ] || [ -z "$repo_name" ] || [ "$repo_name" = "$repo_slug" ]; then
    return 1
  fi
  # The values below are interpolated into YAML front matter and URLs, so only
  # accept a purely numeric PR number and GitHub-style owner/repo characters.
  case "$number" in
    *[!0-9]*) return 1 ;;
  esac
  case "$owner$repo_name" in
    *[!A-Za-z0-9._-]*) return 1 ;;
  esac
  repo="$owner/$repo_name"
  ref="#$number"
  branch="clawsweeper/automerge-$repo_slug-$number"
  mkdir -p "$(dirname "$JOB_PATH")" || return 1
  cat > "$JOB_PATH" <<EOF
---
repo: $repo
cluster_id: automerge-$repo_slug-$number
mode: autonomous
allowed_actions:
- comment
- label
- fix
- raise_pr
blocked_actions:
- close
- merge
require_human_for:
- close
- merge
canonical:
- $ref
candidates:
- $ref
cluster_refs:
- $ref
allow_instant_close: false
allow_fix_pr: true
allow_merge: false
allow_unmerged_fix_close: false
allow_post_merge_close: false
require_fix_before_close: true
security_policy: central_security_only
security_sensitive: false
target_branch: $branch
source: pr_automerge
---
# ClawSweeper adopted PR repair candidate
Maintainer opted $ref into ClawSweeper automerge.
Source PR: https://github.com/$repo/pull/$number
Title: PR $ref
ClawSweeper should use this job only for the bounded ClawSweeper review/fix loop:
- If ClawSweeper emits an explicit repair marker, requests changes, or finds failing checks/rebase work, and the PR branch is safe to update, emit a fix artifact with \`repair_strategy: "repair_contributor_branch"\` and \`source_prs: ["https://github.com/$repo/pull/$number"]\`.
- If the PR branch cannot be safely updated, emit a narrow credited replacement only when the artifact can preserve the original contributor credit; otherwise return \`needs_human\`.
- Do not merge, close, or bypass review gates from the worker. The comment router owns final merge only after a passing ClawSweeper verdict for the exact current head.
- Keep repair scope limited to actionable ClawSweeper findings, failing relevant checks, and required review feedback on this PR.
EOF
}
# Rebuild a minimal `source: issue_implementation` job file at $JOB_PATH from
# the path itself. The expected shape is
#   jobs/<owner>/inbox/issue-<owner>-<repo>-<number>.md
#
# Globals:  reads JOB_PATH.
# Returns:  0 after the job file has been written;
#           1 when the path does not describe a well-formed issue job
#           (nothing is written in that case) or the directory cannot be made.
restore_issue_implementation_job() {
  case "$JOB_PATH" in
    jobs/*/inbox/issue-*.md) ;;
    *) return 1 ;;
  esac
  # `*` in a case pattern also matches `/`, so the shape check above does not
  # stop `..` segments; refuse them so the write stays under jobs/.
  case "/$JOB_PATH/" in
    */../*) return 1 ;;
  esac
  local filename stem rest number repo_slug owner repo_name repo ref branch
  filename="${JOB_PATH##*/}"
  stem="${filename%.md}"
  rest="${stem#issue-}"
  number="${rest##*-}"
  # Nested expansions are quoted so they are matched literally, not as globs.
  repo_slug="${rest%-"$number"}"
  owner="${JOB_PATH#jobs/}"
  owner="${owner%%/*}"
  repo_name="${repo_slug#"$owner"-}"
  if [ -z "$number" ] || [ "$number" = "$rest" ] || [ -z "$owner" ] || [ -z "$repo_name" ] || [ "$repo_name" = "$repo_slug" ]; then
    return 1
  fi
  # The values below are interpolated into YAML front matter and URLs, so only
  # accept a purely numeric issue number and GitHub-style owner/repo characters.
  case "$number" in
    *[!0-9]*) return 1 ;;
  esac
  case "$owner$repo_name" in
    *[!A-Za-z0-9._-]*) return 1 ;;
  esac
  repo="$owner/$repo_name"
  ref="#$number"
  branch="clawsweeper/issue-$repo_slug-$number"
  mkdir -p "$(dirname "$JOB_PATH")" || return 1
  cat > "$JOB_PATH" <<EOF
---
repo: $repo
cluster_id: issue-$repo_slug-$number
mode: autonomous
allowed_actions:
- comment
- label
- fix
- raise_pr
blocked_actions:
- close
- merge
require_human_for:
- close
- merge
canonical:
- $ref
candidates:
- $ref
cluster_refs:
- $ref
allow_instant_close: false
allow_fix_pr: true
allow_merge: false
allow_unmerged_fix_close: false
allow_post_merge_close: false
require_fix_before_close: false
security_policy: central_security_only
security_sensitive: false
target_branch: $branch
source: issue_implementation
required_pr_labels:
- clawsweeper:autogenerated
---
# ClawSweeper issue implementation candidate
ClawSweeper Repair should create or update one implementation PR from \`$branch\`.
Source issue: https://github.com/$repo/issues/$number
Title: Issue $ref
## Operator Prompt
Use the source issue as the product request or bug report. Verify the request is still valid on latest \`$repo@main\`, inspect nearby code, and make the narrowest implementation that directly satisfies the issue. If the issue is too broad, underspecified, security-sensitive, already fixed, or not safely implementable by automation, do not change code; report the exact blocker.
When code changes are appropriate, emit a fix artifact with \`repair_strategy: "new_fix_pr"\`, \`source_prs: []\`, this issue in \`linked_refs\`, and validation commands for the touched surface.
## Guardrails
- Do not merge.
- Do not close the issue from this lane.
- Keep one PR for this issue; reuse \`$branch\` if it already exists.
- Keep the diff narrow and avoid unrelated refactors.
- Preserve issue context and link https://github.com/$repo/issues/$number in the PR body.
- Add a changelog entry when the target repo expects one.
EOF
}
# Decide whether the dispatched job can run: use the file if it is already in
# the checkout, otherwise try each reconstruction in turn (automerge first,
# then issue implementation), and report a stale dispatch if neither applies.
if [ -f "$JOB_PATH" ]; then
  write_output job_exists 1
else
  restored_kind=""
  if restore_automerge_job; then
    restored_kind="automerge repair"
  elif restore_issue_implementation_job; then
    restored_kind="issue implementation"
  fi
  if [ -n "$restored_kind" ]; then
    write_output job_exists 1
    echo "::notice title=Restored $restored_kind job::Job file '$JOB_PATH' was missing from the state checkout; reconstructed it from the workflow input."
  else
    write_output job_exists 0
    echo "::notice title=Stale repair dispatch::Job file '$JOB_PATH' no longer exists on the current state checkout; skipping $SKIP_TARGET."
  fi
fi