Compare commits

..

88 Commits

Author SHA1 Message Date
Peter Steinberger
469d89bc1a
chore: prepare gitcrawl 0.3.1
Some checks failed
CI / Go / ${{ matrix.os }} (macos-latest) (push) Has been cancelled
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Has been cancelled
Pages / Deploy docs (push) Has been cancelled
Security Gate: Secret Scanning / Scan for Verified Secrets (push) Has been cancelled
2026-05-08 09:56:02 +01:00
Peter Steinberger
a94a53217d
docs: update gitcrawl changelog and command docs 2026-05-08 09:50:20 +01:00
Peter Steinberger
7671a6b999
fix: harden gitcrawl command surface 2026-05-08 09:50:17 +01:00
Peter Steinberger
f2d60276f9
feat: prepare gitcrawl 0.3.0 2026-05-08 06:20:35 +01:00
Peter Steinberger
a1be2e57c5
docs: clarify gitcrawl skill paths 2026-05-08 01:13:01 +01:00
Vincent Koc
01d62c1afc
docs: note dependency updates
Some checks are pending
CI / Go / ${{ matrix.os }} (macos-latest) (push) Waiting to run
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Waiting to run
Security Gate: Secret Scanning / Scan for Verified Secrets (push) Waiting to run
2026-05-07 02:52:17 -07:00
dependabot[bot]
fc7001e21e
chore(deps): bump goreleaser/goreleaser-action from 7.1.0 to 7.2.1 (#11)
Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 7.1.0 to 7.2.1.
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases)
- [Commits](https://github.com/goreleaser/goreleaser-action/compare/v7.1.0...v7.2.1)

---
updated-dependencies:
- dependency-name: goreleaser/goreleaser-action
  dependency-version: 7.2.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-05-07 02:41:45 -07:00
Peter Steinberger
025e92b858
ci: update homebrew tap on release
Some checks failed
CI / Go / ${{ matrix.os }} (macos-latest) (push) Waiting to run
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Waiting to run
Security Gate: Secret Scanning / Scan for Verified Secrets (push) Waiting to run
Pages / Deploy docs (push) Has been cancelled
2026-05-07 03:56:51 +01:00
Vincent Koc
eafeabf8fd
build(deps): bump crawlkit to v0.4.1 (#13) 2026-05-06 14:52:53 -07:00
Vincent Koc
fdc3f7473e
fix(docs): avoid regex tag stripping in toc (#12) 2026-05-06 02:10:03 -07:00
Vincent Koc
858f824719
Merge pull request #10 from openclaw/ci-security-baseline
chore(ci): add crawl security baseline
2026-05-06 01:55:22 -07:00
Vincent Koc
2a011cfef3
docs: document SQL archive queries 2026-05-06 01:54:34 -07:00
Vincent Koc
71d32d8ef2
docs: update changelog for agent skill 2026-05-06 01:38:43 -07:00
Vincent Koc
a4ab91b035
chore(security): add verified secret scanning 2026-05-06 01:37:04 -07:00
Vincent Koc
f205d3abe4
chore: add Go repository hygiene files 2026-05-06 01:37:03 -07:00
Vincent Koc
86f67bea8b
docs: add gitcrawl agent skill 2026-05-06 01:29:00 -07:00
Vincent Koc
ad2a4344a6
chore(ci): rely on CodeQL default setup 2026-05-06 00:42:35 -07:00
Vincent Koc
94a25db94a
chore(ci): add stale issue automation 2026-05-06 00:30:16 -07:00
Vincent Koc
43d9491b81
chore(ci): add CodeQL analysis 2026-05-06 00:30:14 -07:00
Vincent Koc
c35210ad31
chore(security): add protected automation owners 2026-05-06 00:30:13 -07:00
Vincent Koc
bed1da5471
docs: document crawlkit control surface 2026-05-05 19:16:51 -07:00
Vincent Koc
ec7a91465c
test(ci): cover crawlkit control commands 2026-05-05 18:48:48 -07:00
Vincent Koc
1ca61691c0
merge: use crawlkit infrastructure
* feat/use-crawlkit: (33 commits)
  fix(tui): allow empty json smoke
  chore(deps): use crawlkit v0.4.0
  fix(tui): use compact-pane crawlkit
  fix(tui): pick up crawlkit renderer
  fix(sync): log thread progress percentages
  chore(deps): bump crawlkit to v0.3.13
  chore(deps): bump crawlkit to v0.3.12
  chore(deps): update crawlkit to v0.3.11
  chore(deps): tidy crawlkit checksums
  chore(deps): update crawlkit to v0.3.10
  chore(deps): tidy crawlkit checksum
  chore(deps): update crawlkit to v0.3.9
  chore(deps): update crawlkit to v0.3.8
  docs(changelog): note TUI alignment
  chore(deps): update crawlkit to v0.3.7
  chore(deps): update crawlkit to v0.3.6
  chore(deps): update crawlkit to v0.3.5
  fix(tui): use crawlkit empty-json fix
  fix(tui): use crawlkit safe renderer
  fix(cli): document portable help
  ...
2026-05-05 18:20:49 -07:00
Vincent Koc
7342912545
fix(tui): allow empty json smoke 2026-05-05 18:16:02 -07:00
Vincent Koc
b7176c3569
chore(deps): use crawlkit v0.4.0 2026-05-05 18:16:02 -07:00
Vincent Koc
b78370f2ba
fix(tui): use compact-pane crawlkit 2026-05-05 18:16:02 -07:00
Vincent Koc
11455a6a17
fix(tui): pick up crawlkit renderer 2026-05-05 18:16:01 -07:00
Vincent Koc
5d8b59c79b
fix(sync): log thread progress percentages 2026-05-05 18:16:00 -07:00
Vincent Koc
ec8de7a53d
chore(deps): bump crawlkit to v0.3.13 2026-05-05 18:13:56 -07:00
Vincent Koc
32f4a13a8e
chore(deps): bump crawlkit to v0.3.12 2026-05-05 18:13:56 -07:00
Vincent Koc
c39fad757f
chore(deps): update crawlkit to v0.3.11 2026-05-05 18:13:55 -07:00
Vincent Koc
990d2616d6
chore(deps): tidy crawlkit checksums 2026-05-05 18:13:55 -07:00
Vincent Koc
ef89a1d876
chore(deps): update crawlkit to v0.3.10 2026-05-05 18:13:55 -07:00
Vincent Koc
b68852bde2
chore(deps): tidy crawlkit checksum 2026-05-05 18:13:55 -07:00
Vincent Koc
8c460f1a34
chore(deps): update crawlkit to v0.3.9 2026-05-05 18:13:55 -07:00
Vincent Koc
be832aa57c
chore(deps): update crawlkit to v0.3.8 2026-05-05 18:13:54 -07:00
Vincent Koc
87616b1860
docs(changelog): note TUI alignment 2026-05-05 18:13:54 -07:00
Vincent Koc
360037b3ad
chore(deps): update crawlkit to v0.3.7 2026-05-05 18:13:54 -07:00
Vincent Koc
73de21871d
chore(deps): update crawlkit to v0.3.6 2026-05-05 18:13:54 -07:00
Vincent Koc
b543bdc172
chore(deps): update crawlkit to v0.3.5 2026-05-05 18:13:54 -07:00
Vincent Koc
6b3032649b
fix(tui): use crawlkit empty-json fix 2026-05-05 18:13:53 -07:00
Vincent Koc
75215c9389
fix(tui): use crawlkit safe renderer 2026-05-05 18:13:53 -07:00
Vincent Koc
7a9bac31b5
fix(cli): document portable help 2026-05-05 18:13:53 -07:00
Vincent Koc
bf21271477
chore(deps): tidy crawlkit module sums 2026-05-05 18:13:52 -07:00
Vincent Koc
3bfaef0761
ci: smoke crawlkit control surface 2026-05-05 18:13:52 -07:00
Vincent Koc
e13976fbea
feat(cli): add crawlkit control surface 2026-05-05 18:13:52 -07:00
Vincent Koc
92c839cae2
chore: bump crawlkit to v0.3.1 2026-05-05 18:13:52 -07:00
Vincent Koc
77f981725e
chore: tidy crawlkit module sums 2026-05-05 18:13:52 -07:00
Vincent Koc
863d8599e5
refactor: use crawlkit package nouns 2026-05-05 18:13:52 -07:00
Vincent Koc
8cd92156f8
chore: use crawlkit v0.2.0 2026-05-05 18:13:52 -07:00
Vincent Koc
90c90204c1
docs(tui): mark gitcrawl as browser reference 2026-05-05 18:13:51 -07:00
Vincent Koc
af0ea88c98
chore: use crawlkit v0.1.1 2026-05-05 18:13:51 -07:00
Vincent Koc
afed848dc7
chore: use crawlkit v0.1.0 2026-05-05 18:13:51 -07:00
Vincent Koc
511603d0b1
refactor(store): use crawlkit sqlite openers 2026-05-05 18:13:51 -07:00
Vincent Koc
c352cb4e6a
refactor(config): route paths through crawlkit 2026-05-05 18:13:51 -07:00
Vincent Koc
47cc722d33
chore: add crawlkit module dependency 2026-05-05 18:13:51 -07:00
Peter Steinberger
3e43d1a5d5
docs: style homepage action links 2026-05-06 00:20:13 +01:00
Peter Steinberger
d91eec3973
docs: add syntax highlighting 2026-05-05 23:43:45 +01:00
Peter Steinberger
54f7107df9
test: enforce 85 percent coverage gate 2026-05-05 22:00:07 +01:00
Peter Steinberger
e5621d1b78
feat: improve gh shim cache observability 2026-05-05 21:23:39 +01:00
Peter Steinberger
5e441a9e48
docs: open 0.3.0 changelog
Some checks failed
CI / Go / ${{ matrix.os }} (macos-latest) (push) Has been cancelled
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Has been cancelled
Pages / Deploy docs (push) Has been cancelled
2026-05-05 09:31:21 +01:00
Peter Steinberger
1350779782
docs: fix homepage command rendering 2026-05-05 09:19:54 +01:00
Peter Steinberger
d5530b3dd9
docs: sharpen homepage positioning 2026-05-05 09:17:35 +01:00
Peter Steinberger
ae1b334ccb
docs: fix gitcrawl brew install path 2026-05-05 09:15:00 +01:00
Peter Steinberger
8c6d568b0f
docs: prepare 0.2.0 release 2026-05-05 09:05:02 +01:00
Peter Steinberger
6e338ddd9a
docs: polish generated docs site 2026-05-05 09:00:21 +01:00
Peter Steinberger
17c09e1580
docs: document gh cache improvements 2026-05-05 09:00:08 +01:00
Peter Steinberger
c341231048
feat: improve gh shim cache behavior 2026-05-05 08:59:49 +01:00
Peter Steinberger
bcd9d1d381
docs: clarify gh cache behavior 2026-05-05 08:02:54 +01:00
Peter Steinberger
53c62c4e91
test: cover explicit gh cache keys 2026-05-05 07:48:23 +01:00
Peter Steinberger
084fe02e93
feat: cache explicit gh api reads 2026-05-05 07:30:44 +01:00
Peter Steinberger
6adb810aa8
docs: add social card metadata 2026-05-05 06:55:30 +01:00
Peter Steinberger
14c02b5510
feat(gh): tune fallback cache telemetry 2026-05-05 06:49:07 +01:00
Peter Steinberger
a7be92d39a
docs: replace jekyll pages build 2026-05-05 06:24:12 +01:00
Peter Steinberger
69a4bc56f7
feat(portable): add v2 backup data 2026-05-05 05:21:13 +01:00
Peter Steinberger
1a2f5ba6e0
docs: improve site install and links 2026-05-05 04:38:46 +01:00
Peter Steinberger
fc12f81b6a
docs: add gitcrawl.sh site 2026-05-05 04:29:32 +01:00
Peter Steinberger
126059701c
feat(gh): absorb ghx cache policy 2026-05-05 04:12:17 +01:00
Peter Steinberger
93290e290f
feat(gh): auto-hydrate PR detail cache 2026-05-05 03:52:29 +01:00
Peter Steinberger
d3215f9e42
Create CNAME 2026-05-05 03:38:45 +01:00
Peter Steinberger
7222fef197
feat(gh): cache hydrated PR details 2026-05-05 03:26:39 +01:00
Peter Steinberger
72c81e2533
feat(gh): reduce shim GitHub API load 2026-05-05 03:09:15 +01:00
Peter Steinberger
9fa2423e37
feat(gh): add gitcrawl-backed gh shim 2026-05-05 02:56:18 +01:00
Peter Steinberger
5d3906a4d0
fix(embed): cap embedding inputs by bytes 2026-05-05 02:21:41 +01:00
Peter Steinberger
cfdd81709e
docs(changelog): add 0.1.2 release notes
Some checks are pending
CI / Go / ${{ matrix.os }} (macos-latest) (push) Waiting to run
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Waiting to run
2026-05-04 05:47:55 +01:00
Peter Steinberger
b7d36e4468
chore: update Go dependencies 2026-05-04 01:37:20 +01:00
Peter Steinberger
d2fb836741
fix(embed): refresh vectors when input cap changes
Some checks are pending
CI / Go / ${{ matrix.os }} (macos-latest) (push) Waiting to run
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Waiting to run
2026-05-03 18:43:23 +01:00
Vincent Koc
e1c3044b27
fix(tui): polish cluster browser interactions (#8)
Some checks failed
CI / Go / ${{ matrix.os }} (macos-latest) (push) Has been cancelled
CI / Go / ${{ matrix.os }} (ubuntu-latest) (push) Has been cancelled
* fix(tui): separate action menu contexts

* style(tui): tune open row palette

* fix(tui): preserve cluster viewport on refresh

* style(tui): soften selected row contrast

* fix(tui): toggle age sort direction

* fix(tui): buffer trackpad wheel bursts
2026-05-01 04:56:23 -07:00
92 changed files with 13014 additions and 283 deletions

View File

@ -0,0 +1,109 @@
---
name: gitcrawl
description: Use for local GitHub issue/PR archive search, sync freshness, clusters, durable maintainer triage, gh-shim cache reads, and Gitcrawl repo/release work.
---
# Gitcrawl
Use local archive data first for GitHub issue and pull request questions. Browse
or hit live GitHub APIs only when the local archive is stale, missing the
requested scope, or the user asks for current external context.
## Sources
- Config: `~/.config/gitcrawl/config.toml`
- DB: resolve with `gitcrawl doctor --json`; portable-store installs may point at `~/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db` instead of the default local DB
- Cache: `~/.config/gitcrawl/cache`
- Vectors: `~/.config/gitcrawl/vectors`
- Repo: `openclaw/gitcrawl`; on ClawSweeper this is checked out at `~/clawsweeper-workspace/gitcrawl`
- Preferred CLI: `gitcrawl`; fallback to `go run ./cmd/gitcrawl` from a verified repo checkout if the installed binary is stale
## Freshness
For recent/current questions, check freshness before analysis:
```bash
gitcrawl doctor --json
```
Routine refresh:
```bash
gitcrawl doctor
gitcrawl refresh owner/repo
```
Targeted refresh:
```bash
gitcrawl sync owner/repo --numbers 123,456 --with pr-details
```
For agent-driven discovery, prefer bounded freshness:
```bash
gitcrawl search issues "query" -R owner/repo --state open --sync-if-stale 5m --json number,title,url
```
## Query Workflow
1. Resolve scope: owner/repo, issue/PR number, cluster id, keyword, label, author, state, or date range.
2. Check freshness for recent/current requests.
3. Use CLI for normal reads; use read-only SQL for precise counts/rankings.
4. Report absolute date spans, repo names, issue/PR numbers, cluster ids, and known gaps.
Common commands:
```bash
gitcrawl search issues "query" -R owner/repo --state open --json number,title,url
gitcrawl clusters owner/repo --sort size --min-size 5
gitcrawl cluster-detail owner/repo --id <id>
gitcrawl gh pr view 123 -R owner/repo --json number,title,state,url
```
## SQL
`gitcrawl` does not currently expose a first-class `sql` command. For exact
local archive counts or rankings, use SQLite read-only mode against the
configured DB and prefer CLI commands for normal reads.
Useful examples:
```bash
db="$(gitcrawl doctor --json | jq -r .db_path)"
sqlite3 -readonly "$db" \
"select count(*) as threads from threads;"
sqlite3 -readonly "$db" \
"select r.full_name, count(*) as threads from threads t join repositories r on r.id = t.repo_id group by r.full_name order by threads desc limit 20;"
sqlite3 -readonly "$db" \
"select state, count(*) as threads from threads group by state;"
```
Do not run mutating SQL against the archive. Use local maintainer commands for
overrides instead of writing database rows directly.
When the installed CLI lacks a new feature, build or run from
a verified `openclaw/gitcrawl` checkout before concluding the feature is missing.
## Maintainer Boundaries
`close-thread`, `close-cluster`, exclusions, and canonical-member choices are
local maintainer overrides; they do not write back to GitHub. When using the gh
shim, set `GITCRAWL_GH_PATH` explicitly to the real `gh` binary so the shim
cannot recurse into itself.
## Verification
For repo edits, prefer existing Go gates:
```bash
GOWORK=off go test ./...
```
Then run a targeted CLI smoke test for the touched surface, for example:
```bash
gitcrawl doctor --json
gitcrawl status --json
gitcrawl search issues "test" -R openclaw/gitcrawl --state open --limit 5
```

12
.editorconfig Normal file
View File

@ -0,0 +1,12 @@
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
indent_style = tab
indent_size = 4
[*.{md,yml,yaml,json,toml}]
indent_style = space
indent_size = 2

6
.gitattributes vendored Normal file
View File

@ -0,0 +1,6 @@
* text=auto
*.go text eol=lf
*.md text eol=lf
*.toml text eol=lf
*.yml text eol=lf
*.yaml text eol=lf

11
.github/CODEOWNERS vendored Normal file
View File

@ -0,0 +1,11 @@
# Protect ownership and automation rules.
/.github/CODEOWNERS @openclaw/openclaw-secops
/.github/dependabot.yml @openclaw/openclaw-secops
/.github/workflows/ @openclaw/openclaw-secops
# Release and package integrity surfaces.
/.goreleaser.yaml @openclaw/openclaw-secops
/go.mod @openclaw/openclaw-secops
/go.sum @openclaw/openclaw-secops
/scripts/*release* @openclaw/openclaw-secops
/scripts/*publish* @openclaw/openclaw-secops

13
.github/dependabot.yml vendored Normal file
View File

@ -0,0 +1,13 @@
version: 2
updates:
- package-ecosystem: gomod
directory: /
schedule:
interval: weekly
open-pull-requests-limit: 10
- package-ecosystem: github-actions
directory: /
schedule:
interval: weekly
open-pull-requests-limit: 10

View File

@ -54,14 +54,28 @@ jobs:
- name: Vet
run: go vet ./...
- name: Test
run: go test ./...
- name: Test with coverage
run: |
go test ./... -covermode=atomic -coverprofile=coverage.out
total="$(go tool cover -func=coverage.out | awk '/^total:/ { sub(/%/, "", $3); print $3 }')"
echo "total coverage: ${total}%"
awk -v total="$total" 'BEGIN { if (total + 0 < 85.0) { printf("coverage %.1f%% is below 85.0%%\n", total); exit 1 } }'
- name: Build
run: go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=${GITHUB_SHA:0:7}" -o bin/gitcrawl ./cmd/gitcrawl
- name: Smoke test TUI help
run: |
set -euo pipefail
test -n "$(./bin/gitcrawl --version)"
./bin/gitcrawl metadata --json | grep -q '"schema_version"'
./bin/gitcrawl status --json | grep -q '"databases"'
output="$(./bin/gitcrawl help tui)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "gitcrawl tui"
- name: Snapshot release build
uses: goreleaser/goreleaser-action@v7.1.0
uses: goreleaser/goreleaser-action@v7.2.1
with:
distribution: goreleaser
version: "~> v2"

52
.github/workflows/pages.yml vendored Normal file
View File

@ -0,0 +1,52 @@
name: Pages
on:
push:
branches:
- main
paths:
- "docs/**"
- "scripts/build-docs-site.mjs"
- ".github/workflows/pages.yml"
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: pages
cancel-in-progress: false
jobs:
deploy:
name: Deploy docs
runs-on: ubuntu-latest
timeout-minutes: 10
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Check out
uses: actions/checkout@v6
- name: Set up Node
uses: actions/setup-node@v6
with:
node-version: 24
- name: Build site
run: node scripts/build-docs-site.mjs
- name: Configure Pages
uses: actions/configure-pages@v6
- name: Upload artifact
uses: actions/upload-pages-artifact@v5
with:
path: dist/docs-site
- name: Deploy
id: deployment
uses: actions/deploy-pages@v5

View File

@ -37,10 +37,69 @@ jobs:
run: git checkout ${{ inputs.tag }}
- name: GoReleaser
uses: goreleaser/goreleaser-action@v7.1.0
uses: goreleaser/goreleaser-action@v7.2.1
with:
distribution: goreleaser
version: "~> v2"
args: release --clean --config /tmp/.goreleaser.yaml
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
update-homebrew-tap:
runs-on: ubuntu-latest
needs: goreleaser
steps:
- name: Resolve release tag
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "RELEASE_TAG=${{ inputs.tag }}" >> "$GITHUB_ENV"
else
echo "RELEASE_TAG=${{ github.ref_name }}" >> "$GITHUB_ENV"
fi
- name: Dispatch tap formula update
env:
GH_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
run: |
if [ -z "$GH_TOKEN" ]; then
echo "::error::Set HOMEBREW_TAP_TOKEN with workflow access to openclaw/homebrew-tap"
exit 1
fi
request_id="gitcrawl-${RELEASE_TAG}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
expected_title="Update gitcrawl for ${RELEASE_TAG} (${request_id})"
gh workflow run update-formula.yml \
--repo openclaw/homebrew-tap \
--ref main \
-f formula=gitcrawl \
-f tag="$RELEASE_TAG" \
-f repository=openclaw/gitcrawl \
-f artifact_template="{formula}_{version}_{target}.tar.gz" \
-f request_id="$request_id"
run_id=""
for _ in {1..30}; do
run_id=$(gh run list \
--repo openclaw/homebrew-tap \
--workflow update-formula.yml \
--branch main \
--event workflow_dispatch \
--limit 20 \
--json databaseId,displayTitle \
--jq ".[] | select(.displayTitle == \"$expected_title\") | .databaseId" | head -n1)
if [ -n "$run_id" ]; then
break
fi
sleep 5
done
if [ -z "$run_id" ]; then
echo "::error::Could not find tap workflow run with title: $expected_title"
exit 1
fi
gh run watch "$run_id" \
--repo openclaw/homebrew-tap \
--exit-status \
--interval 10

63
.github/workflows/secret-scan.yml vendored Normal file
View File

@ -0,0 +1,63 @@
name: "Security Gate: Secret Scanning"
on:
push:
branches: ["**"]
pull_request:
branches: [main, master]
permissions: {}
jobs:
trufflehog:
name: Scan for Verified Secrets
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Resolve scan range
id: scan_range
env:
EVENT_NAME: ${{ github.event_name }}
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
PUSH_BASE_SHA: ${{ github.event.before }}
PUSH_HEAD_SHA: ${{ github.sha }}
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
set -euo pipefail
zero_sha="0000000000000000000000000000000000000000"
if [[ "$EVENT_NAME" == "pull_request" ]]; then
base="$PR_BASE_SHA"
head="$PR_HEAD_SHA"
else
base="$PUSH_BASE_SHA"
head="$PUSH_HEAD_SHA"
if [[ -z "$base" || "$base" == "$zero_sha" ]]; then
base="origin/$DEFAULT_BRANCH"
fi
fi
echo "base=$base" >> "$GITHUB_OUTPUT"
echo "head=$head" >> "$GITHUB_OUTPUT"
- name: TruffleHog OSS
id: trufflehog
uses: trufflesecurity/trufflehog@v3.95.2
with:
path: ./
base: ${{ steps.scan_range.outputs.base }}
head: ${{ steps.scan_range.outputs.head }}
extra_args: --only-verified --debug
- name: Notify on failure
if: steps.trufflehog.outcome == 'failure'
run: |
echo "::error::Verified secrets found. Rotate the credential before merging."
exit 1

86
.github/workflows/stale.yml vendored Normal file
View File

@ -0,0 +1,86 @@
name: Stale
on:
schedule:
- cron: "21 4 * * *"
workflow_dispatch:
permissions: {}
jobs:
stale:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- name: Mark stale unassigned issues and pull requests
uses: actions/stale@v10
with:
days-before-issue-stale: 14
days-before-issue-close: 7
days-before-pr-stale: 14
days-before-pr-close: 7
stale-issue-label: stale
stale-pr-label: stale
exempt-issue-labels: enhancement,maintainer,pinned,security,no-stale
exempt-pr-labels: maintainer,no-stale
operations-per-run: 1000
ascending: true
exempt-all-assignees: true
remove-stale-when-updated: true
stale-issue-message: |
This issue has been automatically marked as stale due to inactivity.
Please add updated gitcrawl details or it will be closed.
stale-pr-message: |
This pull request has been automatically marked as stale due to inactivity.
Please update it or it will be closed.
close-issue-message: |
Closing due to inactivity.
If this still affects gitcrawl, open a new issue with current reproduction details.
close-issue-reason: not_planned
close-pr-message: |
Closing due to inactivity.
If this PR should be revived, reopen it with current context and validation.
- name: Mark stale assigned issues
uses: actions/stale@v10
with:
days-before-issue-stale: 30
days-before-issue-close: 10
days-before-pr-stale: -1
days-before-pr-close: -1
stale-issue-label: stale
exempt-issue-labels: enhancement,maintainer,pinned,security,no-stale
operations-per-run: 1000
ascending: true
include-only-assigned: true
remove-stale-when-updated: true
stale-issue-message: |
This assigned issue has been automatically marked as stale after 30 days of inactivity.
Please add an update or it will be closed.
close-issue-message: |
Closing due to inactivity.
If this still affects gitcrawl, reopen or file a new issue with current evidence.
close-issue-reason: not_planned
- name: Mark stale assigned pull requests
uses: actions/stale@v10
with:
days-before-issue-stale: -1
days-before-issue-close: -1
days-before-pr-stale: 27
days-before-pr-close: 7
stale-pr-label: stale
exempt-pr-labels: maintainer,no-stale
operations-per-run: 1000
ascending: true
include-only-assigned: true
ignore-pr-updates: true
remove-stale-when-updated: true
stale-pr-message: |
This assigned pull request has been automatically marked as stale after being open for 27 days.
Please add an update or it will be closed.
close-pr-message: |
Closing due to inactivity.
If this PR should be revived, reopen it with current context and validation.

View File

@ -1,5 +1,51 @@
# Changelog
## Unreleased
## 0.3.1 - 2026-05-08
- Fix gh-shim portable-store auto-hydration: exact issue/PR refreshes now write to the runtime mirror instead of dirtying the Git checkout, stale portable refresh locks are cleared, and empty open-issue discovery falls through when only targeted sync history exists.
- Keep `cluster-detail` aligned with the default cluster list by showing closed historical members unless `--hide-closed` is passed, and fail fast when `GITCRAWL_GH_PATH` points back at the `gitcrawl` shim.
## 0.3.0 - 2026-05-08
- Bump routine release workflow dependencies.
- Add a repo-local `gitcrawl` agent skill for local archive, freshness, gh-shim, cluster, and verification workflows.
- Accept full GitHub issue and pull request URLs anywhere `gitcrawl` expects a thread number, including sync filters, gh-shim views/diffs, governance commands, neighbor lookup, embedding, and TUI jumps.
- Document read-only SQLite query examples in the repo-local agent skill so agents can do exact local archive counts without mutating state.
- Document the crawlkit control surface now available on `main`, including `metadata --json`, `status --json`, and `doctor --json` for local launchers and CI.
- Clarify that `gitcrawl tui` remains the reference terminal browser for the crawl app family while shared `crawlkit/tui` converges on the same panes, sorting, action menus, and status chrome.
- Add command-reference coverage for the read-only metadata/status commands.
- Add broader CLI, gh-shim, TUI, and store regression coverage for the verified release surface.
## 0.2.1 - 2026-05-05
- Improve `gh` shim cache coordination and observability with stale-while-revalidate reads, finer Actions/API TTLs, recent-window stats, top miss keys, and `xcache snapshot`.
## 0.2.0 - 2026-05-05
- Add Homebrew tap installation via `brew install openclaw/tap/gitcrawl`.
- Improve the `gh` shim cache with canonicalized keys, targeted mutation invalidation, stale-on-rate-limit fallback reads, completed-run TTLs, hit-rate stats, counter reset, and issue auto-hydration.
- Add dark-mode support, a theme toggle, and clearer navigation styling to the generated docs site.
- Force embedding refreshes when the embedding input rune cap changes, so stale larger-cap vectors are not reused.
- Expand the `gh` shim with local list filters, PR diff caching by cached head SHA, xcache GC, hit/miss/write counters, and throttled portable-store refreshes to reduce GitHub API pressure across agent sessions.
- Add explicit PR-detail hydration for files, commits, checks, and workflow runs so `gh pr view`, `gh pr checks`, and `gh run list/view` can answer common review reads from the existing SQLite cache.
- Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`.
- Cache more ghx-style read-only fallthroughs, including release, workflow, secret, variable, project, ruleset, gist, org, and search reads; cache repeat read failures by default; and clear the fallthrough cache after the corresponding mutating `gh` commands.
- Promote portable backups to the v2 format: keep compact comments, PR files, commits, checks, and workflow runs while stripping raw JSON, generated documents, vectors, clusters, and run history.
- Add crawlkit control metadata/status surfaces with command-local `metadata --json`, `status --json`, and `doctor --json`.
- Include the primary SQLite database inventory in status JSON so local control surfaces can discover archive storage without opening live stores.
- Route config path handling and SQLite openers through `crawlkit` so GitHub archive tooling shares the same foundation as the Slack, Discord, and Notion crawlers.
- Keep shared crawl app TUI nomenclature aligned while `gitcrawl tui` remains the richer cluster-browser reference implementation.
- Keep the existing `gitcrawl tui` as the family reference terminal interface and add CI smoke coverage for its help surface.
## 0.1.2 - 2026-05-01
- Polish the TUI cluster browser interaction model, including separate cluster/member action menus, softer row state colors, stable viewport refresh, bidirectional age sorting, and buffered trackpad scrolling.
- Add OpenAI embedding retry handling for transient failures and cap oversized embedding inputs before sending them upstream.
- Improve GitHub pagination and retry behavior by surfacing page totals and honoring retry and rate-limit response headers.
- Harden human-key hash parsing and tidy the module graph.
## 0.1.1 - 2026-04-30
- Fix portable store refreshes when local Git pull configuration tries to rebase onto multiple branch merge refs.

1
CNAME Normal file
View File

@ -0,0 +1 @@
gitcrawl.sh

View File

@ -1,7 +1,7 @@
BINARY := gitcrawl
VERSION ?= dev
.PHONY: build test run clean
.PHONY: build test test-coverage run clean
build:
go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(VERSION)" -o bin/$(BINARY) ./cmd/gitcrawl
@ -9,6 +9,12 @@ build:
test:
go test ./...
test-coverage:
go test ./... -covermode=atomic -coverprofile=coverage.out
@total="$$(go tool cover -func=coverage.out | awk '/^total:/ { sub(/%/, "", $$3); print $$3 }')"; \
echo "total coverage: $${total}%"; \
awk -v total="$$total" 'BEGIN { if (total + 0 < 85.0) { printf("coverage %.1f%% is below 85.0%%\n", total); exit 1 } }'
run:
go run ./cmd/gitcrawl $(ARGS)

View File

@ -4,6 +4,8 @@
`gitcrawl` is a local-first GitHub issue and pull request crawler for maintainer triage. Data stays local in SQLite. The primary runtime surfaces are the CLI, JSON command output, and the terminal UI. There is no local HTTP API.
Full documentation: [gitcrawl.sh](https://gitcrawl.sh)
## Status
Early bootstrap. The implementation is being built in small commits.
@ -13,9 +15,12 @@ Early bootstrap. The implementation is being built in small commits.
```bash
gitcrawl init
gitcrawl doctor
gitcrawl metadata --json
gitcrawl status --json
gitcrawl sync owner/repo
gitcrawl sync owner/repo --state open
gitcrawl sync owner/repo --numbers 123,456 --include-comments
gitcrawl sync owner/repo --numbers https://github.com/owner/repo/issues/123 --with pr-details
gitcrawl refresh owner/repo
gitcrawl cluster owner/repo --threshold 0.80
gitcrawl clusters owner/repo
@ -23,6 +28,7 @@ gitcrawl durable-clusters owner/repo
gitcrawl cluster-detail owner/repo --id 123
gitcrawl cluster-explain owner/repo --id 123
gitcrawl close-thread owner/repo --number 123 --reason "duplicate handled"
gitcrawl close-thread owner/repo --number https://github.com/owner/repo/issues/123 --reason "handled"
gitcrawl reopen-thread owner/repo --number 123
gitcrawl close-cluster owner/repo --id 123 --reason "handled"
gitcrawl reopen-cluster owner/repo --id 123
@ -30,21 +36,34 @@ gitcrawl exclude-cluster-member owner/repo --id 123 --number 456 --reason "not t
gitcrawl include-cluster-member owner/repo --id 123 --number 456
gitcrawl set-cluster-canonical owner/repo --id 123 --number 456
gitcrawl neighbors owner/repo --number 123 --limit 10
gitcrawl neighbors owner/repo --number https://github.com/owner/repo/pull/456 --limit 10
gitcrawl search owner/repo --query "download stalls"
gitcrawl search issues "download stalls" -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30
gitcrawl search prs "manifest cache" -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20
gitcrawl search issues "hot loop" -R owner/repo --state open --sync-if-stale 5m --json number,title,url
gitcrawl sync owner/repo --numbers 123 --with pr-details
gitcrawl gh search issues "download stalls" -R owner/repo --state open --match comments --json number,title,url
gitcrawl gh pr view 123 -R owner/repo --json number,title,state,url
gitcrawl gh pr view https://github.com/owner/repo/pull/123 --json number,title,state,url
gitcrawl gh pr checks https://github.com/owner/repo/pull/123 --json name,state,conclusion
gitcrawl gh run view 123456789 -R owner/repo --json status,conclusion
gitcrawl gh xcache stats
gitcrawl tui
gitcrawl tui owner/repo
```
`gitcrawl clusters` and `gitcrawl tui` match ghcrawl's display view: latest raw run clusters first, closed durable rows merged as historical context, sorted by size by default. Pass `--hide-closed` to focus only currently open clusters. `gitcrawl durable-clusters` stays on governed durable rows and needs `--include-closed` for inactive rows.
`gitcrawl metadata --json`, `gitcrawl status --json`, and `gitcrawl doctor --json` are crawlkit control surfaces for launchers, local automation, and CI checks. They are read-only and do not mutate archive data.
`gitcrawl cluster` and `gitcrawl refresh` build ghcrawl-shaped durable clusters by default (`--threshold 0.80`, `--min-size 1`, `--max-cluster-size 40`, `--k 16`, `--cross-kind-threshold 0.93`): every active vector-backed thread is represented, singleton rows use `singleton_orphan`, multi-member rows use `duplicate_candidate`, and stable IDs are derived from the representative thread. They also add deterministic GitHub reference evidence for direct issue/PR links such as `#123`, `issues/123`, and `pull/123`. Weak embedding edges need concrete title-token overlap unless their similarity is already high, which keeps generic low-confidence bridges from forming unrelated clusters.
`gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`.
`gitcrawl sync` fetches open issues and pull requests by default. Pass `--state all` or `--state closed` for explicit backfill workflows; incremental open syncs with `--since` also sweep recently closed items so local open state does not rot.
Pass `--numbers` to refresh exact issue or pull request rows without relying on list ordering or updated-time windows.
Thread-reference inputs accept bare numbers, `#123`, `issues/123`, `pull/123`, `owner/repo#123`, and full GitHub issue/PR URLs. This applies to sync filters, `--number` flags, governance member commands, neighbor/embed lookups, gh-shim `view`/`checks`/`diff`, and TUI jump input. For gh-shim view/checks/diff, a full GitHub URL also supplies the repository, so `-R owner/repo` can be omitted.
Pass `--with pr-details` or `--include-pr-details` to hydrate pull request files, commits, checks, and workflow runs for local review. The `gh` shim can also auto-hydrate one exact PR on a PR-detail miss, then retry locally.
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration.
`gitcrawl gh` is a gh-compatible shim for agent workflows. It answers broad `gh search issues|prs`, `gh issue/pr list`, supported `gh issue/pr view --json` fields, hydrated `gh pr checks`, and hydrated `gh run list/view` from local SQLite, then falls through to the real GitHub CLI for unsupported commands. Local `gh issue/pr list` supports common filters such as `--author`, `--assignee`, and repeated `--label`; empty open issue discovery falls through when the local repo only has targeted sync history. Read-only fallthroughs such as `gh pr diff`, `gh repo view/list`, `gh release list/view`, `gh workflow list/view`, `gh secret list`, `gh variable get/list`, `gh label list`, read-only `gh search` kinds, GET-only REST `gh api` calls, and read-only `gh api graphql` queries use a command-aware persistent cache under `cache/gh-shim`; Actions run/job logs get longer TTLs, completed run/job reads are kept much longer than active CI status, user profile reads get a 7-day TTL, read-only GraphQL gets a 6-hour TTL, and `gh pr diff` entries are keyed by the cached PR head SHA when available. Explicit API paths and explicit repositories share cache entries across sibling checkouts even when agents set different `GH_REPO` values; implicit repo reads stay isolated by `GH_REPO` or current working directory. Cache keys canonicalize common flags such as `-R`/`--repo` and sorted `--json` fields so equivalent agent commands coalesce. Repeat read failures are cached by default so agents do not rediscover the same missing release or workflow, but rate-limit error entries expire quickly; if GitHub rate-limits a refresh and an expired successful entry exists, the shim serves the stale response with a warning instead of failing the read. When another process is refreshing an expired successful entry, peers may serve stale inside a short grace window instead of joining the backend stampede. 
Set `GITCRAWL_GH_STALE_GRACE=0` to disable stale-while-revalidate, or `GITCRAWL_GH_CACHE_ERRORS=0` to disable error caching. Mutating commands pass through, increment write counters, and invalidate matching cache tags instead of flushing unrelated entries. `gh xcache stats|keys|gc|flush|reset|snapshot` inspects, garbage-collects, clears, resets, or snapshots fallthrough-cache counters, including hit rate plus per-command, per-route, per-key, and `--since` recent-window miss counters. Set `GITCRAWL_GH_PATH` to choose the backend `gh`, and symlink or install the binary as `gh`/`gitcrawl-gh` to run the shim directly.
The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default, so the first screen is the useful cluster workload instead of singleton noise. Pass `--min-size 1` when you intentionally want singleton clusters. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds.
`gitcrawl tui` remains the reference terminal interaction model for the crawl app family: pane focus, sortable headers, mouse/right-click actions, detail rendering, and status chrome are the behavior the shared `crawlkit/tui` browser is converging on for Slack, Discord, and Notion archives.
## Local Defaults
@ -57,12 +76,18 @@ The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default
## Requirements
- Go 1.26+
- a GitHub token for sync commands
- a GitHub token for sync commands, either via `GITHUB_TOKEN` or `gh auth token`
- an OpenAI API key only for summary and embedding commands
## Install
Download a release archive from GitHub releases or build from source:
Install from Homebrew:
```bash
brew install openclaw/tap/gitcrawl
```
Or download a release archive from GitHub releases or build from source:
```bash
git clone https://github.com/openclaw/gitcrawl.git
@ -76,4 +101,5 @@ go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(git de
```bash
go test ./...
go build ./cmd/gitcrawl
go run ./cmd/gitcrawl help tui
```

25
SPEC.md
View File

@ -76,6 +76,7 @@ Public commands:
- `cluster-explain`
- `neighbors`
- `search`
- `gh`
- `close-thread`
- `close-cluster`
- `exclude-cluster-member`
@ -102,6 +103,30 @@ gitcrawl search issues <query> -R owner/repo --state open --sync-if-stale 5m --j
This compatibility path reads from local SQLite by default. It avoids GitHub REST search quota and is not a replacement for final live `gh` verification before comments, closes, labels, or merges. `--sync-if-stale <duration>` may run one metadata sync first when the repository mirror is older than the requested max age; the search result itself still comes from SQLite.
`gh` is the agent-facing compatibility shim. It may be invoked as `gitcrawl gh ...` or by installing the binary as `gh`/`gitcrawl-gh`. Supported local reads:
```text
gitcrawl gh search issues|prs <query> -R owner/repo --state open --match comments --json number,title,url
gitcrawl gh issue view 123 -R owner/repo --json number,title,state,url,body
gitcrawl gh pr view 123 -R owner/repo --json number,title,state,url,isDraft,author
gitcrawl gh issue list -R owner/repo --state open --search "hot loop" --json number,title,url
gitcrawl gh pr list -R owner/repo --state open --search "manifest cache" --json number,title,url
```
Unsupported commands fall through to the real GitHub CLI. Read-only fallthroughs use a command-aware persistent cache in `cache/gh-shim` for repeated agent calls (`run list/view`, `pr diff/checks/list/status/view`, `issue list/status/view`, `repo view/list`, `release list/view`, `workflow list/view`, `secret list`, `variable get/list`, `project` list/view reads, `ruleset` reads, `gist` reads, `org list`, `label list`, read-only `search` kinds, and GET-only `api`). Actions run/job logs are cached much longer than CI status reads, completed run reads receive longer TTLs, and `xcache stats` records hit rate plus backend misses by command and normalized route so remaining GitHub-heavy patterns are visible. Repeat read failures are cached by default so many agents do not rediscover the same missing release, workflow, or field, with short caps for error entries and rate-limit responses; if GitHub rate-limits a refresh and a stale successful entry exists, the stale entry is served with a warning. Set `GITCRAWL_GH_CACHE_ERRORS=0` to disable error caching. Mutating commands are never cached and invalidate matching cache-tag entries on success. Unknown mutation scope falls back to clearing the fallthrough cache. The shim does not add GitHub write-back behavior of its own; writes remain delegated to `gh`.
Cache inspection commands:
```text
gitcrawl gh xcache stats
gitcrawl gh xcache keys
gitcrawl gh xcache reset
gitcrawl gh xcache flush
gitcrawl gh xcache snapshot [--reset]
```
The cache key includes the resolved gitcrawl config path, current working directory, `GH_HOST`, `GH_REPO`, stable PR-diff identity when available, and canonicalized `gh` arguments. This keeps sibling checkouts and portable stores isolated while still coalescing equivalent agent calls such as reordered flags or sorted `--json` fields. Concurrent cache misses use a lock file so one process populates the entry while peers wait for the result; if an expired successful entry is still inside its stale grace window, peers may serve stale while the lock holder refreshes it. `xcache stats --since <duration>` reports recent-window counters from hourly buckets, and miss maps include command, normalized route, and canonical key views.
## Config
Default config path:

View File

@ -4,12 +4,19 @@ import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/openclaw/gitcrawl/internal/cli"
)
func main() {
if err := cli.New().Run(context.Background(), os.Args[1:]); err != nil {
args := os.Args[1:]
name := strings.TrimSuffix(filepath.Base(os.Args[0]), ".exe")
if name == "gh" || name == "gitcrawl-gh" {
args = append([]string{"gh"}, args...)
}
if err := cli.New().Run(context.Background(), args); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(cli.ExitCode(err))
}

View File

@ -3,6 +3,8 @@ package main
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
)
@ -31,3 +33,38 @@ func TestMainPrintsVersion(t *testing.T) {
t.Fatal("version output was empty")
}
}
// TestMainUsesGHShimWhenBinaryNameIsGH checks that main prepends the "gh"
// subcommand when the executable's base name is "gh", so that an unsupported
// command ("run view 123") falls through to the backend configured via
// GITCRAWL_GH_PATH with its arguments intact.
func TestMainUsesGHShimWhenBinaryNameIsGH(t *testing.T) {
	// main reads os.Args and writes to os.Stdout, so both globals are swapped
	// for the duration of the test and restored afterwards.
	oldArgs := os.Args
	oldStdout := os.Stdout
	t.Cleanup(func() {
		os.Args = oldArgs
		os.Stdout = oldStdout
	})

	// Fake backend gh: a shell script that echoes the arguments it receives.
	dir := t.TempDir()
	ghPath := filepath.Join(dir, "real-gh")
	if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho shim-fallback:$*\n"), 0o755); err != nil {
		t.Fatalf("write fake gh: %v", err)
	}
	t.Setenv("GITCRAWL_GH_PATH", ghPath)
	// NOTE(review): presumably this keeps the run away from the user's real
	// config and store; confirm against the config loader.
	t.Setenv("GITCRAWL_CONFIG", filepath.Join(dir, "config.toml"))
	t.Setenv("GH_REPO", "openclaw/openclaw")

	read, write, err := os.Pipe()
	if err != nil {
		t.Fatalf("pipe: %v", err)
	}
	// Release both pipe ends on every exit path. The write end is normally
	// closed explicitly below; a second Close only returns an error, which is
	// deliberately ignored here.
	t.Cleanup(func() {
		_ = write.Close()
		_ = read.Close()
	})

	os.Stdout = write
	// Only the base name of Args[0] matters: it must be "gh" to trigger the shim.
	os.Args = []string{filepath.Join(dir, "gh"), "run", "view", "123"}
	main()

	// Close the write end so the read below sees EOF.
	if err := write.Close(); err != nil {
		t.Fatalf("close stdout pipe: %v", err)
	}
	var out bytes.Buffer
	if _, err := out.ReadFrom(read); err != nil {
		t.Fatalf("read stdout: %v", err)
	}
	if got := strings.TrimSpace(out.String()); got != "shim-fallback:run view 123" {
		t.Fatalf("output = %q", got)
	}
}

1
docs/CNAME Normal file
View File

@ -0,0 +1 @@
gitcrawl.sh

179
docs/automation.md Normal file
View File

@ -0,0 +1,179 @@
---
title: Automation
nav_order: 14
permalink: /automation/
---
# Automation
{: .no_toc }
Stable JSON contracts, agent recipes, and patterns for keeping the local mirror warm without manual ceremony.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## JSON output is first class
Every command supports `--json` (or the global `--format json`). The resulting payload is pretty-printed with stable field names so you can pipe it directly into `jq` or feed it to an agent as structured context.
```bash
gitcrawl sync owner/repo --json | jq '{run_id, inserted, updated}'
gitcrawl clusters owner/repo --json --sort size --min-size 5 \
| jq '.clusters[] | {id, members: .member_count, latest: .latest_thread_number}'
```
For the full per-command JSON shapes, see the individual feature pages and the [Commands reference](/commands/).
## Exit codes
- `0` — success
- non-zero — usage error, command not implemented, runtime error
Stderr always carries a human-readable error message; stdout is reserved for the requested output (text or JSON) so you can pipe stdout to `jq` without losing diagnostics.
## Keeping the mirror fresh
Three patterns, in increasing order of automation:
### On-demand staleness check
Use `--sync-if-stale` on `gitcrawl search` (or the gh-shim's search):
```bash
gitcrawl search issues "manifest cache" \
-R owner/repo \
--sync-if-stale 5m \
--json number,title,url
```
Best for ad-hoc agent tools that should bound staleness but minimize sync calls.
### Auto-hydration via the gh shim
Symlink the gitcrawl binary as `gh` (or `gitcrawl-gh`) and let the shim pull a single PR's detail when an agent calls `gh pr view` or `gh pr checks` against an unhydrated PR. See [gh shim → auto-hydration](/gh-shim/#auto-hydration).
This is the lowest-overhead pattern for fleets of agents — no scheduling required.
### Periodic background refresh
Run `gitcrawl refresh owner/repo` on a cron, systemd timer, or `launchd` agent every few minutes per repo. Combine with the gh shim and your agents almost never have to wait on GitHub.
```cron
# Every 5 minutes, refresh the active repos.
*/5 * * * * $HOME/bin/gitcrawl refresh openclaw/gitcrawl --json > /tmp/gitcrawl.openclaw.json 2>&1
```
For multiple repos, loop in a small shell script — gitcrawl is happy to run sequentially against a shared SQLite file.
## Agent recipes
### "Look up an issue without burning quota"
```bash
gh issue view 123 -R owner/repo --json number,title,state,body,labels,author
```
With the shim symlinked as `gh`, this answers from local SQLite if the issue is cached. Auto-hydration covers PR-detail fields. The agent prompt does not change.
### "Find candidates, hydrate them, summarize"
```bash
NUMS=$(gh search issues "checksum mismatch" -R owner/repo \
--json number --limit 30 \
| jq -r '[.[].number] | join(",")')
gitcrawl sync owner/repo --numbers "$NUMS" --include-comments --with pr-details
gitcrawl cluster-detail owner/repo --id "$(gitcrawl clusters owner/repo --json \
| jq '.clusters[0].id')"
```
Search is local; the targeted sync brings exactly the rows you need; cluster-detail returns the structured triage view.
### "Find duplicates around a new bug report"
```bash
NUM=789
gitcrawl sync owner/repo --numbers "$NUM" --include-comments
gitcrawl embed owner/repo --number "$NUM"
gitcrawl neighbors owner/repo --number "$NUM" --limit 10 --json
```
### "Triage a cluster end to end"
```bash
ID=42
# Read.
gitcrawl cluster-detail owner/repo --id "$ID" --body-chars 600 --json
# Decide canonical, then close locally.
gitcrawl set-cluster-canonical owner/repo --id "$ID" --number 123
gitcrawl close-cluster owner/repo --id "$ID" --reason "consolidated under #123"
# Comment upstream via real gh.
gh issue comment 456 -R owner/repo --body "Duplicate of #123"
```
### "Prove the shim is paying off"
```bash
# Periodically log cache stats — watch local_hits climb relative to backend_misses.
gitcrawl gh xcache stats --json \
| jq '{local: .counters.local_hits, fallback: .counters.fallback_hits, github: .counters.backend_misses}'
# During release/debug sessions, compare a recent window or snapshot before reset.
gitcrawl gh xcache stats --since 1h --json
gitcrawl gh xcache snapshot --reset --json
```
## Multi-repo automation
A single `gitcrawl.db` can hold many repositories. Loop in shell:
```bash
for repo in openclaw/gitcrawl steipete/repo-a octocat/repo-b; do
gitcrawl refresh "$repo" --json | jq '{repo: "'"$repo"'", sync: .sync, embed: .embed}'
done
```
Or maintain a small script that reads a list of repos from a file and runs them on a schedule.
## Output formats
| Format | When to use |
| --- | --- |
| `text` (default) | Humans at a terminal |
| `json` (or `--json`) | Pipelines, scripts, agents |
| `log` | Internal logging output; structured key/value pairs |
Force a format globally with `--format json` or per-command with `--json`. The `log` format is mostly used internally and is subject to change.
## CI integration
Run gitcrawl in CI to validate a portable store's freshness, sanity-check cluster shapes, or produce a triage report:
```yaml
- name: Refresh and snapshot clusters
run: |
gitcrawl init --portable-store $PORTABLE_STORE_URL
gitcrawl refresh openclaw/gitcrawl --json > sync.json
gitcrawl clusters openclaw/gitcrawl --json --sort size --min-size 5 > clusters.json
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- uses: actions/upload-artifact@v4
with: { name: triage, path: "*.json" }
```
The artifact gives reviewers a structured view of what changed and how the cluster graph looks today.
## Best practices
- **Set both tokens in a single place.** Either env or `[env]` in `config.toml`. Mixing sources tends to confuse `doctor` reports.
- **Bound the staleness window.** `--sync-if-stale` on every agent-driven search is cheaper than a hot cron loop.
- **Monitor `xcache stats`.** If `backend_misses` dwarfs `local_hits`, you are not yet getting the shim's benefit. This usually means agents are calling `gh` directly without going through the symlink.
- **Re-cluster after a backfill.** A large `--state all` sync should be followed by `gitcrawl refresh --no-sync` (or just `gitcrawl embed && gitcrawl cluster`) so the durable graph reflects the new content.
- **Pin the `gh` binary.** Set `GITCRAWL_GH_PATH` explicitly so the shim does not accidentally invoke itself.

160
docs/clustering.md Normal file
View File

@ -0,0 +1,160 @@
---
title: Clustering
nav_order: 9
permalink: /clustering/
---
# Clustering
{: .no_toc }
Group related issues and pull requests using vector similarity, hardened with deterministic GitHub reference evidence and cross-kind safeguards.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## How it works
Clustering builds a sparse nearest-neighbor graph over the local vector store. For each thread, gitcrawl picks the top `k` most similar threads (default 16). Edges below the cosine threshold (default 0.80) are dropped. The remaining graph is split into connected components capped at `--max-cluster-size` members.
Two safeguards keep mega-clusters from forming:
- **Title-token overlap.** A weak embedding edge needs concrete shared title tokens (4+ char alphanumeric tokens) unless its similarity is already high (≥ 0.90) or there is direct GitHub reference evidence (`#123`, `pull/123`, `issues/123`).
- **Cross-kind pruning.** Edges connecting issues to pull requests need a higher floor (`--cross-kind-threshold`, default 0.93) than issue↔issue or PR↔PR edges.
GitHub references found in titles or in the first ~240 characters of bodies generate **deterministic reference edges** with score 0.94. Body-only references later in the document are treated as weak evidence (need title-token overlap or other support). Single-digit numbers in prose are ignored as ambiguous; references must be at least two digits or use a fully qualified form.
The result is written to two tables that survive across runs:
- `durable_clusters` — stable cluster rows with stable IDs derived from the representative thread
- `durable_cluster_members` — thread-to-cluster mappings with override metadata
## Generate clusters
```bash
gitcrawl cluster owner/repo
```
The defaults match ghcrawl's tuning so the output is comparable across tools:
| Flag | Default | Description |
| --- | --- | --- |
| `--threshold <float>` | `0.80` | Minimum cosine score for an edge |
| `--cross-kind-threshold <float>` | `0.93` | Minimum cosine score for issue↔PR edges |
| `--min-size <n>` | `1` | Minimum members per emitted cluster |
| `--max-cluster-size <n>` | `40` | Hard cap on cluster size |
| `--k <n>` | `16` | Nearest-neighbor fanout per thread |
| `--limit <n>` | _(no limit)_ | Maximum vector rows to consider |
| `--model <name>` | _(config)_ | Embedding model override |
| `--basis <name>` | _(config)_ | Embedding basis override |
| `--include-closed` | _(off)_ | Include closed threads |
Every active vector-backed thread is represented in the result: singleton clusters use `kind = singleton_orphan`, multi-member clusters use `kind = duplicate_candidate`.
## List clusters
```bash
gitcrawl clusters owner/repo
gitcrawl clusters owner/repo --sort size --min-size 5
gitcrawl clusters owner/repo --sort recent
gitcrawl clusters owner/repo --hide-closed
```
| Flag | Default | Description |
| --- | --- | --- |
| `--sort recent\|oldest\|size` | `size` | Ordering |
| `--min-size <n>` | _(none)_ | Minimum active member count |
| `--limit <n>` | _(no limit)_ | Maximum cluster rows |
| `--hide-closed` | _(off)_ | Hide locally closed clusters |
| `--include-closed` | _(deprecated)_ | Closed clusters are included by default |
`gitcrawl clusters` shows the latest raw run's clusters first and merges closed durable rows in as historical context. For a strict durable-only audit view (no merging with the latest run), use:
```bash
gitcrawl durable-clusters owner/repo --include-closed
```
GitHub-closed members are hidden from latest-run cluster summaries by default; pass `--include-closed` to see the full historical view.
## Inspect a cluster
```bash
gitcrawl cluster-detail owner/repo --id 123
gitcrawl cluster-explain owner/repo --id 123 # alias
```
| Flag | Default | Description |
| --- | --- | --- |
| `--id <n>` | _(required)_ | Cluster ID |
| `--member-limit <n>` | _(no limit)_ | Maximum members to return |
| `--body-chars <n>` | `280` | Body snippet length per member |
| `--hide-closed` | _(off)_ | Hide locally closed members |
`cluster-explain` is the same command — it exists so the verb reads naturally in agent prompts ("explain why these things ended up together").
## Find similar threads (neighbors)
```bash
gitcrawl neighbors owner/repo --number 123 --limit 10
```
| Flag | Default | Description |
| --- | --- | --- |
| `--number <n>` | _(required)_ | Source issue/PR |
| `--limit <n>` | `10` | Maximum neighbors |
| `--threshold <float>` | `0.2` | Minimum cosine score |
Useful for "what else looks like this?" without committing to a cluster. The TUI's `n` shortcut and "Enter on a member" both call this path.
## Tuning recipes
### My clusters are too greedy
Symptom: unrelated bug reports merged together.
```bash
gitcrawl cluster owner/repo --threshold 0.85 --cross-kind-threshold 0.95
```
Tighter thresholds drop more weak edges. The `--cross-kind-threshold` raise specifically helps when an issue and a PR keep getting glued together because of shared boilerplate.
### My clusters are too sparse
Symptom: clear duplicates landing in separate clusters.
```bash
gitcrawl cluster owner/repo --threshold 0.75 --k 24
```
Lower threshold + higher fanout. Watch for false merges via `cluster-detail`.
### Make a single big cluster smaller
Symptom: one cluster has 40 members and is incoherent.
```bash
gitcrawl cluster owner/repo --max-cluster-size 20
```
Or slice it manually:
```bash
gitcrawl exclude-cluster-member owner/repo --id 12 --number 456 --reason "different repro"
```
See [Governance](/governance/) for the full override workflow.
## Re-clustering and stable IDs
Durable cluster IDs are derived from the representative thread, so they survive re-runs of `cluster` and `refresh`. This means:
- Local closes (`close-cluster`), exclusions, and canonical member overrides persist across re-clustering
- You can safely re-cluster after every refresh without losing maintainer state
Cluster runs are recorded in `run_records` and visible via `gitcrawl runs --kind cluster`.
## See also
- [Governance](/governance/) — close clusters, exclude members, set canonical
- [TUI](/tui/) — the interactive cluster browser
- [Concepts](/concepts/#cluster) — durable clusters and cluster kinds

139
docs/commands.md Normal file
View File

@ -0,0 +1,139 @@
---
title: Commands reference
nav_order: 15
permalink: /commands/
---
# Commands reference
{: .no_toc }
Complete CLI surface, one row per command. Use as a lookup table; deep documentation lives in the feature pages.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## Global flags
These work on every command.
| Flag | Default | Description |
| --- | --- | --- |
| `--config <path>` | `$GITCRAWL_CONFIG` or default | Override config path |
| `--format text\|json\|log` | `text` | Output format |
| `--json` | _(off)_ | Shorthand for `--format json` |
| `--no-color` | _(off)_ | Suppress ANSI color |
| `--version` | _(off)_ | Print version and exit (global only) |
| `--help` / `-h` | — | Print usage |
## Setup
| Command | Purpose | Detailed docs |
| --- | --- | --- |
| `gitcrawl init [--db --portable-store --portable-db --store-dir --json]` | Create config, database, runtime directories; optionally clone a portable store | [Installation](/installation/), [Portable stores](/portable-stores/) |
| `gitcrawl doctor [--json]` | Health check for config, database, credentials, model selection, repo/thread counts | [Configuration](/configuration/#gitcrawl-doctor) |
| `gitcrawl metadata [--json]` | Print the crawlkit command/control manifest for launchers and automation | — |
| `gitcrawl status [--json]` | Print read-only archive status, database inventory, and control state | — |
| `gitcrawl configure [--summary-model --embed-model --embedding-basis --json]` | Update model fields in `config.toml` | [Configuration](/configuration/#gitcrawl-configure) |
| `gitcrawl version` | Print version | — |
## Sync
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl sync owner/repo [--state --since --numbers <refs> --limit --include-comments --include-pr-details --with pr-details --json]` | Sync issues and PRs from GitHub into local SQLite | [Sync](/sync/) |
| `gitcrawl refresh owner/repo [--no-sync --no-embed --no-cluster ...]` | Wrapper that runs sync → embed → cluster | [Refresh and embed](/refresh-and-embed/) |
| `gitcrawl embed owner/repo [--number <ref> --limit --force --include-closed --json]` | Generate OpenAI embeddings for thread documents | [Refresh and embed](/refresh-and-embed/#embed) |
| `gitcrawl runs owner/repo [--kind sync\|embedding\|cluster --limit --json]` | List recorded run history | [Refresh and embed](/refresh-and-embed/#runs) |
## Inspect
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl threads owner/repo [--include-closed --numbers --limit --json]` | List threads from local cache | — |
| `gitcrawl search owner/repo --query <text> [--mode keyword\|semantic\|hybrid --limit --json]` | Local search (direct mode) | [Search](/search/) |
| `gitcrawl search issues\|prs <query> -R owner/repo [--state --json --limit --sync-if-stale]` | Local search (`gh search` shape) | [Search](/search/#gh-search-compatibility-mode) |
| `gitcrawl neighbors owner/repo --number <ref> [--limit --threshold --json]` | Vector-similar threads to a specific issue/PR | [Clustering](/clustering/#find-similar-threads-neighbors) |
## Thread References
Commands that accept a thread number also accept thread references:
- bare numbers: `123`
- hash references: `#123`
- path references: `issues/123`, `pull/123`
- scoped references: `owner/repo#123`
- full GitHub issue or pull request URLs
This applies to `sync --numbers`, `threads --numbers`, `embed --number`,
`neighbors --number`, all governance `--number` flags, gh-shim
`issue/pr view`, `pr checks`, `pr diff`, and TUI jump input. In gh-shim
`view`/`checks`/`diff`, a full GitHub URL also supplies `owner/repo`, so
`-R owner/repo` is optional.
## Cluster
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl cluster owner/repo [--threshold --min-size --max-cluster-size --k --cross-kind-threshold --limit --model --basis --include-closed --json]` | Build durable clusters from vectors | [Clustering](/clustering/#generate-clusters) |
| `gitcrawl clusters owner/repo [--sort size\|recent\|oldest --min-size --limit --hide-closed --json]` | Latest-run cluster summary, merged with closed durable rows | [Clustering](/clustering/#list-clusters) |
| `gitcrawl durable-clusters owner/repo [--include-closed --sort --min-size --limit --json]` | Strict durable-cluster audit view | [Clustering](/clustering/#list-clusters) |
| `gitcrawl cluster-detail owner/repo --id <n> [--member-limit --body-chars --hide-closed --json]` | Cluster + members detail | [Clustering](/clustering/#inspect-a-cluster) |
| `gitcrawl cluster-explain owner/repo --id <n> [...]` | Alias for `cluster-detail` | [Clustering](/clustering/#inspect-a-cluster) |
## Governance
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl close-thread owner/repo --number <ref> [--reason --json]` | Local close on a thread | [Governance](/governance/#local-close) |
| `gitcrawl reopen-thread owner/repo --number <ref> [--json]` | Undo a local close on a thread | — |
| `gitcrawl close-cluster owner/repo --id <n> [--reason --json]` | Local close on a cluster | [Governance](/governance/#local-close) |
| `gitcrawl reopen-cluster owner/repo --id <n> [--json]` | Undo a local close on a cluster | — |
| `gitcrawl exclude-cluster-member owner/repo --id <n> --number <ref> [--reason --json]` | Pull a thread out of a cluster | [Governance](/governance/#member-exclusion) |
| `gitcrawl include-cluster-member owner/repo --id <n> --number <ref> [--reason --json]` | Return an excluded thread to a cluster | — |
| `gitcrawl set-cluster-canonical owner/repo --id <n> --number <ref> [--reason --json]` | Pin canonical thread for a cluster | [Governance](/governance/#canonical-member) |
## TUI
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl tui [owner/repo] [--min-size --sort --limit --hide-closed --json]` | Interactive cluster browser; `--json` emits a snapshot instead of launching the UI | [TUI](/tui/) |
## gh shim
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl gh search issues\|prs <query> -R owner/repo [...]` | Local-first `gh search` | [gh shim](/gh-shim/) |
| `gitcrawl gh issue view <n-or-url> [-R owner/repo] --json <fields>` | Local-first thread view | [gh shim](/gh-shim/) |
| `gitcrawl gh pr view <n-or-url> [-R owner/repo] --json <fields>` | Same, for PRs (with auto-hydration) | [gh shim](/gh-shim/) |
| `gitcrawl gh issue list -R owner/repo [--state --search --author --assignee --label --json]` | Local-first list | [gh shim](/gh-shim/) |
| `gitcrawl gh pr list -R owner/repo [...]` | Same, for PRs | [gh shim](/gh-shim/) |
| `gitcrawl gh pr checks <n-or-url> [-R owner/repo] --json <fields>` | Cached PR checks (auto-hydrates if stale) | [gh shim](/gh-shim/) |
| `gitcrawl gh pr diff <n-or-url> [-R owner/repo]` | Falls through; cached by head SHA | [gh shim](/gh-shim/) |
| `gitcrawl gh run list -R owner/repo [--branch --commit --json]` | Cached workflow runs | [gh shim](/gh-shim/) |
| `gitcrawl gh run view <run-id> -R owner/repo [--json]` | Same, single run | [gh shim](/gh-shim/) |
| `gitcrawl gh repo view\|list ...` | Falls through; cached briefly | [gh shim](/gh-shim/) |
| `gitcrawl gh release list\|view ...` | Falls through; cached briefly | [gh shim](/gh-shim/#read-only-fallthroughs-cached) |
| `gitcrawl gh workflow list\|view ...` | Falls through; cached briefly | [gh shim](/gh-shim/#read-only-fallthroughs-cached) |
| `gitcrawl gh secret list ...` / `variable get\|list ...` | Falls through; cached briefly | [gh shim](/gh-shim/#read-only-fallthroughs-cached) |
| `gitcrawl gh label list ...` | Falls through; cached briefly | [gh shim](/gh-shim/) |
| `gitcrawl gh api <GET path>` | Falls through; cached briefly (GET-only REST) | [gh shim](/gh-shim/) |
| `gitcrawl gh api graphql -f query=...` | Falls through; read-only queries are cached | [gh shim](/gh-shim/#read-only-fallthroughs-cached) |
| `gitcrawl gh xcache stats [--since <duration>] \| keys \| gc \| flush \| reset \| snapshot [--reset] [--json]` | Cache inspection / housekeeping | [gh shim](/gh-shim/#cache-inspection-xcache) |
| _Anything else_ | Falls through to real `gh` | [gh shim](/gh-shim/) |
The shim binary can be installed standalone by symlinking the `gitcrawl` binary as `gh` or `gitcrawl-gh`.
## Portable stores
| Command | Purpose | Docs |
| --- | --- | --- |
| `gitcrawl portable prune [--body-chars --no-vacuum --json]` | Build a compact portable v2 backup and (optionally) `VACUUM` for publishing | [Portable stores](/portable-stores/#publishing-gitcrawl-portable-prune) |
## Not yet implemented
These appear in `SPEC.md` but currently return a "not implemented" error. They are reserved for future versions:
`summarize`, `key-summaries`, `merge-clusters`, `split-cluster`, `export-sync`, `import-sync`, `validate-sync`, `portable-size`, `sync-status`, `optimize`, `completion`
If you need any of these to land sooner, [open an issue](https://github.com/openclaw/gitcrawl/issues).

100
docs/concepts.md Normal file
View File

@ -0,0 +1,100 @@
---
title: Concepts
nav_order: 4
permalink: /concepts/
---
# Concepts
{: .no_toc }
The handful of nouns gitcrawl uses, and how they connect.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## Repository mirror
A **repository** is the `owner/repo` you sync. Every gitcrawl command takes one, and most state in SQLite is keyed by it. You can mirror as many repos as you like into a single `gitcrawl.db`; commands always scope to the one you name.
The mirror is metadata-first: titles, bodies, authors, labels, state, timestamps, and IDs land in SQLite immediately. Comments, reviews, review comments, and full PR detail (files, commits, checks, workflow runs) are opt-in on a per-sync basis (see [Sync](/sync/)).
## Thread
A **thread** is a single GitHub issue or pull request, with its body and metadata. The CLI exposes threads via `gitcrawl threads` and via the `gh` shim's `gh issue/pr view` and `gh issue/pr list` paths.
Threads have two state dimensions:
- **GitHub state** — `open` or `closed` upstream.
- **Local close** — a maintainer-only override stored locally. `gitcrawl close-thread` and `reopen-thread` flip this without touching GitHub. Local closes drive the `--hide-closed` and `--include-closed` filters across `clusters`, `cluster-detail`, the TUI, and search.
Local close is for triage workflow: "I have handled this duplicate locally, I do not need it shown next time." It does not write back to GitHub.
## Document
A **document** is the canonical text gitcrawl indexes for a thread — title plus body, with comments folded in when present. Documents back the FTS index used by `gitcrawl search` and feed the embedding pipeline.
Most users never interact with documents directly; they show up in JSON output as a `document` field on neighbors and search hits.
## Embedding
An **embedding** is a vector representation of a thread's document, produced by an OpenAI model (default `text-embedding-3-small`, 1024 dimensions). Vectors live in `~/.config/gitcrawl/vectors` and are referenced from the `thread_vectors` table.
The **embedding basis** controls what text gets embedded. The default `title_original` uses title plus an excerpt of the original body. This is configurable via `gitcrawl configure --embedding-basis ...` but only `title_original` is currently implemented.
`gitcrawl embed` is the explicit command that fills the vector table. `gitcrawl refresh` runs it automatically as part of its sync → embed → cluster pipeline.
When the embedding input rune cap or model changes, vectors are rebuilt to avoid stale comparisons.
## Cluster
A **cluster** is a group of related threads inferred from vector similarity, with deterministic GitHub reference evidence (`#123`, `pull/123`, `issues/123`) folded in to harden weak edges.
Clustering is run by `gitcrawl cluster` (or as part of `gitcrawl refresh`). Defaults are tuned to ghcrawl's profile: `--threshold 0.80`, `--min-size 1`, `--max-cluster-size 40`, `--k 16` nearest-neighbor fanout, `--cross-kind-threshold 0.93` for issue↔PR edges.
Two safeguards keep mega-clusters from forming:
- **Title-token overlap.** A weak embedding edge needs concrete shared title tokens unless its similarity is already high or there is direct GitHub reference evidence.
- **Cross-kind pruning.** Issue↔PR edges need a higher similarity floor (`--cross-kind-threshold`) than issue↔issue or PR↔PR.
### Cluster kinds
Every cluster ships with a kind that explains its shape:
- `singleton_orphan` — one member, no neighbors above threshold. Useful for surfacing isolated reports.
- `duplicate_candidate` — multiple members above the merge threshold. The default duplicate triage row.
### Durable clusters
A **durable cluster** is a stable, long-lived row in `durable_clusters` with a stable ID derived from its representative thread. Durable cluster IDs survive re-runs of `cluster` and `refresh`, so the local close, exclusion, and canonical-member overrides you apply persist across re-clustering.
`gitcrawl clusters` and `gitcrawl tui` show the latest raw run's clusters first, with closed durable rows merged in as historical context. Use `gitcrawl durable-clusters` for an audit view that stays on the durable rows.
### Cluster overrides (governance)
Per-cluster maintainer overrides let you correct what the algorithm produced without re-tuning thresholds:
- **Local close** (`close-cluster`/`reopen-cluster`) — hides a duplicate-candidate from active triage.
- **Member exclusion** (`exclude-cluster-member`/`include-cluster-member`) — pulls a specific thread out of a cluster and remembers why.
- **Canonical member** (`set-cluster-canonical`) — pins which thread represents the cluster.
See [Governance](/governance/) for the full workflow.
## Run
Every sync, embed, and cluster operation records a **run** in `run_records` with start/finish timestamps, status, and stage-specific stats. `gitcrawl runs --kind sync|embedding|cluster` lists them, useful for debugging or auditing.
## Portable store
A **portable store** is a Git-backed publish target for a `gitcrawl.db` plus its derived bodies, designed for sharing a local cache across agents or machines without a hosted service.
`gitcrawl init --portable-store https://github.com/org/repo` clones a portable store into `~/.config/gitcrawl/portable/` and points the runtime at it. `gitcrawl portable prune --body-chars 256` keeps the published payload small while retaining comments, PR details, checks, and workflow runs. Read-only commands run against portable stores refresh the checkout before reading. See [Portable stores](/portable-stores/).
## Cache
The `cache/` directory under `~/.config/gitcrawl/` holds:
- `cache/gh-shim/` — the short-lived fallthrough cache for the `gh` shim, keyed by config path, CWD, `GH_HOST`, `GH_REPO`, and command args. Inspect or clean it with `gitcrawl gh xcache stats|keys|gc|flush`.
- `cache/pr/` — hydrated PR detail blobs used to answer `gh pr view`, `gh pr checks`, and `gh run` reads from local SQLite.
See [gh shim](/gh-shim/) for the cache key composition and TTL behavior.

146
docs/configuration.md Normal file
View File

@ -0,0 +1,146 @@
---
title: Configuration
nav_order: 5
permalink: /configuration/
---
# Configuration
{: .no_toc }
Where gitcrawl reads settings from, and how to override them.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## Resolution order
For each setting, gitcrawl looks in this order and uses the first match:
1. CLI flag (e.g., `--config`, `--summary-model`)
2. Environment variable (`GITCRAWL_*`, then standard `GITHUB_TOKEN` / `OPENAI_API_KEY`)
3. `[env]` table inside `config.toml`
4. Top-level config field inside `config.toml`
5. Built-in default
## Default paths
| Path | Purpose |
| --- | --- |
| `~/.config/gitcrawl/config.toml` | Configuration file |
| `~/.config/gitcrawl/gitcrawl.db` | SQLite database |
| `~/.config/gitcrawl/cache/` | Caches (PR detail, gh shim fallthrough) |
| `~/.config/gitcrawl/cache/gh-shim/` | gh shim fallthrough cache |
| `~/.config/gitcrawl/vectors/` | Vector store backing embeddings |
| `~/.config/gitcrawl/logs/` | Operational logs |
Override the config path by setting `GITCRAWL_CONFIG=/path/to/config.toml` or by passing `--config` to any command.
## `config.toml`
`gitcrawl init` writes a minimal config. You can edit it by hand or with `gitcrawl configure`:
```toml
summary_model = "gpt-5.4"
embed_model = "text-embedding-3-small"
embed_dimensions = 1024
embedding_basis = "title_original"
[env]
GITHUB_TOKEN = "<github-token>"
OPENAI_API_KEY = "<openai-api-key>"
[portable_store]
url = "https://github.com/org/portable-store.git"
db_path = "data/openclaw__openclaw.sync.db"
checkout_dir = "/Users/me/.config/gitcrawl/portable"
```
### Notable fields
| Field | Default | Notes |
| --- | --- | --- |
| `summary_model` | `gpt-5.4` | Reserved for future summary commands |
| `embed_model` | `text-embedding-3-small` | OpenAI embedding model |
| `embed_dimensions` | `1024` | Must match the model |
| `embedding_basis` | `title_original` | Only `title_original` is implemented |
| `[env]` | _(empty)_ | Sets process env at startup; useful for tokens you do not want in your shell rc |
| `[portable_store]` | _(empty)_ | Used when working from a shared, Git-backed cache |
## Environment variables
### Core
| Variable | Purpose |
| --- | --- |
| `GITCRAWL_CONFIG` | Override config path |
| `GITCRAWL_DB_PATH` | Override database path |
| `GITHUB_TOKEN` | GitHub API token (required for `sync`, `gh` shim fallthroughs) |
| `OPENAI_API_KEY` | OpenAI API key (required for `embed`) |
### Model overrides
| Variable | Purpose |
| --- | --- |
| `GITCRAWL_SUMMARY_MODEL` | Override summary model |
| `GITCRAWL_EMBED_MODEL` | Override embedding model |
| `GITCRAWL_OPENAI_RETRY_DISABLED` | Set to `1` to disable OpenAI retry/backoff |
| `GITCRAWL_OPENAI_BASE_URL` / `OPENAI_BASE_URL` | Custom OpenAI endpoint (e.g., for a proxy) |
### GitHub overrides
| Variable | Purpose |
| --- | --- |
| `GITCRAWL_GITHUB_BASE_URL` / `GITHUB_BASE_URL` | Custom GitHub API endpoint (used by `sync` and the `gh` shim) |
| `GH_HOST` | GitHub host; included in the `gh` shim cache key |
| `GH_REPO` | Default repo when `-R` is omitted; included in the `gh` shim cache key |
### gh shim
| Variable | Purpose |
| --- | --- |
| `GITCRAWL_GH_PATH` | Path to the real `gh` binary used for fallthrough |
| `GITCRAWL_GH_AUTO_HYDRATE` | Set to `0` to disable PR auto-hydration on cache miss |
| `GITCRAWL_GH_CACHE_TTL` | Override fallthrough cache TTL (e.g., `5m`, `1h`) |
| `GITCRAWL_GH_STALE_GRACE` | Override stale-while-revalidate grace for expired successful fallthrough entries |
| `GITCRAWL_GH_CACHE_ERRORS` | Set to `0` to avoid caching non-zero read-only fallthroughs |
If `GITCRAWL_GH_PATH` is unset, the shim probes common Homebrew install paths and then your `PATH`. Set it explicitly when you symlink the gitcrawl binary as `gh` (otherwise the shim will recurse into itself).
## Global flags
These flags work on every command:
| Flag | Default | Description |
| --- | --- | --- |
| `--config <path>` | `$GITCRAWL_CONFIG` or default | Override config path for this invocation |
| `--format text\|json\|log` | `text` | Output format |
| `--json` | _(off)_ | Shorthand for `--format json` |
| `--no-color` | _(off)_ | Suppress ANSI color codes |
| `--version` | _(off)_ | Print version and exit (global only) |
`--json` overrides `--format`. Both are honored on subcommands that produce output.
## `gitcrawl configure`
Interactive-friendly config edits without opening the file:
```bash
gitcrawl configure --summary-model gpt-5.4
gitcrawl configure --embed-model text-embedding-3-small
gitcrawl configure --embedding-basis title_original
gitcrawl configure --json
```
Returns the resolved config path, the values that were updated, and the now-current model selection. See `gitcrawl configure --help`.
## `gitcrawl doctor`
A health check for everything covered above:
```bash
gitcrawl doctor # human-readable
gitcrawl doctor --json # for scripts
```
Reports config path and existence, database path, whether `GITHUB_TOKEN` and `OPENAI_API_KEY` are present (and whether they came from env vs. config), the active summary/embed models, the embedding basis, and counts of repositories, threads, open threads, clusters, plus the last sync timestamp. If the API call surface is unsupported (older Go, missing crypto), `api_supported: false` is reported so you can investigate.

208
docs/gh-shim.md Normal file
View File

@ -0,0 +1,208 @@
---
title: gh shim
nav_order: 12
permalink: /gh-shim/
---
# gh shim
{: .no_toc }
A `gh`-compatible binary that answers from local SQLite first and falls through to the real `gh` for everything else. The fastest way to cut GitHub API load across an agent fleet.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## What it is
The same `gitcrawl` binary serves a `gh`-compatible mode. Invoked as `gitcrawl gh ...`, or as `gh` / `gitcrawl-gh` via symlink, it intercepts read-only commands and serves them from the local mirror. Anything it cannot serve locally falls through to the real `gh` binary you already have installed, with a short persistent cache layered on top.
The shim never adds GitHub write behavior. Mutating commands (`gh issue close`, `gh pr merge`, `gh api -X POST ...`, `gh label create`, etc.) pass straight through to the real `gh`, increment a write counter, and clear the relevant cache entries on success.
## Install
```bash
# Side-by-side: agents opt in by calling `gitcrawl-gh`.
mkdir -p "$HOME/bin"
ln -sf "$(command -v gitcrawl)" "$HOME/bin/gitcrawl-gh"
# Or replace the global `gh` so every caller picks up the cache automatically.
REAL_GH="$(command -v gh)" # capture this before shadowing gh
ln -sf "$(command -v gitcrawl)" "$HOME/bin/gh"
export GITCRAWL_GH_PATH="$REAL_GH" # tell the shim where the real gh is
```
Make sure `~/bin` is on `PATH` before the original `gh` location if you want the shim to be picked up as `gh`. If `GITCRAWL_GH_PATH` is unset, the shim probes common Homebrew paths and then `PATH`. Set it explicitly when you replace the global `gh` so the shim does not recurse into itself.
## Supported local reads
### `gh search issues|prs`
```bash
gh search issues "download stalls" -R owner/repo --state open \
--match comments --json number,title,url
gh search prs "manifest cache" -R owner/repo --state open \
--json number,title,url --limit 20
```
Answered from the local FTS index. Honors `--state`, `--json`, `--limit`. `--match` is accepted for parity (the local index already covers documents). Falls through if an unsupported filter combination is requested.
### `gh issue view` / `gh pr view`
```bash
gh issue view 123 -R owner/repo --json number,title,state,url,body,labels,author
gh pr view 123 -R owner/repo --json number,title,state,url,isDraft,author,headRef,baseRef
gh issue view https://github.com/owner/repo/issues/123 --json number,title,url
gh pr view https://github.com/owner/repo/pull/123 --json number,title,url
```
Full GitHub issue/PR URLs provide both the repository and thread number when
`-R`/`--repo` is omitted.
Supported JSON fields include `number`, `title`, `state`, `url`, `body`, `author`, `createdAt`, `updatedAt`, `closedAt`, `labels`, plus PR-specific `isDraft`, `headRef`, `baseRef`. PR detail fields (`files`, `commits`, `checks`, `statusCheckRollup`) are answered from cached PR detail and trigger [auto-hydration](#auto-hydration) on miss.
### `gh issue list` / `gh pr list`
```bash
gh issue list -R owner/repo --state open --search "hot loop" \
--author octocat --label bug --label triage --json number,title,url
gh pr list -R owner/repo --state open --search "manifest cache" \
--assignee me --json number,title,url
```
Supports `--state`, `--search` (keyword search), `--author`, `--assignee`, repeated `--label`, `--limit`, and `--json`. Falls through for unsupported filters.
### `gh pr checks`
```bash
gh pr checks 123 -R owner/repo --json name,state,conclusion,detailsUrl
gh pr checks https://github.com/owner/repo/pull/123 --json name,state,conclusion
```
Returns the cached check/status summary for the PR. If the cached PR detail is older than 90 seconds or its head SHA is stale, [auto-hydration](#auto-hydration) refreshes it before answering. Supported fields: `name`, `state`, `status`, `conclusion`, `detailsUrl`, `workflow`, `startedAt`, `completedAt`.
Like `gh pr view`, a full pull request URL can supply both repository and
number.
### `gh run list` / `gh run view`
```bash
gh run list -R owner/repo --branch main --limit 20 \
--json databaseId,workflowName,status,conclusion
gh run view 123456789 -R owner/repo --json status,conclusion,headSha
```
Workflow runs come from cached PR detail. Filters: `--branch`, `--commit` (head SHA). Supported fields: `databaseId`, `id`, `number`, `workflowName`, `name`, `displayTitle`, `status`, `conclusion`, `url`, `event`, `headBranch`, `headSha`, `createdAt`, `updatedAt`.
## Read-only fallthroughs (cached)
These commands fall through to the real `gh`, but the response body is cached for the next caller in the same workspace, so repeat reads skip the backend call:
- `gh pr diff <number-or-url>` — keyed by the cached PR head SHA when available, so the cache is stable across many sequential agent reads; full PR URLs can omit `-R`
- `gh issue list/status/view`, `gh pr list/status/view/checks`, and unsupported read-only local shim shapes
- `gh release list/view`, `gh workflow list/view`, `gh secret list`, and `gh variable get/list`
- `gh project list/view/field-list/item-list`, `gh ruleset check/list/view`, `gh gist list/view`, and `gh org list`
- `gh repo view` / `gh repo list`
- `gh search code/commits/issues/prs/repos`
- `gh label list`
- `gh api <GET path>` — only `GET` requests for REST; never cached for `POST`/`PATCH`/`DELETE`/`PUT`.
- `gh api graphql` — cached only when the `query` field is a read-only query. Mutations, file-backed query fields, and `--input` calls pass through uncached.
Common Actions REST reads such as run status, job lists, and logs get Actions-aware TTLs.
Default cache TTLs are command-aware: active `gh run list` and run-status reads use `30s`; completed run views, completed Actions job lists, and run/job logs are kept for `12h`; completed run lists are kept for `30m`; workflow reads use `15m`; search reads use `15m`; release metadata uses `1h`; GitHub user profile reads use `7d`; read-only GraphQL queries use `6h`; GitHub Pages metadata uses `15m` to `30m`; tagged/SHA `contents` API reads use `7d`; `gh pr diff` uses `5m` without a stable SHA and `7d` with one. Most other read-only fallthroughs use `5m` to `10m`. Override with `GITCRAWL_GH_CACHE_TTL=5m` or similar.
Repeat read failures are cached by default too. That avoids a fleet of agents all rediscovering the same missing release, workflow, secret, or unsupported field. Error entries are capped to shorter lifetimes, and rate-limit errors are capped at `2m` so a reset is not masked all day. If GitHub returns a rate-limit error while refreshing an expired successful entry, the shim serves that stale success with a warning instead of failing the read. When another process is already refreshing an expired successful entry, peers can serve that stale entry within a short command-aware grace window instead of joining the backend stampede. Set `GITCRAWL_GH_STALE_GRACE=0` to disable stale-while-revalidate, or `GITCRAWL_GH_CACHE_ERRORS=0` to cache successful reads only.
## Auto-hydration
When a local issue or PR read misses the cache, the shim can auto-hydrate exactly one thread before falling back:
1. Shim detects a missing issue/PR row or stale PR detail (older than 90s, or head SHA mismatch)
2. If `GITCRAWL_GH_AUTO_HYDRATE != 0` (the default), runs `gitcrawl sync --numbers <n>` and adds `--with pr-details` for PR detail reads
3. Retries the local query against the freshly populated cache
4. Falls through to the real `gh` if hydration failed
This keeps `gh issue view`, `gh pr view`, `gh pr checks`, and `gh run` reads cheap and fresh without manual sync orchestration. Disable with `GITCRAWL_GH_AUTO_HYDRATE=0` if you want the shim to be strictly cache-or-fallthrough.
When the configured database comes from a portable store, auto-hydration writes to the local runtime mirror, not the Git checkout. Broad empty open-issue discovery is also guarded: if `gh issue list` or empty-query `gh search issues --state open` would return no rows but the repo only has targeted sync history, the shim falls through to the real `gh` instead of treating that incomplete local snapshot as authoritative.
## Cache inspection: `xcache`
```bash
gitcrawl gh xcache stats # summary
gitcrawl gh xcache keys # per-entry detail
gitcrawl gh xcache gc # remove expired entries + stale lock files
gitcrawl gh xcache flush # clear everything
gitcrawl gh xcache reset # reset counters without deleting entries
gitcrawl gh xcache snapshot # write a counter snapshot for later comparison
```
All accept `--json` for scripting. `stats` accepts `--since 1h` for recent-window counters. `snapshot` accepts `--reset` to checkpoint counters before a noisy release/debugging session.
`stats` JSON:
```json
{
"cache_dir": "/Users/me/.config/gitcrawl/cache/gh-shim",
"entries": 142,
"expired": 6,
"locks": 0,
"bytes": 1841234,
"cache_hits": 629,
"total_reads": 641,
"hit_rate_percent": 98.1,
"counters": {
"local_hits": 540,
"fallback_hits": 88,
"stale_hits": 1,
"backend_misses": 12,
"pass_through_writes": 4,
"backend_misses_by_command": {
"run view": 7,
"api": 5
},
"backend_misses_by_route": {
"api repos/:owner/:repo/actions/runs/:id/logs": 3
},
"backend_misses_by_key": {
"api repos/openclaw/gitcrawl/actions/runs/123/logs -i": 2
}
},
"commands": {
"pr diff": { "entries": 30, "bytes": 184320 },
"release view": { "entries": 14, "bytes": 18230 }
}
}
```
`local_hits` are answered from SQLite; `fallback_hits` are answered from the fallthrough cache; `stale_hits` are expired successful cache entries served after a backend rate-limit response or while another process refreshes the key; `backend_misses` actually hit GitHub. The per-command, per-route, and per-key miss maps show which shapes still escape the cache, which is usually the fastest way to find the next optimization.
## Cache key composition
Cache keys are deterministic SHA-256 hashes of:
- A version tag (`v4`)
- The resolved gitcrawl config path
- The current working directory when the command depends on implicit repo resolution
- The `GH_HOST` env var
- The `GH_REPO` env var when the command relies on it for implicit repo resolution
- An explicit-scope marker for commands that include their own API path or repository
- For `gh pr diff`: the stable identity `pr-diff:owner/repo:number:head-sha` (when available)
- A canonicalized command argument vector, null-separated. Common equivalent forms such as `-R` vs. `--repo`, flag ordering, and `--json a,b` vs. `--json b,a` share the same cache key.
This isolates implicit repo reads in sibling checkouts while still coalescing explicit reads such as `gh api users/octocat`, `gh api repos/openclaw/openclaw/...`, and `gh repo view openclaw/gitcrawl` across those checkouts. Explicit reads ignore unrelated `GH_REPO` values so agents with different ambient repo settings still share cache entries when the command itself names the target. Concurrent cache misses use a lock file so one process populates the entry while peers wait for the result, instead of all of them firing at GitHub.
## What does not flow through the shim
- **Mutating commands** — `gh issue close`, `gh pr merge`, `gh pr comment`, `gh api -X POST`, etc. These pass straight through, increment `pass_through_writes`, and invalidate matching cache tags on success. Unknown mutation scope falls back to clearing all entries.
- **Auth flows** — `gh auth login`, `gh auth refresh`, etc. Always real `gh`.
- **Anything the shim does not recognize** — falls through unmodified.
## Agent integration
Pattern: replace `gh` with `gitcrawl-gh` (or symlink to `gh`) for every agent in the fleet, then keep your existing prompts and tools. Most read-only triage flows ("look up this issue", "check the PR status", "list open issues for this label") become local-only without any prompt changes.
For best results, schedule a periodic `gitcrawl refresh owner/repo` (every few minutes per repo, depending on activity) so the local mirror stays warm. `gitcrawl search --sync-if-stale` and the shim's auto-hydration handle the rest.
See [Automation](/automation/) for full agent recipes and JSON contracts.

138
docs/governance.md Normal file
View File

@ -0,0 +1,138 @@
---
title: Governance
nav_order: 10
permalink: /governance/
---
# Governance
{: .no_toc }
Maintainer overrides on top of the cluster algorithm. All changes are local; gitcrawl never writes back to GitHub.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## Why governance exists
The cluster algorithm is good but not perfect. Sometimes it misses an obvious duplicate, or glues two unrelated reports together, or picks a poor representative thread. Governance commands let you correct the result without re-tuning thresholds or re-running embeddings.
Every override is recorded with a reason and persists across `cluster`/`refresh` runs because durable cluster IDs are stable. The TUI exposes the same actions via right-click and the `a` action menu.
## Local close
Mark a thread or a cluster as "handled locally — do not show me this again."
```bash
gitcrawl close-thread owner/repo --number 123 --reason "duplicate handled"
gitcrawl close-thread owner/repo --number https://github.com/owner/repo/issues/123 --reason "duplicate handled"
gitcrawl reopen-thread owner/repo --number 123
gitcrawl close-cluster owner/repo --id 42 --reason "all members handled"
gitcrawl reopen-cluster owner/repo --id 42
```
The reason defaults to `CLI manual close` and is stored alongside the override for audit. Locally closed threads and clusters are filtered out by `--hide-closed` across `clusters`, `cluster-detail`, the TUI, and search.
This **does not** change anything on GitHub. It is purely a local triage signal — useful when you have already commented "duplicate of #X" on the upstream issue and want to clear it from your maintainer view.
JSON output:
```json
{ "repository": "owner/repo", "number": 123, "reason": "duplicate handled", "closed": true }
```
## Member exclusion
Pull a single thread out of a cluster, or pull it back in.
```bash
gitcrawl exclude-cluster-member owner/repo --id 42 --number 456 --reason "different repro"
gitcrawl exclude-cluster-member owner/repo --id 42 --number owner/repo#456 --reason "different repro"
gitcrawl include-cluster-member owner/repo --id 42 --number 456
```
Use this when the algorithm is mostly right but caught one false positive. The override travels with the cluster's stable ID, so re-clustering does not undo your decision.
JSON output:
```json
{
"repository": "owner/repo",
"override": { "cluster_id": 42, "thread_number": 456, "kind": "exclude", "reason": "different repro", "created_at": "..." },
"excluded": true
}
```
## Canonical member
Pin which thread represents the cluster — this is what shows up as the row title in `clusters` and the TUI summary.
```bash
gitcrawl set-cluster-canonical owner/repo --id 42 --number 123 --reason "main tracking issue"
```
The chosen `--number` must already be a member of the cluster. The TUI's right-click menu has a "set canonical" entry that calls this command.
All governance `--number` flags accept the same thread-reference forms as sync:
bare numbers, `#123`, `issues/123`, `pull/123`, `owner/repo#123`, and full
GitHub issue or pull request URLs. The command still applies only to the
`owner/repo` argument you pass to gitcrawl; URL input is accepted so copied
GitHub links can be pasted directly.
## Reopen and undo
There is no separate `undo`. The inverse commands are explicit:
| Action | Inverse |
| --- | --- |
| `close-thread` | `reopen-thread` |
| `close-cluster` | `reopen-cluster` |
| `exclude-cluster-member` | `include-cluster-member` |
| `set-cluster-canonical` | `set-cluster-canonical --number <other>` |
Each call records a fresh override row, so the audit history is preserved.
## Reading overrides
`gitcrawl cluster-detail` returns active overrides as part of the JSON payload, and `gitcrawl runs --kind cluster` lists when each clustering run was performed. To inspect raw override history you can query SQLite directly:
```bash
sqlite3 ~/.config/gitcrawl/gitcrawl.db \
"SELECT cluster_id, thread_number, kind, reason, created_at
FROM cluster_member_overrides ORDER BY created_at DESC LIMIT 20;"
```
(The schema is internal and may change between versions — prefer the JSON outputs from the CLI for stable contracts.)
## Workflow patterns
### "Triage this cluster, then move on"
```bash
gitcrawl cluster-detail owner/repo --id 42 --body-chars 600 | less
# ...read, decide canonical, add labels via gh, comment via gh...
gitcrawl set-cluster-canonical owner/repo --id 42 --number 123
gitcrawl close-cluster owner/repo --id 42 --reason "consolidated under #123"
```
### "This thread doesn't belong here"
```bash
gitcrawl exclude-cluster-member owner/repo --id 42 --number 456 --reason "different repro"
gitcrawl neighbors owner/repo --number 456 --limit 10 # find a better home manually
```
### "I'm done with this issue locally even though upstream is still open"
```bash
gitcrawl close-thread owner/repo --number 789 --reason "answered in chat"
```
The thread stays open on GitHub; only your local triage view hides it.
## What governance does *not* do
- It does not edit, label, comment on, or close GitHub issues. Use `gh` for that.
- It does not retrain embeddings or reshape the underlying graph — it overlays decisions on top of the algorithm output.
- It does not propagate to other gitcrawl installations unless you publish your database via a [portable store](/portable-stores/).

69
docs/index.md Normal file
View File

@ -0,0 +1,69 @@
---
title: Home
layout: home
nav_order: 1
description: "gitcrawl is a local-first GitHub issue and pull request crawler for maintainer triage."
permalink: /
---
# gitcrawl
{: .fs-9 }
A local-first GitHub triage tool **and** a drop-in caching `gh` shim. Sync issues and PRs into SQLite for search and clustering — then let agents call `gh` against that same cache so you stop burning the API rate limit.
{: .fs-6 .fw-300 }
[Quickstart](/quickstart/){: .btn .btn-primary .fs-5 .mb-4 .mb-md-0 .mr-2 }
[View on GitHub](https://github.com/openclaw/gitcrawl){: .btn .fs-5 .mb-4 .mb-md-0 }
---
## Two jobs, one binary
`gitcrawl` mirrors a GitHub repository's issues and pull requests into local SQLite, then layers semantic clustering, full-text search, and a `gh`-compatible shim on top — so a maintainer (or an agent acting on their behalf) can triage threads *and* serve everyday `gh` reads without burning live API quota.
- **Local SQLite first.** All issues, PRs, comments, reviews, files, commits, checks, and workflow runs land in `~/.config/gitcrawl/gitcrawl.db`. Queries hit the disk, not GitHub.
- **Drop-in `gh` cache.** Symlink `gitcrawl` as `gh` (or `gitcrawl-gh`) and most read-only calls (`gh search`, `gh issue/pr view`, `gh pr checks`, `gh run`, REST GETs, GraphQL queries) answer from local SQLite or the shim's fallthrough cache. Agents stop hitting rate limits; mutating commands pass through unchanged. Run `gh xcache stats` to see hit rate, per-command misses, and evictions.
- **Semantic clustering.** OpenAI embeddings group related reports, with deterministic GitHub reference evidence (`#123`, `pull/123`) preventing weak similarity bridges from forming mega-clusters.
- **Terminal UI.** `gitcrawl tui` is a keyboard- and mouse-driven cluster browser with bidirectional sort, jump-to-number, neighbors, and member-level governance actions.
- **Agent-friendly JSON.** Every command supports `--json` for clean automation surfaces.
---
## Pick your path
<div class="code-example" markdown="1">
### I want to try it
[Quickstart](/quickstart/) walks you from `git clone` to a populated cluster view in five minutes.
### I want to wire up an agent
The [`gh` shim](/gh-shim/) is the fastest way to cut GitHub API load — point your agent at `gitcrawl-gh`, keep the agent's `gh` calls intact.
### I want to triage a busy repo
Read [Sync](/sync/) to bring data local, then [Clustering](/clustering/) and the [TUI](/tui/) for the maintainer workflow.
### I want the full reference
[Commands](/commands/) lists every flag and JSON field. [Configuration](/configuration/) covers env vars and paths.
</div>
---
## Project status
Early bootstrap. The implementation is being built in small commits — see the [changelog](https://github.com/openclaw/gitcrawl/blob/main/CHANGELOG.md) for what shipped recently.
The product contract in [SPEC.md](https://github.com/openclaw/gitcrawl/blob/main/SPEC.md) is the source of truth for what is in and out of scope.
## Out of scope
- Local HTTP API
- Hosted service runtime
- Browser web UI
- GitHub write-back actions (use `gh` for those)
---
## License
Released under the [MIT license](https://github.com/openclaw/gitcrawl/blob/main/LICENSE).

93
docs/installation.md Normal file
View File

@ -0,0 +1,93 @@
---
title: Installation
nav_order: 2
permalink: /installation/
---
# Installation
{: .no_toc }
1. TOC
{:toc}
## Requirements
- **Go 1.26+** if building from source
- **Git** for cloning the repository (and for portable stores)
- **A GitHub token** for any command that talks to GitHub (`sync`, `refresh`, `gh` shim fallthroughs)
- **An OpenAI API key** only for `embed`, `refresh` (embed stage), and any future summary commands
- **`gh` CLI** if you want the shim to fall through to the real GitHub CLI for unsupported commands
gitcrawl runs on macOS and Linux. Windows is not actively tested.
## Install from Homebrew
```bash
brew install openclaw/tap/gitcrawl
```
Homebrew installs the `gitcrawl` binary. If you also want the GitHub CLI shim behavior, add a `gh` or `gitcrawl-gh` symlink as shown below.
## Install from a GitHub release
Each tagged release publishes archives for `darwin_amd64`, `darwin_arm64`, `linux_amd64`, and `linux_arm64` via [GoReleaser](https://github.com/openclaw/gitcrawl/blob/main/.goreleaser.yaml).
```bash
# Replace VERSION and PLATFORM with the values you want.
VERSION=v0.1.2
PLATFORM=darwin_arm64
mkdir -p "$HOME/bin"
curl -L "https://github.com/openclaw/gitcrawl/releases/download/${VERSION}/gitcrawl_${VERSION#v}_${PLATFORM}.tar.gz" \
| tar -xz -C "$HOME/bin" gitcrawl
gitcrawl --version
```
Browse the [releases page](https://github.com/openclaw/gitcrawl/releases) for the latest tag and the full asset list. Use a directory that is already on your `PATH`; `~/bin` and `~/.local/bin` avoid needing elevated permissions.
## Install from source
```bash
git clone https://github.com/openclaw/gitcrawl.git
cd gitcrawl
go build \
-ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(git describe --tags --always --dirty)" \
-o bin/gitcrawl ./cmd/gitcrawl
./bin/gitcrawl --version
```
Symlink or copy `bin/gitcrawl` somewhere on your `PATH` (`~/bin`, `/usr/local/bin`, `~/.local/bin`).
## Install the `gh` shim
The shim is the same binary. Symlink it as `gh` (replacing the real CLI) or as `gitcrawl-gh` (running side by side):
```bash
# Side-by-side install — agents can opt in by calling `gitcrawl-gh`.
mkdir -p "$HOME/bin"
ln -sf "$(command -v gitcrawl)" "$HOME/bin/gitcrawl-gh"
# Or replace the global `gh` so every agent picks up the cache automatically.
REAL_GH="$(command -v gh)" # capture this before shadowing gh
ln -sf "$(command -v gitcrawl)" "$HOME/bin/gh"
export GITCRAWL_GH_PATH="$REAL_GH" # point shim at the real gh
```
When invoked as `gh` or `gitcrawl-gh`, the binary auto-detects shim mode. See [the gh shim guide](/gh-shim/) for details.
## Verify the install
```bash
gitcrawl init # creates ~/.config/gitcrawl/{config.toml,gitcrawl.db,...}
gitcrawl doctor # confirms config, database, and credential discovery
gitcrawl doctor --json # same, machine-readable
```
`doctor` reports whether `GITHUB_TOKEN` and `OPENAI_API_KEY` are present, where they came from, the version, repository count, and the last sync timestamp. If anything is missing, the message tells you which env var or config field to set.
## Updating
- **Release archives:** download the new tarball and replace the binary.
- **Source builds:** `git pull && go build ...` — the version string comes from `git describe`.
- **Configuration is forward-compatible.** Existing `config.toml` and `gitcrawl.db` files are reused across versions; no migration step is needed for normal point releases.

125
docs/portable-stores.md Normal file
View File

@ -0,0 +1,125 @@
---
title: Portable stores
nav_order: 13
permalink: /portable-stores/
---
# Portable stores
{: .no_toc }
A Git-backed publish target for a `gitcrawl.db` plus its derived bodies — share a local cache across agents and machines without running a hosted service.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## When to use one
- You want every agent on a team to read from a shared, recently synced cache without each agent making its own GitHub calls.
- You want a backup of the SQLite cache that someone else can clone and use immediately.
- You want a deterministic snapshot of "what gitcrawl knew at time T" for reproducible triage.
A portable store is just a Git repository whose contents include a SQLite database. Anyone with read access to the repository can `git clone` it and have a fully populated gitcrawl mirror in seconds.
## Setup: pointing gitcrawl at a portable store
```bash
gitcrawl init \
--portable-store https://github.com/openclaw/gitcrawl-store.git \
--portable-db data/openclaw__openclaw.sync.db \
--store-dir ~/.config/gitcrawl/portable
```
`init` will:
1. Clone the portable store to `--store-dir`
2. Wire `~/.config/gitcrawl/config.toml` to use the database at `--portable-db` inside that checkout
3. Create the runtime cache, vector, and log directories in the standard locations
JSON output reports `portable_store_url`, `portable_store_dir`, and `portable_store: cloned|pulled|reset-pulled` so automation can tell what happened.
## How read-only commands behave
Read-only commands (`search`, `threads`, `clusters`, `cluster-detail`, `neighbors`, the TUI) refresh the portable-store checkout before reading, so they always see the latest published data:
- The refresh is best-effort and non-interactive
- SSH attempts are bounded so an offline remote does not hang the CLI
- Stale SQLite sidecars (WAL, SHM) are cleared after the pull so queries see freshly pulled data
- Local Git pull configuration that tries to rebase onto multiple branch merge refs is handled cleanly
If the remote is unreachable, the read still answers from the local checkout.
## How write commands behave
Write commands (`embed`, `refresh`, `cluster`, neighbor generation) need to persist new data without mutating the published portable store. They open a **writable runtime mirror** alongside the portable checkout so vectors and overrides land in the runtime cache while the portable database remains read-only.
This separation means:
- You can `gitcrawl embed` against a portable store without dirtying the Git checkout
- gh-shim exact-thread auto-hydration writes into the same runtime mirror
- Local cluster overrides (`close-cluster`, exclusions, canonicals) live in the runtime mirror
- Only the publishing workflow writes back into the portable checkout
## Publishing: `gitcrawl portable prune`
```bash
gitcrawl portable prune
gitcrawl portable prune --body-chars 256 # default
gitcrawl portable prune --body-chars 512 --no-vacuum
gitcrawl portable prune --json
```
`prune` converts the database into the portable v2 backup format and (by default) runs SQLite `VACUUM` to reclaim space. The result is a smaller database suitable for committing back to Git.
Portable v2 keeps the data agents most often need for offline GitHub reads:
- Repositories, issues, pull requests, labels, authors, and timestamps
- Compact issue/PR body excerpts plus original body lengths
- Compact comments, reviews, and review-comment excerpts plus original body lengths
- PR details, files, commits, status checks, and workflow runs
- Thread fingerprints used by duplicate and cluster-oriented workflows
It strips the data that is large, easy to regenerate, or mainly useful for exact API replay: raw GitHub JSON, generated documents and FTS indexes, embeddings and vectors, code snapshots and diff blobs, cluster run history, similarity edges, and blob storage. The database records this contract in `portable_metadata` with `schema=gitcrawl-portable-sync-v2`, `includes`, `excluded`, and `capabilities` keys.
| Flag | Default | Description |
| --- | --- | --- |
| `--body-chars <n>` | `256` | Maximum body characters to keep per thread/comment excerpt |
| `--no-vacuum` | _(off)_ | Skip the post-prune `VACUUM` |
| `--json` | _(off)_ | JSON output |
After pruning, commit and push the database file from the portable checkout the way you would for any Git repository.
## A typical publishing flow
```bash
# In the portable store checkout, refresh upstream data into the local runtime mirror.
gitcrawl refresh owner/repo
# Prune for a small, shareable footprint.
gitcrawl portable prune --body-chars 256
# Commit and push using normal Git.
cd ~/.config/gitcrawl/portable
git add data/openclaw__openclaw.sync.db
git commit -m "data: refresh openclaw/openclaw"
git push
```
Other agents and machines pull the new commit on their next read-only command.
## Cached search against a portable store
`gitcrawl search` (and the gh-shim's search) works against portable-store data with one wrinkle: when the portable store has been pruned, generated document indexes may not be present. Search falls back to compact thread title/body data automatically — you keep useful results without the publisher needing to ship the full document indexes.
The v2 backup also keeps comments and PR-detail tables, so common shim reads such as `gh issue view --json comments`, `gh pr view --json files,commits,statusCheckRollup`, `gh pr checks`, and `gh run list` can be answered from the shared checkout when those details were synced before publishing.
## Caveats
- The portable store carries the SQLite database. It does not carry the runtime fallthrough cache.
- Vectors regenerated on each consumer's machine after `embed` are not shared; portable pruning removes vector tables from the published database.
- Portable stores are read-mostly. Multiple writers pushing concurrently will race the way any Git workflow does — gate writes through a single publisher or a CI workflow.
## See also
- [Sync](/sync/) — what gets written into the database that ends up in the portable store
- [gh shim](/gh-shim/) — agents reading a shared portable store benefit doubly from the shim's local-first answers

134
docs/quickstart.md Normal file
View File

@ -0,0 +1,134 @@
---
title: Quickstart
nav_order: 3
permalink: /quickstart/
---
# Quickstart
{: .no_toc }
Five minutes from a clean machine to a populated cluster view.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## 1. Install and initialize
```bash
# Build (or download a release archive — see Installation).
git clone https://github.com/openclaw/gitcrawl.git
cd gitcrawl
mkdir -p "$HOME/bin"
go build -o "$HOME/bin/gitcrawl" ./cmd/gitcrawl
# Create config + database under ~/.config/gitcrawl.
gitcrawl init
```
Defaults written:
- `~/.config/gitcrawl/config.toml`
- `~/.config/gitcrawl/gitcrawl.db`
- `~/.config/gitcrawl/cache/`
- `~/.config/gitcrawl/vectors/`
- `~/.config/gitcrawl/logs/`
## 2. Set credentials
```bash
export GITHUB_TOKEN="<github-token>" # required for sync
export OPENAI_API_KEY="<openai-api-key>" # required for embeddings
```
Either set them in your shell profile or store them in `~/.config/gitcrawl/config.toml`:
```toml
[env]
GITHUB_TOKEN = "<github-token>"
OPENAI_API_KEY = "<openai-api-key>"
```
`gitcrawl doctor` confirms the credentials are visible and reports their source.
## 3. Sync a repository
```bash
gitcrawl sync openclaw/gitcrawl
```
By default this fetches **open** issues and pull requests, plus a sweep of recently closed rows so the local store does not rot. Add `--include-comments` for review threads, `--include-pr-details` (or `--with pr-details`) for PR files, commits, checks, and workflow runs.
Need exact rows? Use `--numbers`:
```bash
gitcrawl sync openclaw/gitcrawl --numbers 123,456 --include-comments
```
## 4. Embed and cluster
The `refresh` command runs sync → embed → cluster end to end:
```bash
gitcrawl refresh openclaw/gitcrawl
```
You can run the stages individually if you want finer control — see [Refresh and embed](/refresh-and-embed/) and [Clustering](/clustering/).
## 5. Browse clusters
Open the TUI:
```bash
gitcrawl tui openclaw/gitcrawl
# or just `gitcrawl tui` and the most recently synced repo is inferred
```
- `↑`/`↓` navigate clusters, `Enter` opens member detail
- `a` opens the action menu, `#` jumps to a number, `n` loads neighbors, `p` switches repo
- Right-click and mouse wheel work in most terminals
For a non-interactive view:
```bash
gitcrawl clusters openclaw/gitcrawl --sort size --min-size 5
gitcrawl cluster-detail openclaw/gitcrawl --id 12
gitcrawl neighbors openclaw/gitcrawl --number 123 --limit 10
```
## 6. Search the local cache
```bash
gitcrawl search openclaw/gitcrawl --query "download stalls" --mode hybrid
```
The same command also accepts the `gh search` shape, which makes it a drop-in for scripts that already speak `gh`:
```bash
gitcrawl search issues "manifest cache" \
-R openclaw/gitcrawl \
--state open \
--json number,title,state,url,updatedAt,labels \
--limit 30
```
Add `--sync-if-stale 5m` to refresh the local mirror first when it is older than the duration you tolerate.
## 7. Wire up the `gh` shim (optional)
```bash
mkdir -p "$HOME/bin"
ln -sf "$(command -v gitcrawl)" "$HOME/bin/gitcrawl-gh"
gitcrawl-gh search issues "download stalls" -R openclaw/gitcrawl --json number,title,url
gitcrawl-gh pr view 123 -R openclaw/gitcrawl --json number,title,state,url
gitcrawl-gh xcache stats
```
Most read-only `gh` calls answer locally; mutating commands pass straight through to the real `gh`. See [gh shim](/gh-shim/) for the full surface.
## Where to next
- [Concepts](/concepts/) — what threads, durable clusters, and embeddings actually mean
- [Sync](/sync/) — every flag for hydrating the local store
- [Clustering](/clustering/) — tuning the cluster graph for a specific repo
- [Automation](/automation/) — JSON contracts for agents and scripts

176
docs/reference.md Normal file
View File

@ -0,0 +1,176 @@
---
title: Reference
nav_order: 16
permalink: /reference/
---
# Reference
{: .no_toc }
Lookup tables for paths, environment variables, and defaults.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## Paths
| Path | Purpose |
| --- | --- |
| `~/.config/gitcrawl/config.toml` | Configuration file |
| `~/.config/gitcrawl/gitcrawl.db` | SQLite database |
| `~/.config/gitcrawl/cache/` | Caches (PR detail, gh-shim fallthrough) |
| `~/.config/gitcrawl/cache/gh-shim/` | gh-shim fallthrough cache |
| `~/.config/gitcrawl/vectors/` | Vector store backing embeddings |
| `~/.config/gitcrawl/logs/` | Operational logs |
| `~/.config/gitcrawl/portable/` | Portable-store checkout (when configured) |
Override the config file path with `--config <path>` or `GITCRAWL_CONFIG`.
## Environment variables
### Core
| Variable | Default | Used by | Purpose |
| --- | --- | --- | --- |
| `GITCRAWL_CONFIG` | `~/.config/gitcrawl/config.toml` | All commands | Override config path |
| `GITCRAWL_DB_PATH` | `~/.config/gitcrawl/gitcrawl.db` | All commands | Override database path |
| `GITHUB_TOKEN` | _(none)_ | `sync`, `refresh`, `gh` shim | GitHub API token |
| `OPENAI_API_KEY` | _(none)_ | `embed`, `refresh` | OpenAI API key |
### Models
| Variable | Default | Purpose |
| --- | --- | --- |
| `GITCRAWL_SUMMARY_MODEL` | `gpt-5.4` | Summary model (reserved for future commands) |
| `GITCRAWL_EMBED_MODEL` | `text-embedding-3-small` | OpenAI embedding model |
| `GITCRAWL_OPENAI_RETRY_DISABLED` | _(off)_ | Set `1` to disable OpenAI retry/backoff |
| `GITCRAWL_OPENAI_BASE_URL` / `OPENAI_BASE_URL` | OpenAI default | Custom OpenAI endpoint |
### GitHub overrides
| Variable | Default | Purpose |
| --- | --- | --- |
| `GITCRAWL_GITHUB_BASE_URL` / `GITHUB_BASE_URL` | GitHub default | Custom GitHub API endpoint |
| `GH_HOST` | _(none)_ | Included in gh-shim cache key |
| `GH_REPO` | _(none)_ | Default `-R` value; included in gh-shim cache key |
### gh shim
| Variable | Default | Purpose |
| --- | --- | --- |
| `GITCRAWL_GH_PATH` | _(probed)_ | Path to the real `gh` binary |
| `GITCRAWL_GH_AUTO_HYDRATE` | _(on)_ | Set `0` to disable PR auto-hydration on cache miss |
| `GITCRAWL_GH_CACHE_TTL` | `30s` for most commands | Override fallthrough cache TTL (e.g., `5m`, `1h`) |
| `GITCRAWL_GH_CACHE_ERRORS` | _(on)_ | Set `0` to avoid caching non-zero read-only fallthroughs |
## Configuration defaults
| Field | Default |
| --- | --- |
| `summary_model` | `gpt-5.4` |
| `embed_model` | `text-embedding-3-small` |
| `embed_dimensions` | `1024` |
| `embedding_basis` | `title_original` |
| `batch_size` (embeddings) | `64` |
| `concurrency` (embeddings) | `2` |
| `tui_default_sort` | `size` |
## Clustering defaults
| Parameter | Default | Source |
| --- | --- | --- |
| `--threshold` | `0.80` | `cluster`, `refresh` |
| `--cross-kind-threshold` | `0.93` | `cluster`, `refresh` |
| `--min-size` | `1` | `cluster`, `refresh` |
| `--max-cluster-size` | `40` | `cluster`, `refresh` |
| `--k` (nearest-neighbor fanout) | `16` | `cluster`, `refresh` |
| Weak-edge title overlap floor | `0.18` | internal |
| High-confidence edge score | `0.90` | internal |
| Deterministic reference edge score | `0.94` | internal |
| Body-only reference prefix length | `240` chars | internal |
## TUI defaults
| Parameter | Default |
| --- | --- |
| `--min-size` | `5` |
| `--sort` | `size` |
| Working set limit | `500` rows |
| Refresh interval | `15s` |
## gh shim cache TTLs
| Cache class | TTL |
| --- | --- |
| Most read-only fallthroughs | `5m`-`10m` |
| `gh run list` / run status | `30s` |
| `gh run view --log` / `--log-failed` | `12h` |
| `gh run view --job` | `1m` |
| `gh search ...` | `15m` |
| `gh release ...` | `1h` |
| `gh api` Actions run status | `30s` |
| `gh api` Actions job lists | `1m` active, `12h` completed |
| `gh api` workflow reads | `15m` |
| `gh api` Actions run/job logs | `12h` |
| `gh api` Pages metadata | `15m`-`30m` |
| `gh api` tagged/SHA contents | `7d` |
| `gh pr diff` without stable head SHA | `5m` |
| `gh pr diff` with stable head SHA | `7d` |
| Override | `GITCRAWL_GH_CACHE_TTL` |
| Stale-while-revalidate grace | command-aware; override with `GITCRAWL_GH_STALE_GRACE` |
| Caching of failed read-only fallthroughs | on by default; error TTL is capped (`2m` for rate-limit errors); disable with `GITCRAWL_GH_CACHE_ERRORS=0` |
## gh shim cache key composition
A SHA-256 hash of:
- Version tag (`v2`)
- Resolved gitcrawl config path
- Current working directory
- `GH_HOST` env var
- `GH_REPO` env var
- For `gh pr diff`: `pr-diff:owner/repo:number:head-sha` (when head SHA is known)
- Full command argument vector (null-separated)
This isolates sibling checkouts and portable stores while coalescing repeated calls from the same workspace.
## Output formats
| Format | Where to use |
| --- | --- |
| `text` | Human terminal use (default) |
| `json` | Pipelines, scripts, agents (also via `--json`) |
| `log` | Internal structured logging output |
## Exit codes
- `0` — success
- non-zero — usage error, "not implemented" command, or runtime failure
stderr always carries error messages. stdout is reserved for command output.
## File-system layout (worked example)
```
~/.config/gitcrawl/
├── config.toml
├── gitcrawl.db # SQLite mirror
├── gitcrawl.db-shm # SQLite shared-memory file
├── gitcrawl.db-wal # SQLite write-ahead log
├── cache/
│ ├── gh-shim/ # gh fallthrough cache; inspect with xcache
│ └── pr/ # hydrated PR detail blobs
├── vectors/ # vector store backing embeddings
├── logs/
└── portable/ # portable-store checkout (optional)
└── data/
└── owner__repo.sync.db
```
## See also
- [Configuration](/configuration/) — narrative version of this reference
- [Commands](/commands/) — every command and flag, in one table
- [SPEC.md](https://github.com/openclaw/gitcrawl/blob/main/SPEC.md) — product contract
- [CHANGELOG.md](https://github.com/openclaw/gitcrawl/blob/main/CHANGELOG.md) — what shipped recently

133
docs/refresh-and-embed.md Normal file
View File

@ -0,0 +1,133 @@
---
title: Refresh and embed
nav_order: 7
permalink: /refresh-and-embed/
---
# Refresh and embed
{: .no_toc }
`gitcrawl refresh` is the one command most users want. It runs sync → embed → cluster in order, with the same flags you would use individually.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## refresh
```bash
gitcrawl refresh owner/repo
```
By default this performs:
1. **Sync** — open + recently closed issues and PRs (see [Sync](/sync/))
2. **Embed** — fill `thread_vectors` for any thread whose document changed
3. **Cluster** — rebuild durable clusters with the standard thresholds
Disable any stage with `--no-sync`, `--no-embed`, or `--no-cluster`. The remaining stages still run; failures are reported per stage in the JSON output.
### Stage-specific flags
`refresh` forwards flags through to the underlying stages:
| Forwarded to | Flag |
| --- | --- |
| sync | `--since`, `--state`, `--limit`, `--include-comments` |
| embed | `--limit` |
| cluster | `--threshold` (0.80), `--min-size` (1), `--max-cluster-size` (40), `--k` (16), `--cross-kind-threshold` (0.93) |
`--include-code` is accepted but currently a no-op.
### JSON output
```bash
gitcrawl refresh owner/repo --json
```
```json
{
"repository": "owner/repo",
"sync": { "selected": 124, "inserted": 12, "updated": 9, "run_id": 42 },
"embed": { "selected": 21, "embedded": 21, "skipped": 0, "failed": 0, "model": "text-embedding-3-small", "run_id": 43 },
"cluster": {
"threshold": 0.8, "cross_kind": 0.93, "min_size": 1, "max_size": 40, "k": 16,
"vector_count": 312, "edge_count": 1042, "cluster_count": 87, "member_count": 312, "run_id": 44
}
}
```
Each stage object mirrors the JSON shape of the standalone command. You can read the per-stage `run_id` later via `gitcrawl runs --kind sync|embedding|cluster`.
## embed
```bash
gitcrawl embed owner/repo
```
Generates OpenAI embeddings for any thread whose document hash has changed since its last embedding. Works through the database in batches (default size 64) with bounded concurrency (default 2).
### Flags
| Flag | Default | Description |
| --- | --- | --- |
| `--number <ref>` | _(any)_ | Embed a single issue/PR by number or copied GitHub URL |
| `--limit <n>` | _(no limit)_ | Maximum rows to embed in this run |
| `--force` | _(off)_ | Re-embed every selected row, ignoring content hash |
| `--include-closed` | _(off)_ | Include closed threads |
`--number` accepts bare numbers, `#123`, `issues/123`, `pull/123`,
`owner/repo#123`, and full GitHub issue or pull request URLs.
### When to `--force`
You should rarely need it. The pipeline auto-forces a rebuild when:
- The configured embedding model changes (`GITCRAWL_EMBED_MODEL` or `embed_model` in config)
- The embedding input rune cap changes (so older, larger-cap vectors are not silently mixed in)
Use `--force` yourself if you have hand-edited vectors or want to confirm that an output is reproducible from scratch.
### Failure handling
OpenAI errors are retried with backoff unless `GITCRAWL_OPENAI_RETRY_DISABLED=1`. The JSON output includes a `failures` array with batch-level diagnostics (`batch_start`, `batch_end`, `attempts`, `status`, `type`, `code`, `message`) so partial failures do not silently drop rows.
Oversized inputs are capped before being sent upstream so a single huge body cannot exceed the model's input limit.
### JSON output
```json
{
"repository": "owner/repo",
"model": "text-embedding-3-small",
"basis": "title_original",
"selected": 21,
"embedded": 20,
"skipped": 0,
"failed": 1,
"retries": 3,
"status": "ok",
"failures": [
{ "batch_start": 16, "batch_end": 17, "attempts": 3, "status": 429, "type": "rate_limit", "code": "rate_limit_exceeded", "message": "..." }
],
"run_id": 43
}
```
## runs
Inspect what `refresh`, `sync`, `embed`, or `cluster` actually did:
```bash
gitcrawl runs owner/repo --kind sync # default kind
gitcrawl runs owner/repo --kind embedding
gitcrawl runs owner/repo --kind cluster
```
Each row carries `started_at`, `finished_at`, `status`, and `stats_json` — useful when an agent needs to know whether a sync is fresh enough or whether the last cluster pass converged.
## Cost notes
- **GitHub.** Sync uses standard REST endpoints; the API quota is the dominant cost on busy repos. Use `--include-comments` and `--with pr-details` selectively.
- **OpenAI.** `text-embedding-3-small` is inexpensive but not free. `embed` is bounded by `--limit` if you want to stay under a budget on initial backfills.
- **Disk.** Vectors, generated documents, and raw API payloads grow with the repo. The portable-store flow includes `gitcrawl portable prune` to keep published payloads small while retaining compact comments and PR details — see [Portable stores](/portable-stores/).

127
docs/search.md Normal file
View File

@ -0,0 +1,127 @@
---
title: Search
nav_order: 8
permalink: /search/
---
# Search
{: .no_toc }
Local full-text and semantic search over the SQLite mirror, plus a `gh search`-compatible surface for scripts.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## Why local search
`gitcrawl search` runs against the local SQLite cache and the local vector store. It does not consume GitHub REST search quota and it returns deterministically ordered hits with full thread metadata. It is intended for **discovery**, not for write actions — use `gh` for the final live verification before commenting, closing, labeling, or merging.
## Direct mode
```bash
gitcrawl search owner/repo --query "download stalls"
gitcrawl search owner/repo --query "manifest cache" --mode hybrid --limit 30 --json
```
| Flag | Default | Description |
| --- | --- | --- |
| `--query <text>` | _(required)_ | Search text |
| `--mode keyword\|semantic\|hybrid` | `keyword` | `keyword` uses SQLite FTS, `semantic` uses vector cosine, `hybrid` blends them |
| `--limit <n>` | _(implementation default)_ | Maximum hits |
**Hybrid mode** is the most robust choice — it blends full-text recall with semantic neighbors so typos, synonyms, and stack-trace fragments still surface relevant rows.
JSON output:
```json
{
"repository": "owner/repo",
"query": "download stalls",
"mode": "hybrid",
"hits": [
{ "number": 123, "kind": "issue", "title": "...", "score": 0.81, "url": "...", "updated_at": "..." }
]
}
```
## `gh search` compatibility mode
The same command also accepts the `gh search` shape so scripts that already speak `gh` work without rewriting:
```bash
gitcrawl search issues "download stalls" \
-R owner/repo \
--state open \
--json number,title,state,url,updatedAt,labels \
--limit 30
gitcrawl search prs "manifest cache" \
-R owner/repo \
--state open \
--json number,title,state,url,updatedAt,isDraft,author \
--limit 20
```
Recognized flags in this mode:
| Flag | Description |
| --- | --- |
| `-R` / `--repo` | Target repository (also reads `GH_REPO`) |
| `--state open\|closed\|all` | Issue state filter |
| `--json` | Comma-separated field list (gh-compatible) |
| `--limit` / `-L` | Maximum rows |
| `--match` | Accepted for parity; the local FTS index already covers documents |
| `--sort` / `--order` | Accepted for parity |
| `--sync-if-stale <duration>` | Run one metadata sync first if the local mirror is older than the duration |
The output shape matches `gh search issues|prs --json ...` exactly so you can pipe into the same `jq` filters you already have.
## `--sync-if-stale`
```bash
gitcrawl search issues "hot loop" \
-R owner/repo \
--state open \
--sync-if-stale 5m \
--json number,title,url
```
If the most recent successful sync for this repo is older than `5m`, gitcrawl runs one metadata sync first and then answers the search from the freshly populated cache. The search result still comes from SQLite — only the sync triggered by a failed staleness check talks to GitHub.
This is the right pattern for agents: keep latency predictable on cache hits, and bound the staleness window for everything else.
## Search vs. the `gh` shim
There are two ways to run cached searches:
| Command | Best for |
| --- | --- |
| `gitcrawl search issues\|prs ...` | Human use; mixes naturally with the rest of the gitcrawl CLI |
| `gitcrawl gh search issues\|prs ...` | Agents and scripts that call `gh` directly — when symlinked as `gh` or `gitcrawl-gh`, it is invisible to callers |
Both paths share the same local cache and produce gh-shaped JSON. The shim adds the additional `gh issue/pr view`, `gh issue/pr list`, `gh pr checks`, `gh run`, and `xcache` surface — see [gh shim](/gh-shim/).
## Combining with sync
A common discovery pattern:
```bash
# 1. Find candidates locally.
NUMS=$(gitcrawl search issues "download stalls" -R owner/repo \
--json number --limit 20 \
| jq -r '[.[].number] | join(",")')
# 2. Hydrate them with comments + PR detail in one round-trip.
gitcrawl sync owner/repo --numbers "$NUMS" --include-comments --with pr-details
# 3. Re-query with full conversational context (or open in TUI).
gitcrawl tui owner/repo
```
## Limits
- The keyword index covers titles, bodies, and (when synced) comments and review comments.
- Semantic search relies on the local vector store. Run `gitcrawl embed` first.
- Hybrid mode degrades gracefully: with no vectors, it behaves like keyword.
- Closed threads are included in the FTS index when synced; locally closed threads are filtered out by the `--hide-closed` flag where applicable.

BIN
docs/social-card.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

75
docs/social-card.svg Normal file
View File

@ -0,0 +1,75 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630" viewBox="0 0 1200 630" role="img" aria-labelledby="title desc">
<title id="title">gitcrawl social card</title>
<desc id="desc">Local-first GitHub issue and pull request crawler for maintainer triage.</desc>
<defs>
<linearGradient id="bg" x1="0" y1="0" x2="1" y2="1">
<stop offset="0" stop-color="#f8fafc"/>
<stop offset="0.48" stop-color="#eef6ff"/>
<stop offset="1" stop-color="#f5f7fb"/>
</linearGradient>
<linearGradient id="panel" x1="0" y1="0" x2="1" y2="1">
<stop offset="0" stop-color="#111827"/>
<stop offset="1" stop-color="#0b1020"/>
</linearGradient>
<linearGradient id="accent" x1="0" y1="0" x2="1" y2="1">
<stop offset="0" stop-color="#2563eb"/>
<stop offset="1" stop-color="#14b8a6"/>
</linearGradient>
<filter id="shadow" x="-10%" y="-10%" width="120%" height="130%">
<feDropShadow dx="0" dy="22" stdDeviation="22" flood-color="#0f172a" flood-opacity="0.18"/>
</filter>
</defs>
<rect width="1200" height="630" fill="url(#bg)"/>
<path d="M76 512C201 462 311 473 421 545C531 617 681 600 804 529C943 449 1034 456 1128 507V630H76Z" fill="#e1eefb"/>
<path d="M910 64h190v10H910zM910 94h144v10H910zM910 124h166v10H910z" fill="#d6e2f0"/>
<g transform="translate(76 72)">
<rect x="0" y="0" width="118" height="118" rx="26" fill="#0f1115"/>
<circle cx="38" cy="38" r="9" fill="#60a5fa"/>
<circle cx="38" cy="80" r="9" fill="#2dd4bf"/>
<circle cx="80" cy="59" r="9" fill="#93c5fd"/>
<path d="M38 47v24M46 41c12 6 20 10 34 17M46 77c12-6 20-10 34-17" stroke="#c7f9ff" stroke-width="6" stroke-linecap="round" fill="none"/>
</g>
<g transform="translate(76 232)">
<text x="0" y="0" fill="#0f172a" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="88" font-weight="800" letter-spacing="0">gitcrawl</text>
<text x="3" y="68" fill="#334155" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="32" font-weight="600" letter-spacing="0">Local-first GitHub triage.</text>
<text x="3" y="121" fill="#64748b" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="25" font-weight="500" letter-spacing="0">Search locally. Cache gh reads.</text>
<text x="3" y="159" fill="#64748b" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="25" font-weight="500" letter-spacing="0">Save the rate limit.</text>
</g>
<g transform="translate(76 486)">
<rect x="0" y="0" width="196" height="44" rx="10" fill="#0f172a"/>
<text x="22" y="29" fill="#e0f2fe" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="19" font-weight="600" letter-spacing="0">gitcrawl.sh</text>
<rect x="216" y="0" width="210" height="44" rx="10" fill="#ffffff" stroke="#cbd5e1"/>
<text x="238" y="29" fill="#334155" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="18" font-weight="600" letter-spacing="0">gh xcache stats</text>
</g>
<g transform="translate(640 118)" filter="url(#shadow)">
<rect x="0" y="0" width="474" height="338" rx="22" fill="url(#panel)"/>
<rect x="0" y="0" width="474" height="46" rx="22" fill="#182033"/>
<path d="M0 28h474v18H0z" fill="#182033"/>
<circle cx="28" cy="23" r="6" fill="#f87171"/>
<circle cx="49" cy="23" r="6" fill="#facc15"/>
<circle cx="70" cy="23" r="6" fill="#34d399"/>
<text x="30" y="86" fill="#93c5fd" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="19" font-weight="600" letter-spacing="0">$ gitcrawl search prs cache</text>
<text x="30" y="126" fill="#e5e7eb" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="18" letter-spacing="0">#77622 hydrated PR details</text>
<text x="30" y="160" fill="#e5e7eb" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="18" letter-spacing="0">#77481 Actions log cache</text>
<text x="30" y="194" fill="#e5e7eb" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="18" letter-spacing="0">#77109 local issue thread</text>
<rect x="30" y="228" width="414" height="70" rx="14" fill="#0b1326" stroke="#334155"/>
<text x="54" y="260" fill="#a7f3d0" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="16" letter-spacing="0">local_hits 112</text>
<text x="54" y="284" fill="#bfdbfe" font-family="JetBrains Mono, SFMono-Regular, Menlo, monospace" font-size="16" letter-spacing="0">fallback_hits 70</text>
<path d="M272 264h112" stroke="url(#accent)" stroke-width="10" stroke-linecap="round"/>
</g>
<g transform="translate(858 450)">
<path d="M40 32C92 0 145 6 190 40M40 32c51 55 105 70 178 42M190 40c19 13 29 24 28 34" stroke="#2563eb" stroke-width="5" stroke-linecap="round" fill="none"/>
<circle cx="40" cy="32" r="15" fill="#2563eb"/>
<circle cx="190" cy="40" r="15" fill="#14b8a6"/>
<circle cx="218" cy="74" r="15" fill="#38bdf8"/>
<circle cx="112" cy="86" r="10" fill="#64748b"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 5.0 KiB

160
docs/sync.md Normal file
View File

@ -0,0 +1,160 @@
---
title: Sync
nav_order: 6
permalink: /sync/
---
# Sync
{: .no_toc }
Bring GitHub issues and pull requests into local SQLite. Idempotent, incremental, and tunable per workflow.
{: .fs-6 .fw-300 }
1. TOC
{:toc}
## The default
```bash
gitcrawl sync owner/repo
```
This fetches **open** issues and pull requests for the repository. To keep local state from rotting, an incremental sync also sweeps recently closed items so that issues and PRs closed between runs are reflected locally.
A sync writes:
- `repositories` — repo metadata
- `threads` — issues and PRs (titles, bodies, authors, labels, state, timestamps)
- `documents` — canonical thread documents (when bodies change)
- `run_records` — sync run statistics
## State filters
```bash
gitcrawl sync owner/repo --state open # default
gitcrawl sync owner/repo --state closed # only closed
gitcrawl sync owner/repo --state all # full backfill
```
`--state all` is the right choice for a one-shot historical backfill on a new repository. After that, the default `--state open` (with its closed sweep) is enough for ongoing freshness.
## Time-windowed sync
```bash
gitcrawl sync owner/repo --since 2026-04-01T00:00:00Z
```
`--since` accepts an RFC 3339 timestamp and limits the GitHub query to threads updated after that point. Combine with `--state` to scope tightly:
```bash
gitcrawl sync owner/repo --state all --since 2026-04-01T00:00:00Z
```
## Exact rows
```bash
gitcrawl sync owner/repo --numbers 123,456 --include-comments
gitcrawl sync owner/repo --numbers https://github.com/owner/repo/issues/123 --with pr-details
```
`--numbers` is the safest way to refresh specific issues or PRs — it bypasses list ordering and the updated-time window, fetching exactly the rows you ask for. Pair it with `--include-comments` and/or `--include-pr-details` to hydrate the conversation and PR-only data at the same time.
`--numbers` accepts comma-separated thread references, not just integers:
`123`, `#123`, `issues/123`, `pull/123`, `owner/repo#123`, and full GitHub
issue or pull request URLs.
This is also what the `gh` shim uses internally for [auto-hydration](/gh-shim/#auto-hydration).
## Hydration depth
| Flag | What it adds |
| --- | --- |
| `--include-comments` | Issue comments, PR review comments, reviews |
| `--include-pr-details` | PR files, commits, status checks, workflow runs |
| `--with pr-details` | Same as `--include-pr-details` (gh-style flag) |
PR details land in `pr_files`, `pr_commits`, `pr_checks`, and `pr_runs` tables and back the `gh pr view`, `gh pr checks`, and `gh run list/view` shim paths. See [gh shim](/gh-shim/).
`--include-code` is accepted for compatibility but is currently a no-op.
## Limit and pagination
```bash
gitcrawl sync owner/repo --limit 200
```
`--limit` caps the number of rows fetched in this invocation. The underlying GitHub paginator surfaces total page counts in run records and honors GitHub's `Retry-After` and rate-limit response headers, so partial syncs interrupted by rate limiting resume cleanly.
## JSON output
```bash
gitcrawl sync owner/repo --json
```
```json
{
"repository": "owner/repo",
"state": "open",
"since": "",
"selected": 124,
"inserted": 12,
"updated": 9,
"deleted": 0,
"comments_inserted": 0,
"comments_updated": 0,
"reviews_inserted": 0,
"pr_files_inserted": 0,
"pr_commits_inserted": 0,
"run_id": 42,
"started_at": "2026-05-05T07:30:11Z",
"finished_at": "2026-05-05T07:30:43Z"
}
```
## Common workflows
### First-time setup for a repo
```bash
gitcrawl sync owner/repo --state all --include-comments
gitcrawl embed owner/repo
gitcrawl cluster owner/repo
```
Or in one step:
```bash
gitcrawl refresh owner/repo --include-comments
```
### Periodic incremental refresh
```bash
gitcrawl sync owner/repo
```
The closed sweep keeps the open list honest without paying for a full backfill.
### Pull a specific issue + comments + PR detail
```bash
gitcrawl sync owner/repo --numbers 123 --include-comments --with pr-details
```
### Refresh a batch you got from search
```bash
NUMS=$(gitcrawl gh search issues "manifest cache" -R owner/repo --json number --limit 20 \
| jq -r '[.[].number] | join(",")')
gitcrawl sync owner/repo --numbers "$NUMS" --with pr-details
```
## Required credentials
`sync` requires a GitHub token. gitcrawl resolves it from `GITHUB_TOKEN`, the `[env]` table in `config.toml`, or from `gh auth token` if the real `gh` CLI is installed and authenticated. `gitcrawl doctor` reports the source.
## See also
- [Refresh and embed](/refresh-and-embed/) — the wrapper that runs sync, embed, and cluster end to end
- [gh shim](/gh-shim/) — how synced PR details power `gh pr view` / `gh pr checks` / `gh run` from local cache
- [Portable stores](/portable-stores/) — sharing the synced cache across machines

122
docs/tui.md Normal file
View File

@ -0,0 +1,122 @@
---
title: TUI
nav_order: 11
permalink: /tui/
---
# TUI
{: .no_toc }
`gitcrawl tui` is the interactive cluster browser. Keyboard-first, mouse-friendly, refreshes from local SQLite every 15 seconds.
{: .fs-6 .fw-300 }
It is also the reference terminal interaction model for the crawl app family.
The shared `crawlkit/tui` browser used by Slack, Discord, and Notion archives
is expected to match its pane focus, sortable headers, mouse/right-click
actions, detail rendering, and status chrome wherever the data model allows it.
1. TOC
{:toc}
## Launching
```bash
gitcrawl tui owner/repo
gitcrawl tui # infers the most recently updated local repo
gitcrawl tui --min-size 5 # default; show clusters with ≥5 active members
gitcrawl tui --sort recent # alternate sort
gitcrawl tui --hide-closed # focus only on currently open clusters
```
| Flag | Default | Description |
| --- | --- | --- |
| `--min-size <n>` | `5` | Minimum active member count |
| `--sort recent\|oldest\|size` | `size` | Cluster ordering |
| `--limit <n>` | `500` | Working-set cap (rows fetched into the TUI) |
| `--hide-closed` | _(off)_ | Hide locally closed clusters |
| `--include-closed` | _(deprecated)_ | Closed clusters are included by default |
| `--json` | _(off)_ | Emit a non-interactive JSON snapshot instead of launching the UI |
When `--json` is passed, the TUI command produces the same cluster summary the interactive view would render — useful for CI checks or for an agent that wants the same view a human would see.
## Default behavior
The TUI starts at `--min-size 5` and `--sort size` so the first screen is the useful triage workload, not singleton noise. Pass `--min-size 1` when you intentionally want singletons (e.g., looking for orphans).
The view auto-refreshes from the local store every 15 seconds. There is no GitHub call from the TUI itself — to bring in fresh upstream data, run `gitcrawl sync` (or `refresh`) in another terminal and the TUI picks it up on the next tick.
## Keyboard
| Key | Action |
| --- | --- |
| `↑` / `↓` | Move within the active pane |
| `Tab` / `Shift+Tab` | Switch panes |
| `Enter` | Open detail for selected cluster or member; on a member, loads neighbors first |
| `a` | Open the action menu (cluster or member, depending on focus) |
| `#` | Jump to a specific issue or PR number or copied GitHub issue/PR URL |
| `n` | Load neighbors for the selected issue or PR |
| `p` | Switch between repositories already present in the local store |
| `s` | Cycle sort mode (`size` ↔ `recent` ↔ `oldest`, both directions) |
| `/` | Filter rows by substring |
| `q` | Quit |
The action menu opened with `a` mirrors the right-click menu, so every mouse action has a keyboard equivalent.
Jump input accepts the same thread references as the CLI: bare numbers, `#123`,
`issues/123`, `pull/123`, `owner/repo#123`, and full GitHub issue or pull
request URLs.
## Mouse
Mouse support is built in and works in most modern terminals (iTerm2, Kitty, Alacritty, WezTerm, recent macOS Terminal):
- **Click** a row to select it
- **Double-click** to open detail
- **Wheel** scrolls the focused pane
- **Right-click** opens the cluster or member action menu
- **Trackpad scroll** is buffered to avoid jumpy redraws
If your terminal does not pass through mouse events, all actions remain available via keyboard.
## Action menu
Cluster actions:
- Copy issue/PR URL or number
- Sort cluster members
- Filter to a member subset
- Jump to a referenced issue or PR
- Open canonical thread on GitHub
- Load neighbors for the canonical
- Local close / reopen
- Set canonical member
- Exclude / include member
Member actions:
- Copy URL / number
- Load neighbors
- Open on GitHub
- Local close / reopen this thread
- Exclude from cluster
These map directly onto the [governance](/governance/) commands. Anything you can do interactively, you can also script.
## Display rules
`gitcrawl clusters` and the TUI use the same display rules:
- Latest raw run clusters first
- Closed durable rows merged in as historical context
- Default sort is `size` (largest active membership first)
- GitHub-closed members are hidden from the latest-run view; pass `--include-closed` to see the full historical cluster
For an audit-style view that does not merge with the latest run, use `gitcrawl durable-clusters --include-closed`.
## Tips
- Resize your terminal — the panes reflow.
- A single repo with thousands of threads is fine; the working set is capped at 500 rows by default (tunable with `--limit`) so the UI stays snappy.
- Run `gitcrawl refresh owner/repo` periodically in a sibling terminal; the TUI reflects new data on the next 15s tick.
- If the cluster you are looking for is missing, check `--min-size` and `--hide-closed`.
- The status bar at the bottom shows the active sort, filter, repo, and any warnings (e.g., "vector model mismatch — re-run embed").

24
go.mod
View File

@ -6,37 +6,37 @@ require (
github.com/charmbracelet/bubbles v1.0.0
github.com/charmbracelet/bubbletea v1.3.10
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
github.com/charmbracelet/x/ansi v0.11.6
github.com/mattn/go-isatty v0.0.20
github.com/pelletier/go-toml/v2 v2.3.0
modernc.org/sqlite v1.50.0
github.com/charmbracelet/x/ansi v0.11.7
github.com/mattn/go-isatty v0.0.22
github.com/vincentkoc/crawlkit v0.4.1
)
require (
github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/colorprofile v0.4.1 // indirect
github.com/charmbracelet/colorprofile v0.4.3 // indirect
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
github.com/clipperhouse/displaywidth v0.9.0 // indirect
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
github.com/clipperhouse/displaywidth v0.11.0 // indirect
github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.23 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/text v0.30.0 // indirect
modernc.org/libc v1.72.0 // indirect
golang.org/x/text v0.36.0 // indirect
modernc.org/libc v1.72.1 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect
modernc.org/sqlite v1.50.0 // indirect
)

65
go.sum
View File

@ -8,24 +8,22 @@ github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5f
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q=
github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q=
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 h1:ZR7e0ro+SZZiIZD7msJyA+NjkCNNavuiPBLgerbOziE=
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834/go.mod h1:aKC/t2arECF6rNOnaKaVU6y4t4ZeHQzqfxedE/VkVhA=
github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI=
github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ=
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
@ -36,14 +34,14 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4=
github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw=
github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
@ -52,32 +50,33 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/vincentkoc/crawlkit v0.4.1 h1:qDUF+Kk7nqADmpGMcnWTHEQMiX3bSD2DdFywKyT3kWs=
github.com/vincentkoc/crawlkit v0.4.1/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI=
golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s=
golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0=
modernc.org/cc/v4 v4.28.1 h1:XpLbkYVQ24E8tX5u8+yWGvaxerxkR/S4zqxI8ZoSBuc=
modernc.org/cc/v4 v4.28.1/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
modernc.org/ccgo/v4 v4.33.0 h1:dspBCm75jsj8Y/ufwAMVfe375L2iYdMyQ2QG/v3hL54=
modernc.org/ccgo/v4 v4.33.0/go.mod h1:+RhXBoRYzRwaH21mV/aj6XvQRDtfjcZfAlPMsQo8CR0=
modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
@ -86,14 +85,14 @@ modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
modernc.org/libc v1.72.1 h1:db1xwJ6u1kE3KHTFTTbe2GCrczHPKzlURP0aDC4NGD0=
modernc.org/libc v1.72.1/go.mod h1:HRMiC/PhPGLIPM7GzAFCbI+oSgE3dhZ8FWftmRrHVlY=
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
modernc.org/sqlite v1.50.0 h1:eMowQSWLK0MeiQTdmz3lqoF5dqclujdlIKeJA11+7oM=

View File

@ -7,6 +7,7 @@ import (
"flag"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
@ -23,6 +24,7 @@ import (
"github.com/openclaw/gitcrawl/internal/store"
"github.com/openclaw/gitcrawl/internal/syncer"
"github.com/openclaw/gitcrawl/internal/vector"
"github.com/vincentkoc/crawlkit/control"
)
const (
@ -39,6 +41,9 @@ const (
)
var threadReferencePattern = regexp.MustCompile(`(?i)(?:\b[\w.-]+/[\w.-]+#(\d+)|(?:issues|pull)/(\d+)|#(\d{2,}))`)
var githubThreadURLPattern = regexp.MustCompile(`(?i)^https?://github\.com/([\w.-]+)/([\w.-]+)/(?:issues|pull)/(\d+)(?:[/?#].*)?$`)
var ownerRepoThreadPattern = regexp.MustCompile(`(?i)^([\w.-]+)/([\w.-]+)#(\d+)$`)
var pathThreadPattern = regexp.MustCompile(`(?i)(?:^|/)(?:issues|pull)/(\d+)(?:[/?#].*)?$`)
var titleTokenPattern = regexp.MustCompile(`[A-Za-z0-9]{4,}`)
type referenceEvidence struct {
@ -124,12 +129,16 @@ func (a *App) Run(ctx context.Context, args []string) error {
switch rest[0] {
case "version":
return a.writeOutput("version", map[string]string{"version": version}, false)
case "metadata":
return a.runMetadata(rest[1:])
case "serve":
return usageErr(fmt.Errorf("serve is not supported in gitcrawl"))
case "init":
return a.runInit(ctx, rest[1:])
case "doctor":
return a.runDoctor(ctx, rest[1:])
case "status":
return a.runStatus(ctx, rest[1:])
case "sync":
return a.runSync(ctx, rest[1:])
case "threads":
@ -152,6 +161,8 @@ func (a *App) Run(ctx context.Context, args []string) error {
return a.runRuns(ctx, rest[1:])
case "search":
return a.runSearch(ctx, rest[1:])
case "gh":
return a.runGHShim(ctx, rest[1:])
case "configure":
return a.runConfigure(rest[1:])
case "refresh":
@ -453,7 +464,7 @@ func (a *App) runNeighbors(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
number, err := parseRequiredPositiveInt("number", *numberRaw)
number, err := parseRequiredThreadNumber("number", *numberRaw)
if err != nil {
return usageErr(err)
}
@ -688,7 +699,7 @@ func (a *App) runEmbed(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
number, err := parseOptionalPositiveInt(*numberRaw)
number, err := parseOptionalThreadNumber(*numberRaw)
if err != nil {
return usageErr(err)
}
@ -783,7 +794,7 @@ func (a *App) embedRepository(ctx context.Context, owner, repoName string, optio
fmt.Fprintf(a.Stderr, "[embed] embedding %d-%d of %d (attempt %d)\n", batch.start+1, batch.end, len(tasks), batch.attempts)
if batch.attempts == 1 {
if truncated := truncatedEmbeddingTaskCount(slice); truncated > 0 {
fmt.Fprintf(a.Stderr, "[embed] truncated %d input(s) to %d runes\n", truncated, store.MaxEmbeddingTextRunes)
fmt.Fprintf(a.Stderr, "[embed] truncated %d input(s) to embedding input budget (%d runes/%d bytes)\n", truncated, store.MaxEmbeddingTextRunes, store.MaxEmbeddingTextBytes)
}
}
vectors, err := client.Embed(ctx, rt.Config.OpenAI.EmbedModel, texts)
@ -1075,23 +1086,35 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
rt, err = a.openLocalRuntimeReadOnly(ctx)
}
if err != nil {
if !interactive && errors.Is(err, os.ErrNotExist) {
cfg := config.Default()
if cfgErr := cfg.Normalize(); cfgErr != nil {
return cfgErr
}
sort, sortErr := resolveTUISort(*sortMode, cfg)
if sortErr != nil {
return sortErr
}
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, cfg, cfg.DBPath, sort, minSize, limit, *hideClosed), true)
}
return err
}
defer rt.Store.Close()
repo, inferred, err := a.resolveOptionalRepository(ctx, rt, fs.Args())
if err != nil {
if !interactive && len(fs.Args()) == 0 && strings.Contains(err.Error(), "no local repositories found") {
sort, sortErr := resolveTUISort(*sortMode, rt.Config)
if sortErr != nil {
return sortErr
}
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, rt.Config, rt.SourceDBPath, sort, minSize, limit, *hideClosed), true)
}
return err
}
sort := strings.TrimSpace(*sortMode)
if sort == "" {
sort = strings.TrimSpace(rt.Config.TUI.DefaultSort)
}
if sort == "" {
sort = "size"
}
if sort != "recent" && sort != "oldest" && sort != "size" {
return usageErr(fmt.Errorf("unsupported sort %q", sort))
sort, err := resolveTUISort(*sortMode, rt.Config)
if err != nil {
return err
}
showClosed := !*hideClosed || *includeClosed
@ -1146,6 +1169,38 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
return a.runInteractiveTUI(ctx, rt.Store, repo.ID, payload)
}
// resolveTUISort determines the effective cluster sort mode for the TUI.
//
// The trimmed command-line value raw wins when non-empty; otherwise the
// trimmed cfg.TUI.DefaultSort is used, and if that is also empty the mode
// falls back to "size". Only "recent", "oldest", and "size" are accepted;
// any other value yields an error wrapped by usageErr.
func resolveTUISort(raw string, cfg config.Config) (string, error) {
	mode := strings.TrimSpace(raw)
	if mode == "" {
		// No explicit flag value: consult the configured default, then the built-in one.
		if mode = strings.TrimSpace(cfg.TUI.DefaultSort); mode == "" {
			mode = "size"
		}
	}
	switch mode {
	case "recent", "oldest", "size":
		return mode, nil
	default:
		return "", usageErr(fmt.Errorf("unsupported sort %q", mode))
	}
}
// emptyClusterBrowserPayload builds a cluster-browser payload that carries the
// requested view settings but no clusters.
//
// When sourceDBPath is blank (after trimming), cfg.DBPath is used to derive
// the database source kind and location. Clusters is set to an empty,
// non-nil slice.
func emptyClusterBrowserPayload(ctx context.Context, cfg config.Config, sourceDBPath, sort string, minSize, limit int, hideClosed bool) clusterBrowserPayload {
	dbPath := sourceDBPath
	if strings.TrimSpace(dbPath) == "" {
		dbPath = cfg.DBPath
	}

	var payload clusterBrowserPayload
	payload.Mode = "cluster-browser"
	payload.DBSource = databaseSourceKind(dbPath)
	payload.DBLocation = databaseSourceLocation(ctx, dbPath)
	payload.Sort = sort
	payload.MinSize = minSize
	payload.Limit = limit
	payload.HideClosed = hideClosed
	payload.EmbedModel = cfg.OpenAI.EmbedModel
	payload.EmbeddingBasis = cfg.EmbeddingBasis
	payload.Clusters = []store.ClusterSummary{}
	return payload
}
func databaseSourceKind(dbPath string) string {
if _, ok := portableStoreRoot(dbPath); ok {
return "remote"
@ -1250,7 +1305,8 @@ func (a *App) runClusterDetail(ctx context.Context, args []string) error {
clusterIDRaw := fs.String("id", "", "cluster id")
memberLimitRaw := fs.String("member-limit", "", "maximum member rows")
bodyCharsRaw := fs.String("body-chars", "", "maximum body snippet characters")
includeClosed := fs.Bool("include-closed", false, "include closed clusters and members")
includeClosed := fs.Bool("include-closed", false, "deprecated; closed cluster members are shown by default")
hideClosed := fs.Bool("hide-closed", false, "hide locally closed members")
jsonOut := fs.Bool("json", false, "write JSON output")
if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"id": true, "member-limit": true, "body-chars": true})); err != nil {
return usageErr(err)
@ -1291,7 +1347,7 @@ func (a *App) runClusterDetail(ctx context.Context, args []string) error {
detail, err := rt.Store.ClusterDetail(ctx, store.ClusterDetailOptions{
RepoID: repo.ID,
ClusterID: int64(clusterID),
IncludeClosed: *includeClosed,
IncludeClosed: *includeClosed || !*hideClosed,
MemberLimit: memberLimit,
BodyChars: bodyChars,
})
@ -1366,7 +1422,7 @@ func (a *App) runThreads(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
numbers, err := parseOptionalPositiveIntList(*numbersRaw)
numbers, err := parseOptionalThreadNumberList(*numbersRaw)
if err != nil {
return usageErr(err)
}
@ -1417,7 +1473,7 @@ func (a *App) runCloseThread(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
number, err := parseOptionalPositiveInt(*numberRaw)
number, err := parseOptionalThreadNumber(*numberRaw)
if err != nil {
return usageErr(err)
}
@ -1462,7 +1518,7 @@ func (a *App) runReopenThread(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
number, err := parseOptionalPositiveInt(*numberRaw)
number, err := parseOptionalThreadNumber(*numberRaw)
if err != nil {
return usageErr(err)
}
@ -1715,8 +1771,10 @@ func (a *App) runSync(ctx context.Context, args []string) error {
limitRaw := fs.String("limit", "", "maximum issue/PR rows")
jsonOut := fs.Bool("json", false, "write JSON output")
includeComments := fs.Bool("include-comments", false, "hydrate issue comments, PR reviews, and PR review comments")
includePRDetails := fs.Bool("include-pr-details", false, "hydrate PR files, commits, checks, and workflow runs")
withRaw := fs.String("with", "", "extra hydration: pr-details")
fs.Bool("include-code", false, "accepted for compatibility; code hydration is not implemented yet")
if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"numbers": true, "since": true, "state": true, "limit": true})); err != nil {
if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"numbers": true, "since": true, "state": true, "limit": true, "with": true})); err != nil {
return usageErr(err)
}
a.applyCommandJSON(*jsonOut)
@ -1731,17 +1789,22 @@ func (a *App) runSync(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
numbers, err := parseOptionalPositiveIntList(*numbersRaw)
numbers, err := parseOptionalThreadNumberList(*numbersRaw)
if err != nil {
return usageErr(err)
}
with, err := parseSyncWith(*withRaw)
if err != nil {
return usageErr(err)
}
stats, err := a.syncRepository(ctx, owner, repo, syncOptions{
Since: strings.TrimSpace(*since),
State: strings.TrimSpace(*state),
Limit: limit,
Numbers: numbers,
IncludeComments: *includeComments,
Since: strings.TrimSpace(*since),
State: strings.TrimSpace(*state),
Limit: limit,
Numbers: numbers,
IncludeComments: *includeComments,
IncludePRDetails: *includePRDetails || with["pr-details"],
})
if err != nil {
return err
@ -1750,11 +1813,29 @@ func (a *App) runSync(ctx context.Context, args []string) error {
}
type syncOptions struct {
Since string
State string
Limit int
Numbers []int
IncludeComments bool
Since string
State string
Limit int
Numbers []int
IncludeComments bool
IncludePRDetails bool
}
func parseSyncWith(value string) (map[string]bool, error) {
out := map[string]bool{}
for _, part := range strings.Split(value, ",") {
name := strings.TrimSpace(part)
if name == "" {
continue
}
switch name {
case "pr-details":
out[name] = true
default:
return nil, fmt.Errorf("unsupported --with value %q", name)
}
}
return out, nil
}
func (a *App) syncRepository(ctx context.Context, owner, repo string, options syncOptions) (syncer.Stats, error) {
@ -1762,32 +1843,34 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
if err != nil {
return syncer.Stats{}, err
}
token := config.ResolveGitHubToken(cfg)
token := a.resolveGitHubToken(ctx, cfg)
if token.Value == "" {
return syncer.Stats{}, fmt.Errorf("missing GitHub token: set %s", cfg.GitHub.TokenEnv)
return syncer.Stats{}, fmt.Errorf("missing GitHub token: set %s or authenticate gh", cfg.GitHub.TokenEnv)
}
if err := config.EnsureRuntimeDirs(cfg); err != nil {
return syncer.Stats{}, err
}
st, err := store.Open(ctx, cfg.DBPath)
rt, err := a.openLocalRuntime(ctx)
if err != nil {
return syncer.Stats{}, err
}
defer st.Close()
defer rt.Store.Close()
client := gh.New(gh.Options{Token: token.Value, BaseURL: githubBaseURL()})
service := syncer.New(client, st)
service := syncer.New(client, rt.Store)
stats, err := service.Sync(ctx, syncer.Options{
Owner: owner,
Repo: repo,
State: strings.TrimSpace(options.State),
Since: strings.TrimSpace(options.Since),
Limit: options.Limit,
Numbers: options.Numbers,
IncludeComments: options.IncludeComments,
Owner: owner,
Repo: repo,
State: strings.TrimSpace(options.State),
Since: strings.TrimSpace(options.Since),
Limit: options.Limit,
Numbers: options.Numbers,
IncludeComments: options.IncludeComments,
IncludePRDetails: options.IncludePRDetails,
Reporter: func(message string) {
fmt.Fprintln(a.Stderr, message)
},
Logger: progressLogger(a.Stderr),
})
if err != nil {
return syncer.Stats{}, err
@ -1795,6 +1878,17 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
return stats, nil
}
func progressLogger(w io.Writer) *slog.Logger {
return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
if attr.Key == slog.TimeKey {
return slog.Attr{}
}
return attr
},
}))
}
func (a *App) runInit(ctx context.Context, args []string) error {
fs := flag.NewFlagSet("init", flag.ContinueOnError)
fs.SetOutput(io.Discard)
@ -1859,6 +1953,8 @@ func (a *App) runPortable(ctx context.Context, args []string) error {
return usageErr(fmt.Errorf("portable requires a subcommand"))
}
switch args[0] {
case "help", "--help", "-h":
return a.printCommandUsage("portable")
case "prune":
return a.runPortablePrune(ctx, args[1:])
default:
@ -2169,6 +2265,113 @@ func (a *App) runDoctor(ctx context.Context, args []string) error {
}, true)
}
// runMetadata implements the "metadata" command. It accepts only the --json
// flag, rejects positional arguments with a usage error, and writes a control
// manifest describing the application: default paths taken from
// config.Default() and config.ResolvePath(""), the capability list, privacy
// flags, and the argv for each advertised command.
func (a *App) runMetadata(args []string) error {
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() > 0 {
		return usageErr(fmt.Errorf("metadata takes flags only"))
	}

	defaults := config.Default()
	manifest := control.NewManifest("gitcrawl", "Git Crawl", "gitcrawl")
	manifest.Description = "Local-first GitHub issue and pull request crawler."
	manifest.Branding = control.Branding{
		SymbolName:  "point.3.connected.trianglepath.dotted",
		AccentColor: "#2da44e",
	}
	manifest.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
	}
	manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "search", "tui", "portable", "clusters", "embeddings"}
	manifest.Privacy = control.Privacy{
		ContainsPrivateMessages: false,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"github", "sqlite", "portable"},
	}

	// argv prefixes the binary name so each entry lists only its own arguments.
	argv := func(rest ...string) []string {
		return append([]string{"gitcrawl"}, rest...)
	}
	manifest.Commands = map[string]control.Command{
		"status":          {Title: "Status", Argv: argv("status", "--json"), JSON: true},
		"doctor":          {Title: "Doctor", Argv: argv("doctor", "--json"), JSON: true},
		"sync":            {Title: "Sync repository", Argv: argv("sync", "--json"), JSON: true, Mutates: true},
		"search":          {Title: "Search", Argv: argv("search", "--json"), JSON: true},
		"tui":             {Title: "Terminal cluster browser", Argv: argv("tui")},
		"tui-json":        {Title: "Terminal cluster data", Argv: argv("tui", "--json"), JSON: true},
		"portable":        {Title: "Portable store tools", Argv: argv("portable", "prune", "--json"), JSON: true, Mutates: true},
		"clusters":        {Title: "Clusters", Argv: argv("clusters", "--json"), JSON: true},
		"legacy-sync-api": {Title: "Legacy sync-status alias", Argv: argv("sync-status"), Legacy: true, Deprecated: true},
	}
	return a.writeOutput("metadata", manifest, false)
}
// runStatus implements the "status" command. It accepts only the --json flag
// and rejects positional arguments with a usage error.
//
// A missing config file is not an error: normalized defaults are used instead.
// A missing database file is not an error either: the reported status then
// carries only the database path. When the database exists it is opened
// read-only and its status is queried. Any other load, stat, open, or query
// failure is returned unchanged.
func (a *App) runStatus(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("status", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() > 0 {
		return usageErr(fmt.Errorf("status takes flags only"))
	}

	cfg, loadErr := config.Load(a.configPath)
	switch {
	case loadErr == nil:
		// Loaded config is used as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return normErr
		}
	default:
		return loadErr
	}

	status := store.Status{DBPath: cfg.DBPath}
	_, statErr := os.Stat(cfg.DBPath)
	switch {
	case statErr == nil:
		db, openErr := store.OpenReadOnly(ctx, cfg.DBPath)
		if openErr != nil {
			return openErr
		}
		defer db.Close()
		loaded, queryErr := db.Status(ctx)
		status = loaded
		if queryErr != nil {
			return queryErr
		}
	case !errors.Is(statErr, os.ErrNotExist):
		return statErr
	}

	// Always report the configured path, whatever the store returned.
	status.DBPath = cfg.DBPath
	return a.writeOutput("status", controlStatus(config.ResolvePath(a.configPath), cfg, status), false)
}
// controlStatus converts a store.Status into the control.Status payload written
// by the status command.
//
// The result carries the repository, thread, open-thread, and cluster counts, a
// one-line summary, the config and database paths, the size of the "-wal" file
// next to the database (0 when it cannot be stat'ed), and a single primary
// database entry. LastSyncAt is set, as RFC 3339 in UTC, only when the store
// reports a non-zero time. The cfg parameter is currently unused.
func controlStatus(configPath string, cfg config.Config, status store.Status) control.Status {
	tallies := []control.Count{
		control.NewCount("repositories", "Repositories", int64(status.RepositoryCount)),
		control.NewCount("threads", "Threads", int64(status.ThreadCount)),
		control.NewCount("open_threads", "Open threads", int64(status.OpenThreadCount)),
		control.NewCount("clusters", "Clusters", int64(status.ClusterCount)),
	}
	summary := fmt.Sprintf("%d threads across %d repositories", status.ThreadCount, status.RepositoryCount)

	result := control.NewStatus("gitcrawl", summary)
	result.State = "current"
	result.ConfigPath = configPath
	result.DatabasePath = status.DBPath
	result.Counts = tallies
	if synced := status.LastSyncAt; !synced.IsZero() {
		result.LastSyncAt = synced.UTC().Format(time.RFC3339)
	}

	primary := control.SQLiteDatabase("primary", "GitHub archive", "archive", status.DBPath, true, tallies)
	result.DatabaseBytes = primary.Bytes
	result.WALBytes = fileSize(status.DBPath + "-wal")
	result.Databases = []control.Database{primary}
	return result
}
func fileSize(path string) int64 {
info, err := os.Stat(path)
if err != nil {
return 0
}
return info.Size()
}
func (a *App) applyCommandJSON(enabled bool) {
if enabled {
a.format = FormatJSON
@ -2199,6 +2402,9 @@ func resolveOutputFormat(value string, jsonOut bool) (OutputFormat, error) {
}
func parseOwnerRepo(value string) (string, string, error) {
if ref, ok := parseThreadReference(value); ok && ref.Owner != "" && ref.Repo != "" {
return ref.Owner, ref.Repo, nil
}
parts := strings.Split(value, "/")
if len(parts) != 2 || strings.TrimSpace(parts[0]) == "" || strings.TrimSpace(parts[1]) == "" {
return "", "", fmt.Errorf("expected owner/repo, got %q", value)
@ -2206,6 +2412,60 @@ func parseOwnerRepo(value string) (string, string, error) {
return strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]), nil
}
// threadReference is a parsed issue or pull request reference. Owner and Repo
// are empty when the reference carried only a number.
type threadReference struct {
	Owner  string
	Repo   string
	Number int
}

// FullName returns "owner/repo", or the empty string when either part is
// missing.
func (ref threadReference) FullName() string {
	if ref.Owner != "" && ref.Repo != "" {
		return ref.Owner + "/" + ref.Repo
	}
	return ""
}
// parseThreadReference extracts an issue or pull request reference from value.
//
// Surrounding whitespace, wrapping brackets and quotes, and trailing sentence
// punctuation are removed first. Trailing punctuation and closing wrappers are
// stripped together, so pasted forms such as "(#123)." or "<url>," reduce to
// the bare reference no matter which comes last.
//
// The cleaned value is then tried, in order, as:
//   - a bare positive decimal number,
//   - "#" followed by a positive decimal number,
//   - a match of githubThreadURLPattern (owner, repo, number),
//   - a match of ownerRepoThreadPattern (owner, repo, number),
//   - a match of pathThreadPattern (number only).
//
// The three patterns are defined elsewhere in the package. The boolean result
// is false when nothing matches or the number is not positive.
func parseThreadReference(value string) (threadReference, bool) {
	const wrappers = "<>()[]{}\"'`"
	value = strings.Trim(strings.TrimSpace(value), wrappers)
	// One right-trim over both sets handles interleavings like `).` and `.)`.
	value = strings.TrimRight(value, ".,;"+wrappers)
	if value == "" {
		return threadReference{}, false
	}
	if number, ok := parsePositiveIntLiteral(value); ok {
		return threadReference{Number: number}, true
	}
	if strings.HasPrefix(value, "#") {
		if number, ok := parsePositiveIntLiteral(strings.TrimPrefix(value, "#")); ok {
			return threadReference{Number: number}, true
		}
	}
	if match := githubThreadURLPattern.FindStringSubmatch(value); match != nil {
		if number, ok := parsePositiveIntLiteral(match[3]); ok {
			return threadReference{Owner: match[1], Repo: match[2], Number: number}, true
		}
	}
	if match := ownerRepoThreadPattern.FindStringSubmatch(value); match != nil {
		if number, ok := parsePositiveIntLiteral(match[3]); ok {
			return threadReference{Owner: match[1], Repo: match[2], Number: number}, true
		}
	}
	if match := pathThreadPattern.FindStringSubmatch(value); match != nil {
		if number, ok := parsePositiveIntLiteral(match[1]); ok {
			return threadReference{Number: number}, true
		}
	}
	return threadReference{}, false
}
// parsePositiveIntLiteral converts value to an int when it passes
// isDecimalString and converts to a number greater than zero. The boolean is
// false for anything else, including values that strconv.Atoi rejects.
func parsePositiveIntLiteral(value string) (int, bool) {
	if !isDecimalString(value) {
		return 0, false
	}
	parsed, err := strconv.Atoi(value)
	if err != nil || parsed <= 0 {
		return parsed, false
	}
	return parsed, true
}
func parseOptionalPositiveInt(value string) (int, error) {
if strings.TrimSpace(value) == "" {
return 0, nil
@ -2228,6 +2488,28 @@ func parseRequiredPositiveInt(name, value string) (int, error) {
return parsed, nil
}
// parseOptionalThreadNumber returns the thread number named by value, which may
// be any form accepted by parseThreadReference. A blank value means "not
// provided" and yields 0 with no error; a value with no positive number in it
// yields an error.
func parseOptionalThreadNumber(value string) (int, error) {
	if len(strings.TrimSpace(value)) == 0 {
		return 0, nil
	}
	if ref, ok := parseThreadReference(value); ok && ref.Number > 0 {
		return ref.Number, nil
	}
	return 0, fmt.Errorf("expected positive issue or pull request number, got %q", value)
}
// parseRequiredThreadNumber behaves like parseOptionalThreadNumber but treats a
// blank value as an error that names the missing --<name> flag.
func parseRequiredThreadNumber(name, value string) (int, error) {
	switch number, err := parseOptionalThreadNumber(value); {
	case err != nil:
		return 0, err
	case number == 0:
		return 0, fmt.Errorf("missing --%s", name)
	default:
		return number, nil
	}
}
func parseClusterMemberCommandIDs(command, clusterIDRaw, numberRaw string) (int, int, error) {
clusterID, err := parseOptionalPositiveInt(clusterIDRaw)
if err != nil {
@ -2236,7 +2518,7 @@ func parseClusterMemberCommandIDs(command, clusterIDRaw, numberRaw string) (int,
if clusterID == 0 {
return 0, 0, fmt.Errorf("%s requires --id", command)
}
number, err := parseOptionalPositiveInt(numberRaw)
number, err := parseOptionalThreadNumber(numberRaw)
if err != nil {
return 0, 0, err
}
@ -2591,6 +2873,22 @@ func parseOptionalPositiveIntList(value string) ([]int, error) {
return out, nil
}
// parseOptionalThreadNumberList parses a comma-separated list of thread
// references (any form accepted by parseOptionalThreadNumber) into their
// numbers, preserving order.
//
// A blank value yields a nil slice. Empty entries, as in "1,,2" or a trailing
// comma, are skipped: parseOptionalThreadNumber reports a blank entry as 0 with
// no error, and that 0 must not reach the result as if it were a real thread
// number. A list made only of empty entries also yields nil. The first entry
// that is not a valid reference aborts parsing with its error.
func parseOptionalThreadNumberList(value string) ([]int, error) {
	if strings.TrimSpace(value) == "" {
		return nil, nil
	}
	parts := strings.Split(value, ",")
	out := make([]int, 0, len(parts))
	for _, part := range parts {
		entry := strings.TrimSpace(part)
		if entry == "" {
			continue
		}
		parsed, err := parseOptionalThreadNumber(entry)
		if err != nil {
			return nil, err
		}
		out = append(out, parsed)
	}
	if len(out) == 0 {
		return nil, nil
	}
	return out, nil
}
func (a *App) writeOutput(title string, payload any, allowLog bool) error {
switch a.format {
case FormatJSON:
@ -2654,7 +2952,17 @@ func (a *App) printUsage() {
}
func (a *App) printCommandUsage(command string) error {
if text, ok := commandUsageTexts[command]; ok {
fmt.Fprint(a.Stdout, text)
return nil
}
switch command {
case "cluster-explain":
fmt.Fprint(a.Stdout, commandUsageTexts["cluster-detail"])
return nil
case "portable":
fmt.Fprint(a.Stdout, portableUsageText)
return nil
case "tui":
fmt.Fprint(a.Stdout, tuiUsageText)
return nil
@ -2676,10 +2984,13 @@ Global flags:
--version print version
Core commands:
metadata print crawlkit control metadata
status print fast read-only archive status
init create config, optionally from a portable store
doctor check config, token, and database readiness
sync sync GitHub issue and pull request metadata
refresh run sync, enrichment, embedding, and clustering pipeline
embed generate OpenAI embeddings for local thread documents
threads list local issue and pull request rows
cluster build durable clusters from local thread vectors
close-thread locally hide one issue or pull request row
@ -2698,12 +3009,138 @@ Core commands:
cluster-explain alias for cluster-detail
neighbors list vector-nearest local issue and pull request rows
search search local thread documents; also supports search issues|prs gh syntax
gh gh-compatible local cache shim with fallback to real gh
portable prune prune volatile payloads from a portable store
tui [owner/repo] browse clusters in the terminal UI; repo is inferred when omitted
No API server is provided. There is intentionally no serve command.
`
// commandUsageTexts maps a command name to the help text printed for it.
// printCommandUsage looks a command up here first and writes the text to
// stdout; "cluster-explain" has no entry of its own and reuses the
// "cluster-detail" text. Help for "portable" and "tui" is kept in separate
// constants.
var commandUsageTexts = map[string]string{
"metadata": `gitcrawl metadata prints crawlkit control metadata.
Usage:
gitcrawl metadata [--json]
`,
"status": `gitcrawl status prints fast read-only archive status.
Usage:
gitcrawl status [--json]
`,
"init": `gitcrawl init creates a local config and SQLite database.
Usage:
gitcrawl init [--db path] [--portable-store URL] [--json]
`,
"configure": `gitcrawl configure updates model fields in the config.
Usage:
gitcrawl configure [--summary-model name] [--embed-model name] [--embedding-basis title_original] [--json]
`,
"doctor": `gitcrawl doctor checks config, token, and database readiness.
Usage:
gitcrawl doctor [--json]
`,
"sync": `gitcrawl sync mirrors GitHub issue and pull request metadata.
Usage:
gitcrawl sync owner/repo [--state open|closed|all] [--numbers refs] [--with pr-details] [--include-pr-details] [--json]
`,
"refresh": `gitcrawl refresh runs sync, enrichment, embedding, and clustering.
Usage:
gitcrawl refresh owner/repo [--state open|closed|all] [--sync-if-stale duration] [--no-sync] [--no-embed] [--no-cluster] [--json]
`,
"embed": `gitcrawl embed generates OpenAI embeddings for local thread documents.
Usage:
gitcrawl embed owner/repo [--number ref] [--limit N] [--force] [--include-closed] [--json]
`,
"threads": `gitcrawl threads lists local issue and pull request rows.
Usage:
gitcrawl threads owner/repo [--include-closed] [--numbers refs] [--limit N] [--json]
`,
"search": `gitcrawl search queries local thread documents, or accepts gh-shaped issue and PR search.
Usage:
gitcrawl search owner/repo --query text [--mode keyword|semantic] [--limit N] [--json]
gitcrawl search issues|prs <query> -R owner/repo [--state open|closed|all] [--json fields] [--limit N]
`,
"cluster": `gitcrawl cluster builds durable clusters from local thread vectors.
Usage:
gitcrawl cluster owner/repo [--threshold N] [--min-size N] [--max-cluster-size N] [--k N] [--cross-kind-threshold N] [--limit N] [--model name] [--basis semantic|references|hybrid] [--include-closed] [--json]
`,
"clusters": `gitcrawl clusters lists latest display clusters with durable fallback.
Usage:
gitcrawl clusters owner/repo [--sort size|recent|oldest] [--min-size N] [--limit N] [--hide-closed] [--json]
`,
"durable-clusters": `gitcrawl durable-clusters lists governed durable cluster groups.
Usage:
gitcrawl durable-clusters owner/repo [--include-closed] [--sort size|recent|oldest] [--min-size N] [--limit N] [--json]
`,
"cluster-detail": `gitcrawl cluster-detail dumps one cluster and its member rows.
Usage:
gitcrawl cluster-detail owner/repo --id N [--member-limit N] [--body-chars N] [--hide-closed] [--json]
`,
"neighbors": `gitcrawl neighbors lists vector-nearest local issue and pull request rows.
Usage:
gitcrawl neighbors owner/repo --number ref [--limit N] [--json]
`,
"runs": `gitcrawl runs lists local pipeline run history.
Usage:
gitcrawl runs owner/repo [--kind sync|summary|embedding|cluster] [--limit N] [--json]
`,
"close-thread": `gitcrawl close-thread locally hides one issue or pull request row.
Usage:
gitcrawl close-thread owner/repo --number ref [--reason text] [--json]
`,
"reopen-thread": `gitcrawl reopen-thread clears a local thread hide.
Usage:
gitcrawl reopen-thread owner/repo --number ref [--json]
`,
"close-cluster": `gitcrawl close-cluster locally hides one durable cluster.
Usage:
gitcrawl close-cluster owner/repo --id N [--reason text] [--json]
`,
"reopen-cluster": `gitcrawl reopen-cluster clears a local cluster hide.
Usage:
gitcrawl reopen-cluster owner/repo --id N [--json]
`,
"exclude-cluster-member": `gitcrawl exclude-cluster-member locally removes one row from a durable cluster.
Usage:
gitcrawl exclude-cluster-member owner/repo --id N --number ref [--reason text] [--json]
`,
"include-cluster-member": `gitcrawl include-cluster-member restores one row to a durable cluster.
Usage:
gitcrawl include-cluster-member owner/repo --id N --number ref [--json]
`,
"set-cluster-canonical": `gitcrawl set-cluster-canonical sets the canonical row for a durable cluster.
Usage:
gitcrawl set-cluster-canonical owner/repo --id N --number ref [--reason text] [--json]
`,
"gh": `gitcrawl gh runs a gh-compatible local cache shim with fallback to real gh.
Usage:
gitcrawl gh <gh command>
gitcrawl gh xcache stats|keys|gc|flush|reset|snapshot [--json]
`,
}
const tuiUsageText = `gitcrawl tui opens the local terminal cluster browser.
Usage:
@ -2719,3 +3156,12 @@ Press n to load neighbors for the selected issue or PR.
Enter from the members pane also loads neighbors before opening detail.
The TUI quietly refreshes from the local store every 15 seconds and leaves the current status alone when nothing changed.
`
// portableUsageText is the help text for the "portable" command group. It is
// printed by printCommandUsage("portable"), which runPortable calls for the
// help, --help, and -h subcommands.
const portableUsageText = `gitcrawl portable manages local portable-store snapshots.
Usage:
gitcrawl portable prune [--body-chars N] [--no-vacuum] [--json]
Subcommands:
prune prune volatile payloads from the configured portable store
`

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
@ -58,6 +59,181 @@ func TestInitDefaultOutputIsHumanReadable(t *testing.T) {
}
}
// TestMetadataStatusAndControlStatusJSON covers the metadata and status
// commands end to end, then exercises controlStatus, fileSize, and
// printCommandUsage directly.
func TestMetadataStatusAndControlStatusJSON(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
configPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "gitcrawl.db")
init := New()
if err := init.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
t.Fatalf("init: %v", err)
}
// Place a file at the "-wal" path next to the initialised database.
if err := os.WriteFile(dbPath+"-wal", []byte("wal"), 0o600); err != nil {
t.Fatalf("write wal: %v", err)
}
// Each case must emit JSON that identifies gitcrawl and contains the key in want.
for _, tc := range []struct {
name string
args []string
want string
}{
{name: "metadata", args: []string{"--config", configPath, "metadata", "--json"}, want: "commands"},
{name: "status", args: []string{"--config", configPath, "status", "--json"}, want: "databases"},
{name: "status missing config", args: []string{"--config", filepath.Join(dir, "missing.toml"), "status", "--json"}, want: "counts"},
} {
t.Run(tc.name, func(t *testing.T) {
app := New()
var stdout bytes.Buffer
app.Stdout = &stdout
if err := app.Run(ctx, tc.args); err != nil {
t.Fatalf("run %s: %v", tc.name, err)
}
var payload map[string]any
if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil {
t.Fatalf("decode %s output %q: %v", tc.name, stdout.String(), err)
}
// The payload may identify the app under either "app_id" or "id".
if payload["app_id"] != "gitcrawl" && payload["id"] != "gitcrawl" {
t.Fatalf("expected gitcrawl payload, got %#v", payload)
}
if _, ok := payload[tc.want]; !ok {
t.Fatalf("expected %s in %#v", tc.want, payload)
}
})
}
cfg, err := config.Load(configPath)
if err != nil {
t.Fatalf("load config: %v", err)
}
// Call controlStatus directly with files of known size: a 2-byte database
// file and a 3-byte "-wal" file.
sizePath := filepath.Join(dir, "sized.db")
if err := os.WriteFile(sizePath, []byte("db"), 0o600); err != nil {
t.Fatalf("write sized db: %v", err)
}
if err := os.WriteFile(sizePath+"-wal", []byte("wal"), 0o600); err != nil {
t.Fatalf("write sized wal: %v", err)
}
lastSync := time.Unix(100, 0)
out := controlStatus(configPath, cfg, store.Status{
DBPath: sizePath,
RepositoryCount: 2,
ThreadCount: 3,
OpenThreadCount: 1,
ClusterCount: 4,
LastSyncAt: lastSync,
})
if out.DatabaseBytes == 0 {
t.Fatalf("database bytes should be populated: %#v", out)
}
if out.WALBytes != 3 {
t.Fatalf("wal bytes = %d, want 3", out.WALBytes)
}
if out.LastSyncAt != lastSync.UTC().Format(time.RFC3339) {
t.Fatalf("last sync = %q", out.LastSyncAt)
}
if len(out.Databases) != 1 || out.Databases[0].Path != sizePath || !out.Databases[0].IsPrimary {
t.Fatalf("database metadata = %#v", out.Databases)
}
if got := fileSize(filepath.Join(dir, "missing.db")); got != 0 {
t.Fatalf("missing file size = %d, want 0", got)
}
// Help topics: portable and tui are checked for topic-specific text, every
// other topic only for a "Usage:" section.
var helpOut bytes.Buffer
help := New()
help.Stdout = &helpOut
if err := help.printCommandUsage("portable"); err != nil {
t.Fatalf("portable help: %v", err)
}
if !strings.Contains(helpOut.String(), "portable") {
t.Fatalf("portable help output = %q", helpOut.String())
}
helpOut.Reset()
if err := help.printCommandUsage("tui"); err != nil {
t.Fatalf("tui help: %v", err)
}
if !strings.Contains(helpOut.String(), "cluster browser") {
t.Fatalf("tui help output = %q", helpOut.String())
}
for _, topic := range []string{"metadata", "status", "init", "configure", "doctor", "sync", "refresh", "embed", "threads", "search", "cluster", "clusters", "durable-clusters", "cluster-detail", "cluster-explain", "neighbors", "runs", "close-thread", "reopen-thread", "close-cluster", "reopen-cluster", "exclude-cluster-member", "include-cluster-member", "set-cluster-canonical", "gh"} {
helpOut.Reset()
if err := help.printCommandUsage(topic); err != nil {
t.Fatalf("%s help: %v", topic, err)
}
if !strings.Contains(helpOut.String(), "Usage:") {
t.Fatalf("%s help output = %q", topic, helpOut.String())
}
}
// status accepts flags only, so a positional argument must be rejected.
if err := New().Run(ctx, []string{"--config", configPath, "status", "extra"}); err == nil {
t.Fatal("status extra arg should fail")
}
}
// TestControlRepositoryAndClusterHelperBranches exercises small helpers
// directly: emptyClusterBrowserPayload, remoteRefreshSource, remoteRuntimePath,
// githubRepoFromRemote, parseSyncWith, parseClusterShapeOptions, and
// stateIncludesClosed.
func TestControlRepositoryAndClusterHelperBranches(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cfg := config.Default()
cfg.DBPath = filepath.Join(dir, "gitcrawl.db")
payload := emptyClusterBrowserPayload(ctx, cfg, "", "recent", 2, 50, true)
if payload.DBSource != "local" || payload.DBLocation != "gitcrawl.db" {
t.Fatalf("empty payload source = %s/%s", payload.DBSource, payload.DBLocation)
}
if payload.Sort != "recent" || payload.MinSize != 2 || payload.Limit != 50 || !payload.HideClosed {
t.Fatalf("empty payload options = %#v", payload)
}
// With RemoteSource unset, both remote helpers must return empty strings.
rt := localRuntime{Config: cfg}
if got := remoteRefreshSource(rt); got != "" {
t.Fatalf("local refresh source = %q", got)
}
if got := remoteRuntimePath(rt); got != "" {
t.Fatalf("local runtime path = %q", got)
}
// With RemoteSource set, the refresh source is SourceDBPath and the runtime
// path is the configured DBPath.
rt.RemoteSource = true
rt.SourceDBPath = filepath.Join(dir, "store", "data", "archive.db")
if got := remoteRefreshSource(rt); got != rt.SourceDBPath {
t.Fatalf("remote refresh source = %q", got)
}
if got := remoteRuntimePath(rt); got != cfg.DBPath {
t.Fatalf("remote runtime path = %q", got)
}
// scp-style, https, and ssh:// GitHub remotes map to owner/repo; a
// non-GitHub host or an owner-only path maps to "".
if got := githubRepoFromRemote("git@github.com:openclaw/gitcrawl-store.git"); got != "openclaw/gitcrawl-store" {
t.Fatalf("ssh remote repo = %q", got)
}
if got := githubRepoFromRemote("https://github.com/openclaw/gitcrawl-store.git"); got != "openclaw/gitcrawl-store" {
t.Fatalf("https remote repo = %q", got)
}
if got := githubRepoFromRemote("ssh://git@github.com/openclaw/gitcrawl-store.git"); got != "openclaw/gitcrawl-store" {
t.Fatalf("ssh url remote repo = %q", got)
}
if got := githubRepoFromRemote("https://example.com/openclaw/gitcrawl-store.git"); got != "" {
t.Fatalf("non-github remote repo = %q", got)
}
if got := githubRepoFromRemote("https://github.com/openclaw"); got != "" {
t.Fatalf("short github remote repo = %q", got)
}
// --with tolerates padding and a trailing comma but rejects unknown names.
with, err := parseSyncWith(" pr-details, ")
if err != nil || !with["pr-details"] {
t.Fatalf("parse sync with = %#v, %v", with, err)
}
if _, err := parseSyncWith("reviews"); err == nil {
t.Fatal("unsupported sync --with value should fail")
}
// Blank cluster shape options fall back to the package defaults.
maxSize, fanout, crossKind, err := parseClusterShapeOptions("cluster", "", "", "")
if err != nil {
t.Fatalf("default cluster shape: %v", err)
}
if maxSize != defaultClusterMaxSize || fanout != defaultClusterFanout || crossKind != defaultCrossKindMinScore {
t.Fatalf("default cluster shape = %d/%d/%f", maxSize, fanout, crossKind)
}
if _, _, _, err := parseClusterShapeOptions("cluster", "2", "3", "1.5"); err == nil {
t.Fatal("out-of-range cross-kind threshold should fail")
}
if !stateIncludesClosed("all") || !stateIncludesClosed(" closed ") || stateIncludesClosed("open") {
t.Fatal("state closed helper mismatch")
}
}
func TestInitRejectsDBAndPortableStore(t *testing.T) {
dir := t.TempDir()
app := New()
@ -355,6 +531,21 @@ func TestReadCommandRefreshesPortableStore(t *testing.T) {
if _, err := os.Stat(mirrorPath); err != nil {
t.Fatalf("runtime mirror db was not created: %v", err)
}
seedPortableThread(t, filepath.Join(remoteDir, dbRel), 3, "too soon issue")
if err := runGit(ctx, remoteDir, "add", dbRel); err != nil {
t.Fatalf("git add second update: %v", err)
}
if err := runGit(ctx, remoteDir, "-c", "user.email=test@example.com", "-c", "user.name=Test", "commit", "-m", "second update"); err != nil {
t.Fatalf("git commit second update: %v", err)
}
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "threads", "openclaw/openclaw", "--numbers", "3", "--json"}); err != nil {
t.Fatalf("threads within refresh ttl: %v", err)
}
if strings.Contains(stdout.String(), "too soon issue") {
t.Fatalf("read command should not refresh portable store again within ttl, got %q", stdout.String())
}
}
func TestReadCommandUsesCachedPortableStoreWhenRefreshFails(t *testing.T) {
@ -748,6 +939,18 @@ func TestAppOutputModesAndUsageBranches(t *testing.T) {
if _, err := parseOptionalPositiveIntList("1, 0"); err == nil {
t.Fatal("bad int list should fail")
}
if owner, repo, err := parseOwnerRepo("https://github.com/openclaw/openclaw/issues/78601"); err != nil || owner != "openclaw" || repo != "openclaw" {
t.Fatalf("full issue URL owner/repo = %q/%q err=%v", owner, repo, err)
}
if got, err := parseOptionalThreadNumber("https://github.com/openclaw/openclaw/issues/78601"); err != nil || got != 78601 {
t.Fatalf("full issue URL number = %d err=%v", got, err)
}
if got, err := parseOptionalThreadNumber("https://github.com/openclaw/openclaw/pull/78602#issuecomment-1"); err != nil || got != 78602 {
t.Fatalf("full pull URL number = %d err=%v", got, err)
}
if got, err := parseOptionalThreadNumberList("https://github.com/openclaw/openclaw/issues/78601, openclaw/openclaw#78602, pull/78603, #78604"); err != nil || len(got) != 4 || got[0] != 78601 || got[1] != 78602 || got[2] != 78603 || got[3] != 78604 {
t.Fatalf("thread ref list = %#v err=%v", got, err)
}
if _, _, _, err := parseClusterShapeOptions("test", "bad", "1", "0.5"); err == nil {
t.Fatal("bad cluster shape should fail")
}
@ -786,7 +989,7 @@ func TestGlobalCommandBranches(t *testing.T) {
}{
{args: []string{"--help"}, wantOut: "Usage:"},
{args: []string{"help"}, wantOut: "Usage:"},
{args: []string{"help", "sync"}, wantErr: true, exitCode: 2},
{args: []string{"help", "sync"}, wantOut: "gitcrawl sync"},
{args: []string{"--version"}, wantOut: "dev"},
{args: []string{"version"}, wantOut: "dev"},
{args: []string{"--json", "version"}, wantOut: `"version"`},
@ -1007,6 +1210,60 @@ func TestTUIInfersRepository(t *testing.T) {
}
}
// TestTUIJSONUsesDefaultsWhenConfigMissing runs "tui --json" against a config
// path that does not exist, with GITCRAWL_DB_PATH set to a database file that
// is never created. The command must succeed, emit an empty cluster-browser
// payload, and leave the config file uncreated.
func TestTUIJSONUsesDefaultsWhenConfigMissing(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
configPath := filepath.Join(dir, "missing.toml")
t.Setenv("GITCRAWL_DB_PATH", filepath.Join(dir, "missing.db"))
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "tui", "--json"}); err != nil {
t.Fatalf("tui: %v", err)
}
var payload map[string]any
if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil {
t.Fatalf("decode tui payload: %v\n%s", err, stdout.String())
}
if payload["mode"] != "cluster-browser" {
t.Fatalf("mode = %#v", payload["mode"])
}
// "clusters" must be present as a JSON array and must be empty.
clusters, ok := payload["clusters"].([]any)
if !ok || len(clusters) != 0 {
t.Fatalf("clusters = %#v", payload["clusters"])
}
// Running the command must not have written a config file.
if _, err := os.Stat(configPath); !errors.Is(err, os.ErrNotExist) {
t.Fatalf("config file should not be created, stat err=%v", err)
}
}
// TestTUIJSONHandlesEmptyStoreWithoutRepository runs "tui --json" with no
// repository argument against a freshly initialised store and expects an empty
// "clusters" array rather than an error.
func TestTUIJSONHandlesEmptyStoreWithoutRepository(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
configPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "gitcrawl.db")
app := New()
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
t.Fatalf("init: %v", err)
}
// Use a second App so stdout captures only the tui output.
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "tui", "--json"}); err != nil {
t.Fatalf("tui: %v", err)
}
var payload map[string]any
if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil {
t.Fatalf("decode tui payload: %v\n%s", err, stdout.String())
}
clusters, ok := payload["clusters"].([]any)
if !ok || len(clusters) != 0 {
t.Fatalf("clusters = %#v", payload["clusters"])
}
}
func TestTUIRequiresInteractiveTerminalByDefault(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
@ -2078,6 +2335,36 @@ func TestClustersDefaultShowsActivePrimaryMembers(t *testing.T) {
if len(all.Clusters) != 1 || all.Clusters[0].MemberCount != 1 {
t.Fatalf("hide-closed should focus active members, got %#v", all.Clusters)
}
stdout.Reset()
detail := New()
detail.Stdout = &stdout
if err := detail.Run(ctx, []string{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", "90"}); err != nil {
t.Fatalf("cluster-detail: %v", err)
}
var detailPayload struct {
Members []store.ClusterMemberDetail `json:"members"`
}
if err := json.Unmarshal(stdout.Bytes(), &detailPayload); err != nil {
t.Fatalf("decode cluster detail: %v\n%s", err, stdout.String())
}
if len(detailPayload.Members) != 2 {
t.Fatalf("default cluster-detail should match visible cluster members, got %#v", detailPayload.Members)
}
stdout.Reset()
hideDetail := New()
hideDetail.Stdout = &stdout
if err := hideDetail.Run(ctx, []string{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", "90", "--hide-closed"}); err != nil {
t.Fatalf("cluster-detail hide closed: %v", err)
}
detailPayload.Members = nil
if err := json.Unmarshal(stdout.Bytes(), &detailPayload); err != nil {
t.Fatalf("decode hide-closed cluster detail: %v\n%s", err, stdout.String())
}
if len(detailPayload.Members) != 1 || detailPayload.Members[0].Thread.Number != 90 {
t.Fatalf("hide-closed cluster-detail should focus open members, got %#v", detailPayload.Members)
}
}
func TestClusterMemberOverrideCommands(t *testing.T) {

View File

@ -0,0 +1,268 @@
package cli
import (
"bytes"
"context"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestCLIAppCommandCoveragePaths seeds a repository containing a two-member
// durable cluster and one recorded sync run, then runs a series of CLI
// commands in --json mode. Every command must succeed and write something to
// stdout. Several --number/--numbers arguments are given as full GitHub URLs
// instead of bare numbers.
func TestCLIAppCommandCoveragePaths(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
cfg, err := config.Load(configPath)
if err != nil {
t.Fatalf("load config: %v", err)
}
st, err := store.Open(ctx, cfg.DBPath)
if err != nil {
t.Fatalf("open store: %v", err)
}
repo, err := st.RepositoryByFullName(ctx, "openclaw/openclaw")
if err != nil {
t.Fatalf("repo: %v", err)
}
threads, err := st.ListThreadsFiltered(ctx, store.ThreadListOptions{RepoID: repo.ID, IncludeClosed: true, Numbers: []int{10, 12}})
if err != nil {
t.Fatalf("threads: %v", err)
}
if len(threads) != 2 {
t.Fatalf("seed threads = %+v", threads)
}
// Save one durable cluster: threads[0] is canonical, threads[1] is a member.
result, err := st.SaveDurableClusters(ctx, repo.ID, []store.DurableClusterInput{{
StableKey: "cli:10,12",
StableSlug: "cli-10-12",
RepresentativeThreadID: threads[0].ID,
Title: "CLI command cluster",
Members: []store.DurableClusterMemberInput{
{ThreadID: threads[0].ID, Role: "canonical"},
{ThreadID: threads[1].ID, Role: "member"},
},
}})
if err != nil {
t.Fatalf("save cluster: %v", err)
}
if _, err := st.RecordRun(ctx, store.RunRecord{RepoID: repo.ID, Kind: "sync", Scope: "open", Status: "success", StartedAt: "2026-05-08T01:00:00Z", FinishedAt: "2026-05-08T01:00:01Z", StatsJSON: "{}"}); err != nil {
t.Fatalf("record run: %v", err)
}
clusterID, err := st.ClusterIDForThreadNumber(ctx, repo.ID, 10, true)
if err != nil {
t.Fatalf("cluster id: %v", err)
}
if result.RunID == 0 {
t.Fatal("cluster run id should be non-zero")
}
// Close the seeding handle before the CLI commands below run against the
// same database.
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
}
commands := [][]string{
{"--config", configPath, "--json", "configure", "--summary-model", "gpt-test", "--embed-model", "embed-test", "--embedding-basis", "title_original"},
{"--config", configPath, "--json", "metadata"},
{"--config", configPath, "--json", "status"},
{"--config", configPath, "--json", "threads", "openclaw/openclaw", "--numbers", "https://github.com/openclaw/openclaw/issues/10,https://github.com/openclaw/openclaw/pull/12", "--include-closed", "--limit", "2"},
{"--config", configPath, "--json", "runs", "openclaw/openclaw", "--kind", "sync", "--limit", "1"},
{"--config", configPath, "--json", "clusters", "openclaw/openclaw", "--include-closed", "--sort", "oldest", "--min-size", "1", "--limit", "5"},
{"--config", configPath, "--json", "durable-clusters", "openclaw/openclaw", "--include-closed", "--sort", "size", "--min-size", "1", "--limit", "5"},
{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", strconv.FormatInt(clusterID, 10), "--member-limit", "2", "--body-chars", "10", "--include-closed"},
{"--config", configPath, "--json", "close-thread", "openclaw/openclaw", "--number", "https://github.com/openclaw/openclaw/issues/10", "--reason", "covered"},
{"--config", configPath, "--json", "reopen-thread", "openclaw/openclaw", "--number", "10"},
{"--config", configPath, "--json", "close-cluster", "openclaw/openclaw", "--id", strconv.FormatInt(clusterID, 10), "--reason", "covered"},
{"--config", configPath, "--json", "reopen-cluster", "openclaw/openclaw", "--id", strconv.FormatInt(clusterID, 10)},
{"--config", configPath, "--json", "exclude-cluster-member", "openclaw/openclaw", "--id", strconv.FormatInt(clusterID, 10), "--number", "12", "--reason", "covered"},
{"--config", configPath, "--json", "include-cluster-member", "openclaw/openclaw", "--id", strconv.FormatInt(clusterID, 10), "--number", "12", "--reason", "covered"},
{"--config", configPath, "--json", "set-cluster-canonical", "openclaw/openclaw", "--id", strconv.FormatInt(clusterID, 10), "--number", "12", "--reason", "covered"},
}
// The commands run in order and several mutate local state, so each close or
// exclude is followed by the matching reopen or include.
for _, args := range commands {
app := New()
var stdout, stderr bytes.Buffer
app.Stdout = &stdout
app.Stderr = &stderr
if err := app.Run(ctx, args); err != nil {
t.Fatalf("%v failed: %v\nstdout=%s\nstderr=%s", args, err, stdout.String(), stderr.String())
}
if stdout.Len() == 0 {
t.Fatalf("%v produced no output", args)
}
}
if clusterID <= 0 {
t.Fatalf("cluster id = %d", clusterID)
}
}
// TestCLIAppHumanAndLogOutputE2E runs a few commands against a seeded
// repository in the default text mode, in "log" format, and in "json" format,
// and checks that each mode writes plausible output to stdout.
func TestCLIAppHumanAndLogOutputE2E(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)

	// capture runs a single CLI invocation on a fresh App and returns
	// everything it wrote to stdout; any run error fails the test.
	capture := func(args []string) string {
		var out bytes.Buffer
		cli := New()
		cli.Stdout = &out
		if err := cli.Run(ctx, args); err != nil {
			t.Fatalf("%v failed: %v", args, err)
		}
		return out.String()
	}

	// Default (human readable) output must not be blank.
	for _, args := range [][]string{
		{"--config", configPath, "version"},
		{"--config", configPath, "metadata"},
		{"--config", configPath, "status"},
		{"--config", configPath, "doctor"},
		{"--config", configPath, "help", "portable"},
		{"--config", configPath, "help", "tui"},
	} {
		if text := capture(args); strings.TrimSpace(text) == "" {
			t.Fatalf("%v produced no text output", args)
		}
	}

	// Log output is expected to contain at least one "=" character.
	for _, args := range [][]string{
		{"--config", configPath, "--format", "log", "configure", "--summary-model", "gpt-log"},
		{"--config", configPath, "--format", "log", "doctor"},
	} {
		if logged := capture(args); !strings.Contains(logged, "=") {
			t.Fatalf("%v log output = %q", args, logged)
		}
	}

	// JSON version output must expose a "version" key.
	var jsonOut bytes.Buffer
	jsonApp := New()
	jsonApp.Stdout = &jsonOut
	if err := jsonApp.Run(ctx, []string{"--config", configPath, "--format", "json", "version"}); err != nil {
		t.Fatalf("json version: %v", err)
	}
	if got := jsonOut.String(); !strings.Contains(got, `"version"`) {
		t.Fatalf("json version output = %q", got)
	}
}
// TestCLIAppVectorFallbackCoveragePaths stores vectors under one basis/model,
// then configures a different embed model and basis and checks that the
// vector-dependent commands still run without error.
func TestCLIAppVectorFallbackCoveragePaths(t *testing.T) {
	ctx := context.Background()
	workDir := t.TempDir()
	configPath := filepath.Join(workDir, "config.toml")
	dbPath := filepath.Join(workDir, "gitcrawl.db")

	initApp := New()
	if err := initApp.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
		t.Fatalf("init: %v", err)
	}
	repoID, firstID, secondID := seedCommandFlowStore(t, dbPath)

	st, err := store.Open(ctx, dbPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	stamp := time.Now().UTC().Format(time.RFC3339Nano)
	seeded := []store.ThreadVector{
		{ThreadID: firstID, Basis: "other_basis", Model: "other-model", Dimensions: 2, ContentHash: "v1", Vector: []float64{1, 0}, CreatedAt: stamp, UpdatedAt: stamp},
		{ThreadID: secondID, Basis: "other_basis", Model: "other-model", Dimensions: 2, ContentHash: "v2", Vector: []float64{0.95, 0.05}, CreatedAt: stamp, UpdatedAt: stamp},
	}
	for _, vector := range seeded {
		if err := st.UpsertThreadVector(ctx, vector); err != nil {
			t.Fatalf("upsert vector: %v", err)
		}
	}
	if err := st.Close(); err != nil {
		t.Fatalf("close store: %v", err)
	}

	// Point the configuration at a model/basis that has no stored vectors.
	configureApp := New()
	if err := configureApp.Run(ctx, []string{"--config", configPath, "configure", "--embed-model", "missing-model", "--embedding-basis", "missing-basis"}); err != nil {
		t.Fatalf("configure: %v", err)
	}

	invocations := [][]string{
		{"--config", configPath, "--json", "neighbors", "openclaw/openclaw", "--number", "101", "--limit", "1", "--threshold", "0.99"},
		{"--config", configPath, "--json", "cluster", "openclaw/openclaw", "--threshold", "0.5", "--min-size", "2", "--limit", "2"},
		{"--config", configPath, "--json", "refresh", "openclaw/openclaw", "--no-sync", "--no-embed", "--threshold", "0.5", "--min-size", "2"},
		{"--config", configPath, "--json", "search", "openclaw/openclaw", "--query", "gateway", "--mode", ""},
	}
	for _, args := range invocations {
		var out bytes.Buffer
		cli := New()
		cli.Stdout = &out
		if err := cli.Run(ctx, args); err != nil {
			t.Fatalf("%v failed: %v\n%s", args, err, out.String())
		}
	}

	if repoID == 0 {
		t.Fatal("seed repo id should be non-zero")
	}
}
// TestCLIAppUsageBranches feeds the CLI a table of malformed or incomplete
// invocations and requires every one of them to return an error. The config
// path points into a fresh temporary directory.
func TestCLIAppUsageBranches(t *testing.T) {
	ctx := context.Background()
	configPath := filepath.Join(t.TempDir(), "config.toml")

	usageCases := [][]string{
		{"--format", "yaml", "status"},
		{"serve"},
		{"unknown"},
		{"configure", "--bad"},
		{"metadata", "extra"},
		{"status", "extra"},
		{"portable"},
		{"portable", "unknown"},
		{"portable", "prune", "extra"},
		{"portable", "prune", "--body-chars", "bad"},
		{"threads"},
		{"threads", "bad-repo"},
		{"threads", "openclaw/openclaw", "--numbers", "bad"},
		{"threads", "openclaw/openclaw", "--limit", "bad"},
		{"runs"},
		{"runs", "openclaw/openclaw", "--limit", "bad"},
		{"cluster-detail", "openclaw/openclaw", "--id", "bad"},
		{"close-thread", "openclaw/openclaw"},
		{"reopen-thread", "openclaw/openclaw", "--number", "bad"},
		{"close-cluster", "openclaw/openclaw"},
		{"reopen-cluster", "openclaw/openclaw", "--id", "bad"},
		{"exclude-cluster-member", "openclaw/openclaw", "--id", "1"},
		{"include-cluster-member", "openclaw/openclaw", "--id", "bad", "--number", "1"},
		{"set-cluster-canonical", "openclaw/openclaw", "--id", "1", "--number", "bad"},
		{"sync", "openclaw/openclaw", "--with", "bad"},
		{"refresh"},
		{"refresh", "openclaw/openclaw", "--no-sync", "--no-embed", "--no-cluster"},
		{"refresh", "bad-repo"},
		{"refresh", "openclaw/openclaw", "--limit", "bad"},
		{"refresh", "openclaw/openclaw", "--threshold", "bad"},
		{"refresh", "openclaw/openclaw", "--threshold", "2"},
		{"refresh", "openclaw/openclaw", "--min-size", "bad"},
		{"refresh", "openclaw/openclaw", "--k", "bad"},
		{"search"},
		{"search", "openclaw/openclaw"},
		{"search", "bad-repo", "--query", "x"},
		{"search", "openclaw/openclaw", "--query", "x", "--limit", "bad"},
		{"search", "openclaw/openclaw", "--query", "x", "--mode", "bad"},
		{"neighbors"},
		{"neighbors", "bad-repo"},
		{"neighbors", "openclaw/openclaw"},
		{"neighbors", "openclaw/openclaw", "--number", "bad"},
		{"neighbors", "openclaw/openclaw", "--number", "1", "--limit", "bad"},
		{"neighbors", "openclaw/openclaw", "--number", "1", "--threshold", "bad"},
		{"cluster"},
		{"cluster", "bad-repo"},
		{"cluster", "openclaw/openclaw", "--threshold", "bad"},
		{"cluster", "openclaw/openclaw", "--threshold", "2"},
		{"cluster", "openclaw/openclaw", "--min-size", "bad"},
		{"cluster", "openclaw/openclaw", "--max-cluster-size", "bad"},
		{"cluster", "openclaw/openclaw", "--limit", "bad"},
		{"embed"},
		{"embed", "bad-repo"},
		{"embed", "openclaw/openclaw", "--number", "bad"},
		{"embed", "openclaw/openclaw", "--limit", "bad"},
		{"tui", "one", "two"},
		{"tui", "--sort", "bad"},
	}

	for _, tc := range usageCases {
		cli := New()
		// Output is captured and discarded; only the returned error matters.
		cli.Stdout = new(bytes.Buffer)
		cli.Stderr = new(bytes.Buffer)
		argv := append([]string{"--config", configPath}, tc...)
		err := cli.Run(ctx, argv)
		if err == nil {
			t.Fatalf("%v succeeded, want error", tc)
		}
	}
}

67
internal/cli/gh_path.go Normal file
View File

@ -0,0 +1,67 @@
package cli
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
)
// resolveRealGHPath returns the path of the first usable gh binary that is not
// the gitcrawl shim.
//
// Candidates are tried in this order: GITCRAWL_GH_PATH (when set), a fixed
// list of common install locations, and finally whatever exec.LookPath finds
// for "gh". A candidate is rejected when it cannot be stat'ed, is a directory,
// or is recognised by isGitcrawlShimPath. Rejecting the GITCRAWL_GH_PATH
// candidate is a hard error rather than a silent skip.
func resolveRealGHPath() (string, error) {
	envPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH"))

	var candidates []string
	if envPath != "" {
		candidates = append(candidates, envPath)
	}
	candidates = append(candidates,
		"/opt/homebrew/opt/gh/bin/gh",
		"/usr/local/opt/gh/bin/gh",
		"/usr/local/bin/gh",
		"/usr/bin/gh",
	)
	if onPath, err := exec.LookPath("gh"); err == nil {
		candidates = append(candidates, onPath)
	}

	visited := make(map[string]struct{}, len(candidates))
	for _, raw := range candidates {
		path := strings.TrimSpace(raw)
		if path == "" {
			continue
		}
		if _, dup := visited[path]; dup {
			continue
		}
		visited[path] = struct{}{}

		// Only the explicitly configured path turns a rejection into an error.
		fromEnv := envPath != "" && path == envPath

		info, statErr := os.Stat(path)
		switch {
		case statErr != nil || info.IsDir():
			if fromEnv {
				return "", fmt.Errorf("real gh not found at GITCRAWL_GH_PATH %q", envPath)
			}
		case isGitcrawlShimPath(path):
			if fromEnv {
				return "", fmt.Errorf("GITCRAWL_GH_PATH points to the gitcrawl shim (%s); set it to the real gh binary", envPath)
			}
		default:
			return path, nil
		}
	}
	return "", fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH")
}
// isGitcrawlShimPath reports whether path, or the target it resolves to after
// following symlinks, has the base name "gitcrawl" or "gitcrawl-gh"
// (compared case-insensitively). An empty path is never a shim. When symlink
// resolution fails only the literal path is considered.
func isGitcrawlShimPath(path string) bool {
	if path == "" {
		return false
	}
	names := []string{path}
	if target, err := filepath.EvalSymlinks(path); err == nil {
		names = append(names, target)
	}
	for _, name := range names {
		switch strings.ToLower(filepath.Base(name)) {
		case "gitcrawl", "gitcrawl-gh":
			return true
		}
	}
	return false
}

View File

@ -44,9 +44,14 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
limitRaw := fs.String("limit", "", "maximum rows")
limitShortRaw := fs.String("L", "", "maximum rows")
jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
jqRaw := fs.String("jq", "", "jq filter for JSON output")
fs.String("match", "", "accepted for gh compatibility; local search covers indexed thread documents")
fs.String("sort", "", "accepted for gh compatibility")
fs.String("order", "", "accepted for gh compatibility")
syncIfStaleRaw := fs.String("sync-if-stale", "", "sync owner/repo first when the local mirror is older than this duration")
if err := fs.Parse(normalizeCommandArgs(args[1:], map[string]bool{
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "sync-if-stale": true,
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "jq": true,
"match": true, "sort": true, "order": true, "sync-if-stale": true,
})); err != nil {
return usageErr(err)
}
@ -99,6 +104,15 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
if err != nil {
return err
}
if len(threads) == 0 && ghSearchNeedsLiveEmptyCheck(kind, query, state) {
lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state)
if err != nil {
return err
}
if lastSync.IsZero() {
return localGHUnsupported(fmt.Errorf("empty local %s search has no broad %s sync", args[0], ghDefaultListState(state)))
}
}
jsonFields := strings.TrimSpace(*jsonFieldsRaw)
if jsonFields != "" || a.format == FormatJSON {
@ -109,12 +123,7 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
data, err := json.MarshalIndent(rows, "", " ")
if err != nil {
return err
}
_, err = fmt.Fprintf(a.Stdout, "%s\n", data)
return err
return a.writeJSONValue(rows, strings.TrimSpace(*jqRaw))
}
for _, thread := range threads {
@ -126,7 +135,7 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
}
func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) error {
stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, maxAge)
stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, state, maxAge)
if err != nil {
return err
}
@ -142,7 +151,7 @@ func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state st
return err
}
func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, maxAge time.Duration) (bool, time.Time, error) {
func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) (bool, time.Time, error) {
rt, err := a.openLocalRuntimeReadOnly(ctx)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
@ -158,7 +167,7 @@ func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, ma
}
return false, time.Time{}, err
}
lastSync, err := rt.Store.LastSuccessfulSyncAt(ctx, repo.ID)
lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state)
if err != nil {
return false, time.Time{}, err
}
@ -168,6 +177,20 @@ func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, ma
return time.Since(lastSync) > maxAge, lastSync, nil
}
// ghSearchNeedsLiveEmptyCheck reports whether an empty local search result
// should be double-checked: only for issue searches with a blank query whose
// effective state is "open".
func ghSearchNeedsLiveEmptyCheck(kind, query, state string) bool {
	unfilteredIssueSearch := kind == "issue" && strings.TrimSpace(query) == ""
	if !unfilteredIssueSearch {
		return false
	}
	return ghDefaultListState(state) == "open"
}

// ghDefaultListState returns state with surrounding whitespace removed, or
// "open" when nothing remains.
func ghDefaultListState(state string) string {
	if trimmed := strings.TrimSpace(state); trimmed != "" {
		return trimmed
	}
	return "open"
}
func parseGHSearchQuery(value string) (query string, repo string, state string) {
var queryParts []string
for _, part := range strings.Fields(value) {
@ -260,6 +283,8 @@ func ghSearchJSONValue(thread store.Thread, field string) (any, error) {
switch field {
case "number":
return thread.Number, nil
case "id":
return thread.GitHubID, nil
case "title":
return thread.Title, nil
case "state":

View File

@ -66,6 +66,16 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
t.Fatalf("repo: %v", err)
}
finishedAt := time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339Nano)
if _, err := st.RecordRun(ctx, store.RunRecord{
RepoID: repoID,
Kind: "sync",
Scope: "numbers:13",
Status: "success",
StartedAt: time.Now().UTC().Format(time.RFC3339Nano),
FinishedAt: time.Now().UTC().Format(time.RFC3339Nano),
}); err != nil {
t.Fatalf("record targeted sync: %v", err)
}
if _, err := st.RecordRun(ctx, store.RunRecord{
RepoID: repoID,
Kind: "sync",
@ -74,7 +84,7 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
StartedAt: finishedAt,
FinishedAt: finishedAt,
}); err != nil {
t.Fatalf("record sync: %v", err)
t.Fatalf("record broad sync: %v", err)
}
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
@ -82,14 +92,14 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
run := New()
run.configPath = configPath
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 2*time.Hour)
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", "open", 2*time.Hour)
if err != nil {
t.Fatalf("freshness check: %v", err)
}
if stale || lastSync.IsZero() {
t.Fatalf("expected cache to be fresh, stale=%v lastSync=%s", stale, lastSync)
}
stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 30*time.Minute)
stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", "open", 30*time.Minute)
if err != nil {
t.Fatalf("stale freshness check: %v", err)
}
@ -110,7 +120,7 @@ func TestGHSearchCacheStaleWhenRepoMissing(t *testing.T) {
run := New()
run.configPath = configPath
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", time.Minute)
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", "open", time.Minute)
if err != nil {
t.Fatalf("freshness check: %v", err)
}

451
internal/cli/gh_shim.go Normal file
View File

@ -0,0 +1,451 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/openclaw/gitcrawl/internal/store"
)
// runGHShim dispatches a gh command line (args excludes the program name).
//
// A small set of read commands is answered from the local store:
// "search <kind>", "issue|pr view", "issue|pr list", "pr checks",
// "run list" and "run view". "xcache" is handled by runGHXCache. Everything
// else, and any local handler that reports the request as unsupported, is
// forwarded to the real gh binary through execRealGHMaybeCached. With no
// arguments the real gh is executed directly.
func (a *App) runGHShim(ctx context.Context, args []string) error {
	if len(args) == 0 {
		return a.execRealGH(ctx, args)
	}
	switch args[0] {
	case "xcache":
		return a.runGHXCache(args[1:])
	case "search":
		if len(args) >= 2 && isGHSearchKind(args[1]) {
			return a.finishGHShimLocal(ctx, args, a.runGHSearch(ctx, args[1:]))
		}
	case "issue", "pr":
		if len(args) >= 2 {
			switch args[1] {
			case "view":
				return a.finishGHShimLocal(ctx, args, a.runGHThreadView(ctx, args[0], args[2:]))
			case "checks":
				// Only "pr checks" has a local implementation; "issue checks"
				// falls through to the real gh below.
				if args[0] == "pr" {
					return a.finishGHShimLocal(ctx, args, a.runGHPRChecks(ctx, args[2:]))
				}
			case "list":
				return a.finishGHShimLocal(ctx, args, a.runGHThreadList(ctx, args[0], args[2:]))
			}
		}
	case "run":
		if len(args) >= 2 {
			switch args[1] {
			case "list":
				return a.finishGHShimLocal(ctx, args, a.runGHRunList(ctx, args[2:]))
			case "view":
				return a.finishGHShimLocal(ctx, args, a.runGHRunView(ctx, args[2:]))
			}
		}
	}
	return a.execRealGHMaybeCached(ctx, args)
}

// finishGHShimLocal turns the result of a local handler into the shim's final
// result: success is counted as a "local_hits" cache hit, an "unsupported"
// error falls back to the real gh with the full original args, and any other
// error is returned unchanged.
func (a *App) finishGHShimLocal(ctx context.Context, args []string, err error) error {
	if err == nil {
		// Counter failures are deliberately ignored; statistics are best-effort.
		_ = a.incrementGHXCacheCounter("local_hits")
		return nil
	}
	if isLocalGHUnsupported(err) {
		return a.execRealGHMaybeCached(ctx, args)
	}
	return err
}
// runGHThreadView serves "gh issue view" / "gh pr view" from the local store.
//
// resource is "issue" or "pr"; args are the arguments after "view". Exactly
// one positional argument (a number or GitHub URL) is required. When the
// thread is missing locally and auto-hydration is allowed, a targeted sync of
// that single number is attempted before giving up. Output is JSON when
// --json, --jq or the JSON output format is requested, otherwise a short
// plain-text rendering. Errors wrapped by localGHUnsupported signal the caller
// to fall back to the real gh.
func (a *App) runGHThreadView(ctx context.Context, resource string, args []string) error {
	fs := flag.NewFlagSet(resource+" view", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	repoShort := fs.String("R", "", "repository")
	repoLong := fs.String("repo", "", "repository")
	jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
	jqRaw := fs.String("jq", "", "jq filter")
	if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"R": true, "repo": true, "json": true, "jq": true})); err != nil {
		return usageErr(err)
	}
	if fs.NArg() != 1 {
		return usageErr(fmt.Errorf("gh %s view requires a number or GitHub URL", resource))
	}
	// The reference is only used as a fallback source for the repository name;
	// its parse error is ignored because parseThreadNumber validates the same
	// argument right below.
	ref, _ := parseThreadReference(fs.Arg(0))
	number, err := parseThreadNumber(fs.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	repoArg := firstNonEmpty(*repoShort, *repoLong)
	if repoArg == "" {
		repoArg = ref.FullName()
	}
	repoValue, err := a.resolveGHRepo(ctx, repoArg)
	if err != nil {
		return localGHUnsupported(err)
	}
	kind := ghResourceKind(resource)
	thread, err := a.localGHThread(ctx, repoValue, kind, number)
	if err != nil && a.shouldAutoHydrateGHThread(err) {
		// Try to fetch just this thread, then look it up again.
		owner, repoName, parseErr := parseOwnerRepo(repoValue)
		if parseErr != nil {
			return localGHUnsupported(parseErr)
		}
		if _, syncErr := a.syncRepository(ctx, owner, repoName, syncOptions{
			Numbers:          []int{number},
			IncludePRDetails: resource == "pr",
		}); syncErr != nil {
			return localGHUnsupported(syncErr)
		}
		thread, err = a.localGHThread(ctx, repoValue, kind, number)
	}
	if err != nil {
		return err
	}
	jsonFields := strings.TrimSpace(*jsonFieldsRaw)
	jqExpr := strings.TrimSpace(*jqRaw)
	if jsonFields != "" || jqExpr != "" || a.format == FormatJSON {
		if jsonFields == "" {
			jsonFields = "number,title,state,url"
		}
		row, err := a.ghThreadViewJSONRow(ctx, repoValue, thread, jsonFields)
		if err != nil {
			return localGHUnsupported(err)
		}
		return a.writeJSONValue(row, jqExpr)
	}
	_, err = fmt.Fprintf(a.Stdout, "title:\t%s\nstate:\t%s\nurl:\t%s\n\n%s\n", thread.Title, thread.State, thread.HTMLURL, strings.TrimSpace(thread.Body))
	return err
}
// runGHThreadList serves "gh issue list" / "gh pr list" from the local store.
//
// resource is "issue" or "pr"; args are the arguments after "list". No
// positional arguments are accepted. When the local result is empty and the
// request is the kind ghThreadListNeedsLiveEmptyCheck flags, the result is only
// trusted if a matching list sync has been recorded; otherwise an
// "unsupported" error is returned so the caller can fall back to the real gh.
func (a *App) runGHThreadList(ctx context.Context, resource string, args []string) error {
	fs := flag.NewFlagSet(resource+" list", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	repoShort := fs.String("R", "", "repository")
	repoLong := fs.String("repo", "", "repository")
	stateRaw := fs.String("state", "open", "state")
	limitRaw := fs.String("limit", "", "maximum rows")
	limitShortRaw := fs.String("L", "", "maximum rows")
	jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
	jqRaw := fs.String("jq", "", "jq filter")
	searchRaw := fs.String("search", "", "local search query")
	authorRaw := fs.String("author", "", "filter by author")
	assigneeRaw := fs.String("assignee", "", "filter by assignee")
	var labels stringListFlag
	fs.Var(&labels, "label", "filter by label")

	valueFlags := map[string]bool{
		"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "jq": true,
		"search": true, "author": true, "assignee": true, "label": true,
	}
	if err := fs.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	if fs.NArg() != 0 {
		return usageErr(fmt.Errorf("unexpected gh %s list arguments: %s", resource, strings.Join(fs.Args(), " ")))
	}

	state := strings.TrimSpace(*stateRaw)
	if err := validateGHSearchState(state); err != nil {
		return usageErr(err)
	}
	limit, err := parseGHSearchLimit(*limitRaw, *limitShortRaw)
	if err != nil {
		return usageErr(err)
	}
	repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*repoShort, *repoLong))
	if err != nil {
		return localGHUnsupported(err)
	}

	// One request value drives both the lookup and the empty-result check.
	req := ghThreadListRequest{
		Repo:     repoValue,
		Kind:     ghResourceKind(resource),
		State:    state,
		Query:    strings.TrimSpace(*searchRaw),
		Author:   strings.TrimSpace(*authorRaw),
		Assignee: strings.TrimSpace(*assigneeRaw),
		Labels:   labels.Values(),
		Limit:    limit,
	}
	threads, err := a.localGHThreads(ctx, req)
	if err != nil {
		return err
	}
	if len(threads) == 0 && ghThreadListNeedsLiveEmptyCheck(req) {
		synced, err := a.localGHThreadListHasBroadSync(ctx, repoValue, state)
		if err != nil {
			return err
		}
		if !synced {
			return localGHUnsupported(fmt.Errorf("empty local %s list has no broad %s sync", resource, ghDefaultListState(state)))
		}
	}

	jsonFields := strings.TrimSpace(*jsonFieldsRaw)
	jqExpr := strings.TrimSpace(*jqRaw)
	wantJSON := jsonFields != "" || jqExpr != "" || a.format == FormatJSON
	if !wantJSON {
		for _, thread := range threads {
			if _, err := fmt.Fprintf(a.Stdout, "%d\t%s\t%s\n", thread.Number, thread.Title, thread.HTMLURL); err != nil {
				return err
			}
		}
		return nil
	}

	if jsonFields == "" {
		jsonFields = "number,title,state,url"
	}
	rows, err := ghSearchJSONRows(threads, jsonFields)
	if err != nil {
		return localGHUnsupported(err)
	}
	return a.writeJSONValue(rows, jqExpr)
}
// localGHThread looks up a single thread by number in the local store and
// returns it only if its kind matches. Closed threads are included in the
// lookup. Failures to open the store or resolve the repository, and a missing
// thread, are reported as "unsupported" errors; a malformed repoValue and
// store query errors are returned as-is.
func (a *App) localGHThread(ctx context.Context, repoValue, kind string, number int) (store.Thread, error) {
	var none store.Thread

	owner, repoName, err := parseOwnerRepo(repoValue)
	if err != nil {
		return none, err
	}
	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return none, localGHUnsupported(err)
	}
	defer rt.Store.Close()

	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return none, localGHUnsupported(err)
	}
	candidates, err := rt.Store.ListThreadsFiltered(ctx, store.ThreadListOptions{
		RepoID:        repo.ID,
		IncludeClosed: true,
		Numbers:       []int{number},
	})
	if err != nil {
		return none, err
	}
	for _, candidate := range candidates {
		if candidate.Number != number || candidate.Kind != kind {
			continue
		}
		return candidate, nil
	}
	return none, localGHUnsupported(fmt.Errorf("thread #%d was not found in local cache", number))
}
// ghThreadListRequest bundles the filters of a local "gh issue|pr list"
// lookup. localGHThreads forwards every field except Repo unchanged to
// store.ThreadSearchOptions.
type ghThreadListRequest struct {
// Repo is the "owner/repo" value; it is split with parseOwnerRepo.
Repo string
// Kind is the thread kind as produced by ghResourceKind ("issue" or "pull_request").
Kind string
// State is the requested state; ghDefaultListState treats blank as "open".
State string
// Query is the free-text search string (the --search flag).
Query string
// Author filters by author (the --author flag).
Author string
// Assignee filters by assignee (the --assignee flag).
Assignee string
// Labels holds the --label values.
Labels []string
// Limit is the maximum number of rows to return.
Limit int
}
// localGHThreads runs req against the local store and returns the matching
// threads. Locally closed threads are included. Failures to open the store or
// resolve the repository are reported as "unsupported" errors; a malformed
// req.Repo and store search errors are returned as-is.
func (a *App) localGHThreads(ctx context.Context, req ghThreadListRequest) ([]store.Thread, error) {
	owner, repoName, err := parseOwnerRepo(req.Repo)
	if err != nil {
		return nil, err
	}
	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return nil, localGHUnsupported(err)
	}
	defer rt.Store.Close()

	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return nil, localGHUnsupported(err)
	}
	search := store.ThreadSearchOptions{
		RepoID:               repo.ID,
		Query:                req.Query,
		Kind:                 req.Kind,
		State:                req.State,
		Author:               req.Author,
		Assignee:             req.Assignee,
		Labels:               req.Labels,
		IncludeLocallyClosed: true,
		Limit:                req.Limit,
	}
	return rt.Store.SearchThreads(ctx, search)
}
// ghThreadListNeedsLiveEmptyCheck reports whether an empty local list result
// should be double-checked: only for issue lists with no query, author,
// assignee or label filter whose effective state is "open".
func ghThreadListNeedsLiveEmptyCheck(req ghThreadListRequest) bool {
	if req.Kind != "issue" {
		return false
	}
	for _, filter := range []string{req.Query, req.Author, req.Assignee} {
		if strings.TrimSpace(filter) != "" {
			return false
		}
	}
	if len(req.Labels) > 0 {
		return false
	}
	return ghDefaultListState(req.State) == "open"
}
// localGHThreadListHasBroadSync reports whether the store has a non-zero
// LastSuccessfulListSyncAt timestamp for the repository and state. Failures to
// open the store or resolve the repository are reported as "unsupported"
// errors.
func (a *App) localGHThreadListHasBroadSync(ctx context.Context, repoValue, state string) (bool, error) {
	owner, repoName, err := parseOwnerRepo(repoValue)
	if err != nil {
		return false, err
	}
	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return false, localGHUnsupported(err)
	}
	defer rt.Store.Close()

	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return false, localGHUnsupported(err)
	}
	syncedAt, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state)
	if err != nil {
		return false, err
	}
	hasSync := !syncedAt.IsZero()
	return hasSync, nil
}
// resolveGHRepo determines the "owner/repo" a gh command applies to. The
// first non-blank source wins: the explicit value, the GH_REPO environment
// variable, then the "origin" remote of the git checkout in the current
// directory.
func (a *App) resolveGHRepo(ctx context.Context, explicit string) (string, error) {
	if repo := strings.TrimSpace(explicit); repo != "" {
		return repo, nil
	}
	if repo := strings.TrimSpace(os.Getenv("GH_REPO")); repo != "" {
		return repo, nil
	}
	remote, err := exec.CommandContext(ctx, "git", "remote", "get-url", "origin").Output()
	if err != nil {
		return "", fmt.Errorf("repository is required outside a git checkout; pass -R owner/repo")
	}
	inferred, err := ownerRepoFromGitRemote(strings.TrimSpace(string(remote)))
	if err != nil {
		return "", err
	}
	return inferred, nil
}
// execRealGH runs the real gh binary with args, wiring the process stdin to
// the child and the child's stdout/stderr to the App's writers. It returns
// the path-resolution error or the error from running the command.
func (a *App) execRealGH(ctx context.Context, args []string) error {
	binary, err := resolveRealGHPath()
	if err != nil {
		return err
	}
	child := exec.CommandContext(ctx, binary, args...)
	child.Stdin, child.Stdout, child.Stderr = os.Stdin, a.Stdout, a.Stderr
	return child.Run()
}
// writeJSONValue writes value as indented JSON to the App's stdout. When
// jqExpr is non-blank the JSON is instead piped through the external jq
// executable with jqExpr as its filter; if jq is not on PATH an "unsupported"
// error is returned.
func (a *App) writeJSONValue(value any, jqExpr string) error {
	encoded, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		return err
	}
	if strings.TrimSpace(jqExpr) != "" {
		jqPath, lookErr := exec.LookPath("jq")
		if lookErr != nil {
			return localGHUnsupported(fmt.Errorf("--jq requires jq executable"))
		}
		filter := exec.Command(jqPath, jqExpr)
		filter.Stdin = bytes.NewReader(encoded)
		filter.Stdout = a.Stdout
		filter.Stderr = a.Stderr
		return filter.Run()
	}
	_, err = fmt.Fprintf(a.Stdout, "%s\n", encoded)
	return err
}
// ghResourceKind maps a gh resource word to the stored thread kind: "pr"
// becomes "pull_request", anything else becomes "issue".
func ghResourceKind(resource string) string {
	switch resource {
	case "pr":
		return "pull_request"
	default:
		return "issue"
	}
}
// parseThreadNumber parses a thread reference into its number by delegating
// to parseOptionalThreadNumber.
func parseThreadNumber(value string) (int, error) {
	number, err := parseOptionalThreadNumber(value)
	return number, err
}
// ownerRepoFromGitRemote extracts "owner/repo" from a git remote URL.
//
// It accepts the scp-like form (git@github.com:owner/repo.git), https URLs and
// ssh:// URLs. Surrounding whitespace, trailing slashes and a ".git" suffix
// are ignored. The last two path components are used as owner and repo; an
// error is returned when either of them is missing or empty, so a malformed
// remote is reported instead of yielding a partial name.
func ownerRepoFromGitRemote(value string) (string, error) {
	value = strings.TrimRight(strings.TrimSpace(value), "/")
	value = strings.TrimSuffix(value, ".git")
	for _, prefix := range []string{
		"git@github.com:",
		"https://github.com/",
		"ssh://git@github.com/",
	} {
		value = strings.TrimPrefix(value, prefix)
	}
	parts := strings.Split(value, "/")
	if len(parts) < 2 {
		return "", fmt.Errorf("could not infer owner/repo from origin remote")
	}
	owner, repo := parts[len(parts)-2], parts[len(parts)-1]
	if strings.TrimSpace(owner) == "" || strings.TrimSpace(repo) == "" {
		return "", fmt.Errorf("could not infer owner/repo from origin remote")
	}
	// ToSlash keeps the result "/"-separated on every platform.
	return filepath.ToSlash(filepath.Join(owner, repo)), nil
}
var errLocalGHUnsupported = errors.New("local gh shim unsupported")
func localGHUnsupported(err error) error {
if err == nil {
return errLocalGHUnsupported
}
return fmt.Errorf("%w: %v", errLocalGHUnsupported, err)
}
func isLocalGHUnsupported(err error) bool {
return errors.Is(err, errLocalGHUnsupported) || strings.Contains(err.Error(), "unsupported --json field")
}
// stringListFlag is a repeatable string flag usable with flag.Var; every
// occurrence appends one value.
type stringListFlag []string

// String renders the collected values joined by commas.
func (f *stringListFlag) String() string {
	collected := []string(*f)
	return strings.Join(collected, ",")
}

// Set appends value with surrounding whitespace removed. It never fails.
func (f *stringListFlag) Set(value string) error {
	trimmed := strings.TrimSpace(value)
	*f = append(*f, trimmed)
	return nil
}

// Values returns the non-blank collected values, trimmed, in insertion order.
// The result is always a non-nil slice.
func (f *stringListFlag) Values() []string {
	kept := make([]string, 0, len(*f))
	for _, raw := range *f {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		kept = append(kept, trimmed)
	}
	return kept
}

View File

@ -0,0 +1,132 @@
package cli
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"os"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/store"
)
// ghPRDetailFreshness is the maximum age of a cached pull request detail
// (measured from its FetchedAt timestamp) that ghPullRequestCacheFresh still
// accepts as fresh.
const ghPRDetailFreshness = 90 * time.Second
// ensureFreshGHPullRequestCache returns the cached pull request data for
// number, going through loadGHPullRequestCache with freshness required.
func (a *App) ensureFreshGHPullRequestCache(ctx context.Context, repoValue string, number int) (store.PullRequestCache, error) {
	const requireFresh = true
	return a.loadGHPullRequestCache(ctx, repoValue, number, requireFresh)
}
// loadGHPullRequestCache returns the locally cached pull request data for
// number.
//
// A cache hit is returned directly when requireFresh is false or the entry
// passes ghPullRequestCacheFresh. Otherwise, if auto-hydration is permitted
// for the lookup outcome, a targeted sync of that pull request (including PR
// details) is run and the cache is read again. When hydration is not
// permitted the original lookup result, including its error, is returned
// unchanged. A failed sync is reported as an "unsupported" error.
func (a *App) loadGHPullRequestCache(ctx context.Context, repoValue string, number int, requireFresh bool) (store.PullRequestCache, error) {
	cached, err := a.localGHPullRequestCache(ctx, repoValue, number)
	usable := err == nil && (!requireFresh || ghPullRequestCacheFresh(cached))
	if usable {
		return cached, nil
	}
	if !a.shouldAutoHydrateGHPRDetails(err) {
		return cached, err
	}

	owner, repoName, parseErr := parseOwnerRepo(repoValue)
	if parseErr != nil {
		return store.PullRequestCache{}, parseErr
	}
	hydrate := syncOptions{
		Numbers:          []int{number},
		IncludePRDetails: true,
	}
	if _, syncErr := a.syncRepository(ctx, owner, repoName, hydrate); syncErr != nil {
		return store.PullRequestCache{}, localGHUnsupported(syncErr)
	}
	return a.localGHPullRequestCache(ctx, repoValue, number)
}
// ghPRFieldsNeedFresh reports whether fields contains "statusCheckRollup" or
// "mergeStateStatus".
func ghPRFieldsNeedFresh(fields []string) bool {
	for i := range fields {
		if fields[i] == "statusCheckRollup" || fields[i] == "mergeStateStatus" {
			return true
		}
	}
	return false
}
// shouldAutoHydrateGHPRDetails applies the same rule to pull request details
// as shouldAutoHydrateGHThread applies to threads.
func (a *App) shouldAutoHydrateGHPRDetails(err error) bool {
	allowed := a.shouldAutoHydrateGHThread(err)
	return allowed
}
// shouldAutoHydrateGHThread decides whether a targeted sync may be attempted
// after a local lookup that produced err. Setting GITCRAWL_GH_AUTO_HYDRATE to
// "0" disables hydration entirely. Otherwise hydration is allowed when err is
// nil, when it looks like a missing local cache entry, or when it is an
// "unsupported" error.
func (a *App) shouldAutoHydrateGHThread(err error) bool {
	setting := strings.TrimSpace(os.Getenv("GITCRAWL_GH_AUTO_HYDRATE"))
	switch {
	case strings.EqualFold(setting, "0"):
		return false
	case err == nil:
		return true
	default:
		return isMissingLocalPRCache(err) || errors.Is(err, errLocalGHUnsupported)
	}
}
// ghPullRequestCacheFresh reports whether cache may be served without a
// refresh. It is stale when the head SHA found in the detail's raw JSON
// differs (case-insensitively) from the stored HeadSHA, when FetchedAt is not
// a valid RFC 3339 timestamp, or when FetchedAt is older than
// ghPRDetailFreshness.
func ghPullRequestCacheFresh(cache store.PullRequestCache) bool {
	rawHead := ghPRHeadSHAFromRawJSON(cache.Detail.RawJSON)
	if rawHead != "" && !strings.EqualFold(cache.Detail.HeadSHA, rawHead) {
		return false
	}
	fetchedAt, err := time.Parse(time.RFC3339Nano, cache.Detail.FetchedAt)
	if err != nil {
		return false
	}
	age := time.Since(fetchedAt)
	return age <= ghPRDetailFreshness
}
func isMissingLocalPRCache(err error) bool {
if err == nil {
return false
}
return errors.Is(err, sql.ErrNoRows) ||
strings.Contains(err.Error(), "pull request detail") ||
strings.Contains(err.Error(), "was not found")
}
// findGHPullRequestNumberByBranch returns the number of a locally cached open
// pull request whose head branch equals branch.
//
// Up to 100 open pull requests are inspected. For each one the head ref is
// taken from the thread's raw JSON and, failing that, from the stored pull
// request detail. An empty branch never matches: without this guard it would
// compare equal to the empty head ref of any pull request whose payload has
// no head information and return an unrelated number. Failures to open the
// store or resolve the repository, and "no match", are reported as
// "unsupported" errors.
func (a *App) findGHPullRequestNumberByBranch(ctx context.Context, repoValue, branch string) (int, error) {
	owner, repoName, err := parseOwnerRepo(repoValue)
	if err != nil {
		return 0, err
	}
	if branch == "" {
		return 0, localGHUnsupported(fmt.Errorf("cached PR branch %q was not found", branch))
	}
	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return 0, localGHUnsupported(err)
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return 0, localGHUnsupported(err)
	}
	threads, err := rt.Store.SearchThreads(ctx, store.ThreadSearchOptions{
		RepoID:               repo.ID,
		Kind:                 "pull_request",
		State:                "open",
		IncludeLocallyClosed: true,
		Limit:                100,
	})
	if err != nil {
		return 0, err
	}
	for _, thread := range threads {
		if branch == ghPRHeadRefFromRawJSON(thread.RawJSON) {
			return thread.Number, nil
		}
		// Fall back to the stored PR detail; lookup errors just mean "no match here".
		if cache, cacheErr := rt.Store.PullRequestCache(ctx, repo.ID, thread.Number); cacheErr == nil && branch == cache.Detail.HeadRef {
			return thread.Number, nil
		}
	}
	return 0, localGHUnsupported(fmt.Errorf("cached PR branch %q was not found", branch))
}
// ghPRHeadRefFromRawJSON extracts the "head.ref" string from a raw JSON
// payload and returns it with surrounding whitespace removed. It returns ""
// when the payload cannot be decoded or carries no head ref.
func ghPRHeadRefFromRawJSON(raw string) string {
	type headInfo struct {
		Ref string `json:"ref"`
	}
	var doc struct {
		Head headInfo `json:"head"`
	}
	if json.Unmarshal([]byte(raw), &doc) != nil {
		return ""
	}
	return strings.TrimSpace(doc.Head.Ref)
}

View File

@ -0,0 +1,438 @@
package cli
import (
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/config"
)
// execRealGHMaybeCached runs the real gh binary for args, serving and filling
// an on-disk response cache for commands that cacheableGHRead accepts.
//
// Flow for cacheable reads: serve a cache entry that is still within its TTL;
// otherwise coordinate with concurrent invocations through a ".lock" file
// next to the entry, run gh, optionally fall back to an older entry when gh
// fails with what looks like a rate limit, store the new result, and finally
// replay gh's stdout/stderr. The returned error is the one from running gh
// (nil when a cached entry was served and written successfully).
// Counter and cache-write failures are ignored throughout (best effort).
func (a *App) execRealGHMaybeCached(ctx context.Context, args []string) error {
// Non-cacheable commands go straight to gh. A successful mutating command
// is counted and triggers cache invalidation for that mutation.
if !cacheableGHRead(args) {
err := a.execRealGH(ctx, args)
if err == nil && mutatingGHCommand(args) {
_ = a.incrementGHXCacheCounter("pass_through_writes")
_ = a.clearGHCommandCacheForMutation(ctx, args)
}
return err
}
// Without a usable cache directory, behave like an uncached call.
cacheDir, err := a.ghCommandCacheDir()
if err != nil {
return a.execRealGH(ctx, args)
}
ttl := a.ghCommandCacheTTL(ctx, args)
entryPath := filepath.Join(cacheDir, a.ghCommandCacheKey(ctx, args)+".json")
// Remember whatever entry exists right now so it can be served later if gh
// turns out to be rate limited. NOTE(review): presumably this read ignores
// the TTL, unlike readGHCommandCache below — confirm against the helper.
staleEntry, hasStaleEntry := readGHCommandCacheEntry(entryPath)
if entry, ok := readGHCommandCache(entryPath, ttl); ok {
_ = a.incrementGHXCacheCounter("fallback_hits")
return a.writeGHCommandCacheEntry(entry)
}
lockPath := entryPath + ".lock"
lock, locked := tryGHCommandCacheLock(lockPath)
if !locked {
// Someone else holds the lock: wait for their result, and if none
// arrives try once more to take the lock ourselves.
if entry, hit, ok := waitGHCommandCache(entryPath, lockPath, ttl, staleEntry, hasStaleEntry); ok {
_ = a.incrementGHXCacheCounter(hit)
return a.writeGHCommandCacheEntry(entry)
}
lock, locked = tryGHCommandCacheLock(lockPath)
}
if locked {
defer func() {
_ = lock.Close()
_ = os.Remove(lockPath)
}()
// Re-check under the lock: another process may have filled the entry
// between the first read and lock acquisition.
if entry, ok := readGHCommandCache(entryPath, ttl); ok {
_ = a.incrementGHXCacheCounter("fallback_hits")
return a.writeGHCommandCacheEntry(entry)
}
}
// Reached with or without the lock: gh is executed either way.
stdout, stderr, exitCode, err := a.captureRealGH(ctx, args)
_ = a.incrementGHXCacheBackendMiss(args)
// On a rate-limited failure prefer the previously stored entry, with a
// note on stderr about its age.
if err != nil && hasStaleEntry && ghCommandCacheEntryCanServeStale(staleEntry, ttl) && ghCommandOutputLooksRateLimited(stdout, stderr) {
_ = a.incrementGHXCacheCounter("stale_hits")
_, _ = fmt.Fprintf(a.Stderr, "gitcrawl: GitHub rate limited; serving stale cached gh response from %s ago\n", time.Since(staleEntry.CreatedAt).Round(time.Second))
return a.writeGHCommandCacheEntry(staleEntry)
}
// Successful results are always stored; failed ones only when
// cacheGHReadErrors allows it.
if err == nil || cacheGHReadErrors() {
_ = writeGHCommandCache(entryPath, ghCommandCacheEntry{
CreatedAt: time.Now().UTC(),
Args: append([]string(nil), args...),
Tags: a.ghCommandCacheTags(ctx, args),
ExitCode: exitCode,
Stdout: stdout,
Stderr: stderr,
})
}
_, _ = io.WriteString(a.Stdout, stdout)
_, _ = io.WriteString(a.Stderr, stderr)
return err
}
func cacheGHReadErrors() bool {
return !strings.EqualFold(strings.TrimSpace(os.Getenv("GITCRAWL_GH_CACHE_ERRORS")), "0")
}
// captureRealGH runs the real gh binary with args, forwarding this process's
// stdin and buffering the child's stdout and stderr instead of streaming them.
//
// It returns the captured stdout, the captured stderr, an exit code and the
// run error. The exit code is 127 when the gh binary cannot be resolved, 0 on
// success, the child's own exit code when it exited non-zero, and 1 for any
// other failure.
func (a *App) captureRealGH(ctx context.Context, args []string) (string, string, int, error) {
	binary, resolveErr := resolveRealGHPath()
	if resolveErr != nil {
		return "", "", 127, resolveErr
	}

	var outBuf, errBuf bytes.Buffer
	child := exec.CommandContext(ctx, binary, args...)
	child.Stdin, child.Stdout, child.Stderr = os.Stdin, &outBuf, &errBuf

	runErr := child.Run()
	code := 0
	if runErr != nil {
		var exited *exec.ExitError
		if errors.As(runErr, &exited) {
			code = exited.ExitCode()
		} else {
			// The process did not produce an exit status (e.g. it failed to start).
			code = 1
		}
	}
	return outBuf.String(), errBuf.String(), code, runErr
}
// ghCommandCacheDir returns the "gh-shim" directory under the configured cache
// directory, creating it (mode 0755) when it does not exist yet. If the config
// file cannot be loaded, the default configuration is used instead.
func (a *App) ghCommandCacheDir() (string, error) {
	cfg, loadErr := config.Load(a.configPath)
	if loadErr != nil {
		cfg = config.Default()
	}
	shimDir := filepath.Join(cfg.CacheDir, "gh-shim")
	if mkErr := os.MkdirAll(shimDir, 0o755); mkErr != nil {
		return "", mkErr
	}
	return shimDir, nil
}
// clearGHCommandCache removes all gh cache entries and lock files, discarding
// the count reported by clearGHCommandCacheCount.
func (a *App) clearGHCommandCache() error {
	if _, err := a.clearGHCommandCacheCount(); err != nil {
		return err
	}
	return nil
}
// clearGHCommandCacheCount deletes every lock file (".lock" suffix) and cache
// entry file in the gh cache directory and returns how many files were
// actually removed. Files that fail to delete are skipped silently; only a
// failure to locate or list the directory is reported as an error.
func (a *App) clearGHCommandCacheCount() (int, error) {
	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return 0, err
	}
	listing, err := os.ReadDir(cacheDir)
	if err != nil {
		return 0, err
	}

	deleted := 0
	for _, item := range listing {
		fileName := item.Name()
		if !strings.HasSuffix(fileName, ".lock") && !isGHCommandCacheEntryFile(fileName) {
			continue
		}
		if os.Remove(filepath.Join(cacheDir, fileName)) == nil {
			deleted++
		}
	}
	return deleted, nil
}
// ghXCacheStatsFile is the "_stats.json" file name. Because it starts with an
// underscore, isGHCommandCacheEntryFile does not treat it as a cache entry.
// NOTE(review): presumably this file holds the gh cache counters — confirm against its users.
const ghXCacheStatsFile = "_stats.json"
func isGHCommandCacheEntryFile(name string) bool {
return strings.HasSuffix(name, ".json") && !strings.HasPrefix(name, "_")
}
// ghCommandCacheEntry is the JSON record stored for one cached gh invocation.
type ghCommandCacheEntry struct {
// CreatedAt is when the entry was recorded; a zero value makes the entry unusable for reads.
CreatedAt time.Time `json:"created_at"`
// Args are the gh arguments the output was captured for.
Args []string `json:"args"`
// Tags are optional labels attached to the entry; omitted from JSON when empty.
Tags []string `json:"tags,omitempty"`
// ExitCode is the exit code of the captured gh run; non-zero marks a cached failure.
ExitCode int `json:"exit_code"`
// Stdout is the captured standard output.
Stdout string `json:"stdout"`
// Stderr is the captured standard error.
Stderr string `json:"stderr"`
}
// writeGHCommandCacheEntry replays a cached entry: its stdout and stderr are
// written to the app's streams (write errors are ignored), and a non-zero
// cached exit code is surfaced as an error.
func (a *App) writeGHCommandCacheEntry(entry ghCommandCacheEntry) error {
	_, _ = io.WriteString(a.Stdout, entry.Stdout)
	_, _ = io.WriteString(a.Stderr, entry.Stderr)
	if entry.ExitCode == 0 {
		return nil
	}
	return fmt.Errorf("cached gh command failed with exit code %d", entry.ExitCode)
}
// readGHCommandCache loads the entry at path and returns it only when it is
// still fresh: it must be readable, carry a non-zero CreatedAt, and be no
// older than the effective TTL computed by ghCommandCacheEntryTTL.
func readGHCommandCache(path string, ttl time.Duration) (ghCommandCacheEntry, bool) {
	cached, found := readGHCommandCacheEntry(path)
	switch {
	case !found:
		// Missing or unparsable entry.
	case cached.CreatedAt.IsZero():
		// No usable timestamp.
	case time.Since(cached.CreatedAt) > ghCommandCacheEntryTTL(cached, ttl):
		// Expired.
	default:
		return cached, true
	}
	return ghCommandCacheEntry{}, false
}
// readGHCommandCacheEntry reads and JSON-decodes the entry stored at path
// without any freshness check. The boolean is false when the file cannot be
// read or does not contain valid JSON for an entry.
func readGHCommandCacheEntry(path string) (ghCommandCacheEntry, bool) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return ghCommandCacheEntry{}, false
	}
	var parsed ghCommandCacheEntry
	if json.Unmarshal(raw, &parsed) != nil {
		return ghCommandCacheEntry{}, false
	}
	return parsed, true
}
// ghCommandCacheEntryTTL returns how long entry may be served as fresh.
//
// Successful entries use ttl, extended to ghCompletedRunCacheTTL(entry) when
// that is longer. Failed entries are capped at 5 minutes, or 2 minutes when
// the output looks rate limited; a shorter ttl is kept as is.
func ghCommandCacheEntryTTL(entry ghCommandCacheEntry, ttl time.Duration) time.Duration {
	if entry.ExitCode != 0 {
		ceiling := 5 * time.Minute
		if ghCommandCacheEntryLooksRateLimited(entry) {
			ceiling = 2 * time.Minute
		}
		if ceiling < ttl {
			return ceiling
		}
		return ttl
	}

	extended := ghCompletedRunCacheTTL(entry)
	if extended > ttl {
		return extended
	}
	return ttl
}
// ghCommandCacheEntryCanServeStale reports whether entry may still be served
// as a stale fallback. Only successful entries with a timestamp qualify, and
// only while their age is within the effective TTL plus the stale grace
// period for the entry's command.
func ghCommandCacheEntryCanServeStale(entry ghCommandCacheEntry, ttl time.Duration) bool {
	if entry.ExitCode != 0 {
		return false
	}
	if entry.CreatedAt.IsZero() {
		return false
	}
	age := time.Since(entry.CreatedAt)
	freshFor := ghCommandCacheEntryTTL(entry, ttl)
	// The plain TTL comparison is kept separately from the TTL+grace comparison.
	return age <= freshFor || age <= freshFor+ghCommandCacheStaleGrace(entry.Args)
}
// ghCommandCacheStaleGrace returns how long past its TTL an entry for args may
// still be served stale. A valid, non-negative duration in
// GITCRAWL_GH_STALE_GRACE overrides everything; otherwise the grace period
// depends on the gh command (and, for "api", on the normalized route).
func ghCommandCacheStaleGrace(args []string) time.Duration {
	if override := strings.TrimSpace(os.Getenv("GITCRAWL_GH_STALE_GRACE")); override != "" {
		if parsed, err := time.ParseDuration(override); err == nil && parsed >= 0 {
			return parsed
		}
	}
	if len(args) == 0 {
		return 5 * time.Minute
	}

	command := args[0]
	if command == "run" {
		return 2 * time.Minute
	}
	if command == "release" || command == "workflow" || command == "repo" {
		return 30 * time.Minute
	}
	if command == "api" {
		route := normalizeGHAPIRoute(args[1:])
		if strings.Contains(route, "/actions/runs") {
			return 2 * time.Minute
		}
		if strings.Contains(route, "/pages") {
			return 30 * time.Minute
		}
		if strings.Contains(route, "/contents") {
			return 6 * time.Hour
		}
		if strings.HasPrefix(route, "api users/") {
			return 24 * time.Hour
		}
	}
	// Unlisted commands and unmatched API routes share the general default.
	return 10 * time.Minute
}
// ghCommandCacheEntryLooksRateLimited applies ghCommandOutputLooksRateLimited
// to the output stored in entry.
func ghCommandCacheEntryLooksRateLimited(entry ghCommandCacheEntry) bool {
	stdout, stderr := entry.Stdout, entry.Stderr
	return ghCommandOutputLooksRateLimited(stdout, stderr)
}
func ghCommandOutputLooksRateLimited(stdout, stderr string) bool {
text := strings.ToLower(stdout + "\n" + stderr)
return strings.Contains(text, "api rate limit") ||
strings.Contains(text, "secondary rate limit") ||
strings.Contains(text, "rate limit exceeded") ||
strings.Contains(text, "x-ratelimit-remaining")
}
// writeGHCommandCache stores entry as JSON at path. The data is written to a
// temporary file in the same directory and then renamed over path, so readers
// never observe a partially written entry. The temporary file is removed
// whenever writing, closing or renaming fails.
func writeGHCommandCache(path string, entry ghCommandCacheEntry) error {
	parent := filepath.Dir(path)
	if err := os.MkdirAll(parent, 0o755); err != nil {
		return err
	}
	payload, err := json.Marshal(entry)
	if err != nil {
		return err
	}
	scratch, err := os.CreateTemp(parent, "."+filepath.Base(path)+".tmp-*")
	if err != nil {
		return err
	}
	scratchPath := scratch.Name()

	// Always close the file; a write error takes precedence over a close error.
	_, failure := scratch.Write(payload)
	closeErr := scratch.Close()
	if failure == nil {
		failure = closeErr
	}
	if failure == nil {
		failure = os.Rename(scratchPath, path)
	}
	if failure != nil {
		_ = os.Remove(scratchPath)
		return failure
	}
	return nil
}
func tryGHCommandCacheLock(path string) (*os.File, bool) {
lock, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
if err != nil {
return nil, false
}
_, _ = fmt.Fprintf(lock, "%d\n", os.Getpid())
return lock, true
}
// waitGHCommandCache waits for the cache entry at entryPath to become fresh
// while the lock file at lockPath exists. It returns the entry to serve, the
// name of the counter to increment ("fallback_hits" for a fresh entry,
// "stale_hits" for the stale copy), and whether an entry is being returned.
func waitGHCommandCache(entryPath, lockPath string, ttl time.Duration, staleEntry ghCommandCacheEntry, hasStaleEntry bool) (ghCommandCacheEntry, string, bool) {
// Fast path: with a servable stale entry, wait only 250ms for a fresh result
// and otherwise hand out the stale copy if the lock file is still present.
if hasStaleEntry && ghCommandCacheEntryCanServeStale(staleEntry, ttl) {
time.Sleep(250 * time.Millisecond)
if entry, ok := readGHCommandCache(entryPath, ttl); ok {
return entry, "fallback_hits", true
}
if _, err := os.Stat(lockPath); err == nil {
return staleEntry, "stale_hits", true
}
}
// Slow path: poll every 100ms for up to 30 seconds.
deadline := time.Now().Add(30 * time.Second)
for time.Now().Before(deadline) {
time.Sleep(100 * time.Millisecond)
if entry, ok := readGHCommandCache(entryPath, ttl); ok {
return entry, "fallback_hits", true
}
// The lock file is gone but no fresh entry appeared: stop waiting and report a miss.
if _, err := os.Stat(lockPath); os.IsNotExist(err) {
return ghCommandCacheEntry{}, "", false
}
}
// The lock outlived the deadline; remove the lock file before reporting a miss.
_ = os.Remove(lockPath)
return ghCommandCacheEntry{}, "", false
}
// ghCommandCacheKey returns the hex-encoded SHA-256 digest that identifies a
// gh invocation in the cache. The digest covers a key-format version marker,
// the resolved config path, the cache scope, GH_HOST, the effective GH_REPO,
// the stable identity (if any) and the canonicalized arguments, all joined
// with NUL separators.
func (a *App) ghCommandCacheKey(ctx context.Context, args []string) string {
	components := []string{
		"v4",
		config.ResolvePath(a.configPath),
		ghCommandCacheScope(args),
		os.Getenv("GH_HOST"),
		ghCommandCacheRepoEnv(args),
		a.ghCommandStableIdentity(ctx, args),
		strings.Join(canonicalGHCommandArgs(args), "\x00"),
	}
	digest := sha256.Sum256([]byte(strings.Join(components, "\x00")))
	return hex.EncodeToString(digest[:])
}
// ghCommandCacheScope returns the scope component of the cache key:
// "explicit" when the command names its own target, "env-repo" when GH_REPO
// is set, and otherwise "cwd:" followed by the current working directory
// (empty if it cannot be determined).
func ghCommandCacheScope(args []string) string {
	switch {
	case ghCommandHasOwnExplicitIdentity(args):
		return "explicit"
	case os.Getenv("GH_REPO") != "":
		return "env-repo"
	}
	workDir, _ := os.Getwd()
	return "cwd:" + workDir
}
// ghCommandCacheRepoEnv returns the GH_REPO value for the cache key, or ""
// when the command already names its own target.
func ghCommandCacheRepoEnv(args []string) string {
	if !ghCommandHasOwnExplicitIdentity(args) {
		return os.Getenv("GH_REPO")
	}
	return ""
}
// ghCommandHasOwnExplicitIdentity reports whether args identify their target
// on their own: an "api" call with a path argument, any command with an
// explicit repo flag, or "repo view" with a positional argument.
func ghCommandHasOwnExplicitIdentity(args []string) bool {
	if len(args) == 0 {
		return false
	}
	switch {
	case args[0] == "api":
		return ghAPIPathArg(args[1:]) != ""
	case hasGHExplicitRepoFlag(args):
		return true
	case len(args) >= 3 && args[0] == "repo" && args[1] == "view":
		return firstGHPositionalArg(args[2:]) != ""
	default:
		return false
	}
}
func hasGHExplicitRepoFlag(args []string) bool {
for index := 0; index < len(args); index++ {
arg := args[index]
switch arg {
case "-R", "--repo":
return index+1 < len(args) && strings.TrimSpace(args[index+1]) != ""
default:
if strings.HasPrefix(arg, "--repo=") && strings.TrimSpace(strings.TrimPrefix(arg, "--repo=")) != "" {
return true
}
}
}
return false
}
func firstGHPositionalArg(args []string) string {
for index := 0; index < len(args); index++ {
arg := args[index]
if strings.HasPrefix(arg, "-") {
if !strings.Contains(arg, "=") && index+1 < len(args) {
index++
}
continue
}
return strings.TrimSpace(arg)
}
return ""
}
// ghCommandStableIdentity returns an identity string of the form
// "pr-diff:<repo>:<number>:<head sha>" for a "pr diff" command whose pull
// request is known locally, or "" when the command is not a "pr diff", its
// repo/number cannot be parsed, the thread is not found locally, or no head
// SHA can be determined.
func (a *App) ghCommandStableIdentity(ctx context.Context, args []string) string {
if !isGHPRDiff(args) {
return ""
}
repo, number, ok := parseGHPRDiffIdentityArgs(args)
if !ok {
return ""
}
thread, err := a.localGHThread(ctx, repo, "pull_request", number)
if err != nil {
return ""
}
sha := ""
// Preferred source: the head SHA recorded in the local pull request cache.
// Every failure along the way is ignored and leaves sha empty.
owner, repoName, err := parseOwnerRepo(repo)
if err == nil {
if rt, openErr := a.openLocalRuntimeReadOnly(ctx); openErr == nil {
if localRepo, repoErr := rt.repository(ctx, owner, repoName); repoErr == nil {
if cache, cacheErr := rt.Store.PullRequestCache(ctx, localRepo.ID, number); cacheErr == nil {
sha = cache.Detail.HeadSHA
}
}
_ = rt.Store.Close()
}
}
// Fallback: read head.sha from the thread's raw JSON payload.
if sha == "" {
sha = ghPRHeadSHAFromRawJSON(thread.RawJSON)
}
if sha == "" {
return ""
}
return fmt.Sprintf("pr-diff:%s:%d:%s", repo, number, sha)
}

View File

@ -0,0 +1,535 @@
package cli
import (
"context"
"encoding/json"
"net/url"
"os"
"strings"
"time"
)
// cacheableGHRead reports whether a gh invocation is a read whose output may
// be cached. Commands carrying --web, --browser or --interactive are never
// cacheable, "api" calls are judged by ghAPIReadOnly, and every other command
// must appear in the table of read-only subcommands below.
func cacheableGHRead(args []string) bool {
	if len(args) == 0 || hasAnyGHFlag(args, "--web", "--browser", "--interactive") {
		return false
	}
	if args[0] == "api" {
		return ghAPIReadOnly(args[1:])
	}

	readOnlySubcommands := map[string][]string{
		"cache":    {"list"},
		"gist":     {"list", "view"},
		"label":    {"list"},
		"org":      {"list"},
		"project":  {"list", "view", "field-list", "item-list"},
		"run":      {"list", "view"},
		"pr":       {"diff", "checks", "list", "status", "view"},
		"issue":    {"list", "status", "view"},
		"release":  {"list", "view"},
		"repo":     {"view", "list"},
		"ruleset":  {"check", "list", "view"},
		"search":   {"code", "commits", "issues", "prs", "repos"},
		"secret":   {"list"},
		"variable": {"get", "list"},
		"workflow": {"list", "view"},
	}
	allowed, known := readOnlySubcommands[args[0]]
	if !known || len(args) < 2 {
		return false
	}
	for _, sub := range allowed {
		if args[1] == sub {
			return true
		}
	}
	return false
}
// ghCommandName returns a short name for a gh invocation: "" for no
// arguments, "api" for any api call, the command alone when there is no
// subcommand, and otherwise "<command> <subcommand>".
func ghCommandName(args []string) string {
	switch {
	case len(args) == 0:
		return ""
	case args[0] == "api":
		return "api"
	case len(args) == 1:
		return args[0]
	}
	return args[0] + " " + args[1]
}
// ghAPIReadOnly reports whether a `gh api` invocation is a plain GET that is
// safe to cache. args are the arguments after the leading "api".
//
// GraphQL calls are delegated to ghGraphQLReadOnly. For every other endpoint
// the call is read-only only when no request body or field flag is present
// and the effective method is GET (the default when --method/-X is absent).
//
// Field, body and method flags are recognized in every spelling the flag
// parser accepts: separate value ("-f k=v", "--method POST"), "=" form
// ("--field=k=v", "--input=file", "-X=POST") and attached shorthand
// ("-fk=v", "-XPOST"). Missing one of these spellings would classify a
// mutating request as cacheable.
func ghAPIReadOnly(args []string) bool {
	if ghAPIPathArg(args) == "graphql" {
		return ghGraphQLReadOnly(args)
	}
	method := "GET"
	for index := 0; index < len(args); index++ {
		arg := args[index]
		switch arg {
		case "--input", "-F", "-f", "--field", "--raw-field":
			return false
		case "--method", "-X":
			if index+1 >= len(args) {
				// A dangling method flag cannot be trusted to mean GET.
				return false
			}
			method = strings.ToUpper(args[index+1])
			index++
			continue
		}
		switch {
		case strings.HasPrefix(arg, "--input="),
			strings.HasPrefix(arg, "--field="),
			strings.HasPrefix(arg, "--raw-field="):
			return false
		case strings.HasPrefix(arg, "--method="):
			method = strings.ToUpper(strings.TrimPrefix(arg, "--method="))
		case strings.HasPrefix(arg, "-f"), strings.HasPrefix(arg, "-F"):
			// Shorthand field flag with its value attached ("-f=k=v", "-fk=v").
			// Long flags start with "--" and therefore never reach this case.
			return false
		case strings.HasPrefix(arg, "-X"):
			// Shorthand method flag with its value attached ("-X=POST", "-XPOST").
			method = strings.ToUpper(strings.TrimPrefix(arg[len("-X"):], "="))
		}
	}
	return method == "GET"
}
// ghGraphQLReadOnly reports whether a `gh api graphql` invocation is a
// read-only query. It returns false when --input is used, when a method or
// field flag is missing its value, when any field value starts with "@",
// when the method is neither GET nor POST, or when no inline "query" field is
// given. Otherwise the call is read-only if the query text, lowercased,
// starts with "query" or "{".
func ghGraphQLReadOnly(args []string) bool {
// No explicit method means POST here.
method := "POST"
query := ""
for index := 0; index < len(args); index++ {
arg := args[index]
switch arg {
case "--input":
return false
case "--method", "-X":
if index+1 >= len(args) {
return false
}
method = strings.ToUpper(args[index+1])
index++
case "-f", "-F", "--field", "--raw-field":
if index+1 >= len(args) {
return false
}
name, value, ok := strings.Cut(args[index+1], "=")
// A field value beginning with "@" makes the call non-read-only.
if ok && strings.HasPrefix(strings.TrimSpace(value), "@") {
return false
}
if ok && name == "query" {
query = value
}
index++
default:
// Same field handling for the "flag=name=value" spellings.
for _, prefix := range []string{"-f=", "-F=", "--field=", "--raw-field="} {
if strings.HasPrefix(arg, prefix) {
name, value, ok := strings.Cut(strings.TrimPrefix(arg, prefix), "=")
if ok && strings.HasPrefix(strings.TrimSpace(value), "@") {
return false
}
if ok && name == "query" {
query = value
}
}
}
if strings.HasPrefix(arg, "--method=") {
method = strings.ToUpper(strings.TrimPrefix(arg, "--method="))
}
}
}
if method != "GET" && method != "POST" {
return false
}
query = strings.TrimSpace(query)
if query == "" || strings.HasPrefix(query, "@") {
return false
}
// Only documents starting with "query" or the "{" shorthand count as reads.
lower := strings.ToLower(query)
return strings.HasPrefix(lower, "query") || strings.HasPrefix(lower, "{")
}
// ghCommandCacheTTL returns the cache TTL for args, treating a "pr diff" as
// stable when ghCommandStableIdentity yields a non-empty identity for it.
func (a *App) ghCommandCacheTTL(ctx context.Context, args []string) time.Duration {
	hasStableIdentity := a.ghCommandStableIdentity(ctx, args) != ""
	return ghCommandCacheTTLBase(args, hasStableIdentity)
}
// ghCommandCacheTTL returns the cache TTL for args without any stable
// "pr diff" identity.
func ghCommandCacheTTL(args []string) time.Duration {
	const stablePRDiff = false
	return ghCommandCacheTTLBase(args, stablePRDiff)
}
// ghCommandCacheTTLBase picks the cache TTL for a gh invocation. A valid,
// positive duration in GITCRAWL_GH_CACHE_TTL overrides everything. Otherwise
// the TTL depends on the command: a "pr diff" with a stable identity is kept
// for a week, "api" and "run" defer to their own policies, and the remaining
// commands use fixed values with 5 minutes as the general default.
func ghCommandCacheTTLBase(args []string, stablePRDiff bool) time.Duration {
	if override := strings.TrimSpace(os.Getenv("GITCRAWL_GH_CACHE_TTL")); override != "" {
		if parsed, err := time.ParseDuration(override); err == nil && parsed > 0 {
			return parsed
		}
	}
	if len(args) < 2 {
		return 5 * time.Minute
	}

	switch args[0] {
	case "pr":
		if args[1] == "diff" && stablePRDiff {
			return 7 * 24 * time.Hour
		}
		return 5 * time.Minute
	case "api":
		return ghAPICacheTTL(args[1:])
	case "run":
		return ghRunCacheTTL(args[1:])
	case "workflow", "search", "repo", "ruleset":
		return 15 * time.Minute
	case "release":
		return 30 * time.Minute
	case "secret", "variable", "label", "org", "project", "gist", "cache":
		return 10 * time.Minute
	}
	// "issue" and every unlisted command use the general default.
	return 5 * time.Minute
}
// ghRunCacheTTL returns the cache TTL for a `gh run` invocation; args start
// at the subcommand. "view" with --log/--log-failed is kept for 12 hours and
// "view" with --job for one minute; everything else uses 30 seconds.
func ghRunCacheTTL(args []string) time.Duration {
	if len(args) > 0 && args[0] == "view" {
		flags := args[1:]
		if hasAnyGHFlag(flags, "--log", "--log-failed") {
			return 12 * time.Hour
		}
		if hasAnyGHFlag(flags, "--job") {
			return 1 * time.Minute
		}
	}
	return 30 * time.Second
}
// ghAPICacheTTL returns the cache TTL for a `gh api` call based on its
// normalized route. Rules are evaluated in order and the first match wins, so
// more specific route fragments are listed before broader ones.
func ghAPICacheTTL(args []string) time.Duration {
	route := normalizeGHAPIRoute(args)
	if route == "api graphql" {
		return 6 * time.Hour
	}
	if strings.HasPrefix(route, "api users/") {
		return 7 * 24 * time.Hour
	}
	if strings.Contains(route, "/contents") {
		// Content pinned to a stable ref is kept far longer than content at a moving ref.
		if ghAPIContentRefIsStable(args) {
			return 7 * 24 * time.Hour
		}
		return 30 * time.Minute
	}

	fragmentRules := []struct {
		fragment string
		ttl      time.Duration
	}{
		{"/pages/builds/latest", 2 * time.Minute},
		{"/pages/health", 15 * time.Minute},
		{"/pages", 30 * time.Minute},
		{"/actions/runs/:id/logs", 12 * time.Hour},
		{"/actions/jobs/:id/logs", 12 * time.Hour},
		{"/actions/runs/:id/jobs", 1 * time.Minute},
		{"/actions/jobs/:id", 1 * time.Minute},
		{"/pending_deployments", 30 * time.Second},
		{"/actions/runs/:id", 30 * time.Second},
		{"/actions/workflows/", 15 * time.Minute},
		{"/actions/runs", 30 * time.Second},
		{"/releases", 1 * time.Hour},
		{"/branches", 10 * time.Minute},
		{"/commits", 10 * time.Minute},
	}
	for _, rule := range fragmentRules {
		if strings.Contains(route, rule.fragment) {
			return rule.ttl
		}
	}
	return 5 * time.Minute
}
// ghAPIContentRefIsStable reports whether the API path carries a "ref" query
// parameter that pins the request to a stable ref: either a 40-character hex
// commit SHA or a value accepted by ghAPIContentRefIsStableReleaseTag.
// The value is URL-unescaped when possible before it is inspected.
func ghAPIContentRefIsStable(args []string) bool {
	_, query, hasQuery := strings.Cut(ghAPIPathArg(args), "?")
	if !hasQuery {
		return false
	}
	for _, pair := range strings.Split(query, "&") {
		key, ref, hasValue := strings.Cut(pair, "=")
		if !hasValue || key != "ref" {
			continue
		}
		ref = strings.TrimSpace(ref)
		if unescaped, err := url.QueryUnescape(ref); err == nil {
			ref = strings.TrimSpace(unescaped)
		}
		isCommitSHA := len(ref) == 40 && isHexString(ref)
		if isCommitSHA || ghAPIContentRefIsStableReleaseTag(ref) {
			return true
		}
	}
	return false
}
// ghAPIContentRefIsStableReleaseTag reports whether value looks like a
// release tag of the form MAJOR.MINOR.PATCH. An optional "refs/tags/" prefix,
// a leading "v", build metadata after "+" and a pre-release suffix after "-"
// are tolerated; branch refs and any other "refs/" namespace are rejected.
func ghAPIContentRefIsStableReleaseTag(value string) bool {
	ref := strings.TrimSpace(value)
	if strings.HasPrefix(ref, "refs/heads/") {
		return false
	}
	ref = strings.TrimPrefix(ref, "refs/tags/")
	if strings.HasPrefix(ref, "refs/") {
		return false
	}
	ref = strings.TrimPrefix(ref, "v")

	// Drop build metadata first, then the pre-release suffix.
	if cut := strings.Index(ref, "+"); cut >= 0 {
		ref = ref[:cut]
	}
	if cut := strings.Index(ref, "-"); cut >= 0 {
		ref = ref[:cut]
	}

	segments := strings.Split(ref, ".")
	if len(segments) != 3 {
		return false
	}
	for _, segment := range segments {
		if !isDecimalString(segment) {
			return false
		}
	}
	return true
}
// isGHPRDiff reports whether args begin with the "pr diff" command.
func isGHPRDiff(args []string) bool {
	if len(args) < 2 {
		return false
	}
	return args[0] == "pr" && args[1] == "diff"
}
// parseGHPRDiffIdentityArgs extracts the repository and pull request number
// from a "pr diff" command. The repository comes from -R/--repo/--repo=, from
// a reference parsed out of the positional argument when no repo is set yet,
// or finally from GH_REPO. The boolean is true only when both a repository
// and a positive number were found.
func parseGHPRDiffIdentityArgs(args []string) (string, int, bool) {
if !isGHPRDiff(args) {
return "", 0, false
}
var repo string
var number int
for index := 2; index < len(args); index++ {
arg := args[index]
switch arg {
case "-R", "--repo":
if index+1 >= len(args) {
return "", 0, false
}
repo = strings.TrimSpace(args[index+1])
index++
default:
if strings.HasPrefix(arg, "--repo=") {
repo = strings.TrimSpace(strings.TrimPrefix(arg, "--repo="))
continue
}
// Other flags are skipped, as is everything after the number has been found.
// NOTE(review): only the flag token itself is skipped, so the value of a
// space-separated flag reaches parseThreadNumber below — confirm this is intended.
if strings.HasPrefix(arg, "-") || number != 0 {
continue
}
// The positional argument may itself name the repository; an explicit
// repo that was already seen takes precedence.
if ref, ok := parseThreadReference(arg); ok && ref.FullName() != "" && repo == "" {
repo = ref.FullName()
}
parsed, err := parseThreadNumber(arg)
if err != nil {
return "", 0, false
}
number = parsed
}
}
if repo == "" {
if envRepo := strings.TrimSpace(os.Getenv("GH_REPO")); envRepo != "" {
repo = envRepo
}
}
return repo, number, repo != "" && number > 0
}
func ghPRHeadSHAFromRawJSON(raw string) string {
var payload struct {
Head struct {
SHA string `json:"sha"`
} `json:"head"`
}
if err := json.Unmarshal([]byte(raw), &payload); err != nil {
return ""
}
return strings.TrimSpace(payload.Head.SHA)
}
// normalizeGHAPIRoute reduces the path of a `gh api` call to a low-cardinality
// route string of the form "api <segments>". The api.github.com URL prefix, a
// leading slash and the query string are dropped; decimal segments become
// ":id", the two segments following "repos" become ":owner"/":repo",
// everything below ".../contents" collapses to ":path" and everything below
// ".../git/ref" collapses to ":ref". An empty path yields "api".
func normalizeGHAPIRoute(args []string) string {
path := ghAPIPathArg(args)
path = strings.TrimPrefix(path, "https://api.github.com/")
path = strings.TrimPrefix(path, "http://api.github.com/")
path = strings.TrimPrefix(path, "/")
if before, _, found := strings.Cut(path, "?"); found {
path = before
}
if path == "" {
return "api"
}
parts := strings.Split(path, "/")
for index, part := range parts {
if part == "" {
continue
}
// Past ".../contents": replace the remaining file path with one placeholder and stop.
if index >= 4 && len(parts) > 3 && parts[3] == "contents" {
parts = append(parts[:4], ":path")
break
}
// Past ".../git/ref": replace the remaining ref name with one placeholder and stop.
if index >= 5 && len(parts) > 4 && parts[3] == "git" && parts[4] == "ref" {
parts = append(parts[:5], ":ref")
break
}
switch {
case isDecimalString(part):
parts[index] = ":id"
case index >= 2 && parts[index-2] == "repos":
// Preserve owner/repo placeholders without leaking every repo into the route cardinality.
parts[index-1] = ":owner"
parts[index] = ":repo"
}
}
return "api " + strings.Join(parts, "/")
}
func ghAPIPathArg(args []string) string {
for index := 0; index < len(args); index++ {
arg := args[index]
switch arg {
case "-X", "--method":
index++
continue
case "--paginate":
continue
case "-H", "--header", "--hostname", "--jq", "-q", "--preview", "--template", "-t", "--input":
if index+1 < len(args) && !strings.Contains(arg, "=") {
index++
}
continue
case "-f", "-F", "--field", "--raw-field":
index++
continue
default:
if strings.HasPrefix(arg, "-") {
continue
}
return strings.TrimSpace(arg)
}
}
return ""
}
// isDecimalString reports whether value is non-empty and consists solely of
// the ASCII digits 0-9.
func isDecimalString(value string) bool {
	for i := 0; i < len(value); i++ {
		if c := value[i]; c < '0' || c > '9' {
			return false
		}
	}
	return value != ""
}
// isHexString reports whether value is non-empty and consists solely of
// hexadecimal digits (0-9, a-f, A-F).
func isHexString(value string) bool {
	if len(value) == 0 {
		return false
	}
	for _, r := range value {
		switch {
		case r >= '0' && r <= '9':
		case r >= 'a' && r <= 'f':
		case r >= 'A' && r <= 'F':
		default:
			return false
		}
	}
	return true
}
// mutatingGHCommand reports whether a gh invocation changes remote state.
// At least a command and a subcommand are required. "api" calls are mutating
// whenever ghAPIReadOnly rejects them; every other command is looked up in
// the table of mutating subcommands below. Subcommands not listed there
// (for example "pr checkout") are treated as non-mutating.
func mutatingGHCommand(args []string) bool {
	if len(args) < 2 {
		return false
	}
	if args[0] == "api" {
		return !ghAPIReadOnly(args[1:])
	}

	writeSubcommands := map[string][]string{
		"cache":    {"delete"},
		"gist":     {"create", "delete", "edit"},
		"issue":    {"close", "comment", "create", "delete", "edit", "lock", "pin", "reopen", "transfer", "unlock", "unpin"},
		"label":    {"clone", "create", "delete", "edit"},
		"pr":       {"close", "comment", "create", "edit", "lock", "merge", "ready", "reopen", "review", "unlock"},
		"project":  {"close", "copy", "create", "delete", "edit", "field-create", "field-delete", "item-add", "item-archive", "item-create", "item-delete", "item-edit", "link", "mark-template", "unlink"},
		"release":  {"create", "delete", "delete-asset", "edit", "upload"},
		"repo":     {"archive", "create", "delete", "edit", "fork", "rename", "sync"},
		"ruleset":  {"delete"},
		"run":      {"cancel", "delete", "rerun"},
		"secret":   {"delete", "remove", "set"},
		"variable": {"delete", "remove", "set"},
		"workflow": {"disable", "enable", "run"},
	}
	for _, sub := range writeSubcommands[args[0]] {
		if args[1] == sub {
			return true
		}
	}
	return false
}
func hasAnyGHFlag(args []string, flags ...string) bool {
for _, arg := range args {
for _, flag := range flags {
if arg == flag || strings.HasPrefix(arg, flag+"=") {
return true
}
}
}
return false
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,360 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestGHCacheDescriptorAndPolicyBranches exercises the gh cache helpers in one
// pass: argument canonicalization, tag matching and tag derivation, repo and
// number extraction, completed-run TTL extension, cacheability checks, command
// naming, the run/api TTL policies and release-tag ref classification.
func TestGHCacheDescriptorAndPolicyBranches(t *testing.T) {
if got := canonicalGHCommandArgs(nil); got != nil {
t.Fatalf("nil canonical args = %+v", got)
}
canonical := canonicalGHCommandArgs([]string{"pr", "view", "12", "--json", "title,number", "-R", " openclaw/openclaw ", "--method", "get", "--flag"})
if strings.Join(canonical, " ") != "pr view 12 --flag --json=number,title --method=GET --repo=openclaw/openclaw" {
t.Fatalf("canonical args = %+v", canonical)
}
if got := canonicalGHCommandArgs([]string{"pr", "view", "--repo"}); strings.Join(got, " ") != "pr view --repo" {
t.Fatalf("missing value canonical args = %+v", got)
}
if !ghCacheTagsMatch([]string{"repo:openclaw/openclaw", "issues"}, stringSet([]string{"issues", "repo:openclaw/openclaw"})) {
t.Fatal("specific issue tag should match")
}
if ghCacheTagsMatch([]string{"repo:openclaw/openclaw"}, stringSet([]string{"repo:openclaw/openclaw", "issues"})) {
t.Fatal("repo tag alone should not match specific mutation")
}
app := New()
// GH_REPO is set so that commands without an explicit repo have one available.
t.Setenv("GH_REPO", "openclaw/from-env")
// Every command shape below is expected to produce at least one tag.
tagCases := [][]string{
app.ghCommandCacheTags(context.Background(), []string{"issue", "view", "https://github.com/openclaw/openclaw/issues/10", "-R", "openclaw/openclaw"}),
app.ghCommandCacheTags(context.Background(), []string{"pr", "view", "12"}),
app.ghMutationInvalidationTags(context.Background(), []string{"run", "rerun", "99", "-R", "openclaw/openclaw"}),
app.ghCommandCacheTags(context.Background(), []string{"workflow", "view", "ci.yml", "-R", "openclaw/openclaw"}),
app.ghCommandCacheTags(context.Background(), []string{"release", "view", "v0.7.0", "-R", "openclaw/openclaw"}),
app.ghCommandCacheTags(context.Background(), []string{"api", "repos/openclaw/openclaw/actions/runs/99/jobs"}),
app.ghMutationInvalidationTags(context.Background(), []string{"cache", "delete"}),
}
for _, tags := range tagCases {
if len(tags) == 0 {
t.Fatalf("empty tags")
}
}
if repo := ghCommandRepo([]string{"repo", "view", "openclaw/openclaw"}); repo != "openclaw/openclaw" {
t.Fatalf("repo view repo = %q", repo)
}
if repo := ghAPIRepo([]string{"https://api.github.com/repos/openclaw/openclaw/issues/10"}); repo != "openclaw/openclaw" {
t.Fatalf("api repo = %q", repo)
}
if tags := ghAPITags([]string{"repos/openclaw/openclaw/releases/latest"}); len(tags) < 2 || tags[1] != "releases" {
t.Fatalf("release api tags = %+v", tags)
}
if got := firstGHNumberArg([]string{"--repo", "openclaw/openclaw", "https://github.com/openclaw/openclaw/pull/12"}); got != "12" {
t.Fatalf("first number = %q", got)
}
if got := uniqueStrings([]string{"", "a", " a ", "b"}); len(got) != 2 || got[0] != "a" || got[1] != "b" {
t.Fatalf("unique = %+v", got)
}
// Completed runs and job lists are expected to get extended TTLs.
completedRun := ghCommandCacheEntry{Args: []string{"run", "view", "99"}, Stdout: `{"status":"completed"}`}
if ttl := ghCompletedRunCacheTTL(completedRun); ttl != 12*time.Hour {
t.Fatalf("run view ttl = %s", ttl)
}
completedList := ghCommandCacheEntry{Args: []string{"api", "repos/openclaw/openclaw/actions/runs"}, Stdout: `{"workflow_runs":[{"status":"completed"}]}`}
if ttl := ghCompletedRunCacheTTL(completedList); ttl != 30*time.Minute {
t.Fatalf("run list ttl = %s", ttl)
}
jobs := ghCommandCacheEntry{Args: []string{"api", "repos/openclaw/openclaw/actions/runs/99/jobs"}, Stdout: `{"jobs":[{"conclusion":"success"}]}`}
if ttl := ghCompletedRunCacheTTL(jobs); ttl != 12*time.Hour {
t.Fatalf("jobs ttl = %s", ttl)
}
if ghJSONStatusCompleted(`{`) || ghJSONCollectionCompleted(`[]`) || allGHStatusMapsCompleted([]map[string]any{{"status": "queued"}}) {
t.Fatal("incomplete JSON status classified as completed")
}
if !cacheableGHRead([]string{"label", "list"}) || !cacheableGHRead([]string{"org", "list"}) || !cacheableGHRead([]string{"search", "repos"}) {
t.Fatal("expected read-only gh commands to be cacheable")
}
if ghCommandName(nil) != "" || ghCommandName([]string{"pr"}) != "pr" || ghCommandName([]string{"api", "repos/x/y"}) != "api" {
t.Fatal("gh command name mismatch")
}
if ghRunCacheTTL(nil) != 30*time.Second || ghRunCacheTTL([]string{"view", "--job", "1"}) != time.Minute || ghRunCacheTTL([]string{"rerun"}) != 30*time.Second {
t.Fatal("run ttl mismatch")
}
if ttl := ghAPICacheTTL([]string{"repos/openclaw/openclaw/actions/runs/99/jobs"}); ttl != time.Minute {
t.Fatalf("jobs ttl = %s", ttl)
}
if ttl := ghAPICacheTTL([]string{"repos/openclaw/openclaw/contents/file?ref=main"}); ttl != 30*time.Minute {
t.Fatalf("unstable content ttl = %s", ttl)
}
if !ghAPIContentRefIsStableReleaseTag("refs/tags/v1.2.3") || !ghAPIContentRefIsStableReleaseTag("v1.2.3+build") || ghAPIContentRefIsStableReleaseTag("v1.2") {
t.Fatal("version ref classification mismatch")
}
}
// TestPortableRuntimeHelperBranches covers the portable runtime helpers
// against a temporary store layout: refreshing the runtime database copy,
// detecting a missing source, portable root and git worktree detection,
// refresh-state persistence, recency classification, the refresh TTL
// override and the atomic copy failure path.
func TestPortableRuntimeHelperBranches(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
// Build a store root containing a data/ database file and a bare ".git" directory.
root := filepath.Join(dir, "store")
dbPath := filepath.Join(root, "data", "openclaw__openclaw.sync.db")
if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
t.Fatalf("mkdir db dir: %v", err)
}
if err := os.Mkdir(filepath.Join(root, ".git"), 0o755); err != nil {
t.Fatalf("mkdir git dir: %v", err)
}
if err := os.WriteFile(dbPath, []byte("db-v1"), 0o644); err != nil {
t.Fatalf("write db: %v", err)
}
app := New()
app.configPath = filepath.Join(dir, "config.toml")
mirror, err := app.portableRuntimeDBPath(dbPath)
if err != nil {
t.Fatalf("runtime path: %v", err)
}
// The first refresh must report a change; repeating it with an unchanged source must not.
changed, err := refreshPortableRuntimeDB(ctx, dbPath, mirror, false)
if err != nil || !changed {
t.Fatalf("initial runtime copy changed=%v err=%v", changed, err)
}
changed, err = refreshPortableRuntimeDB(ctx, dbPath, mirror, false)
if err != nil || changed {
t.Fatalf("second runtime copy changed=%v err=%v", changed, err)
}
if needs, err := portableRuntimeNeedsCopy(filepath.Join(dir, "missing.db"), mirror); err == nil || needs {
t.Fatalf("missing source needs=%v err=%v", needs, err)
}
if _, ok := portableStoreRoot(filepath.Join(dir, "plain", "db.sqlite")); ok {
t.Fatal("plain db should not have portable root")
}
if gitWorktreeClean(ctx, root) {
t.Fatal("fake git directory should not be a clean worktree")
}
// Refresh state must round-trip through its file; a missing file reads as empty state.
statePath := portableStoreRefreshStatePath(mirror)
state := portableStoreRefreshState{LastSuccess: time.Now().UTC().Format(time.RFC3339Nano)}
if err := writePortableStoreRefreshState(statePath, state); err != nil {
t.Fatalf("write state: %v", err)
}
if got := readPortableStoreRefreshState(statePath); got.LastSuccess == "" {
t.Fatalf("read state = %+v", got)
}
if got := readPortableStoreRefreshState(filepath.Join(dir, "missing.json")); got.LastSuccess != "" {
t.Fatalf("missing state = %+v", got)
}
if !recentPortableRefresh(state.LastSuccess, time.Now().UTC(), time.Hour) || recentPortableRefresh("bad", time.Now().UTC(), time.Hour) || recentPortableRefresh("", time.Now().UTC(), time.Hour) {
t.Fatal("recent refresh classification mismatch")
}
t.Setenv("GITCRAWL_PORTABLE_REFRESH_TTL", "0")
if portableStoreRefreshInterval() != 0 {
t.Fatal("zero refresh ttl not honored")
}
if err := copyFileAtomic(filepath.Join(dir, "missing"), filepath.Join(dir, "out", "db")); err == nil {
t.Fatal("missing source copy should fail")
}
}
// TestGHCacheClearMatchingBranches checks cache invalidation on disk: clearing
// by tag removes the matching entry and its lock file, and a "cache delete"
// mutation removes the entry again after it has been rewritten.
func TestGHCacheClearMatchingBranches(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
cfg, err := config.Load(configPath)
if err != nil {
t.Fatalf("load config: %v", err)
}
app := New()
app.configPath = configPath
dir, err := app.ghCommandCacheDir()
if err != nil {
t.Fatalf("cache dir: %v", err)
}
// Seed one tagged entry, its lock file and an unrelated non-entry file.
entry := ghCommandCacheEntry{
Args: []string{"issue", "view", "10", "-R", "openclaw/openclaw"},
Stdout: "{}",
Stderr: "",
ExitCode: 0,
CreatedAt: time.Now(),
Tags: []string{"repo:openclaw/openclaw", "issues", "issue:10"},
}
data, err := json.Marshal(entry)
if err != nil {
t.Fatalf("marshal entry: %v", err)
}
entryPath := filepath.Join(dir, "entry.json")
if err := os.WriteFile(entryPath, data, 0o644); err != nil {
t.Fatalf("write entry: %v", err)
}
if err := os.WriteFile(filepath.Join(dir, "entry.lock"), []byte("lock"), 0o644); err != nil {
t.Fatalf("write lock: %v", err)
}
if err := os.WriteFile(filepath.Join(dir, "ignore.txt"), []byte("x"), 0o644); err != nil {
t.Fatalf("write ignored entry: %v", err)
}
// Clearing by a matching tag must remove both the entry and its lock file.
if err := app.clearGHCommandCacheMatching([]string{"issue:10"}); err != nil {
t.Fatalf("clear matching: %v", err)
}
if _, err := os.Stat(entryPath); !os.IsNotExist(err) {
t.Fatalf("entry still exists: %v", err)
}
if _, err := os.Stat(filepath.Join(dir, "entry.lock")); !os.IsNotExist(err) {
t.Fatalf("lock still exists: %v", err)
}
// Restore the entry and verify that a "cache delete" mutation removes it too.
if err := os.WriteFile(entryPath, data, 0o644); err != nil {
t.Fatalf("rewrite entry: %v", err)
}
if err := app.clearGHCommandCacheForMutation(ctx, []string{"cache", "delete"}); err != nil {
t.Fatalf("clear global mutation: %v", err)
}
if _, err := os.Stat(entryPath); !os.IsNotExist(err) {
t.Fatalf("global clear left entry: %v", err)
}
if cfg.CacheDir == "" {
t.Fatal("seed config cache dir should not be empty")
}
}
// TestGHMetricsSearchRunsAndXCacheBranches is a branch-coverage test that
// walks the xcache counter helpers, the local search/run JSON row builders,
// the search argument parsers, and the cache-key directory scan in sequence.
func TestGHMetricsSearchRunsAndXCacheBranches(t *testing.T) {
// Counter names: an unknown name is rejected, every known name is accepted.
var counters ghXCacheCounters
if incrementGHXCacheCounters(&counters, "unknown", nil) {
t.Fatal("unknown counter should not increment")
}
for _, name := range []string{"local_hits", "fallback_hits", "stale_hits", "backend_misses", "pass_through_writes"} {
if !incrementGHXCacheCounters(&counters, name, []string{"api", "repos/openclaw/openclaw/actions/runs/99"}) {
t.Fatalf("counter %s did not increment", name)
}
}
// The per-hour bucket variant must accept and reject the same names.
var bucket ghXCacheCounterBucket
for _, name := range []string{"local_hits", "fallback_hits", "stale_hits", "backend_misses", "pass_through_writes"} {
if !incrementGHXCacheCounterBucket(&bucket, name, []string{"pr", "view", "12", "-R", "openclaw/openclaw"}) {
t.Fatalf("bucket counter %s did not increment", name)
}
}
if incrementGHXCacheCounterBucket(&bucket, "bad", nil) {
t.Fatal("unknown bucket counter should not increment")
}
// Over-long miss keys are capped at 180 bytes and end in "...".
if got := ghCommandMissKey([]string{"pr", "view", strings.Repeat("x", 220)}); len(got) != 180 || !strings.HasSuffix(got, "...") {
t.Fatalf("miss key = %q len=%d", got, len(got))
}
// API routes have the numeric run id replaced by the ":id" placeholder.
if route := ghCommandRoute([]string{"api", "repos/openclaw/openclaw/actions/runs/99"}); !strings.Contains(route, "/actions/runs/:id") {
t.Fatalf("api route = %q", route)
}
if route := ghCommandRoute([]string{"pr"}); route != "pr" {
t.Fatalf("single route = %q", route)
}
// Windowed aggregation: only the bucket started within the last hour counts;
// the two-hour-old bucket and the bucket without a start time are ignored.
now := time.Now().UTC()
counters.Hourly = map[string]ghXCacheCounterBucket{
"old": {StartedAt: now.Add(-2 * time.Hour), LocalHits: 9},
"new": {StartedAt: now.Add(-5 * time.Minute), LocalHits: 1, BackendMissesByCommand: map[string]int64{"api": 2}},
"zero": {LocalHits: 8},
}
recent := counters.since(time.Hour, now)
if recent.LocalHits != 1 || recent.BackendMissesByCommand["api"] != 2 {
t.Fatalf("recent counters = %+v", recent)
}
mergeCounterMap(&recent.BackendMissesByRoute, map[string]int64{"r": 3})
if recent.BackendMissesByRoute["r"] != 3 {
t.Fatalf("merged counters = %+v", recent.BackendMissesByRoute)
}
// Pruning drops the bucket older than the seven-day cutoff and keeps the current one.
buckets := map[string]ghXCacheCounterBucket{"old": {StartedAt: now.Add(-8 * 24 * time.Hour)}, "new": {StartedAt: now}}
pruneGHXCacheBuckets(buckets, now.Add(-7*24*time.Hour))
if _, ok := buckets["old"]; ok || buckets["new"].StartedAt.IsZero() {
t.Fatalf("pruned buckets = %+v", buckets)
}
if _, start := ghXCacheCurrentBucket(now); !start.Equal(now.Truncate(time.Hour)) {
t.Fatalf("bucket start = %s", start)
}
// A lock last modified three minutes ago is stale; one modified just now is not.
if staleGHCommandCacheLock(fakeFileInfo{mod: now.Add(-3 * time.Minute)}) != true || staleGHCommandCacheLock(fakeFileInfo{mod: now}) {
t.Fatal("stale lock classification mismatch")
}
// Search JSON rows built from a locally stored thread.
thread := store.Thread{
GitHubID: "99", Number: 99, Title: "Title", State: "open", HTMLURL: "https://example.com/99",
LabelsJSON: `["bug",""]`, AuthorLogin: "alice", AuthorType: "User", Body: "body",
UpdatedAt: "2026-05-08T00:00:00Z", CreatedAtGitHub: "2026-05-07T00:00:00Z", ClosedAtGitHub: "", IsDraft: true,
}
fields := "number,id,title,state,url,updatedAt,createdAt,closedAt,mergedAt,labels,isDraft,author,body"
rows, err := ghSearchJSONRows([]store.Thread{thread}, fields)
if err != nil || rows[0]["number"] != 99 {
t.Fatalf("search rows=%+v err=%v", rows, err)
}
// Label decoding: invalid JSON yields nil; object-form labels keep their name.
if labels := ghLabelsFromJSON(`not-json`); labels != nil {
t.Fatalf("bad labels = %+v", labels)
}
if labels := ghLabelsFromJSON(`[{"name":"bug","color":"red"}]`); len(labels) != 1 || labels[0].Name != "bug" {
t.Fatalf("object labels = %+v", labels)
}
// Unsupported or blank field lists must be rejected.
if _, err := ghSearchJSONRows([]store.Thread{thread}, "unsupported"); err == nil {
t.Fatal("unsupported search json field should fail")
}
if _, err := ghSearchJSONRows([]store.Thread{thread}, " "); err == nil {
t.Fatal("empty search json fields should fail")
}
// Search query parsing separates the repo: and is: qualifiers from free text.
query, repo, state := parseGHSearchQuery("repo:openclaw/openclaw is:pr is:open crash")
if query != "crash" || repo != "openclaw/openclaw" || state != "open" {
t.Fatalf("query=%q repo=%q state=%q", query, repo, state)
}
if !isGHSearchKind("pull-requests") || ghSearchKind("pulls") != "pull_request" || ghSearchKind("issues") != "issue" {
t.Fatal("search kind mismatch")
}
// Durations: zero is rejected and a bare number is read as seconds.
if _, err := parseGHSearchDuration("0"); err == nil {
t.Fatal("zero duration should fail")
}
if duration, err := parseGHSearchDuration("5"); err != nil || duration != 5*time.Second {
t.Fatalf("seconds duration=%s err=%v", duration, err)
}
if _, err := parseGHSearchLimit("5", "6"); err == nil {
t.Fatal("disagreeing limits should fail")
}
// Workflow run rows: a numeric RunID becomes an int64 databaseId, while a
// non-numeric RunID is passed through as the original string.
runs := []store.WorkflowRun{{
RunID: "99", RunNumber: 7, WorkflowName: "CI", Status: "completed", Conclusion: "success",
HTMLURL: "https://example.com/run", Event: "push", HeadBranch: "main", HeadSHA: "abc",
CreatedAtGH: "2026-05-08T00:00:00Z", UpdatedAtGH: "2026-05-08T00:01:00Z",
}, {RunID: "not-number", WorkflowName: "Deploy"}}
runRows := ghWorkflowRunJSONRows(runs, "databaseId,id,number,workflowName,name,displayTitle,status,conclusion,url,event,headBranch,headSha,createdAt,updatedAt")
if runRows[0]["databaseId"] != int64(99) || runRows[1]["databaseId"] != "not-number" {
t.Fatalf("run rows = %+v", runRows)
}
// Cache key scan: one well-formed entry and one truncated JSON file live in
// the same directory; the scan must still report the entry keyed "good".
dir := t.TempDir()
entry := ghCommandCacheEntry{Args: []string{"run", "list"}, CreatedAt: time.Now().Add(-time.Hour), Stdout: "[]"}
data, err := json.Marshal(entry)
if err != nil {
t.Fatalf("marshal entry: %v", err)
}
if err := os.WriteFile(filepath.Join(dir, "good.json"), data, 0o644); err != nil {
t.Fatalf("write entry: %v", err)
}
if err := os.WriteFile(filepath.Join(dir, "bad.json"), []byte("{"), 0o644); err != nil {
t.Fatalf("write bad entry: %v", err)
}
entries, err := os.ReadDir(dir)
if err != nil {
t.Fatalf("read dir: %v", err)
}
found := false
// NOTE: the loop variable shadows the ghCommandCacheEntry named entry above.
for _, entry := range entries {
if info, ok := ghCommandCacheKeyInfoFromDirEntry(dir, entry); ok && info.Key == "good" {
found = true
}
}
if !found {
t.Fatal("cache key info did not parse good entry")
}
// The miss printer must at least emit the heading it was given.
var buf bytes.Buffer
printGHXCacheMisses(&buf, "Misses", map[string]int64{"b": 1, "a": 2})
if !strings.Contains(buf.String(), "Misses") {
t.Fatalf("miss output = %q", buf.String())
}
}
type fakeFileInfo struct{ mod time.Time }
func (f fakeFileInfo) Name() string { return "fake" }
func (f fakeFileInfo) Size() int64 { return 0 }
func (f fakeFileInfo) Mode() os.FileMode { return 0 }
func (f fakeFileInfo) ModTime() time.Time { return f.mod }
func (f fakeFileInfo) IsDir() bool { return false }
func (f fakeFileInfo) Sys() any { return nil }

View File

@ -0,0 +1,411 @@
package cli
import (
"context"
"encoding/json"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
)
// canonicalGHCommandArgs normalizes a gh argument list so that equivalent
// invocations produce the same slice. Positional arguments keep their
// relative order and come first; flags follow, sorted lexically. Known
// value-taking flags are rewritten to their long "--name=value" form with the
// value normalized by canonicalGHFlagValue. Empty input returns nil.
func canonicalGHCommandArgs(args []string) []string {
	if len(args) == 0 {
		return nil
	}
	// Maps every accepted spelling of a value-taking flag to its long form.
	aliases := map[string]string{
		"-R": "--repo", "--repo": "--repo",
		"-L": "--limit", "--limit": "--limit",
		"-q": "--jq", "--jq": "--jq",
		"-t": "--template", "--template": "--template",
		"-X": "--method", "--method": "--method",
		"-H": "--header", "--header": "--header",
		"-f": "--field", "-F": "--field", "--field": "--field", "--raw-field": "--raw-field",
		"--hostname": "--hostname", "--preview": "--preview",
		"--state": "--state", "--author": "--author", "--assignee": "--assignee", "--label": "--label",
		"--branch": "--branch", "--commit": "--commit", "--workflow": "--workflow", "--job": "--job",
		"--json": "--json", "--search": "--search",
	}
	longNames := make(map[string]struct{}, len(aliases))
	for _, long := range aliases {
		longNames[long] = struct{}{}
	}

	var (
		positionals []string
		flags       []string
	)
	for i := 0; i < len(args); i++ {
		current := args[i]
		canonical, takesValue := aliases[current]
		switch {
		case takesValue && i+1 < len(args):
			// Separate "flag value" pair: consume the value as well.
			i++
			flags = append(flags, canonical+"="+canonicalGHFlagValue(canonical, args[i]))
		case takesValue:
			// Trailing value flag with nothing after it.
			flags = append(flags, canonical)
		case strings.HasPrefix(current, "--"):
			name, value, hasValue := strings.Cut(current, "=")
			if _, known := longNames[name]; known && hasValue {
				current = name + "=" + canonicalGHFlagValue(name, value)
			}
			flags = append(flags, current)
		case strings.HasPrefix(current, "-"):
			flags = append(flags, current)
		default:
			positionals = append(positionals, current)
		}
	}
	sort.Strings(flags)

	out := make([]string, 0, len(positionals)+len(flags))
	out = append(out, positionals...)
	return append(out, flags...)
}
// canonicalGHFlagValue normalizes the value of the flag called name.
// Every value is whitespace-trimmed. A "--json" field list additionally has
// each field trimmed and the fields sorted; a "--method" value is
// upper-cased. Other flags only get the trim.
func canonicalGHFlagValue(name, value string) string {
	trimmed := strings.TrimSpace(value)
	if name == "--method" {
		return strings.ToUpper(trimmed)
	}
	if name != "--json" || trimmed == "" {
		return trimmed
	}
	parts := strings.Split(trimmed, ",")
	for i, part := range parts {
		parts[i] = strings.TrimSpace(part)
	}
	sort.Strings(parts)
	return strings.Join(parts, ",")
}
// clearGHCommandCacheForMutation invalidates cached gh results after the
// mutating command described by args. When invalidation tags can be derived
// only matching entries are cleared; with no tags the whole cache is cleared.
func (a *App) clearGHCommandCacheForMutation(ctx context.Context, args []string) error {
	if tags := a.ghMutationInvalidationTags(ctx, args); len(tags) > 0 {
		return a.clearGHCommandCacheMatching(tags)
	}
	return a.clearGHCommandCache()
}
// clearGHCommandCacheMatching removes cache entries whose tags match the
// given invalidation tags. A "global" tag clears the entire cache. Every
// "*.lock" file in the cache directory is removed as well, and entries that
// cannot be read are removed along with the matching ones. Individual
// removal failures are ignored; only directory lookup/listing errors are
// returned.
func (a *App) clearGHCommandCacheMatching(tags []string) error {
	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return err
	}
	wanted := stringSet(tags)
	if _, global := wanted["global"]; global {
		return a.clearGHCommandCache()
	}
	listing, err := os.ReadDir(cacheDir)
	if err != nil {
		return err
	}
	for _, item := range listing {
		fileName := item.Name()
		fullPath := filepath.Join(cacheDir, fileName)
		switch {
		case strings.HasSuffix(fileName, ".lock"):
			_ = os.Remove(fullPath)
		case item.Type().IsRegular() && isGHCommandCacheEntryFile(fileName):
			// Unreadable entries are dropped together with matching ones.
			if cached, ok := readGHCommandCacheEntry(fullPath); !ok || ghCacheTagsMatch(cached.Tags, wanted) {
				_ = os.Remove(fullPath)
			}
		}
	}
	return nil
}
// stringSet builds a set from values, trimming surrounding whitespace from
// each element and dropping elements that are empty after trimming. The
// result is never nil.
func stringSet(values []string) map[string]struct{} {
	members := make(map[string]struct{}, len(values))
	for _, raw := range values {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			members[trimmed] = struct{}{}
		}
	}
	return members
}
// ghCacheTagsMatch reports whether a cache entry carrying entryTags should be
// invalidated by a mutation carrying mutationTags. Any shared tag is a match,
// with one exception: when the mutation has at least one tag that is neither
// "global" nor a "repo:" tag, a shared "repo:" tag alone does not count, so a
// targeted mutation does not wipe everything cached for the repository.
func ghCacheTagsMatch(entryTags []string, mutationTags map[string]struct{}) bool {
	specific := false
	for candidate := range mutationTags {
		if candidate == "global" || strings.HasPrefix(candidate, "repo:") {
			continue
		}
		specific = true
		break
	}
	for _, entryTag := range entryTags {
		_, shared := mutationTags[entryTag]
		if shared && !(specific && strings.HasPrefix(entryTag, "repo:")) {
			return true
		}
	}
	return false
}
// ghCommandCacheTags returns the de-duplicated tags for args, computed by
// ghCommandTags with mutation set to false.
func (a *App) ghCommandCacheTags(ctx context.Context, args []string) []string {
return uniqueStrings(a.ghCommandTags(ctx, args, false))
}
// ghMutationInvalidationTags returns the de-duplicated tags for args,
// computed by ghCommandTags with mutation set to true. In that mode
// ghCommandTags falls back to the "global" tag when it can derive nothing
// more specific.
func (a *App) ghMutationInvalidationTags(ctx context.Context, args []string) []string {
return uniqueStrings(a.ghCommandTags(ctx, args, true))
}
// ghCommandTags derives cache tags for a gh invocation.
//
// The repository comes from the arguments (see ghCommandRepo) or, failing
// that, from the GH_REPO environment variable, and yields a "repo:<name>"
// tag. The command family adds a collection tag ("issues", "pulls",
// "actions", "releases") and, where an identifier can be found, an item tag
// ("issue:N", "pr:N", "run:N", "release:ID"). "api" calls are tagged by
// ghAPITags.
//
// With mutation set, run identifiers are tagged for every run subcommand
// (not only "view"), the collection tag is appended again when a repository
// is known, and "global" is used when nothing else could be derived. The
// result may contain duplicates; callers de-duplicate.
func (a *App) ghCommandTags(ctx context.Context, args []string, mutation bool) []string {
	repo := ghCommandRepo(args)
	if repo == "" {
		repo = strings.TrimSpace(os.Getenv("GH_REPO"))
	}
	var tags []string
	if repo != "" {
		tags = append(tags, "repo:"+repo)
	}
	if len(args) < 2 {
		// Too short to classify; a mutation must invalidate everything.
		if mutation {
			tags = append(tags, "global")
		}
		return tags
	}

	command, rest := args[0], args[2:]
	switch command {
	case "issue":
		tags = append(tags, "issues")
		if number := firstGHNumberArg(rest); number != "" {
			tags = append(tags, "issue:"+number)
		}
	case "pr":
		tags = append(tags, "pulls")
		if number := firstGHNumberArg(rest); number != "" {
			tags = append(tags, "pr:"+number)
		}
	case "run":
		tags = append(tags, "actions")
		if mutation || args[1] == "view" {
			if id := firstGHNumberArg(rest); id != "" {
				tags = append(tags, "run:"+id)
			}
		}
	case "workflow":
		tags = append(tags, "actions")
	case "release":
		tags = append(tags, "releases")
		if id := firstGHPositionalArg(rest); id != "" {
			tags = append(tags, "release:"+id)
		}
	case "api":
		tags = append(tags, ghAPITags(args[1:])...)
	}

	if mutation && repo != "" {
		collections := map[string]string{
			"issue":    "issues",
			"pr":       "pulls",
			"run":      "actions",
			"workflow": "actions",
			"release":  "releases",
		}
		if collection, ok := collections[command]; ok {
			tags = append(tags, collection)
		}
	}
	if mutation && len(tags) == 0 {
		tags = append(tags, "global")
	}
	return tags
}
// ghCommandRepo extracts the target repository from gh arguments. It checks,
// in order: an explicit "-R"/"--repo" flag (separate or "--repo=" form), the
// positional argument of "repo view" when it contains a "/", and the path of
// an "api" call. It returns "" when none of these apply.
func ghCommandRepo(args []string) string {
	const inlineFlag = "--repo="
	for i, arg := range args {
		switch {
		case arg == "-R" || arg == "--repo":
			if i+1 < len(args) {
				return strings.TrimSpace(args[i+1])
			}
		case strings.HasPrefix(arg, inlineFlag):
			return strings.TrimSpace(arg[len(inlineFlag):])
		}
	}
	if len(args) == 0 {
		return ""
	}
	switch args[0] {
	case "repo":
		if len(args) >= 3 && args[1] == "view" {
			if target := firstGHPositionalArg(args[2:]); strings.Contains(target, "/") {
				return target
			}
		}
	case "api":
		return ghAPIRepo(args[1:])
	}
	return ""
}
// ghAPIRepo extracts "owner/name" from the path argument of a "gh api" call.
// args are the arguments after "api". The path may be a full
// https://api.github.com/ URL or a relative path with or without a leading
// slash. It returns "" unless the path has the form repos/<owner>/<name>[/...].
//
// The query string is dropped before splitting so that a path such as
// "repos/o/r?per_page=1" yields "o/r" rather than "o/r?per_page=1"; this
// mirrors the query handling in ghAPITags.
func ghAPIRepo(args []string) string {
	path := ghAPIPathArg(args)
	path = strings.TrimPrefix(path, "https://api.github.com/")
	path = strings.TrimPrefix(path, "http://api.github.com/")
	path = strings.TrimPrefix(path, "/")
	if before, _, found := strings.Cut(path, "?"); found {
		path = before
	}
	parts := strings.Split(path, "/")
	if len(parts) >= 3 && parts[0] == "repos" && parts[1] != "" && parts[2] != "" {
		return parts[1] + "/" + parts[2]
	}
	return ""
}
// ghAPITags derives cache tags from the path of a "gh api" call (args are the
// arguments after "api"). Only repos/<owner>/<name>/<resource>/... paths are
// tagged; anything else returns nil. The result holds the "repo:" tag when a
// repository can be extracted, the resource collection tag for actions,
// issues, pulls and releases, and an item tag ("run:N", "issue:N", "pr:N")
// when the path carries a decimal identifier in the expected position.
func ghAPITags(args []string) []string {
	route := ghAPIPathArg(args)
	for _, prefix := range []string{"https://api.github.com/", "http://api.github.com/", "/"} {
		route = strings.TrimPrefix(route, prefix)
	}
	// Cut returns the whole string as the first result when "?" is absent.
	route, _, _ = strings.Cut(route, "?")

	segments := strings.Split(route, "/")
	if len(segments) < 4 || segments[0] != "repos" {
		return nil
	}
	var tags []string
	if repo := ghAPIRepo(args); repo != "" {
		tags = append(tags, "repo:"+repo)
	}
	switch resource := segments[3]; resource {
	case "actions":
		tags = append(tags, resource)
		if len(segments) >= 6 && segments[4] == "runs" && isDecimalString(segments[5]) {
			tags = append(tags, "run:"+segments[5])
		}
	case "issues", "pulls":
		tags = append(tags, resource)
		if len(segments) >= 5 && isDecimalString(segments[4]) {
			itemPrefix := "issue:"
			if resource == "pulls" {
				itemPrefix = "pr:"
			}
			tags = append(tags, itemPrefix+segments[4])
		}
	case "releases":
		tags = append(tags, resource)
	}
	return tags
}
// firstGHNumberArg returns, as a decimal string, the number of the first
// positional argument that parseThreadReference accepts with a positive
// number. Arguments starting with "-" are treated as flags; a flag without
// "=" is assumed to take the following argument as its value, so that
// argument is skipped too. It returns "" when nothing qualifies.
func firstGHNumberArg(args []string) string {
	skipValue := false
	for _, candidate := range args {
		if skipValue {
			skipValue = false
			continue
		}
		if strings.HasPrefix(candidate, "-") {
			skipValue = !strings.Contains(candidate, "=")
			continue
		}
		if ref, ok := parseThreadReference(candidate); ok && ref.Number > 0 {
			return strconv.Itoa(ref.Number)
		}
	}
	return ""
}
func uniqueStrings(values []string) []string {
seen := map[string]struct{}{}
out := make([]string, 0, len(values))
for _, value := range values {
value = strings.TrimSpace(value)
if value == "" {
continue
}
if _, ok := seen[value]; ok {
continue
}
seen[value] = struct{}{}
out = append(out, value)
}
return out
}
// ghCompletedRunCacheTTL returns an extended cache lifetime for entries that
// describe finished GitHub Actions work, or 0 when no extension applies.
//
// Single completed runs/jobs ("run view", and the api routes for run jobs, a
// job, or a run) get 12 hours; listings in which every item is completed
// ("run list", the api runs collection) get 30 minutes. API routes are
// matched from most to least specific.
func ghCompletedRunCacheTTL(entry ghCommandCacheEntry) time.Duration {
	const (
		completedItemTTL = 12 * time.Hour
		completedListTTL = 30 * time.Minute
	)
	if len(entry.Args) == 0 {
		return 0
	}
	switch entry.Args[0] {
	case "run":
		if len(entry.Args) < 2 {
			return 0
		}
		switch {
		case entry.Args[1] == "view" && ghJSONStatusCompleted(entry.Stdout):
			return completedItemTTL
		case entry.Args[1] == "list" && ghJSONCollectionCompleted(entry.Stdout):
			return completedListTTL
		}
	case "api":
		route := normalizeGHAPIRoute(entry.Args[1:])
		switch {
		case strings.Contains(route, "/actions/runs/:id/jobs") && ghJSONJobsCompleted(entry.Stdout):
			return completedItemTTL
		case strings.Contains(route, "/actions/jobs/:id") && ghJSONStatusCompleted(entry.Stdout):
			return completedItemTTL
		case strings.Contains(route, "/actions/runs/:id") && ghJSONStatusCompleted(entry.Stdout):
			return completedItemTTL
		case strings.Contains(route, "/actions/runs") && ghJSONCollectionCompleted(entry.Stdout):
			return completedListTTL
		}
	}
	return 0
}
func ghJSONJobsCompleted(raw string) bool {
var payload struct {
Jobs []map[string]any `json:"jobs"`
}
if err := json.Unmarshal([]byte(raw), &payload); err == nil {
return len(payload.Jobs) > 0 && allGHStatusMapsCompleted(payload.Jobs)
}
return ghJSONCollectionCompleted(raw)
}
// ghJSONStatusCompleted reports whether raw is a JSON object that
// ghStatusMapCompleted considers completed. Undecodable input yields false.
func ghJSONStatusCompleted(raw string) bool {
	var fields map[string]any
	decodeErr := json.Unmarshal([]byte(raw), &fields)
	return decodeErr == nil && ghStatusMapCompleted(fields)
}
// ghJSONCollectionCompleted reports whether raw is a non-empty collection in
// which every element is completed. Two shapes are accepted: a bare JSON
// array of objects, or an object with a "workflow_runs" array. Anything else,
// including an empty collection, yields false.
func ghJSONCollectionCompleted(raw string) bool {
	data := []byte(raw)

	var list []map[string]any
	if json.Unmarshal(data, &list) == nil {
		return len(list) > 0 && allGHStatusMapsCompleted(list)
	}

	var envelope struct {
		WorkflowRuns []map[string]any `json:"workflow_runs"`
	}
	if json.Unmarshal(data, &envelope) != nil {
		return false
	}
	return len(envelope.WorkflowRuns) > 0 && allGHStatusMapsCompleted(envelope.WorkflowRuns)
}
// allGHStatusMapsCompleted reports whether every row is completed according
// to ghStatusMapCompleted. It is true for an empty slice.
func allGHStatusMapsCompleted(rows []map[string]any) bool {
	for i := range rows {
		if ghStatusMapCompleted(rows[i]) {
			continue
		}
		return false
	}
	return true
}
// ghStatusMapCompleted reports whether a decoded JSON object represents
// finished work: either its "status" string equals "completed"
// (case-insensitively) or its "conclusion" string is non-blank. Non-string
// values for either key are treated as absent.
func ghStatusMapCompleted(row map[string]any) bool {
	if status, ok := row["status"].(string); ok && strings.EqualFold(status, "completed") {
		return true
	}
	conclusion, _ := row["conclusion"].(string)
	return strings.TrimSpace(conclusion) != ""
}

View File

@ -0,0 +1,228 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestGHShimViewAndListUseLocalCache drives the "gh" subcommand against the
// repository seeded by seedGHShimRepo and checks the output of view, checks
// and list commands for pull requests, issues and workflow runs, in both JSON
// and human-readable form. The steps share one output buffer (reset between
// steps) and reuse the decoded view/checks/list variables.
func TestGHShimViewAndListUseLocalCache(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
// pr view: basic fields plus comments.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,title,isDraft,author,comments"}); err != nil {
t.Fatalf("gh pr view: %v", err)
}
var view map[string]any
if err := json.Unmarshal(stdout.Bytes(), &view); err != nil {
t.Fatalf("decode view: %v\n%s", err, stdout.String())
}
// NOTE: the unchecked type assertions below panic instead of failing cleanly
// if the decoded JSON has an unexpected shape.
comments := view["comments"].([]any)
if int(view["number"].(float64)) != 12 || view["isDraft"] != true || len(comments) != 1 || comments[0].(map[string]any)["body"] != "cache path looks good" {
t.Fatalf("view = %#v", view)
}
stdout.Reset()
// pr view: detail fields (files, commits, head ref).
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,files,commits,statusCheckRollup,headRefOid,headRefName"}); err != nil {
t.Fatalf("gh pr rich view: %v", err)
}
if err := json.Unmarshal(stdout.Bytes(), &view); err != nil {
t.Fatalf("decode rich view: %v\n%s", err, stdout.String())
}
if view["headRefOid"] != "abc123" || len(view["files"].([]any)) != 1 || len(view["commits"].([]any)) != 1 {
t.Fatalf("rich view = %#v", view)
}
stdout.Reset()
// pr checks addressed by number and -R.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "12", "-R", "openclaw/openclaw", "--json", "name,state,detailsUrl,workflow"}); err != nil {
t.Fatalf("gh pr checks: %v", err)
}
var checks []map[string]any
if err := json.Unmarshal(stdout.Bytes(), &checks); err != nil {
t.Fatalf("decode checks: %v\n%s", err, stdout.String())
}
if len(checks) != 1 || checks[0]["name"] != "test" || checks[0]["state"] != "SUCCESS" {
t.Fatalf("checks = %#v", checks)
}
stdout.Reset()
// pr checks addressed by pull request URL, without -R.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "https://github.com/openclaw/openclaw/pull/12", "--json", "name,state"}); err != nil {
t.Fatalf("gh pr checks URL: %v", err)
}
if err := json.Unmarshal(stdout.Bytes(), &checks); err != nil {
t.Fatalf("decode URL checks: %v\n%s", err, stdout.String())
}
if len(checks) != 1 || checks[0]["name"] != "test" || checks[0]["state"] != "SUCCESS" {
t.Fatalf("URL checks = %#v", checks)
}
stdout.Reset()
// run list filtered by branch.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "run", "list", "-R", "openclaw/openclaw", "--branch", "manifest-cache", "--json", "databaseId,workflowName,status,conclusion,headSha"}); err != nil {
t.Fatalf("gh run list: %v", err)
}
var runs []map[string]any
if err := json.Unmarshal(stdout.Bytes(), &runs); err != nil {
t.Fatalf("decode runs: %v\n%s", err, stdout.String())
}
if len(runs) != 1 || int(runs[0]["databaseId"].(float64)) != 99 || runs[0]["headSha"] != "abc123" {
t.Fatalf("runs = %#v", runs)
}
stdout.Reset()
// run view by id.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "run", "view", "99", "-R", "openclaw/openclaw", "--json", "databaseId,url"}); err != nil {
t.Fatalf("gh run view: %v", err)
}
var runView map[string]any
if err := json.Unmarshal(stdout.Bytes(), &runView); err != nil {
t.Fatalf("decode run view: %v\n%s", err, stdout.String())
}
if int(runView["databaseId"].(float64)) != 99 {
t.Fatalf("run view = %#v", runView)
}
stdout.Reset()
// issue list filtered by state.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number,title"}); err != nil {
t.Fatalf("gh issue list: %v", err)
}
var list []map[string]any
if err := json.Unmarshal(stdout.Bytes(), &list); err != nil {
t.Fatalf("decode list: %v\n%s", err, stdout.String())
}
if len(list) != 1 || int(list[0]["number"].(float64)) != 10 {
t.Fatalf("list = %#v", list)
}
stdout.Reset()
// issue view with comments.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "view", "10", "-R", "openclaw/openclaw", "--json", "number,comments"}); err != nil {
t.Fatalf("gh issue view comments: %v", err)
}
if err := json.Unmarshal(stdout.Bytes(), &view); err != nil {
t.Fatalf("decode issue comments: %v\n%s", err, stdout.String())
}
comments = view["comments"].([]any)
if len(comments) != 1 || comments[0].(map[string]any)["body"] != "same hot loop here" {
t.Fatalf("issue comments = %#v", view)
}
stdout.Reset()
// issue list filtered by author, assignee and label together.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--author", "alice", "--assignee", "peter", "--label", "bug", "--json", "number,title"}); err != nil {
t.Fatalf("gh issue list filtered: %v", err)
}
if err := json.Unmarshal(stdout.Bytes(), &list); err != nil {
t.Fatalf("decode filtered list: %v\n%s", err, stdout.String())
}
if len(list) != 1 || int(list[0]["number"].(float64)) != 10 {
t.Fatalf("filtered list = %#v", list)
}
stdout.Reset()
// Human-readable output (no --json): tab-separated text is expected.
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "view", "10", "-R", "openclaw/openclaw"}); err != nil {
t.Fatalf("gh issue human view: %v", err)
}
if got := stdout.String(); !strings.Contains(got, "title:\tHot loop burns CPU") || !strings.Contains(got, "runtime has a hot loop") {
t.Fatalf("human issue view = %q", got)
}
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--limit", "1"}); err != nil {
t.Fatalf("gh issue human list: %v", err)
}
if got := stdout.String(); !strings.Contains(got, "10\tHot loop burns CPU") {
t.Fatalf("human issue list = %q", got)
}
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "list", "-R", "openclaw/openclaw", "--limit", "1"}); err != nil {
t.Fatalf("gh pr human list: %v", err)
}
if got := stdout.String(); !strings.Contains(got, "12\tManifest cache update") {
t.Fatalf("human pr list = %q", got)
}
}
// TestGHShimAutoHydratesPRDetailsOnMiss empties the seeded store, points the
// GitHub client at a stub HTTP server, and verifies that "gh pr view" and
// "gh pr checks" for PR #12 are answered with data served by that stub
// (head SHA "auto123", check "auto-test").
func TestGHShimAutoHydratesPRDetailsOnMiss(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)
	cfg, err := config.Load(configPath)
	if err != nil {
		t.Fatalf("load config: %v", err)
	}
	st, err := store.Open(ctx, cfg.DBPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	// Wipe every seeded table so the commands below start from a local miss.
	for _, table := range []string{"pull_request_checks", "pull_request_commits", "pull_request_files", "pull_request_details", "github_workflow_runs", "threads", "repositories"} {
		if _, err := st.DB().ExecContext(ctx, "delete from "+table); err != nil {
			t.Fatalf("clear %s: %v", table, err)
		}
	}
	if err := st.Close(); err != nil {
		t.Fatalf("close store: %v", err)
	}
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/repos/openclaw/openclaw":
			_ = json.NewEncoder(w).Encode(map[string]any{"id": 123, "open_issues_count": 1})
		case "/repos/openclaw/openclaw/issues/12":
			_ = json.NewEncoder(w).Encode(map[string]any{
				"id": 12, "number": 12, "state": "open", "title": "Manifest cache update",
				"body": "", "html_url": "https://github.com/openclaw/openclaw/pull/12",
				"labels": []map[string]any{}, "assignees": []map[string]any{},
				"user":         map[string]any{"login": "bob", "type": "User"},
				"pull_request": map[string]any{"url": "https://api.github.test/repos/openclaw/openclaw/pulls/12"},
			})
		case "/repos/openclaw/openclaw/pulls/12":
			_ = json.NewEncoder(w).Encode(map[string]any{
				"number": 12, "head": map[string]any{"sha": "auto123", "ref": "auto-branch", "repo": map[string]any{"full_name": "openclaw/openclaw"}},
				"base": map[string]any{"sha": "base123"}, "mergeable_state": "clean", "changed_files": 1,
			})
		case "/repos/openclaw/openclaw/pulls/12/files":
			_ = json.NewEncoder(w).Encode([]map[string]any{{"filename": "auto.go", "status": "modified"}})
		case "/repos/openclaw/openclaw/pulls/12/commits":
			_ = json.NewEncoder(w).Encode([]map[string]any{{"sha": "commit123", "commit": map[string]any{"message": "test"}}})
		case "/repos/openclaw/openclaw/commits/auto123/check-runs":
			_ = json.NewEncoder(w).Encode(map[string]any{"check_runs": []map[string]any{{"name": "auto-test", "status": "completed", "conclusion": "success"}}})
		case "/repos/openclaw/openclaw/actions/runs":
			_ = json.NewEncoder(w).Encode(map[string]any{"workflow_runs": []map[string]any{{"id": 12345, "head_branch": "auto-branch", "head_sha": "auto123", "status": "completed", "conclusion": "success", "name": "CI"}}})
		default:
			// The handler runs on the server's goroutine, where FailNow-style
			// calls (Fatalf) are not allowed. Record the failure with Errorf
			// and answer with an error status so the client stops as well.
			t.Errorf("unexpected request: %s", r.URL.String())
			http.Error(w, "unexpected request", http.StatusNotFound)
		}
	}))
	defer server.Close()
	t.Setenv("GITHUB_TOKEN", "test-token")
	t.Setenv("GITCRAWL_GITHUB_BASE_URL", server.URL)
	// Point at a gh binary path that is not expected to exist.
	t.Setenv("GITCRAWL_GH_PATH", "/tmp/no-real-gh")

	run := New()
	var stdout bytes.Buffer
	run.Stdout = &stdout
	if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,files,commits,statusCheckRollup,headRefOid"}); err != nil {
		t.Fatalf("auto hydrate view: %v", err)
	}
	var view map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &view); err != nil {
		t.Fatalf("decode view: %v\n%s", err, stdout.String())
	}
	if view["headRefOid"] != "auto123" || len(view["files"].([]any)) != 1 {
		t.Fatalf("view = %#v", view)
	}
	stdout.Reset()
	if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "12", "-R", "openclaw/openclaw", "--json", "name,state"}); err != nil {
		t.Fatalf("auto hydrate checks: %v", err)
	}
	var checks []map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &checks); err != nil {
		t.Fatalf("decode checks: %v\n%s", err, stdout.String())
	}
	if len(checks) != 1 || checks[0]["name"] != "auto-test" || checks[0]["state"] != "SUCCESS" {
		t.Fatalf("checks = %#v", checks)
	}
}

View File

@ -0,0 +1,27 @@
package cli
import (
"context"
"testing"
"github.com/openclaw/gitcrawl/internal/store"
)
// prIDForTest looks up the store ID of pull request #number in repository
// repoID, including closed threads. It fails the test immediately when the
// lookup errors or no pull request with that number exists.
func prIDForTest(t *testing.T, ctx context.Context, st *store.Store, repoID int64, number int) int64 {
	t.Helper()
	opts := store.ThreadListOptions{
		RepoID:        repoID,
		IncludeClosed: true,
		Numbers:       []int{number},
	}
	candidates, err := st.ListThreadsFiltered(ctx, opts)
	if err != nil {
		t.Fatalf("list PR for test: %v", err)
	}
	for i := range candidates {
		if candidate := candidates[i]; candidate.Kind == "pull_request" && candidate.Number == number {
			return candidate.ID
		}
	}
	t.Fatalf("missing PR #%d", number)
	return 0
}

View File

@ -0,0 +1,278 @@
package cli
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"time"
)
// ghXCacheCounters is the JSON document stored in the xcache stats file. The
// top-level totals are incremented by incrementGHXCacheCounters under the
// counter name given in each field's JSON tag.
type ghXCacheCounters struct {
LocalHits int64 `json:"local_hits"`
FallbackHits int64 `json:"fallback_hits"`
StaleHits int64 `json:"stale_hits"`
BackendMisses int64 `json:"backend_misses"`
PassThroughWrites int64 `json:"pass_through_writes"`
// Backend-miss breakdowns, filled only when a miss is recorded with
// arguments: keyed by ghCommandName, ghCommandRoute and ghCommandMissKey
// of those arguments respectively.
BackendMissesByCommand map[string]int64 `json:"backend_misses_by_command,omitempty"`
BackendMissesByRoute map[string]int64 `json:"backend_misses_by_route,omitempty"`
BackendMissesByKey map[string]int64 `json:"backend_misses_by_key,omitempty"`
// Hourly holds the same counters per bucket, keyed by the bucket key from
// ghXCacheCurrentBucket; it is what since() aggregates over.
Hourly map[string]ghXCacheCounterBucket `json:"hourly,omitempty"`
}
// ghXCacheCounterBucket holds the xcache counters for one time bucket. The
// fields mirror ghXCacheCounters and are incremented by
// incrementGHXCacheCounterBucket.
type ghXCacheCounterBucket struct {
// StartedAt is the bucket's start time. Buckets with a zero StartedAt are
// skipped by since() and never removed by pruneGHXCacheBuckets.
StartedAt time.Time `json:"started_at"`
LocalHits int64 `json:"local_hits,omitempty"`
FallbackHits int64 `json:"fallback_hits,omitempty"`
StaleHits int64 `json:"stale_hits,omitempty"`
BackendMisses int64 `json:"backend_misses,omitempty"`
PassThroughWrites int64 `json:"pass_through_writes,omitempty"`
// Backend-miss breakdowns by command name, route and canonical miss key.
BackendMissesByCommand map[string]int64 `json:"backend_misses_by_command,omitempty"`
BackendMissesByRoute map[string]int64 `json:"backend_misses_by_route,omitempty"`
BackendMissesByKey map[string]int64 `json:"backend_misses_by_key,omitempty"`
}
// ghXCacheCounters loads the xcache counters from the stats file in the gh
// command cache directory. Only a failure to resolve the directory is
// reported; a missing or unreadable stats file yields zero counters.
func (a *App) ghXCacheCounters() (ghXCacheCounters, error) {
	var empty ghXCacheCounters
	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return empty, err
	}
	statsPath := filepath.Join(cacheDir, ghXCacheStatsFile)
	return readGHXCacheCounters(statsPath), nil
}
// resetGHXCacheCounters replaces the xcache stats file with an empty JSON
// object, written atomically with mode 0600.
func (a *App) resetGHXCacheCounters() error {
	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return err
	}
	statsPath := filepath.Join(cacheDir, ghXCacheStatsFile)
	emptyDocument := []byte("{}")
	return writeAtomicFile(statsPath, emptyDocument, 0o600)
}
// incrementGHXCacheCounter bumps the named counter without command arguments,
// so no per-command/route/key breakdown is recorded.
func (a *App) incrementGHXCacheCounter(name string) error {
return a.incrementGHXCacheCounterWithArgs(name, nil)
}
// incrementGHXCacheBackendMiss bumps the "backend_misses" counter and passes
// args along so the miss is also attributed in the breakdown maps.
func (a *App) incrementGHXCacheBackendMiss(args []string) error {
return a.incrementGHXCacheCounterWithArgs("backend_misses", args)
}
// incrementGHXCacheCounterWithArgs performs a locked read-modify-write of the
// xcache stats file: it bumps the named counter in the totals and in the
// current hourly bucket, prunes buckets older than seven days, and writes the
// file back atomically. The update is silently skipped (nil is returned) when
// the lock cannot be taken or when name is not a known counter.
func (a *App) incrementGHXCacheCounterWithArgs(name string, args []string) error {
dir, err := a.ghCommandCacheDir()
if err != nil {
return err
}
path := filepath.Join(dir, ghXCacheStatsFile)
lockPath := path + ".lock"
// Non-blocking lock attempt: when it fails this increment is dropped rather
// than waited for.
lock, locked := tryGHCommandCacheLock(lockPath)
if !locked {
return nil
}
// Release the lock by closing the handle and deleting the lock file.
defer func() {
_ = lock.Close()
_ = os.Remove(lockPath)
}()
stats := readGHXCacheCounters(path)
if !incrementGHXCacheCounters(&stats, name, args) {
// Unknown counter name: nothing changed, so nothing is written.
return nil
}
bucketKey, bucketStart := ghXCacheCurrentBucket(time.Now())
if stats.Hourly == nil {
stats.Hourly = map[string]ghXCacheCounterBucket{}
}
// Map values are copies; modify the copy and store it back below.
bucket := stats.Hourly[bucketKey]
if bucket.StartedAt.IsZero() {
bucket.StartedAt = bucketStart
}
// The name was already validated by incrementGHXCacheCounters above, so the
// result can be ignored here.
_ = incrementGHXCacheCounterBucket(&bucket, name, args)
stats.Hourly[bucketKey] = bucket
pruneGHXCacheBuckets(stats.Hourly, time.Now().Add(-7*24*time.Hour))
data, err := json.Marshal(stats)
if err != nil {
return err
}
return writeAtomicFile(path, data, 0o600)
}
// incrementGHXCacheCounters adds one to the total identified by name and
// reports whether name was recognized. For "backend_misses" the miss is also
// attributed to args in the breakdown maps. Unknown names change nothing.
func incrementGHXCacheCounters(stats *ghXCacheCounters, name string, args []string) bool {
	var total *int64
	switch name {
	case "local_hits":
		total = &stats.LocalHits
	case "fallback_hits":
		total = &stats.FallbackHits
	case "stale_hits":
		total = &stats.StaleHits
	case "backend_misses":
		total = &stats.BackendMisses
	case "pass_through_writes":
		total = &stats.PassThroughWrites
	default:
		return false
	}
	*total++
	if name == "backend_misses" {
		incrementGHXCacheMissMaps(&stats.BackendMissesByCommand, &stats.BackendMissesByRoute, &stats.BackendMissesByKey, args)
	}
	return true
}
// incrementGHXCacheCounterBucket adds one to the bucket counter identified by
// name and reports whether name was recognized. For "backend_misses" the miss
// is also attributed to args in the bucket's breakdown maps. Unknown names
// change nothing.
func incrementGHXCacheCounterBucket(bucket *ghXCacheCounterBucket, name string, args []string) bool {
	var total *int64
	switch name {
	case "local_hits":
		total = &bucket.LocalHits
	case "fallback_hits":
		total = &bucket.FallbackHits
	case "stale_hits":
		total = &bucket.StaleHits
	case "backend_misses":
		total = &bucket.BackendMisses
	case "pass_through_writes":
		total = &bucket.PassThroughWrites
	default:
		return false
	}
	*total++
	if name == "backend_misses" {
		incrementGHXCacheMissMaps(&bucket.BackendMissesByCommand, &bucket.BackendMissesByRoute, &bucket.BackendMissesByKey, args)
	}
	return true
}
// incrementGHXCacheMissMaps attributes one backend miss to args in the three
// breakdown maps, allocating each map on first use. The command map is always
// updated; the route and key maps are updated only when ghCommandRoute /
// ghCommandMissKey return a non-empty string. Empty args are ignored.
func incrementGHXCacheMissMaps(byCommand, byRoute, byKey *map[string]int64, args []string) {
	if len(args) == 0 {
		return
	}
	// bump increments label in *target, creating the map when it is nil.
	bump := func(target *map[string]int64, label string) {
		if *target == nil {
			*target = map[string]int64{}
		}
		(*target)[label]++
	}
	bump(byCommand, ghCommandName(args))
	if route := ghCommandRoute(args); route != "" {
		bump(byRoute, route)
	}
	if key := ghCommandMissKey(args); key != "" {
		bump(byKey, key)
	}
}
// ghCommandMissKey builds the key under which a backend miss for args is
// counted: the canonicalized arguments joined by single spaces. It returns ""
// for empty args and falls back to ghCommandName when canonicalization yields
// nothing.
//
// Keys longer than 180 bytes are shortened to at most 180 bytes including a
// trailing "...". The cut is moved back to a UTF-8 rune boundary so that a
// multi-byte character is never split; a split rune would make the key
// invalid UTF-8, and the key is later serialized as a JSON object key.
func ghCommandMissKey(args []string) string {
	if len(args) == 0 {
		return ""
	}
	canonical := canonicalGHCommandArgs(args)
	if len(canonical) == 0 {
		return ghCommandName(args)
	}
	const (
		maxKeyBytes = 180
		ellipsis    = "..."
	)
	key := strings.Join(canonical, " ")
	if len(key) <= maxKeyBytes {
		return key
	}
	cut := maxKeyBytes - len(ellipsis)
	// key[cut] is the first byte dropped. Bytes of the form 10xxxxxx are UTF-8
	// continuation bytes, so while it is one the cut sits inside a rune.
	for cut > 0 && key[cut]&0xC0 == 0x80 {
		cut--
	}
	return key[:cut] + ellipsis
}
// ghCommandRoute returns a coarse route label for a gh invocation: the
// normalized API route for "api" calls, ghCommandName for commands with at
// least two arguments, the bare first argument otherwise, and "" for empty
// input.
func ghCommandRoute(args []string) string {
	switch {
	case len(args) == 0:
		return ""
	case args[0] == "api":
		return normalizeGHAPIRoute(args[1:])
	case len(args) >= 2:
		return ghCommandName(args)
	default:
		return args[0]
	}
}
// readGHXCacheCounters loads counters from the JSON file at path. Any read or
// decode failure yields zero-valued counters instead of an error.
func readGHXCacheCounters(path string) ghXCacheCounters {
	raw, err := os.ReadFile(path)
	if err != nil {
		return ghXCacheCounters{}
	}
	var parsed ghXCacheCounters
	if json.Unmarshal(raw, &parsed) != nil {
		// Discard whatever was partially decoded.
		return ghXCacheCounters{}
	}
	return parsed
}
// since returns the counters accumulated in the window [now-since, now],
// summed from the hourly buckets whose start time is not before the cutoff.
// Buckets without a start time are ignored. A non-positive window returns the
// receiver unchanged (all-time totals). The windowed result carries no Hourly
// map of its own.
func (c ghXCacheCounters) since(since time.Duration, now time.Time) ghXCacheCounters {
	if since <= 0 {
		return c
	}
	var window ghXCacheCounters
	oldest := now.Add(-since)
	for _, hour := range c.Hourly {
		started := hour.StartedAt
		if !started.IsZero() && !started.Before(oldest) {
			window.LocalHits += hour.LocalHits
			window.FallbackHits += hour.FallbackHits
			window.StaleHits += hour.StaleHits
			window.BackendMisses += hour.BackendMisses
			window.PassThroughWrites += hour.PassThroughWrites
			mergeCounterMap(&window.BackendMissesByCommand, hour.BackendMissesByCommand)
			mergeCounterMap(&window.BackendMissesByRoute, hour.BackendMissesByRoute)
			mergeCounterMap(&window.BackendMissesByKey, hour.BackendMissesByKey)
		}
	}
	return window
}
// mergeCounterMap adds every count in src to the matching key in *dst. The
// destination map is allocated on demand; an empty src leaves *dst untouched
// (including nil).
func mergeCounterMap(dst *map[string]int64, src map[string]int64) {
	if len(src) == 0 {
		return
	}
	target := *dst
	if target == nil {
		target = make(map[string]int64)
		*dst = target
	}
	for label, count := range src {
		target[label] += count
	}
}
func ghXCacheCurrentBucket(now time.Time) (string, time.Time) {
start := now.UTC().Truncate(time.Hour)
return start.Format("2006-01-02T15:00:00Z"), start
}
// pruneGHXCacheBuckets deletes, in place, every bucket whose StartedAt is
// set and earlier than cutoff. Buckets with a zero StartedAt are kept.
func pruneGHXCacheBuckets(buckets map[string]ghXCacheCounterBucket, cutoff time.Time) {
	for key := range buckets {
		started := buckets[key].StartedAt
		if started.IsZero() || !started.Before(cutoff) {
			continue
		}
		delete(buckets, key)
	}
}
func writeAtomicFile(path string, data []byte, perm os.FileMode) error {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return err
}
temp, err := os.CreateTemp(filepath.Dir(path), "."+filepath.Base(path)+".tmp-*")
if err != nil {
return err
}
tempPath := temp.Name()
cleanup := true
defer func() {
if cleanup {
_ = os.Remove(tempPath)
}
}()
if _, err := temp.Write(data); err != nil {
_ = temp.Close()
return err
}
if err := temp.Chmod(perm); err != nil {
_ = temp.Close()
return err
}
if err := temp.Close(); err != nil {
return err
}
if err := os.Rename(tempPath, path); err != nil {
return err
}
cleanup = false
return nil
}
func staleGHCommandCacheLock(info os.FileInfo) bool {
return time.Since(info.ModTime()) > 2*time.Minute
}

View File

@ -0,0 +1,251 @@
package cli
import (
"bytes"
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestGHShimPRCacheAndPolicyHelperBranches drives the PR-related gh shim
// helpers against the repository seeded by seedGHShimRepo: the human-readable
// `gh pr checks` output, the PR cache loaders and freshness check, the
// auto-hydrate switches, branch-to-PR lookup, PR detail JSON fields, and
// writeJSONValue with and without a jq expression.
//
// The steps are order-dependent: the test mutates cache.Detail, toggles
// GITCRAWL_GH_AUTO_HYDRATE, and finally clears PATH.
func TestGHShimPRCacheAndPolicyHelperBranches(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
app := New()
app.configPath = configPath
var stdout bytes.Buffer
app.Stdout = &stdout
// Human-readable checks output is expected to contain a tab-separated row.
if err := app.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "12", "-R", "openclaw/openclaw"}); err != nil {
t.Fatalf("human pr checks: %v", err)
}
if !strings.Contains(stdout.String(), "test\tcompleted\tsuccess") {
t.Fatalf("human checks = %q", stdout.String())
}
// The seeded PR must load both with and without the freshness requirement.
cache, err := app.localGHPullRequestCache(ctx, "openclaw/openclaw", 12)
if err != nil {
t.Fatalf("local pr cache: %v", err)
}
if _, err := app.loadGHPullRequestCache(ctx, "openclaw/openclaw", 12, false); err != nil {
t.Fatalf("load cached pr detail without freshness: %v", err)
}
if _, err := app.loadGHPullRequestCache(ctx, "openclaw/openclaw", 12, true); err != nil {
t.Fatalf("load fresh cached pr detail: %v", err)
}
if !ghPullRequestCacheFresh(cache) {
t.Fatalf("seeded cache should be fresh: %+v", cache.Detail)
}
// Freshness must be lost when the raw JSON head sha differs, and when the
// fetched timestamp cannot be parsed.
cache.Detail.RawJSON = `{"head":{"sha":"different"}}`
if ghPullRequestCacheFresh(cache) {
t.Fatal("mismatched raw head sha should be stale")
}
cache.Detail.RawJSON = `{"head":{"sha":"abc123"}}`
cache.Detail.FetchedAt = "bad"
if ghPullRequestCacheFresh(cache) {
t.Fatal("bad fetched timestamp should be stale")
}
// Auto-hydrate: on for a missing-row error, off when the env var is "0".
if !app.shouldAutoHydrateGHPRDetails(localGHUnsupported(errors.New("pull request detail: sql: no rows in result set"))) {
t.Fatal("missing local PR cache should auto-hydrate")
}
t.Setenv("GITCRAWL_GH_AUTO_HYDRATE", "0")
if app.shouldAutoHydrateGHThread(nil) {
t.Fatal("auto-hydrate env disable not honored")
}
if _, err := app.loadGHPullRequestCache(ctx, "openclaw/openclaw", 9999, true); err == nil {
t.Fatal("missing PR cache with auto-hydrate disabled should fail")
}
t.Setenv("GITCRAWL_GH_AUTO_HYDRATE", "")
if isMissingLocalPRCache(nil) || !isMissingLocalPRCache(localGHUnsupported(errors.New("cached PR branch \"x\" was not found"))) {
t.Fatal("missing cache classification mismatch")
}
// Branch lookup resolves the seeded branch to PR 12 and rejects unknown branches.
number, err := app.findGHPullRequestNumberByBranch(ctx, "openclaw/openclaw", "manifest-cache")
if err != nil || number != 12 {
t.Fatalf("branch lookup number=%d err=%v", number, err)
}
if _, err := app.findGHPullRequestNumberByBranch(ctx, "openclaw/openclaw", "missing"); err == nil {
t.Fatal("missing branch lookup should fail")
}
// Head ref extraction trims whitespace and yields "" for invalid JSON.
if got := ghPRHeadRefFromRawJSON(`{"head":{"ref":" feature/cache "}}`); got != "feature/cache" {
t.Fatalf("head ref = %q", got)
}
if got := ghPRHeadRefFromRawJSON(`{`); got != "" {
t.Fatalf("invalid head ref = %q", got)
}
if !ghPRFieldsNeedFresh([]string{"number", "statusCheckRollup"}) || !ghPRFieldsNeedFresh([]string{"mergeStateStatus"}) || ghPRFieldsNeedFresh([]string{"files"}) {
t.Fatal("fresh field detection mismatch")
}
// Every listed PR detail field must resolve; an unknown field must error.
thread := store.Thread{IsDraft: true}
for _, field := range []string{"headRepositoryOwner", "headRepository", "mergeStateStatus", "additions", "deletions", "changedFiles", "isDraft"} {
if _, err := ghPRDetailJSONValue(thread, cache, field); err != nil {
t.Fatalf("field %s: %v", field, err)
}
}
if _, err := ghPRDetailJSONValue(thread, cache, "unsupported"); err == nil {
t.Fatal("unsupported PR detail field should fail")
}
// writeJSONValue: plain output, an unencodable value, and a jq expression.
var out bytes.Buffer
app.Stdout = &out
if err := app.writeJSONValue(map[string]any{"value": 1}, ""); err != nil || !strings.Contains(out.String(), `"value": 1`) {
t.Fatalf("write json out=%q err=%v", out.String(), err)
}
if err := app.writeJSONValue(make(chan int), ""); err == nil {
t.Fatal("unmarshalable JSON value should fail")
}
out.Reset()
if err := app.writeJSONValue(map[string]any{"value": 2}, ".value"); err != nil || strings.TrimSpace(out.String()) != "2" {
t.Fatalf("write json jq out=%q err=%v", out.String(), err)
}
// With PATH cleared, the same jq expression is expected to fail.
t.Setenv("PATH", "")
if err := app.writeJSONValue(map[string]any{"value": 2}, ".value"); err == nil {
t.Fatal("jq expression without jq executable should fail")
}
}
// TestGHShimCachePolicyExtraBranches covers the gh shim's classification and
// policy helpers: which commands are cacheable, read-only or mutating, how
// the API path argument is located, cache TTL and stale-grace selection,
// stable content refs, PR diff identity parsing, git remote parsing, repo
// resolution, and real-gh path resolution.
//
// The steps are order-dependent: the test changes several environment
// variables and the working directory as it goes.
func TestGHShimCachePolicyExtraBranches(t *testing.T) {
// Cacheability and read-only classification.
if cacheableGHRead(nil) || cacheableGHRead([]string{"repo", "view", "--web"}) {
t.Fatal("interactive or empty gh commands should not be cacheable")
}
if !cacheableGHRead([]string{"gist", "view", "1"}) || !cacheableGHRead([]string{"project", "item-list"}) || !cacheableGHRead([]string{"cache", "list"}) {
t.Fatal("expected read-only command to be cacheable")
}
if ghAPIReadOnly([]string{"repos/openclaw/gitcrawl/issues", "-f", "title=x"}) || ghAPIReadOnly([]string{"repos/openclaw/gitcrawl", "-X"}) || ghAPIReadOnly([]string{"repos/openclaw/gitcrawl", "--method=PATCH"}) {
t.Fatal("mutating or malformed API command should not be read-only")
}
// The API path is found after skipping flags and their values.
if got := ghAPIPathArg([]string{"--paginate", "-H", "Accept: json", "--jq", ".[]", "--template", "{{.}}", "repos/openclaw/gitcrawl/issues"}); got != "repos/openclaw/gitcrawl/issues" {
t.Fatalf("api path with skipped flags = %q", got)
}
if got := ghAPIPathArg([]string{"-f", "x=y"}); got != "" {
t.Fatalf("api path with only fields = %q", got)
}
if !ghAPIReadOnly([]string{"repos/openclaw/gitcrawl", "--method=GET"}) {
t.Fatal("GET API command should be read-only")
}
if ghGraphQLReadOnly([]string{"graphql"}) || ghGraphQLReadOnly([]string{"graphql", "-X"}) || ghGraphQLReadOnly([]string{"graphql", "-X", "PUT", "-f", "query={ viewer { login } }"}) || ghGraphQLReadOnly([]string{"graphql", "--field=query=@query.graphql"}) {
t.Fatal("malformed or mutating GraphQL command should not be read-only")
}
if !ghGraphQLReadOnly([]string{"graphql", "--field=query=query { viewer { login } }"}) {
t.Fatal("GraphQL query should be read-only")
}
// GITCRAWL_GH_CACHE_TTL sets the TTL; once cleared, the per-command table
// below applies.
t.Setenv("GITCRAWL_GH_CACHE_TTL", "2m")
if got := ghCommandCacheTTL([]string{"repo", "view"}); got != 2*time.Minute {
t.Fatalf("env ttl = %s", got)
}
t.Setenv("GITCRAWL_GH_CACHE_TTL", "")
ttlCases := []struct {
args []string
want time.Duration
}{
{[]string{"api", "repos/openclaw/gitcrawl/pages/builds/latest"}, 2 * time.Minute},
{[]string{"api", "repos/openclaw/gitcrawl/pages/health"}, 15 * time.Minute},
{[]string{"api", "repos/openclaw/gitcrawl/actions/jobs/123/logs"}, 12 * time.Hour},
{[]string{"api", "repos/openclaw/gitcrawl/actions/jobs/123"}, time.Minute},
{[]string{"api", "repos/openclaw/gitcrawl/actions/runs/123/pending_deployments"}, 30 * time.Second},
{[]string{"api", "repos/openclaw/gitcrawl/actions/workflows/ci.yml"}, 15 * time.Minute},
{[]string{"api", "repos/openclaw/gitcrawl/releases/latest"}, time.Hour},
{[]string{"api", "repos/openclaw/gitcrawl/branches/main"}, 10 * time.Minute},
{[]string{"workflow", "list"}, 15 * time.Minute},
{[]string{"issue", "view"}, 5 * time.Minute},
{[]string{"unknown"}, 5 * time.Minute},
}
for _, tc := range ttlCases {
if got := ghCommandCacheTTL(tc.args); got != tc.want {
t.Fatalf("ttl %v = %s, want %s", tc.args, got, tc.want)
}
}
if !ghAPIContentRefIsStable([]string{"repos/openclaw/gitcrawl/contents/a?ref=v1.2.3-beta+1"}) || ghAPIContentRefIsStable([]string{"repos/openclaw/gitcrawl/contents/a?ref=refs/heads/v1.2.3"}) || ghAPIContentRefIsStable([]string{"repos/openclaw/gitcrawl/contents/a?ref=v1.2"}) {
t.Fatal("stable content ref classification mismatch")
}
// PR diff identity: a bare number uses GH_REPO; pull and issue URLs carry
// their own repository.
t.Setenv("GH_REPO", "openclaw/from-env")
repo, number, ok := parseGHPRDiffIdentityArgs([]string{"pr", "diff", "42"})
if !ok || repo != "openclaw/from-env" || number != 42 {
t.Fatalf("diff identity repo=%q number=%d ok=%v", repo, number, ok)
}
repo, number, ok = parseGHPRDiffIdentityArgs([]string{"pr", "diff", "https://github.com/openclaw/openclaw/pull/78601"})
if !ok || repo != "openclaw/openclaw" || number != 78601 {
t.Fatalf("diff URL identity repo=%q number=%d ok=%v", repo, number, ok)
}
repo, number, ok = parseGHPRDiffIdentityArgs([]string{"pr", "diff", "https://github.com/openclaw/openclaw/issues/78601"})
if !ok || repo != "openclaw/openclaw" || number != 78601 {
t.Fatalf("diff issue URL identity repo=%q number=%d ok=%v", repo, number, ok)
}
// Mutating versus read-only command classification.
for _, args := range [][]string{{"issue", "close"}, {"pr", "merge"}, {"project", "item-add"}, {"release", "upload"}, {"repo", "delete"}, {"run", "rerun"}, {"secret", "set"}, {"variable", "delete"}, {"workflow", "disable"}, {"api", "repos/openclaw/gitcrawl/issues", "-f", "title=x"}} {
if !mutatingGHCommand(args) {
t.Fatalf("%v should be mutating", args)
}
}
if mutatingGHCommand([]string{"pr", "checkout"}) || mutatingGHCommand([]string{"repo", "view"}) || mutatingGHCommand([]string{"api", "repos/openclaw/gitcrawl"}) {
t.Fatal("read-only commands classified as mutating")
}
// scp-style, https and ssh remotes must all yield the same owner/repo.
for _, remote := range []string{"git@github.com:openclaw/gitcrawl.git", "https://github.com/openclaw/gitcrawl.git", "ssh://git@github.com/openclaw/gitcrawl.git"} {
if got, err := ownerRepoFromGitRemote(remote); err != nil || got != "openclaw/gitcrawl" {
t.Fatalf("remote %q => %q err=%v", remote, got, err)
}
}
if _, err := ownerRepoFromGitRemote("not-a-github-remote"); err == nil {
t.Fatal("bad remote should fail")
}
// Repo resolution: explicit value (trimmed), then GH_REPO, then the origin
// remote of a git checkout in the working directory.
app := New()
if got, err := app.resolveGHRepo(context.Background(), " openclaw/explicit "); err != nil || got != "openclaw/explicit" {
t.Fatalf("explicit repo = %q err=%v", got, err)
}
if got, err := app.resolveGHRepo(context.Background(), ""); err != nil || got != "openclaw/from-env" {
t.Fatalf("env repo = %q err=%v", got, err)
}
t.Setenv("GH_REPO", "")
repoDir := t.TempDir()
if err := runGit(context.Background(), repoDir, "init", "-b", "main"); err != nil {
t.Fatalf("init git repo: %v", err)
}
if err := runGit(context.Background(), repoDir, "remote", "add", "origin", "https://github.com/openclaw/gitcrawl.git"); err != nil {
t.Fatalf("add origin: %v", err)
}
original, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
// Restore the original working directory when the test returns.
defer func() { _ = os.Chdir(original) }()
if err := os.Chdir(repoDir); err != nil {
t.Fatalf("chdir repo: %v", err)
}
if got, err := app.resolveGHRepo(context.Background(), ""); err != nil || got != "openclaw/gitcrawl" {
t.Fatalf("git remote repo = %q err=%v", got, err)
}
// With no arguments the shim is expected to run the configured gh binary;
// a fake script that echoes its arguments stands in for it.
ghPath := filepath.Join(t.TempDir(), "gh")
if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho real-gh:$*\n"), 0o755); err != nil {
t.Fatalf("write fake gh: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", ghPath)
var ghOut bytes.Buffer
app.Stdout = &ghOut
if err := app.runGHShim(context.Background(), nil); err != nil {
t.Fatalf("empty gh shim fallback: %v", err)
}
if strings.TrimSpace(ghOut.String()) != "real-gh:" {
t.Fatalf("empty gh shim output = %q", ghOut.String())
}
// A gh path that is a symlink to a file named gitcrawl-gh must be rejected
// with an error mentioning "gitcrawl shim".
shimPath := filepath.Join(t.TempDir(), "gitcrawl-gh")
if err := os.WriteFile(shimPath, []byte("#!/bin/sh\necho shim\n"), 0o755); err != nil {
t.Fatalf("write fake shim: %v", err)
}
shimLink := filepath.Join(t.TempDir(), "gh")
if err := os.Symlink(shimPath, shimLink); err != nil {
t.Fatalf("symlink fake shim: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", shimLink)
if _, err := resolveRealGHPath(); err == nil || !strings.Contains(err.Error(), "gitcrawl shim") {
t.Fatalf("shim path should fail fast, err=%v", err)
}
// Stale grace: the env value wins; once cleared, users/octocat gets 24h.
t.Setenv("GITCRAWL_GH_STALE_GRACE", "3m")
if got := ghCommandCacheStaleGrace([]string{"api", "users/octocat"}); got != 3*time.Minute {
t.Fatalf("env stale grace = %s", got)
}
t.Setenv("GITCRAWL_GH_STALE_GRACE", "")
if got := ghCommandCacheStaleGrace([]string{"api", "users/octocat"}); got != 24*time.Hour {
t.Fatalf("user stale grace = %s", got)
}
}

View File

@ -0,0 +1,258 @@
package cli
import (
"context"
"flag"
"fmt"
"io"
"strings"
"github.com/openclaw/gitcrawl/internal/store"
)
// ghThreadViewJSONRow builds the JSON object for a `--json` view of thread,
// holding one entry per requested field.
//
// "comments" is served by localGHThreadComments. Every other field is first
// tried with ghSearchJSONValue; if that fails and the thread is a pull
// request, the field is resolved from the PR cache, which is loaded at most
// once per call. An empty field list, or a field nothing can resolve,
// returns an error.
func (a *App) ghThreadViewJSONRow(ctx context.Context, repoValue string, thread store.Thread, fieldsRaw string) (map[string]any, error) {
	fields := parseJSONFields(fieldsRaw)
	if len(fields) == 0 {
		return nil, fmt.Errorf("--json requires at least one field")
	}

	// prCache is filled lazily by loadPRCache the first time a PR-only field
	// is requested.
	var prCache *store.PullRequestCache
	loadPRCache := func() (*store.PullRequestCache, error) {
		if prCache != nil {
			return prCache, nil
		}
		loaded, err := a.loadGHPullRequestCache(ctx, repoValue, thread.Number, ghPRFieldsNeedFresh(fields))
		if err != nil {
			return nil, err
		}
		prCache = &loaded
		return prCache, nil
	}

	row := make(map[string]any, len(fields))
	for _, name := range fields {
		if name == "comments" {
			comments, err := a.localGHThreadComments(ctx, thread.ID)
			if err != nil {
				return nil, err
			}
			row[name] = ghCommentsJSONValue(comments)
			continue
		}
		value, searchErr := ghSearchJSONValue(thread, name)
		if searchErr != nil {
			if thread.Kind != "pull_request" {
				return nil, searchErr
			}
			detail, err := loadPRCache()
			if err != nil {
				return nil, err
			}
			value, err = ghPRDetailJSONValue(thread, *detail, name)
			if err != nil {
				return nil, err
			}
		}
		row[name] = value
	}
	return row, nil
}
// localGHPullRequestCache reads the cached pull request data for number in
// repoValue ("owner/name") from the read-only local store. A malformed
// repoValue is returned as a plain error; failures opening the store, finding
// the repository, or reading the cache are wrapped with localGHUnsupported.
func (a *App) localGHPullRequestCache(ctx context.Context, repoValue string, number int) (store.PullRequestCache, error) {
	var none store.PullRequestCache

	owner, name, err := parseOwnerRepo(repoValue)
	if err != nil {
		return none, err
	}
	local, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return none, localGHUnsupported(err)
	}
	defer local.Store.Close()

	repository, err := local.repository(ctx, owner, name)
	if err != nil {
		return none, localGHUnsupported(err)
	}
	cached, err := local.Store.PullRequestCache(ctx, repository.ID, number)
	if err != nil {
		return none, localGHUnsupported(err)
	}
	return cached, nil
}
// localGHThreadComments lists the comments stored for threadID in the
// read-only local store. Any failure is wrapped with localGHUnsupported.
func (a *App) localGHThreadComments(ctx context.Context, threadID int64) ([]store.Comment, error) {
	local, openErr := a.openLocalRuntimeReadOnly(ctx)
	if openErr != nil {
		return nil, localGHUnsupported(openErr)
	}
	defer local.Store.Close()

	found, listErr := local.Store.ListComments(ctx, threadID)
	if listErr != nil {
		return nil, localGHUnsupported(listErr)
	}
	return found, nil
}
// ghCommentsJSONValue converts stored comments into JSON objects with the
// keys id, author (login and type), body, createdAt and updatedAt. The
// result is always a non-nil slice, in input order.
func ghCommentsJSONValue(comments []store.Comment) []map[string]any {
	rows := make([]map[string]any, len(comments))
	for i := range comments {
		entry := comments[i]
		author := map[string]any{"login": entry.AuthorLogin, "type": entry.AuthorType}
		rows[i] = map[string]any{
			"id":        entry.GitHubID,
			"author":    author,
			"body":      entry.Body,
			"createdAt": entry.CreatedAtGitHub,
			"updatedAt": entry.UpdatedAtGitHub,
		}
	}
	return rows
}
// ghPRDetailJSONValue resolves one pull-request-only `--json` field from the
// cached PR data. isDraft comes from thread; every other supported field
// comes from cache. Unknown fields return an "unsupported --json field" error.
func ghPRDetailJSONValue(thread store.Thread, cache store.PullRequestCache, field string) (any, error) {
	detail := cache.Detail
	switch field {
	case "files":
		rows := make([]map[string]any, len(cache.Files))
		for i, changed := range cache.Files {
			rows[i] = map[string]any{
				"path":      changed.Path,
				"additions": changed.Additions,
				"deletions": changed.Deletions,
				"status":    changed.Status,
			}
		}
		return rows, nil
	case "commits":
		rows := make([]map[string]any, len(cache.Commits))
		for i, entry := range cache.Commits {
			// The headline is the message up to its first newline.
			// NOTE(review): messageBody carries the full message, headline
			// included; confirm that is the intended shape.
			firstLine, _, _ := strings.Cut(entry.Message, "\n")
			author := map[string]any{
				"login": entry.AuthorLogin,
				"name":  entry.AuthorName,
			}
			rows[i] = map[string]any{
				"oid":             entry.SHA,
				"messageHeadline": firstLine,
				"messageBody":     entry.Message,
				"authoredDate":    entry.CommittedAt,
				"url":             entry.HTMLURL,
				"authors":         []map[string]any{author},
			}
		}
		return rows, nil
	case "statusCheckRollup":
		return ghStatusCheckRollup(cache.Checks), nil
	case "headRefName":
		return detail.HeadRef, nil
	case "headRefOid":
		return detail.HeadSHA, nil
	case "baseRefOid":
		return detail.BaseSHA, nil
	case "headRepositoryOwner":
		// The owner is everything before the first "/" of the full name.
		login, _, _ := strings.Cut(detail.HeadRepoFullName, "/")
		return map[string]any{"login": login}, nil
	case "headRepository":
		return map[string]any{"nameWithOwner": detail.HeadRepoFullName}, nil
	case "mergeStateStatus":
		return strings.ToUpper(detail.MergeableState), nil
	case "additions":
		return detail.Additions, nil
	case "deletions":
		return detail.Deletions, nil
	case "changedFiles":
		return detail.ChangedFiles, nil
	case "isDraft":
		return thread.IsDraft, nil
	}
	return nil, fmt.Errorf("unsupported --json field %q", field)
}
// ghStatusCheckRollup renders cached checks as statusCheckRollup entries
// typed "CheckRun". status, conclusion and state are upper-cased, and state
// is the conclusion when one is set, otherwise the status. The result is
// always a non-nil slice, in input order.
func ghStatusCheckRollup(checks []store.PullRequestCheck) []map[string]any {
	rollup := make([]map[string]any, len(checks))
	for i := range checks {
		item := checks[i]
		rollup[i] = map[string]any{
			"__typename":   "CheckRun",
			"name":         item.Name,
			"status":       strings.ToUpper(item.Status),
			"conclusion":   strings.ToUpper(item.Conclusion),
			"state":        strings.ToUpper(firstNonEmpty(item.Conclusion, item.Status)),
			"detailsUrl":   item.DetailsURL,
			"workflowName": item.WorkflowName,
			"startedAt":    item.StartedAt,
			"completedAt":  item.CompletedAt,
		}
	}
	return rollup
}
// runGHPRChecks serves `gh pr checks <number|url>` from cached pull request
// checks.
//
// --watch and --web are rejected with localGHUnsupported, as are an
// unresolvable repository and an empty check list. The repository comes from
// -R/--repo, or from the thread reference when neither flag is given. Output
// is JSON when --json or --jq is supplied or the app format is JSON;
// otherwise it is one tab-separated line per check.
func (a *App) runGHPRChecks(ctx context.Context, args []string) error {
	if hasAnyGHFlag(args, "--watch", "--web") {
		return localGHUnsupported(fmt.Errorf("interactive PR checks flags require live gh"))
	}

	flags := flag.NewFlagSet("pr checks", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		shortRepo = flags.String("R", "", "repository")
		longRepo  = flags.String("repo", "", "repository")
		jsonFlag  = flags.String("json", "", "comma-separated JSON fields")
		jqFlag    = flags.String("jq", "", "jq filter")
	)
	valueFlags := map[string]bool{"R": true, "repo": true, "json": true, "jq": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("gh pr checks requires a number or GitHub URL"))
	}

	target := flags.Arg(0)
	// The reference is only a fallback source for the repository, so its
	// parse error is deliberately ignored here.
	ref, _ := parseThreadReference(target)
	number, err := parseThreadNumber(target)
	if err != nil {
		return usageErr(err)
	}
	repoArg := firstNonEmpty(*shortRepo, *longRepo)
	if repoArg == "" {
		repoArg = ref.FullName()
	}
	repoValue, err := a.resolveGHRepo(ctx, repoArg)
	if err != nil {
		return localGHUnsupported(err)
	}

	cache, err := a.ensureFreshGHPullRequestCache(ctx, repoValue, number)
	if err != nil {
		return err
	}
	if len(cache.Checks) == 0 {
		return localGHUnsupported(fmt.Errorf("cached PR checks are empty"))
	}

	jsonFields := strings.TrimSpace(*jsonFlag)
	jqFilter := strings.TrimSpace(*jqFlag)
	if jsonFields == "" && jqFilter == "" && a.format != FormatJSON {
		for _, check := range cache.Checks {
			if _, err := fmt.Fprintf(a.Stdout, "%s\t%s\t%s\t%s\n", check.Name, check.Status, check.Conclusion, check.DetailsURL); err != nil {
				return err
			}
		}
		return nil
	}
	selected := firstNonEmpty(jsonFields, "name,state,conclusion,detailsUrl,workflow")
	return a.writeJSONValue(ghPRChecksJSONRows(cache.Checks, selected), jqFilter)
}
// ghPRChecksJSONRows projects each check onto the fields named in fieldsRaw.
// Recognised fields are name, state, status, conclusion, detailsUrl (alias
// link), workflow, startedAt and completedAt. Unrecognised fields are
// omitted from the rows rather than reported.
func ghPRChecksJSONRows(checks []store.PullRequestCheck, fieldsRaw string) []map[string]any {
	// lookup returns the value for one field and whether the field is known.
	lookup := func(check store.PullRequestCheck, name string) (any, bool) {
		switch name {
		case "name":
			return check.Name, true
		case "state":
			return strings.ToUpper(firstNonEmpty(check.Conclusion, check.Status)), true
		case "status":
			return check.Status, true
		case "conclusion":
			return check.Conclusion, true
		case "detailsUrl", "link":
			return check.DetailsURL, true
		case "workflow":
			return check.WorkflowName, true
		case "startedAt":
			return check.StartedAt, true
		case "completedAt":
			return check.CompletedAt, true
		}
		return nil, false
	}

	wanted := parseJSONFields(fieldsRaw)
	rows := make([]map[string]any, len(checks))
	for i, check := range checks {
		entry := make(map[string]any, len(wanted))
		for _, name := range wanted {
			if value, known := lookup(check, name); known {
				entry[name] = value
			}
		}
		rows[i] = entry
	}
	return rows
}

View File

@ -0,0 +1,169 @@
package cli
import (
"context"
"flag"
"fmt"
"io"
"strconv"
"strings"
"github.com/openclaw/gitcrawl/internal/store"
)
// runGHRunList serves `gh run list` from cached workflow runs.
//
// --web is rejected with localGHUnsupported, as are an unresolvable
// repository and an empty result. When --branch is given without --commit
// and the branch maps to a cached pull request, that PR's cache is refreshed
// before listing; a failed branch lookup is ignored. Output is JSON when
// --json or --jq is supplied or the app format is JSON; otherwise it is one
// tab-separated line per run.
func (a *App) runGHRunList(ctx context.Context, args []string) error {
	if hasAnyGHFlag(args, "--web") {
		return localGHUnsupported(fmt.Errorf("web workflow run flags require live gh"))
	}

	flags := flag.NewFlagSet("run list", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		shortRepo  = flags.String("R", "", "repository")
		longRepo   = flags.String("repo", "", "repository")
		branchFlag = flags.String("branch", "", "branch")
		commitFlag = flags.String("commit", "", "head sha")
		limitFlag  = flags.String("limit", "", "maximum rows")
		limitShort = flags.String("L", "", "maximum rows")
		jsonFlag   = flags.String("json", "", "comma-separated JSON fields")
		jqFlag     = flags.String("jq", "", "jq filter")
	)
	valueFlags := map[string]bool{
		"R": true, "repo": true, "branch": true, "commit": true, "limit": true, "L": true, "json": true, "jq": true,
	}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 0 {
		return usageErr(fmt.Errorf("unexpected gh run list arguments: %s", strings.Join(flags.Args(), " ")))
	}
	limit, err := parseGHSearchLimit(*limitFlag, *limitShort)
	if err != nil {
		return usageErr(err)
	}
	repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*shortRepo, *longRepo))
	if err != nil {
		return localGHUnsupported(err)
	}

	branch := strings.TrimSpace(*branchFlag)
	headSHA := strings.TrimSpace(*commitFlag)
	if branch != "" && headSHA == "" {
		number, findErr := a.findGHPullRequestNumberByBranch(ctx, repoValue, branch)
		if findErr == nil {
			if _, hydrateErr := a.ensureFreshGHPullRequestCache(ctx, repoValue, number); hydrateErr != nil {
				return hydrateErr
			}
		}
	}

	query := store.WorkflowRunListOptions{
		Branch:  branch,
		HeadSHA: headSHA,
		Limit:   limit,
	}
	runs, err := a.localGHWorkflowRuns(ctx, repoValue, query)
	if err != nil {
		return err
	}
	if len(runs) == 0 {
		return localGHUnsupported(fmt.Errorf("no cached workflow runs"))
	}

	jsonFields := strings.TrimSpace(*jsonFlag)
	jqFilter := strings.TrimSpace(*jqFlag)
	if jsonFields == "" && jqFilter == "" && a.format != FormatJSON {
		for _, run := range runs {
			if _, err := fmt.Fprintf(a.Stdout, "%s\t%s\t%s\t%s\n", run.RunID, run.WorkflowName, run.Status, run.HTMLURL); err != nil {
				return err
			}
		}
		return nil
	}
	selected := firstNonEmpty(jsonFields, "databaseId,workflowName,status,conclusion,url,createdAt,updatedAt")
	return a.writeJSONValue(ghWorkflowRunJSONRows(runs, selected), jqFilter)
}
// runGHRunView serves `gh run view <run-id>` from cached workflow runs.
//
// --web, --log and --log-failed are rejected with localGHUnsupported. The
// run is searched for among at most 100 runs returned by the local store; a
// run that is not among them yields localGHUnsupported. Output is a single
// JSON object when --json or --jq is supplied or the app format is JSON;
// otherwise it is a short key/value summary.
func (a *App) runGHRunView(ctx context.Context, args []string) error {
	if hasAnyGHFlag(args, "--web", "--log", "--log-failed") {
		return localGHUnsupported(fmt.Errorf("workflow run logs require live gh"))
	}

	flags := flag.NewFlagSet("run view", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		shortRepo = flags.String("R", "", "repository")
		longRepo  = flags.String("repo", "", "repository")
		jsonFlag  = flags.String("json", "", "comma-separated JSON fields")
		jqFlag    = flags.String("jq", "", "jq filter")
	)
	valueFlags := map[string]bool{"R": true, "repo": true, "json": true, "jq": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("gh run view requires a run id"))
	}
	runID := strings.TrimSpace(flags.Arg(0))

	repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*shortRepo, *longRepo))
	if err != nil {
		return localGHUnsupported(err)
	}
	runs, err := a.localGHWorkflowRuns(ctx, repoValue, store.WorkflowRunListOptions{Limit: 100})
	if err != nil {
		return err
	}

	for i := range runs {
		match := runs[i]
		if match.RunID != runID {
			continue
		}
		jsonFields := strings.TrimSpace(*jsonFlag)
		jqFilter := strings.TrimSpace(*jqFlag)
		if jsonFields == "" && jqFilter == "" && a.format != FormatJSON {
			_, writeErr := fmt.Fprintf(a.Stdout, "run: %s\nworkflow: %s\nstatus: %s\nurl: %s\n", match.RunID, match.WorkflowName, match.Status, match.HTMLURL)
			return writeErr
		}
		selected := firstNonEmpty(jsonFields, "databaseId,workflowName,status,conclusion,url,createdAt,updatedAt")
		rows := ghWorkflowRunJSONRows([]store.WorkflowRun{match}, selected)
		return a.writeJSONValue(rows[0], jqFilter)
	}
	return localGHUnsupported(fmt.Errorf("cached workflow run %s was not found", runID))
}
// localGHWorkflowRuns lists cached workflow runs for repoValue
// ("owner/name") from the read-only local store, filtered by options. A
// malformed repoValue is returned as a plain error; failures opening the
// store or finding the repository are wrapped with localGHUnsupported. The
// store's own listing error is returned unwrapped.
func (a *App) localGHWorkflowRuns(ctx context.Context, repoValue string, options store.WorkflowRunListOptions) ([]store.WorkflowRun, error) {
	owner, name, parseErr := parseOwnerRepo(repoValue)
	if parseErr != nil {
		return nil, parseErr
	}
	local, openErr := a.openLocalRuntimeReadOnly(ctx)
	if openErr != nil {
		return nil, localGHUnsupported(openErr)
	}
	defer local.Store.Close()

	repository, lookupErr := local.repository(ctx, owner, name)
	if lookupErr != nil {
		return nil, localGHUnsupported(lookupErr)
	}
	return local.Store.ListWorkflowRuns(ctx, repository.ID, options)
}
// ghWorkflowRunJSONRows projects each workflow run onto the fields named in
// fieldsRaw. Recognised fields are databaseId (alias id), number,
// workflowName (aliases name, displayTitle), status, conclusion, url, event,
// headBranch, headSha, createdAt and updatedAt. Unrecognised fields are
// omitted from the rows rather than reported.
func ghWorkflowRunJSONRows(runs []store.WorkflowRun, fieldsRaw string) []map[string]any {
	// lookup returns the value for one field and whether the field is known.
	lookup := func(run store.WorkflowRun, name string) (any, bool) {
		switch name {
		case "databaseId", "id":
			// Emit a number when the run ID is numeric, else the raw string.
			numeric, err := strconv.ParseInt(run.RunID, 10, 64)
			if err != nil {
				return run.RunID, true
			}
			return numeric, true
		case "number":
			return run.RunNumber, true
		case "workflowName", "name", "displayTitle":
			return run.WorkflowName, true
		case "status":
			return run.Status, true
		case "conclusion":
			return run.Conclusion, true
		case "url":
			return run.HTMLURL, true
		case "event":
			return run.Event, true
		case "headBranch":
			return run.HeadBranch, true
		case "headSha":
			return run.HeadSHA, true
		case "createdAt":
			return run.CreatedAtGH, true
		case "updatedAt":
			return run.UpdatedAtGH, true
		}
		return nil, false
	}

	wanted := parseJSONFields(fieldsRaw)
	rows := make([]map[string]any, len(runs))
	for i, run := range runs {
		entry := make(map[string]any, len(wanted))
		for _, name := range wanted {
			if value, known := lookup(run, name); known {
				entry[name] = value
			}
		}
		rows[i] = entry
	}
	return rows
}

View File

@ -0,0 +1,447 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestGHShimSearchAcceptsGHFlags checks that `gh search issues` accepts the
// usual gh flags (-R, --state, --match, --sort, --order, --json, --limit)
// and returns exactly one row, issue 10, from the seeded repository.
func TestGHShimSearchAcceptsGHFlags(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)

	var stdout bytes.Buffer
	app := New()
	app.Stdout = &stdout
	searchArgs := []string{
		"--config", configPath,
		"gh", "search", "issues", "hot loop",
		"-R", "openclaw/openclaw",
		"--state", "open",
		"--match", "comments",
		"--sort", "updated",
		"--order", "desc",
		"--json", "number,title,state,url",
		"--limit", "10",
	}
	if err := app.Run(ctx, searchArgs); err != nil {
		t.Fatalf("gh shim search: %v", err)
	}

	var decoded []map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &decoded); err != nil {
		t.Fatalf("decode search: %v\n%s", err, stdout.String())
	}
	if len(decoded) != 1 || int(decoded[0]["number"].(float64)) != 10 {
		t.Fatalf("rows = %#v", decoded)
	}
}
// TestGHShimFallsBackForUnsupportedRead checks that `gh pr view` with a JSON
// field the shim cannot serve is forwarded unchanged to the gh binary named
// by GITCRAWL_GH_PATH (here a script that echoes its arguments).
func TestGHShimFallsBackForUnsupportedRead(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)

	fakeGH := filepath.Join(t.TempDir(), "gh")
	script := []byte("#!/bin/sh\necho fallback:$*\n")
	if err := os.WriteFile(fakeGH, script, 0o755); err != nil {
		t.Fatalf("write fake gh: %v", err)
	}
	t.Setenv("GITCRAWL_GH_PATH", fakeGH)

	var stdout bytes.Buffer
	app := New()
	app.Stdout = &stdout
	cmd := []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "unsupportedField"}
	if err := app.Run(ctx, cmd); err != nil {
		t.Fatalf("fallback: %v", err)
	}
	const want = "fallback:pr view 12 -R openclaw/openclaw --json unsupportedField"
	if got := strings.TrimSpace(stdout.String()); got != want {
		t.Fatalf("fallback output = %q", got)
	}
}
// TestGHShimFallsBackForEmptyOpenIssueListWithoutBroadSync checks that
// `gh issue list --state open` against the repository seeded by
// seedGHShimEmptyRepo is forwarded unchanged to the gh binary named by
// GITCRAWL_GH_PATH instead of being answered locally.
func TestGHShimFallsBackForEmptyOpenIssueListWithoutBroadSync(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimEmptyRepo(t, ctx)

	fakeGH := filepath.Join(t.TempDir(), "gh")
	script := []byte("#!/bin/sh\necho fallback:$*\n")
	if err := os.WriteFile(fakeGH, script, 0o755); err != nil {
		t.Fatalf("write fake gh: %v", err)
	}
	t.Setenv("GITCRAWL_GH_PATH", fakeGH)

	var stdout bytes.Buffer
	app := New()
	app.Stdout = &stdout
	cmd := []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number"}
	if err := app.Run(ctx, cmd); err != nil {
		t.Fatalf("fallback: %v", err)
	}
	const want = "fallback:issue list -R openclaw/openclaw --state open --json number"
	if got := strings.TrimSpace(stdout.String()); got != want {
		t.Fatalf("fallback output = %q", got)
	}
}
// TestGHShimSearchFallsBackForEmptyOpenRepoWithoutBroadSync checks that
// `gh search issues --state open` against the repository seeded by
// seedGHShimEmptyRepo is forwarded unchanged to the gh binary named by
// GITCRAWL_GH_PATH instead of being answered locally.
func TestGHShimSearchFallsBackForEmptyOpenRepoWithoutBroadSync(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimEmptyRepo(t, ctx)

	fakeGH := filepath.Join(t.TempDir(), "gh")
	script := []byte("#!/bin/sh\necho fallback:$*\n")
	if err := os.WriteFile(fakeGH, script, 0o755); err != nil {
		t.Fatalf("write fake gh: %v", err)
	}
	t.Setenv("GITCRAWL_GH_PATH", fakeGH)

	var stdout bytes.Buffer
	app := New()
	app.Stdout = &stdout
	cmd := []string{"--config", configPath, "gh", "search", "issues", "-R", "openclaw/openclaw", "--state", "open", "--json", "number"}
	if err := app.Run(ctx, cmd); err != nil {
		t.Fatalf("fallback: %v", err)
	}
	const want = "fallback:search issues -R openclaw/openclaw --state open --json number"
	if got := strings.TrimSpace(stdout.String()); got != want {
		t.Fatalf("fallback output = %q", got)
	}
}
// TestGHShimAutoHydratePortableStoreWritesRuntimeMirror checks that viewing
// an issue missing from a portable (git-backed) store hydrates it from the
// GitHub API into the runtime mirror database, leaving the portable checkout
// clean and its database without the new thread.
//
// Setup: a git "remote" holding a seeded store with issue 1 is cloned via
// syncPortableStore, the config is initialised against the checkout's
// database, and an httptest server stands in for the GitHub API.
func TestGHShimAutoHydratePortableStoreWritesRuntimeMirror(t *testing.T) {
	ctx := context.Background()
	dir := t.TempDir()
	remoteDir := filepath.Join(dir, "remote")
	checkoutDir := filepath.Join(dir, "checkout")
	dbRel := filepath.Join("data", "openclaw__openclaw.sync.db")
	if err := os.MkdirAll(filepath.Join(remoteDir, "data"), 0o755); err != nil {
		t.Fatalf("mkdir remote data: %v", err)
	}
	if err := runGit(ctx, remoteDir, "init", "-b", "main"); err != nil {
		t.Fatalf("git init: %v", err)
	}
	seedPortableThread(t, filepath.Join(remoteDir, dbRel), 1, "portable issue")
	if err := runGit(ctx, remoteDir, "add", dbRel); err != nil {
		t.Fatalf("git add seed: %v", err)
	}
	if err := runGit(ctx, remoteDir, "-c", "user.email=test@example.com", "-c", "user.name=Test", "commit", "-m", "seed store"); err != nil {
		t.Fatalf("git commit seed: %v", err)
	}
	if _, err := syncPortableStore(ctx, remoteDir, checkoutDir); err != nil {
		t.Fatalf("clone portable store: %v", err)
	}
	configPath := filepath.Join(dir, "config.toml")
	app := New()
	if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", filepath.Join(checkoutDir, dbRel)}); err != nil {
		t.Fatalf("init config: %v", err)
	}

	// Fake GitHub API serving the repository and issue 2 only.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/repos/openclaw/openclaw":
			_ = json.NewEncoder(w).Encode(map[string]any{"id": 101, "full_name": "openclaw/openclaw"})
		case "/repos/openclaw/openclaw/issues/2":
			_ = json.NewEncoder(w).Encode(map[string]any{
				"id":         502,
				"number":     2,
				"state":      "open",
				"title":      "runtime-only issue",
				"body":       "hydrate into runtime mirror",
				"html_url":   "https://github.com/openclaw/openclaw/issues/2",
				"created_at": "2026-05-08T00:00:00Z",
				"updated_at": "2026-05-08T00:00:00Z",
				"labels":     []map[string]any{},
				"assignees":  []map[string]any{},
				"user":       map[string]any{"login": "alice", "type": "User"},
			})
		default:
			// The handler runs on a server goroutine, where FailNow (and so
			// Fatalf) must not be called. Record the failure with Errorf and
			// answer 404 so the client request still completes.
			t.Errorf("unexpected path: %s", r.URL.String())
			http.NotFound(w, r)
		}
	}))
	defer server.Close()
	t.Setenv("GITHUB_TOKEN", "test-token")
	t.Setenv("GITCRAWL_GITHUB_BASE_URL", server.URL)

	run := New()
	var stdout bytes.Buffer
	run.Stdout = &stdout
	if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "view", "2", "-R", "openclaw/openclaw", "--json", "number,title"}); err != nil {
		t.Fatalf("gh issue view: %v", err)
	}
	if !strings.Contains(stdout.String(), `"number": 2`) || !strings.Contains(stdout.String(), "runtime-only issue") {
		t.Fatalf("view output = %q", stdout.String())
	}

	// The portable checkout must be untouched; only the runtime mirror
	// database should contain the hydrated issue.
	if !gitWorktreeClean(ctx, checkoutDir) {
		t.Fatal("auto-hydrate dirtied portable checkout")
	}
	assertPortableThreadPresence(t, ctx, filepath.Join(checkoutDir, dbRel), 2, false)
	mirrorPath, err := run.portableRuntimeDBPath(filepath.Join(checkoutDir, dbRel))
	if err != nil {
		t.Fatalf("runtime db path: %v", err)
	}
	assertPortableThreadPresence(t, ctx, mirrorPath, 2, true)
}
// TestGHShimViewAcceptsFullGitHubURL checks that `gh issue view` accepts a
// full GitHub issue URL in place of a number plus -R and returns the seeded
// issue 10 with its URL.
func TestGHShimViewAcceptsFullGitHubURL(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)
	const issueURL = "https://github.com/openclaw/openclaw/issues/10"

	var stdout bytes.Buffer
	app := New()
	app.Stdout = &stdout
	viewArgs := []string{
		"--config", configPath,
		"gh", "issue", "view", issueURL,
		"--json", "number,title,url",
	}
	if err := app.Run(ctx, viewArgs); err != nil {
		t.Fatalf("gh issue view URL: %v", err)
	}

	var decoded map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &decoded); err != nil {
		t.Fatalf("decode issue view: %v\n%s", err, stdout.String())
	}
	if int(decoded["number"].(float64)) != 10 || decoded["url"] != issueURL {
		t.Fatalf("row = %#v", decoded)
	}
}
// seedGHShimEmptyRepo creates a config file and database under a fresh temp
// directory, registers the openclaw/openclaw repository without any threads,
// and records one successful targeted sync run. It returns the config path.
func seedGHShimEmptyRepo(t *testing.T, ctx context.Context) string {
	t.Helper()
	root := t.TempDir()
	configPath := filepath.Join(root, "config.toml")
	dbPath := filepath.Join(root, "gitcrawl.db")

	initArgs := []string{"--config", configPath, "init", "--db", dbPath}
	if initErr := New().Run(ctx, initArgs); initErr != nil {
		t.Fatalf("init: %v", initErr)
	}

	// Point the cache at the temp directory so the test stays self-contained.
	cfg, loadErr := config.Load(configPath)
	if loadErr != nil {
		t.Fatalf("load config: %v", loadErr)
	}
	cfg.CacheDir = filepath.Join(root, "cache")
	if saveErr := config.Save(configPath, cfg); saveErr != nil {
		t.Fatalf("save config: %v", saveErr)
	}

	db, openErr := store.Open(ctx, dbPath)
	if openErr != nil {
		t.Fatalf("open store: %v", openErr)
	}

	repository := store.Repository{
		Owner:     "openclaw",
		Name:      "openclaw",
		FullName:  "openclaw/openclaw",
		RawJSON:   "{}",
		UpdatedAt: "2026-05-08T00:00:00Z",
	}
	repoID, seedErr := db.UpsertRepository(ctx, repository)
	if seedErr != nil {
		t.Fatalf("seed repository: %v", seedErr)
	}

	targetedSync := store.RunRecord{
		RepoID:     repoID,
		Kind:       "sync",
		Scope:      "numbers:13",
		Status:     "success",
		StartedAt:  "2026-05-08T00:00:00Z",
		FinishedAt: "2026-05-08T00:00:01Z",
	}
	if _, recordErr := db.RecordRun(ctx, targetedSync); recordErr != nil {
		t.Fatalf("record targeted sync: %v", recordErr)
	}

	if closeErr := db.Close(); closeErr != nil {
		t.Fatalf("close store: %v", closeErr)
	}
	return configPath
}
// assertPortableThreadPresence opens the database at dbPath read-only and
// fails the test unless the presence of thread `number` in the
// openclaw/openclaw repository (closed threads included) equals want.
func assertPortableThreadPresence(t *testing.T, ctx context.Context, dbPath string, number int, want bool) {
	t.Helper()

	db, err := store.OpenReadOnly(ctx, dbPath)
	if err != nil {
		t.Fatalf("open store %s: %v", dbPath, err)
	}
	defer db.Close()

	repo, err := db.RepositoryByFullName(ctx, "openclaw/openclaw")
	if err != nil {
		t.Fatalf("repository %s: %v", dbPath, err)
	}

	filter := store.ThreadListOptions{
		RepoID:        repo.ID,
		IncludeClosed: true,
		Numbers:       []int{number},
	}
	matches, err := db.ListThreadsFiltered(ctx, filter)
	if err != nil {
		t.Fatalf("list threads %s: %v", dbPath, err)
	}

	if present := len(matches) > 0; present != want {
		t.Fatalf("thread %d presence in %s = %v, want %v", number, dbPath, present, want)
	}
}
// seedGHShimRepo builds a populated fixture for the gh shim tests and returns
// the path of the config file that points at it. Under a fresh temp directory
// it initializes a config and database, then seeds the openclaw/openclaw
// repository with:
//   - open issue #10 ("Hot loop burns CPU") with a document and one issue comment
//   - open draft pull request #12 ("Manifest cache update") with a document, one
//     review comment, and a pull request cache entry (one file, one commit, one
//     check, one workflow run)
//
// Statement order matters: later inserts use the IDs returned by earlier ones.
func seedGHShimRepo(t *testing.T, ctx context.Context) string {
t.Helper()
dir := t.TempDir()
configPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "gitcrawl.db")
app := New()
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
t.Fatalf("init: %v", err)
}
// Redirect the cache directory into the temp dir and persist the change.
cfg, err := config.Load(configPath)
if err != nil {
t.Fatalf("load config: %v", err)
}
cfg.CacheDir = filepath.Join(dir, "cache")
if err := config.Save(configPath, cfg); err != nil {
t.Fatalf("save config: %v", err)
}
st, err := store.Open(ctx, dbPath)
if err != nil {
t.Fatalf("open store: %v", err)
}
repoID, err := st.UpsertRepository(ctx, store.Repository{
Owner: "openclaw",
Name: "openclaw",
FullName: "openclaw/openclaw",
RawJSON: "{}",
UpdatedAt: "2026-04-27T00:00:00Z",
})
if err != nil {
t.Fatalf("seed repository: %v", err)
}
// Issue #10: thread row, searchable document, and one comment.
issueID, err := st.UpsertThread(ctx, store.Thread{
RepoID: repoID,
GitHubID: "10",
Number: 10,
Kind: "issue",
State: "open",
Title: "Hot loop burns CPU",
Body: "the runtime has a hot loop",
AuthorLogin: "alice",
AuthorType: "User",
HTMLURL: "https://github.com/openclaw/openclaw/issues/10",
LabelsJSON: `[{"name":"bug","color":"d73a4a"}]`,
AssigneesJSON: `[{"login":"peter"}]`,
RawJSON: "{}",
ContentHash: "issue-10",
UpdatedAtGitHub: "2026-04-27T01:00:00Z",
UpdatedAt: "2026-04-27T01:00:00Z",
})
if err != nil {
t.Fatalf("seed issue: %v", err)
}
if _, err := st.UpsertDocument(ctx, store.Document{ThreadID: issueID, Title: "Hot loop burns CPU", RawText: "runtime hot loop burns CPU", DedupeText: "runtime hot loop burns cpu", UpdatedAt: "2026-04-27T01:00:00Z"}); err != nil {
t.Fatalf("seed issue document: %v", err)
}
if _, err := st.UpsertComment(ctx, store.Comment{
ThreadID: issueID,
GitHubID: "1001",
CommentType: "issue_comment",
AuthorLogin: "carol",
AuthorType: "User",
Body: "same hot loop here",
RawJSON: "{}",
CreatedAtGitHub: "2026-04-27T01:10:00Z",
UpdatedAtGitHub: "2026-04-27T01:10:00Z",
}); err != nil {
t.Fatalf("seed issue comment: %v", err)
}
// Pull request #12: draft PR whose raw JSON carries head SHA abc123.
prID, err := st.UpsertThread(ctx, store.Thread{
RepoID: repoID,
GitHubID: "12",
Number: 12,
Kind: "pull_request",
State: "open",
Title: "Manifest cache update",
AuthorLogin: "bob",
AuthorType: "User",
HTMLURL: "https://github.com/openclaw/openclaw/pull/12",
LabelsJSON: "[]",
AssigneesJSON: "[]",
RawJSON: `{"head":{"sha":"abc123"}}`,
ContentHash: "pr-12",
IsDraft: true,
UpdatedAtGitHub: "2026-04-27T02:00:00Z",
UpdatedAt: "2026-04-27T02:00:00Z",
})
if err != nil {
t.Fatalf("seed pr: %v", err)
}
if _, err := st.UpsertDocument(ctx, store.Document{ThreadID: prID, Title: "Manifest cache update", RawText: "manifest cache refresh", DedupeText: "manifest cache refresh", UpdatedAt: "2026-04-27T02:00:00Z"}); err != nil {
t.Fatalf("seed pr document: %v", err)
}
if _, err := st.UpsertComment(ctx, store.Comment{
ThreadID: prID,
GitHubID: "1201",
CommentType: "review_comment",
AuthorLogin: "dana",
AuthorType: "User",
Body: "cache path looks good",
RawJSON: "{}",
CreatedAtGitHub: "2026-04-27T02:10:00Z",
UpdatedAtGitHub: "2026-04-27T02:10:00Z",
}); err != nil {
t.Fatalf("seed pr comment: %v", err)
}
// The PR cache rows are stamped with the current time, unlike the fixed
// 2026-04-27 timestamps above. Presumably this keeps the cached detail
// fresh for any staleness checks; confirm against the cache readers.
fetchedAt := time.Now().UTC().Format(time.RFC3339Nano)
if err := st.UpsertPullRequestCache(ctx, store.PullRequestDetail{
ThreadID: prID,
RepoID: repoID,
Number: 12,
BaseSHA: "base123",
HeadSHA: "abc123",
HeadRef: "manifest-cache",
HeadRepoFullName: "openclaw/openclaw",
MergeableState: "clean",
Additions: 10,
Deletions: 2,
ChangedFiles: 1,
RawJSON: `{"head":{"sha":"abc123"}}`,
FetchedAt: fetchedAt,
UpdatedAt: fetchedAt,
}, []store.PullRequestFile{{
ThreadID: prID,
Path: "internal/cache.go",
Status: "modified",
Additions: 10,
Deletions: 2,
Changes: 12,
RawJSON: "{}",
FetchedAt: fetchedAt,
}}, []store.PullRequestCommit{{
ThreadID: prID,
SHA: "commit123",
Message: "feat: cache",
AuthorLogin: "alice",
AuthorName: "Alice",
CommittedAt: "2026-04-27T01:00:00Z",
HTMLURL: "https://github.com/openclaw/openclaw/commit/commit123",
RawJSON: "{}",
FetchedAt: fetchedAt,
}}, []store.PullRequestCheck{{
ThreadID: prID,
Name: "test",
Status: "completed",
Conclusion: "success",
DetailsURL: "https://github.com/openclaw/openclaw/actions/runs/99",
WorkflowName: "CI",
RawJSON: "{}",
FetchedAt: fetchedAt,
}}, []store.WorkflowRun{{
RepoID: repoID,
RunID: "99",
RunNumber: 7,
HeadBranch: "manifest-cache",
HeadSHA: "abc123",
Status: "completed",
Conclusion: "success",
WorkflowName: "CI",
Event: "pull_request",
HTMLURL: "https://github.com/openclaw/openclaw/actions/runs/99",
CreatedAtGH: "2026-04-27T01:00:00Z",
UpdatedAtGH: "2026-04-27T02:00:00Z",
RawJSON: "{}",
FetchedAt: fetchedAt,
}}); err != nil {
t.Fatalf("seed pr cache: %v", err)
}
// Close explicitly so a failed close is reported as a test failure.
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
}
return configPath
}

View File

@ -0,0 +1,375 @@
package cli
import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// ghCommandCacheStats is the report produced by `gh xcache stats` and written
// to disk by `gh xcache snapshot`.
type ghCommandCacheStats struct {
// CacheDir is the cache directory that was scanned.
CacheDir string `json:"cache_dir"`
// Entries counts cache keys that are not expired.
Entries int `json:"entries"`
// Expired counts cache keys flagged as expired.
Expired int `json:"expired"`
// Locks counts files in the cache directory whose name ends in ".lock".
Locks int `json:"locks"`
// Bytes is the summed file size of all keys, expired ones included.
Bytes int64 `json:"bytes"`
// Since is the window duration string; set only when a positive window is requested.
Since string `json:"since,omitempty"`
// CacheHits is local hits + fallback hits + stale hits from Counters.
CacheHits int64 `json:"cache_hits"`
// TotalReads is CacheHits plus backend misses from Counters.
TotalReads int64 `json:"total_reads"`
// HitRatePercent is CacheHits/TotalReads*100, left at 0 when TotalReads is 0.
HitRatePercent float64 `json:"hit_rate_percent"`
// Counters holds the counters for the reported window (all-time when no window is given).
Counters ghXCacheCounters `json:"counters"`
// CumulativeCounters holds the unwindowed counters; set only when a window is requested.
CumulativeCounters *ghXCacheCounters `json:"cumulative_counters,omitempty"`
// Commands breaks entries and bytes down per command name (expired keys included).
Commands map[string]ghCommandCacheCount `json:"commands"`
}
// ghCommandCacheCount is the per-command tally in ghCommandCacheStats.Commands.
type ghCommandCacheCount struct {
// Entries is the number of cache keys recorded for the command.
Entries int `json:"entries"`
// Bytes is the summed file size of those keys.
Bytes int64 `json:"bytes"`
}
// ghCommandCacheKeyInfo describes one cached command entry as listed by
// `gh xcache keys`.
type ghCommandCacheKeyInfo struct {
// Key is the cache file name with its ".json" suffix removed.
Key string `json:"key"`
// CreatedAt is the creation time stored inside the cache entry.
CreatedAt time.Time `json:"created_at"`
// Age is the time elapsed since CreatedAt, rounded to the second and rendered as a duration string.
Age string `json:"age"`
// Command is the command name derived from the cached arguments.
Command string `json:"command"`
// Args are the arguments stored in the cache entry.
Args []string `json:"args"`
// Tags are the tags stored in the cache entry, if any.
Tags []string `json:"tags,omitempty"`
// Bytes is the size of the cache file on disk.
Bytes int64 `json:"bytes"`
// Expired is true when CreatedAt is zero or the entry's age exceeds its TTL.
Expired bool `json:"expired"`
}
// runGHXCache dispatches `gh xcache <subcommand>`. args[0] names the
// subcommand and the remaining arguments are parsed as flags (--json, --since,
// --reset). Parse failures, an unknown subcommand, and an invalid --since
// value for `stats` are returned as usage errors.
func (a *App) runGHXCache(args []string) error {
	if len(args) == 0 {
		return usageErr(fmt.Errorf("usage: gh xcache <stats|keys|gc|flush|reset|snapshot>"))
	}
	subcommand, rest := args[0], args[1:]

	var (
		asJSON        bool
		sinceText     string
		resetCounters bool
	)
	flags := flag.NewFlagSet("xcache "+subcommand, flag.ContinueOnError)
	flags.SetOutput(io.Discard) // errors are surfaced through usageErr instead
	flags.BoolVar(&asJSON, "json", false, "write JSON output")
	flags.StringVar(&sinceText, "since", "", "show stats for the recent duration (stats only)")
	flags.BoolVar(&resetCounters, "reset", false, "reset counters after writing a snapshot (snapshot only)")
	if err := flags.Parse(rest); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(asJSON)

	// --since is validated only for `stats`; other subcommands ignore it.
	if subcommand == "stats" {
		trimmed := strings.TrimSpace(sinceText)
		if trimmed == "" {
			return a.runGHXCacheStats(0)
		}
		window, err := time.ParseDuration(trimmed)
		if err != nil || window <= 0 {
			return usageErr(fmt.Errorf("invalid --since duration %q", sinceText))
		}
		return a.runGHXCacheStats(window)
	}

	switch subcommand {
	case "keys":
		return a.runGHXCacheKeys()
	case "gc":
		return a.runGHXCacheGC()
	case "flush":
		return a.runGHXCacheFlush()
	case "reset":
		return a.runGHXCacheReset()
	case "snapshot":
		return a.runGHXCacheSnapshot(resetCounters)
	}
	return usageErr(fmt.Errorf("unknown xcache command %q", subcommand))
}
// runGHXCacheStats prints cache statistics for the window given by since
// (zero means no window). In JSON mode the stats value is written as-is.
// Otherwise it prints a text report: directory totals, per-command usage,
// counters with the hit rate, and the top backend misses by command, route,
// and key.
func (a *App) runGHXCacheStats(since time.Duration) error {
	stats, err := a.ghCommandCacheStats(since)
	if err != nil {
		return err
	}
	if a.format == FormatJSON {
		return a.writeJSONValue(stats, "")
	}
	_, err = fmt.Fprintf(a.Stdout, "Cache Dir: %s\nEntries: %d\nExpired: %d\nLocks: %d\nBytes: %d\n",
		stats.CacheDir, stats.Entries, stats.Expired, stats.Locks, stats.Bytes)
	if err != nil {
		return err
	}
	if len(stats.Commands) > 0 {
		// Go randomizes map iteration order, so print commands in sorted order
		// to keep the report stable across runs (diffable, testable).
		names := make([]string, 0, len(stats.Commands))
		for name := range stats.Commands {
			names = append(names, name)
		}
		sort.Strings(names)
		_, _ = fmt.Fprintln(a.Stdout, "\nCommands:")
		for _, name := range names {
			count := stats.Commands[name]
			_, _ = fmt.Fprintf(a.Stdout, " %-16s %d entries / %d bytes\n", name, count.Entries, count.Bytes)
		}
	}
	if stats.Since != "" {
		_, _ = fmt.Fprintf(a.Stdout, "\nSince: %s\n", stats.Since)
	}
	_, _ = fmt.Fprintf(a.Stdout, "\nCounters:\n local hits: %d\n fallback hits: %d\n stale hits: %d\n backend misses: %d\n pass-through writes: %d\n hit rate: %.1f%% (%d/%d reads)\n",
		stats.Counters.LocalHits, stats.Counters.FallbackHits, stats.Counters.StaleHits, stats.Counters.BackendMisses, stats.Counters.PassThroughWrites,
		stats.HitRatePercent, stats.CacheHits, stats.TotalReads)
	printGHXCacheMisses(a.Stdout, "Backend Misses by Command", stats.Counters.BackendMissesByCommand)
	printGHXCacheMisses(a.Stdout, "Backend Misses by Route", stats.Counters.BackendMissesByRoute)
	printGHXCacheMisses(a.Stdout, "Backend Misses by Key", stats.Counters.BackendMissesByKey)
	return nil
}
func printGHXCacheMisses(w io.Writer, title string, misses map[string]int64) {
if len(misses) == 0 {
return
}
type row struct {
name string
count int64
}
rows := make([]row, 0, len(misses))
for name, count := range misses {
rows = append(rows, row{name: name, count: count})
}
sort.Slice(rows, func(i, j int) bool {
if rows[i].count == rows[j].count {
return rows[i].name < rows[j].name
}
return rows[i].count > rows[j].count
})
_, _ = fmt.Fprintf(w, "\n%s:\n", title)
for index, row := range rows {
if index >= 10 {
break
}
_, _ = fmt.Fprintf(w, " %-40s %d\n", row.name, row.count)
}
}
// runGHXCacheKeys lists the cached command entries. In JSON mode the key list
// is written as-is; otherwise each key is printed as one tab-separated line:
// key, age, command, space-joined args.
func (a *App) runGHXCacheKeys() error {
	entries, err := a.ghCommandCacheKeys()
	if err != nil {
		return err
	}
	if a.format == FormatJSON {
		return a.writeJSONValue(entries, "")
	}
	for i := range entries {
		entry := entries[i]
		joinedArgs := strings.Join(entry.Args, " ")
		_, err = fmt.Fprintf(a.Stdout, "%s\t%s\t%s\t%s\n", entry.Key, entry.Age, entry.Command, joinedArgs)
		if err != nil {
			return err
		}
	}
	return nil
}
// runGHXCacheFlush clears the command cache and reports how many entries were
// removed, as JSON ({"removed": n}) or as a one-line text message.
func (a *App) runGHXCacheFlush() error {
	count, err := a.clearGHCommandCacheCount()
	if err != nil {
		return err
	}
	if a.format != FormatJSON {
		_, err = fmt.Fprintf(a.Stdout, "Flushed %d cache entrie(s)\n", count)
		return err
	}
	return a.writeJSONValue(map[string]any{"removed": count}, "")
}
// runGHXCacheReset resets the xcache counters and confirms the reset, as JSON
// ({"reset": true}) or as a one-line text message.
func (a *App) runGHXCacheReset() error {
	resetErr := a.resetGHXCacheCounters()
	if resetErr != nil {
		return resetErr
	}
	if a.format != FormatJSON {
		_, writeErr := fmt.Fprintln(a.Stdout, "Reset xcache counters")
		return writeErr
	}
	return a.writeJSONValue(map[string]any{"reset": true}, "")
}
// ghCommandCacheSnapshotResult is the JSON result of `gh xcache snapshot`.
type ghCommandCacheSnapshotResult struct {
// SnapshotPath is the path of the snapshot file that was written.
SnapshotPath string `json:"snapshot_path"`
// Reset reports whether a counter reset was requested along with the snapshot.
Reset bool `json:"reset"`
}
// runGHXCacheSnapshot writes the current all-time cache stats as indented JSON
// to <cache dir>/_snapshots/<UTC timestamp>.json. When reset is true it then
// resets the xcache counters. The snapshot path is reported as JSON or text.
func (a *App) runGHXCacheSnapshot(reset bool) error {
	stats, err := a.ghCommandCacheStats(0)
	if err != nil {
		return err
	}
	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return err
	}

	snapshotDir := filepath.Join(cacheDir, "_snapshots")
	if err = os.MkdirAll(snapshotDir, 0o755); err != nil {
		return err
	}
	// The file name has one-second resolution.
	stamp := time.Now().UTC().Format("20060102T150405Z")
	snapshotPath := filepath.Join(snapshotDir, stamp+".json")

	payload, err := json.MarshalIndent(stats, "", " ")
	if err != nil {
		return err
	}
	if err = writeAtomicFile(snapshotPath, payload, 0o600); err != nil {
		return err
	}

	// Counters are reset only after the snapshot is safely on disk.
	if reset {
		if err = a.resetGHXCacheCounters(); err != nil {
			return err
		}
	}

	if a.format == FormatJSON {
		return a.writeJSONValue(ghCommandCacheSnapshotResult{SnapshotPath: snapshotPath, Reset: reset}, "")
	}
	if _, err = fmt.Fprintf(a.Stdout, "Wrote xcache snapshot: %s\n", snapshotPath); err != nil {
		return err
	}
	if !reset {
		return nil
	}
	_, err = fmt.Fprintln(a.Stdout, "Reset xcache counters")
	return err
}
// ghCommandCacheGCResult is the result of `gh xcache gc`.
type ghCommandCacheGCResult struct {
// Removed counts expired cache entries that were successfully deleted.
Removed int `json:"removed"`
// LocksRemoved counts stale ".lock" files that were successfully deleted.
LocksRemoved int `json:"locks_removed"`
}
// runGHXCacheGC garbage-collects the command cache and reports how many
// expired entries and stale locks were removed, as JSON or as text.
func (a *App) runGHXCacheGC() error {
	outcome, gcErr := a.gcGHCommandCache()
	if gcErr != nil {
		return gcErr
	}
	if a.format != FormatJSON {
		_, writeErr := fmt.Fprintf(a.Stdout, "Removed %d expired entrie(s), %d stale lock(s)\n", outcome.Removed, outcome.LocksRemoved)
		return writeErr
	}
	return a.writeJSONValue(outcome, "")
}
// ghCommandCacheStats scans the command cache directory and combines the key
// inventory with the xcache counters. When since is positive, Counters is
// narrowed to that window and the unwindowed counters are exposed through
// CumulativeCounters. The hit rate is derived from the reported Counters.
func (a *App) ghCommandCacheStats(since time.Duration) (ghCommandCacheStats, error) {
	var none ghCommandCacheStats

	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return none, err
	}
	keys, lockCount, err := a.collectGHCommandCacheKeys(cacheDir)
	if err != nil {
		return none, err
	}

	// The counter read error is discarded; stats are still reported without it.
	totals, _ := a.ghXCacheCounters()
	allTime := totals
	report := ghCommandCacheStats{
		CacheDir: cacheDir,
		Locks:    lockCount,
		Counters: totals,
		Commands: map[string]ghCommandCacheCount{},
	}
	if since > 0 {
		report.Since = since.String()
		report.CumulativeCounters = &allTime
		report.Counters = totals.since(since, time.Now())
	}

	active := report.Counters
	report.CacheHits = active.LocalHits + active.FallbackHits + active.StaleHits
	report.TotalReads = report.CacheHits + active.BackendMisses
	if report.TotalReads > 0 {
		report.HitRatePercent = float64(report.CacheHits) / float64(report.TotalReads) * 100
	}

	for _, key := range keys {
		switch {
		case key.Expired:
			report.Expired++
		default:
			report.Entries++
		}
		// Byte totals and per-command tallies include expired keys.
		report.Bytes += key.Bytes
		perCommand := report.Commands[key.Command]
		perCommand.Entries++
		perCommand.Bytes += key.Bytes
		report.Commands[key.Command] = perCommand
	}
	return report, nil
}
// gcGHCommandCache walks the command cache directory, deleting stale ".lock"
// files and expired cache entries. Files that cannot be inspected or removed
// are silently skipped; only successful removals are counted.
func (a *App) gcGHCommandCache() (ghCommandCacheGCResult, error) {
	var outcome ghCommandCacheGCResult

	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return ghCommandCacheGCResult{}, err
	}
	listing, err := os.ReadDir(cacheDir)
	if err != nil {
		return ghCommandCacheGCResult{}, err
	}

	for _, item := range listing {
		fileName := item.Name()
		fullPath := filepath.Join(cacheDir, fileName)

		if strings.HasSuffix(fileName, ".lock") {
			if info, infoErr := item.Info(); infoErr == nil && staleGHCommandCacheLock(info) {
				if os.Remove(fullPath) == nil {
					outcome.LocksRemoved++
				}
			}
			continue
		}

		if !(item.Type().IsRegular() && isGHCommandCacheEntryFile(fileName)) {
			continue
		}
		key, ok := ghCommandCacheKeyInfoFromDirEntry(cacheDir, item)
		if !ok || !key.Expired {
			continue
		}
		if os.Remove(fullPath) == nil {
			outcome.Removed++
		}
	}
	return outcome, nil
}
// ghCommandCacheKeys returns the key inventory of the command cache directory,
// discarding the lock count that the collector also reports.
func (a *App) ghCommandCacheKeys() ([]ghCommandCacheKeyInfo, error) {
	cacheDir, dirErr := a.ghCommandCacheDir()
	if dirErr != nil {
		return nil, dirErr
	}
	infos, _, collectErr := a.collectGHCommandCacheKeys(cacheDir)
	return infos, collectErr
}
// collectGHCommandCacheKeys reads dir and returns the decodable cache entries
// ordered newest first by CreatedAt, plus the number of ".lock" files seen.
// Non-regular files, files that are not cache entry files, and entries that
// cannot be decoded are skipped.
func (a *App) collectGHCommandCacheKeys(dir string) ([]ghCommandCacheKeyInfo, int, error) {
	listing, err := os.ReadDir(dir)
	if err != nil {
		return nil, 0, err
	}

	// Start from a non-nil slice so an empty cache still yields an empty list.
	found := []ghCommandCacheKeyInfo{}
	lockCount := 0
	for _, item := range listing {
		fileName := item.Name()
		switch {
		case strings.HasSuffix(fileName, ".lock"):
			lockCount++
		case !item.Type().IsRegular() || !isGHCommandCacheEntryFile(fileName):
			// not a cache entry
		default:
			if info, ok := ghCommandCacheKeyInfoFromDirEntry(dir, item); ok {
				found = append(found, info)
			}
		}
	}

	sort.Slice(found, func(i, j int) bool {
		return found[i].CreatedAt.After(found[j].CreatedAt)
	})
	return found, lockCount, nil
}
// ghCommandCacheKeyInfoFromDirEntry loads and decodes the cache file named by
// entry inside dir and summarizes it. The boolean is false when the file
// cannot be stat'ed, read, or decoded as JSON.
func ghCommandCacheKeyInfoFromDirEntry(dir string, entry os.DirEntry) (ghCommandCacheKeyInfo, bool) {
	var none ghCommandCacheKeyInfo
	fileName := entry.Name()

	fileInfo, err := entry.Info()
	if err != nil {
		return none, false
	}
	raw, err := os.ReadFile(filepath.Join(dir, fileName))
	if err != nil {
		return none, false
	}
	var cached ghCommandCacheEntry
	if json.Unmarshal(raw, &cached) != nil {
		return none, false
	}

	baseTTL := ghCommandCacheTTL(cached.Args)
	elapsed := time.Since(cached.CreatedAt)

	// An entry without a creation time is always treated as expired.
	expired := cached.CreatedAt.IsZero()
	if !expired {
		expired = elapsed > ghCommandCacheEntryTTL(cached, baseTTL)
	}

	summary := ghCommandCacheKeyInfo{
		Key:       strings.TrimSuffix(fileName, ".json"),
		CreatedAt: cached.CreatedAt,
		Age:       elapsed.Round(time.Second).String(),
		Command:   ghCommandName(cached.Args),
		Args:      cached.Args,
		Tags:      cached.Tags,
		Bytes:     fileInfo.Size(),
		Expired:   expired,
	}
	return summary, true
}

View File

@ -0,0 +1,90 @@
package cli
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/config"
)
// resolveGitHubToken returns the token resolved from cfg when it has a value.
// Otherwise it falls back to the output of `gh auth token`, reported with
// source "gh auth token". If the fallback fails or is empty, the original
// (empty) resolution is returned unchanged.
func (a *App) resolveGitHubToken(ctx context.Context, cfg config.Config) config.TokenResolution {
	resolved := config.ResolveGitHubToken(cfg)
	if resolved.Value != "" {
		return resolved
	}
	ghToken, err := a.githubAuthToken(ctx)
	if err != nil || ghToken == "" {
		return resolved
	}
	return config.TokenResolution{Value: ghToken, Source: "gh auth token"}
}
// githubAuthToken runs `<gh> auth token` against each usable candidate gh
// binary, giving every attempt a three-second timeout, and returns the first
// non-empty trimmed output. If every attempt fails it returns the last
// execution error, or "real gh not found" when nothing produced an error.
func (a *App) githubAuthToken(ctx context.Context) (string, error) {
	var lastErr error
	for _, ghPath := range candidateRealGHPaths() {
		if !usableRealGHPath(ghPath) {
			continue
		}
		output, err := func() ([]byte, error) {
			tokenCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
			defer cancel()
			return exec.CommandContext(tokenCtx, ghPath, "auth", "token").Output()
		}()
		if err != nil {
			lastErr = err
			continue
		}
		if token := strings.TrimSpace(string(output)); token != "" {
			return token, nil
		}
	}
	if lastErr == nil {
		return "", fmt.Errorf("real gh not found")
	}
	return "", lastErr
}
// candidateRealGHPaths lists locations where a real gh binary may live, in
// priority order: the GITCRAWL_GH_PATH override, three well-known install
// paths, then whatever `gh` resolves to on PATH. Blank entries, duplicates,
// and paths recognized as the gitcrawl shim are dropped.
func candidateRealGHPaths() []string {
	candidates := make([]string, 0, 5)
	if override := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH")); override != "" {
		candidates = append(candidates, override)
	}
	candidates = append(candidates, "/opt/homebrew/opt/gh/bin/gh", "/usr/local/bin/gh", "/usr/bin/gh")
	if onPath, err := exec.LookPath("gh"); err == nil {
		candidates = append(candidates, onPath)
	}

	seen := make(map[string]bool, len(candidates))
	result := make([]string, 0, len(candidates))
	for _, candidate := range candidates {
		trimmed := strings.TrimSpace(candidate)
		if trimmed == "" || seen[trimmed] || isGitcrawlShimPath(trimmed) {
			continue
		}
		seen[trimmed] = true
		result = append(result, trimmed)
	}
	return result
}
func usableRealGHPath(path string) bool {
info, err := os.Stat(path)
if err != nil || info.IsDir() || info.Mode()&0111 == 0 {
return false
}
exe, err := os.Executable()
if err != nil {
return true
}
candidateReal, candidateErr := filepath.EvalSymlinks(path)
exeReal, exeErr := filepath.EvalSymlinks(exe)
if candidateErr == nil && exeErr == nil && candidateReal == exeReal {
return false
}
return true
}

View File

@ -0,0 +1,25 @@
package cli
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/openclaw/gitcrawl/internal/config"
)
// TestResolveGitHubTokenFallsBackToGHAuthToken installs a fake gh script via
// GITCRAWL_GH_PATH, clears GITHUB_TOKEN, and expects token resolution to
// return the script's output with source "gh auth token".
func TestResolveGitHubTokenFallsBackToGHAuthToken(t *testing.T) {
	// Shell stub that answers only `auth token`.
	const fakeGH = "#!/bin/sh\nif [ \"$1\" = auth ] && [ \"$2\" = token ]; then echo gh-fallback-token; exit 0; fi\nexit 1\n"

	ghPath := filepath.Join(t.TempDir(), "gh")
	if writeErr := os.WriteFile(ghPath, []byte(fakeGH), 0o755); writeErr != nil {
		t.Fatalf("write fake gh: %v", writeErr)
	}
	t.Setenv("GITHUB_TOKEN", "")
	t.Setenv("GITCRAWL_GH_PATH", ghPath)

	got := New().resolveGitHubToken(context.Background(), config.Default())
	if got.Value == "gh-fallback-token" && got.Source == "gh auth token" {
		return
	}
	t.Fatalf("token = %#v", got)
}

View File

@ -2,6 +2,8 @@ package cli
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
@ -22,6 +24,10 @@ type localRuntime struct {
}
const portableStoreRefreshTimeout = 15 * time.Second
const portableStoreRefreshTTL = 2 * time.Minute
const portableStoreRefreshFailureBackoff = time.Minute
var errPortableStoreDirty = errors.New("portable store checkout has local changes")
func (a *App) openLocalRuntime(ctx context.Context) (localRuntime, error) {
cfg, err := config.Load(a.configPath)
@ -88,7 +94,7 @@ func refreshPortableStoreForDB(ctx context.Context, dbPath string) error {
return nil
}
if !gitWorktreeClean(ctx, root) {
return nil
return errPortableStoreDirty
}
pullCtx, cancel := context.WithTimeout(ctx, portableStoreRefreshTimeout)
defer cancel()
@ -129,7 +135,7 @@ func refreshPortableRuntimeDB(ctx context.Context, sourceDBPath, mirrorPath stri
portableRuntimeMu.Lock()
defer portableRuntimeMu.Unlock()
if refresh {
_ = refreshPortableStoreForDB(ctx, sourceDBPath)
_ = refreshPortableStoreForDBIfDue(ctx, sourceDBPath, mirrorPath)
}
needsCopy, err := portableRuntimeNeedsCopy(sourceDBPath, mirrorPath)
if err != nil {
@ -144,6 +150,111 @@ func refreshPortableRuntimeDB(ctx context.Context, sourceDBPath, mirrorPath stri
return true, nil
}
// portableStoreRefreshState is the JSON record of portable store refresh
// attempts, persisted next to the runtime mirror. The refresh code writes the
// timestamps as UTC RFC3339Nano strings.
type portableStoreRefreshState struct {
// LastAttempt is when the most recent refresh was started.
LastAttempt string `json:"last_attempt,omitempty"`
// LastSuccess is when the most recent refresh succeeded.
LastSuccess string `json:"last_success,omitempty"`
// LastFailure is when the most recent refresh failed; cleared on success.
LastFailure string `json:"last_failure,omitempty"`
// Error is the message of the most recent failure; cleared on success.
Error string `json:"error,omitempty"`
}
// refreshPortableStoreForDBIfDue refreshes the portable store behind
// sourceDBPath unless a recent attempt makes that unnecessary. It is throttled
// by a state file stored beside mirrorPath:
//   - skip if the last success is within the refresh interval
//   - skip if the last failure is within portableStoreRefreshFailureBackoff
//   - skip if the refresh lock file cannot be acquired
//
// Both throttles are bypassed when the interval is 0. The outcome of an actual
// refresh is written back to the state file. It returns the refresh error, or
// an error from creating the state directory or writing the state; every
// skipped case returns nil.
func refreshPortableStoreForDBIfDue(ctx context.Context, sourceDBPath, mirrorPath string) error {
ttl := portableStoreRefreshInterval()
statePath := portableStoreRefreshStatePath(mirrorPath)
state := readPortableStoreRefreshState(statePath)
now := time.Now().UTC()
// Fast path: decide from the state file alone, before touching the lock.
if ttl > 0 && recentPortableRefresh(state.LastSuccess, now, ttl) {
return nil
}
if ttl > 0 && recentPortableRefresh(state.LastFailure, now, portableStoreRefreshFailureBackoff) {
return nil
}
lockPath := statePath + ".lock"
if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil {
return err
}
// Clear a lock file old enough to count as stale before trying to take it.
removeStalePortableRefreshLock(lockPath, now)
lock, locked := tryGHCommandCacheLock(lockPath)
if !locked {
// The lock was not acquired; skip this refresh without reporting an error.
return nil
}
defer func() {
_ = lock.Close()
_ = os.Remove(lockPath)
}()
// Re-read the state under the lock: a refresh may have completed between
// the first check and acquiring the lock.
state = readPortableStoreRefreshState(statePath)
now = time.Now().UTC()
if ttl > 0 && recentPortableRefresh(state.LastSuccess, now, ttl) {
return nil
}
state.LastAttempt = now.Format(time.RFC3339Nano)
err := refreshPortableStoreForDB(ctx, sourceDBPath)
if err != nil {
// Record the failure so later calls back off; the state write is best-effort.
state.LastFailure = time.Now().UTC().Format(time.RFC3339Nano)
state.Error = err.Error()
_ = writePortableStoreRefreshState(statePath, state)
return err
}
state.LastSuccess = time.Now().UTC().Format(time.RFC3339Nano)
state.LastFailure = ""
state.Error = ""
return writePortableStoreRefreshState(statePath, state)
}
// removeStalePortableRefreshLock deletes the lock file at path when its
// modification time is more than twice portableStoreRefreshTimeout before now.
// A missing or unreadable lock is left alone, and removal errors are ignored.
func removeStalePortableRefreshLock(path string, now time.Time) {
	info, err := os.Stat(path)
	if err != nil {
		return
	}
	if age := now.Sub(info.ModTime()); age > 2*portableStoreRefreshTimeout {
		_ = os.Remove(path)
	}
}
// portableStoreRefreshInterval returns the refresh interval taken from the
// GITCRAWL_PORTABLE_REFRESH_TTL environment variable when it holds a valid,
// non-negative duration, and portableStoreRefreshTTL otherwise.
func portableStoreRefreshInterval() time.Duration {
	raw := strings.TrimSpace(os.Getenv("GITCRAWL_PORTABLE_REFRESH_TTL"))
	if raw == "" {
		return portableStoreRefreshTTL
	}
	parsed, err := time.ParseDuration(raw)
	if err != nil || parsed < 0 {
		return portableStoreRefreshTTL
	}
	return parsed
}
func portableStoreRefreshStatePath(mirrorPath string) string {
return filepath.Join(filepath.Dir(mirrorPath), ".portable-refresh.json")
}
// readPortableStoreRefreshState loads the refresh state stored at path. A
// missing, unreadable, or undecodable file yields the zero state.
func readPortableStoreRefreshState(path string) portableStoreRefreshState {
	var empty portableStoreRefreshState

	raw, err := os.ReadFile(path)
	if err != nil {
		return empty
	}
	var decoded portableStoreRefreshState
	if json.Unmarshal(raw, &decoded) != nil {
		// Discard anything partially decoded from a corrupt file.
		return empty
	}
	return decoded
}
// writePortableStoreRefreshState serializes state as JSON and writes it to
// path through writeAtomicFile with mode 0600.
func writePortableStoreRefreshState(path string, state portableStoreRefreshState) error {
	encoded, err := json.Marshal(state)
	if err == nil {
		err = writeAtomicFile(path, encoded, 0o600)
	}
	return err
}
func recentPortableRefresh(value string, now time.Time, maxAge time.Duration) bool {
if strings.TrimSpace(value) == "" {
return false
}
parsed, err := time.Parse(time.RFC3339Nano, value)
if err != nil {
return false
}
return now.Sub(parsed) <= maxAge
}
func portableRuntimeNeedsCopy(sourceDBPath, mirrorPath string) (bool, error) {
sourceInfo, err := os.Stat(sourceDBPath)
if err != nil {

View File

@ -0,0 +1,96 @@
package cli
import (
"context"
"os"
"path/filepath"
"testing"
"time"
)
// TestPortableRuntimeUtilityBranches exercises the small helpers behind the
// portable runtime mirror in one sequential scenario: copy-needed detection,
// sidecar cleanup on recopy, refresh state round-tripping, recency
// classification, stale lock removal, and refresh interval parsing. The steps
// share files under one temp directory, so their order matters.
func TestPortableRuntimeUtilityBranches(t *testing.T) {
dir := t.TempDir()
source := filepath.Join(dir, "source.db")
mirror := filepath.Join(dir, "runtime", "source.db")
// A missing source is an error; a missing mirror means a copy is needed.
if _, err := portableRuntimeNeedsCopy(source, mirror); err == nil {
t.Fatal("missing source should fail")
}
if err := os.WriteFile(source, []byte("v1"), 0o644); err != nil {
t.Fatalf("write source: %v", err)
}
needs, err := portableRuntimeNeedsCopy(source, mirror)
if err != nil || !needs {
t.Fatalf("missing mirror needs copy=%v err=%v", needs, err)
}
if err := copyFileAtomic(source, mirror); err != nil {
t.Fatalf("copy mirror: %v", err)
}
// Plant -wal/-shm sidecars next to the mirror; the recopy below must remove them.
if err := os.WriteFile(mirror+"-wal", []byte("wal"), 0o644); err != nil {
t.Fatalf("write wal: %v", err)
}
if err := os.WriteFile(mirror+"-shm", []byte("shm"), 0o644); err != nil {
t.Fatalf("write shm: %v", err)
}
// Despite the "age mirror" message, this moves the mirror's timestamps one
// hour into the future so it counts as fresh (no copy needed).
if err := os.Chtimes(mirror, time.Now().Add(time.Hour), time.Now().Add(time.Hour)); err != nil {
t.Fatalf("age mirror: %v", err)
}
needs, err = portableRuntimeNeedsCopy(source, mirror)
if err != nil || needs {
t.Fatalf("fresh mirror needs copy=%v err=%v", needs, err)
}
if err := copyFileAtomic(source, mirror); err != nil {
t.Fatalf("recopy mirror: %v", err)
}
if _, err := os.Stat(mirror + "-wal"); !os.IsNotExist(err) {
t.Fatalf("wal sidecar should be removed, err=%v", err)
}
if _, err := os.Stat(mirror + "-shm"); !os.IsNotExist(err) {
t.Fatalf("shm sidecar should be removed, err=%v", err)
}
// Refresh state: a valid file round-trips; a corrupt file reads back as the zero state.
statePath := portableStoreRefreshStatePath(mirror)
state := portableStoreRefreshState{LastAttempt: "attempt", LastSuccess: time.Now().UTC().Format(time.RFC3339Nano)}
if err := writePortableStoreRefreshState(statePath, state); err != nil {
t.Fatalf("write state: %v", err)
}
if got := readPortableStoreRefreshState(statePath); got.LastAttempt != "attempt" || got.LastSuccess == "" {
t.Fatalf("state = %+v", got)
}
if err := os.WriteFile(statePath, []byte("{"), 0o600); err != nil {
t.Fatalf("write invalid state: %v", err)
}
if got := readPortableStoreRefreshState(statePath); got.LastAttempt != "" {
t.Fatalf("invalid state should decode empty, got %+v", got)
}
// Recency: empty and unparseable timestamps are not recent; "now" is.
now := time.Now().UTC()
if recentPortableRefresh("", now, time.Minute) || recentPortableRefresh("bad", now, time.Minute) || !recentPortableRefresh(now.Format(time.RFC3339Nano), now, time.Minute) {
t.Fatal("recent refresh classification mismatch")
}
// Lock staleness: a fresh lock survives; one older than 2x the refresh timeout is removed.
lockPath := filepath.Join(dir, "refresh.lock")
if err := os.WriteFile(lockPath, []byte("123\n"), 0o600); err != nil {
t.Fatalf("write lock: %v", err)
}
removeStalePortableRefreshLock(lockPath, now)
if _, err := os.Stat(lockPath); err != nil {
t.Fatalf("fresh lock should remain: %v", err)
}
old := now.Add(-3 * portableStoreRefreshTimeout)
if err := os.Chtimes(lockPath, old, old); err != nil {
t.Fatalf("age lock: %v", err)
}
removeStalePortableRefreshLock(lockPath, now)
if _, err := os.Stat(lockPath); !os.IsNotExist(err) {
t.Fatalf("stale lock should be removed, err=%v", err)
}
// Interval parsing: "0" is honored as zero; an invalid value falls back to the default.
t.Setenv("GITCRAWL_PORTABLE_REFRESH_TTL", "0")
if got := portableStoreRefreshInterval(); got != 0 {
t.Fatalf("zero ttl = %s", got)
}
t.Setenv("GITCRAWL_PORTABLE_REFRESH_TTL", "bad")
if got := portableStoreRefreshInterval(); got != portableStoreRefreshTTL {
t.Fatalf("bad ttl fallback = %s", got)
}
// Refreshing a database that is not part of a portable checkout must succeed as a no-op.
if err := refreshPortableStoreForDB(context.Background(), source); err != nil {
t.Fatalf("non-portable refresh should be no-op: %v", err)
}
}

View File

@ -9,7 +9,6 @@ import (
"regexp"
"runtime"
"sort"
"strconv"
"strings"
"time"
@ -1113,7 +1112,7 @@ func (m *clusterBrowserModel) startJumpInput() tea.Cmd {
m.showHelp = false
m.closeMenu("")
m.searchInput.Prompt = "# "
m.searchInput.Placeholder = "issue or PR number"
m.searchInput.Placeholder = "issue, PR, or GitHub URL"
m.searchInput.SetValue("")
m.status = "Jump to issue/PR"
return m.searchInput.Focus()
@ -1123,9 +1122,9 @@ func (m clusterBrowserModel) handleJumpKey(msg tea.KeyMsg) (clusterBrowserModel,
switch msg.String() {
case "enter":
m.jumping = false
value := strings.TrimPrefix(strings.TrimSpace(m.searchInput.Value()), "#")
value := strings.TrimSpace(m.searchInput.Value())
m.searchInput.Blur()
number, err := strconv.Atoi(value)
number, err := parseOptionalThreadNumber(value)
if err != nil || number <= 0 {
m.status = "Enter a positive issue or PR number"
return m, nil

View File

@ -0,0 +1,179 @@
package cli
import (
"context"
"strings"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestTUIRemainingActionAndErrorBranches drives a cluster browser model that
// has no store, holding a single cluster with one member, through action and
// menu branches: sort and toggle actions, open/copy actions run with an empty
// PATH, the reference-link menu, and local cluster/member mutations that the
// test expects to be rejected for non-durable clusters.
func TestTUIRemainingActionAndErrorBranches(t *testing.T) {
thread := store.Thread{
ID: 1, Number: 10, Kind: "issue", State: "open", Title: "Thread title",
Body: "Body with https://example.com/docs", HTMLURL: "https://github.com/openclaw/openclaw/issues/10",
UpdatedAt: "2026-05-08T00:00:00Z",
}
cluster := store.ClusterSummary{
ID: 7, Source: store.ClusterSourceRun, StableSlug: "cluster-7", Status: "active",
Title: "Cluster title", RepresentativeNumber: 10, RepresentativeKind: "issue",
RepresentativeTitle: "Thread title", MemberCount: 1, UpdatedAt: "2026-05-08T00:00:00Z",
}
detail := store.ClusterDetail{
Cluster: cluster,
Members: []store.ClusterMemberDetail{{
Thread: thread,
Role: "member",
State: "active",
BodySnippet: "Body with https://example.com/docs",
Summaries: map[string]string{"problem_summary": "summary"},
}},
}
model := newClusterBrowserModel(context.Background(), nil, 0, clusterBrowserPayload{
Repository: "openclaw/openclaw",
Sort: "size",
MinSize: 1,
Clusters: []store.ClusterSummary{cluster},
})
// Pre-populate the detail and neighbor caches; the model was built with a nil store.
model.detailCache[7] = detail
model.loadSelectedCluster()
model.memberIndex = 0
model.neighborCache[thread.ID] = []tuiNeighbor{{Thread: thread, Score: 0.9}}
for _, action := range []string{"sort-oldest", "member-sort-oldest", "toggle-closed", "close-menu"} {
if !model.runAction(action) {
t.Fatalf("action %s was not handled", action)
}
}
if model.payload.Sort != "oldest" || model.memberSort != memberSortOldest {
t.Fatalf("sort actions failed sort=%q member=%q", model.payload.Sort, model.memberSort)
}
// NOTE(review): emptying PATH presumably makes the external open/copy
// helpers unavailable so each action takes its error branch; confirm
// against the action implementations.
t.Setenv("PATH", "")
errorActions := []string{
"open-cluster-representative",
"copy-cluster-url",
"copy-thread-detail",
"copy-body-preview",
"copy-summaries",
"copy-neighbors",
"copy-cluster-id",
"copy-cluster-name",
"copy-cluster-title",
"copy-member-list",
"copy-cluster",
"copy-visible-clusters",
"copy-reference-links",
"open",
"copy-url",
"copy-markdown",
"copy-title",
"open-first-link",
"copy-first-link",
}
// Every action must be handled and must leave a non-empty status message.
for _, action := range errorActions {
model.status = ""
handled := model.runMenuItem(tuiMenuItem{label: action, action: action, value: "https://example.com/docs"})
if !handled || model.status == "" {
t.Fatalf("error action %s handled=%v status=%q", action, handled, model.status)
}
}
model.openReferenceLinkMenu("copy")
model.runAction("back-to-actions")
if model.menuTitle != "Actions" {
t.Fatalf("back to actions failed title=%q", model.menuTitle)
}
model.runMenuItem(tuiMenuItem{label: "Open picked", action: "open-picked-link", value: "https://example.com/docs"})
model.runMenuItem(tuiMenuItem{label: "Copy picked", action: "copy-picked-link", value: "https://example.com/docs"})
// The fixture cluster has Source ClusterSourceRun; local close and member
// edits are expected to report that they need a durable cluster.
model.closeSelectedClusterLocally()
if !strings.Contains(model.status, "only available for durable clusters") {
t.Fatalf("raw cluster local close status=%q", model.status)
}
model.reopenSelectedClusterLocally()
model.excludeSelectedClusterMemberLocally()
model.includeSelectedClusterMemberLocally()
model.setSelectedClusterCanonicalLocally()
if !strings.Contains(model.status, "only available for durable clusters") {
t.Fatalf("raw member local action status=%q", model.status)
}
}
// TestTUIRemainingHelperBranches walks a cluster-browser model through a set of
// helper paths and then checks several standalone formatting helpers.
// The model is built with nil and 0 as the second and third constructor
// arguments; the failure messages below refer to this as the "without store" case.
func TestTUIRemainingHelperBranches(t *testing.T) {
model := newClusterBrowserModel(context.Background(), nil, 0, clusterBrowserPayload{
Repository: "openclaw/openclaw",
MinSize: 1,
Limit: 1,
Clusters: []store.ClusterSummary{
{ID: 1, Status: "active", RepresentativeNumber: 101, MemberCount: 1, UpdatedAt: "2026-05-08T00:00:00Z"},
},
})
// With a single cluster in the payload, the current ID and refresh limit are both 1.
if model.currentClusterID() != 1 {
t.Fatalf("current cluster id = %d", model.currentClusterID())
}
if model.clusterRefreshLimit() != 1 {
t.Fatalf("cluster refresh limit = %d", model.clusterRefreshLimit())
}
// Add a closed cluster to the working set, then jump to it: the jump must
// succeed, switch showClosed on, and leave minSize at 1.
model.ensureClusterInWorkingSet(store.ClusterSummary{ID: 2, Status: "closed", ClosedAt: "2026-05-08T00:00:00Z", MemberCount: 2})
if !model.selectClusterIDForJump(2) || !model.showClosed || model.minSize != 1 {
t.Fatalf("jump selection showClosed=%v minSize=%d selected=%d", model.showClosed, model.minSize, model.selected)
}
// With no clusters in the payload the ID and signature helpers return zero values.
model.payload.Clusters = nil
if model.currentClusterID() != 0 || model.clusterSignature() != "" {
t.Fatalf("empty cluster helpers id=%d sig=%q", model.currentClusterID(), model.clusterSignature())
}
if _, ok := model.clusterFromWorkingSet(999); ok {
t.Fatal("missing working-set cluster should not resolve")
}
// Applying a nil refresh must leave payload.Clusters non-nil.
model.applyClusterRefresh(nil, 0)
if model.payload.Clusters == nil {
t.Fatal("nil refresh should normalize clusters")
}
// Refresh and repository switching are expected to only set a status message here.
model.autoRefreshFromStore()
if model.status != "Refresh unavailable for this view" {
t.Fatalf("auto refresh status=%q", model.status)
}
if cmd := model.autoRefreshCmd(); cmd != nil {
t.Fatalf("auto refresh command without store = %v", cmd)
}
model.switchRepository("")
if model.status != "Repository picker unavailable for this view" {
t.Fatalf("switch repository no store status=%q", model.status)
}
// Position labels are "0" for a zero-value model, for the emptied model, and
// for a copy whose member rows are an empty slice.
if label := (clusterBrowserModel{}).clusterPositionLabel(); label != "0" {
t.Fatalf("zero cluster position label = %q", label)
}
if label := model.clusterPositionLabel(); label != "0" {
t.Fatalf("empty model cluster position label = %q", label)
}
memberModel := model
memberModel.memberRows = []memberRow{}
if label := memberModel.memberPositionLabel(); label != "0" {
t.Fatalf("zero member position label = %q", label)
}
// formatRelativeTime: minutes, months, empty input, and unparseable input
// (which is expected to be returned unchanged).
if got := formatRelativeTime(time.Now().Add(-30 * time.Minute).Format(time.RFC3339Nano)); got != "30m ago" {
t.Fatalf("minute age = %q", got)
}
if got := formatRelativeTime(time.Now().Add(-75 * 24 * time.Hour).Format(time.RFC3339Nano)); !strings.Contains(got, "mo ago") {
t.Fatalf("month age = %q", got)
}
if got := formatRelativeTime(""); got != "never" {
t.Fatalf("empty age = %q", got)
}
if got := formatRelativeTime("bad-time"); got != "bad-time" {
t.Fatalf("bad age = %q", got)
}
// Edge cases of the small layout helpers: empty text, inverted clamp bounds,
// zero-width padding, and a block fitted to width 2 and height 1.
if got := wrapPlain("", 10); len(got) != 1 || got[0] != "" {
t.Fatalf("empty wrap = %+v", got)
}
if got := clampInt(5, 10, 1); got != 10 {
t.Fatalf("inverted clamp = %d", got)
}
if got := padCells("abcdef", 0); got != "" {
t.Fatalf("zero pad = %q", got)
}
if got := fitBlock("a\nb", 2, 1); got != "a " {
t.Fatalf("fit block = %q", got)
}
}

View File

@ -0,0 +1,326 @@
package cli
import (
"bytes"
"context"
"strings"
"testing"
"github.com/charmbracelet/bubbles/textinput"
tea "github.com/charmbracelet/bubbletea"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestFloatingMenuRenderingBranches renders a floating action menu over a fixed
// 9-line, 20-column base view and checks the menu helper functions around it.
func TestFloatingMenuRenderingBranches(t *testing.T) {
base := strings.Join([]string{
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
"01234567890123456789",
}, "\n")
// menuOff is 1, so the leading "Hidden" section row sits before the scroll offset.
model := clusterBrowserModel{
width: 20,
height: 6,
menuTitle: "Actions",
menuContext: focusClusters,
menuIndex: 2,
menuOff: 1,
menuFloating: true,
menuRect: tuiRect{x: 2, y: 1, w: 16, h: 8},
menuItems: []tuiMenuItem{
tuiMenuSection("Hidden"),
{label: "Open", action: "open"},
{label: "Close", action: "close"},
{label: "Skip", action: ""},
{label: "Refresh", action: "refresh"},
},
}
// The overlay must change the base view and show the title and the "Open" item.
rendered := model.renderFloatingMenu(base)
if rendered == base || !strings.Contains(rendered, "Actions") || !strings.Contains(rendered, "Open") {
t.Fatalf("rendered menu = %q", rendered)
}
// A zero-value model has an empty menuRect, so the base view is returned unchanged.
if got := (clusterBrowserModel{}).renderFloatingMenu(base); got != base {
t.Fatalf("empty rect should keep base view")
}
// With a title other than "Actions" the menu lines are expected to include a "b back" hint.
submenu := model
submenu.menuTitle = "Repository"
if lines := submenu.menuLines(14); !strings.Contains(strings.Join(lines, "\n"), "b back") {
t.Fatalf("submenu lines = %#v", lines)
}
// Subtitles per focus context, including the fallback for an empty context.
if got := actionMenuSubtitle(focusMembers); got != "selected member scope" {
t.Fatalf("member subtitle = %q", got)
}
if got := actionMenuSubtitle(focusDetail); got != "detail scope" {
t.Fatalf("detail subtitle = %q", got)
}
if got := actionMenuSubtitle(""); got != "current selection" {
t.Fatalf("default subtitle = %q", got)
}
if palette := actionMenuColors(focusMembers); palette.accent == "" || palette.background == "" {
t.Fatalf("member palette = %+v", palette)
}
// A 1x1 request must produce a style whose width and height are both 1.
if style := floatingMenuStyle(1, 1, actionMenuColors("")); style.GetWidth() != 1 || style.GetHeight() != 1 {
t.Fatalf("minimum style size width=%d height=%d", style.GetWidth(), style.GetHeight())
}
// Shortcut "2" with offset 1 is expected to resolve to item index 2; a
// non-numeric key must not match.
if index, ok := visibleMenuShortcutIndex("2", model.menuItems, 1, 4); !ok || index != 2 {
t.Fatalf("shortcut index=%d ok=%v", index, ok)
}
if _, ok := visibleMenuShortcutIndex("x", model.menuItems, 1, 4); ok {
t.Fatal("non-numeric shortcut should not match")
}
}
// TestTUIMenuNavigationAndWheelBranches covers menu index navigation, mouse hit
// testing against the pane layout, page-step sizing, and wheel scrolling.
// The checks mutate one shared model, so they depend on running in this order.
func TestTUIMenuNavigationAndWheelBranches(t *testing.T) {
// Menu rows 0 and 3 are section headers; rows 1, 2, 4 and 5 carry actions.
model := clusterBrowserModel{
width: 100,
height: 30,
menuIndex: 0,
menuOff: 4,
menuFloating: true,
menuRect: tuiRect{x: 0, y: 0, w: 20, h: 8},
menuItems: []tuiMenuItem{
tuiMenuSection("top"),
{label: "one", action: "one"},
{label: "two", action: "two"},
tuiMenuSection("middle"),
{label: "three", action: "three"},
{label: "four", action: "four"},
},
payload: clusterBrowserPayload{Clusters: []store.ClusterSummary{
{ID: 10, Title: "first"},
{ID: 11, Title: "second"},
}},
}
if model.firstSelectableMenuIndex() != 1 || model.lastSelectableMenuIndex() != 5 {
t.Fatalf("selectable bounds first=%d last=%d", model.firstSelectableMenuIndex(), model.lastSelectableMenuIndex())
}
if got := model.nextSelectableMenuIndex(1); got != 1 {
t.Fatalf("next selectable = %d", got)
}
// Starting on the section row at index 3, the nearest action row is 4 going
// forward and 2 going backward.
if got := model.nearestSelectableMenuIndex(3, 1); got != 4 {
t.Fatalf("nearest forward = %d", got)
}
if got := model.nearestSelectableMenuIndex(3, -1); got != 2 {
t.Fatalf("nearest backward = %d", got)
}
empty := clusterBrowserModel{}
if got := empty.nearestSelectableMenuIndex(10, 1); got != 0 {
t.Fatalf("empty nearest = %d", got)
}
// After keepMenuVisible the scroll offset must not be past the selected row.
model.menuIndex = 5
model.keepMenuVisible()
if model.menuOff > model.menuIndex {
t.Fatalf("menu off=%d index=%d", model.menuOff, model.menuIndex)
}
// Three panes laid out side by side, each 20 columns wide, starting at y=2.
layout := tuiLayout{
clusters: tuiRect{x: 0, y: 2, w: 20, h: 8},
members: tuiRect{x: 20, y: 2, w: 20, h: 8},
detail: tuiRect{x: 40, y: 2, w: 20, h: 8},
}
if got := model.actionMenuContextAt(layout, 1, 3); got != focusClusters {
t.Fatalf("cluster context = %q", got)
}
if got := model.actionMenuContextAt(layout, 21, 3); got != focusMembers {
t.Fatalf("member context = %q", got)
}
if got := model.actionMenuContextAt(layout, 41, 3); got != focusDetail {
t.Fatalf("detail context = %q", got)
}
if got := model.actionMenuContextAt(layout, 99, 99); got != "" {
t.Fatalf("outside context = %q", got)
}
// NOTE(review): the expected index 6 is one past the last menuItems index (0-5);
// confirm menuIndexAtMouse is meant to return an unclamped row-derived index.
if index, ok := model.menuIndexAtMouse(layout, 1, 4); !ok || index != 6 {
t.Fatalf("menu index at mouse index=%d ok=%v", index, ok)
}
// With menuFloating off, a point inside the detail pane is expected to hit index 5.
model.menuFloating = false
if index, ok := model.menuIndexAtMouse(layout, 41, 6); !ok || index != 5 {
t.Fatalf("detail menu index at mouse index=%d ok=%v", index, ok)
}
if _, ok := model.menuIndexAtMouse(layout, 99, 99); ok {
t.Fatal("outside mouse should not hit menu")
}
// pageStep must be positive for a sized model, and equal the detail view
// height when focus is on the detail pane.
if step := (clusterBrowserModel{width: 100, height: 30}).pageStep(); step <= 0 {
t.Fatalf("cluster page step = %d", step)
}
detailModel := clusterBrowserModel{focus: focusDetail}
detailModel.detailView.Height = 3
if step := detailModel.pageStep(); step != 3 {
t.Fatalf("detail page step = %d", step)
}
// One wheel step moves from cluster 10 to cluster 11 and returns a command;
// a second step at the end of the list returns nil.
model.selected = 0
cmd := model.moveClusterByWheel(1)
if cmd == nil || model.selected != 1 || model.status != "Cluster 11" {
t.Fatalf("wheel move selected=%d status=%q cmd=%v", model.selected, model.status, cmd)
}
if cmd := model.moveClusterByWheel(1); cmd != nil {
t.Fatalf("boundary wheel move should not tick: %v", cmd)
}
// A queued non-zero delta yields a command and sets focus to the clusters
// pane; a zero delta yields nil.
model.wheelDelta = -1
model.wheelFocus = focusClusters
if cmd := model.applyQueuedWheelScroll(); cmd == nil || model.focus != focusClusters {
t.Fatalf("queued wheel cmd=%v focus=%q", cmd, model.focus)
}
model.wheelDelta = 0
if cmd := model.applyQueuedWheelScroll(); cmd != nil {
t.Fatalf("zero queued wheel should be nil: %v", cmd)
}
}
// TestTUISelectionAndVisibilityHelperBranches checks cluster and member lookup
// helpers against a hand-built model, then the thread and member visibility rules.
func TestTUISelectionAndVisibilityHelperBranches(t *testing.T) {
// Thread numbers are spread over three places so each lookup source is hit:
// 909 in the loaded detail, 808 in the detail cache, 303 in allClusters.
model := clusterBrowserModel{
payload: clusterBrowserPayload{Limit: 2, Clusters: []store.ClusterSummary{
{ID: 1, RepresentativeNumber: 101, MemberCount: 2, UpdatedAt: "2026-05-05T10:00:00Z"},
{ID: 2, RepresentativeNumber: 202, MemberCount: 1, UpdatedAt: "2026-05-05T11:00:00Z"},
}},
allClusters: []store.ClusterSummary{
{ID: 3, RepresentativeNumber: 303, MemberCount: 5, UpdatedAt: "2026-05-05T12:00:00Z"},
},
hasDetail: true,
detail: store.ClusterDetail{
Cluster: store.ClusterSummary{ID: 9, RepresentativeNumber: 909},
Members: []store.ClusterMemberDetail{{
Thread: store.Thread{Number: 909, State: "open"},
}},
},
detailCache: map[int64]store.ClusterDetail{
8: {Cluster: store.ClusterSummary{ID: 8}, Members: []store.ClusterMemberDetail{{Thread: store.Thread{Number: 808, State: "open"}}}},
},
memberRows: []memberRow{
{label: "header"},
{selectable: true, member: store.ClusterMemberDetail{Thread: store.Thread{Number: 202, State: "open"}}},
},
}
if got := model.currentClusterID(); got != 1 {
t.Fatalf("current cluster = %d", got)
}
if got := model.clusterRefreshLimit(); got != 2 {
t.Fatalf("refresh limit = %d", got)
}
if got := model.findLoadedClusterIDForThreadNumber(909); got != 9 {
t.Fatalf("detail cluster lookup = %d", got)
}
if got := model.findLoadedClusterIDForThreadNumber(808); got != 8 {
t.Fatalf("cache cluster lookup = %d", got)
}
if got := model.findLoadedClusterIDForThreadNumber(303); got != 3 {
t.Fatalf("working-set cluster lookup = %d", got)
}
if _, ok := model.clusterFromWorkingSet(404); ok {
t.Fatal("missing cluster should not be found")
}
// Row 0 is a non-selectable header, so member #202 is selected at index 1.
if !model.selectMemberByNumber(202) || model.memberIndex != 1 {
t.Fatalf("member selection index = %d", model.memberIndex)
}
if model.selectMemberByNumber(999) {
t.Fatal("missing member should not be selected")
}
openThread := store.Thread{State: "open"}
closedThread := store.Thread{State: "closed"}
localClosedThread := store.Thread{State: "open", ClosedAtLocal: "2026-05-05T00:00:00Z"}
// The second argument is false then true: open threads are visible with false,
// closed and locally closed threads are not, and closed threads are visible with true.
if !threadVisible(openThread, false) || threadVisible(closedThread, false) || threadVisible(localClosedThread, false) || !threadVisible(closedThread, true) {
t.Fatal("thread visibility mismatch")
}
if got := memberDisplayState(store.ClusterMemberDetail{State: "removed", Thread: openThread}); got != "removed" {
t.Fatalf("member state = %q", got)
}
if got := memberDisplayState(store.ClusterMemberDetail{Thread: localClosedThread}); got != "local" {
t.Fatalf("local member state = %q", got)
}
if memberVisible(store.ClusterMemberDetail{State: "removed", Thread: openThread}, false) || !memberVisible(store.ClusterMemberDetail{State: "removed", Thread: closedThread}, true) {
t.Fatal("member visibility mismatch")
}
// With no payload Limit, the refresh limit must be at least len(allClusters).
noLimit := clusterBrowserModel{payload: clusterBrowserPayload{Clusters: model.payload.Clusters}, allClusters: model.allClusters}
if got := noLimit.clusterRefreshLimit(); got < len(model.allClusters) {
t.Fatalf("no-limit refresh limit = %d", got)
}
}
// TestTUIJumpToThreadNumberLoadsClusterFromStore jumps to thread numbers on a
// model that starts with no clusters in its payload and is backed by the store
// returned from seedTUIDurableStore.
func TestTUIJumpToThreadNumberLoadsClusterFromStore(t *testing.T) {
st, repoID, clusterID := seedTUIDurableStore(t)
defer st.Close()
model := clusterBrowserModel{
ctx: context.Background(),
store: st,
repoID: repoID,
detailCache: map[int64]store.ClusterDetail{},
payload: clusterBrowserPayload{Limit: 1, Sort: "recent"},
minSize: 99,
}
// A non-positive number is rejected with a status message.
model.jumpToThreadNumber(0)
if model.status != "Enter a positive issue or PR number" {
t.Fatalf("bad jump status = %q", model.status)
}
// Jumping to #202 must focus the members pane, select the seeded cluster,
// select the member row for #202, and fill the detail cache for that cluster.
model.jumpToThreadNumber(202)
if model.focus != focusMembers || !strings.Contains(model.status, "Jumped to #202") {
t.Fatalf("jump focus=%q status=%q", model.focus, model.status)
}
if len(model.payload.Clusters) == 0 || model.payload.Clusters[model.selected].ID != clusterID {
t.Fatalf("selected clusters = %+v selected=%d want cluster %d", model.payload.Clusters, model.selected, clusterID)
}
if model.memberIndex < 0 || model.memberRows[model.memberIndex].thread().Number != 202 {
t.Fatalf("member rows index=%d rows=%+v", model.memberIndex, model.memberRows)
}
if _, ok := model.detailCache[clusterID]; !ok {
t.Fatalf("detail cache missing cluster %d", clusterID)
}
// Jumping to #999 must set some status that does not report a successful jump.
model.jumpToThreadNumber(999)
if model.status == "" || strings.Contains(model.status, "Jumped") {
t.Fatalf("missing jump status = %q", model.status)
}
}
// TestTUIJumpKeyAndRefreshCommandBranches drives handleJumpKey with Enter, Esc
// and rune input, then checks which refresh commands are scheduled.
func TestTUIJumpKeyAndRefreshCommandBranches(t *testing.T) {
// Enter on "#0": jump mode ends, no command is returned, and the status asks
// for a positive number.
input := textinput.New()
input.SetValue("#0")
model := clusterBrowserModel{searchInput: input, jumping: true}
next, cmd := model.handleJumpKey(tea.KeyMsg{Type: tea.KeyEnter})
if cmd != nil || next.jumping || next.status != "Enter a positive issue or PR number" {
t.Fatalf("bad enter next=%+v cmd=%v", next, cmd)
}
// Enter on a full issue URL ending in /issues/123: cluster 1 has representative
// number 123 but no members are loaded, so the status must mention
// "outside loaded members".
input = textinput.New()
input.SetValue("https://github.com/openclaw/openclaw/issues/123")
model = clusterBrowserModel{
searchInput: input,
jumping: true,
payload: clusterBrowserPayload{Clusters: []store.ClusterSummary{{ID: 1, RepresentativeNumber: 123}}},
allClusters: []store.ClusterSummary{{ID: 1, RepresentativeNumber: 123}},
detailCache: map[int64]store.ClusterDetail{},
}
next, cmd = model.handleJumpKey(tea.KeyMsg{Type: tea.KeyEnter})
if cmd != nil || next.jumping || !strings.Contains(next.status, "outside loaded members") {
t.Fatalf("valid enter next status=%q cmd=%v", next.status, cmd)
}
// Esc cancels jump mode.
model = clusterBrowserModel{searchInput: textinput.New(), jumping: true}
next, cmd = model.handleJumpKey(tea.KeyMsg{Type: tea.KeyEsc})
if cmd != nil || next.jumping || next.status != "Jump cancelled" {
t.Fatalf("esc next=%+v cmd=%v", next, cmd)
}
// Typing a digit keeps jump mode active.
next, cmd = model.handleJumpKey(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune("4")})
if next.jumping != true {
t.Fatalf("rune input should keep jump mode, next=%+v cmd=%v", next, cmd)
}
// Expected: remoteRefreshTickCmd is non-nil on a zero-value model, while
// autoRefreshCmd is nil on a zero-value model and non-nil once store and
// repoID are set.
if (clusterBrowserModel{}).remoteRefreshTickCmd() == nil || (clusterBrowserModel{}).autoRefreshCmd() != nil || (clusterBrowserModel{store: &store.Store{}, repoID: 1}).autoRefreshCmd() == nil {
t.Fatal("refresh tick commands should be scheduled")
}
}
// TestInteractiveTUIFallsBackToJSONForNonFileOutput verifies that when Stdout
// is an in-memory buffer the app reports it cannot run interactively and
// runInteractiveTUI writes the payload as JSON instead.
func TestInteractiveTUIFallsBackToJSONForNonFileOutput(t *testing.T) {
	captured := new(bytes.Buffer)
	app := New()
	app.Stdout = captured

	if app.canRunInteractiveTUI() {
		t.Fatal("buffer stdout should not be interactive")
	}

	payload := clusterBrowserPayload{
		Repository: "openclaw/openclaw",
		Mode:       "clusters",
		Clusters:   []store.ClusterSummary{{ID: 1, MemberCount: 2}},
	}
	err := app.runInteractiveTUI(context.Background(), nil, 0, payload)
	if err != nil {
		t.Fatalf("run tui fallback: %v", err)
	}

	// Both fragments must appear in the JSON written to the buffer.
	for _, fragment := range []string{`"repository": "openclaw/openclaw"`, `"clusters"`} {
		if !strings.Contains(captured.String(), fragment) {
			t.Fatalf("fallback tui output = %q", captured.String())
		}
	}
}

View File

@ -6,7 +6,7 @@ import (
"path/filepath"
"strings"
"github.com/pelletier/go-toml/v2"
crawlconfig "github.com/vincentkoc/crawlkit/config"
)
const (
@ -49,15 +49,24 @@ type TokenResolution struct {
Source string
}
var appConfig = crawlconfig.App{Name: "gitcrawl", ConfigEnv: DefaultConfigEnv}
func Default() Config {
home := homeDir()
base := filepath.Join(home, ".config", "gitcrawl")
paths, err := appConfig.DefaultPaths()
if err != nil {
paths = crawlconfig.Paths{
DBPath: filepath.Join(homeDir(), ".config", "gitcrawl", "gitcrawl.db"),
CacheDir: filepath.Join(homeDir(), ".config", "gitcrawl", "cache"),
LogDir: filepath.Join(homeDir(), ".config", "gitcrawl", "logs"),
}
}
base := filepath.Dir(paths.DBPath)
return Config{
Version: 1,
DBPath: filepath.Join(base, "gitcrawl.db"),
CacheDir: filepath.Join(base, "cache"),
DBPath: paths.DBPath,
CacheDir: paths.CacheDir,
VectorDir: filepath.Join(base, "vectors"),
LogDir: filepath.Join(base, "logs"),
LogDir: paths.LogDir,
EmbeddingBasis: "title_original",
GitHub: GitHubConfig{
TokenEnv: DefaultTokenEnv,
@ -77,26 +86,19 @@ func Default() Config {
}
func ResolvePath(flagPath string) string {
if strings.TrimSpace(flagPath) != "" {
return expandHome(flagPath)
path, err := appConfig.ResolveConfigPath(flagPath)
if err != nil {
return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml")
}
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
return expandHome(envPath)
}
home := homeDir()
return filepath.Join(home, ".config", "gitcrawl", "config.toml")
return path
}
func Load(path string) (Config, error) {
cfg := Default()
resolved := ResolvePath(path)
data, err := os.ReadFile(resolved)
if err != nil {
if err := crawlconfig.LoadTOML(resolved, &cfg); err != nil {
return Config{}, err
}
if err := toml.Unmarshal(data, &cfg); err != nil {
return Config{}, fmt.Errorf("parse config: %w", err)
}
if err := cfg.Normalize(); err != nil {
return Config{}, err
}
@ -108,21 +110,19 @@ func Save(path string, cfg Config) error {
return err
}
resolved := ResolvePath(path)
if err := os.MkdirAll(filepath.Dir(resolved), 0o755); err != nil {
return fmt.Errorf("create config dir: %w", err)
}
data, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("marshal config: %w", err)
}
return os.WriteFile(resolved, data, 0o600)
return crawlconfig.WriteTOML(resolved, cfg, 0o600)
}
func EnsureRuntimeDirs(cfg Config) error {
for _, path := range []string{cfg.CacheDir, cfg.VectorDir, cfg.LogDir, filepath.Dir(cfg.DBPath)} {
if err := os.MkdirAll(expandHome(path), 0o755); err != nil {
return fmt.Errorf("create runtime dir %s: %w", path, err)
}
if err := crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
DBPath: cfg.DBPath,
CacheDir: cfg.CacheDir,
LogDir: cfg.LogDir,
}); err != nil {
return err
}
if err := os.MkdirAll(crawlconfig.ExpandHome(cfg.VectorDir), 0o755); err != nil {
return fmt.Errorf("create runtime dir %s: %w", cfg.VectorDir, err)
}
return nil
}
@ -200,13 +200,7 @@ func envOrDefault(primary, fallback string) string {
}
func expandHome(path string) string {
if path == "~" {
return homeDir()
}
if strings.HasPrefix(path, "~/") {
return filepath.Join(homeDir(), strings.TrimPrefix(path, "~/"))
}
return path
return crawlconfig.ExpandHome(path)
}
func homeDir() string {

View File

@ -37,6 +37,12 @@ type ListIssuesOptions struct {
ExpectedTotal int
}
// ListWorkflowRunsOptions holds the optional filters read by ListWorkflowRuns.
type ListWorkflowRunsOptions struct {
// Branch, when non-empty, is sent as the "branch" query parameter.
Branch string
// HeadSHA, when non-empty, is sent as the "head_sha" query parameter.
HeadSHA string
// Limit, when positive, caps the number of rows ListWorkflowRuns returns.
Limit int
}
type RequestError struct {
Method string
URL string
@ -137,6 +143,45 @@ func (c *Client) ListPullFiles(ctx context.Context, owner, repo string, number i
return c.paginate(ctx, path, 0, 0, reporter)
}
// ListPullCommits returns the commits of pull request `number` in owner/repo.
// The request goes through paginate with limit and expectedItems both 0,
// asking for 100 items per page.
func (c *Client) ListPullCommits(ctx context.Context, owner, repo string, number int, reporter Reporter) ([]map[string]any, error) {
	escapedOwner := pathEscape(owner)
	escapedRepo := pathEscape(repo)
	endpoint := fmt.Sprintf("/repos/%s/%s/pulls/%d/commits?per_page=100", escapedOwner, escapedRepo, number)
	return c.paginate(ctx, endpoint, 0, 0, reporter)
}
// ListCommitCheckRuns returns the "check_runs" array for the commit `ref` in
// owner/repo. It makes one doJSON GET with per_page=100 and does not go
// through paginate, so only what that single response carries is returned.
func (c *Client) ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter Reporter) ([]map[string]any, error) {
	endpoint := fmt.Sprintf(
		"/repos/%s/%s/commits/%s/check-runs?per_page=100",
		pathEscape(owner), pathEscape(repo), pathEscape(ref),
	)
	// The check runs are wrapped in an envelope object rather than a bare array.
	envelope := struct {
		CheckRuns []map[string]any `json:"check_runs"`
	}{}
	err := c.doJSON(ctx, http.MethodGet, endpoint, nil, reporter, &envelope)
	if err != nil {
		return nil, err
	}
	return envelope.CheckRuns, nil
}
// ListWorkflowRuns returns workflow runs for owner/repo, optionally filtered by
// options.Branch and options.HeadSHA.
//
// A single request is made. When options.Limit is positive the result holds at
// most Limit rows; the page size requested from the API is reduced to Limit
// when Limit is below 100, so rows that would only be discarded are not
// fetched. Because only one page is requested, at most 100 rows are returned
// whatever the Limit.
func (c *Client) ListWorkflowRuns(ctx context.Context, owner, repo string, options ListWorkflowRunsOptions, reporter Reporter) ([]map[string]any, error) {
	// 100 is the page size used when no smaller limit applies.
	perPage := 100
	if options.Limit > 0 && options.Limit < perPage {
		perPage = options.Limit
	}
	values := url.Values{}
	values.Set("per_page", fmt.Sprint(perPage))
	if options.Branch != "" {
		values.Set("branch", options.Branch)
	}
	if options.HeadSHA != "" {
		values.Set("head_sha", options.HeadSHA)
	}
	path := fmt.Sprintf("/repos/%s/%s/actions/runs?%s", pathEscape(owner), pathEscape(repo), values.Encode())
	// The runs are wrapped in an envelope object rather than a bare array.
	var payload struct {
		WorkflowRuns []map[string]any `json:"workflow_runs"`
	}
	if err := c.doJSON(ctx, http.MethodGet, path, nil, reporter, &payload); err != nil {
		return nil, err
	}
	rows := payload.WorkflowRuns
	// Still trim locally in case the server returns more rows than requested.
	if options.Limit > 0 && len(rows) > options.Limit {
		rows = rows[:options.Limit]
	}
	return rows, nil
}
func (c *Client) paginate(ctx context.Context, firstPath string, limit int, expectedItems int, reporter Reporter) ([]map[string]any, error) {
var out []map[string]any
nextPath := firstPath

View File

@ -158,8 +158,13 @@ func TestClientSingleResourceAndCollectionEndpoints(t *testing.T) {
case "/repos/openclaw/gitcrawl/issues/7/comments",
"/repos/openclaw/gitcrawl/pulls/8/reviews",
"/repos/openclaw/gitcrawl/pulls/8/comments",
"/repos/openclaw/gitcrawl/pulls/8/files":
"/repos/openclaw/gitcrawl/pulls/8/files",
"/repos/openclaw/gitcrawl/pulls/8/commits":
_ = json.NewEncoder(w).Encode([]map[string]any{{"id": 1}})
case "/repos/openclaw/gitcrawl/commits/abc/check-runs":
_ = json.NewEncoder(w).Encode(map[string]any{"check_runs": []map[string]any{{"name": "test"}}})
case "/repos/openclaw/gitcrawl/actions/runs":
_ = json.NewEncoder(w).Encode(map[string]any{"workflow_runs": []map[string]any{{"id": 99}}})
default:
t.Fatalf("unexpected path: %s", r.URL.String())
}
@ -183,14 +188,21 @@ func TestClientSingleResourceAndCollectionEndpoints(t *testing.T) {
"review-comments": func() ([]map[string]any, error) {
return client.ListPullReviewComments(ctx, "openclaw", "gitcrawl", 8, nil)
},
"files": func() ([]map[string]any, error) { return client.ListPullFiles(ctx, "openclaw", "gitcrawl", 8, nil) },
"files": func() ([]map[string]any, error) { return client.ListPullFiles(ctx, "openclaw", "gitcrawl", 8, nil) },
"commits": func() ([]map[string]any, error) { return client.ListPullCommits(ctx, "openclaw", "gitcrawl", 8, nil) },
"checks": func() ([]map[string]any, error) {
return client.ListCommitCheckRuns(ctx, "openclaw", "gitcrawl", "abc", nil)
},
"runs": func() ([]map[string]any, error) {
return client.ListWorkflowRuns(ctx, "openclaw", "gitcrawl", ListWorkflowRunsOptions{HeadSHA: "abc"}, nil)
},
} {
rows, err := fn()
if err != nil || len(rows) != 1 {
t.Fatalf("%s rows = %+v err=%v", name, rows, err)
}
}
if len(requests) != 7 {
if len(requests) != 10 {
t.Fatalf("requests = %+v", requests)
}
}

View File

@ -18,6 +18,7 @@ const (
defaultBaseURL = "https://api.openai.com/v1"
maxEmbeddingResponseBytes = 64 << 20
maxEmbeddingInputRunes = 6_000
maxEmbeddingInputBytes = 7_000
)
type RetryConfig struct {
@ -304,11 +305,21 @@ func isContextErr(err error) bool {
func capEmbeddingInputs(texts []string) []string {
out := make([]string, len(texts))
for index, text := range texts {
runes := []rune(text)
if len(runes) > maxEmbeddingInputRunes {
text = string(runes[:maxEmbeddingInputRunes])
}
out[index] = text
out[index] = capEmbeddingInput(text)
}
return out
}
// capEmbeddingInput returns the longest prefix of text that ends on a rune
// boundary and stays within both maxEmbeddingInputRunes runes and
// maxEmbeddingInputBytes bytes. Text already within both limits is returned
// unchanged.
func capEmbeddingInput(text string) string {
	var usedRunes, usedBytes int
	for offset, char := range text {
		// Width of the rune once re-encoded as UTF-8.
		width := len(string(char))
		if usedRunes >= maxEmbeddingInputRunes {
			return text[:offset]
		}
		if usedBytes+width > maxEmbeddingInputBytes {
			return text[:offset]
		}
		usedRunes++
		usedBytes += width
	}
	return text
}

View File

@ -4,14 +4,22 @@ import (
"context"
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync/atomic"
"testing"
"time"
"unicode/utf8"
)
// roundTripFunc lets a plain function be used where a RoundTrip method is
// required, e.g. as the Transport of an http.Client in tests.
type roundTripFunc func(*http.Request) (*http.Response, error)
// RoundTrip calls f with req and returns its result unchanged.
func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return f(req)
}
func TestEmbedAcceptsLargeBatchResponse(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var request embeddingRequest
@ -85,6 +93,42 @@ func TestEmbedCapsOversizedInputsBeforeRequest(t *testing.T) {
}
}
// TestEmbedCapsTokenDenseInputsByBytesBeforeRequest sends an input made of
// maxEmbeddingInputRunes multi-byte runes and checks, on the server side, that
// the request body was capped by bytes: it must fit in maxEmbeddingInputBytes,
// remain valid UTF-8, and hold fewer than maxEmbeddingInputRunes runes.
func TestEmbedCapsTokenDenseInputsByBytesBeforeRequest(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on a server goroutine, where t.Fatalf (FailNow) must
		// not be called. Record the failure with t.Errorf and answer with an
		// error status so the client call fails too.
		reject := func(format string, args ...any) {
			t.Errorf(format, args...)
			http.Error(w, "request rejected by test handler", http.StatusBadRequest)
		}
		var request embeddingRequest
		if err := json.NewDecoder(r.Body).Decode(&request); err != nil {
			reject("decode request: %v", err)
			return
		}
		if len(request.Input) != 1 {
			reject("inputs = %d, want 1", len(request.Input))
			return
		}
		input := request.Input[0]
		if got := len(input); got > maxEmbeddingInputBytes {
			reject("input bytes = %d, want <= %d", got, maxEmbeddingInputBytes)
			return
		}
		if !utf8.ValidString(input) {
			reject("input was truncated in the middle of a UTF-8 rune")
			return
		}
		if got := len([]rune(input)); got >= maxEmbeddingInputRunes {
			reject("input runes = %d, want byte cap to apply before rune cap %d", got, maxEmbeddingInputRunes)
			return
		}
		_ = json.NewEncoder(w).Encode(embeddingResponse{Data: []struct {
			Index     int       `json:"index"`
			Embedding []float64 `json:"embedding"`
		}{{Index: 0, Embedding: []float64{1}}}})
	}))
	defer server.Close()

	// "界" is a multi-byte rune, so the byte cap is reached before the rune cap.
	input := strings.Repeat("界", maxEmbeddingInputRunes)
	vectors, err := New(Options{APIKey: "test", BaseURL: server.URL}).Embed(context.Background(), "text-embedding-3-small", []string{input})
	if err != nil {
		t.Fatalf("embed: %v", err)
	}
	if len(vectors) != 1 || len(vectors[0]) != 1 {
		t.Fatalf("vectors = %#v", vectors)
	}
}
func TestEmbedErrorBranches(t *testing.T) {
noRetry := NoRetry()
client := New(Options{APIKey: "test", Retry: &noRetry})
@ -320,3 +364,79 @@ func TestEmbedRetryAfterDateForm(t *testing.T) {
t.Fatalf("expected ~3s sleep from HTTP-date Retry-After, got %v", slept)
}
}
// TestOpenAIErrorAndRetryHelpers checks APIError formatting and classification,
// Retry-After parsing, retry-config normalization, backoff clamping, and sleepCtx.
func TestOpenAIErrorAndRetryHelpers(t *testing.T) {
apiErr := &APIError{Status: http.StatusBadGateway, Type: "overloaded_error", Code: "overloaded", Message: "try later"}
if got := apiErr.Error(); !strings.Contains(got, "status=502") || !strings.Contains(got, "message=try later") {
t.Fatalf("error string = %q", got)
}
if !apiErr.Retryable() || !apiErr.IsOverloaded() {
t.Fatalf("retryable/overloaded = %v/%v", apiErr.Retryable(), apiErr.IsOverloaded())
}
// Expected: a nil *APIError is not retryable, a 504 is retryable, and a 429
// with type "insufficient_quota" is not.
if (*APIError)(nil).Retryable() || !(&APIError{Status: http.StatusGatewayTimeout}).Retryable() || (&APIError{Status: http.StatusTooManyRequests, Type: "insufficient_quota"}).Retryable() {
t.Fatal("unexpected retryable classification")
}
if AsAPIError(nil) != nil || AsAPIError(errors.New("plain")) != nil {
t.Fatal("unexpected APIError extraction")
}
// parseRetryAfter: fractional seconds are kept; a negative value and an
// HTTP-date in the past both yield 0.
now := time.Date(2026, 5, 5, 10, 0, 0, 0, time.UTC)
if got := parseRetryAfter("1.5", now); got != 1500*time.Millisecond {
t.Fatalf("float retry-after = %s", got)
}
if got := parseRetryAfter("-1", now); got != 0 {
t.Fatalf("negative retry-after = %s", got)
}
if got := parseRetryAfter(now.Add(-time.Minute).Format(http.TimeFormat), now); got != 0 {
t.Fatalf("past retry-after = %s", got)
}
// A negative MaxAttempts is expected to be normalized to 1 by New.
retry := RetryConfig{MaxAttempts: -1, BaseDelay: 0, MaxDelay: 50 * time.Millisecond, MaxElapsed: 0, Jitter: 0}
client := New(Options{APIKey: "test", Retry: &retry})
if client.retry.MaxAttempts != 1 {
t.Fatalf("max attempts = %d, want normalized 1", client.retry.MaxAttempts)
}
// Both a 1s retry-after and the computed backoff are clamped to the 50ms MaxDelay.
if got := client.backoff(10, 0, time.Second); got != 50*time.Millisecond {
t.Fatalf("retry-after should be clamped to max delay, got %s", got)
}
if got := client.backoff(10, 0, 0); got != 50*time.Millisecond {
t.Fatalf("exponential backoff should be clamped to max delay, got %s", got)
}
if !client.canSleep(now, 24*time.Hour) {
t.Fatal("max elapsed <= 0 should allow sleeping")
}
// sleepCtx: a zero duration returns nil; an already-cancelled context returns
// context.Canceled.
if err := sleepCtx(context.Background(), 0); err != nil {
t.Fatalf("zero sleep: %v", err)
}
ctx, cancel := context.WithCancel(context.Background())
cancel()
if err := sleepCtx(ctx, time.Hour); !errors.Is(err, context.Canceled) {
t.Fatalf("canceled sleep err = %v", err)
}
}
func TestEmbedRetriesTransportError(t *testing.T) {
var calls int
client := New(Options{
APIKey: "test",
BaseURL: "https://example.invalid",
Retry: &RetryConfig{MaxAttempts: 2, BaseDelay: time.Millisecond, MaxDelay: time.Millisecond, MaxElapsed: time.Hour, Jitter: 0},
Sleep: func(context.Context, time.Duration) error { return nil },
HTTPClient: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
calls++
if calls == 1 {
return nil, errors.New("temporary network break")
}
return &http.Response{
StatusCode: http.StatusOK,
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"data":[{"index":0,"embedding":[0.5]}]}`)),
}, nil
})},
})
vectors, err := client.Embed(context.Background(), "model", []string{"hi"})
if err != nil {
t.Fatalf("embed: %v", err)
}
if calls != 2 || len(vectors) != 1 || vectors[0][0] != 0.5 {
t.Fatalf("calls=%d vectors=%v", calls, vectors)
}
}

View File

@ -2,6 +2,7 @@ package store
import (
"context"
"database/sql"
"fmt"
)
@ -40,3 +41,38 @@ func (s *Store) UpsertComment(ctx context.Context, comment Comment) (int64, erro
}
return id, nil
}
// ListComments returns the comments stored for threadID, ordered by
// created_at_gh and then id. It returns (nil, nil) when the comments table
// does not exist. NULL author and timestamp columns become empty strings.
func (s *Store) ListComments(ctx context.Context, threadID int64) ([]Comment, error) {
	if !s.tableExists(ctx, "comments") {
		return nil, nil
	}

	const query = `
select id, thread_id, github_id, comment_type, author_login, author_type, body, is_bot, raw_json, created_at_gh, updated_at_gh
from comments
where thread_id = ?
order by created_at_gh, id
`
	rows, err := s.q().QueryContext(ctx, query, threadID)
	if err != nil {
		return nil, fmt.Errorf("list comments: %w", err)
	}
	defer rows.Close()

	var result []Comment
	for rows.Next() {
		var (
			item                          Comment
			login, kind, created, updated sql.NullString
			botFlag                       int
		)
		scanErr := rows.Scan(
			&item.ID, &item.ThreadID, &item.GitHubID, &item.CommentType,
			&login, &kind, &item.Body, &botFlag, &item.RawJSON,
			&created, &updated,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan comment: %w", scanErr)
		}
		item.AuthorLogin = login.String
		item.AuthorType = kind.String
		// is_bot is read as an integer; any non-zero value means true.
		item.IsBot = botFlag != 0
		item.CreatedAtGitHub = created.String
		item.UpdatedAtGitHub = updated.String
		result = append(result, item)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, fmt.Errorf("iterate comments: %w", iterErr)
	}
	return result, nil
}

View File

@ -36,4 +36,18 @@ func TestUpsertComment(t *testing.T) {
if id == 0 {
t.Fatal("expected comment id")
}
if _, err := st.UpsertComment(ctx, Comment{
ThreadID: threadID, GitHubID: "c0", CommentType: "issue_comment",
AuthorLogin: "octobot", AuthorType: "Bot", Body: "earlier bot note", IsBot: true, RawJSON: "{}",
CreatedAtGitHub: "2026-04-25T00:00:00Z", UpdatedAtGitHub: "2026-04-25T00:01:00Z",
}); err != nil {
t.Fatalf("second comment: %v", err)
}
comments, err := st.ListComments(ctx, threadID)
if err != nil {
t.Fatalf("list comments: %v", err)
}
if len(comments) != 2 || comments[0].GitHubID != "c0" || !comments[0].IsBot || comments[1].GitHubID != "c1" {
t.Fatalf("comments = %+v", comments)
}
}

View File

@ -205,6 +205,9 @@ func TestPortablePruneCanonicalizesSchemaAndMetadata(t *testing.T) {
if err := st.UpsertThreadVector(ctx, ThreadVector{ThreadID: threadIDs[0], Basis: "title_original", Model: "test", Dimensions: 2, ContentHash: "hash", Vector: []float64{1, 0}, CreatedAt: "2026-04-30T00:00:00Z", UpdatedAt: "2026-04-30T00:00:00Z"}); err != nil {
t.Fatalf("upsert vector: %v", err)
}
if _, err := st.UpsertComment(ctx, Comment{ThreadID: threadIDs[0], GitHubID: "c1", CommentType: "issue_comment", AuthorLogin: "alice", Body: "portable comment body", RawJSON: `{"body":"portable comment body"}`, CreatedAtGitHub: "2026-04-30T00:00:00Z", UpdatedAtGitHub: "2026-04-30T00:00:00Z"}); err != nil {
t.Fatalf("upsert comment: %v", err)
}
if _, err := st.DB().ExecContext(ctx, `insert into sync_runs(repo_id, scope, status, started_at, finished_at, stats_json) values(?, 'open', 'success', '2026-04-30T00:00:00Z', '2026-04-30T00:01:00Z', '{}')`, repoID); err != nil {
t.Fatalf("seed sync run: %v", err)
}
@ -218,6 +221,22 @@ func TestPortablePruneCanonicalizesSchemaAndMetadata(t *testing.T) {
if !st.tableExists(ctx, "portable_metadata") || st.hasColumn(ctx, "threads", "body") {
t.Fatalf("portable schema was not canonicalized")
}
if !st.tableExists(ctx, "comments") {
t.Fatalf("comments should remain in portable v2")
}
var schema, includes, excluded string
if err := st.DB().QueryRowContext(ctx, `select value from portable_metadata where key = 'schema'`).Scan(&schema); err != nil {
t.Fatalf("schema metadata: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select value from portable_metadata where key = 'includes'`).Scan(&includes); err != nil {
t.Fatalf("includes metadata: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select value from portable_metadata where key = 'excluded'`).Scan(&excluded); err != nil {
t.Fatalf("excluded metadata: %v", err)
}
if schema != "gitcrawl-portable-sync-v2" || !strings.Contains(includes, "comments") || strings.Contains(excluded, "comments") {
t.Fatalf("portable metadata schema=%q includes=%q excluded=%q", schema, includes, excluded)
}
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
}
@ -235,6 +254,70 @@ func TestPortablePruneCanonicalizesSchemaAndMetadata(t *testing.T) {
}
}
// TestPortablePruneClearsPRRawJSONBlobPointersAndFingerprints seeds a blob-backed
// thread revision, a fingerprint row, a comment linked to a blob, and a pull
// request cache, then runs PrunePortablePayloads and verifies that raw JSON
// payloads, raw_json_blob_id pointers, and fingerprint JSON columns were reset.
func TestPortablePruneClearsPRRawJSONBlobPointersAndFingerprints(t *testing.T) {
ctx := context.Background()
st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
if err != nil {
t.Fatalf("open store: %v", err)
}
defer st.Close()
repoID, threadIDs := seedVectorThreads(t, ctx, st)
threadID := threadIDs[1]
// Seed one blob, one revision pointing at that blob, and one fingerprint
// with non-empty JSON columns so the prune has something to clear.
if _, err := st.DB().ExecContext(ctx, `
insert into blobs(id, sha256, media_type, compression, size_bytes, storage_kind, inline_text, created_at)
values(1, 'sha', 'application/json', 'none', 2, 'inline', '{}', '2026-05-05T00:00:00Z');
insert into thread_revisions(id, thread_id, source_updated_at, content_hash, title_hash, body_hash, labels_hash, raw_json_blob_id, created_at)
values(1, ?, '2026-05-05T00:00:00Z', 'content', 'title', 'body', 'labels', 1, '2026-05-05T00:00:00Z');
insert into thread_fingerprints(thread_revision_id, algorithm_version, fingerprint_hash, fingerprint_slug, title_tokens_json, body_token_hash, linked_refs_json, file_set_hash, module_buckets_json, simhash64, feature_json, created_at)
values(1, 'v1', 'hash', 'slug', '["token"]', 'body', '["#1"]', 'files', '["module"]', '1', '{"x":1}', '2026-05-05T00:00:00Z');
`, threadID); err != nil {
t.Fatalf("seed revision/fingerprint: %v", err)
}
// The comment carries raw JSON and a body longer than the BodyChars limit used below.
if _, err := st.UpsertComment(ctx, Comment{ThreadID: threadID, GitHubID: "raw-comment", CommentType: "issue_comment", Body: "comment body that is long", RawJSON: `{"raw":true}`, CreatedAtGitHub: "2026-05-05T00:00:00Z"}); err != nil {
t.Fatalf("seed comment: %v", err)
}
// Point the comment at the seeded blob directly via SQL.
if _, err := st.DB().ExecContext(ctx, `update comments set raw_json_blob_id = 1 where github_id = 'raw-comment'`); err != nil {
t.Fatalf("link comment blob: %v", err)
}
// One detail, file, commit, check, and workflow run, each with non-empty raw JSON.
if err := st.UpsertPullRequestCache(ctx,
PullRequestDetail{ThreadID: threadID, RepoID: repoID, Number: 302, HeadSHA: "head", RawJSON: `{"detail":true}`, FetchedAt: "2026-05-05T00:00:00Z", UpdatedAt: "2026-05-05T00:00:00Z"},
[]PullRequestFile{{Path: "a.go", RawJSON: `{"file":true}`, FetchedAt: "2026-05-05T00:00:00Z"}},
[]PullRequestCommit{{SHA: "abc", RawJSON: `{"commit":true}`, FetchedAt: "2026-05-05T00:00:00Z"}},
[]PullRequestCheck{{Name: "ci", RawJSON: `{"check":true}`, FetchedAt: "2026-05-05T00:00:00Z"}},
[]WorkflowRun{{RepoID: repoID, RunID: "1", RawJSON: `{"run":true}`, FetchedAt: "2026-05-05T00:00:00Z"}},
); err != nil {
t.Fatalf("seed pr cache: %v", err)
}
stats, err := st.PrunePortablePayloads(ctx, PortablePruneOptions{BodyChars: 4})
if err != nil {
t.Fatalf("prune portable: %v", err)
}
// Six raw_json values were seeded above (comment, detail, file, commit,
// check, run), along with exactly one fingerprint and one comment.
if stats.RawJSONPruned < 6 || stats.FingerprintsPruned != 1 || stats.CommentsPruned != 1 {
t.Fatalf("portable stats = %+v", stats)
}
var commentRaw string
// Scanned into `any` so the test can compare against nil for a NULL column.
var commentBlob, revisionBlob any
if err := st.DB().QueryRowContext(ctx, `select raw_json, raw_json_blob_id from comments where github_id = 'raw-comment'`).Scan(&commentRaw, &commentBlob); err != nil {
t.Fatalf("read pruned comment: %v", err)
}
if commentRaw != "" || commentBlob != nil {
t.Fatalf("comment raw=%q blob=%v", commentRaw, commentBlob)
}
if err := st.DB().QueryRowContext(ctx, `select raw_json_blob_id from thread_revisions where id = 1`).Scan(&revisionBlob); err != nil {
t.Fatalf("read pruned revision: %v", err)
}
if revisionBlob != nil {
t.Fatalf("revision blob=%v", revisionBlob)
}
var titleTokens, linkedRefs, modules, features string
// NOTE(review): the fingerprint insert above does not set id; this lookup
// assumes the first inserted row gets id = 1 — confirm against the schema.
if err := st.DB().QueryRowContext(ctx, `select title_tokens_json, linked_refs_json, module_buckets_json, feature_json from thread_fingerprints where id = 1`).Scan(&titleTokens, &linkedRefs, &modules, &features); err != nil {
t.Fatalf("read pruned fingerprint: %v", err)
}
// Pruned fingerprint columns are expected to hold empty JSON containers.
if titleTokens != "[]" || linkedRefs != "[]" || modules != "[]" || features != "{}" {
t.Fatalf("fingerprint title=%q refs=%q modules=%q features=%q", titleTokens, linkedRefs, modules, features)
}
}
func TestClusterHelperBranches(t *testing.T) {
summaries := []ClusterSummary{
{ID: 1, MemberCount: 1, UpdatedAt: "2026-04-30T01:00:00Z"},
@ -248,6 +331,23 @@ func TestClusterHelperBranches(t *testing.T) {
if summaries[0].ID != 1 {
t.Fatalf("recent sort = %+v", summaries)
}
summaries = []ClusterSummary{
{ID: 3, MemberCount: 2, UpdatedAt: "2026-04-30T01:00:00Z"},
{ID: 2, MemberCount: 2, UpdatedAt: "2026-04-30T01:00:00Z"},
{ID: 1, MemberCount: 3, UpdatedAt: "2026-04-30T00:00:00Z"},
}
sortClusterSummaries(summaries, "size")
if summaries[0].ID != 1 || summaries[1].ID != 2 {
t.Fatalf("size tie sort = %+v", summaries)
}
sortClusterSummaries(summaries, "oldest")
if summaries[0].ID != 1 || summaries[1].ID != 2 {
t.Fatalf("oldest tie sort = %+v", summaries)
}
sortClusterSummaries(summaries, "recent")
if summaries[0].ID != 2 || summaries[1].ID != 3 {
t.Fatalf("recent tie sort = %+v", summaries)
}
if ids := parseIDSet(`1, 2, 0, bad, 3`); len(ids) != 3 || !ids[2] {
t.Fatalf("parse id set = %+v", ids)
}
@ -257,6 +357,15 @@ func TestClusterHelperBranches(t *testing.T) {
if got := snippetRunes("abcdef", 3); got != "abc" {
t.Fatalf("snippet = %q", got)
}
if got := rowsAffected(errorResult{}); got != 0 {
t.Fatalf("error rows affected = %d", got)
}
if got := nullString(""); got.Valid {
t.Fatalf("empty null string = %+v", got)
}
if got := nullString("x"); !got.Valid || got.String != "x" {
t.Fatalf("non-empty null string = %+v", got)
}
if func() (panicked bool) {
defer func() { panicked = recover() != nil }()
_ = sqliteIdentifier(`bad"name`)
@ -737,6 +846,16 @@ func TestPortableVacuumAndVectorQueryBranches(t *testing.T) {
}
}
type errorResult struct{}
func (errorResult) LastInsertId() (int64, error) {
return 0, sql.ErrNoRows
}
func (errorResult) RowsAffected() (int64, error) {
return 0, sql.ErrNoRows
}
func seedVectorThreads(t *testing.T, ctx context.Context, st *Store) (int64, []int64) {
t.Helper()
now := time.Now().UTC().Format(time.RFC3339Nano)
@ -761,3 +880,106 @@ func seedVectorThreads(t *testing.T, ctx context.Context, st *Store) (int64, []i
}
return repoID, ids
}
// TestClosedStoreErrorBranches opens a store, seeds one durable cluster, closes
// the store, and then calls a range of Store methods against the closed handle.
// It only requires that at least one of those calls returns an error; it does
// not require every call to fail.
func TestClosedStoreErrorBranches(t *testing.T) {
ctx := context.Background()
st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
if err != nil {
t.Fatalf("open store: %v", err)
}
repoID, threadIDs := seedVectorThreads(t, ctx, st)
// Seed a durable cluster before closing so the IDs used below refer to data
// that was written while the store was open.
if _, err := st.SaveDurableClusters(ctx, repoID, []DurableClusterInput{{
StableKey: "closed-store",
RepresentativeThreadID: threadIDs[0],
Members: []DurableClusterMemberInput{{ThreadID: threadIDs[0]}, {ThreadID: threadIDs[1]}},
}}); err != nil {
t.Fatalf("seed durable cluster: %v", err)
}
// Close explicitly (not deferred): every check below runs against the closed store.
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
}
// Each entry wraps one Store call and returns only its error.
checks := []struct {
name string
fn func() error
}{
{"display summaries", func() error {
_, err := st.ListDisplayClusterSummaries(ctx, ClusterSummaryOptions{RepoID: repoID, IncludeClosed: true})
return err
}},
{"run summaries", func() error {
_, err := st.ListRunClusterSummaries(ctx, ClusterSummaryOptions{RepoID: repoID})
return err
}},
{"durable summaries", func() error {
_, err := st.ListClusterSummaries(ctx, ClusterSummaryOptions{RepoID: repoID})
return err
}},
{"cluster detail", func() error {
_, err := st.ClusterDetail(ctx, ClusterDetailOptions{RepoID: repoID, ClusterID: 1})
return err
}},
{"durable detail", func() error {
_, err := st.DurableClusterDetail(ctx, ClusterDetailOptions{RepoID: repoID, ClusterID: 1})
return err
}},
{"thread cluster", func() error {
_, err := st.ClusterIDForThreadNumber(ctx, repoID, 301, true)
return err
}},
{"close cluster", func() error {
return st.CloseClusterLocally(ctx, repoID, 1, "closed")
}},
{"reopen cluster", func() error {
return st.ReopenClusterLocally(ctx, repoID, 1)
}},
{"save durable", func() error {
_, err := st.SaveDurableClusters(ctx, repoID, []DurableClusterInput{{
StableKey: "after-close",
RepresentativeThreadID: threadIDs[0],
Members: []DurableClusterMemberInput{{ThreadID: threadIDs[0]}},
}})
return err
}},
{"exclude member", func() error {
_, err := st.ExcludeClusterMemberLocally(ctx, repoID, 1, 301, "closed")
return err
}},
{"include member", func() error {
_, err := st.IncludeClusterMemberLocally(ctx, repoID, 1, 301, "closed")
return err
}},
{"canonical member", func() error {
_, err := st.SetClusterCanonicalLocally(ctx, repoID, 1, 301, "closed")
return err
}},
{"summaries", func() error {
_, err := st.summariesByThreadIDs(ctx, threadIDs)
return err
}},
{"portable prune", func() error {
_, err := st.PrunePortablePayloads(ctx, PortablePruneOptions{BodyChars: 8})
return err
}},
{"status", func() error {
_, err := st.Status(ctx)
return err
}},
{"repositories", func() error {
_, err := st.ListRepositories(ctx)
return err
}},
{"runs", func() error {
_, err := st.ListRuns(ctx, repoID, "sync", 1)
return err
}},
}
// Count failures rather than asserting per check; check.name is not used in
// the loop, so an individual call that succeeds is not reported.
errorsSeen := 0
for _, check := range checks {
if err := check.fn(); err != nil {
errorsSeen++
}
}
if errorsSeen == 0 {
t.Fatal("closed store checks did not exercise any errors")
}
}

View File

@ -32,7 +32,8 @@ type EmbeddingTaskOptions struct {
const (
MaxEmbeddingTextRunes = 6_000
embeddingContentHashVersion = "embedding:v2:max_runes=24000"
MaxEmbeddingTextBytes = 7_000
embeddingContentHashVersion = "embedding:v4"
)
func (s *Store) ListEmbeddingTasks(ctx context.Context, options EmbeddingTaskOptions) ([]EmbeddingTask, error) {
@ -146,17 +147,37 @@ func embeddingTextForBasisWithMeta(basis, title, body, rawText, dedupeText, keyS
}
func capEmbeddingText(text string) (string, embeddingTextMeta) {
runes := []rune(strings.TrimSpace(text))
text = strings.TrimSpace(text)
runes := []rune(text)
meta := embeddingTextMeta{OriginalRunes: len(runes), Runes: len(runes)}
if len(runes) <= MaxEmbeddingTextRunes {
return string(runes), meta
capped := capStringByRunesAndBytes(text, MaxEmbeddingTextRunes, MaxEmbeddingTextBytes)
if capped == text {
return text, meta
}
meta.Truncated = true
meta.Runes = MaxEmbeddingTextRunes
return string(runes[:MaxEmbeddingTextRunes]), meta
meta.Runes = len([]rune(capped))
return capped, meta
}
// capStringByRunesAndBytes returns the longest prefix of text that contains at
// most maxRunes runes and occupies at most maxBytes bytes, cutting only on rune
// boundaries so a multi-byte character is never split.
//
// Byte usage is measured from the string's own offsets rather than from the
// re-encoded rune: ranging over a string yields U+FFFD for every invalid byte,
// and re-encoding that rune would charge three bytes for a one-byte input and
// truncate earlier than the budget requires. For valid UTF-8 the two
// measurements agree.
//
// If either limit is zero or negative and text is non-empty, the result is "".
func capStringByRunesAndBytes(text string, maxRunes, maxBytes int) string {
	kept := 0  // runes accepted so far
	start := 0 // byte offset where the rune currently being measured begins
	for next := range text {
		if next == 0 {
			continue // the first rune's width is only known at the next offset
		}
		// The rune being measured spans text[start:next].
		if kept >= maxRunes || next > maxBytes {
			return text[:start]
		}
		kept++
		start = next
	}
	// The final rune spans text[start:]; apply the same limits to it.
	if len(text) > 0 && (kept >= maxRunes || len(text) > maxBytes) {
		return text[:start]
	}
	return text
}
func embeddingContentHash(basis, model, text string) string {
sum := sha256.Sum256([]byte(fmt.Sprintf("%s:%s:%s\n%s", embeddingContentHashVersion, basis, model, text)))
sum := sha256.Sum256([]byte(embeddingContentHashMaterial(basis, model, text)))
return hex.EncodeToString(sum[:])
}
// embeddingContentHashMaterial builds the text that describes an embedding
// input: a header with the hash version and the current rune/byte caps,
// followed by the basis, the model, a newline, and the text itself.
func embeddingContentHashMaterial(basis, model, text string) string {
	header := fmt.Sprintf("%s:max_runes=%d:max_bytes=%d", embeddingContentHashVersion, MaxEmbeddingTextRunes, MaxEmbeddingTextBytes)
	return header + ":" + basis + ":" + model + "\n" + text
}

View File

@ -2,9 +2,11 @@ package store
import (
"context"
"fmt"
"path/filepath"
"strings"
"testing"
"unicode/utf8"
)
func TestListEmbeddingTasksUsesLatestLLMKeySummary(t *testing.T) {
@ -88,6 +90,45 @@ func TestEmbeddingTextForBasisCapsLongInputs(t *testing.T) {
}
}
// TestEmbeddingTextForBasisCapsTokenDenseInputsByBytes feeds a body made of
// MaxEmbeddingTextRunes copies of a multi-byte character and checks that the
// resulting embedding text is flagged as truncated, fits the byte cap, remains
// valid UTF-8, and ends up shorter than the rune cap.
func TestEmbeddingTextForBasisCapsTokenDenseInputsByBytes(t *testing.T) {
	denseBody := strings.Repeat("界", MaxEmbeddingTextRunes)
	text, meta, err := embeddingTextForBasisWithMeta("title_original", "oversized unicode", denseBody, "", "", "")
	if err != nil {
		t.Fatalf("embedding text: %v", err)
	}
	if !meta.Truncated {
		t.Fatal("token-dense embedding text should be marked truncated")
	}
	byteLen := len(text)
	if byteLen > MaxEmbeddingTextBytes {
		t.Fatalf("truncated bytes = %d, want <= %d", byteLen, MaxEmbeddingTextBytes)
	}
	if !utf8.ValidString(text) {
		t.Fatal("truncated text is not valid UTF-8")
	}
	// The byte cap must be the binding limit, so the rune cap is never reached.
	runeLen := utf8.RuneCountInString(text)
	if runeLen >= MaxEmbeddingTextRunes {
		t.Fatalf("truncated runes = %d, want byte cap to apply before rune cap %d", runeLen, MaxEmbeddingTextRunes)
	}
	if meta.Runes >= meta.OriginalRunes {
		t.Fatalf("meta = %+v", meta)
	}
}
// TestEmbeddingContentHashVersionTracksCurrentInputCaps checks that the hash is
// non-empty and that the hash material names the current rune and byte caps
// while no longer containing the old "max_runes=24000" marker.
func TestEmbeddingContentHashVersionTracksCurrentInputCaps(t *testing.T) {
	const basis, model, text = "title_original", "test", "body"
	if digest := embeddingContentHash(basis, model, text); digest == "" {
		t.Fatal("embedding content hash should be non-empty")
	}
	material := embeddingContentHashMaterial(basis, model, text)
	required := []string{
		fmt.Sprintf("max_runes=%d", MaxEmbeddingTextRunes),
		fmt.Sprintf("max_bytes=%d", MaxEmbeddingTextBytes),
	}
	for _, want := range required {
		if !strings.Contains(material, want) {
			t.Fatalf("embedding hash material should include %s", want)
		}
	}
	if strings.Contains(material, "max_runes=24000") {
		t.Fatal("embedding hash material still carries stale 24000 rune cap")
	}
}
func TestListEmbeddingTasksIncludeClosed(t *testing.T) {
ctx := context.Background()
st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))

View File

@ -20,7 +20,9 @@ type PortablePruneStats struct {
BytesBefore int64 `json:"bytes_before"`
BytesAfter int64 `json:"bytes_after"`
ThreadsPruned int64 `json:"threads_pruned"`
CommentsPruned int64 `json:"comments_pruned"`
RepositoriesPruned int64 `json:"repositories_pruned"`
RawJSONPruned int64 `json:"raw_json_pruned"`
FingerprintsPruned int64 `json:"fingerprints_pruned"`
DocumentsDeleted int64 `json:"documents_deleted"`
DocumentsFTSRebuilt bool `json:"documents_fts_rebuilt"`
@ -43,34 +45,25 @@ func (s *Store) PrunePortablePayloads(ctx context.Context, options PortablePrune
}
if s.hasColumn(ctx, "threads", "body") {
if s.hasColumn(ctx, "threads", "body_excerpt") && s.hasColumn(ctx, "threads", "body_length") {
if result, err := s.db.ExecContext(ctx, `
update threads
set body_length = case when body is not null then length(body) else body_length end,
body_excerpt = case
when body is not null and length(body) > ? then substr(body, 1, ?)
when body is not null then body
else body_excerpt
end
where body is not null
`, options.BodyChars, options.BodyChars); err != nil {
return stats, fmt.Errorf("prune thread body excerpts: %w", err)
} else {
stats.ThreadsPruned += rowsAffected(result)
}
if _, err := s.db.ExecContext(ctx, `update threads set body = body_excerpt`); err != nil {
return stats, fmt.Errorf("replace thread bodies with excerpts: %w", err)
}
if err := s.ensurePortableExcerptColumns(ctx, "threads"); err != nil {
return stats, err
}
if result, err := s.db.ExecContext(ctx, `
update threads
set body_length = case when body is not null then length(body) else body_length end,
body_excerpt = case
when body is not null and length(body) > ? then substr(body, 1, ?)
when body is not null then body
else body_excerpt
end
where body is not null
`, options.BodyChars, options.BodyChars); err != nil {
return stats, fmt.Errorf("prune thread body excerpts: %w", err)
} else {
if result, err := s.db.ExecContext(ctx, `
update threads
set body = case when length(body) > ? then substr(body, 1, ?) else body end
where body is not null
`, options.BodyChars, options.BodyChars); err != nil {
return stats, fmt.Errorf("trim thread bodies: %w", err)
} else {
stats.ThreadsPruned += rowsAffected(result)
}
stats.ThreadsPruned += rowsAffected(result)
}
if _, err := s.db.ExecContext(ctx, `update threads set body = body_excerpt`); err != nil {
return stats, fmt.Errorf("replace thread bodies with excerpts: %w", err)
}
}
if s.hasColumn(ctx, "threads", "raw_json") {
@ -85,6 +78,26 @@ func (s *Store) PrunePortablePayloads(ctx context.Context, options PortablePrune
}
stats.RepositoriesPruned = rowsAffected(result)
}
if s.tableExists(ctx, "comments") && s.hasColumn(ctx, "comments", "body") {
if err := s.ensurePortableExcerptColumns(ctx, "comments"); err != nil {
return stats, err
}
if result, err := s.db.ExecContext(ctx, `
update comments
set body_length = length(body),
body_excerpt = case when length(body) > ? then substr(body, 1, ?) else body end,
body = case when length(body) > ? then substr(body, 1, ?) else body end
`, options.BodyChars, options.BodyChars, options.BodyChars, options.BodyChars); err != nil {
return stats, fmt.Errorf("prune comment bodies: %w", err)
} else {
stats.CommentsPruned = rowsAffected(result)
}
}
if pruned, err := s.clearPortableRawJSON(ctx); err != nil {
return stats, err
} else {
stats.RawJSONPruned = pruned
}
if s.tableExists(ctx, "thread_fingerprints") {
result, err := s.db.ExecContext(ctx, `
update thread_fingerprints
@ -180,11 +193,13 @@ func (s *Store) canonicalizePortableSchema(ctx context.Context, bodyChars int, s
return fmt.Errorf("ensure portable metadata: %w", err)
}
metadata := map[string]string{
"schema": "ghcrawl-portable-sync-v1",
"body_chars": fmt.Sprintf("%d", bodyChars),
"excluded": "raw_json,comments,documents,fts,vectors,code_snapshots,cluster_events,run_history,similarity_edges,blobs",
"exported_at": time.Now().UTC().Format(timeLayout),
"source_path": s.path,
"schema": "gitcrawl-portable-sync-v2",
"body_chars": fmt.Sprintf("%d", bodyChars),
"capabilities": "body_excerpts,comment_excerpts,pr_details,pr_files,pr_commits,pr_checks,workflow_runs,raw_json_stripped",
"includes": "repositories,threads,comments,pull_request_details,pull_request_files,pull_request_commits,pull_request_checks,github_workflow_runs,thread_fingerprints",
"excluded": "raw_json,documents,fts,vectors,code_snapshots,cluster_events,run_history,similarity_edges,blobs",
"exported_at": time.Now().UTC().Format(timeLayout),
"source_path": s.path,
}
for key, value := range metadata {
if _, err := s.db.ExecContext(ctx, `
@ -198,6 +213,59 @@ func (s *Store) canonicalizePortableSchema(ctx context.Context, bodyChars int, s
return nil
}
// ensurePortableExcerptColumns adds the body_excerpt and body_length columns to
// table when they are not already present. Columns that already exist are left
// untouched. It returns a wrapped error naming the column whose ALTER failed.
func (s *Store) ensurePortableExcerptColumns(ctx context.Context, table string) error {
	wanted := []struct {
		name       string
		definition string
	}{
		{name: "body_excerpt", definition: "text"},
		{name: "body_length", definition: "integer not null default 0"},
	}
	for _, column := range wanted {
		if s.hasColumn(ctx, table, column.name) {
			continue
		}
		ddl := "alter table " + sqliteIdentifier(table) + " add column " + column.name + " " + column.definition
		if _, err := s.db.ExecContext(ctx, ddl); err != nil {
			return fmt.Errorf("add portable %s.%s: %w", table, column.name, err)
		}
	}
	return nil
}
// clearPortableRawJSON blanks the raw_json payload columns and nulls the
// raw_json_blob_id pointer columns listed below, skipping any table/column pair
// that hasColumn reports as absent.
//
// The returned count sums the rows affected by the raw_json updates only; rows
// touched while nulling blob pointers are not counted. On error the count
// accumulated so far is returned alongside the wrapped error.
func (s *Store) clearPortableRawJSON(ctx context.Context) (int64, error) {
	type columnRef struct {
		table string
		name  string
	}
	payloads := []columnRef{
		{table: "comments", name: "raw_json"},
		{table: "pull_request_details", name: "raw_json"},
		{table: "pull_request_files", name: "raw_json"},
		{table: "pull_request_commits", name: "raw_json"},
		{table: "pull_request_checks", name: "raw_json"},
		{table: "github_workflow_runs", name: "raw_json"},
	}
	pointers := []columnRef{
		{table: "comments", name: "raw_json_blob_id"},
		{table: "thread_revisions", name: "raw_json_blob_id"},
	}

	var cleared int64
	for _, ref := range payloads {
		if !s.hasColumn(ctx, ref.table, ref.name) {
			continue
		}
		tbl := sqliteIdentifier(ref.table)
		col := sqliteIdentifier(ref.name)
		// Only rows that still hold a non-empty payload are rewritten, so the
		// affected-row count reflects payloads actually removed.
		stmt := "update " + tbl + " set " + col + " = '' where " + col + " is not null and " + col + " != ''"
		res, err := s.db.ExecContext(ctx, stmt)
		if err != nil {
			return cleared, fmt.Errorf("clear portable raw json %s.%s: %w", ref.table, ref.name, err)
		}
		cleared += rowsAffected(res)
	}

	for _, ref := range pointers {
		if !s.hasColumn(ctx, ref.table, ref.name) {
			continue
		}
		tbl := sqliteIdentifier(ref.table)
		col := sqliteIdentifier(ref.name)
		stmt := "update " + tbl + " set " + col + " = null where " + col + " is not null"
		if _, err := s.db.ExecContext(ctx, stmt); err != nil {
			return cleared, fmt.Errorf("clear portable raw blob pointer %s.%s: %w", ref.table, ref.name, err)
		}
	}
	return cleared, nil
}
func canonicalPortableDroppedTables() []string {
return []string{
"documents_fts",
@ -205,7 +273,6 @@ func canonicalPortableDroppedTables() []string {
"documents_fts_data",
"documents_fts_docsize",
"documents_fts_idx",
"comments",
"documents",
"document_embeddings",
"document_summaries",

View File

@ -0,0 +1,358 @@
package store
import (
"context"
"database/sql"
"fmt"
"strings"
)
// PullRequestDetail is one row of the pull_request_details table, written by
// UpsertPullRequestCache and read back by Store.PullRequestCache.
// The optional string fields (BaseSHA, HeadSHA, HeadRef, HeadRepoFullName,
// MergeableState) are passed through nullString on write and come back as ""
// when the stored column is NULL.
type PullRequestDetail struct {
// ThreadID is the upsert conflict key for pull_request_details.
ThreadID int64 `json:"thread_id"`
RepoID int64 `json:"repo_id"`
// Number, together with RepoID, is how Store.PullRequestCache looks the row up.
Number int `json:"number"`
BaseSHA string `json:"base_sha,omitempty"`
HeadSHA string `json:"head_sha,omitempty"`
HeadRef string `json:"head_ref,omitempty"`
HeadRepoFullName string `json:"head_repo_full_name,omitempty"`
MergeableState string `json:"mergeable_state,omitempty"`
Additions int `json:"additions"`
Deletions int `json:"deletions"`
ChangedFiles int `json:"changed_files"`
// RawJSON is stored as-is (not routed through nullString).
RawJSON string `json:"raw_json,omitempty"`
FetchedAt string `json:"fetched_at"`
UpdatedAt string `json:"updated_at"`
}
// PullRequestFile is one row of the pull_request_files table.
// Status, PreviousPath, and Patch are written through nullString and read back
// as "" when the stored column is NULL.
type PullRequestFile struct {
// ThreadID is populated when rows are read; on write the owning detail's
// ThreadID is used instead of this field.
ThreadID int64 `json:"thread_id"`
Path string `json:"path"`
Status string `json:"status,omitempty"`
Additions int `json:"additions"`
Deletions int `json:"deletions"`
Changes int `json:"changes"`
PreviousPath string `json:"previous_path,omitempty"`
Patch string `json:"patch,omitempty"`
RawJSON string `json:"raw_json,omitempty"`
FetchedAt string `json:"fetched_at"`
}
// PullRequestCommit is one row of the pull_request_commits table.
// Message, AuthorLogin, AuthorName, CommittedAt, and HTMLURL are written
// through nullString and read back as "" when the stored column is NULL.
type PullRequestCommit struct {
// ThreadID is populated when rows are read; on write the owning detail's
// ThreadID is used instead of this field.
ThreadID int64 `json:"thread_id"`
SHA string `json:"sha"`
Message string `json:"message,omitempty"`
AuthorLogin string `json:"author_login,omitempty"`
AuthorName string `json:"author_name,omitempty"`
CommittedAt string `json:"committed_at,omitempty"`
HTMLURL string `json:"html_url,omitempty"`
RawJSON string `json:"raw_json,omitempty"`
FetchedAt string `json:"fetched_at"`
}
// PullRequestCheck is one row of the pull_request_checks table.
// Status, Conclusion, DetailsURL, WorkflowName, StartedAt, and CompletedAt are
// written through nullString and read back as "" when the stored column is NULL.
type PullRequestCheck struct {
// ID is read from the table's id column; it is not supplied on insert.
ID int64 `json:"id"`
// ThreadID is populated when rows are read; on write the owning detail's
// ThreadID is used instead of this field.
ThreadID int64 `json:"thread_id"`
Name string `json:"name"`
Status string `json:"status,omitempty"`
Conclusion string `json:"conclusion,omitempty"`
DetailsURL string `json:"details_url,omitempty"`
WorkflowName string `json:"workflow_name,omitempty"`
StartedAt string `json:"started_at,omitempty"`
CompletedAt string `json:"completed_at,omitempty"`
RawJSON string `json:"raw_json,omitempty"`
FetchedAt string `json:"fetched_at"`
}
// WorkflowRun is one row of the github_workflow_runs table.
// Rows are upserted on the (repo_id, run_id) pair. The optional string fields
// from HeadBranch through UpdatedAtGH are written through nullString.
type WorkflowRun struct {
RepoID int64 `json:"repo_id"`
// RunID is kept as a string; with RepoID it forms the upsert conflict key.
RunID string `json:"run_id"`
RunNumber int `json:"run_number"`
HeadBranch string `json:"head_branch,omitempty"`
HeadSHA string `json:"head_sha,omitempty"`
Status string `json:"status,omitempty"`
Conclusion string `json:"conclusion,omitempty"`
WorkflowName string `json:"workflow_name,omitempty"`
Event string `json:"event,omitempty"`
HTMLURL string `json:"html_url,omitempty"`
CreatedAtGH string `json:"created_at_gh,omitempty"`
// UpdatedAtGH is the primary sort key (descending) when runs are listed.
UpdatedAtGH string `json:"updated_at_gh,omitempty"`
RawJSON string `json:"raw_json,omitempty"`
FetchedAt string `json:"fetched_at"`
}
// PullRequestCache bundles the cached detail row of a pull request with its
// files, commits, and checks, as assembled by Store.PullRequestCache.
// Workflow runs are not part of this bundle.
type PullRequestCache struct {
Detail PullRequestDetail `json:"detail"`
Files []PullRequestFile `json:"files"`
Commits []PullRequestCommit `json:"commits"`
Checks []PullRequestCheck `json:"checks"`
}
// UpsertPullRequestCache stores the detail row, files, commits, checks, and
// workflow runs for one pull request.
//
// When s.queries is nil the write is wrapped in WithTx; otherwise it runs
// directly on s.
// NOTE(review): a non-nil s.queries presumably means s is already bound to a
// transaction — confirm against WithTx.
func (s *Store) UpsertPullRequestCache(ctx context.Context, detail PullRequestDetail, files []PullRequestFile, commits []PullRequestCommit, checks []PullRequestCheck, runs []WorkflowRun) error {
	write := func(target *Store) error {
		return target.upsertPullRequestCache(ctx, detail, files, commits, checks, runs)
	}
	if s.queries == nil {
		return s.WithTx(ctx, write)
	}
	return write(s)
}
// upsertPullRequestCache writes one pull request's cached data through s.q():
//  1. upserts the pull_request_details row keyed by thread_id;
//  2. replaces (delete then insert) the thread's files, commits, and checks;
//  3. upserts each workflow run keyed by (repo_id, run_id).
//
// Child rows always use detail.ThreadID, not the ThreadID field of the
// individual file/commit/check. Optional string fields go through nullString;
// raw_json and fetched_at are passed as-is. It returns on the first failing
// statement with a wrapped error and does not start a transaction itself.
func (s *Store) upsertPullRequestCache(ctx context.Context, detail PullRequestDetail, files []PullRequestFile, commits []PullRequestCommit, checks []PullRequestCheck, runs []WorkflowRun) error {
if _, err := s.q().ExecContext(ctx, `
insert into pull_request_details(thread_id, repo_id, number, base_sha, head_sha, head_ref, head_repo_full_name, mergeable_state, additions, deletions, changed_files, raw_json, fetched_at, updated_at)
values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(thread_id) do update set
repo_id=excluded.repo_id,
number=excluded.number,
base_sha=excluded.base_sha,
head_sha=excluded.head_sha,
head_ref=excluded.head_ref,
head_repo_full_name=excluded.head_repo_full_name,
mergeable_state=excluded.mergeable_state,
additions=excluded.additions,
deletions=excluded.deletions,
changed_files=excluded.changed_files,
raw_json=excluded.raw_json,
fetched_at=excluded.fetched_at,
updated_at=excluded.updated_at
`, detail.ThreadID, detail.RepoID, detail.Number, nullString(detail.BaseSHA), nullString(detail.HeadSHA), nullString(detail.HeadRef), nullString(detail.HeadRepoFullName), nullString(detail.MergeableState), detail.Additions, detail.Deletions, detail.ChangedFiles, detail.RawJSON, detail.FetchedAt, detail.UpdatedAt); err != nil {
return fmt.Errorf("upsert pull request detail: %w", err)
}
// Files are replaced wholesale: existing rows for the thread are removed first.
if _, err := s.q().ExecContext(ctx, `delete from pull_request_files where thread_id = ?`, detail.ThreadID); err != nil {
return fmt.Errorf("clear pull request files: %w", err)
}
for _, file := range files {
if _, err := s.q().ExecContext(ctx, `
insert into pull_request_files(thread_id, path, status, additions, deletions, changes, previous_path, patch, raw_json, fetched_at)
values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`, detail.ThreadID, file.Path, nullString(file.Status), file.Additions, file.Deletions, file.Changes, nullString(file.PreviousPath), nullString(file.Patch), file.RawJSON, file.FetchedAt); err != nil {
return fmt.Errorf("upsert pull request file: %w", err)
}
}
// Commits are replaced wholesale as well.
if _, err := s.q().ExecContext(ctx, `delete from pull_request_commits where thread_id = ?`, detail.ThreadID); err != nil {
return fmt.Errorf("clear pull request commits: %w", err)
}
for _, commit := range commits {
if _, err := s.q().ExecContext(ctx, `
insert into pull_request_commits(thread_id, sha, message, author_login, author_name, committed_at, html_url, raw_json, fetched_at)
values(?, ?, ?, ?, ?, ?, ?, ?, ?)
`, detail.ThreadID, commit.SHA, nullString(commit.Message), nullString(commit.AuthorLogin), nullString(commit.AuthorName), nullString(commit.CommittedAt), nullString(commit.HTMLURL), commit.RawJSON, commit.FetchedAt); err != nil {
return fmt.Errorf("upsert pull request commit: %w", err)
}
}
// Checks are replaced wholesale; check.ID is not written here.
if _, err := s.q().ExecContext(ctx, `delete from pull_request_checks where thread_id = ?`, detail.ThreadID); err != nil {
return fmt.Errorf("clear pull request checks: %w", err)
}
for _, check := range checks {
if _, err := s.q().ExecContext(ctx, `
insert into pull_request_checks(thread_id, name, status, conclusion, details_url, workflow_name, started_at, completed_at, raw_json, fetched_at)
values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`, detail.ThreadID, check.Name, nullString(check.Status), nullString(check.Conclusion), nullString(check.DetailsURL), nullString(check.WorkflowName), nullString(check.StartedAt), nullString(check.CompletedAt), check.RawJSON, check.FetchedAt); err != nil {
return fmt.Errorf("upsert pull request check: %w", err)
}
}
// Workflow runs are upserted, not cleared: runs already stored for the
// repository and absent from `runs` are left in place.
for _, run := range runs {
if _, err := s.q().ExecContext(ctx, `
insert into github_workflow_runs(repo_id, run_id, run_number, head_branch, head_sha, status, conclusion, workflow_name, event, html_url, created_at_gh, updated_at_gh, raw_json, fetched_at)
values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(repo_id, run_id) do update set
run_number=excluded.run_number,
head_branch=excluded.head_branch,
head_sha=excluded.head_sha,
status=excluded.status,
conclusion=excluded.conclusion,
workflow_name=excluded.workflow_name,
event=excluded.event,
html_url=excluded.html_url,
created_at_gh=excluded.created_at_gh,
updated_at_gh=excluded.updated_at_gh,
raw_json=excluded.raw_json,
fetched_at=excluded.fetched_at
`, run.RepoID, run.RunID, run.RunNumber, nullString(run.HeadBranch), nullString(run.HeadSHA), nullString(run.Status), nullString(run.Conclusion), nullString(run.WorkflowName), nullString(run.Event), nullString(run.HTMLURL), nullString(run.CreatedAtGH), nullString(run.UpdatedAtGH), run.RawJSON, run.FetchedAt); err != nil {
return fmt.Errorf("upsert workflow run: %w", err)
}
}
return nil
}
// PullRequestCache loads the cached pull request identified by repoID and
// number: the detail row first, then its files, commits, and checks looked up
// by the detail's thread id.
//
// NULL optional columns are returned as empty strings. Any failure, including
// a missing detail row, yields a zero PullRequestCache and an error.
func (s *Store) PullRequestCache(ctx context.Context, repoID int64, number int) (PullRequestCache, error) {
	var (
		result   PullRequestCache
		nullable struct {
			baseSHA, headSHA, headRef, headRepo, mergeable sql.NullString
		}
	)
	detail := &result.Detail
	row := s.q().QueryRowContext(ctx, `
select thread_id, repo_id, number, base_sha, head_sha, head_ref, head_repo_full_name, mergeable_state, additions, deletions, changed_files, raw_json, fetched_at, updated_at
from pull_request_details
where repo_id = ? and number = ?
`, repoID, number)
	if err := row.Scan(
		&detail.ThreadID, &detail.RepoID, &detail.Number,
		&nullable.baseSHA, &nullable.headSHA, &nullable.headRef, &nullable.headRepo, &nullable.mergeable,
		&detail.Additions, &detail.Deletions, &detail.ChangedFiles,
		&detail.RawJSON, &detail.FetchedAt, &detail.UpdatedAt,
	); err != nil {
		return PullRequestCache{}, fmt.Errorf("pull request detail: %w", err)
	}
	detail.BaseSHA = nullable.baseSHA.String
	detail.HeadSHA = nullable.headSHA.String
	detail.HeadRef = nullable.headRef.String
	detail.HeadRepoFullName = nullable.headRepo.String
	detail.MergeableState = nullable.mergeable.String

	var err error
	if result.Files, err = s.PullRequestFiles(ctx, detail.ThreadID); err != nil {
		return PullRequestCache{}, err
	}
	if result.Commits, err = s.PullRequestCommits(ctx, detail.ThreadID); err != nil {
		return PullRequestCache{}, err
	}
	if result.Checks, err = s.PullRequestChecks(ctx, detail.ThreadID); err != nil {
		return PullRequestCache{}, err
	}
	return result, nil
}
// PullRequestFiles returns the cached files of the given thread ordered by
// path. NULL status, previous_path, and patch columns come back as "". The
// result is a nil slice when the thread has no cached files.
func (s *Store) PullRequestFiles(ctx context.Context, threadID int64) ([]PullRequestFile, error) {
	const query = `
select thread_id, path, status, additions, deletions, changes, previous_path, patch, raw_json, fetched_at
from pull_request_files
where thread_id = ?
order by path
`
	cursor, err := s.q().QueryContext(ctx, query, threadID)
	if err != nil {
		return nil, fmt.Errorf("list pull request files: %w", err)
	}
	defer cursor.Close()

	var files []PullRequestFile
	for cursor.Next() {
		var (
			entry                       PullRequestFile
			status, previousPath, patch sql.NullString
		)
		scanErr := cursor.Scan(
			&entry.ThreadID, &entry.Path, &status,
			&entry.Additions, &entry.Deletions, &entry.Changes,
			&previousPath, &patch, &entry.RawJSON, &entry.FetchedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan pull request file: %w", scanErr)
		}
		entry.Status, entry.PreviousPath, entry.Patch = status.String, previousPath.String, patch.String
		files = append(files, entry)
	}
	if iterErr := cursor.Err(); iterErr != nil {
		return nil, fmt.Errorf("iterate pull request files: %w", iterErr)
	}
	return files, nil
}
// PullRequestCommits returns the cached commits of the given thread ordered by
// rowid. NULL optional text columns come back as "". The result is a nil slice
// when the thread has no cached commits.
func (s *Store) PullRequestCommits(ctx context.Context, threadID int64) ([]PullRequestCommit, error) {
	const query = `
select thread_id, sha, message, author_login, author_name, committed_at, html_url, raw_json, fetched_at
from pull_request_commits
where thread_id = ?
order by rowid
`
	cursor, err := s.q().QueryContext(ctx, query, threadID)
	if err != nil {
		return nil, fmt.Errorf("list pull request commits: %w", err)
	}
	defer cursor.Close()

	var commits []PullRequestCommit
	for cursor.Next() {
		var (
			entry                                                  PullRequestCommit
			message, authorLogin, authorName, committedAt, htmlURL sql.NullString
		)
		scanErr := cursor.Scan(
			&entry.ThreadID, &entry.SHA,
			&message, &authorLogin, &authorName, &committedAt, &htmlURL,
			&entry.RawJSON, &entry.FetchedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan pull request commit: %w", scanErr)
		}
		entry.Message = message.String
		entry.AuthorLogin, entry.AuthorName = authorLogin.String, authorName.String
		entry.CommittedAt = committedAt.String
		entry.HTMLURL = htmlURL.String
		commits = append(commits, entry)
	}
	if iterErr := cursor.Err(); iterErr != nil {
		return nil, fmt.Errorf("iterate pull request commits: %w", iterErr)
	}
	return commits, nil
}
// PullRequestChecks returns the cached checks of the given thread ordered by
// name. NULL optional text columns come back as "". The result is a nil slice
// when the thread has no cached checks.
func (s *Store) PullRequestChecks(ctx context.Context, threadID int64) ([]PullRequestCheck, error) {
	const query = `
select id, thread_id, name, status, conclusion, details_url, workflow_name, started_at, completed_at, raw_json, fetched_at
from pull_request_checks
where thread_id = ?
order by name
`
	cursor, err := s.q().QueryContext(ctx, query, threadID)
	if err != nil {
		return nil, fmt.Errorf("list pull request checks: %w", err)
	}
	defer cursor.Close()

	var checks []PullRequestCheck
	for cursor.Next() {
		var (
			entry                                                                PullRequestCheck
			status, conclusion, detailsURL, workflowName, startedAt, completedAt sql.NullString
		)
		scanErr := cursor.Scan(
			&entry.ID, &entry.ThreadID, &entry.Name,
			&status, &conclusion, &detailsURL, &workflowName, &startedAt, &completedAt,
			&entry.RawJSON, &entry.FetchedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan pull request check: %w", scanErr)
		}
		entry.Status, entry.Conclusion = status.String, conclusion.String
		entry.DetailsURL = detailsURL.String
		entry.WorkflowName = workflowName.String
		entry.StartedAt, entry.CompletedAt = startedAt.String, completedAt.String
		checks = append(checks, entry)
	}
	if iterErr := cursor.Err(); iterErr != nil {
		return nil, fmt.Errorf("iterate pull request checks: %w", iterErr)
	}
	return checks, nil
}
// WorkflowRunListOptions narrows and bounds the result of ListWorkflowRuns.
type WorkflowRunListOptions struct {
	// Branch and HeadSHA are optional equality filters on head_branch and
	// head_sha; an empty string disables the corresponding filter.
	Branch, HeadSHA string
	// Limit caps the number of rows returned; values <= 0 select the
	// default of 20 applied by ListWorkflowRuns.
	Limit int
}
// ListWorkflowRuns returns cached workflow runs for a repository.
//
// The query always filters by repo_id and additionally by head_branch and
// head_sha when the matching option is non-empty. Results are ordered by
// updated_at_gh descending, then run_id descending, and capped at
// options.Limit rows (20 when Limit is <= 0). Nullable text columns are
// surfaced as empty strings. When no rows match, the returned slice is nil.
func (s *Store) ListWorkflowRuns(ctx context.Context, repoID int64, options WorkflowRunListOptions) ([]WorkflowRun, error) {
	conditions := []string{"repo_id = ?"}
	params := []any{repoID}
	// addFilter appends an equality clause and its bound value, skipping empty values.
	addFilter := func(clause, value string) {
		if value == "" {
			return
		}
		conditions = append(conditions, clause)
		params = append(params, value)
	}
	addFilter("head_branch = ?", options.Branch)
	addFilter("head_sha = ?", options.HeadSHA)

	rowLimit := 20
	if options.Limit > 0 {
		rowLimit = options.Limit
	}
	// The limit placeholder is last in the statement, so its value is bound last.
	params = append(params, rowLimit)

	query := `
select repo_id, run_id, run_number, head_branch, head_sha, status, conclusion, workflow_name, event, html_url, created_at_gh, updated_at_gh, raw_json, fetched_at
from github_workflow_runs
where ` + strings.Join(conditions, " and ") + `
order by updated_at_gh desc, run_id desc
limit ?
`
	cursor, queryErr := s.q().QueryContext(ctx, query, params...)
	if queryErr != nil {
		return nil, fmt.Errorf("list workflow runs: %w", queryErr)
	}
	defer cursor.Close()

	var runs []WorkflowRun
	for cursor.Next() {
		var (
			record WorkflowRun
			// Nullable columns are scanned through NullString so NULL becomes "".
			headBranch, headSHA, state, outcome, workflow, trigger, link, created, updated sql.NullString
		)
		scanErr := cursor.Scan(
			&record.RepoID, &record.RunID, &record.RunNumber,
			&headBranch, &headSHA, &state, &outcome, &workflow, &trigger, &link, &created, &updated,
			&record.RawJSON, &record.FetchedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan workflow run: %w", scanErr)
		}
		record.HeadBranch = headBranch.String
		record.HeadSHA = headSHA.String
		record.Status = state.String
		record.Conclusion = outcome.String
		record.WorkflowName = workflow.String
		record.Event = trigger.String
		record.HTMLURL = link.String
		record.CreatedAtGH = created.String
		record.UpdatedAtGH = updated.String
		runs = append(runs, record)
	}
	if iterErr := cursor.Err(); iterErr != nil {
		return nil, fmt.Errorf("iterate workflow runs: %w", iterErr)
	}
	return runs, nil
}

View File

@ -0,0 +1,87 @@
package store
import (
"context"
"path/filepath"
"testing"
)
// TestPullRequestCacheRoundTripAndWorkflowFilters writes a pull request cache
// (detail, files, commits, checks, workflow runs) into a fresh store, reads it
// back, exercises ListWorkflowRuns with and without filters, and then upserts
// a second time to check that the cached detail and child rows are replaced.
func TestPullRequestCacheRoundTripAndWorkflowFilters(t *testing.T) {
ctx := context.Background()
st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
if err != nil {
t.Fatalf("open store: %v", err)
}
defer st.Close()
// seedVectorThreads is a helper defined elsewhere in this package; the test
// uses the second thread id it returns as the pull request thread.
repoID, threadIDs := seedVectorThreads(t, ctx, st)
threadID := threadIDs[1]
fetchedAt := "2026-05-05T10:00:00Z"
detail := PullRequestDetail{
ThreadID: threadID, RepoID: repoID, Number: 302,
BaseSHA: "base", HeadSHA: "head", HeadRef: "feature/cache", HeadRepoFullName: "openclaw/gitcrawl-fork",
MergeableState: "clean", Additions: 12, Deletions: 3, ChangedFiles: 2,
RawJSON: "{}", FetchedAt: fetchedAt, UpdatedAt: "2026-05-05T09:59:00Z",
}
// Files and checks are deliberately supplied in z-before-a order so the
// read-side ordering assertions below are meaningful.
files := []PullRequestFile{
{Path: "z.go", Status: "modified", Additions: 2, Deletions: 1, Changes: 3, Patch: "@@", RawJSON: "{}", FetchedAt: fetchedAt},
{Path: "a.go", Status: "renamed", Additions: 10, Changes: 10, PreviousPath: "old.go", RawJSON: "{}", FetchedAt: fetchedAt},
}
commits := []PullRequestCommit{
{SHA: "abc", Message: "feat: cache", AuthorLogin: "alice", AuthorName: "Alice", CommittedAt: "2026-05-05T08:00:00Z", HTMLURL: "https://example.invalid/commit/abc", RawJSON: "{}", FetchedAt: fetchedAt},
}
checks := []PullRequestCheck{
{Name: "z-check", Status: "completed", Conclusion: "success", DetailsURL: "https://example.invalid/z", WorkflowName: "CI", StartedAt: "2026-05-05T09:00:00Z", CompletedAt: "2026-05-05T09:05:00Z", RawJSON: "{}", FetchedAt: fetchedAt},
{Name: "a-check", Status: "queued", RawJSON: "{}", FetchedAt: fetchedAt},
}
// Two runs on different branches/SHAs; run 101 has the later UpdatedAtGH.
runs := []WorkflowRun{
{RepoID: repoID, RunID: "100", RunNumber: 7, HeadBranch: "main", HeadSHA: "head", Status: "completed", Conclusion: "success", WorkflowName: "CI", Event: "push", HTMLURL: "https://example.invalid/run/100", CreatedAtGH: "2026-05-05T09:00:00Z", UpdatedAtGH: "2026-05-05T09:05:00Z", RawJSON: "{}", FetchedAt: fetchedAt},
{RepoID: repoID, RunID: "101", RunNumber: 8, HeadBranch: "release", HeadSHA: "other", Status: "in_progress", WorkflowName: "release", Event: "workflow_dispatch", CreatedAtGH: "2026-05-05T09:10:00Z", UpdatedAtGH: "2026-05-05T09:11:00Z", RawJSON: "{}", FetchedAt: fetchedAt},
}
if err := st.UpsertPullRequestCache(ctx, detail, files, commits, checks, runs); err != nil {
t.Fatalf("upsert pr cache: %v", err)
}
// Round trip: look the cache up by (repo, PR number) rather than thread id.
cache, err := st.PullRequestCache(ctx, repoID, 302)
if err != nil {
t.Fatalf("pull request cache: %v", err)
}
if cache.Detail.HeadSHA != "head" || cache.Detail.MergeableState != "clean" {
t.Fatalf("detail = %+v", cache.Detail)
}
// a.go is expected first although z.go was supplied first.
if len(cache.Files) != 2 || cache.Files[0].Path != "a.go" || cache.Files[0].PreviousPath != "old.go" {
t.Fatalf("files = %+v", cache.Files)
}
if len(cache.Commits) != 1 || cache.Commits[0].SHA != "abc" || cache.Commits[0].AuthorName != "Alice" {
t.Fatalf("commits = %+v", cache.Commits)
}
// a-check is expected first although z-check was supplied first.
if len(cache.Checks) != 2 || cache.Checks[0].Name != "a-check" || cache.Checks[1].Conclusion != "success" {
t.Fatalf("checks = %+v", cache.Checks)
}
// Branch and head SHA filters together should match only run 100.
mainRuns, err := st.ListWorkflowRuns(ctx, repoID, WorkflowRunListOptions{Branch: "main", HeadSHA: "head", Limit: 5})
if err != nil {
t.Fatalf("list filtered runs: %v", err)
}
if len(mainRuns) != 1 || mainRuns[0].RunID != "100" || mainRuns[0].HTMLURL == "" {
t.Fatalf("main runs = %+v", mainRuns)
}
// Zero-value options: no filters and the default limit; run 101 is expected
// first because it has the later UpdatedAtGH.
allRuns, err := st.ListWorkflowRuns(ctx, repoID, WorkflowRunListOptions{})
if err != nil {
t.Fatalf("list default runs: %v", err)
}
if len(allRuns) != 2 || allRuns[0].RunID != "101" {
t.Fatalf("all runs = %+v", allRuns)
}
// Second upsert with a new head SHA, one file, and nil commits/checks: the
// assertions below expect the earlier child rows to be gone, not merged.
detail.HeadSHA = "head-v2"
if err := st.UpsertPullRequestCache(ctx, detail, files[:1], nil, nil, []WorkflowRun{{RepoID: repoID, RunID: "100", RunNumber: 9, HeadBranch: "main", HeadSHA: "head-v2", Status: "completed", Conclusion: "failure", UpdatedAtGH: "2026-05-05T10:00:00Z", RawJSON: "{}", FetchedAt: fetchedAt}}); err != nil {
t.Fatalf("update pr cache: %v", err)
}
cache, err = st.PullRequestCache(ctx, repoID, 302)
if err != nil {
t.Fatalf("updated pull request cache: %v", err)
}
if cache.Detail.HeadSHA != "head-v2" || len(cache.Files) != 1 || len(cache.Commits) != 0 || len(cache.Checks) != 0 {
t.Fatalf("updated cache = %+v", cache)
}
}

View File

@ -4,6 +4,7 @@ import (
"context"
"database/sql"
"fmt"
"strings"
"time"
)
@ -94,6 +95,50 @@ func (s *Store) LastSuccessfulSyncAt(ctx context.Context, repoID int64) (time.Ti
return parsed, nil
}
// LastSuccessfulListSyncAt reports the most recent finished_at among sync_runs
// rows for repoID whose status is 'success' or 'completed' and whose scope is
// one of the scopes listSyncScopesForState maps the given state to.
//
// It returns the zero time with a nil error when the state maps to no scopes
// or when no matching run exists. The stored timestamp is parsed as
// RFC3339Nano; a query or parse failure is returned wrapped.
func (s *Store) LastSuccessfulListSyncAt(ctx context.Context, repoID int64, state string) (time.Time, error) {
	var none time.Time

	scopes := listSyncScopesForState(state)
	if len(scopes) == 0 {
		return none, nil
	}

	// One "?" per scope, comma separated, for the scope IN (...) list.
	marks := strings.TrimSuffix(strings.Repeat("?,", len(scopes)), ",")
	params := make([]any, 0, 1+len(scopes))
	params = append(params, repoID)
	for _, scope := range scopes {
		params = append(params, scope)
	}

	query := `
select coalesce(max(finished_at), '')
from sync_runs
where repo_id = ? and status in ('success', 'completed') and scope in (` + marks + `)
`
	var finishedAt string
	if scanErr := s.q().QueryRowContext(ctx, query, params...).Scan(&finishedAt); scanErr != nil {
		return none, fmt.Errorf("read last successful list sync: %w", scanErr)
	}
	// coalesce(..., '') yields the empty string when no run matched.
	if finishedAt == "" {
		return none, nil
	}

	stamp, parseErr := time.Parse(time.RFC3339Nano, finishedAt)
	if parseErr != nil {
		return none, fmt.Errorf("parse last successful list sync %q: %w", finishedAt, parseErr)
	}
	return stamp, nil
}
// listSyncScopesForState maps a thread state filter to the sync run scopes
// that cover it. The state is lower-cased and trimmed before matching:
// "" and "open" map to {"open", "all"}, "closed" to {"closed", "all"},
// "all" to {"all"}; any other value yields nil.
func listSyncScopesForState(state string) []string {
	normalized := strings.TrimSpace(strings.ToLower(state))
	if normalized == "" || normalized == "open" {
		return []string{"open", "all"}
	}
	if normalized == "closed" {
		return []string{"closed", "all"}
	}
	if normalized == "all" {
		return []string{"all"}
	}
	return nil
}
func runTable(kind string) (string, error) {
switch kind {
case "sync":

View File

@ -111,3 +111,42 @@ func TestLastSuccessfulSyncAt(t *testing.T) {
t.Fatalf("last sync = %s, want %s", lastSync, want)
}
}
// TestLastSuccessfulListSyncAtIgnoresTargetedRuns checks that a successful run
// scoped to specific numbers ("numbers:13") is not reported as a list sync for
// the "open" state, while a later successful run scoped "all" is.
func TestLastSuccessfulListSyncAtIgnoresTargetedRuns(t *testing.T) {
	ctx := context.Background()
	dbPath := filepath.Join(t.TempDir(), "gitcrawl.db")
	st, openErr := Open(ctx, dbPath)
	if openErr != nil {
		t.Fatalf("open store: %v", openErr)
	}
	defer st.Close()

	repo := Repository{
		Owner: "openclaw", Name: "gitcrawl", FullName: "openclaw/gitcrawl", RawJSON: "{}", UpdatedAt: "2026-04-26T00:00:00Z",
	}
	repoID, repoErr := st.UpsertRepository(ctx, repo)
	if repoErr != nil {
		t.Fatalf("repo: %v", repoErr)
	}

	// Step 1: only a targeted run exists, so no broad list sync should be reported.
	targeted := RunRecord{
		RepoID: repoID, Kind: "sync", Scope: "numbers:13", Status: "success",
		StartedAt: "2026-04-26T00:03:00Z", FinishedAt: "2026-04-26T00:03:30Z",
	}
	if _, recordErr := st.RecordRun(ctx, targeted); recordErr != nil {
		t.Fatalf("record targeted run: %v", recordErr)
	}
	beforeBroad, beforeErr := st.LastSuccessfulListSyncAt(ctx, repoID, "open")
	if beforeErr != nil || !beforeBroad.IsZero() {
		t.Fatalf("targeted run should not count as broad list sync: last=%s err=%v", beforeBroad, beforeErr)
	}

	// Step 2: an "all" run is recorded and its finish time should now be reported.
	broad := RunRecord{
		RepoID: repoID, Kind: "sync", Scope: "all", Status: "success",
		StartedAt: "2026-04-26T00:04:00Z", FinishedAt: "2026-04-26T00:04:30Z",
	}
	if _, recordErr := st.RecordRun(ctx, broad); recordErr != nil {
		t.Fatalf("record all run: %v", recordErr)
	}
	afterBroad, afterErr := st.LastSuccessfulListSyncAt(ctx, repoID, "open")
	if afterErr != nil {
		t.Fatalf("last broad sync: %v", afterErr)
	}
	want, _ := time.Parse(time.RFC3339Nano, "2026-04-26T00:04:30Z")
	if afterBroad.Equal(want) {
		return
	}
	t.Fatalf("last broad sync = %s, want %s", afterBroad, want)
}

View File

@ -122,6 +122,84 @@ create table if not exists thread_hunk_signatures (
unique(snapshot_id, path, hunk_hash)
);
create table if not exists pull_request_details (
thread_id integer primary key references threads(id) on delete cascade,
repo_id integer not null references repositories(id) on delete cascade,
number integer not null,
base_sha text,
head_sha text,
head_ref text,
head_repo_full_name text,
mergeable_state text,
additions integer not null default 0,
deletions integer not null default 0,
changed_files integer not null default 0,
raw_json text not null,
fetched_at text not null,
updated_at text not null,
unique(repo_id, number)
);
create table if not exists pull_request_files (
thread_id integer not null references threads(id) on delete cascade,
path text not null,
status text,
additions integer not null default 0,
deletions integer not null default 0,
changes integer not null default 0,
previous_path text,
patch text,
raw_json text not null,
fetched_at text not null,
primary key(thread_id, path)
);
create table if not exists pull_request_commits (
thread_id integer not null references threads(id) on delete cascade,
sha text not null,
message text,
author_login text,
author_name text,
committed_at text,
html_url text,
raw_json text not null,
fetched_at text not null,
primary key(thread_id, sha)
);
create table if not exists pull_request_checks (
id integer primary key,
thread_id integer not null references threads(id) on delete cascade,
name text not null,
status text,
conclusion text,
details_url text,
workflow_name text,
started_at text,
completed_at text,
raw_json text not null,
fetched_at text not null,
unique(thread_id, name, details_url)
);
create table if not exists github_workflow_runs (
repo_id integer not null references repositories(id) on delete cascade,
run_id text not null,
run_number integer not null default 0,
head_branch text,
head_sha text,
status text,
conclusion text,
workflow_name text,
event text,
html_url text,
created_at_gh text,
updated_at_gh text,
raw_json text not null,
fetched_at text not null,
primary key(repo_id, run_id)
);
create table if not exists documents (
id integer primary key,
thread_id integer not null unique references threads(id) on delete cascade,
@ -391,6 +469,11 @@ create index if not exists idx_threads_repo_updated on threads(repo_id, updated_
create index if not exists idx_comments_thread_type on comments(thread_id, comment_type);
create index if not exists idx_thread_revisions_thread_created on thread_revisions(thread_id, created_at);
create index if not exists idx_thread_changed_files_path on thread_changed_files(path);
create index if not exists idx_pull_request_details_repo_number on pull_request_details(repo_id, number);
create index if not exists idx_pull_request_files_path on pull_request_files(path);
create index if not exists idx_pull_request_checks_thread_status on pull_request_checks(thread_id, status, conclusion);
create index if not exists idx_github_workflow_runs_repo_branch on github_workflow_runs(repo_id, head_branch, run_id);
create index if not exists idx_github_workflow_runs_repo_sha on github_workflow_runs(repo_id, head_sha, run_id);
create index if not exists idx_thread_fingerprints_hash on thread_fingerprints(fingerprint_hash);
create index if not exists idx_thread_vectors_basis_model on thread_vectors(basis, model);
create index if not exists idx_sync_runs_repo_status_id on sync_runs(repo_id, status, id);

View File

@ -24,6 +24,9 @@ type ThreadSearchOptions struct {
Query string
Kind string
State string
Author string
Assignee string
Labels []string
IncludeLocallyClosed bool
Limit int
}
@ -192,6 +195,29 @@ func threadSearchWhere(options ThreadSearchOptions) ([]string, []any) {
where = append(where, `t.state = ?`)
args = append(args, strings.TrimSpace(options.State))
}
if author := strings.TrimSpace(options.Author); author != "" {
where = append(where, `lower(coalesce(t.author_login, '')) = lower(?)`)
args = append(args, author)
}
if assignee := strings.TrimSpace(options.Assignee); assignee != "" {
where = append(where, `exists (
select 1
from json_each(case when json_valid(t.assignees_json) then t.assignees_json else '[]' end) a
where lower(case when json_valid(a.value) then coalesce(json_extract(a.value, '$.login'), a.value) else a.value end) = lower(?)
)`)
args = append(args, assignee)
}
for _, label := range options.Labels {
if label = strings.TrimSpace(label); label == "" {
continue
}
where = append(where, `exists (
select 1
from json_each(case when json_valid(t.labels_json) then t.labels_json else '[]' end) l
where lower(case when json_valid(l.value) then coalesce(json_extract(l.value, '$.name'), l.value) else l.value end) = lower(?)
)`)
args = append(args, label)
}
if !options.IncludeLocallyClosed {
where = append(where, `t.closed_at_local is null`)
}

View File

@ -73,6 +73,52 @@ func TestSearchDocumentsEscapesFTSQuery(t *testing.T) {
}
}
// TestSearchThreadsFiltersAuthorAssigneeAndLabels seeds two open issues whose
// labels/assignees JSON use different shapes (arrays of objects vs. arrays of
// plain strings) and checks that the Author, Assignee and Labels search
// filters select exactly the expected issue for each shape.
func TestSearchThreadsFiltersAuthorAssigneeAndLabels(t *testing.T) {
	ctx := context.Background()
	dbPath := filepath.Join(t.TempDir(), "gitcrawl.db")
	st, openErr := Open(ctx, dbPath)
	if openErr != nil {
		t.Fatalf("open store: %v", openErr)
	}
	defer st.Close()

	repoID, repoErr := st.UpsertRepository(ctx, Repository{Owner: "openclaw", Name: "gitcrawl", FullName: "openclaw/gitcrawl", RawJSON: "{}", UpdatedAt: "2026-04-26T00:00:00Z"})
	if repoErr != nil {
		t.Fatalf("repo: %v", repoErr)
	}

	// Issue 3 stores labels/assignees as arrays of objects.
	objectShaped := Thread{
		RepoID: repoID, GitHubID: "3", Number: 3, Kind: "issue", State: "open",
		Title: "cache bug", AuthorLogin: "alice", HTMLURL: "https://github.com/openclaw/gitcrawl/issues/3",
		LabelsJSON: `[{"name":"bug"},{"name":"cache"}]`, AssigneesJSON: `[{"login":"peter"}]`, RawJSON: "{}", ContentHash: "hash-3", UpdatedAt: "2026-04-26T03:00:00Z",
	}
	// Issue 4 stores labels/assignees as arrays of plain strings.
	stringShaped := Thread{
		RepoID: repoID, GitHubID: "4", Number: 4, Kind: "issue", State: "open",
		Title: "ui bug", AuthorLogin: "bob", HTMLURL: "https://github.com/openclaw/gitcrawl/issues/4",
		LabelsJSON: `["bug"]`, AssigneesJSON: `["alice"]`, RawJSON: "{}", ContentHash: "hash-4", UpdatedAt: "2026-04-26T04:00:00Z",
	}
	for _, seeded := range []Thread{objectShaped, stringShaped} {
		if _, upsertErr := st.UpsertThread(ctx, seeded); upsertErr != nil {
			t.Fatalf("thread %d: %v", seeded.Number, upsertErr)
		}
	}

	// All three filters combined should select only issue 3.
	combined := ThreadSearchOptions{RepoID: repoID, Kind: "issue", State: "open", Author: "alice", Assignee: "peter", Labels: []string{"cache"}, Limit: 10}
	rows, err := st.SearchThreads(ctx, combined)
	if err != nil {
		t.Fatalf("search: %v", err)
	}
	if !(len(rows) == 1 && rows[0].Number == 3) {
		t.Fatalf("rows = %#v", rows)
	}

	// "alice" is the author of issue 3 but the assignee of issue 4; filtering
	// on assignee + label should select only issue 4.
	plainStrings := ThreadSearchOptions{RepoID: repoID, Kind: "issue", State: "open", Assignee: "alice", Labels: []string{"bug"}, Limit: 10}
	rows, err = st.SearchThreads(ctx, plainStrings)
	if err != nil {
		t.Fatalf("search string arrays: %v", err)
	}
	if !(len(rows) == 1 && rows[0].Number == 4) {
		t.Fatalf("string-array rows = %#v", rows)
	}
}
func TestSearchThreadsSupportsPortableSchema(t *testing.T) {
ctx := context.Background()
dbPath := filepath.Join(t.TempDir(), "portable.sync.db")

View File

@ -4,12 +4,9 @@ import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"runtime"
"time"
_ "modernc.org/sqlite"
crawlstore "github.com/vincentkoc/crawlkit/store"
)
const (
@ -39,64 +36,33 @@ type Status struct {
}
func Open(ctx context.Context, path string) (*Store, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return nil, fmt.Errorf("create db dir: %w", err)
}
if err := ensureDBFile(path); err != nil {
return nil, err
}
dsn := fmt.Sprintf(
"file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path})
if err != nil {
return nil, fmt.Errorf("open sqlite: %w", err)
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite: %w", err)
}
if err := tightenDBFilePerms(path); err != nil {
_ = db.Close()
return nil, err
}
db := base.DB()
st := &Store{db: db, path: path}
if err := st.migrate(ctx); err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
return st, nil
}
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
if _, err := os.Stat(path); err != nil {
return nil, fmt.Errorf("stat db file: %w", err)
}
dsn := fmt.Sprintf(
"file:%s?mode=ro&_pragma=query_only(1)&_pragma=foreign_keys(1)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.OpenReadOnly(ctx, path)
if err != nil {
return nil, fmt.Errorf("open sqlite readonly: %w", err)
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite readonly: %w", err)
return nil, err
}
db := base.DB()
st := &Store{db: db, path: path}
current, err := st.schemaVersion(ctx)
if err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
if current > schemaVersion {
_ = db.Close()
_ = base.Close()
return nil, fmt.Errorf("database schema version %d is newer than supported version %d", current, schemaVersion)
}
return st, nil
@ -273,31 +239,3 @@ func (s *Store) schemaVersion(ctx context.Context) (int, error) {
}
return version, nil
}
func ensureDBFile(path string) error {
if _, err := os.Stat(path); err == nil {
return nil
} else if !os.IsNotExist(err) {
return fmt.Errorf("stat db file: %w", err)
}
file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
if err != nil && !os.IsExist(err) {
return fmt.Errorf("create db file: %w", err)
}
if file != nil {
if err := file.Close(); err != nil {
return fmt.Errorf("close db file: %w", err)
}
}
return nil
}
func tightenDBFilePerms(path string) error {
if runtime.GOOS == "windows" {
return nil
}
if err := os.Chmod(path, 0o600); err != nil {
return fmt.Errorf("chmod db file: %w", err)
}
return nil
}

View File

@ -6,6 +6,7 @@ import (
"database/sql"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
@ -456,7 +457,19 @@ func TestPrunePortablePayloads(t *testing.T) {
insert into repositories(id, owner, name, full_name, raw_json, updated_at)
values(1, 'openclaw', 'gitcrawl', 'openclaw/gitcrawl', '{"id":1}', '2026-04-26T00:00:00Z');
insert into threads(id, repo_id, github_id, number, kind, state, title, body, html_url, labels_json, assignees_json, raw_json, content_hash, updated_at)
values(1, 1, '1', 1, 'issue', 'open', 'download stalls', 'abcdefghijklmnopqrstuvwxyz', 'https://github.com/openclaw/gitcrawl/issues/1', '[]', '[]', '{"body":"abcdefghijklmnopqrstuvwxyz"}', 'hash', '2026-04-26T00:00:00Z');
values(1, 1, '1', 1, 'pull_request', 'open', 'download stalls', 'abcdefghijklmnopqrstuvwxyz', 'https://github.com/openclaw/gitcrawl/pull/1', '[]', '[]', '{"body":"abcdefghijklmnopqrstuvwxyz"}', 'hash', '2026-04-26T00:00:00Z');
insert into comments(id, thread_id, github_id, comment_type, author_login, author_type, body, is_bot, raw_json, created_at_gh, updated_at_gh)
values(1, 1, 'c1', 'issue_comment', 'alice', 'User', 'comment abcdefghijklmnopqrstuvwxyz', 0, '{"body":"comment abcdefghijklmnopqrstuvwxyz"}', '2026-04-26T00:00:00Z', '2026-04-26T00:00:00Z');
insert into pull_request_details(thread_id, repo_id, number, base_sha, head_sha, additions, deletions, changed_files, raw_json, fetched_at, updated_at)
values(1, 1, 1, 'base', 'head', 10, 2, 1, '{"mergeable":true}', '2026-04-26T00:00:00Z', '2026-04-26T00:00:00Z');
insert into pull_request_files(thread_id, path, status, additions, deletions, changes, patch, raw_json, fetched_at)
values(1, 'README.md', 'modified', 10, 2, 12, '@@ patch', '{"filename":"README.md"}', '2026-04-26T00:00:00Z');
insert into pull_request_commits(thread_id, sha, message, raw_json, fetched_at)
values(1, 'abc123', 'fix download stall', '{"sha":"abc123"}', '2026-04-26T00:00:00Z');
insert into pull_request_checks(thread_id, name, status, conclusion, details_url, raw_json, fetched_at)
values(1, 'CI', 'completed', 'success', 'https://example.test/check', '{"name":"CI"}', '2026-04-26T00:00:00Z');
insert into github_workflow_runs(repo_id, run_id, run_number, head_branch, head_sha, status, conclusion, workflow_name, html_url, raw_json, fetched_at)
values(1, '99', 99, 'main', 'head', 'completed', 'success', 'CI', 'https://example.test/run', '{"id":99}', '2026-04-26T00:00:00Z');
insert into documents(thread_id, title, body, raw_text, dedupe_text, updated_at)
values(1, 'download stalls', 'abcdefghijklmnopqrstuvwxyz', 'download stalls abcdefghijklmnopqrstuvwxyz', 'download stalls', '2026-04-26T00:00:00Z');
insert into thread_revisions(thread_id, source_updated_at, content_hash, title_hash, body_hash, labels_hash, created_at)
@ -472,7 +485,7 @@ func TestPrunePortablePayloads(t *testing.T) {
if err != nil {
t.Fatalf("prune: %v", err)
}
if stats.DocumentsDeleted != 1 || stats.FingerprintsPruned != 1 {
if stats.DocumentsDeleted != 1 || stats.FingerprintsPruned != 1 || stats.CommentsPruned != 1 || stats.RawJSONPruned == 0 {
t.Fatalf("unexpected stats: %#v", stats)
}
@ -489,12 +502,59 @@ func TestPrunePortablePayloads(t *testing.T) {
if err := st.DB().QueryRowContext(ctx, `select body_excerpt from threads where id = 1`).Scan(&bodyExcerpt); err != nil {
t.Fatalf("thread body excerpt: %v", err)
}
var bodyLength int
if err := st.DB().QueryRowContext(ctx, `select body_length from threads where id = 1`).Scan(&bodyLength); err != nil {
t.Fatalf("thread body length: %v", err)
}
if bodyLength != 26 {
t.Fatalf("thread body_length = %d, want 26", bodyLength)
}
if err := st.DB().QueryRowContext(ctx, `select title_tokens_json, linked_refs_json, module_buckets_json, feature_json from thread_fingerprints where id = 1`).Scan(&titleTokens, &linkedRefs, &buckets, &features); err != nil {
t.Fatalf("fingerprint payload: %v", err)
}
if st.tableExists(ctx, "documents") {
t.Fatal("documents table was not dropped")
}
if !st.tableExists(ctx, "comments") {
t.Fatal("comments table was dropped")
}
var commentBody, commentExcerpt, commentRawJSON string
var commentBodyLength int
if err := st.DB().QueryRowContext(ctx, `select body, body_excerpt, body_length, raw_json from comments where id = 1`).Scan(&commentBody, &commentExcerpt, &commentBodyLength, &commentRawJSON); err != nil {
t.Fatalf("comment portable payload: %v", err)
}
if commentBody != "comment " || commentExcerpt != "comment " || commentBodyLength != 34 || commentRawJSON != "" {
t.Fatalf("comment not pruned: body=%q excerpt=%q length=%d raw=%q", commentBody, commentExcerpt, commentBodyLength, commentRawJSON)
}
var prDetailCount, prFileCount, prCommitCount, prCheckCount, runCount int
if err := st.DB().QueryRowContext(ctx, `select count(*) from pull_request_details where raw_json = ''`).Scan(&prDetailCount); err != nil {
t.Fatalf("pr detail count: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select count(*) from pull_request_files where raw_json = ''`).Scan(&prFileCount); err != nil {
t.Fatalf("pr file count: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select count(*) from pull_request_commits where raw_json = ''`).Scan(&prCommitCount); err != nil {
t.Fatalf("pr commit count: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select count(*) from pull_request_checks where raw_json = ''`).Scan(&prCheckCount); err != nil {
t.Fatalf("pr check count: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select count(*) from github_workflow_runs where raw_json = ''`).Scan(&runCount); err != nil {
t.Fatalf("workflow run count: %v", err)
}
if prDetailCount != 1 || prFileCount != 1 || prCommitCount != 1 || prCheckCount != 1 || runCount != 1 {
t.Fatalf("pr/run rows not retained: detail=%d files=%d commits=%d checks=%d runs=%d", prDetailCount, prFileCount, prCommitCount, prCheckCount, runCount)
}
var portableSchema, capabilities string
if err := st.DB().QueryRowContext(ctx, `select value from portable_metadata where key = 'schema'`).Scan(&portableSchema); err != nil {
t.Fatalf("portable schema metadata: %v", err)
}
if err := st.DB().QueryRowContext(ctx, `select value from portable_metadata where key = 'capabilities'`).Scan(&capabilities); err != nil {
t.Fatalf("portable capabilities metadata: %v", err)
}
if portableSchema != "gitcrawl-portable-sync-v2" || !strings.Contains(capabilities, "comment_excerpts") || !strings.Contains(capabilities, "workflow_runs") {
t.Fatalf("portable metadata schema=%q capabilities=%q", portableSchema, capabilities)
}
if bodyExcerpt != "abcdefgh" || titleTokens != "[]" || linkedRefs != "[]" || buckets != "[]" || features != "{}" {
t.Fatalf("payloads not pruned: bodyExcerpt=%q titleTokens=%q linkedRefs=%q buckets=%q features=%q", bodyExcerpt, titleTokens, linkedRefs, buckets, features)
}

View File

@ -0,0 +1,117 @@
package syncer
import (
"context"
"time"
gh "github.com/openclaw/gitcrawl/internal/github"
"github.com/openclaw/gitcrawl/internal/store"
)
// pullDetailStats counts the rows mapped and stored by one
// syncPullRequestDetails call, one counter per cached collection.
type pullDetailStats struct {
	files, commits, checks, runs int
}
// syncPullRequestDetails fetches the pull request behind thread from the
// GitHub client and stores it through st.UpsertPullRequestCache.
//
// It requests, in order: the pull itself, its files, its commits, the check
// runs for the pull's head SHA (skipped when the head SHA is empty), and up to
// 20 workflow runs filtered by that head SHA. The first failing call aborts
// the sync and its error is returned unchanged with zero stats. On success
// the returned stats hold the number of mapped files, commits, checks and
// runs that were handed to the store.
func (s *Syncer) syncPullRequestDetails(ctx context.Context, st *store.Store, options Options, thread store.Thread) (pullDetailStats, error) {
	var none pullDetailStats
	// One timestamp is taken up front and shared by every row written in this pass.
	stamp := s.now().Format(time.RFC3339Nano)
	owner, repo, number, reporter := options.Owner, options.Repo, thread.Number, options.Reporter

	pull, err := s.client.GetPull(ctx, owner, repo, number, reporter)
	if err != nil {
		return none, err
	}
	fileRows, err := s.client.ListPullFiles(ctx, owner, repo, number, reporter)
	if err != nil {
		return none, err
	}
	commitRows, err := s.client.ListPullCommits(ctx, owner, repo, number, reporter)
	if err != nil {
		return none, err
	}

	headSHA := nestedString(pull, "head", "sha")
	var checkRows []map[string]any
	if headSHA != "" {
		if checkRows, err = s.client.ListCommitCheckRuns(ctx, owner, repo, headSHA, reporter); err != nil {
			return none, err
		}
	}
	// The workflow run listing is requested even when headSHA is empty.
	runRows, err := s.client.ListWorkflowRuns(ctx, owner, repo, gh.ListWorkflowRunsOptions{HeadSHA: headSHA, Limit: 20}, reporter)
	if err != nil {
		return none, err
	}

	var (
		detail  = mapPullDetail(thread, pull, stamp)
		files   = mapPullFiles(thread.ID, fileRows, stamp)
		commits = mapPullCommits(thread.ID, commitRows, stamp)
		checks  = mapPullChecks(thread.ID, checkRows, stamp)
		runs    = mapWorkflowRuns(thread.RepoID, runRows, stamp)
	)
	if err := st.UpsertPullRequestCache(ctx, detail, files, commits, checks, runs); err != nil {
		return none, err
	}

	stats := pullDetailStats{
		files:   len(files),
		commits: len(commits),
		checks:  len(checks),
		runs:    len(runs),
	}
	return stats, nil
}
// mapPullDetail converts a decoded pull request payload into a
// store.PullRequestDetail for the given thread.
//
// Identity fields (thread id, repo id, number) come from thread; SHAs, head
// ref, head repository name, mergeable state and size counters are read from
// the payload. fetchedAt is stored as both FetchedAt and UpdatedAt, and the
// whole payload is serialized into RawJSON.
func mapPullDetail(thread store.Thread, pull map[string]any, fetchedAt string) store.PullRequestDetail {
	var detail store.PullRequestDetail

	detail.ThreadID = thread.ID
	detail.RepoID = thread.RepoID
	detail.Number = thread.Number

	detail.BaseSHA = nestedString(pull, "base", "sha")
	detail.HeadSHA = nestedString(pull, "head", "sha")
	detail.HeadRef = nestedString(pull, "head", "ref")
	detail.HeadRepoFullName = nestedString(pull, "head", "repo", "full_name")
	detail.MergeableState = stringValue(pull["mergeable_state"])

	detail.Additions = intValue(pull["additions"])
	detail.Deletions = intValue(pull["deletions"])
	detail.ChangedFiles = intValue(pull["changed_files"])

	detail.RawJSON = mustJSON(pull)
	detail.FetchedAt = fetchedAt
	detail.UpdatedAt = fetchedAt
	return detail
}
// mapPullFiles converts decoded pull request file payloads into
// store.PullRequestFile values for threadID, preserving input order.
//
// Entries whose "filename" resolves to the empty string are skipped. The
// result is always a non-nil slice, and each entry keeps its original payload
// serialized in RawJSON.
func mapPullFiles(threadID int64, rows []map[string]any, fetchedAt string) []store.PullRequestFile {
	files := make([]store.PullRequestFile, 0, len(rows))
	for i := range rows {
		entry := rows[i]
		path := stringValue(entry["filename"])
		if path == "" {
			continue
		}
		var file store.PullRequestFile
		file.ThreadID = threadID
		file.Path = path
		file.Status = stringValue(entry["status"])
		file.Additions = intValue(entry["additions"])
		file.Deletions = intValue(entry["deletions"])
		file.Changes = intValue(entry["changes"])
		file.PreviousPath = stringValue(entry["previous_filename"])
		file.Patch = stringValue(entry["patch"])
		file.RawJSON = mustJSON(entry)
		file.FetchedAt = fetchedAt
		files = append(files, file)
	}
	return files
}
// mapPullCommits converts decoded pull request commit payloads into
// store.PullRequestCommit values for threadID, preserving input order.
//
// Entries whose "sha" resolves to the empty string are skipped. The message,
// author name and commit date are read from the nested "commit" object, while
// the author login comes from the top-level "author" object. The result is
// always a non-nil slice.
func mapPullCommits(threadID int64, rows []map[string]any, fetchedAt string) []store.PullRequestCommit {
	commits := make([]store.PullRequestCommit, 0, len(rows))
	for i := range rows {
		entry := rows[i]
		sha := stringValue(entry["sha"])
		if sha == "" {
			continue
		}
		var commit store.PullRequestCommit
		commit.ThreadID = threadID
		commit.SHA = sha
		commit.Message = nestedString(entry, "commit", "message")
		commit.AuthorLogin = nestedString(entry, "author", "login")
		commit.AuthorName = nestedString(entry, "commit", "author", "name")
		commit.CommittedAt = nestedString(entry, "commit", "author", "date")
		commit.HTMLURL = stringValue(entry["html_url"])
		commit.RawJSON = mustJSON(entry)
		commit.FetchedAt = fetchedAt
		commits = append(commits, commit)
	}
	return commits
}

View File

@ -0,0 +1,65 @@
package syncer
import "github.com/openclaw/gitcrawl/internal/store"
// mapPullChecks converts decoded check-run payloads into
// store.PullRequestCheck values for threadID, preserving input order.
//
// Entries whose "name" resolves to the empty string are skipped. The result
// is always a non-nil slice, and each entry keeps its original payload
// serialized in RawJSON.
//
// WorkflowName is taken from check_suite.app.name when present. GitHub's REST
// check-run objects carry the owning app at the top level ("app") while
// "check_suite" holds only an id, so the top-level app.name is used as a
// fallback; without it the field is always empty for REST payloads.
func mapPullChecks(threadID int64, rows []map[string]any, fetchedAt string) []store.PullRequestCheck {
	out := make([]store.PullRequestCheck, 0, len(rows))
	for _, row := range rows {
		name := stringValue(row["name"])
		if name == "" {
			continue
		}
		// Prefer the nested location the original code read; fall back to the
		// top-level app object only when that yields nothing.
		workflowName := nestedString(row, "check_suite", "app", "name")
		if workflowName == "" {
			workflowName = nestedString(row, "app", "name")
		}
		out = append(out, store.PullRequestCheck{
			ThreadID:     threadID,
			Name:         name,
			Status:       stringValue(row["status"]),
			Conclusion:   stringValue(row["conclusion"]),
			DetailsURL:   stringValue(row["details_url"]),
			WorkflowName: workflowName,
			StartedAt:    stringValue(row["started_at"]),
			CompletedAt:  stringValue(row["completed_at"]),
			RawJSON:      mustJSON(row),
			FetchedAt:    fetchedAt,
		})
	}
	return out
}
// mapWorkflowRuns converts decoded workflow run payloads into
// store.WorkflowRun values for repoID, preserving input order.
//
// Entries whose "id" resolves to the empty string via jsonID are skipped. The
// payload's "name" is stored as WorkflowName and its "created_at" and
// "updated_at" as CreatedAtGH and UpdatedAtGH. The result is always a non-nil
// slice.
func mapWorkflowRuns(repoID int64, rows []map[string]any, fetchedAt string) []store.WorkflowRun {
	runs := make([]store.WorkflowRun, 0, len(rows))
	for i := range rows {
		entry := rows[i]
		id := jsonID(entry["id"])
		if id == "" {
			continue
		}
		var run store.WorkflowRun
		run.RepoID = repoID
		run.RunID = id
		run.RunNumber = intValue(entry["run_number"])
		run.HeadBranch = stringValue(entry["head_branch"])
		run.HeadSHA = stringValue(entry["head_sha"])
		run.Status = stringValue(entry["status"])
		run.Conclusion = stringValue(entry["conclusion"])
		run.WorkflowName = stringValue(entry["name"])
		run.Event = stringValue(entry["event"])
		run.HTMLURL = stringValue(entry["html_url"])
		run.CreatedAtGH = stringValue(entry["created_at"])
		run.UpdatedAtGH = stringValue(entry["updated_at"])
		run.RawJSON = mustJSON(entry)
		run.FetchedAt = fetchedAt
		runs = append(runs, run)
	}
	return runs
}
// nestedString walks row along the given keys, descending one nested
// map[string]any per key, and returns stringValue of whatever is found at the
// end of the path. It returns "" as soon as a value that still has keys left
// to descend into is not a map[string]any. A missing key yields a nil value,
// which is handled like any other non-map value.
func nestedString(row map[string]any, path ...string) string {
	var node any = row
	for i := 0; i < len(path); i++ {
		object, isObject := node.(map[string]any)
		if !isObject {
			return ""
		}
		node = object[path[i]]
	}
	return stringValue(node)
}

View File

@ -6,6 +6,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"log/slog"
"strconv"
"strings"
"time"
@ -13,15 +14,21 @@ import (
"github.com/openclaw/gitcrawl/internal/documents"
gh "github.com/openclaw/gitcrawl/internal/github"
"github.com/openclaw/gitcrawl/internal/store"
"github.com/vincentkoc/crawlkit/progress"
)
type GitHubClient interface {
GetRepo(ctx context.Context, owner, repo string, reporter gh.Reporter) (map[string]any, error)
GetIssue(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error)
GetPull(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error)
ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error)
ListIssueComments(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
ListPullReviews(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
ListPullReviewComments(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
ListPullFiles(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
ListPullCommits(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter gh.Reporter) ([]map[string]any, error)
ListWorkflowRuns(ctx context.Context, owner, repo string, options gh.ListWorkflowRunsOptions, reporter gh.Reporter) ([]map[string]any, error)
}
type Syncer struct {
@ -31,14 +38,16 @@ type Syncer struct {
}
type Options struct {
Owner string
Repo string
State string
Since string
Limit int
Numbers []int
IncludeComments bool
Reporter gh.Reporter
Owner string
Repo string
State string
Since string
Limit int
Numbers []int
IncludeComments bool
IncludePRDetails bool
Reporter gh.Reporter
Logger *slog.Logger
}
type Stats struct {
@ -47,6 +56,11 @@ type Stats struct {
IssuesSynced int `json:"issues_synced"`
PullRequestsSynced int `json:"pull_requests_synced"`
CommentsSynced int `json:"comments_synced"`
PRDetailsSynced int `json:"pr_details_synced"`
PRFilesSynced int `json:"pr_files_synced"`
PRCommitsSynced int `json:"pr_commits_synced"`
PRChecksSynced int `json:"pr_checks_synced"`
WorkflowRunsSynced int `json:"workflow_runs_synced"`
ThreadsClosed int `json:"threads_closed"`
RequestedSince string `json:"requested_since,omitempty"`
Limit int `json:"limit,omitempty"`
@ -121,6 +135,15 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
MetadataOnly: !options.IncludeComments,
StartedAt: started,
}
tracker := progress.New(options.Logger, progress.Options{
Name: "sync",
Unit: "threads",
Total: int64(len(rows)),
Attrs: []any{
"repository", stats.Repository,
"state", state,
},
})
persist := func(st *store.Store) error {
for _, row := range rows {
thread := mapIssueToThread(repoID, row, s.now().Format(time.RFC3339Nano))
@ -138,6 +161,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
}
stats.CommentsSynced += len(comments)
}
if options.IncludePRDetails && thread.Kind == "pull_request" {
detailStats, err := s.syncPullRequestDetails(ctx, st, options, thread)
if err != nil {
return err
}
stats.PRDetailsSynced++
stats.PRFilesSynced += detailStats.files
stats.PRCommitsSynced += detailStats.commits
stats.PRChecksSynced += detailStats.checks
stats.WorkflowRunsSynced += detailStats.runs
}
if _, err := st.UpsertDocument(ctx, documents.BuildWithComments(thread, comments)); err != nil {
return err
}
@ -147,6 +181,11 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
} else {
stats.IssuesSynced++
}
tracker.Add(1,
"number", thread.Number,
"kind", thread.Kind,
"thread_state", thread.State,
)
}
if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 {
closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since)
@ -171,13 +210,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
}
if !options.IncludeComments {
if err := s.store.WithTx(ctx, persist); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}
if err := persist(s.store); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}

View File

@ -1,9 +1,12 @@
package syncer
import (
"bytes"
"context"
"encoding/json"
"log/slog"
"path/filepath"
"strings"
"testing"
"time"
@ -49,6 +52,18 @@ func (fakeGitHub) GetIssue(ctx context.Context, owner, repo string, number int,
}, nil
}
func (fakeGitHub) GetPull(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error) {
return map[string]any{
"number": number,
"head": map[string]any{"sha": "head-sha", "ref": "feature", "repo": map[string]any{"full_name": "openclaw/gitcrawl"}},
"base": map[string]any{"sha": "base-sha"},
"mergeable_state": "clean",
"additions": 12,
"deletions": 3,
"changed_files": 2,
}, nil
}
func (fakeGitHub) ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error) {
if options.State == "closed" {
return nil, nil
@ -105,6 +120,22 @@ func (fakeGitHub) ListPullReviewComments(ctx context.Context, owner, repo string
return nil, nil
}
func (fakeGitHub) ListPullFiles(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) {
return nil, nil
}
func (fakeGitHub) ListPullCommits(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) {
return nil, nil
}
func (fakeGitHub) ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter gh.Reporter) ([]map[string]any, error) {
return nil, nil
}
func (fakeGitHub) ListWorkflowRuns(ctx context.Context, owner, repo string, options gh.ListWorkflowRunsOptions, reporter gh.Reporter) ([]map[string]any, error) {
return nil, nil
}
type sinceCaptureGitHub struct {
fakeGitHub
since string
@ -195,6 +226,59 @@ func (pullCommentGitHub) ListPullReviewComments(ctx context.Context, owner, repo
}}, nil
}
type pullDetailsGitHub struct {
fakeGitHub
}
func (pullDetailsGitHub) ListPullFiles(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) {
return []map[string]any{{
"filename": "internal/cache.go",
"status": "modified",
"additions": 10,
"deletions": 2,
"changes": 12,
"patch": "@@ cache",
}}, nil
}
func (pullDetailsGitHub) ListPullCommits(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) {
return []map[string]any{{
"sha": "commit-sha",
"html_url": "https://github.com/openclaw/gitcrawl/commit/commit-sha",
"author": map[string]any{"login": "alice"},
"commit": map[string]any{
"message": "feat: cache",
"author": map[string]any{"name": "Alice", "date": "2026-04-26T00:00:00Z"},
},
}}, nil
}
func (pullDetailsGitHub) ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter gh.Reporter) ([]map[string]any, error) {
return []map[string]any{{
"name": "test",
"status": "completed",
"conclusion": "success",
"details_url": "https://github.com/openclaw/gitcrawl/actions/runs/99",
"check_suite": map[string]any{"app": map[string]any{"name": "GitHub Actions"}},
}}, nil
}
func (pullDetailsGitHub) ListWorkflowRuns(ctx context.Context, owner, repo string, options gh.ListWorkflowRunsOptions, reporter gh.Reporter) ([]map[string]any, error) {
return []map[string]any{{
"id": 99,
"run_number": 7,
"head_branch": "feature",
"head_sha": options.HeadSHA,
"status": "completed",
"conclusion": "success",
"name": "CI",
"event": "pull_request",
"html_url": "https://github.com/openclaw/gitcrawl/actions/runs/99",
"created_at": "2026-04-26T00:00:00Z",
"updated_at": "2026-04-26T00:01:00Z",
}}, nil
}
func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
ctx := context.Background()
st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
@ -205,7 +289,13 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
s := New(fakeGitHub{}, st)
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", IncludeComments: true})
var progressLogs bytes.Buffer
stats, err := s.Sync(ctx, Options{
Owner: "openclaw",
Repo: "gitcrawl",
IncludeComments: true,
Logger: testProgressLogger(&progressLogs),
})
if err != nil {
t.Fatalf("sync: %v", err)
}
@ -240,6 +330,18 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
if documentCount != 1 {
t.Fatalf("document count: got %d want 1", documentCount)
}
for _, want := range []string{
`msg="sync progress"`,
`state=finished`,
`unit=threads`,
`percent=100.0`,
`completion=100.0%`,
`repository=openclaw/gitcrawl`,
} {
if !strings.Contains(progressLogs.String(), want) {
t.Fatalf("missing %q in progress logs:\n%s", want, progressLogs.String())
}
}
}
func TestSyncHydratesPullReviewComments(t *testing.T) {
@ -271,6 +373,42 @@ func TestSyncHydratesPullReviewComments(t *testing.T) {
}
}
func TestSyncHydratesPullRequestDetails(t *testing.T) {
ctx := context.Background()
st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
if err != nil {
t.Fatalf("open store: %v", err)
}
defer st.Close()
s := New(pullDetailsGitHub{}, st)
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", Numbers: []int{8}, IncludePRDetails: true})
if err != nil {
t.Fatalf("sync: %v", err)
}
if stats.PRDetailsSynced != 1 || stats.PRFilesSynced != 1 || stats.PRCommitsSynced != 1 || stats.PRChecksSynced != 1 || stats.WorkflowRunsSynced != 1 {
t.Fatalf("stats = %#v", stats)
}
repo, err := st.RepositoryByFullName(ctx, "openclaw/gitcrawl")
if err != nil {
t.Fatalf("repo: %v", err)
}
cache, err := st.PullRequestCache(ctx, repo.ID, 8)
if err != nil {
t.Fatalf("pr cache: %v", err)
}
if cache.Detail.HeadSHA != "head-sha" || len(cache.Files) != 1 || len(cache.Commits) != 1 || len(cache.Checks) != 1 {
t.Fatalf("cache = %+v", cache)
}
runs, err := st.ListWorkflowRuns(ctx, repo.ID, store.WorkflowRunListOptions{HeadSHA: "head-sha", Limit: 10})
if err != nil {
t.Fatalf("workflow runs: %v", err)
}
if len(runs) != 1 || runs[0].RunID != "99" {
t.Fatalf("runs = %+v", runs)
}
}
func TestSyncCanTargetIssueNumbers(t *testing.T) {
ctx := context.Background()
st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
@ -527,3 +665,51 @@ func TestMappingHelperBranches(t *testing.T) {
t.Fatalf("comment = %+v", comment)
}
}
func TestMappingFallbackBranches(t *testing.T) {
now := time.Date(2026, 5, 5, 12, 0, 0, 123, time.UTC)
normalized, err := normalizeSince("2026-05-05T12:00:00+02:00", now)
if err != nil {
t.Fatalf("normalize iso since: %v", err)
}
if normalized != "2026-05-05T10:00:00Z" {
t.Fatalf("normalized iso since = %q", normalized)
}
if got, err := normalizeSince("2w", now); err != nil || got != "2026-04-21T12:00:00.000000123Z" {
t.Fatalf("normalize weeks = %q, %v", got, err)
}
if got := mustJSON(map[string]any{"bad": make(chan int)}); got != "{}" {
t.Fatalf("mustJSON marshal fallback = %q", got)
}
thread := mapIssueToThread(99, map[string]any{
"id": int64(123),
"number": 456,
"state": "closed",
"title": "fallbacks",
"body": "body",
"html_url": "https://github.com/openclaw/gitcrawl/issues/456",
"labels": nil,
"assignees": nil,
"created_at": "2026-05-05T10:00:00Z",
"updated_at": "2026-05-05T11:00:00Z",
"closed_at": "2026-05-05T12:00:00Z",
}, "2026-05-05T12:00:00Z")
if thread.LabelsJSON != "[]" || thread.AssigneesJSON != "[]" {
t.Fatalf("nullable label defaults: labels=%s assignees=%s", thread.LabelsJSON, thread.AssigneesJSON)
}
if thread.GitHubID != "123" || thread.Number != 456 || thread.AuthorLogin != "" || thread.ClosedAtGitHub == "" {
t.Fatalf("thread = %+v", thread)
}
}
func testProgressLogger(out *bytes.Buffer) *slog.Logger {
return slog.New(slog.NewTextHandler(out, &slog.HandlerOptions{
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
if attr.Key == slog.TimeKey {
return slog.Attr{}
}
return attr
},
}))
}

652
scripts/build-docs-site.mjs Normal file
View File

@ -0,0 +1,652 @@
#!/usr/bin/env node
import fs from "node:fs";
import path from "node:path";
import { css, faviconSvg, js, themeInitJs, themeToggleHtml } from "./docs-site-assets.mjs";
// Build entry point: renders every Markdown file under docs/ into a static
// HTML site in dist/docs-site. Paths are resolved from the current working
// directory.
const root = process.cwd();
const docsDir = path.join(root, "docs");
const outDir = path.join(root, "dist", "docs-site");
const repoBase = "https://github.com/openclaw/gitcrawl";
const repoEditBase = `${repoBase}/edit/main/docs`;
// Optional custom domain; when present it becomes the base for canonical and
// social-card URLs.
const cname = readCname();
const siteBase = cname ? `https://${cname}` : "";
// Sidebar layout: [section title, ordered page paths relative to docs/].
const sections = [
["Start", ["index.md", "installation.md", "quickstart.md", "concepts.md"]],
["Configure", ["configuration.md", "sync.md", "refresh-and-embed.md"]],
["Use", ["search.md", "clustering.md", "governance.md", "tui.md", "gh-shim.md"]],
["Operate", ["portable-stores.md", "automation.md"]],
["Reference", ["commands.md", "reference.md"]],
];
// Start from an empty output directory so stale pages never survive a rebuild.
fs.rmSync(outDir, { recursive: true, force: true });
fs.mkdirSync(outDir, { recursive: true });
// Load every Markdown source: split front matter, strip kramdown markup and
// resolve the page title and output path.
const pages = allMarkdown(docsDir).map((file) => {
const rel = path.relative(docsDir, file).replaceAll(path.sep, "/");
const raw = fs.readFileSync(file, "utf8");
const { frontmatter, body } = parseFrontmatter(raw);
const cleaned = cleanKramdown(body);
// Title precedence: front matter, then first "# " heading, then file name.
const title = frontmatter.title || firstHeading(cleaned) || titleize(path.basename(rel, ".md"));
return { file, rel, title, outRel: outPath(rel, frontmatter), markdown: cleaned, frontmatter };
});
// Lookup tables used by link rewriting: by docs-relative path and by
// normalized front-matter permalink.
const pageMap = new Map(pages.map((page) => [page.rel, page]));
const permalinkMap = new Map();
for (const page of pages) {
if (page.frontmatter.permalink) {
permalinkMap.set(normalizePermalink(page.frontmatter.permalink), page);
}
}
// Resolve the configured sections to page objects; pages that do not exist
// are dropped, and so are sections left with no pages.
const nav = sections
.map(([name, rels]) => ({
name,
pages: rels.map((rel) => pageMap.get(rel)).filter(Boolean),
}))
.filter((section) => section.pages.length);
const sectionByRel = new Map();
for (const section of nav) for (const page of section.pages) sectionByRel.set(page.rel, section.name);
// Flat reading order across all sections, used for previous/next links.
const orderedPages = nav.flatMap((s) => s.pages);
// Render each page. Pages outside the navigation get no previous/next links
// (idx is -1) and fall back to the "Docs" section label.
for (const page of pages) {
const html = markdownToHtml(page.markdown, page.rel);
const toc = tocFromHtml(html);
const idx = orderedPages.findIndex((p) => p.rel === page.rel);
const prev = idx > 0 ? orderedPages[idx - 1] : null;
const next = idx >= 0 && idx < orderedPages.length - 1 ? orderedPages[idx + 1] : null;
const sectionName = sectionByRel.get(page.rel) || "Docs";
const pageOut = path.join(outDir, page.outRel);
fs.mkdirSync(path.dirname(pageOut), { recursive: true });
fs.writeFileSync(pageOut, layout({ page, html, toc, prev, next, sectionName }), "utf8");
}
// Static files that accompany the rendered pages.
fs.writeFileSync(path.join(outDir, "favicon.svg"), faviconSvg(), "utf8");
copyStaticAsset("social-card.svg");
copyStaticAsset("social-card.png");
// An empty .nojekyll marker is written next to the pages.
fs.writeFileSync(path.join(outDir, ".nojekyll"), "", "utf8");
if (cname) fs.writeFileSync(path.join(outDir, "CNAME"), cname, "utf8");
// validateLinks is defined later in this file; it runs over the finished output.
validateLinks(outDir);
console.log(`built docs site: ${path.relative(root, outDir)}`);
/**
 * Reads the custom domain from a CNAME file, preferring docs/CNAME over the
 * repository-root CNAME.
 *
 * @returns {string} The trimmed file contents, or "" when neither file exists.
 */
function readCname() {
  const found = [docsDir, root]
    .map((dir) => path.join(dir, "CNAME"))
    .find((file) => fs.existsSync(file));
  return found === undefined ? "" : fs.readFileSync(found, "utf8").trim();
}
/**
 * Copies docs/<name> into the output directory under the same name.
 * Does nothing when the source file is absent.
 *
 * @param {string} name - File name relative to the docs directory.
 */
function copyStaticAsset(name) {
  const from = path.join(docsDir, name);
  if (!fs.existsSync(from)) return;
  const to = path.join(outDir, name);
  fs.copyFileSync(from, to);
}
/**
 * Splits a leading `---` delimited front-matter block off a Markdown source.
 *
 * Only flat `key: value` lines are understood; one pair of matching single or
 * double quotes around a value is removed. Lines that do not look like
 * `key: value` are ignored. Both LF and CRLF line endings are accepted, so
 * files saved with Windows line endings keep their title and permalink.
 *
 * @param {string} raw - Full file contents.
 * @returns {{frontmatter: Object<string, string>, body: string}} Parsed keys
 *   and the remaining text; when no front matter is present `frontmatter` is
 *   empty and `body` is `raw` unchanged.
 */
function parseFrontmatter(raw) {
  // `\r?` on every line break: the original LF-only pattern never matched
  // CRLF files, which left the front matter inside the rendered body.
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?/);
  if (!match) return { frontmatter: {}, body: raw };
  const frontmatter = {};
  for (const line of match[1].split(/\r?\n/)) {
    const entry = line.match(/^([A-Za-z0-9_-]+):\s*(.*?)\s*$/);
    if (!entry) continue;
    const [, key, rawValue] = entry;
    const isQuoted =
      (rawValue.startsWith('"') && rawValue.endsWith('"')) ||
      (rawValue.startsWith("'") && rawValue.endsWith("'"));
    frontmatter[key] = isQuoted ? rawValue.slice(1, -1) : rawValue;
  }
  return { frontmatter, body: raw.slice(match[0].length) };
}
/**
 * Removes kramdown-only markup from a Markdown body.
 *
 * Dropped: lines consisting solely of an attribute list (`{: ... }`), the
 * `1. TOC` / `{:toc}` marker pair, `<div ... markdown="1">` opening lines and
 * the `</div>` lines that close them. A trailing attribute list on any other
 * line is stripped. CRLF line endings are converted to LF.
 *
 * @param {string} body - Markdown without front matter.
 * @returns {string} Cleaned Markdown joined with "\n".
 */
function cleanKramdown(body) {
  const source = body.replace(/\r\n/g, "\n").split("\n");
  const kept = [];
  let openDivs = 0;
  let index = 0;
  while (index < source.length) {
    const current = source[index];
    index += 1; // `index` now points at the line after `current`
    if (/^\s*\{:\s*[^}]*\}\s*$/.test(current)) continue;
    const isTocMarker = /^\s*1\.\s+TOC\s*$/.test(current) && /^\s*\{:toc\}\s*$/.test(source[index] || "");
    if (isTocMarker) {
      index += 1; // also swallow the `{:toc}` line
      continue;
    }
    if (/^\s*<div\b[^>]*\bmarkdown\s*=\s*"1"[^>]*>\s*$/.test(current)) {
      openDivs += 1;
      continue;
    }
    if (openDivs > 0 && /^\s*<\/div>\s*$/.test(current)) {
      openDivs -= 1;
      continue;
    }
    kept.push(current.replace(/\s*\{:\s*[^}]*\}\s*$/, ""));
  }
  return kept.join("\n");
}
/**
 * Canonicalizes a permalink so it can be used as a lookup key: trimmed, with a
 * leading "/", and without one trailing "/" (the root stays "/").
 *
 * @param {string} value - Permalink as written in front matter or a link.
 * @returns {string} Normalized permalink; "/" for blank input.
 */
function normalizePermalink(value) {
  const trimmed = value.trim();
  if (trimmed === "") return "/";
  const rooted = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
  return rooted.length > 1 && rooted.endsWith("/") ? rooted.slice(0, -1) : rooted;
}
/**
 * Recursively collects every `.md` file below a directory.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths (prefixed with `dir`) in default string sort order.
 */
function allMarkdown(dir) {
  const found = [];
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      found.push(...allMarkdown(full));
    } else if (entry.name.endsWith(".md")) {
      found.push(full);
    }
  }
  return found.sort();
}
/**
 * Maps a docs-relative Markdown path to its output path inside the site.
 *
 * A front-matter permalink wins and always produces a directory-style
 * `<permalink>/index.html`. Otherwise `index.md` and any `README.md` become
 * `index.html` in their directory, and every other file swaps `.md` for `.html`.
 *
 * @param {string} rel - Path relative to docs/, using "/" separators.
 * @param {{permalink?: string}} [frontmatter] - Parsed front matter.
 * @returns {string} Output path relative to the site root.
 */
function outPath(rel, frontmatter = {}) {
  const { permalink } = frontmatter;
  if (permalink) {
    const normalized = normalizePermalink(permalink);
    return normalized === "/" ? "index.html" : `${normalized.slice(1)}/index.html`;
  }
  if (rel === "index.md" || rel === "README.md") return "index.html";
  return rel.endsWith("/README.md")
    ? rel.replace(/README\.md$/, "index.html")
    : rel.replace(/\.md$/, ".html");
}
/**
 * Returns the text of the first level-1 (`# `) heading in a Markdown string.
 *
 * @param {string} markdown - Markdown source.
 * @returns {string|undefined} Trimmed heading text, or undefined when absent.
 */
function firstHeading(markdown) {
  const found = markdown.match(/^#\s+(.+)$/m);
  if (!found) return undefined;
  return found[1].trim();
}
/**
 * Turns a file-name slug into a display title: hyphens become spaces and the
 * first character of every word is upper-cased.
 *
 * @param {string} input - Slug such as "refresh-and-embed".
 * @returns {string} Title such as "Refresh And Embed".
 */
function titleize(input) {
  const spaced = input.split("-").join(" ");
  return spaced.replace(/\b\w/g, (initial) => initial.toUpperCase());
}
/**
 * Converts the Markdown subset used by the docs into HTML.
 *
 * Supported blocks: fenced code (``` with optional language), blockquotes
 * (rendered recursively), horizontal rules, headings of level 1-4 with slug
 * ids, pipe tables with alignment, flat bullet and numbered lists, and
 * paragraphs. Inline markup is delegated to `inline`.
 *
 * A fenced code block that is still open at the end of the input is emitted
 * instead of being dropped, so a missing closing fence cannot silently delete
 * the rest of a page.
 *
 * @param {string} markdown - Markdown source (LF or CRLF).
 * @param {string} currentRel - docs-relative path of the page, used for link
 *   rewriting and for the home-page action paragraph.
 * @returns {string} HTML fragments joined with "\n".
 */
function markdownToHtml(markdown, currentRel) {
  const lines = markdown.replace(/\r\n/g, "\n").split("\n");
  const html = [];
  let paragraph = [];
  let list = null;
  let fence = null;
  let blockquote = [];

  const flushParagraph = () => {
    if (!paragraph.length) return;
    const text = paragraph.join(" ");
    // The home page's "Quickstart / View on GitHub" link pair gets its own class.
    const className = currentRel === "index.md" && /^\[Quickstart\]\([^)]*\)\s+\[View on GitHub\]\(/.test(text) ? ' class="home-actions"' : "";
    html.push(`<p${className}>${inline(text, currentRel)}</p>`);
    paragraph = [];
  };
  const closeList = () => {
    if (!list) return;
    html.push(`</${list}>`);
    list = null;
  };
  const flushBlockquote = () => {
    if (!blockquote.length) return;
    const inner = markdownToHtml(blockquote.join("\n"), currentRel);
    html.push(`<blockquote>${inner}</blockquote>`);
    blockquote = [];
  };
  const flushFence = () => {
    if (!fence) return;
    html.push(`<pre><code class="language-${escapeAttr(fence.lang)}">${highlightCode(fence.lines.join("\n"), fence.lang)}</code></pre>`);
    fence = null;
  };
  // Splits one table row into trimmed cells; `\|` is a literal pipe.
  const splitRow = (line) => {
    let trimmed = line.trim();
    if (trimmed.startsWith("|")) trimmed = trimmed.slice(1);
    if (trimmed.endsWith("|") && !trimmed.endsWith("\\|")) trimmed = trimmed.slice(0, -1);
    const cells = [];
    let current = "";
    for (let idx = 0; idx < trimmed.length; idx++) {
      const char = trimmed[idx];
      if (char === "\\" && trimmed[idx + 1] === "|") {
        current += "\\|";
        idx += 1;
        continue;
      }
      if (char === "|") {
        cells.push(current.trim().replace(/\\\|/g, "|"));
        current = "";
        continue;
      }
      current += char;
    }
    cells.push(current.trim().replace(/\\\|/g, "|"));
    return cells;
  };
  const isDivider = (line) => /^\s*\|?\s*:?-{2,}:?\s*(\|\s*:?-{2,}:?\s*)+\|?\s*$/.test(line);

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const fenceMatch = line.match(/^```(\w+)?\s*$/);
    if (fenceMatch) {
      flushParagraph();
      closeList();
      flushBlockquote();
      if (fence) {
        flushFence();
      } else {
        fence = { lang: fenceMatch[1] || "text", lines: [] };
      }
      continue;
    }
    if (fence) {
      fence.lines.push(line);
      continue;
    }
    if (/^>\s?/.test(line)) {
      flushParagraph();
      closeList();
      blockquote.push(line.replace(/^>\s?/, ""));
      continue;
    }
    flushBlockquote();
    if (!line.trim()) {
      flushParagraph();
      closeList();
      continue;
    }
    if (/^\s*---+\s*$/.test(line)) {
      flushParagraph();
      closeList();
      html.push("<hr>");
      continue;
    }
    const heading = line.match(/^(#{1,4})\s+(.+)$/);
    if (heading) {
      flushParagraph();
      closeList();
      const level = heading[1].length;
      const text = heading[2].trim();
      const id = slug(text);
      const inner = inline(text, currentRel);
      if (level === 1) {
        html.push(`<h1 id="${id}">${inner}</h1>`);
      } else {
        html.push(`<h${level} id="${id}"><a class="anchor" href="#${id}" aria-label="Anchor link">#</a>${inner}</h${level}>`);
      }
      continue;
    }
    // A table needs a header row with at least two pipes followed by a divider row.
    if (line.trimStart().startsWith("|") && line.includes("|", line.indexOf("|") + 1) && isDivider(lines[i + 1] || "")) {
      flushParagraph();
      closeList();
      const header = splitRow(line);
      const aligns = splitRow(lines[i + 1]).map((cell) => {
        const left = cell.startsWith(":");
        const right = cell.endsWith(":");
        return right && left ? "center" : right ? "right" : left ? "left" : "";
      });
      i += 1;
      const rows = [];
      while (i + 1 < lines.length && lines[i + 1].trimStart().startsWith("|")) {
        i += 1;
        rows.push(splitRow(lines[i]));
      }
      const th = header.map((c, idx) => `<th${aligns[idx] ? ` style="text-align:${aligns[idx]}"` : ""}>${inline(c, currentRel)}</th>`).join("");
      const tb = rows.map((r) => `<tr>${r.map((c, idx) => `<td${aligns[idx] ? ` style="text-align:${aligns[idx]}"` : ""}>${inline(c, currentRel)}</td>`).join("")}</tr>`).join("");
      html.push(`<table><thead><tr>${th}</tr></thead><tbody>${tb}</tbody></table>`);
      continue;
    }
    const bullet = line.match(/^\s*-\s+(.+)$/);
    const numbered = line.match(/^\s*\d+\.\s+(.+)$/);
    if (bullet || numbered) {
      flushParagraph();
      const tag = bullet ? "ul" : "ol";
      if (list && list !== tag) closeList();
      if (!list) {
        list = tag;
        html.push(`<${tag}>`);
      }
      html.push(`<li>${inline((bullet || numbered)[1], currentRel)}</li>`);
      continue;
    }
    paragraph.push(line.trim());
  }
  flushParagraph();
  closeList();
  flushBlockquote();
  // An unterminated fence ends at the end of the document.
  flushFence();
  return html.join("\n");
}
/**
 * Picks the syntax highlighter for a fenced code block by language tag.
 * Unknown or missing languages are only HTML-escaped.
 *
 * @param {string} code - Raw code text.
 * @param {string} [lang] - Language tag from the fence (case-insensitive).
 * @returns {string} Highlighted HTML.
 */
function highlightCode(code, lang) {
  const language = String(lang || "text").toLowerCase();
  switch (language) {
    case "bash":
    case "sh":
    case "shell":
    case "zsh":
      return highlightBash(code);
    case "json":
      return highlightJSON(code);
    case "toml":
      return highlightConfig(code, "toml");
    case "yaml":
    case "yml":
      return highlightConfig(code, "yaml");
    case "cron":
      return highlightCron(code);
    default:
      return escapeHtml(code);
  }
}
/**
 * Highlights shell code line by line. Whole-line comments become a single
 * comment span; otherwise strings, variables, options, a fixed keyword list
 * and trailing comments are wrapped in spans.
 *
 * @param {string} code - Shell source.
 * @returns {string} Highlighted HTML with the original line breaks.
 */
function highlightBash(code) {
  const tokenPattern = /("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|`[^`]*`|\$\{?[A-Za-z_][A-Za-z0-9_]*\}?|--?[A-Za-z0-9][A-Za-z0-9_-]*|\b(?:brew|case|cd|curl|do|done|else|esac|export|fi|for|gh|git|gitcrawl|go|if|in|jq|ln|local|mkdir|set|then|while)\b|#.*)/g;
  const classify = (token) => {
    if (token.startsWith("#")) return span("comment", token);
    if (/^["'`]/.test(token)) return span("string", token);
    if (token.startsWith("$")) return span("variable", token);
    if (token.startsWith("-")) return span("option", token);
    return span("keyword", token);
  };
  const rendered = [];
  for (const line of code.split("\n")) {
    const isCommentLine = /^\s*#/.test(line);
    rendered.push(isCommentLine ? span("comment", line) : highlightSegments(line, tokenPattern, classify));
  }
  return rendered.join("\n");
}
/**
 * Highlights JSON: object keys, string values, true/false/null and numbers.
 * The colon after a key stays outside the key span.
 *
 * @param {string} code - JSON text.
 * @returns {string} Highlighted HTML.
 */
function highlightJSON(code) {
  const tokenPattern = /("(?:\\.|[^"\\])*"\s*:)|("(?:\\.|[^"\\])*")|\b(?:true|false|null)\b|-?\b\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\b/g;
  const classify = (token) => {
    if (token.endsWith(":")) {
      const key = token.slice(0, -1);
      return `${span("key", key)}:`;
    }
    if (token.startsWith('"')) return span("string", token);
    const kind = /^(?:true|false|null)$/.test(token) ? "literal" : "number";
    return span(kind, token);
  };
  return highlightSegments(code, tokenPattern, classify);
}
/**
 * Highlights TOML or YAML line by line. A line that starts with `#` is one
 * comment span. Otherwise the first `#` not directly preceded by a quote
 * starts a trailing comment, and the part before it is tokenized into key,
 * string, literal and number tokens via `configToken`.
 *
 * @param {string} code - Config source.
 * @param {string} lang - "toml" selects `key =` syntax; anything else uses
 *   YAML's `key:` syntax (which also recognizes `null`).
 * @returns {string} Highlighted HTML with the original line breaks.
 */
function highlightConfig(code, lang) {
  const tokenPattern = lang === "toml"
    ? /(^\s*[A-Za-z0-9_.-]+(?=\s*=))|("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')|\b(?:true|false)\b|-?\b\d+(?:\.\d+)?\b/g
    : /(^\s*[A-Za-z0-9_.-]+(?=\s*:))|("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')|\b(?:true|false|null)\b|-?\b\d+(?:\.\d+)?\b/g;
  const renderLine = (line) => {
    if (/^\s*#/.test(line)) return span("comment", line);
    const hashMatch = line.match(/(^|[^"'])#.*/);
    if (!hashMatch) return highlightSegments(line, tokenPattern, configToken);
    // Skip the captured character before "#" so the cut lands on the "#".
    const cut = hashMatch.index + hashMatch[1].length;
    return highlightSegments(line.slice(0, cut), tokenPattern, configToken) + span("comment", line.slice(cut));
  };
  return code.split("\n").map(renderLine).join("\n");
}
/**
 * Wraps one TOML/YAML token in the span for its kind.
 *
 * Literals (`true`, `false`, `null`) and plain numbers are recognized before
 * the key test. The key pattern accepts letters, digits, ".", "_" and "-", so
 * testing it first classified every literal and number as a key and left
 * those branches unreachable. The trade-off is that a key spelled exactly
 * like a literal or a number, with no indentation, is coloured as a value.
 *
 * @param {string} token - Matched token text; keys may carry leading
 *   indentation, which is kept outside the span.
 * @returns {string} Highlighted HTML for the token.
 */
function configToken(token) {
  if (/^(?:true|false|null)$/.test(token)) return span("literal", token);
  if (/^-?\d+(?:\.\d+)?$/.test(token)) return span("number", token);
  if (/^\s*[A-Za-z0-9_.-]+$/.test(token)) {
    const leading = token.match(/^\s*/)[0];
    return `${escapeHtml(leading)}${span("key", token.slice(leading.length))}`;
  }
  if (/^["']/.test(token)) return span("string", token);
  // Anything else the tokenizer produced is treated as a number, as before.
  return span("number", token);
}
/**
 * Highlights crontab text line by line: schedule fields (`*`, numbers, ranges
 * and lists) as numbers, quoted strings, `NAME=` assignments as keys, and
 * comments.
 *
 * @param {string} code - Crontab source.
 * @returns {string} Highlighted HTML with the original line breaks.
 */
function highlightCron(code) {
  const tokenPattern = /(\*|(?:\d+)(?:[-/,]\d+)*)|("[^"]*"|'[^']*')|#.*|\b[A-Z_][A-Z0-9_]*=/g;
  const classify = (token) => {
    if (token.startsWith("#")) return span("comment", token);
    if (/^["']/.test(token)) return span("string", token);
    if (token.endsWith("=")) return `${span("key", token.slice(0, -1))}=`;
    return span("number", token);
  };
  const rendered = [];
  for (const line of code.split("\n")) {
    rendered.push(/^\s*#/.test(line) ? span("comment", line) : highlightSegments(line, tokenPattern, classify));
  }
  return rendered.join("\n");
}
/**
 * Walks every match of `pattern` in `text`, HTML-escaping the text between
 * matches and replacing each match with `classify(match)`.
 *
 * @param {string} text - Source text.
 * @param {RegExp} pattern - Global regular expression describing tokens.
 * @param {(token: string) => string} classify - Returns the HTML for a token.
 * @returns {string} The assembled HTML.
 */
function highlightSegments(text, pattern, classify) {
  const pieces = [];
  let cursor = 0;
  for (const { 0: token, index } of text.matchAll(pattern)) {
    pieces.push(escapeHtml(text.slice(cursor, index)));
    pieces.push(String(classify(token)));
    cursor = index + token.length;
  }
  pieces.push(escapeHtml(text.slice(cursor)));
  return pieces.join("");
}
/**
 * Wraps a value in a highlight span, HTML-escaping the value.
 *
 * @param {string} kind - Suffix of the `hl-` CSS class.
 * @param {string} value - Raw token text.
 * @returns {string} `<span class="hl-kind">…</span>` markup.
 */
function span(kind, value) {
  const content = escapeHtml(value);
  return ['<span class="hl-', kind, '">', content, "</span>"].join("");
}
/**
 * Renders inline Markdown (code spans, links, `<url>` autolinks, bold and
 * emphasis) to HTML.
 *
 * Code spans, links and autolinks are pulled out of the raw text first and
 * parked behind NUL-delimited placeholders; only the remaining text is
 * escaped and scanned for emphasis. This fixes two defects of escaping
 * everything up front:
 *  - hrefs were HTML-escaped twice, so `?a=1&b=2` became `&amp;amp;`;
 *  - emphasis rewriting ran over URLs, so `my_file_name` inside a link target
 *    had `<em>` tags injected into the href.
 *
 * @param {string} text - Inline Markdown.
 * @param {string} currentRel - docs-relative path of the page, passed to
 *   `rewriteHref` for relative links.
 * @returns {string} HTML.
 */
function inline(text, currentRel) {
  const stash = [];
  const keep = (fragment) => {
    stash.push(fragment);
    return `\u0000${stash.length - 1}\u0000`;
  };
  const restore = (value) => value.replace(/\u0000(\d+)\u0000/g, (_, i) => stash[Number(i)]);
  const emphasize = (escaped) => escaped
    .replace(/\*\*([^*]+)\*\*/g, "<strong>$1</strong>")
    .replace(/(^|[^*])\*([^*\s][^*]*?)\*(?!\*)/g, "$1<em>$2</em>")
    .replace(/(^|[^_])_([^_\s][^_]*?)_(?!_)/g, "$1<em>$2</em>");

  const withoutCode = text.replace(/`([^`]+)`/g, (_, code) => keep(`<code>${escapeHtml(code)}</code>`));
  const withoutLinks = withoutCode
    // The href is taken from the raw text, so it is escaped exactly once.
    // Labels are restored here because they may contain code-span placeholders.
    .replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_, label, href) =>
      keep(`<a href="${escapeAttr(rewriteHref(href, currentRel))}">${restore(emphasize(escapeHtml(label)))}</a>`))
    .replace(/<(https?:\/\/[^\s<>]+)>/g, (_, url) => keep(`<a href="${escapeAttr(url)}">${escapeHtml(url)}</a>`));
  return restore(emphasize(escapeHtml(withoutLinks)));
}
/**
 * Rewrites a Markdown link target so it works inside the generated site.
 *
 * - http(s), mailto and pure fragment links are returned untouched.
 * - Root-relative links ("/x") are resolved through the permalink table; when
 *   no page has that permalink the link is returned untouched.
 * - Relative `.md` links are resolved against the current page and pointed at
 *   the target's output file. Other relative links are returned untouched.
 * A `#fragment` is carried over to the rewritten link.
 *
 * @param {string} href - Link target as written in Markdown.
 * @param {string} currentRel - docs-relative path of the page holding the link.
 * @returns {string} Link target relative to the current output page.
 */
function rewriteHref(href, currentRel) {
  if (/^(https?:|mailto:|#)/.test(href)) return href;
  const [raw, hash = ""] = href.split("#");
  const suffix = hash ? `#${hash}` : "";
  if (!raw) return suffix;
  const currentOut = () => pageMap.get(currentRel)?.outRel || outPath(currentRel);
  if (raw.startsWith("/")) {
    const byPermalink = permalinkMap.get(normalizePermalink(raw));
    return byPermalink ? `${hrefToOutRel(byPermalink.outRel, currentOut())}${suffix}` : href;
  }
  if (!raw.endsWith(".md")) return href;
  const baseDir = path.posix.dirname(currentRel);
  const targetRel = path.posix.normalize(path.posix.join(baseDir, raw));
  const targetOut = pageMap.get(targetRel)?.outRel || outPath(targetRel);
  return `${hrefToOutRel(targetOut, currentOut())}${suffix}`;
}
/**
 * Builds the "On this page" navigation from the h2/h3 headings of rendered
 * HTML. Returns nothing when fewer than two headings exist.
 *
 * @param {string} html - Rendered page body.
 * @returns {string} `<nav class="toc">` markup, or "" for short pages.
 */
function tocFromHtml(html) {
  const headings = [...html.matchAll(/<h([23]) id="([^"]+)">([\s\S]*?)<\/h[23]>/g)].map(
    ([, level, id, inner]) => ({
      level: Number(level),
      id,
      // The heading starts with the "#" anchor link text; drop it.
      text: htmlTextContent(inner).replace(/^#/, "").trim(),
    }),
  );
  if (headings.length < 2) return "";
  const links = headings
    .map((entry) => `<a class="toc-l${entry.level}" href="#${entry.id}">${escapeHtml(entry.text)}</a>`)
    .join("");
  return `<nav class="toc" aria-label="On this page"><h2>On this page</h2>${links}</nav>`;
}
/**
 * Wraps a rendered page body in the full HTML document: head metadata,
 * sidebar navigation, optional hero header, article, table of contents and
 * previous/next pager.
 *
 * The home page (`index.md` / `README.md`) gets no hero, no table of contents
 * and no pager.
 *
 * The social meta tags receive the unescaped title because `tagHtml` escapes
 * attribute values itself. Passing the already escaped `<title>` text there
 * double-escaped titles containing `&`, `<`, `>` or quotes.
 *
 * @param {object} args
 * @param {{rel: string, outRel: string, title: string}} args.page - Page record.
 * @param {string} args.html - Rendered article HTML.
 * @param {string} args.toc - Table-of-contents HTML (may be "").
 * @param {object|null} args.prev - Previous page in reading order.
 * @param {object|null} args.next - Next page in reading order.
 * @param {string} args.sectionName - Sidebar section shown above the title.
 * @returns {string} Complete HTML document.
 */
function layout({ page, html, toc, prev, next, sectionName }) {
  const depth = page.outRel.split("/").length - 1;
  const rootPrefix = depth ? "../".repeat(depth) : "";
  const editUrl = `${repoEditBase}/${page.rel}`;
  const isHome = page.rel === "index.md" || page.rel === "README.md";
  const prevNext = !isHome && (prev || next) ? pageNavHtml(prev, next, page.outRel) : "";
  const heroBlock = isHome ? "" : standardHero(page, sectionName, editUrl);
  const articleClass = isHome ? "doc doc-home" : "doc";
  const tocBlock = isHome ? "" : toc;
  // Raw title for attribute values (escaped by tagHtml); escaped once for <title>.
  const metaTitle = isHome ? "gitcrawl" : `${page.title} — gitcrawl`;
  const titleSuffix = escapeHtml(metaTitle);
  const canonicalUrl = pageCanonicalUrl(page);
  const socialImage = siteBase ? `${siteBase}/social-card.png` : `${rootPrefix}social-card.png`;
  const socialMeta = [
    ["link", "rel", "canonical", "href", canonicalUrl],
    ["meta", "property", "og:type", "content", "website"],
    ["meta", "property", "og:site_name", "content", "gitcrawl"],
    ["meta", "property", "og:title", "content", metaTitle],
    ["meta", "property", "og:description", "content", "Local-first GitHub issue and pull request crawler for maintainer triage."],
    ["meta", "property", "og:url", "content", canonicalUrl],
    ["meta", "property", "og:image", "content", socialImage],
    ["meta", "property", "og:image:width", "content", "1200"],
    ["meta", "property", "og:image:height", "content", "630"],
    ["meta", "name", "twitter:card", "content", "summary_large_image"],
    ["meta", "name", "twitter:title", "content", metaTitle],
    ["meta", "name", "twitter:description", "content", "Local-first GitHub issue and pull request crawler for maintainer triage."],
    ["meta", "name", "twitter:image", "content", socialImage],
  ].map(tagHtml).join("\n ");
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>${titleSuffix}</title>
<meta name="description" content="Local-first GitHub issue and pull request crawler for maintainer triage.">
${socialMeta}
<link rel="icon" href="${rootPrefix}favicon.svg" type="image/svg+xml">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>${css()}</style>
<script>${themeInitJs()}</script>
</head>
<body${isHome ? ' class="home"' : ""}>
<button class="nav-toggle" type="button" aria-label="Toggle navigation" aria-expanded="false">
<span aria-hidden="true"></span><span aria-hidden="true"></span><span aria-hidden="true"></span>
</button>
<div class="shell">
<aside class="sidebar">
<div class="sidebar-head">
<a class="brand" href="${hrefToOutRel("index.html", page.outRel)}" aria-label="gitcrawl docs home">
<img src="${rootPrefix}favicon.svg" alt="">
<span class="brand-text">
<strong class="brand-name">gitcrawl<span class="brand-tag">main</span></strong>
<small>Local-first GitHub triage</small>
</span>
</a>
${themeToggleHtml()}
</div>
<label class="search"><span>Search</span><input id="doc-search" type="search" placeholder="sync, cluster, gh shim"></label>
<nav>${navHtml(page)}</nav>
</aside>
<main>
${heroBlock}
<div class="doc-grid${isHome ? " doc-grid-home" : ""}">
<article class="${articleClass}">${html}${prevNext}</article>
${tocBlock}
</div>
</main>
</div>
<script>${js()}</script>
</body>
</html>`;
}
/**
 * Returns the canonical URL for a page. Without a configured site base the
 * relative output path is returned; otherwise directory-style pages
 * (`…/index.html`) are addressed by their directory.
 *
 * @param {{outRel: string}} page - Page record.
 * @returns {string} Canonical URL or relative output path.
 */
function pageCanonicalUrl(page) {
  const { outRel } = page;
  if (!siteBase) return outRel;
  if (outRel === "index.html") return `${siteBase}/`;
  const indexFile = "index.html";
  const relUrl = outRel.endsWith(`/${indexFile}`)
    ? outRel.slice(0, outRel.length - indexFile.length)
    : outRel;
  return `${siteBase}/${relUrl}`;
}
/**
 * Renders one head tag from a `[tag, attr1, value1, attr2, value2]` tuple.
 * "link" produces `<link>`; every other tag name produces `<meta>`. Only the
 * second value is attribute-escaped.
 *
 * @param {string[]} tuple - Tag description.
 * @returns {string} The tag markup.
 */
function tagHtml([tag, k1, v1, k2, v2]) {
  const element = tag === "link" ? "link" : "meta";
  const escapedValue = escapeAttr(v2);
  return `<${element} ${k1}="${v1}" ${k2}="${escapedValue}">`;
}
/**
 * Renders the header shown above every non-home page: section label, page
 * title, and links to the repository and to the page's edit URL.
 *
 * @param {{title: string}} page - Page record.
 * @param {string} sectionName - Sidebar section the page belongs to.
 * @param {string} editUrl - URL of the page's edit view.
 * @returns {string} `<header class="hero">` markup.
 */
function standardHero(page, sectionName, editUrl) {
  const eyebrow = escapeHtml(sectionName);
  const heading = escapeHtml(page.title);
  const editHref = escapeAttr(editUrl);
  return [
    '<header class="hero">',
    '<div class="hero-text">',
    `<p class="eyebrow">${eyebrow}</p>`,
    `<h1>${heading}</h1>`,
    "</div>",
    '<div class="hero-meta">',
    `<a class="repo" href="${repoBase}" rel="noopener">GitHub</a>`,
    `<a class="edit" href="${editHref}" rel="noopener">Edit page</a>`,
    "</div>",
    "</header>",
  ].join("\n");
}
/**
 * Renders the previous/next pager at the bottom of an article. A missing
 * neighbour simply leaves its cell out.
 *
 * @param {{outRel: string, title: string}|null} prev - Previous page.
 * @param {{outRel: string, title: string}|null} next - Next page.
 * @param {string} currentOutRel - Output path of the page being rendered.
 * @returns {string} `<nav class="page-nav">` markup.
 */
function pageNavHtml(prev, next, currentOutRel) {
  const captions = { prev: "Previous", next: "Next" };
  const cells = [["prev", prev], ["next", next]].map(([dir, page]) => {
    if (!page) return "";
    const href = hrefToOutRel(page.outRel, currentOutRel);
    return `<a class="page-nav-${dir}" href="${href}"><small>${captions[dir]}</small><span>${escapeHtml(page.title)}</span></a>`;
  });
  return `<nav class="page-nav" aria-label="Pager">${cells.join("")}</nav>`;
}
/**
 * Render the sidebar navigation: one `<section>` per entry of the
 * module-level `nav` list, each with a heading and a link per page.
 *
 * The link whose `rel` matches the current page gets the `active` class.
 * Section names are HTML-escaped like page titles, so a name containing
 * `&` or `<` renders as text instead of producing broken markup.
 *
 * @param {{ outRel: string, rel: string }} currentPage - Page being rendered; links are relative to its `outRel`.
 * @returns {string} Concatenated section markup.
 */
function navHtml(currentPage) {
  return nav
    .map((section) => {
      const links = section.pages
        .map((page) => {
          const href = hrefToOutRel(page.outRel, currentPage.outRel);
          const active = page.rel === currentPage.rel ? " active" : "";
          return `<a class="nav-link${active}" href="${href}">${escapeHtml(page.title)}</a>`;
        })
        .join("");
      return `<section><h2>${escapeHtml(section.name)}</h2>${links}</section>`;
    })
    .join("");
}
/**
 * Build a relative href from the page at `currentOutRel` to the page at
 * `targetOutRel` (both POSIX-style paths relative to the output root).
 *
 * Index pages (`index.html` at the root or `<dir>/index.html`) are linked by
 * directory with a trailing slash, e.g. `./`, `../`, `guide/`. Every other
 * target is linked by its relative file path; if that path comes out empty,
 * the target's base name is used instead.
 *
 * @param {string} targetOutRel - Output path of the link target.
 * @param {string} currentOutRel - Output path of the page containing the link.
 * @returns {string} Relative href.
 */
function hrefToOutRel(targetOutRel, currentOutRel) {
  const fromDir = path.posix.dirname(currentOutRel);
  const indexName = "index.html";
  const isRootIndex = targetOutRel === indexName;

  if (isRootIndex || targetOutRel.endsWith(`/${indexName}`)) {
    // Link to the containing directory rather than to the index file itself.
    const toDir = isRootIndex ? "." : targetOutRel.slice(0, -indexName.length);
    const dirHref = path.posix.relative(fromDir, toDir) || ".";
    return dirHref.endsWith("/") ? dirHref : `${dirHref}/`;
  }

  const fileHref = path.posix.relative(fromDir, targetOutRel);
  return fileHref || path.posix.basename(targetOutRel);
}
/**
 * Turn heading text into a URL-fragment slug.
 *
 * The text is lower-cased, backticks are removed, and the remaining runs of
 * `a-z0-9` characters are joined with single hyphens. Leading and trailing
 * separators are dropped, so text without any such characters yields "".
 *
 * @param {string} text - Source text.
 * @returns {string} Hyphen-separated slug.
 */
function slug(text) {
  const words = text
    .toLowerCase()
    .replace(/`/g, "")
    .split(/[^a-z0-9]+/)
    .filter((word) => word !== "");
  return words.join("-");
}
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) as
 * character references. The input is coerced with `String()` first, so
 * non-string values are accepted.
 *
 * @param {*} value - Value to escape.
 * @returns {string} Escaped text.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const text = String(value);
  return text.replace(/[&<>"']/g, (char) => entities[char]);
}
/**
 * Escape a value for use inside a quoted HTML attribute.
 *
 * Delegates to `escapeHtml`, so attribute values and text content share the
 * same escaping rules.
 *
 * @param {*} value - Value to escape.
 * @returns {string} Escaped attribute text.
 */
function escapeAttr(value) {
  const escaped = escapeHtml(value);
  return escaped;
}
/**
 * Extract the plain text of an HTML fragment.
 *
 * Everything from a `<` up to and including the next `>` is dropped (an
 * unterminated `<` swallows the rest of the input); the remaining text is
 * then passed through `decodeHtmlText`.
 *
 * @param {string} fragment - HTML fragment.
 * @returns {string} Text with tags removed and entities decoded.
 */
function htmlTextContent(fragment) {
  const kept = [];
  let insideTag = false;
  for (const ch of fragment) {
    if (ch === "<") {
      insideTag = true;
    } else if (insideTag) {
      // Stay in the tag until its closing bracket has been consumed.
      insideTag = ch !== ">";
    } else {
      kept.push(ch);
    }
  }
  return decodeHtmlText(kept.join(""));
}
/**
 * Decode the character references `&amp;`, `&lt;`, `&gt;`, `&quot;` and
 * `&#39;` back to their literal characters. Other references are left as
 * they are. Decoding is a single pass, so `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param {*} value - Value to decode; coerced with `String()`.
 * @returns {string} Decoded text.
 */
function decodeHtmlText(value) {
  const characters = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
  };
  const text = String(value);
  return text.replace(/&(amp|lt|gt|quot|#39);/g, (reference, name) => characters[name]);
}
/**
 * Check every `href="..."` in the generated HTML under `outputDir` and fail
 * the build when a local link does not resolve.
 *
 * Links starting with `#`, `http:`, `https:`, `mailto:`, `tel:` or
 * `javascript:` are not checked. For the rest, the path part is resolved
 * against the linking file's directory (a directory target means its
 * `index.html`), and a `#fragment` must appear in the target file as
 * `id="fragment"` or `name="fragment"`.
 *
 * @param {string} outputDir - Root directory of the generated site.
 * @throws {Error} Listing every broken link, one per line.
 */
function validateLinks(outputDir) {
  const skippable = /^(#|https?:|mailto:|tel:|javascript:)/;
  const display = (somePath) => path.relative(outputDir, somePath);
  const failures = [];

  allHtml(outputDir).forEach((file) => {
    const html = fs.readFileSync(file, "utf8");
    const hrefs = Array.from(html.matchAll(/href="([^"]+)"/g), (found) => found[1]);

    hrefs.forEach((href) => {
      if (skippable.test(href)) {
        return;
      }
      const [rawPath, anchor = ""] = href.split("#");

      let target = rawPath ? path.resolve(path.dirname(file), rawPath) : file;
      if (fs.existsSync(target) && fs.statSync(target).isDirectory()) {
        // Directory links are served from the directory's index page.
        target = path.join(target, "index.html");
      }

      if (!fs.existsSync(target)) {
        failures.push(`${display(file)}: ${href} -> missing ${display(target)}`);
        return;
      }
      if (!anchor) {
        return;
      }

      const targetHtml = fs.readFileSync(target, "utf8");
      const hasAnchor =
        targetHtml.includes(`id="${anchor}"`) || targetHtml.includes(`name="${anchor}"`);
      if (!hasAnchor) {
        failures.push(`${display(file)}: ${href} -> missing anchor`);
      }
    });
  });

  if (failures.length) {
    throw new Error(`broken docs links:\n${failures.join("\n")}`);
  }
}
/**
 * Recursively collect the paths of all `.html` files under `dir`.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} File paths (each prefixed with `dir`), sorted with the
 *   default string ordering.
 */
function allHtml(dir) {
  const found = [];
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      for (const nested of allHtml(full)) {
        found.push(nested);
      }
    } else if (entry.name.endsWith(".html")) {
      found.push(full);
    }
  }
  return found.sort();
}

View File

@ -0,0 +1,232 @@
/**
 * Return the complete stylesheet for the docs site as one string.
 *
 * The string defines the light palette as custom properties on `:root`, the
 * dark overrides under `[data-theme="dark"]`, and the rules for the page
 * shell, sidebar, search box, navigation tree, hero header, article body
 * (headings, code blocks with `hl-*` highlight classes and the copy button,
 * tables, blockquotes), table of contents, pager, and the mobile nav toggle.
 * Responsive overrides apply at `max-width:900px` and `max-width:520px`, and
 * the table of contents is hidden below 1180px.
 *
 * The text is runtime output: any change inside the template literal changes
 * the emitted CSS.
 *
 * @returns {string} CSS source text.
 */
export function css() {
return `
:root{--ink:#0f1115;--text:#1f2328;--text-soft:#3b4147;--muted:#6b7280;--subtle:#9aa1ab;--bg:#fafafa;--paper:#ffffff;--accent:#2563eb;--accent-strong:#1d4ed8;--accent-soft:rgba(37,99,235,.08);--line:#e5e7eb;--line-soft:#eef0f3;--branch:#d0d7de;--code-bg:#0f172a;--code-fg:#e6edf3;--code-border:#1f2937;--code-scroll:#334155;--hl-comment:#94a3b8;--hl-keyword:#93c5fd;--hl-string:#86efac;--hl-number:#fbbf24;--hl-literal:#c4b5fd;--hl-key:#67e8f9;--hl-variable:#f0abfc;--hl-option:#fda4af;--shadow:rgba(15,17,21,.08);--shadow-strong:rgba(15,17,21,.18);--tag-bg:#ddf4ff;--tag-fg:#0969da;--ring:rgba(37,99,235,.32);color-scheme:light}
[data-theme="dark"]{--ink:#e6edf3;--text:#c9d1d9;--text-soft:#8b949e;--muted:#8b949e;--subtle:#6e7681;--bg:#0d1117;--paper:#161b22;--accent:#58a6ff;--accent-strong:#79b8ff;--accent-soft:rgba(56,139,253,.16);--line:#30363d;--line-soft:#21262d;--branch:#30363d;--code-bg:#010409;--code-fg:#e6edf3;--code-border:#21262d;--code-scroll:#30363d;--hl-comment:#8b949e;--hl-keyword:#79c0ff;--hl-string:#a5d6ff;--hl-number:#ffa657;--hl-literal:#d2a8ff;--hl-key:#7ee787;--hl-variable:#ff7b72;--hl-option:#f2cc60;--shadow:rgba(0,0,0,.5);--shadow-strong:rgba(0,0,0,.7);--tag-bg:rgba(56,139,253,.16);--tag-fg:#58a6ff;--ring:rgba(56,139,253,.4);color-scheme:dark}
*{box-sizing:border-box}
html{scroll-behavior:smooth;scroll-padding-top:24px}
body{margin:0;background:var(--bg);color:var(--text);font-family:"Inter",ui-sans-serif,system-ui,-apple-system,Segoe UI,sans-serif;line-height:1.65;overflow-x:hidden;-webkit-font-smoothing:antialiased;font-feature-settings:"cv02","cv03","cv04","cv11"}
::selection{background:var(--accent);color:#fff}
a{color:var(--accent);text-decoration:none;transition:color .12s}
a:hover{text-decoration:underline;text-underline-offset:.2em}
.shell{display:grid;grid-template-columns:268px minmax(0,1fr);min-height:100vh}
.sidebar{position:sticky;top:0;height:100vh;overflow:auto;padding:24px 22px;background:var(--paper);border-right:1px solid var(--line);scrollbar-width:thin;scrollbar-color:var(--line) transparent}
.sidebar::-webkit-scrollbar{width:6px}
.sidebar::-webkit-scrollbar-thumb{background:var(--line);border-radius:6px}
.sidebar-head{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;margin:0 0 22px}
.brand{display:flex;align-items:center;gap:11px;color:var(--ink);text-decoration:none;flex:1 1 auto;min-width:0}
.brand:hover{text-decoration:none}
.brand img{width:32px;height:32px;border-radius:7px;flex:0 0 auto}
.brand-text{display:block;min-width:0}
.brand strong,.brand-name{display:flex;align-items:center;gap:7px;font-size:1.05rem;line-height:1.1;font-weight:600;letter-spacing:-.005em;color:var(--ink)}
.brand-tag{display:inline-flex;align-items:center;font-family:"JetBrains Mono","SF Mono",ui-monospace,monospace;font-size:.6rem;background:var(--tag-bg);color:var(--tag-fg);padding:2px 6px;border-radius:999px;font-weight:500;letter-spacing:.02em;line-height:1}
.brand-tag::before{content:"";display:inline-block;width:5px;height:5px;border-radius:50%;background:currentColor;margin-right:5px;opacity:.85}
.brand small{display:block;color:var(--muted);font-size:.74rem;margin-top:3px;font-weight:400}
.theme-toggle{appearance:none;background:transparent;border:1px solid var(--line);border-radius:8px;width:34px;height:34px;display:inline-flex;align-items:center;justify-content:center;color:var(--muted);cursor:pointer;flex:0 0 auto;transition:border-color .15s,color .15s,background .15s}
.theme-toggle:hover{border-color:var(--ink);color:var(--ink);background:var(--line-soft)}
.theme-toggle:focus-visible{outline:2px solid var(--accent);outline-offset:2px}
.theme-toggle svg{width:16px;height:16px;display:block}
.theme-icon-sun{display:none}
[data-theme="dark"] .theme-icon-moon{display:none}
[data-theme="dark"] .theme-icon-sun{display:block}
.search{display:block;margin:0 0 22px}
.search span{display:block;color:var(--muted);font-size:.7rem;font-weight:600;text-transform:uppercase;letter-spacing:.08em;margin-bottom:7px}
.search input{width:100%;border:1px solid var(--line);background:var(--bg);border-radius:8px;padding:9px 12px;font:inherit;font-size:.9rem;color:var(--text);outline:none;transition:border-color .15s,box-shadow .15s}
.search input:focus{border-color:var(--accent);box-shadow:0 0 0 3px var(--ring)}
nav section{position:relative;margin:0 0 18px}
nav section::before{content:"";position:absolute;left:6px;top:14px;bottom:6px;width:1.5px;background:var(--branch);border-radius:1px}
nav section:last-child::before{bottom:14px}
nav h2{position:relative;font-size:.68rem;color:var(--muted);text-transform:uppercase;letter-spacing:.09em;margin:0 0 10px;font-weight:600;padding-left:24px;line-height:1.2}
nav h2::before{content:"";position:absolute;left:0;top:50%;width:13px;height:13px;border-radius:50%;background:var(--accent);transform:translateY(-50%);box-shadow:0 0 0 3px var(--paper)}
.nav-link{position:relative;display:block;color:var(--text);text-decoration:none;border-radius:6px;padding:5px 10px 5px 24px;margin:1px 0;font-size:.9rem;line-height:1.4;transition:background .12s,color .12s}
.nav-link::before{content:"";position:absolute;left:3px;top:50%;width:7px;height:7px;border-radius:50%;background:var(--paper);border:1.5px solid var(--branch);transform:translateY(-50%);transition:background .12s,border-color .12s,box-shadow .12s;z-index:1}
.nav-link:hover{background:var(--line-soft);color:var(--ink);text-decoration:none}
.nav-link:hover::before{border-color:var(--muted)}
.nav-link.active{background:var(--accent-soft);color:var(--accent);font-weight:600}
.nav-link.active::before{background:var(--accent);border-color:var(--accent);box-shadow:0 0 0 3px var(--accent-soft)}
main{min-width:0;padding:32px clamp(20px,4.5vw,56px) 80px;max-width:1180px;margin:0 auto;width:100%}
.hero{display:flex;align-items:flex-end;justify-content:space-between;gap:22px;border-bottom:1px solid var(--line);padding:8px 0 22px;margin-bottom:8px;flex-wrap:wrap}
.hero-text{min-width:0;flex:1 1 320px}
.eyebrow{margin:0 0 8px;color:var(--muted);font-weight:600;text-transform:uppercase;letter-spacing:.1em;font-size:.7rem;display:inline-flex;align-items:center;gap:8px}
.eyebrow::before{content:"";display:inline-block;width:8px;height:8px;border-radius:50%;background:var(--accent);box-shadow:0 0 0 2px var(--accent-soft)}
.hero h1{font-size:clamp(1.7rem,3vw,2.4rem);line-height:1.1;letter-spacing:-.018em;margin:0;font-weight:700;color:var(--ink)}
.hero-meta{display:flex;gap:8px;flex:0 0 auto}
.repo,.edit{border:1px solid var(--line);color:var(--text);text-decoration:none;border-radius:7px;padding:6px 11px;font-weight:500;font-size:.83rem;background:var(--paper);transition:border-color .15s,color .15s,background .15s}
.repo:hover,.edit:hover{border-color:var(--ink);color:var(--ink);text-decoration:none}
.edit{color:var(--muted)}
.doc-grid{display:grid;grid-template-columns:minmax(0,1fr);gap:48px;margin-top:24px}
.doc-grid-home{margin-top:8px}
@media(min-width:1180px){.doc-grid{grid-template-columns:minmax(0,72ch) 200px;justify-content:start}.doc-grid-home{grid-template-columns:minmax(0,76ch);justify-content:start}}
.doc{min-width:0;max-width:72ch;overflow-wrap:break-word}
.doc-home{max-width:76ch}
.doc h1{font-size:clamp(2rem,3.6vw,2.8rem);line-height:1.08;letter-spacing:-.02em;margin:0 0 .4em;font-weight:700;color:var(--ink)}
.doc-home h1{font-size:clamp(2.2rem,4.2vw,3.2rem)}
body:not(.home) .doc>h1:first-child{display:none}
.doc h2{font-size:1.45rem;line-height:1.2;margin:2em 0 .5em;font-weight:600;letter-spacing:-.012em;color:var(--ink);position:relative}
.doc h3{font-size:1.1rem;margin:1.7em 0 .35em;position:relative;font-weight:600;color:var(--ink);letter-spacing:-.005em}
.doc h4{font-size:.98rem;margin:1.4em 0 .25em;color:var(--ink);position:relative;font-weight:600}
.doc h2:first-child,.doc h3:first-child,.doc h4:first-child{margin-top:.2em}
.doc :is(h2,h3,h4) .anchor{position:absolute;left:-1.05em;top:0;color:var(--subtle);opacity:0;text-decoration:none;font-weight:400;padding-right:.3em;transition:opacity .12s,color .12s}
.doc :is(h2,h3,h4):hover .anchor{opacity:.7}
.doc :is(h2,h3,h4) .anchor:hover{opacity:1;color:var(--accent);text-decoration:none}
.doc p{margin:0 0 1.05em}
.doc-home>p:first-of-type{font-size:1.12rem;color:var(--text-soft);line-height:1.6;margin:0 0 1.3em;max-width:60ch}
.home-actions{display:flex;flex-wrap:wrap;gap:10px;margin:0 0 1.7em!important}
.home-actions a{display:inline-flex;align-items:center;justify-content:center;min-height:42px;border:1px solid var(--line);border-radius:8px;padding:8px 14px;background:var(--paper);color:var(--ink);font-weight:600;font-size:.94rem;line-height:1.2;text-decoration:none;box-shadow:0 1px 2px var(--shadow);transition:transform .14s,border-color .14s,background .14s,color .14s,box-shadow .14s}
.home-actions a:hover{text-decoration:none;transform:translateY(-1px);border-color:var(--accent);box-shadow:0 4px 12px var(--shadow)}
.home-actions a:focus-visible{outline:2px solid var(--accent);outline-offset:2px}
.home-actions a:first-child{background:var(--accent);border-color:var(--accent);color:#fff;box-shadow:0 3px 10px var(--ring)}
.home-actions a:first-child:hover{background:var(--accent-strong);border-color:var(--accent-strong);color:#fff}
.home-actions a:first-child::before{content:"↗";font-size:.9em;margin-right:8px}
.home-actions a[href*="github.com"]::before{content:"";width:15px;height:15px;margin-right:8px;background:currentColor;clip-path:path("M7.5 0C3.36 0 0 3.45 0 7.7c0 3.4 2.15 6.28 5.13 7.3.38.07.51-.17.51-.37v-1.31c-2.08.46-2.52-1.03-2.52-1.03-.34-.89-.83-1.12-.83-1.12-.68-.48.05-.47.05-.47.75.05 1.15.79 1.15.79.67 1.17 1.75.83 2.18.64.07-.5.26-.83.47-1.02-1.66-.2-3.41-.85-3.41-3.78 0-.83.29-1.52.77-2.05-.08-.2-.34-1.02.07-2.02 0 0 .63-.21 2.06.78.6-.17 1.24-.26 1.88-.26.64 0 1.28.09 1.88.26 1.43-.99 2.06-.78 2.06-.78.41 1 .15 1.82.07 2.02.48.53.77 1.22.77 2.05 0 2.94-1.75 3.58-3.42 3.78.27.24.51.72.51 1.45v2.1c0 .2.14.44.52.37A7.7 7.7 0 0 0 15 7.7C15 3.45 11.64 0 7.5 0Z")}
.doc ul,.doc ol{padding-left:1.3rem;margin:0 0 1.15em}
.doc li{margin:.25em 0}
.doc li>p{margin:0 0 .4em}
.doc strong{font-weight:600;color:var(--ink)}
.doc em{font-style:italic}
.doc code{font-family:"JetBrains Mono","SF Mono",ui-monospace,monospace;font-size:.84em;background:var(--line-soft);border:1px solid var(--line);border-radius:5px;padding:.08em .35em;color:var(--ink)}
.doc pre{position:relative;overflow:auto;background:var(--code-bg);color:var(--code-fg);border-radius:8px;padding:14px 18px;margin:1.3em 0;font-size:.85em;line-height:1.6;scrollbar-width:thin;scrollbar-color:var(--code-scroll) transparent;border:1px solid var(--code-border)}
.doc pre::-webkit-scrollbar{height:8px;width:8px}
.doc pre::-webkit-scrollbar-thumb{background:var(--code-scroll);border-radius:8px}
.doc pre code{display:block;background:transparent;border:0;color:inherit;padding:0;font-size:1em;white-space:pre}
.doc pre .hl-comment{color:var(--hl-comment);font-style:italic}
.doc pre .hl-keyword{color:var(--hl-keyword);font-weight:500}
.doc pre .hl-string{color:var(--hl-string)}
.doc pre .hl-number{color:var(--hl-number)}
.doc pre .hl-literal{color:var(--hl-literal);font-weight:500}
.doc pre .hl-key{color:var(--hl-key)}
.doc pre .hl-variable{color:var(--hl-variable)}
.doc pre .hl-option{color:var(--hl-option)}
.doc pre .copy{position:absolute;top:8px;right:8px;background:rgba(255,255,255,.06);color:var(--code-fg);border:1px solid rgba(255,255,255,.16);border-radius:6px;padding:3px 9px;font:500 .7rem/1 "Inter",sans-serif;cursor:pointer;opacity:0;transition:opacity .15s,background .15s,border-color .15s}
.doc pre:hover .copy,.doc pre .copy:focus{opacity:1}
.doc pre .copy:hover{background:rgba(255,255,255,.12)}
.doc pre .copy.copied{background:var(--accent);border-color:var(--accent);opacity:1}
.doc blockquote{margin:1.4em 0;padding:10px 16px;border-left:3px solid var(--accent);background:var(--accent-soft);border-radius:0 8px 8px 0;color:var(--text)}
.doc blockquote p:last-child{margin-bottom:0}
.doc table{width:100%;border-collapse:collapse;margin:1.2em 0;font-size:.92em}
.doc th,.doc td{border-bottom:1px solid var(--line);padding:9px 10px;text-align:left}
.doc th{font-weight:600;color:var(--ink);background:var(--line-soft);border-bottom:1px solid var(--line)}
.doc hr{border:0;border-top:1px solid var(--line);margin:2.2em 0}
.toc{position:sticky;top:24px;align-self:start;font-size:.84rem;padding-left:14px;border-left:1px solid var(--line);max-height:calc(100vh - 48px);overflow:auto;scrollbar-width:thin;scrollbar-color:var(--line) transparent}
.toc::-webkit-scrollbar{width:5px}
.toc::-webkit-scrollbar-thumb{background:var(--line);border-radius:5px}
.toc h2{font-size:.66rem;color:var(--muted);text-transform:uppercase;letter-spacing:.09em;margin:0 0 10px;font-weight:600}
.toc a{display:block;color:var(--muted);text-decoration:none;padding:4px 0 4px 10px;line-height:1.35;border-left:2px solid transparent;margin-left:-12px;transition:color .12s,border-color .12s}
.toc a:hover{color:var(--ink);text-decoration:none}
.toc a.active{color:var(--accent);border-left-color:var(--accent);font-weight:500}
.toc-l3{padding-left:22px!important;font-size:.94em}
@media(max-width:1179px){.toc{display:none}}
.page-nav{display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-top:48px;border-top:1px solid var(--line);padding-top:20px}
.page-nav>a{display:block;border:1px solid var(--line);background:var(--paper);border-radius:9px;padding:13px 16px;text-decoration:none;color:var(--text);transition:border-color .15s,transform .15s,box-shadow .15s}
.page-nav>a:hover{border-color:var(--accent);text-decoration:none;color:var(--ink)}
.page-nav small{display:block;color:var(--muted);font-size:.7rem;text-transform:uppercase;letter-spacing:.09em;margin-bottom:5px;font-weight:600}
.page-nav span{display:block;font-weight:600;line-height:1.3;color:var(--ink)}
.page-nav-prev{text-align:left}
.page-nav-next{text-align:right;grid-column:2}
.page-nav-prev:only-child{grid-column:1}
.nav-toggle{display:none;position:fixed;top:14px;right:14px;top:calc(14px + env(safe-area-inset-top, 0px));right:calc(14px + env(safe-area-inset-right, 0px));z-index:20;width:40px;height:40px;border-radius:9px;background:var(--paper);border:1px solid var(--line);color:var(--ink);cursor:pointer;padding:10px 9px;flex-direction:column;align-items:stretch;justify-content:space-between;box-shadow:0 4px 14px var(--shadow)}
.nav-toggle span{display:block;width:100%;height:2px;flex:0 0 2px;background:currentColor;border-radius:2px;transition:transform .2s,opacity .2s}
.nav-toggle[aria-expanded="true"] span:nth-child(1){transform:translateY(8px) rotate(45deg)}
.nav-toggle[aria-expanded="true"] span:nth-child(2){opacity:0}
.nav-toggle[aria-expanded="true"] span:nth-child(3){transform:translateY(-8px) rotate(-45deg)}
@media(max-width:900px){
.shell{display:block}
.sidebar{position:fixed;inset:0 30% 0 0;max-width:320px;height:100vh;z-index:15;transform:translateX(-100%);transition:transform .25s ease;box-shadow:0 18px 40px var(--shadow-strong);background:var(--paper);pointer-events:none}
.sidebar.open{transform:translateX(0);pointer-events:auto}
.nav-toggle{display:flex}
main{padding:64px 18px 56px}
.hero{padding-top:6px}
.hero h1{font-size:clamp(1.5rem,7vw,2rem)}
.hero-meta{width:100%;justify-content:flex-start}
.doc{padding:0}
.doc-grid{margin-top:18px;gap:24px}
.doc :is(h2,h3,h4) .anchor{display:none}
}
@media(max-width:520px){
main{padding:60px 14px 48px}
.doc pre{margin-left:-14px;margin-right:-14px;border-radius:0;border-left:0;border-right:0}
}
`;
}
/**
 * Return the inline script that sets `data-theme` on the document element.
 *
 * The emitted script reads `gc-theme` from `localStorage`; a stored value of
 * `light` or `dark` wins, otherwise the `prefers-color-scheme: dark` media
 * query decides. If anything throws, the theme falls back to `light`.
 *
 * @returns {string} JavaScript source of a self-invoking function.
 */
export function themeInitJs() {
  const fallback = "document.documentElement.dataset.theme='light'";
  const resolveTheme = [
    "var s=localStorage.getItem('gc-theme');",
    "var t=s==='light'||s==='dark'?s:(window.matchMedia&&window.matchMedia('(prefers-color-scheme: dark)').matches?'dark':'light');",
    "document.documentElement.dataset.theme=t",
  ].join("");
  return `(function(){try{${resolveTheme}}catch(e){${fallback}}})();`;
}
/**
 * Return the markup for the theme toggle button.
 *
 * The button carries two inline SVG icons, `theme-icon-moon` and
 * `theme-icon-sun`, both marked `aria-hidden`; the button itself provides
 * the accessible label.
 *
 * @returns {string} Button markup, four lines joined with newlines.
 */
export function themeToggleHtml() {
  const iconAttrs =
    'viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"';
  const moonIcon = `<svg class="theme-icon-moon" ${iconAttrs}><path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"/></svg>`;
  const sunIcon = `<svg class="theme-icon-sun" ${iconAttrs}><circle cx="12" cy="12" r="4"/><path d="M12 2v2M12 20v2M4.93 4.93l1.41 1.41M17.66 17.66l1.41 1.41M2 12h2M20 12h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41"/></svg>`;
  const openingTag =
    '<button class="theme-toggle" type="button" aria-label="Toggle color theme" title="Toggle color theme">';
  return [openingTag, moonIcon, sunIcon, "</button>"].join("\n");
}
/**
 * Return the client-side script for the docs pages as one string.
 *
 * The emitted script wires up, in order:
 * - the theme toggle: applies the current `data-theme`, updates the button's
 *   label/title, stores the user's choice under `gc-theme` in `localStorage`,
 *   and follows system theme changes only while no choice is stored;
 * - the mobile sidebar (media query `max-width: 900px`): open/close via the
 *   `.nav-toggle` button, outside click, or Escape, keeping `inert`,
 *   `aria-hidden`, `aria-expanded` and the focusable elements' `tabindex`
 *   in sync, and restoring them when the viewport leaves the mobile range;
 * - the `#doc-search` input, which hides nav links (and whole sections)
 *   whose text does not contain the query;
 * - a "Copy" button appended to every `.doc pre` block;
 * - table-of-contents highlighting driven by an `IntersectionObserver`.
 *
 * The text is runtime output: any change inside the template literal changes
 * the script shipped to the browser.
 *
 * @returns {string} JavaScript source text.
 */
export function js() {
return `
const root=document.documentElement;
const themeButton=document.querySelector('.theme-toggle');
const themeMedia=window.matchMedia('(prefers-color-scheme: dark)');
function readStoredTheme(){try{const v=localStorage.getItem('gc-theme');return v==='light'||v==='dark'?v:null}catch(e){return null}}
function writeStoredTheme(t){try{localStorage.setItem('gc-theme',t)}catch(e){}}
function applyTheme(t){root.dataset.theme=t;if(themeButton){const next=t==='dark'?'light':'dark';themeButton.setAttribute('aria-label','Switch to '+next+' theme');themeButton.setAttribute('title','Switch to '+next+' theme')}}
applyTheme(root.dataset.theme==='dark'?'dark':'light');
themeButton?.addEventListener('click',()=>{const next=root.dataset.theme==='dark'?'light':'dark';applyTheme(next);writeStoredTheme(next)});
const onSystemThemeChange=(e)=>{if(!readStoredTheme())applyTheme(e.matches?'dark':'light')};
if(themeMedia.addEventListener)themeMedia.addEventListener('change',onSystemThemeChange);
else themeMedia.addListener?.(onSystemThemeChange);
const sidebar=document.querySelector('.sidebar');
const toggle=document.querySelector('.nav-toggle');
const mobileNav=window.matchMedia('(max-width: 900px)');
const sidebarFocusable='a[href],button,input,select,textarea,[tabindex]';
function setSidebarFocusable(enabled){
sidebar?.querySelectorAll(sidebarFocusable).forEach((el)=>{
if(enabled){
if(el.dataset.sidebarTabindex!==undefined){
if(el.dataset.sidebarTabindex)el.setAttribute('tabindex',el.dataset.sidebarTabindex);
else el.removeAttribute('tabindex');
delete el.dataset.sidebarTabindex;
}
}else if(el.dataset.sidebarTabindex===undefined){
el.dataset.sidebarTabindex=el.getAttribute('tabindex')??'';
el.setAttribute('tabindex','-1');
}
});
}
function setSidebarOpen(open){
if(!sidebar||!toggle)return;
sidebar.classList.toggle('open',open);
toggle.setAttribute('aria-expanded',open?'true':'false');
if(mobileNav.matches){
sidebar.inert=!open;
if(open)sidebar.removeAttribute('aria-hidden');
else sidebar.setAttribute('aria-hidden','true');
setSidebarFocusable(open);
}else{
sidebar.inert=false;
sidebar.removeAttribute('aria-hidden');
setSidebarFocusable(true);
}
}
setSidebarOpen(false);
toggle?.addEventListener('click',()=>setSidebarOpen(!sidebar?.classList.contains('open')));
document.addEventListener('click',(e)=>{if(!sidebar?.classList.contains('open'))return;if(sidebar.contains(e.target)||toggle?.contains(e.target))return;setSidebarOpen(false)});
document.addEventListener('keydown',(e)=>{if(e.key==='Escape')setSidebarOpen(false)});
const syncSidebarForViewport=()=>setSidebarOpen(sidebar?.classList.contains('open')??false);
if(mobileNav.addEventListener)mobileNav.addEventListener('change',syncSidebarForViewport);
else mobileNav.addListener?.(syncSidebarForViewport);
const input=document.getElementById('doc-search');
input?.addEventListener('input',()=>{const q=input.value.trim().toLowerCase();document.querySelectorAll('nav section').forEach(sec=>{let any=false;sec.querySelectorAll('.nav-link').forEach(a=>{const m=!q||a.textContent.toLowerCase().includes(q);a.style.display=m?'block':'none';if(m)any=true});sec.style.display=any?'block':'none'})});
document.querySelectorAll('.doc pre').forEach(pre=>{const btn=document.createElement('button');btn.type='button';btn.className='copy';btn.textContent='Copy';btn.addEventListener('click',async()=>{const code=pre.querySelector('code')?.textContent??'';try{await navigator.clipboard.writeText(code);btn.textContent='Copied';btn.classList.add('copied');setTimeout(()=>{btn.textContent='Copy';btn.classList.remove('copied')},1400)}catch{btn.textContent='Failed';setTimeout(()=>{btn.textContent='Copy'},1400)}});pre.appendChild(btn)});
const tocLinks=document.querySelectorAll('.toc a');
if(tocLinks.length){const map=new Map();tocLinks.forEach(a=>{const id=a.getAttribute('href').slice(1);const el=document.getElementById(id);if(el)map.set(el,a)});const setActive=l=>{tocLinks.forEach(x=>x.classList.remove('active'));l.classList.add('active')};const obs=new IntersectionObserver(entries=>{const visible=entries.filter(e=>e.isIntersecting).sort((a,b)=>a.boundingClientRect.top-b.boundingClientRect.top);if(visible.length){const link=map.get(visible[0].target);if(link)setActive(link)}},{rootMargin:'-15% 0px -65% 0px',threshold:0});map.forEach((_,el)=>obs.observe(el))}
`;
}
/**
 * Return the site favicon as an SVG document string: a rounded dark square
 * with three nodes joined by a branching stroke.
 *
 * @returns {string} SVG markup, one element per line.
 */
export function faviconSvg() {
  const nodes = [
    { cx: 20, cy: 20, fill: "#58a6ff" },
    { cx: 20, cy: 44, fill: "#58a6ff" },
    { cx: 44, cy: 32, fill: "#2dd4bf" },
  ];
  const circles = nodes.map(
    ({ cx, cy, fill }) => `<circle cx="${cx}" cy="${cy}" r="4.4" fill="${fill}"/>`,
  );
  return [
    '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" role="img" aria-label="gitcrawl">',
    '<rect width="64" height="64" rx="12" fill="#0f1115"/>',
    '<path d="M20 22v20M22 20c5 2 10 4 22 11M22 44c5-2 10-4 22-11" stroke="#58a6ff" stroke-width="2.6" stroke-linecap="round" fill="none"/>',
    ...circles,
    "</svg>",
  ].join("\n");
}