feat(gh): add gitcrawl-backed gh shim

This commit is contained in:
Peter Steinberger 2026-05-05 02:56:18 +01:00
parent 5d3906a4d0
commit 9fa2423e37
No known key found for this signature in database
10 changed files with 1281 additions and 8 deletions

View File

@ -34,6 +34,10 @@ gitcrawl search owner/repo --query "download stalls"
gitcrawl search issues "download stalls" -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30
gitcrawl search prs "manifest cache" -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20
gitcrawl search issues "hot loop" -R owner/repo --state open --sync-if-stale 5m --json number,title,url
gitcrawl gh search issues "download stalls" -R owner/repo --state open --match comments --json number,title,url
gitcrawl gh pr view 123 -R owner/repo --json number,title,state,url
gitcrawl gh run view 123456789 -R owner/repo --json status,conclusion
gitcrawl gh xcache stats
gitcrawl tui
gitcrawl tui owner/repo
```
@ -44,6 +48,7 @@ gitcrawl tui owner/repo
`gitcrawl sync` fetches open issues and pull requests by default. Pass `--state all` or `--state closed` for explicit backfill workflows; incremental open syncs with `--since` also sweep recently closed items so local open state does not rot.
Pass `--numbers` to refresh exact issue or pull request rows without relying on list ordering or updated-time windows.
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration.
`gitcrawl gh` is a gh-compatible shim for agent workflows. It answers broad `gh search issues|prs`, `gh issue/pr list`, and supported `gh issue/pr view --json` fields from local SQLite, then falls through to the real GitHub CLI for unsupported commands. Read-only fallthroughs such as `gh run list/view`, `gh pr diff/checks`, `gh repo view/list`, `gh label list`, and GET-only `gh api` calls use a short persistent cache under `cache/gh-shim` (30 seconds by default, 1 minute for `gh api`, 5 minutes for `gh pr diff`; set `GITCRAWL_GH_CACHE_TTL` to a duration such as `2m` to override); mutating commands pass through and clear that cache. `gh xcache stats|keys|flush` inspects or clears the fallthrough cache. Set `GITCRAWL_GH_PATH` to choose the backend `gh`, and symlink or install the binary as `gh`/`gitcrawl-gh` to run the shim directly.
The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default, so the first screen is the useful cluster workload instead of singleton noise. Pass `--min-size 1` when you intentionally want singleton clusters. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds.
## Local Defaults

23
SPEC.md
View File

@ -76,6 +76,7 @@ Public commands:
- `cluster-explain`
- `neighbors`
- `search`
- `gh`
- `close-thread`
- `close-cluster`
- `exclude-cluster-member`
@ -102,6 +103,28 @@ gitcrawl search issues <query> -R owner/repo --state open --sync-if-stale 5m --j
This compatibility path reads from local SQLite by default. It avoids GitHub REST search quota and is not a replacement for final live `gh` verification before comments, closes, labels, or merges. `--sync-if-stale <duration>` may run one metadata sync first when the repository mirror is older than the requested max age; the search result itself still comes from SQLite.
`gh` is the agent-facing compatibility shim. It may be invoked as `gitcrawl gh ...` or by installing the binary as `gh`/`gitcrawl-gh`. Supported local reads:
```text
gitcrawl gh search issues|prs <query> -R owner/repo --state open --match comments --json number,title,url
gitcrawl gh issue view 123 -R owner/repo --json number,title,state,url,body
gitcrawl gh pr view 123 -R owner/repo --json number,title,state,url,isDraft,author
gitcrawl gh issue list -R owner/repo --state open --search "hot loop" --json number,title,url
gitcrawl gh pr list -R owner/repo --state open --search "manifest cache" --json number,title,url
```
Unsupported commands fall through to the real GitHub CLI. Read-only fallthroughs use a short persistent cache in `cache/gh-shim` for repeated agent calls (`run list/view`, `pr diff/checks`, `repo view/list`, `label list`, `issue/pr view`, and GET-only `api`). Entries live for 30 seconds by default, 1 minute for `api`, and 5 minutes for `pr diff`; `GITCRAWL_GH_CACHE_TTL` overrides the lifetime with any positive Go duration. Mutating commands are never cached and clear the fallthrough cache on success. The shim does not add GitHub write-back behavior of its own; writes remain delegated to `gh`.
Cache inspection commands:
```text
gitcrawl gh xcache stats
gitcrawl gh xcache keys
gitcrawl gh xcache flush
```
The cache key includes the resolved gitcrawl config path, current working directory, `GH_HOST`, `GH_REPO`, and exact `gh` arguments. This keeps sibling checkouts and portable stores isolated while still coalescing repeated calls from the same agent workspace. Concurrent cache misses use a lock file so one process populates the entry while peers wait for the result.
## Config
Default config path:

View File

@ -4,12 +4,19 @@ import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/openclaw/gitcrawl/internal/cli"
)
func main() {
if err := cli.New().Run(context.Background(), os.Args[1:]); err != nil {
args := os.Args[1:]
name := strings.TrimSuffix(filepath.Base(os.Args[0]), ".exe")
if name == "gh" || name == "gitcrawl-gh" {
args = append([]string{"gh"}, args...)
}
if err := cli.New().Run(context.Background(), args); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(cli.ExitCode(err))
}

View File

@ -3,6 +3,8 @@ package main
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
)
@ -31,3 +33,38 @@ func TestMainPrintsVersion(t *testing.T) {
t.Fatal("version output was empty")
}
}
// TestMainUsesGHShimWhenBinaryNameIsGH verifies that main prepends the "gh"
// subcommand when the executable's base name is "gh", so the arguments are
// routed through the shim and fall through to the configured backend gh.
func TestMainUsesGHShimWhenBinaryNameIsGH(t *testing.T) {
oldArgs := os.Args
oldStdout := os.Stdout
// main reads the process-wide os.Args and os.Stdout; restore both afterwards.
t.Cleanup(func() {
os.Args = oldArgs
os.Stdout = oldStdout
})
dir := t.TempDir()
// Fake backend gh: a shell script that echoes the arguments it received.
ghPath := filepath.Join(dir, "real-gh")
if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho shim-fallback:$*\n"), 0o755); err != nil {
t.Fatalf("write fake gh: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", ghPath)
t.Setenv("GITCRAWL_CONFIG", filepath.Join(dir, "config.toml"))
t.Setenv("GH_REPO", "openclaw/openclaw")
// Capture stdout through a pipe so the fallback output can be asserted.
read, write, err := os.Pipe()
if err != nil {
t.Fatalf("pipe: %v", err)
}
os.Stdout = write
// argv[0] ends in "gh", which is what triggers the shim path in main.
os.Args = []string{filepath.Join(dir, "gh"), "run", "view", "123"}
main()
// Close the write end so ReadFrom below sees EOF.
if err := write.Close(); err != nil {
t.Fatalf("close stdout pipe: %v", err)
}
var out bytes.Buffer
if _, err := out.ReadFrom(read); err != nil {
t.Fatalf("read stdout: %v", err)
}
if got := strings.TrimSpace(out.String()); got != "shim-fallback:run view 123" {
t.Fatalf("output = %q", got)
}
}

View File

@ -152,6 +152,8 @@ func (a *App) Run(ctx context.Context, args []string) error {
return a.runRuns(ctx, rest[1:])
case "search":
return a.runSearch(ctx, rest[1:])
case "gh":
return a.runGHShim(ctx, rest[1:])
case "configure":
return a.runConfigure(rest[1:])
case "refresh":
@ -2698,6 +2700,7 @@ Core commands:
cluster-explain alias for cluster-detail
neighbors list vector-nearest local issue and pull request rows
search search local thread documents; also supports search issues|prs gh syntax
gh gh-compatible local cache shim with fallback to real gh
portable prune prune volatile payloads from a portable store
tui [owner/repo] browse clusters in the terminal UI; repo is inferred when omitted

View File

@ -44,9 +44,14 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
limitRaw := fs.String("limit", "", "maximum rows")
limitShortRaw := fs.String("L", "", "maximum rows")
jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
jqRaw := fs.String("jq", "", "jq filter for JSON output")
fs.String("match", "", "accepted for gh compatibility; local search covers indexed thread documents")
fs.String("sort", "", "accepted for gh compatibility")
fs.String("order", "", "accepted for gh compatibility")
syncIfStaleRaw := fs.String("sync-if-stale", "", "sync owner/repo first when the local mirror is older than this duration")
if err := fs.Parse(normalizeCommandArgs(args[1:], map[string]bool{
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "sync-if-stale": true,
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "jq": true,
"match": true, "sort": true, "order": true, "sync-if-stale": true,
})); err != nil {
return usageErr(err)
}
@ -109,12 +114,7 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
data, err := json.MarshalIndent(rows, "", " ")
if err != nil {
return err
}
_, err = fmt.Fprintf(a.Stdout, "%s\n", data)
return err
return a.writeJSONValue(rows, strings.TrimSpace(*jqRaw))
}
for _, thread := range threads {
@ -260,6 +260,8 @@ func ghSearchJSONValue(thread store.Thread, field string) (any, error) {
switch field {
case "number":
return thread.Number, nil
case "id":
return thread.GitHubID, nil
case "title":
return thread.Title, nil
case "state":

322
internal/cli/gh_shim.go Normal file
View File

@ -0,0 +1,322 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"github.com/openclaw/gitcrawl/internal/store"
)
// runGHShim dispatches a gh-style command line.
//
// args is the gh argument vector without the leading "gh". The shim-only
// "xcache" command, "search issues|prs", and "issue|pr view|list" are tried
// locally first; a local attempt that reports itself as unsupported is
// re-run against the real gh. Everything else goes straight to the real gh
// through the read cache wrapper.
func (a *App) runGHShim(ctx context.Context, args []string) error {
	if len(args) == 0 {
		return a.execRealGH(ctx, args)
	}
	command := args[0]
	hasSub := len(args) >= 2

	if command == "xcache" {
		return a.runGHXCache(args[1:])
	}

	if command == "search" && hasSub && isGHSearchKind(args[1]) {
		localErr := a.runGHSearch(ctx, args[1:])
		if localErr != nil && isLocalGHUnsupported(localErr) {
			// Search fallbacks are never cached, so skip the cache wrapper.
			return a.execRealGH(ctx, args)
		}
		return localErr
	}

	if hasSub && (command == "issue" || command == "pr") {
		var localErr error
		switch args[1] {
		case "view":
			localErr = a.runGHThreadView(ctx, command, args[2:])
		case "list":
			localErr = a.runGHThreadList(ctx, command, args[2:])
		default:
			return a.execRealGHMaybeCached(ctx, args)
		}
		if localErr != nil && isLocalGHUnsupported(localErr) {
			return a.execRealGHMaybeCached(ctx, args)
		}
		return localErr
	}

	return a.execRealGHMaybeCached(ctx, args)
}
// runGHThreadView answers `gh issue view` / `gh pr view` from the local store.
//
// resource is "issue" or "pr" and args are the arguments after "view".
// Anything the local implementation cannot serve is reported as a
// local-unsupported error so the caller can delegate to the real gh: flags
// this parser does not know, a missing or non-numeric selector, an
// unresolvable repository, or an unknown --json field. Errors from the
// local lookup itself are returned unchanged.
func (a *App) runGHThreadView(ctx context.Context, resource string, args []string) error {
	fs := flag.NewFlagSet(resource+" view", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	repoShort := fs.String("R", "", "repository")
	repoLong := fs.String("repo", "", "repository")
	jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
	jqRaw := fs.String("jq", "", "jq filter")
	if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"R": true, "repo": true, "json": true, "jq": true})); err != nil {
		// gh accepts flags this parser does not define (for example --web or
		// --comments); let the real gh handle them instead of failing here.
		return localGHUnsupported(err)
	}
	if fs.NArg() != 1 {
		// gh allows omitting the selector (current branch), which the local
		// store cannot resolve.
		return localGHUnsupported(fmt.Errorf("gh %s view requires a number", resource))
	}
	number, err := parseThreadNumber(fs.Arg(0))
	if err != nil {
		// gh also accepts URLs and branch names as the selector.
		return localGHUnsupported(err)
	}
	repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*repoShort, *repoLong))
	if err != nil {
		return localGHUnsupported(err)
	}
	thread, err := a.localGHThread(ctx, repoValue, ghResourceKind(resource), number)
	if err != nil {
		return err
	}
	jsonFields := strings.TrimSpace(*jsonFieldsRaw)
	jqExpr := strings.TrimSpace(*jqRaw)
	if jsonFields != "" || jqExpr != "" || a.format == FormatJSON {
		if jsonFields == "" {
			jsonFields = "number,title,state,url"
		}
		rows, err := ghSearchJSONRows([]store.Thread{thread}, jsonFields)
		if err != nil {
			return localGHUnsupported(err)
		}
		// A view emits a single object rather than a one-element array.
		return a.writeJSONValue(rows[0], jqExpr)
	}
	_, err = fmt.Fprintf(a.Stdout, "title:\t%s\nstate:\t%s\nurl:\t%s\n\n%s\n", thread.Title, thread.State, thread.HTMLURL, strings.TrimSpace(thread.Body))
	return err
}
// runGHThreadList answers `gh issue list` / `gh pr list` from the local store.
//
// resource is "issue" or "pr" and args are the arguments after "list".
// Invocations the local implementation cannot serve are reported as
// local-unsupported so the caller can delegate to the real gh: flags this
// parser does not define, positional arguments, author/assignee/label
// filters, states or limits rejected by the local validators, an
// unresolvable repository, or an unknown --json field. Errors from the
// local search itself are returned unchanged.
func (a *App) runGHThreadList(ctx context.Context, resource string, args []string) error {
	fs := flag.NewFlagSet(resource+" list", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	repoShort := fs.String("R", "", "repository")
	repoLong := fs.String("repo", "", "repository")
	stateRaw := fs.String("state", "open", "state")
	limitRaw := fs.String("limit", "", "maximum rows")
	limitShortRaw := fs.String("L", "", "maximum rows")
	jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
	jqRaw := fs.String("jq", "", "jq filter")
	searchRaw := fs.String("search", "", "local search query")
	authorRaw := fs.String("author", "", "fall through to gh when set")
	assigneeRaw := fs.String("assignee", "", "fall through to gh when set")
	labelRaw := fs.String("label", "", "fall through to gh when set")
	if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{
		"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "jq": true,
		"search": true, "author": true, "assignee": true, "label": true,
	})); err != nil {
		// gh defines more list flags than this parser (for example --base,
		// --draft, --web); defer to the real gh rather than rejecting them.
		return localGHUnsupported(err)
	}
	if fs.NArg() != 0 {
		return localGHUnsupported(fmt.Errorf("unexpected gh %s list arguments: %s", resource, strings.Join(fs.Args(), " ")))
	}
	if strings.TrimSpace(*authorRaw) != "" || strings.TrimSpace(*assigneeRaw) != "" || strings.TrimSpace(*labelRaw) != "" {
		return localGHUnsupported(fmt.Errorf("list filters author/assignee/label are not local yet"))
	}
	state := strings.TrimSpace(*stateRaw)
	if err := validateGHSearchState(state); err != nil {
		// A state the local validator rejects may still be valid for gh.
		return localGHUnsupported(err)
	}
	limit, err := parseGHSearchLimit(*limitRaw, *limitShortRaw)
	if err != nil {
		return localGHUnsupported(err)
	}
	repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*repoShort, *repoLong))
	if err != nil {
		return localGHUnsupported(err)
	}
	threads, err := a.localGHThreads(ctx, repoValue, ghResourceKind(resource), state, strings.TrimSpace(*searchRaw), limit)
	if err != nil {
		return err
	}
	jsonFields := strings.TrimSpace(*jsonFieldsRaw)
	jqExpr := strings.TrimSpace(*jqRaw)
	if jsonFields != "" || jqExpr != "" || a.format == FormatJSON {
		if jsonFields == "" {
			jsonFields = "number,title,state,url"
		}
		rows, err := ghSearchJSONRows(threads, jsonFields)
		if err != nil {
			return localGHUnsupported(err)
		}
		return a.writeJSONValue(rows, jqExpr)
	}
	// Plain output: one tab-separated row per thread.
	for _, thread := range threads {
		if _, err := fmt.Fprintf(a.Stdout, "%d\t%s\t%s\n", thread.Number, thread.Title, thread.HTMLURL); err != nil {
			return err
		}
	}
	return nil
}
// localGHThread looks up one issue or pull request by number in the local
// store.
//
// repoValue is parsed with parseOwnerRepo and kind is the store's thread
// kind. Failing to open the read-only runtime, an unknown repository, and a
// thread missing from the cache are all reported as local-unsupported so
// the caller can fall back to the real gh; parse and query errors are
// returned as-is.
func (a *App) localGHThread(ctx context.Context, repoValue, kind string, number int) (store.Thread, error) {
	var none store.Thread

	owner, repoName, err := parseOwnerRepo(repoValue)
	if err != nil {
		return none, err
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return none, localGHUnsupported(err)
	}
	defer rt.Store.Close()

	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return none, localGHUnsupported(err)
	}

	opts := store.ThreadListOptions{
		RepoID:        repo.ID,
		IncludeClosed: true,
		Numbers:       []int{number},
	}
	candidates, err := rt.Store.ListThreadsFiltered(ctx, opts)
	if err != nil {
		return none, err
	}

	// The listing is filtered by number only, so the kind is checked here.
	for _, candidate := range candidates {
		if candidate.Number != number || candidate.Kind != kind {
			continue
		}
		return candidate, nil
	}
	return none, localGHUnsupported(fmt.Errorf("thread #%d was not found in local cache", number))
}
// localGHThreads runs a local thread search for one repository.
//
// kind, state, query and limit are passed through to store.SearchThreads,
// with locally closed threads included. Failing to open the read-only
// runtime or to find the repository is reported as local-unsupported;
// a parseOwnerRepo failure and search errors are returned unchanged.
func (a *App) localGHThreads(ctx context.Context, repoValue, kind, state, query string, limit int) ([]store.Thread, error) {
	owner, repoName, parseErr := parseOwnerRepo(repoValue)
	if parseErr != nil {
		return nil, parseErr
	}

	rt, openErr := a.openLocalRuntimeReadOnly(ctx)
	if openErr != nil {
		return nil, localGHUnsupported(openErr)
	}
	defer rt.Store.Close()

	repo, repoErr := rt.repository(ctx, owner, repoName)
	if repoErr != nil {
		return nil, localGHUnsupported(repoErr)
	}

	opts := store.ThreadSearchOptions{
		RepoID:               repo.ID,
		Query:                query,
		Kind:                 kind,
		State:                state,
		IncludeLocallyClosed: true,
		Limit:                limit,
	}
	return rt.Store.SearchThreads(ctx, opts)
}
// resolveGHRepo picks the repository a gh command targets.
//
// Precedence: the explicit -R/--repo value, then the GH_REPO environment
// variable, then the repository inferred from `git remote get-url origin`
// in the current directory. It returns an error when git cannot report an
// origin remote or the remote cannot be reduced to owner/repo.
func (a *App) resolveGHRepo(ctx context.Context, explicit string) (string, error) {
	if fromFlag := strings.TrimSpace(explicit); fromFlag != "" {
		return fromFlag, nil
	}
	if fromEnv := strings.TrimSpace(os.Getenv("GH_REPO")); fromEnv != "" {
		return fromEnv, nil
	}

	remoteURL, gitErr := exec.CommandContext(ctx, "git", "remote", "get-url", "origin").Output()
	if gitErr != nil {
		return "", fmt.Errorf("repository is required outside a git checkout; pass -R owner/repo")
	}

	inferred, inferErr := ownerRepoFromGitRemote(strings.TrimSpace(string(remoteURL)))
	if inferErr != nil {
		return "", inferErr
	}
	return inferred, nil
}
// execRealGH runs the real GitHub CLI with args, wiring the process stdin
// and the App's stdout/stderr straight through.
//
// The backend binary is GITCRAWL_GH_PATH when set, otherwise the Homebrew
// install path if it exists, otherwise the first "gh" on PATH. A PATH hit
// that is this very executable (the shim installed or symlinked as "gh")
// is rejected, because executing it would recurse into the shim forever.
// The returned error is the one from running the command.
func (a *App) execRealGH(ctx context.Context, args []string) error {
	ghPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH"))
	if ghPath == "" {
		if _, err := os.Stat("/opt/homebrew/opt/gh/bin/gh"); err == nil {
			ghPath = "/opt/homebrew/opt/gh/bin/gh"
		} else {
			found, err := exec.LookPath("gh")
			if err != nil {
				return fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH")
			}
			// os.Stat follows symlinks, so a "gh" symlink pointing at this
			// binary compares equal to the running executable.
			if self, selfErr := os.Executable(); selfErr == nil {
				selfInfo, selfStatErr := os.Stat(self)
				foundInfo, foundStatErr := os.Stat(found)
				if selfStatErr == nil && foundStatErr == nil && os.SameFile(selfInfo, foundInfo) {
					return fmt.Errorf("real gh not found: %s is the gitcrawl gh shim itself; set GITCRAWL_GH_PATH", found)
				}
			}
			ghPath = found
		}
	}
	cmd := exec.CommandContext(ctx, ghPath, args...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = a.Stdout
	cmd.Stderr = a.Stderr
	return cmd.Run()
}
// writeJSONValue prints value as indented JSON on the App's stdout, or pipes
// that JSON through an external jq when jqExpr is non-blank.
//
// jq is run with -r so string results are printed raw, which is how gh's
// own --jq renders them; without it callers would receive JSON-quoted
// strings. When no jq executable is on PATH the call reports
// local-unsupported (nothing has been written yet) so the caller can hand
// the whole command to the real gh.
func (a *App) writeJSONValue(value any, jqExpr string) error {
	data, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		return err
	}
	if strings.TrimSpace(jqExpr) == "" {
		_, err = fmt.Fprintf(a.Stdout, "%s\n", data)
		return err
	}
	jqPath, err := exec.LookPath("jq")
	if err != nil {
		return localGHUnsupported(fmt.Errorf("--jq requires jq executable"))
	}
	cmd := exec.Command(jqPath, "-r", jqExpr)
	cmd.Stdin = bytes.NewReader(data)
	cmd.Stdout = a.Stdout
	cmd.Stderr = a.Stderr
	return cmd.Run()
}
// ghResourceKind maps a gh resource word to the thread kind string:
// "pr" becomes "pull_request"; every other value becomes "issue".
func ghResourceKind(resource string) string {
	switch resource {
	case "pr":
		return "pull_request"
	default:
		return "issue"
	}
}
// parseThreadNumber parses an issue or pull request number such as "12" or
// "#12". Surrounding whitespace is ignored on either side of the optional
// "#". It returns an error for anything that is not a positive integer.
func parseThreadNumber(value string) (int, error) {
	// Trim before stripping "#", otherwise " #12" keeps its prefix.
	value = strings.TrimSpace(value)
	value = strings.TrimSpace(strings.TrimPrefix(value, "#"))
	number, err := strconv.Atoi(value)
	if err != nil || number <= 0 {
		return 0, fmt.Errorf("expected positive issue or pull request number, got %q", value)
	}
	return number, nil
}
// ownerRepoFromGitRemote reduces a git remote to "owner/repo".
//
// It accepts URL remotes (https://host/owner/repo.git,
// ssh://git@host[:port]/owner/repo), scp-like remotes
// (git@host:owner/repo.git) and bare "owner/repo" values. Trailing slashes
// and a ".git" suffix are ignored, and the last two path segments are used.
// It returns an error when the path has fewer than two usable segments.
func ownerRepoFromGitRemote(value string) (string, error) {
	value = strings.TrimRight(strings.TrimSpace(value), "/")
	value = strings.TrimSuffix(value, ".git")
	if _, afterScheme, isURL := strings.Cut(value, "://"); isURL {
		// Drop the authority so the host can never be mistaken for an owner.
		_, value, _ = strings.Cut(afterScheme, "/")
	} else if _, afterHost, isSCP := strings.Cut(value, ":"); isSCP {
		value = afterHost
	}
	parts := strings.Split(strings.Trim(value, "/"), "/")
	if len(parts) < 2 {
		return "", fmt.Errorf("could not infer owner/repo from origin remote")
	}
	owner, repo := parts[len(parts)-2], parts[len(parts)-1]
	for _, segment := range []string{owner, repo} {
		if segment == "" || segment == "." || segment == ".." {
			return "", fmt.Errorf("could not infer owner/repo from origin remote")
		}
	}
	// Join with the OS separator, then normalize back to a slash.
	return filepath.ToSlash(filepath.Join(owner, repo)), nil
}
// errLocalGHUnsupported marks a request the local shim cannot answer; the
// dispatcher reacts to it by delegating the command to the real gh.
var errLocalGHUnsupported = errors.New("local gh shim unsupported")

// localGHUnsupported wraps err so that errors.Is(result,
// errLocalGHUnsupported) holds while keeping err's message. A nil err
// yields the bare sentinel.
func localGHUnsupported(err error) error {
	if err == nil {
		return errLocalGHUnsupported
	}
	return fmt.Errorf("%w: %v", errLocalGHUnsupported, err)
}

// isLocalGHUnsupported reports whether err asks for a fallback to the real
// gh: either it wraps errLocalGHUnsupported or its message mentions an
// unsupported --json field. A nil err is never a fallback request.
func isLocalGHUnsupported(err error) bool {
	if err == nil {
		return false
	}
	return errors.Is(err, errLocalGHUnsupported) || strings.Contains(err.Error(), "unsupported --json field")
}

View File

@ -0,0 +1,344 @@
package cli
import (
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/config"
)
// execRealGHMaybeCached runs a fallthrough gh command, serving cacheable
// read-only commands from the on-disk cache when a fresh entry exists.
//
// Non-cacheable commands run directly; when one succeeds and is classified
// as mutating, the whole cache is cleared. For cacheable commands a lock
// file next to the entry is used so that, on a miss, one caller runs gh
// while others poll for its result. Only successful runs are stored.
func (a *App) execRealGHMaybeCached(ctx context.Context, args []string) error {
if !cacheableGHRead(args) {
err := a.execRealGH(ctx, args)
if err == nil && mutatingGHCommand(args) {
// Best effort: a failed invalidation does not fail the command.
_ = a.clearGHCommandCache()
}
return err
}
cacheDir, err := a.ghCommandCacheDir()
if err != nil {
// No usable cache directory: run uncached.
return a.execRealGH(ctx, args)
}
ttl := ghCommandCacheTTL(args)
entryPath := filepath.Join(cacheDir, a.ghCommandCacheKey(args)+".json")
if entry, ok := readGHCommandCache(entryPath, ttl); ok {
return a.writeGHCommandCacheEntry(entry)
}
lockPath := entryPath + ".lock"
lock, locked := tryGHCommandCacheLock(lockPath)
if !locked {
// Someone else holds the lock: wait for their entry, then retry the
// lock if no entry showed up.
if entry, ok := waitGHCommandCache(entryPath, lockPath, ttl); ok {
return a.writeGHCommandCacheEntry(entry)
}
lock, locked = tryGHCommandCacheLock(lockPath)
}
if locked {
defer func() {
_ = lock.Close()
_ = os.Remove(lockPath)
}()
// Re-check under the lock: the entry may have been written between the
// first read and acquiring the lock.
if entry, ok := readGHCommandCache(entryPath, ttl); ok {
return a.writeGHCommandCacheEntry(entry)
}
}
// Reached with or without the lock; without it the command still runs.
stdout, stderr, exitCode, err := a.captureRealGH(ctx, args)
if err == nil {
// Failing to store the entry is ignored; the output is still replayed.
_ = writeGHCommandCache(entryPath, ghCommandCacheEntry{
CreatedAt: time.Now().UTC(),
Args: append([]string(nil), args...),
ExitCode: exitCode,
Stdout: stdout,
Stderr: stderr,
})
}
_, _ = io.WriteString(a.Stdout, stdout)
_, _ = io.WriteString(a.Stderr, stderr)
return err
}
// captureRealGH runs the real GitHub CLI with args and buffers its output.
//
// It returns the captured stdout and stderr, the exit code, and the error
// from running the command. The exit code is 0 on success, the process
// exit code when the command exited non-zero, 1 for any other run failure,
// and 127 when no backend gh can be located.
//
// The backend binary is GITCRAWL_GH_PATH when set, otherwise the Homebrew
// install path if it exists, otherwise the first "gh" on PATH. A PATH hit
// that is this very executable (the shim installed or symlinked as "gh")
// is rejected, because executing it would recurse into the shim.
func (a *App) captureRealGH(ctx context.Context, args []string) (string, string, int, error) {
	ghPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH"))
	if ghPath == "" {
		if _, err := os.Stat("/opt/homebrew/opt/gh/bin/gh"); err == nil {
			ghPath = "/opt/homebrew/opt/gh/bin/gh"
		} else {
			found, err := exec.LookPath("gh")
			if err != nil {
				return "", "", 127, fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH")
			}
			// os.Stat follows symlinks, so a "gh" symlink pointing at this
			// binary compares equal to the running executable.
			if self, selfErr := os.Executable(); selfErr == nil {
				selfInfo, selfStatErr := os.Stat(self)
				foundInfo, foundStatErr := os.Stat(found)
				if selfStatErr == nil && foundStatErr == nil && os.SameFile(selfInfo, foundInfo) {
					return "", "", 127, fmt.Errorf("real gh not found: %s is the gitcrawl gh shim itself; set GITCRAWL_GH_PATH", found)
				}
			}
			ghPath = found
		}
	}
	var stdout, stderr bytes.Buffer
	cmd := exec.CommandContext(ctx, ghPath, args...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err := cmd.Run()
	exitCode := 0
	if err != nil {
		// Default for failures that are not a plain non-zero exit.
		exitCode = 1
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			exitCode = exitErr.ExitCode()
		}
	}
	return stdout.String(), stderr.String(), exitCode, err
}
// ghCommandCacheDir returns the "gh-shim" directory under the configured
// cache directory, creating it when missing. If the config cannot be
// loaded, the default config's cache directory is used instead.
func (a *App) ghCommandCacheDir() (string, error) {
	cfg, loadErr := config.Load(a.configPath)
	if loadErr != nil {
		cfg = config.Default()
	}
	shimDir := filepath.Join(cfg.CacheDir, "gh-shim")
	if mkdirErr := os.MkdirAll(shimDir, 0o755); mkdirErr != nil {
		return "", mkdirErr
	}
	return shimDir, nil
}
// clearGHCommandCache empties the fallthrough cache, discarding the count
// of removed files and returning only the error.
func (a *App) clearGHCommandCache() error {
	if _, err := a.clearGHCommandCacheCount(); err != nil {
		return err
	}
	return nil
}
// clearGHCommandCacheCount deletes every regular file in the cache
// directory and returns how many were actually removed. Files that fail to
// delete are skipped without reporting an error; only resolving or listing
// the directory can fail.
func (a *App) clearGHCommandCacheCount() (int, error) {
	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return 0, err
	}
	listing, err := os.ReadDir(cacheDir)
	if err != nil {
		return 0, err
	}
	count := 0
	for _, item := range listing {
		if !item.Type().IsRegular() {
			continue
		}
		if os.Remove(filepath.Join(cacheDir, item.Name())) != nil {
			continue
		}
		count++
	}
	return count, nil
}
// ghCommandCacheEntry is the JSON document stored for one cached gh
// invocation.
type ghCommandCacheEntry struct {
// CreatedAt is compared against the TTL on read; a zero value is treated
// as invalid.
CreatedAt time.Time `json:"created_at"`
// Args is the gh argument vector that produced this entry.
Args []string `json:"args"`
// ExitCode is the command's exit code; a non-zero value makes replay
// return an error.
ExitCode int `json:"exit_code"`
// Stdout and Stderr are the captured streams, replayed verbatim on a hit.
Stdout string `json:"stdout"`
Stderr string `json:"stderr"`
}
// writeGHCommandCacheEntry replays a cached invocation: its stdout and
// stderr are written to the App's streams (write failures are ignored),
// and a non-zero recorded exit code is surfaced as an error.
func (a *App) writeGHCommandCacheEntry(entry ghCommandCacheEntry) error {
	_, _ = io.WriteString(a.Stdout, entry.Stdout)
	_, _ = io.WriteString(a.Stderr, entry.Stderr)
	if code := entry.ExitCode; code != 0 {
		return fmt.Errorf("cached gh command failed with exit code %d", code)
	}
	return nil
}
// readGHCommandCache loads the cache entry at path. The boolean is false
// when the file cannot be read, is not valid JSON, has no creation time, or
// is older than ttl; in those cases the returned entry is the zero value.
func readGHCommandCache(path string, ttl time.Duration) (ghCommandCacheEntry, bool) {
	var miss ghCommandCacheEntry

	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return miss, false
	}

	var cached ghCommandCacheEntry
	if decodeErr := json.Unmarshal(raw, &cached); decodeErr != nil {
		return miss, false
	}

	fresh := !cached.CreatedAt.IsZero() && time.Since(cached.CreatedAt) <= ttl
	if !fresh {
		return miss, false
	}
	return cached, true
}
// writeGHCommandCache stores entry as JSON at path. The data is written to
// a temporary file in the same directory and then renamed over path, so a
// reader never observes a partially written entry. The temporary file is
// removed whenever a step after its creation fails.
func writeGHCommandCache(path string, entry ghCommandCacheEntry) error {
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	payload, err := json.Marshal(entry)
	if err != nil {
		return err
	}
	temp, err := os.CreateTemp(dir, "."+filepath.Base(path)+".tmp-*")
	if err != nil {
		return err
	}
	tempPath := temp.Name()
	// discard removes the temporary file and hands back the causing error.
	discard := func(cause error) error {
		_ = os.Remove(tempPath)
		return cause
	}
	if _, writeErr := temp.Write(payload); writeErr != nil {
		_ = temp.Close()
		return discard(writeErr)
	}
	if closeErr := temp.Close(); closeErr != nil {
		return discard(closeErr)
	}
	if renameErr := os.Rename(tempPath, path); renameErr != nil {
		return discard(renameErr)
	}
	return nil
}
// tryGHCommandCacheLock attempts to take the lock by exclusively creating
// the file at path. On success it returns the open file, with this
// process's PID written to it, and true; if the file already exists or
// cannot be created it returns nil and false.
func tryGHCommandCacheLock(path string) (*os.File, bool) {
	const exclusiveCreate = os.O_CREATE | os.O_EXCL | os.O_WRONLY
	handle, openErr := os.OpenFile(path, exclusiveCreate, 0o600)
	if openErr != nil {
		return nil, false
	}
	// The PID is informational only; a failed write does not release the lock.
	_, _ = io.WriteString(handle, fmt.Sprintf("%d\n", os.Getpid()))
	return handle, true
}
// waitGHCommandCache polls every 100ms, for up to 30 seconds, for another
// process to publish the entry at entryPath.
//
// It returns the entry and true as soon as a fresh one can be read. It
// returns false early when the lock file has disappeared without an entry
// appearing. If the deadline passes it removes the lock file before
// returning false.
func waitGHCommandCache(entryPath, lockPath string, ttl time.Duration) (ghCommandCacheEntry, bool) {
	const (
		maxWait      = 30 * time.Second
		pollInterval = 100 * time.Millisecond
	)
	for deadline := time.Now().Add(maxWait); time.Now().Before(deadline); {
		time.Sleep(pollInterval)
		if cached, ok := readGHCommandCache(entryPath, ttl); ok {
			return cached, true
		}
		_, statErr := os.Stat(lockPath)
		if os.IsNotExist(statErr) {
			return ghCommandCacheEntry{}, false
		}
	}
	_ = os.Remove(lockPath)
	return ghCommandCacheEntry{}, false
}
// ghCommandCacheKey returns the hex SHA-256 identifying a cached gh call.
// The hashed material is a NUL-separated list of: the literal "v1", the
// resolved config path, the current working directory (empty if it cannot
// be determined), GH_HOST, GH_REPO, and the NUL-joined args.
func (a *App) ghCommandCacheKey(args []string) string {
	const sep = "\x00"
	workDir, _ := os.Getwd()
	fields := []string{
		"v1",
		config.ResolvePath(a.configPath),
		workDir,
		os.Getenv("GH_HOST"),
		os.Getenv("GH_REPO"),
		strings.Join(args, sep),
	}
	digest := sha256.Sum256([]byte(strings.Join(fields, sep)))
	return hex.EncodeToString(digest[:])
}
// cacheableGHRead reports whether args is a read-only gh command whose
// output may be captured and replayed from the cache.
//
// Commands that open a browser (--web, its -w shorthand, --browser), prompt
// (--interactive) or stream progress (--watch) are never cacheable:
// replaying them would skip the side effect, and capturing them would
// withhold output until the command exits. Otherwise the cacheable set is
// read-only `api` calls, `run list|view`, `pr diff|checks|view`,
// `issue view`, `repo view|list` and `label list`.
func cacheableGHRead(args []string) bool {
	if len(args) == 0 || hasAnyGHFlag(args, "--web", "-w", "--browser", "--interactive", "--watch") {
		return false
	}
	switch args[0] {
	case "api":
		return ghAPIReadOnly(args[1:])
	case "run":
		return len(args) >= 2 && (args[1] == "list" || args[1] == "view")
	case "pr":
		return len(args) >= 2 && (args[1] == "diff" || args[1] == "checks" || args[1] == "view")
	case "issue":
		return len(args) >= 2 && args[1] == "view"
	case "repo":
		return len(args) >= 2 && (args[1] == "view" || args[1] == "list")
	case "label":
		return len(args) >= 2 && args[1] == "list"
	default:
		return false
	}
}
// ghCommandName returns a short label for a gh argument vector: the empty
// string for no arguments, the single argument when there is only one, and
// otherwise the first two arguments joined by a space.
func ghCommandName(args []string) string {
	switch len(args) {
	case 0:
		return ""
	case 1:
		return args[0]
	default:
		return strings.Join(args[:2], " ")
	}
}
// ghAPIReadOnly reports whether the arguments of a `gh api` call (without
// the leading "api") describe a plain GET request.
//
// Any request-body flag makes the call non-read-only: --input, -f/--field
// and -F/--raw-field, in separate-value ("-f k=v"), attached ("-fk=v") and
// "=" ("--field=k=v") spellings. The method is taken from -X/--method in
// the same three spellings ("-X POST", "-XPOST", "--method=POST") and is
// compared case-insensitively; a trailing -X/--method with no value is
// treated as not read-only.
func ghAPIReadOnly(args []string) bool {
	method := "GET"
	for index := 0; index < len(args); index++ {
		arg := args[index]
		switch {
		case arg == "--method" || arg == "-X":
			if index+1 >= len(args) {
				return false
			}
			index++
			method = strings.ToUpper(args[index])
		case strings.HasPrefix(arg, "--method="):
			method = strings.ToUpper(strings.TrimPrefix(arg, "--method="))
		case strings.HasPrefix(arg, "-X"):
			// Attached shorthand value: -XPOST or -X=POST.
			method = strings.ToUpper(strings.TrimPrefix(strings.TrimPrefix(arg, "-X"), "="))
		case arg == "--input" || arg == "--field" || arg == "--raw-field",
			strings.HasPrefix(arg, "--input="),
			strings.HasPrefix(arg, "--field="),
			strings.HasPrefix(arg, "--raw-field="):
			return false
		case strings.HasPrefix(arg, "-f") || strings.HasPrefix(arg, "-F"):
			// Covers "-f", "-F" and attached values such as "-fkey=value".
			// Long flags start with "--" and never match here.
			return false
		}
	}
	return method == "GET"
}
// ghCommandCacheTTL returns how long a cached result for args stays valid.
// A positive, parseable GITCRAWL_GH_CACHE_TTL duration overrides
// everything. Otherwise, for commands with at least two arguments,
// `pr diff` gets 5 minutes and `api` gets 1 minute; all other commands get
// 30 seconds.
func ghCommandCacheTTL(args []string) time.Duration {
	const fallback = 30 * time.Second

	override := strings.TrimSpace(os.Getenv("GITCRAWL_GH_CACHE_TTL"))
	if override != "" {
		parsed, parseErr := time.ParseDuration(override)
		if parseErr == nil && parsed > 0 {
			return parsed
		}
	}

	if len(args) < 2 {
		return fallback
	}
	switch {
	case args[0] == "pr" && args[1] == "diff":
		return 5 * time.Minute
	case args[0] == "api":
		return time.Minute
	}
	return fallback
}
// mutatingGHCommand reports whether args is a gh command that changes
// remote state, in which case the fallthrough cache must be invalidated
// after it succeeds.
//
// Besides issue and pr writes and non-GET `api` calls, it covers the
// writers for `run`, `label` and `repo`, and the `workflow` subcommands
// that trigger or toggle runs, so that cached reads of those families do
// not go stale. Commands with fewer than two arguments, and
// `pr checkout` (which only touches the local checkout), are not mutating.
func mutatingGHCommand(args []string) bool {
	if len(args) < 2 {
		return false
	}
	switch args[0] {
	case "issue":
		switch args[1] {
		case "close", "comment", "create", "delete", "develop", "edit", "lock", "pin", "reopen", "transfer", "unlock", "unpin":
			return true
		}
	case "pr":
		switch args[1] {
		case "checkout":
			return false
		case "close", "comment", "create", "edit", "lock", "merge", "ready", "reopen", "revert", "review", "unlock", "update-branch":
			return true
		}
	case "run":
		switch args[1] {
		case "cancel", "delete", "rerun":
			return true
		}
	case "workflow":
		switch args[1] {
		case "disable", "enable", "run":
			return true
		}
	case "label":
		switch args[1] {
		case "clone", "create", "delete", "edit":
			return true
		}
	case "repo":
		switch args[1] {
		case "archive", "create", "delete", "edit", "fork", "rename", "sync", "unarchive":
			return true
		}
	case "api":
		return !ghAPIReadOnly(args[1:])
	}
	return false
}
// hasAnyGHFlag reports whether any element of args is one of flags, either
// exactly or in "flag=value" form.
func hasAnyGHFlag(args []string, flags ...string) bool {
	matches := func(arg string) bool {
		for _, name := range flags {
			if arg == name || strings.HasPrefix(arg, name+"=") {
				return true
			}
		}
		return false
	}
	for _, arg := range args {
		if matches(arg) {
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,332 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"os"
"path/filepath"
"strings"
"sync"
"testing"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestGHShimSearchAcceptsGHFlags checks that `gh search issues` run through
// the shim accepts gh-only flags (--match, --sort, --order) and still
// answers with JSON rows from the seeded local store.
func TestGHShimSearchAcceptsGHFlags(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{
"--config", configPath,
"gh", "search", "issues", "hot loop",
"-R", "openclaw/openclaw",
"--state", "open",
"--match", "comments",
"--sort", "updated",
"--order", "desc",
"--json", "number,title,state,url",
"--limit", "10",
}); err != nil {
t.Fatalf("gh shim search: %v", err)
}
var rows []map[string]any
if err := json.Unmarshal(stdout.Bytes(), &rows); err != nil {
t.Fatalf("decode search: %v\n%s", err, stdout.String())
}
// Exactly one match is expected: thread number 10.
if len(rows) != 1 || int(rows[0]["number"].(float64)) != 10 {
t.Fatalf("rows = %#v", rows)
}
}
// TestGHShimViewAndListUseLocalCache checks that `gh pr view --json` and
// `gh issue list --json` are answered by the shim without a backend gh
// being configured by this test.
func TestGHShimViewAndListUseLocalCache(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,title,isDraft,author"}); err != nil {
t.Fatalf("gh pr view: %v", err)
}
// A view decodes as a single JSON object, not an array.
var view map[string]any
if err := json.Unmarshal(stdout.Bytes(), &view); err != nil {
t.Fatalf("decode view: %v\n%s", err, stdout.String())
}
if int(view["number"].(float64)) != 12 || view["isDraft"] != true {
t.Fatalf("view = %#v", view)
}
// Reuse the same buffer for the list command.
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number,title"}); err != nil {
t.Fatalf("gh issue list: %v", err)
}
var list []map[string]any
if err := json.Unmarshal(stdout.Bytes(), &list); err != nil {
t.Fatalf("decode list: %v\n%s", err, stdout.String())
}
if len(list) != 1 || int(list[0]["number"].(float64)) != 10 {
t.Fatalf("list = %#v", list)
}
}
// TestGHShimFallsBackForUnsupportedRead checks that requesting a --json
// field the local implementation does not support hands the original
// arguments, unchanged, to the backend gh.
func TestGHShimFallsBackForUnsupportedRead(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
dir := t.TempDir()
// Fake backend gh: a shell script that echoes the arguments it received.
ghPath := filepath.Join(dir, "gh")
if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho fallback:$*\n"), 0o755); err != nil {
t.Fatalf("write fake gh: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", ghPath)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "unsupportedField"}); err != nil {
t.Fatalf("fallback: %v", err)
}
if got := strings.TrimSpace(stdout.String()); got != "fallback:pr view 12 -R openclaw/openclaw --json unsupportedField" {
t.Fatalf("fallback output = %q", got)
}
}
// TestGHShimCachesReadOnlyFallbackCommands checks that a repeated read-only
// fallthrough (`gh run view`) invokes the backend gh once, and that the
// `xcache stats|keys|flush` commands report and remove that single entry.
func TestGHShimCachesReadOnlyFallbackCommands(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
dir := t.TempDir()
countPath := filepath.Join(dir, "count")
ghPath := filepath.Join(dir, "gh")
// Fake backend gh: increments a counter file on every call and echoes the
// call number together with its arguments.
script := `#!/bin/sh
count=0
if [ -f "$GH_SHIM_COUNT" ]; then
count=$(cat "$GH_SHIM_COUNT")
fi
count=$((count + 1))
printf "%s" "$count" > "$GH_SHIM_COUNT"
echo "call-$count:$*"
`
if err := os.WriteFile(ghPath, []byte(script), 0o755); err != nil {
t.Fatalf("write fake gh: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", ghPath)
t.Setenv("GH_SHIM_COUNT", countPath)
// GH_REPO is part of the cache key; derive it from the temp dir name.
t.Setenv("GH_REPO", "cache-test/"+filepath.Base(dir))
t.Setenv("GITCRAWL_GH_CACHE_TTL", "1m")
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
args := []string{"--config", configPath, "gh", "run", "view", "123", "-R", "openclaw/openclaw", "--json", "status"}
if err := run.Run(ctx, args); err != nil {
t.Fatalf("first cached read: %v", err)
}
first := stdout.String()
stdout.Reset()
if err := run.Run(ctx, args); err != nil {
t.Fatalf("second cached read: %v", err)
}
// A cache hit replays the first output, including its "call-1" prefix.
if second := stdout.String(); second != first {
t.Fatalf("cached output changed: first=%q second=%q", first, second)
}
countData, err := os.ReadFile(countPath)
if err != nil {
t.Fatalf("read count: %v", err)
}
if strings.TrimSpace(string(countData)) != "1" {
t.Fatalf("fake gh call count = %q, want 1", countData)
}
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "gh", "xcache", "stats", "--json"}); err != nil {
t.Fatalf("xcache stats: %v", err)
}
var stats map[string]any
if err := json.Unmarshal(stdout.Bytes(), &stats); err != nil {
t.Fatalf("decode stats: %v\n%s", err, stdout.String())
}
if int(stats["entries"].(float64)) != 1 {
t.Fatalf("stats = %#v", stats)
}
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "gh", "xcache", "keys", "--json"}); err != nil {
t.Fatalf("xcache keys: %v", err)
}
var keys []map[string]any
if err := json.Unmarshal(stdout.Bytes(), &keys); err != nil {
t.Fatalf("decode keys: %v\n%s", err, stdout.String())
}
if len(keys) != 1 || keys[0]["command"] != "run view" {
t.Fatalf("keys = %#v", keys)
}
stdout.Reset()
if err := run.Run(ctx, []string{"--config", configPath, "gh", "xcache", "flush", "--json"}); err != nil {
t.Fatalf("xcache flush: %v", err)
}
var flushed map[string]any
if err := json.Unmarshal(stdout.Bytes(), &flushed); err != nil {
t.Fatalf("decode flush: %v\n%s", err, stdout.String())
}
if int(flushed["removed"].(float64)) != 1 {
t.Fatalf("flushed = %#v", flushed)
}
}
// TestGHShimCoalescesConcurrentReadOnlyFallbacks starts two identical
// read-only `gh run view` invocations at the same time against a deliberately
// slow fake gh binary. It verifies that both callers receive the same output
// and that the fake backend was executed exactly once.
func TestGHShimCoalescesConcurrentReadOnlyFallbacks(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)

	workDir := t.TempDir()
	counterFile := filepath.Join(workDir, "count")
	fakeGH := filepath.Join(workDir, "gh")
	// The fake gh increments a counter file on every invocation and sleeps
	// briefly so that the two concurrent callers overlap in time.
	const fakeScript = `#!/bin/sh
count=0
if [ -f "$GH_SHIM_COUNT" ]; then
count=$(cat "$GH_SHIM_COUNT")
fi
count=$((count + 1))
printf "%s" "$count" > "$GH_SHIM_COUNT"
sleep 0.2
echo "call-$count:$*"
`
	if err := os.WriteFile(fakeGH, []byte(fakeScript), 0o755); err != nil {
		t.Fatalf("write fake gh: %v", err)
	}

	env := [][2]string{
		{"GITCRAWL_GH_PATH", fakeGH},
		{"GH_SHIM_COUNT", counterFile},
		{"GH_REPO", "coalesce-test/" + filepath.Base(workDir)},
		{"GITCRAWL_GH_CACHE_TTL", "1m"},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	const callers = 2
	args := []string{"--config", configPath, "gh", "run", "view", "456", "-R", "openclaw/openclaw", "--json", "status"}
	errs := make(chan error, callers)
	outputs := make(chan string, callers)

	var wg sync.WaitGroup
	wg.Add(callers)
	for i := 0; i < callers; i++ {
		go func() {
			defer wg.Done()
			var buf bytes.Buffer
			app := New()
			app.Stdout = &buf
			runErr := app.Run(ctx, args)
			if runErr != nil {
				errs <- runErr
				return
			}
			outputs <- buf.String()
		}()
	}
	wg.Wait()
	// Both channels are buffered for every caller, so they can be closed and
	// drained once all goroutines have finished.
	close(errs)
	close(outputs)

	for runErr := range errs {
		t.Fatalf("coalesced run: %v", runErr)
	}
	if got := len(outputs); got != callers {
		t.Fatalf("outputs = %d, want 2", got)
	}

	var first string
	for out := range outputs {
		switch {
		case first == "":
			first = out
		case out != first:
			t.Fatalf("coalesced outputs differ: %q vs %q", first, out)
		}
	}

	countData, err := os.ReadFile(counterFile)
	if err != nil {
		t.Fatalf("read count: %v", err)
	}
	if calls := strings.TrimSpace(string(countData)); calls != "1" {
		t.Fatalf("fake gh call count = %q, want 1", countData)
	}
}
// seedGHShimRepo prepares an isolated gitcrawl environment for gh shim tests.
//
// It initializes a config file and SQLite database inside a fresh temporary
// directory, points the config's CacheDir at a "cache" subdirectory, and seeds
// the store with the openclaw/openclaw repository containing one open issue
// (#10) and one open draft pull request (#12), each with a search document.
// The store is closed before returning. The returned string is the path of
// the config file; any failure aborts the test via t.Fatalf.
func seedGHShimRepo(t *testing.T, ctx context.Context) string {
	t.Helper()

	root := t.TempDir()
	configPath := filepath.Join(root, "config.toml")
	dbPath := filepath.Join(root, "gitcrawl.db")

	if err := New().Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
		t.Fatalf("init: %v", err)
	}

	// Redirect the cache into the temp dir so tests never touch a shared cache.
	cfg, err := config.Load(configPath)
	if err != nil {
		t.Fatalf("load config: %v", err)
	}
	cfg.CacheDir = filepath.Join(root, "cache")
	if err := config.Save(configPath, cfg); err != nil {
		t.Fatalf("save config: %v", err)
	}

	st, err := store.Open(ctx, dbPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}

	repo := store.Repository{
		Owner:     "openclaw",
		Name:      "openclaw",
		FullName:  "openclaw/openclaw",
		RawJSON:   "{}",
		UpdatedAt: "2026-04-27T00:00:00Z",
	}
	repoID, err := st.UpsertRepository(ctx, repo)
	if err != nil {
		t.Fatalf("seed repository: %v", err)
	}

	issue := store.Thread{
		RepoID:          repoID,
		GitHubID:        "10",
		Number:          10,
		Kind:            "issue",
		State:           "open",
		Title:           "Hot loop burns CPU",
		Body:            "the runtime has a hot loop",
		AuthorLogin:     "alice",
		AuthorType:      "User",
		HTMLURL:         "https://github.com/openclaw/openclaw/issues/10",
		LabelsJSON:      `[{"name":"bug","color":"d73a4a"}]`,
		AssigneesJSON:   "[]",
		RawJSON:         "{}",
		ContentHash:     "issue-10",
		UpdatedAtGitHub: "2026-04-27T01:00:00Z",
		UpdatedAt:       "2026-04-27T01:00:00Z",
	}
	issueID, err := st.UpsertThread(ctx, issue)
	if err != nil {
		t.Fatalf("seed issue: %v", err)
	}
	issueDoc := store.Document{
		ThreadID:   issueID,
		Title:      "Hot loop burns CPU",
		RawText:    "runtime hot loop burns CPU",
		DedupeText: "runtime hot loop burns cpu",
		UpdatedAt:  "2026-04-27T01:00:00Z",
	}
	if _, err := st.UpsertDocument(ctx, issueDoc); err != nil {
		t.Fatalf("seed issue document: %v", err)
	}

	pr := store.Thread{
		RepoID:          repoID,
		GitHubID:        "12",
		Number:          12,
		Kind:            "pull_request",
		State:           "open",
		Title:           "Manifest cache update",
		AuthorLogin:     "bob",
		AuthorType:      "User",
		HTMLURL:         "https://github.com/openclaw/openclaw/pull/12",
		LabelsJSON:      "[]",
		AssigneesJSON:   "[]",
		RawJSON:         "{}",
		ContentHash:     "pr-12",
		IsDraft:         true,
		UpdatedAtGitHub: "2026-04-27T02:00:00Z",
		UpdatedAt:       "2026-04-27T02:00:00Z",
	}
	prID, err := st.UpsertThread(ctx, pr)
	if err != nil {
		t.Fatalf("seed pr: %v", err)
	}
	prDoc := store.Document{
		ThreadID:   prID,
		Title:      "Manifest cache update",
		RawText:    "manifest cache refresh",
		DedupeText: "manifest cache refresh",
		UpdatedAt:  "2026-04-27T02:00:00Z",
	}
	if _, err := st.UpsertDocument(ctx, prDoc); err != nil {
		t.Fatalf("seed pr document: %v", err)
	}

	if err := st.Close(); err != nil {
		t.Fatalf("close store: %v", err)
	}
	return configPath
}

View File

@ -0,0 +1,198 @@
package cli
import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// ghCommandCacheStats is the summary reported by `gh xcache stats` for the gh
// fallthrough cache directory.
type ghCommandCacheStats struct {
	// CacheDir is the directory that was scanned.
	CacheDir string `json:"cache_dir"`
	// Entries is the number of cache entries that have not expired.
	Entries int `json:"entries"`
	// Expired is the number of cache entries that are past their TTL.
	Expired int `json:"expired"`
	// Locks is the number of directory entries whose name ends in ".lock".
	Locks int `json:"locks"`
	// Bytes is the total on-disk size of all entries, expired ones included.
	Bytes int64 `json:"bytes"`
	// Commands breaks entries down by command name; expired entries are
	// included in these per-command counts.
	Commands map[string]ghCommandCacheCount `json:"commands"`
}
// ghCommandCacheCount is the per-command aggregate stored in
// ghCommandCacheStats.Commands.
type ghCommandCacheCount struct {
	// Entries is the number of cache files recorded for the command.
	Entries int `json:"entries"`
	// Bytes is the combined on-disk size of those cache files.
	Bytes int64 `json:"bytes"`
}
// ghCommandCacheKeyInfo describes a single cache file as listed by
// `gh xcache keys`.
type ghCommandCacheKeyInfo struct {
	// Key is the cache file name with the ".json" suffix removed.
	Key string `json:"key"`
	// CreatedAt is the creation timestamp stored inside the cache entry.
	CreatedAt time.Time `json:"created_at"`
	// Age is the time elapsed since CreatedAt, rounded to whole seconds and
	// rendered as a duration string.
	Age string `json:"age"`
	// Command is the command name derived from Args via ghCommandName.
	Command string `json:"command"`
	// Args are the arguments stored with the cache entry.
	Args []string `json:"args"`
	// Bytes is the size of the cache file on disk.
	Bytes int64 `json:"bytes"`
	// Expired is true when CreatedAt is zero or the entry is older than the
	// TTL computed for its arguments.
	Expired bool `json:"expired"`
}
// runGHXCache dispatches `gh xcache <stats|keys|flush> [--json]`.
//
// args[0] selects the subcommand and the remaining arguments are parsed as
// flags; only --json is recognized. A missing subcommand, a flag parse
// failure, or an unknown subcommand is reported as a usage error. Flags are
// parsed and the JSON preference is applied before the subcommand name is
// validated.
func (a *App) runGHXCache(args []string) error {
	if len(args) == 0 {
		return usageErr(fmt.Errorf("usage: gh xcache <stats|keys|flush>"))
	}
	sub, rest := args[0], args[1:]

	flags := flag.NewFlagSet("xcache "+sub, flag.ContinueOnError)
	// Suppress the flag package's own diagnostics; the caller reports errors.
	flags.SetOutput(io.Discard)
	var asJSON bool
	flags.BoolVar(&asJSON, "json", false, "write JSON output")
	if err := flags.Parse(rest); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(asJSON)

	handlers := map[string]func() error{
		"stats": a.runGHXCacheStats,
		"keys":  a.runGHXCacheKeys,
		"flush": a.runGHXCacheFlush,
	}
	handler, known := handlers[sub]
	if !known {
		return usageErr(fmt.Errorf("unknown xcache command %q", sub))
	}
	return handler()
}
// runGHXCacheStats prints a summary of the gh fallthrough cache.
//
// In JSON mode the full stats value is written as JSON. Otherwise a
// human-readable summary is written to a.Stdout, followed by a per-command
// breakdown when at least one command is recorded. The breakdown is listed in
// sorted command-name order so repeated runs produce identical output.
// Any error from gathering the stats or writing the output is returned.
func (a *App) runGHXCacheStats() error {
	stats, err := a.ghCommandCacheStats()
	if err != nil {
		return err
	}
	if a.format == FormatJSON {
		return a.writeJSONValue(stats, "")
	}
	if _, err := fmt.Fprintf(a.Stdout, "Cache Dir: %s\nEntries: %d\nExpired: %d\nLocks: %d\nBytes: %d\n",
		stats.CacheDir, stats.Entries, stats.Expired, stats.Locks, stats.Bytes); err != nil {
		return err
	}
	if len(stats.Commands) == 0 {
		return nil
	}

	// Go randomizes map iteration order, so sort the command names to keep
	// the text output stable between runs.
	names := make([]string, 0, len(stats.Commands))
	for name := range stats.Commands {
		names = append(names, name)
	}
	sort.Strings(names)

	if _, err := fmt.Fprintln(a.Stdout, "\nCommands:"); err != nil {
		return err
	}
	for _, name := range names {
		count := stats.Commands[name]
		if _, err := fmt.Fprintf(a.Stdout, "  %-16s %d entries / %d bytes\n", name, count.Entries, count.Bytes); err != nil {
			return err
		}
	}
	return nil
}
// runGHXCacheKeys lists the entries of the gh fallthrough cache.
//
// In JSON mode the entry list is written as JSON. Otherwise one line per
// entry is written to a.Stdout with tab-separated key, age, command name and
// space-joined arguments. The first lookup or write error is returned.
func (a *App) runGHXCacheKeys() error {
	infos, err := a.ghCommandCacheKeys()
	if err != nil {
		return err
	}
	if a.format == FormatJSON {
		return a.writeJSONValue(infos, "")
	}
	for i := range infos {
		info := infos[i]
		joinedArgs := strings.Join(info.Args, " ")
		_, writeErr := fmt.Fprintf(a.Stdout, "%s\t%s\t%s\t%s\n", info.Key, info.Age, info.Command, joinedArgs)
		if writeErr != nil {
			return writeErr
		}
	}
	return nil
}
// runGHXCacheFlush clears the gh fallthrough cache and reports how many
// entries were removed, either as a JSON object with a "removed" field or as
// a one-line text message on a.Stdout.
func (a *App) runGHXCacheFlush() error {
	count, err := a.clearGHCommandCacheCount()
	switch {
	case err != nil:
		return err
	case a.format == FormatJSON:
		return a.writeJSONValue(map[string]any{"removed": count}, "")
	}
	_, writeErr := fmt.Fprintf(a.Stdout, "Flushed %d cache entrie(s)\n", count)
	return writeErr
}
// ghCommandCacheStats scans the gh fallthrough cache directory and aggregates
// its contents.
//
// Entries counts only non-expired cache files and Expired counts the rest.
// Bytes and the per-command breakdown cover every cache file regardless of
// expiry. A zero value and the error are returned when the cache directory
// cannot be resolved or scanned.
func (a *App) ghCommandCacheStats() (ghCommandCacheStats, error) {
	var empty ghCommandCacheStats

	cacheDir, err := a.ghCommandCacheDir()
	if err != nil {
		return empty, err
	}
	infos, lockCount, err := a.collectGHCommandCacheKeys(cacheDir)
	if err != nil {
		return empty, err
	}

	summary := ghCommandCacheStats{
		CacheDir: cacheDir,
		Locks:    lockCount,
		Commands: make(map[string]ghCommandCacheCount),
	}
	for i := range infos {
		info := &infos[i]
		if info.Expired {
			summary.Expired++
		} else {
			summary.Entries++
		}
		summary.Bytes += info.Bytes

		// Map values are not addressable: read, update, then store back.
		perCommand := summary.Commands[info.Command]
		perCommand.Entries++
		perCommand.Bytes += info.Bytes
		summary.Commands[info.Command] = perCommand
	}
	return summary, nil
}
// ghCommandCacheKeys returns the descriptors of all cache entries found in
// the gh fallthrough cache directory, discarding the lock-file count that the
// directory scan also produces.
func (a *App) ghCommandCacheKeys() ([]ghCommandCacheKeyInfo, error) {
	cacheDir, dirErr := a.ghCommandCacheDir()
	if dirErr != nil {
		return nil, dirErr
	}
	infos, _, scanErr := a.collectGHCommandCacheKeys(cacheDir)
	return infos, scanErr
}
// collectGHCommandCacheKeys scans dir and returns a descriptor for every
// readable cache entry together with the number of lock files found.
//
// Directory entries whose name ends in ".lock" are counted as locks. Regular
// files ending in ".json" are decoded into descriptors; files that cannot be
// read or decoded are skipped silently. Everything else is ignored.
//
// The returned slice is never nil on success (so it encodes as a JSON array)
// and is ordered newest first, with ties on the creation time broken by key
// so the order is deterministic. A directory that does not exist yet is
// treated as an empty cache rather than an error; any other failure to read
// the directory is returned.
func (a *App) collectGHCommandCacheKeys(dir string) ([]ghCommandCacheKeyInfo, int, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		if os.IsNotExist(err) {
			// Nothing has been cached yet: report an empty cache.
			return []ghCommandCacheKeyInfo{}, 0, nil
		}
		return nil, 0, err
	}
	keys := make([]ghCommandCacheKeyInfo, 0, len(entries))
	locks := 0
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasSuffix(name, ".lock") {
			locks++
			continue
		}
		if !entry.Type().IsRegular() || !strings.HasSuffix(name, ".json") {
			continue
		}
		if key, ok := ghCommandCacheKeyInfoFromDirEntry(dir, entry); ok {
			keys = append(keys, key)
		}
	}
	sort.Slice(keys, func(i, j int) bool {
		if !keys[i].CreatedAt.Equal(keys[j].CreatedAt) {
			return keys[i].CreatedAt.After(keys[j].CreatedAt)
		}
		// sort.Slice is not stable; fall back to the key for a fixed order.
		return keys[i].Key < keys[j].Key
	})
	return keys, locks, nil
}
// ghCommandCacheKeyInfoFromDirEntry builds the descriptor for one cache file.
//
// It stats the entry, reads the file from dir and decodes it as a
// ghCommandCacheEntry. The second result is false when any of those steps
// fails, in which case the descriptor is the zero value. An entry is marked
// expired when its stored creation time is zero or when it is older than the
// TTL that ghCommandCacheTTL reports for its arguments.
func ghCommandCacheKeyInfoFromDirEntry(dir string, entry os.DirEntry) (ghCommandCacheKeyInfo, bool) {
	var none ghCommandCacheKeyInfo

	fileName := entry.Name()
	stat, err := entry.Info()
	if err != nil {
		return none, false
	}
	raw, err := os.ReadFile(filepath.Join(dir, fileName))
	if err != nil {
		return none, false
	}
	var record ghCommandCacheEntry
	if decodeErr := json.Unmarshal(raw, &record); decodeErr != nil {
		return none, false
	}

	maxAge := ghCommandCacheTTL(record.Args)
	elapsed := time.Since(record.CreatedAt)
	expired := record.CreatedAt.IsZero() || elapsed > maxAge

	info := ghCommandCacheKeyInfo{
		Key:       strings.TrimSuffix(fileName, ".json"),
		CreatedAt: record.CreatedAt,
		Age:       elapsed.Round(time.Second).String(),
		Command:   ghCommandName(record.Args),
		Args:      record.Args,
		Bytes:     stat.Size(),
		Expired:   expired,
	}
	return info, true
}