diff --git a/CHANGELOG.md b/CHANGELOG.md index 598060e..d4dd7ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Force embedding refreshes when the embedding input rune cap changes, so stale larger-cap vectors are not reused. - Expand the `gh` shim with local list filters, PR diff caching by cached head SHA, xcache GC, hit/miss/write counters, and throttled portable-store refreshes to reduce GitHub API pressure across agent sessions. - Add explicit PR-detail hydration for files, commits, checks, and workflow runs so `gh pr view`, `gh pr checks`, and `gh run list/view` can answer common review reads from the existing SQLite cache. +- Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`. ## 0.1.2 - 2026-05-01 diff --git a/README.md b/README.md index 31aeb7a..706917d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ gitcrawl tui owner/repo `gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`. `gitcrawl sync` fetches open issues and pull requests by default. Pass `--state all` or `--state closed` for explicit backfill workflows; incremental open syncs with `--since` also sweep recently closed items so local open state does not rot. Pass `--numbers` to refresh exact issue or pull request rows without relying on list ordering or updated-time windows. -Pass `--with pr-details` or `--include-pr-details` to hydrate pull request files, commits, checks, and workflow runs for local review. +Pass `--with pr-details` or `--include-pr-details` to hydrate pull request files, commits, checks, and workflow runs for local review. The `gh` shim can also auto-hydrate one exact PR on a PR-detail miss, then retry locally. 
`gitcrawl search issues|prs` accepts the common `gh search` shape (` -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration. `gitcrawl gh` is a gh-compatible shim for agent workflows. It answers broad `gh search issues|prs`, `gh issue/pr list`, supported `gh issue/pr view --json` fields, hydrated `gh pr checks`, and hydrated `gh run list/view` from local SQLite, then falls through to the real GitHub CLI for unsupported commands. Local `gh issue/pr list` supports common filters such as `--author`, `--assignee`, and repeated `--label`. Read-only fallthroughs such as `gh pr diff`, `gh repo view/list`, `gh label list`, and GET-only `gh api` calls use a short persistent cache under `cache/gh-shim`; `gh pr diff` entries are keyed by the cached PR head SHA when available. Mutating commands pass through, increment write counters, and clear that cache. `gh xcache stats|keys|gc|flush` inspects, garbage-collects, or clears the fallthrough cache. Set `GITCRAWL_GH_PATH` to choose the backend `gh`, and symlink or install the binary as `gh`/`gitcrawl-gh` to run the shim directly. The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default, so the first screen is the useful cluster workload instead of singleton noise. Pass `--min-size 1` when you intentionally want singleton clusters. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. 
Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds. @@ -64,7 +64,7 @@ The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default ## Requirements - Go 1.26+ -- a GitHub token for sync commands +- a GitHub token for sync commands, either via `GITHUB_TOKEN` or `gh auth token` - an OpenAI API key only for summary and embedding commands ## Install diff --git a/internal/cli/app.go b/internal/cli/app.go index 1453542..a52099d 100644 --- a/internal/cli/app.go +++ b/internal/cli/app.go @@ -1789,9 +1789,9 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy if err != nil { return syncer.Stats{}, err } - token := config.ResolveGitHubToken(cfg) + token := a.resolveGitHubToken(ctx, cfg) if token.Value == "" { - return syncer.Stats{}, fmt.Errorf("missing GitHub token: set %s", cfg.GitHub.TokenEnv) + return syncer.Stats{}, fmt.Errorf("missing GitHub token: set %s or authenticate gh", cfg.GitHub.TokenEnv) } if err := config.EnsureRuntimeDirs(cfg); err != nil { return syncer.Stats{}, err diff --git a/internal/cli/gh_shim.go b/internal/cli/gh_shim.go index cbdf44f..9f478c6 100644 --- a/internal/cli/gh_shim.go +++ b/internal/cli/gh_shim.go @@ -119,10 +119,25 @@ func (a *App) runGHThreadView(ctx context.Context, resource string, args []strin } thread, err := a.localGHThread(ctx, repoValue, ghResourceKind(resource), number) if err != nil { - if errors.Is(err, errLocalGHUnsupported) { + if resource == "pr" && a.shouldAutoHydrateGHPRDetails(err) { + owner, repoName, parseErr := parseOwnerRepo(repoValue) + if parseErr != nil { + return localGHUnsupported(parseErr) + } + if _, syncErr := a.syncRepository(ctx, owner, repoName, 
syncOptions{ + Numbers: []int{number}, + IncludePRDetails: true, + }); syncErr != nil { + return localGHUnsupported(syncErr) + } + thread, err = a.localGHThread(ctx, repoValue, ghResourceKind(resource), number) + } + if err != nil { + if errors.Is(err, errLocalGHUnsupported) { + return err + } return err } - return err } jsonFields := strings.TrimSpace(*jsonFieldsRaw) if jsonFields != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON { diff --git a/internal/cli/gh_shim_autohydrate.go b/internal/cli/gh_shim_autohydrate.go new file mode 100644 index 0000000..71decd1 --- /dev/null +++ b/internal/cli/gh_shim_autohydrate.go @@ -0,0 +1,128 @@ +package cli + +import ( + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "os" + "strings" + "time" + + "github.com/openclaw/gitcrawl/internal/store" +) + +const ghPRDetailFreshness = 90 * time.Second + +func (a *App) ensureFreshGHPullRequestCache(ctx context.Context, repoValue string, number int) (store.PullRequestCache, error) { + return a.loadGHPullRequestCache(ctx, repoValue, number, true) +} + +func (a *App) loadGHPullRequestCache(ctx context.Context, repoValue string, number int, requireFresh bool) (store.PullRequestCache, error) { + cache, err := a.localGHPullRequestCache(ctx, repoValue, number) + if err == nil && (!requireFresh || ghPullRequestCacheFresh(cache)) { + return cache, nil + } + if !a.shouldAutoHydrateGHPRDetails(err) { + return cache, err + } + owner, repoName, parseErr := parseOwnerRepo(repoValue) + if parseErr != nil { + return store.PullRequestCache{}, parseErr + } + if _, syncErr := a.syncRepository(ctx, owner, repoName, syncOptions{ + Numbers: []int{number}, + IncludePRDetails: true, + }); syncErr != nil { + return store.PullRequestCache{}, localGHUnsupported(syncErr) + } + return a.localGHPullRequestCache(ctx, repoValue, number) +} + +func ghPRFieldsNeedFresh(fields []string) bool { + for _, field := range fields { + switch field { + case "statusCheckRollup", 
"mergeStateStatus": + return true + } + } + return false +} + +func (a *App) shouldAutoHydrateGHPRDetails(err error) bool { + if strings.EqualFold(strings.TrimSpace(os.Getenv("GITCRAWL_GH_AUTO_HYDRATE")), "0") { + return false + } + if err == nil { + return true + } + return isMissingLocalPRCache(err) || errors.Is(err, errLocalGHUnsupported) +} + +func ghPullRequestCacheFresh(cache store.PullRequestCache) bool { + if rawHead := ghPRHeadSHAFromRawJSON(cache.Detail.RawJSON); rawHead != "" && !strings.EqualFold(cache.Detail.HeadSHA, rawHead) { + return false + } + parsed, err := time.Parse(time.RFC3339Nano, cache.Detail.FetchedAt) + if err != nil { + return false + } + return time.Since(parsed) <= ghPRDetailFreshness +} + +func isMissingLocalPRCache(err error) bool { + if err == nil { + return false + } + return errors.Is(err, sql.ErrNoRows) || + strings.Contains(err.Error(), "pull request detail") || + strings.Contains(err.Error(), "was not found") +} + +func (a *App) findGHPullRequestNumberByBranch(ctx context.Context, repoValue, branch string) (int, error) { + owner, repoName, err := parseOwnerRepo(repoValue) + if err != nil { + return 0, err + } + rt, err := a.openLocalRuntimeReadOnly(ctx) + if err != nil { + return 0, localGHUnsupported(err) + } + defer rt.Store.Close() + repo, err := rt.repository(ctx, owner, repoName) + if err != nil { + return 0, localGHUnsupported(err) + } + threads, err := rt.Store.SearchThreads(ctx, store.ThreadSearchOptions{ + RepoID: repo.ID, + Kind: "pull_request", + State: "open", + IncludeLocallyClosed: true, + Limit: 100, + }) + if err != nil { + return 0, err + } + for _, thread := range threads { + if branch == ghPRHeadRefFromRawJSON(thread.RawJSON) { + return thread.Number, nil + } + if cache, cacheErr := rt.Store.PullRequestCache(ctx, repo.ID, thread.Number); cacheErr == nil && branch == cache.Detail.HeadRef { + return thread.Number, nil + } + } + return 0, localGHUnsupported(fmt.Errorf("cached PR branch %q was not found", 
branch)) +} + +func ghPRHeadRefFromRawJSON(raw string) string { + var payload struct { + Head struct { + Ref string `json:"ref"` + } `json:"head"` + } + if err := json.Unmarshal([]byte(raw), &payload); err != nil { + return "" + } + return strings.TrimSpace(payload.Head.Ref) +} diff --git a/internal/cli/gh_shim_detail_test.go b/internal/cli/gh_shim_detail_test.go index 304c806..320f842 100644 --- a/internal/cli/gh_shim_detail_test.go +++ b/internal/cli/gh_shim_detail_test.go @@ -4,7 +4,12 @@ import ( "bytes" "context" "encoding/json" + "net/http" + "net/http/httptest" "testing" + + "github.com/openclaw/gitcrawl/internal/config" + "github.com/openclaw/gitcrawl/internal/store" ) func TestGHShimViewAndListUseLocalCache(t *testing.T) { @@ -95,3 +100,82 @@ func TestGHShimViewAndListUseLocalCache(t *testing.T) { t.Fatalf("filtered list = %#v", list) } } + +func TestGHShimAutoHydratesPRDetailsOnMiss(t *testing.T) { + ctx := context.Background() + configPath := seedGHShimRepo(t, ctx) + cfg, err := config.Load(configPath) + if err != nil { + t.Fatalf("load config: %v", err) + } + st, err := store.Open(ctx, cfg.DBPath) + if err != nil { + t.Fatalf("open store: %v", err) + } + for _, table := range []string{"pull_request_checks", "pull_request_commits", "pull_request_files", "pull_request_details", "github_workflow_runs", "threads", "repositories"} { + if _, err := st.DB().ExecContext(ctx, "delete from "+table); err != nil { + t.Fatalf("clear %s: %v", table, err) + } + } + if err := st.Close(); err != nil { + t.Fatalf("close store: %v", err) + } + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/repos/openclaw/openclaw": + _ = json.NewEncoder(w).Encode(map[string]any{"id": 123, "open_issues_count": 1}) + case "/repos/openclaw/openclaw/issues/12": + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": 12, "number": 12, "state": "open", "title": "Manifest cache update", + "body": "", "html_url": 
"https://github.com/openclaw/openclaw/pull/12", + "labels": []map[string]any{}, "assignees": []map[string]any{}, + "user": map[string]any{"login": "bob", "type": "User"}, + "pull_request": map[string]any{"url": "https://api.github.test/repos/openclaw/openclaw/pulls/12"}, + }) + case "/repos/openclaw/openclaw/pulls/12": + _ = json.NewEncoder(w).Encode(map[string]any{ + "number": 12, "head": map[string]any{"sha": "auto123", "ref": "auto-branch", "repo": map[string]any{"full_name": "openclaw/openclaw"}}, + "base": map[string]any{"sha": "base123"}, "mergeable_state": "clean", "changed_files": 1, + }) + case "/repos/openclaw/openclaw/pulls/12/files": + _ = json.NewEncoder(w).Encode([]map[string]any{{"filename": "auto.go", "status": "modified"}}) + case "/repos/openclaw/openclaw/pulls/12/commits": + _ = json.NewEncoder(w).Encode([]map[string]any{{"sha": "commit123", "commit": map[string]any{"message": "test"}}}) + case "/repos/openclaw/openclaw/commits/auto123/check-runs": + _ = json.NewEncoder(w).Encode(map[string]any{"check_runs": []map[string]any{{"name": "auto-test", "status": "completed", "conclusion": "success"}}}) + case "/repos/openclaw/openclaw/actions/runs": + _ = json.NewEncoder(w).Encode(map[string]any{"workflow_runs": []map[string]any{{"id": 12345, "head_branch": "auto-branch", "head_sha": "auto123", "status": "completed", "conclusion": "success", "name": "CI"}}}) + default: + t.Fatalf("unexpected request: %s", r.URL.String()) + } + })) + defer server.Close() + t.Setenv("GITHUB_TOKEN", "test-token") + t.Setenv("GITCRAWL_GITHUB_BASE_URL", server.URL) + t.Setenv("GITCRAWL_GH_PATH", "/tmp/no-real-gh") + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,files,commits,statusCheckRollup,headRefOid"}); err != nil { + t.Fatalf("auto hydrate view: %v", err) + } + var view map[string]any + if err := 
json.Unmarshal(stdout.Bytes(), &view); err != nil { + t.Fatalf("decode view: %v\n%s", err, stdout.String()) + } + if view["headRefOid"] != "auto123" || len(view["files"].([]any)) != 1 { + t.Fatalf("view = %#v", view) + } + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "12", "-R", "openclaw/openclaw", "--json", "name,state"}); err != nil { + t.Fatalf("auto hydrate checks: %v", err) + } + var checks []map[string]any + if err := json.Unmarshal(stdout.Bytes(), &checks); err != nil { + t.Fatalf("decode checks: %v\n%s", err, stdout.String()) + } + if len(checks) != 1 || checks[0]["name"] != "auto-test" || checks[0]["state"] != "SUCCESS" { + t.Fatalf("checks = %#v", checks) + } +} diff --git a/internal/cli/gh_shim_prcache.go b/internal/cli/gh_shim_prcache.go index f534c54..a60789c 100644 --- a/internal/cli/gh_shim_prcache.go +++ b/internal/cli/gh_shim_prcache.go @@ -27,7 +27,7 @@ func (a *App) ghThreadViewJSONRow(ctx context.Context, repoValue string, thread return nil, err } if cache == nil { - loaded, loadErr := a.localGHPullRequestCache(ctx, repoValue, thread.Number) + loaded, loadErr := a.loadGHPullRequestCache(ctx, repoValue, thread.Number, ghPRFieldsNeedFresh(fields)) if loadErr != nil { return nil, loadErr } @@ -167,7 +167,7 @@ func (a *App) runGHPRChecks(ctx context.Context, args []string) error { if err != nil { return localGHUnsupported(err) } - cache, err := a.localGHPullRequestCache(ctx, repoValue, number) + cache, err := a.ensureFreshGHPullRequestCache(ctx, repoValue, number) if err != nil { return err } diff --git a/internal/cli/gh_shim_runs.go b/internal/cli/gh_shim_runs.go index fe5880f..1fbaebb 100644 --- a/internal/cli/gh_shim_runs.go +++ b/internal/cli/gh_shim_runs.go @@ -41,8 +41,16 @@ func (a *App) runGHRunList(ctx context.Context, args []string) error { if err != nil { return localGHUnsupported(err) } + branch := strings.TrimSpace(*branchRaw) + if branch != "" && strings.TrimSpace(*commitRaw) == "" { + 
if number, findErr := a.findGHPullRequestNumberByBranch(ctx, repoValue, branch); findErr == nil { + if _, hydrateErr := a.ensureFreshGHPullRequestCache(ctx, repoValue, number); hydrateErr != nil { + return hydrateErr + } + } + } runs, err := a.localGHWorkflowRuns(ctx, repoValue, store.WorkflowRunListOptions{ - Branch: strings.TrimSpace(*branchRaw), + Branch: branch, HeadSHA: strings.TrimSpace(*commitRaw), Limit: limit, }) diff --git a/internal/cli/gh_shim_test.go b/internal/cli/gh_shim_test.go index ec19c73..7d00801 100644 --- a/internal/cli/gh_shim_test.go +++ b/internal/cli/gh_shim_test.go @@ -9,6 +9,7 @@ import ( "strings" "sync" "testing" + "time" "github.com/openclaw/gitcrawl/internal/config" "github.com/openclaw/gitcrawl/internal/store" @@ -417,6 +418,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { if _, err := st.UpsertDocument(ctx, store.Document{ThreadID: prID, Title: "Manifest cache update", RawText: "manifest cache refresh", DedupeText: "manifest cache refresh", UpdatedAt: "2026-04-27T02:00:00Z"}); err != nil { t.Fatalf("seed pr document: %v", err) } + fetchedAt := time.Now().UTC().Format(time.RFC3339Nano) if err := st.UpsertPullRequestCache(ctx, store.PullRequestDetail{ ThreadID: prID, RepoID: repoID, @@ -430,8 +432,8 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { Deletions: 2, ChangedFiles: 1, RawJSON: `{"head":{"sha":"abc123"}}`, - FetchedAt: "2026-04-27T02:00:00Z", - UpdatedAt: "2026-04-27T02:00:00Z", + FetchedAt: fetchedAt, + UpdatedAt: fetchedAt, }, []store.PullRequestFile{{ ThreadID: prID, Path: "internal/cache.go", @@ -440,7 +442,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { Deletions: 2, Changes: 12, RawJSON: "{}", - FetchedAt: "2026-04-27T02:00:00Z", + FetchedAt: fetchedAt, }}, []store.PullRequestCommit{{ ThreadID: prID, SHA: "commit123", @@ -450,7 +452,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { CommittedAt: "2026-04-27T01:00:00Z", HTMLURL: 
"https://github.com/openclaw/openclaw/commit/commit123", RawJSON: "{}", - FetchedAt: "2026-04-27T02:00:00Z", + FetchedAt: fetchedAt, }}, []store.PullRequestCheck{{ ThreadID: prID, Name: "test", @@ -459,7 +461,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { DetailsURL: "https://github.com/openclaw/openclaw/actions/runs/99", WorkflowName: "CI", RawJSON: "{}", - FetchedAt: "2026-04-27T02:00:00Z", + FetchedAt: fetchedAt, }}, []store.WorkflowRun{{ RepoID: repoID, RunID: "99", @@ -474,7 +476,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { CreatedAtGH: "2026-04-27T01:00:00Z", UpdatedAtGH: "2026-04-27T02:00:00Z", RawJSON: "{}", - FetchedAt: "2026-04-27T02:00:00Z", + FetchedAt: fetchedAt, }}); err != nil { t.Fatalf("seed pr cache: %v", err) } diff --git a/internal/cli/github_token.go b/internal/cli/github_token.go new file mode 100644 index 0000000..cee1971 --- /dev/null +++ b/internal/cli/github_token.go @@ -0,0 +1,90 @@ +package cli + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/openclaw/gitcrawl/internal/config" +) + +func (a *App) resolveGitHubToken(ctx context.Context, cfg config.Config) config.TokenResolution { + token := config.ResolveGitHubToken(cfg) + if token.Value != "" { + return token + } + if value, err := a.githubAuthToken(ctx); err == nil && value != "" { + return config.TokenResolution{Value: value, Source: "gh auth token"} + } + return token +} + +func (a *App) githubAuthToken(ctx context.Context) (string, error) { + candidates := candidateRealGHPaths() + var lastErr error + for _, candidate := range candidates { + if !usableRealGHPath(candidate) { + continue + } + tokenCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + cmd := exec.CommandContext(tokenCtx, candidate, "auth", "token") + out, err := cmd.Output() + cancel() + if err != nil { + lastErr = err + continue + } + if token := strings.TrimSpace(string(out)); token != "" { + return token, nil + } 
// candidateRealGHPaths lists the locations to probe for the real GitHub CLI,
// in priority order: the GITCRAWL_GH_PATH override, three fixed install
// locations, and finally whatever exec.LookPath finds for "gh". Entries are
// whitespace-trimmed; empty and repeated entries are dropped, keeping the
// first occurrence.
func candidateRealGHPaths() []string {
	var ordered []string
	seen := make(map[string]struct{})
	add := func(raw string) {
		candidate := strings.TrimSpace(raw)
		if candidate == "" {
			return
		}
		if _, dup := seen[candidate]; dup {
			return
		}
		seen[candidate] = struct{}{}
		ordered = append(ordered, candidate)
	}

	add(os.Getenv("GITCRAWL_GH_PATH"))
	for _, known := range [...]string{
		"/opt/homebrew/opt/gh/bin/gh",
		"/usr/local/bin/gh",
		"/usr/bin/gh",
	} {
		add(known)
	}
	if found, err := exec.LookPath("gh"); err == nil {
		add(found)
	}
	return ordered
}

// usableRealGHPath reports whether path can be run as the backend gh binary.
// It must exist, not be a directory, and carry at least one execute bit. It
// must also not resolve (through symlinks) to the currently running
// executable, which would make the shim invoke itself. If the running
// executable cannot be determined, or either symlink resolution fails, the
// path is accepted.
func usableRealGHPath(path string) bool {
	info, statErr := os.Stat(path)
	switch {
	case statErr != nil:
		return false
	case info.IsDir():
		return false
	case info.Mode()&0111 == 0:
		return false
	}

	self, err := os.Executable()
	if err != nil {
		return true
	}
	resolvedPath, err := filepath.EvalSymlinks(path)
	if err != nil {
		return true
	}
	resolvedSelf, err := filepath.EvalSymlinks(self)
	if err != nil {
		return true
	}
	return resolvedPath != resolvedSelf
}
token.Source != "gh auth token" { + t.Fatalf("token = %#v", token) + } +}