feat(gh): auto-hydrate PR detail cache

This commit is contained in:
Peter Steinberger 2026-05-05 03:51:27 +01:00
parent d3215f9e42
commit 93290e290f
No known key found for this signature in database
11 changed files with 368 additions and 15 deletions

View File

@ -5,6 +5,7 @@
- Force embedding refreshes when the embedding input rune cap changes, so stale larger-cap vectors are not reused.
- Expand the `gh` shim with local list filters, PR diff caching by cached head SHA, xcache GC, hit/miss/write counters, and throttled portable-store refreshes to reduce GitHub API pressure across agent sessions.
- Add explicit PR-detail hydration for files, commits, checks, and workflow runs so `gh pr view`, `gh pr checks`, and `gh run list/view` can answer common review reads from the existing SQLite cache.
- Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`.
## 0.1.2 - 2026-05-01

View File

@ -48,7 +48,7 @@ gitcrawl tui owner/repo
`gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`.
`gitcrawl sync` fetches open issues and pull requests by default. Pass `--state all` or `--state closed` for explicit backfill workflows; incremental open syncs with `--since` also sweep recently closed items so local open state does not rot.
Pass `--numbers` to refresh exact issue or pull request rows without relying on list ordering or updated-time windows.
Pass `--with pr-details` or `--include-pr-details` to hydrate pull request files, commits, checks, and workflow runs for local review.
Pass `--with pr-details` or `--include-pr-details` to hydrate pull request files, commits, checks, and workflow runs for local review. The `gh` shim can also auto-hydrate one exact PR on a PR-detail miss, then retry locally.
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration.
`gitcrawl gh` is a gh-compatible shim for agent workflows. It answers broad `gh search issues|prs`, `gh issue/pr list`, supported `gh issue/pr view --json` fields, hydrated `gh pr checks`, and hydrated `gh run list/view` from local SQLite, then falls through to the real GitHub CLI for unsupported commands. Local `gh issue/pr list` supports common filters such as `--author`, `--assignee`, and repeated `--label`. Read-only fallthroughs such as `gh pr diff`, `gh repo view/list`, `gh label list`, and GET-only `gh api` calls use a short persistent cache under `cache/gh-shim`; `gh pr diff` entries are keyed by the cached PR head SHA when available. Mutating commands pass through, increment write counters, and clear that cache. `gh xcache stats|keys|gc|flush` inspects, garbage-collects, or clears the fallthrough cache. Set `GITCRAWL_GH_PATH` to choose the backend `gh`, and symlink or install the binary as `gh`/`gitcrawl-gh` to run the shim directly.
The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default, so the first screen is the useful cluster workload instead of singleton noise. Pass `--min-size 1` when you intentionally want singleton clusters. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds.
@ -64,7 +64,7 @@ The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default
## Requirements
- Go 1.26+
- a GitHub token for sync commands
- a GitHub token for sync commands, either via `GITHUB_TOKEN` or `gh auth token`
- an OpenAI API key only for summary and embedding commands
## Install

View File

@ -1789,9 +1789,9 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
if err != nil {
return syncer.Stats{}, err
}
token := config.ResolveGitHubToken(cfg)
token := a.resolveGitHubToken(ctx, cfg)
if token.Value == "" {
return syncer.Stats{}, fmt.Errorf("missing GitHub token: set %s", cfg.GitHub.TokenEnv)
return syncer.Stats{}, fmt.Errorf("missing GitHub token: set %s or authenticate gh", cfg.GitHub.TokenEnv)
}
if err := config.EnsureRuntimeDirs(cfg); err != nil {
return syncer.Stats{}, err

View File

@ -119,10 +119,25 @@ func (a *App) runGHThreadView(ctx context.Context, resource string, args []strin
}
thread, err := a.localGHThread(ctx, repoValue, ghResourceKind(resource), number)
if err != nil {
if errors.Is(err, errLocalGHUnsupported) {
if resource == "pr" && a.shouldAutoHydrateGHPRDetails(err) {
owner, repoName, parseErr := parseOwnerRepo(repoValue)
if parseErr != nil {
return localGHUnsupported(parseErr)
}
if _, syncErr := a.syncRepository(ctx, owner, repoName, syncOptions{
Numbers: []int{number},
IncludePRDetails: true,
}); syncErr != nil {
return localGHUnsupported(syncErr)
}
thread, err = a.localGHThread(ctx, repoValue, ghResourceKind(resource), number)
}
if err != nil {
if errors.Is(err, errLocalGHUnsupported) {
return err
}
return err
}
return err
}
jsonFields := strings.TrimSpace(*jsonFieldsRaw)
if jsonFields != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON {

View File

@ -0,0 +1,128 @@
package cli
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"os"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/store"
)
// ghPRDetailFreshness is the maximum age, measured from a cached PR detail's
// FetchedAt timestamp, at which ghPullRequestCacheFresh still treats the
// cached row as fresh.
const ghPRDetailFreshness = 90 * time.Second
// ensureFreshGHPullRequestCache loads the cached pull request detail for
// repoValue/number and insists on freshness: it delegates to
// loadGHPullRequestCache with requireFresh enabled.
func (a *App) ensureFreshGHPullRequestCache(ctx context.Context, repoValue string, number int) (store.PullRequestCache, error) {
	const requireFresh = true
	return a.loadGHPullRequestCache(ctx, repoValue, number, requireFresh)
}
// loadGHPullRequestCache returns the locally cached pull request detail for
// repoValue/number, hydrating that single PR through syncRepository when the
// local read fails or (with requireFresh set) the cached row is not fresh.
//
// If auto-hydration is not permitted for the observed error, the result of
// the initial local read is returned unchanged. A failed sync is reported
// through localGHUnsupported. After a successful sync the cache is read once
// more and that result is returned as-is.
func (a *App) loadGHPullRequestCache(ctx context.Context, repoValue string, number int, requireFresh bool) (store.PullRequestCache, error) {
	cached, loadErr := a.localGHPullRequestCache(ctx, repoValue, number)
	usable := loadErr == nil && (!requireFresh || ghPullRequestCacheFresh(cached))
	switch {
	case usable:
		return cached, nil
	case !a.shouldAutoHydrateGHPRDetails(loadErr):
		return cached, loadErr
	}

	owner, repoName, err := parseOwnerRepo(repoValue)
	if err != nil {
		return store.PullRequestCache{}, err
	}

	// Refresh exactly this PR, including files, commits, checks and runs.
	hydrate := syncOptions{
		Numbers:          []int{number},
		IncludePRDetails: true,
	}
	if _, err := a.syncRepository(ctx, owner, repoName, hydrate); err != nil {
		return store.PullRequestCache{}, localGHUnsupported(err)
	}
	return a.localGHPullRequestCache(ctx, repoValue, number)
}
// ghPRFieldsNeedFresh reports whether any requested JSON field is one of
// "statusCheckRollup" or "mergeStateStatus". The comparison is exact
// (case-sensitive, no trimming).
func ghPRFieldsNeedFresh(fields []string) bool {
	for i := range fields {
		if name := fields[i]; name == "statusCheckRollup" || name == "mergeStateStatus" {
			return true
		}
	}
	return false
}
// shouldAutoHydrateGHPRDetails reports whether a single-PR hydration may be
// attempted for the given local read result.
//
// Setting the GITCRAWL_GH_AUTO_HYDRATE environment variable to "0"
// (surrounding whitespace ignored) disables hydration entirely. Otherwise
// hydration is allowed for a nil error, for an error recognised by
// isMissingLocalPRCache, and for errors matching errLocalGHUnsupported.
func (a *App) shouldAutoHydrateGHPRDetails(err error) bool {
	setting := strings.TrimSpace(os.Getenv("GITCRAWL_GH_AUTO_HYDRATE"))
	switch {
	case strings.EqualFold(setting, "0"):
		return false
	case err == nil:
		return true
	case errors.Is(err, errLocalGHUnsupported):
		return true
	default:
		return isMissingLocalPRCache(err)
	}
}
// ghPullRequestCacheFresh reports whether a cached pull request detail can be
// served without refreshing.
//
// The cache is stale when the head SHA found in the stored raw JSON is
// non-empty and differs (case-insensitively) from the stored HeadSHA, when
// FetchedAt does not parse as RFC 3339, or when FetchedAt is older than
// ghPRDetailFreshness.
func ghPullRequestCacheFresh(cache store.PullRequestCache) bool {
	rawHead := ghPRHeadSHAFromRawJSON(cache.Detail.RawJSON)
	if rawHead != "" && !strings.EqualFold(cache.Detail.HeadSHA, rawHead) {
		return false
	}

	fetchedAt, parseErr := time.Parse(time.RFC3339Nano, cache.Detail.FetchedAt)
	if parseErr != nil {
		return false
	}

	age := time.Since(fetchedAt)
	return age <= ghPRDetailFreshness
}
// isMissingLocalPRCache reports whether err indicates that the local store
// has no cached pull request detail. It matches sql.ErrNoRows anywhere in the
// error chain, or an error message containing "pull request detail" or
// "was not found". A nil error is never a miss.
func isMissingLocalPRCache(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, sql.ErrNoRows):
		return true
	}
	msg := err.Error()
	return strings.Contains(msg, "pull request detail") || strings.Contains(msg, "was not found")
}
// findGHPullRequestNumberByBranch looks up the number of a locally cached
// open pull request whose head branch equals branch.
//
// It searches up to 100 open pull request threads of repoValue (including
// locally closed ones) in the read-only local store. A thread matches when
// the head ref in its raw JSON, or the HeadRef of its cached PR detail,
// equals branch. Misses, and failures to open the store or resolve the
// repository, are reported through localGHUnsupported.
//
// An empty (or whitespace-only) branch is rejected up front: a thread whose
// raw JSON carries no head.ref yields "" from ghPRHeadRefFromRawJSON, so
// without the guard an empty branch would "match" the first such thread.
func (a *App) findGHPullRequestNumberByBranch(ctx context.Context, repoValue, branch string) (int, error) {
	owner, repoName, err := parseOwnerRepo(repoValue)
	if err != nil {
		return 0, err
	}
	if strings.TrimSpace(branch) == "" {
		return 0, localGHUnsupported(fmt.Errorf("cached PR branch %q was not found", branch))
	}
	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return 0, localGHUnsupported(err)
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return 0, localGHUnsupported(err)
	}
	threads, err := rt.Store.SearchThreads(ctx, store.ThreadSearchOptions{
		RepoID:               repo.ID,
		Kind:                 "pull_request",
		State:                "open",
		IncludeLocallyClosed: true,
		Limit:                100,
	})
	if err != nil {
		return 0, err
	}
	for _, thread := range threads {
		// Prefer the head ref embedded in the thread payload when present.
		if branch == ghPRHeadRefFromRawJSON(thread.RawJSON) {
			return thread.Number, nil
		}
		// Otherwise fall back to the hydrated PR detail row, if there is one.
		if cache, cacheErr := rt.Store.PullRequestCache(ctx, repo.ID, thread.Number); cacheErr == nil && branch == cache.Detail.HeadRef {
			return thread.Number, nil
		}
	}
	return 0, localGHUnsupported(fmt.Errorf("cached PR branch %q was not found", branch))
}
// ghPRHeadRefFromRawJSON extracts the "head.ref" string from a raw JSON
// payload and returns it with surrounding whitespace removed. It returns ""
// when the payload cannot be decoded or carries no head ref.
func ghPRHeadRefFromRawJSON(raw string) string {
	type headRef struct {
		Ref string `json:"ref"`
	}
	var doc struct {
		Head headRef `json:"head"`
	}
	if json.Unmarshal([]byte(raw), &doc) != nil {
		return ""
	}
	return strings.TrimSpace(doc.Head.Ref)
}

View File

@ -4,7 +4,12 @@ import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
)
func TestGHShimViewAndListUseLocalCache(t *testing.T) {
@ -95,3 +100,82 @@ func TestGHShimViewAndListUseLocalCache(t *testing.T) {
t.Fatalf("filtered list = %#v", list)
}
}
// TestGHShimAutoHydratesPRDetailsOnMiss wipes the seeded local cache, points
// the GitHub client at a stub server, and verifies that `gh pr view` and
// `gh pr checks` hydrate PR #12 on demand and then answer from SQLite.
func TestGHShimAutoHydratesPRDetailsOnMiss(t *testing.T) {
	ctx := context.Background()
	configPath := seedGHShimRepo(t, ctx)
	cfg, err := config.Load(configPath)
	if err != nil {
		t.Fatalf("load config: %v", err)
	}
	st, err := store.Open(ctx, cfg.DBPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	// Empty every table the shim reads so the first request is a guaranteed miss.
	for _, table := range []string{"pull_request_checks", "pull_request_commits", "pull_request_files", "pull_request_details", "github_workflow_runs", "threads", "repositories"} {
		if _, err := st.DB().ExecContext(ctx, "delete from "+table); err != nil {
			t.Fatalf("clear %s: %v", table, err)
		}
	}
	if err := st.Close(); err != nil {
		t.Fatalf("close store: %v", err)
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/repos/openclaw/openclaw":
			_ = json.NewEncoder(w).Encode(map[string]any{"id": 123, "open_issues_count": 1})
		case "/repos/openclaw/openclaw/issues/12":
			_ = json.NewEncoder(w).Encode(map[string]any{
				"id": 12, "number": 12, "state": "open", "title": "Manifest cache update",
				"body": "", "html_url": "https://github.com/openclaw/openclaw/pull/12",
				"labels": []map[string]any{}, "assignees": []map[string]any{},
				"user":         map[string]any{"login": "bob", "type": "User"},
				"pull_request": map[string]any{"url": "https://api.github.test/repos/openclaw/openclaw/pulls/12"},
			})
		case "/repos/openclaw/openclaw/pulls/12":
			_ = json.NewEncoder(w).Encode(map[string]any{
				"number": 12, "head": map[string]any{"sha": "auto123", "ref": "auto-branch", "repo": map[string]any{"full_name": "openclaw/openclaw"}},
				"base": map[string]any{"sha": "base123"}, "mergeable_state": "clean", "changed_files": 1,
			})
		case "/repos/openclaw/openclaw/pulls/12/files":
			_ = json.NewEncoder(w).Encode([]map[string]any{{"filename": "auto.go", "status": "modified"}})
		case "/repos/openclaw/openclaw/pulls/12/commits":
			_ = json.NewEncoder(w).Encode([]map[string]any{{"sha": "commit123", "commit": map[string]any{"message": "test"}}})
		case "/repos/openclaw/openclaw/commits/auto123/check-runs":
			_ = json.NewEncoder(w).Encode(map[string]any{"check_runs": []map[string]any{{"name": "auto-test", "status": "completed", "conclusion": "success"}}})
		case "/repos/openclaw/openclaw/actions/runs":
			_ = json.NewEncoder(w).Encode(map[string]any{"workflow_runs": []map[string]any{{"id": 12345, "head_branch": "auto-branch", "head_sha": "auto123", "status": "completed", "conclusion": "success", "name": "CI"}}})
		default:
			// The handler runs on a server goroutine, where t.Fatalf is not
			// allowed; record the failure and answer 404 instead.
			t.Errorf("unexpected request: %s", r.URL.String())
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	t.Setenv("GITHUB_TOKEN", "test-token")
	t.Setenv("GITCRAWL_GITHUB_BASE_URL", server.URL)
	t.Setenv("GITCRAWL_GH_PATH", "/tmp/no-real-gh")

	run := New()
	var stdout bytes.Buffer
	run.Stdout = &stdout
	if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,files,commits,statusCheckRollup,headRefOid"}); err != nil {
		t.Fatalf("auto hydrate view: %v", err)
	}
	var view map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &view); err != nil {
		t.Fatalf("decode view: %v\n%s", err, stdout.String())
	}
	// Checked assertion: a missing or mistyped "files" field fails the test
	// with a readable message rather than panicking.
	files, _ := view["files"].([]any)
	if view["headRefOid"] != "auto123" || len(files) != 1 {
		t.Fatalf("view = %#v", view)
	}

	stdout.Reset()
	if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "12", "-R", "openclaw/openclaw", "--json", "name,state"}); err != nil {
		t.Fatalf("auto hydrate checks: %v", err)
	}
	var checks []map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &checks); err != nil {
		t.Fatalf("decode checks: %v\n%s", err, stdout.String())
	}
	if len(checks) != 1 || checks[0]["name"] != "auto-test" || checks[0]["state"] != "SUCCESS" {
		t.Fatalf("checks = %#v", checks)
	}
}

View File

@ -27,7 +27,7 @@ func (a *App) ghThreadViewJSONRow(ctx context.Context, repoValue string, thread
return nil, err
}
if cache == nil {
loaded, loadErr := a.localGHPullRequestCache(ctx, repoValue, thread.Number)
loaded, loadErr := a.loadGHPullRequestCache(ctx, repoValue, thread.Number, ghPRFieldsNeedFresh(fields))
if loadErr != nil {
return nil, loadErr
}
@ -167,7 +167,7 @@ func (a *App) runGHPRChecks(ctx context.Context, args []string) error {
if err != nil {
return localGHUnsupported(err)
}
cache, err := a.localGHPullRequestCache(ctx, repoValue, number)
cache, err := a.ensureFreshGHPullRequestCache(ctx, repoValue, number)
if err != nil {
return err
}

View File

@ -41,8 +41,16 @@ func (a *App) runGHRunList(ctx context.Context, args []string) error {
if err != nil {
return localGHUnsupported(err)
}
branch := strings.TrimSpace(*branchRaw)
if branch != "" && strings.TrimSpace(*commitRaw) == "" {
if number, findErr := a.findGHPullRequestNumberByBranch(ctx, repoValue, branch); findErr == nil {
if _, hydrateErr := a.ensureFreshGHPullRequestCache(ctx, repoValue, number); hydrateErr != nil {
return hydrateErr
}
}
}
runs, err := a.localGHWorkflowRuns(ctx, repoValue, store.WorkflowRunListOptions{
Branch: strings.TrimSpace(*branchRaw),
Branch: branch,
HeadSHA: strings.TrimSpace(*commitRaw),
Limit: limit,
})

View File

@ -9,6 +9,7 @@ import (
"strings"
"sync"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/config"
"github.com/openclaw/gitcrawl/internal/store"
@ -417,6 +418,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string {
if _, err := st.UpsertDocument(ctx, store.Document{ThreadID: prID, Title: "Manifest cache update", RawText: "manifest cache refresh", DedupeText: "manifest cache refresh", UpdatedAt: "2026-04-27T02:00:00Z"}); err != nil {
t.Fatalf("seed pr document: %v", err)
}
fetchedAt := time.Now().UTC().Format(time.RFC3339Nano)
if err := st.UpsertPullRequestCache(ctx, store.PullRequestDetail{
ThreadID: prID,
RepoID: repoID,
@ -430,8 +432,8 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string {
Deletions: 2,
ChangedFiles: 1,
RawJSON: `{"head":{"sha":"abc123"}}`,
FetchedAt: "2026-04-27T02:00:00Z",
UpdatedAt: "2026-04-27T02:00:00Z",
FetchedAt: fetchedAt,
UpdatedAt: fetchedAt,
}, []store.PullRequestFile{{
ThreadID: prID,
Path: "internal/cache.go",
@ -440,7 +442,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string {
Deletions: 2,
Changes: 12,
RawJSON: "{}",
FetchedAt: "2026-04-27T02:00:00Z",
FetchedAt: fetchedAt,
}}, []store.PullRequestCommit{{
ThreadID: prID,
SHA: "commit123",
@ -450,7 +452,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string {
CommittedAt: "2026-04-27T01:00:00Z",
HTMLURL: "https://github.com/openclaw/openclaw/commit/commit123",
RawJSON: "{}",
FetchedAt: "2026-04-27T02:00:00Z",
FetchedAt: fetchedAt,
}}, []store.PullRequestCheck{{
ThreadID: prID,
Name: "test",
@ -459,7 +461,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string {
DetailsURL: "https://github.com/openclaw/openclaw/actions/runs/99",
WorkflowName: "CI",
RawJSON: "{}",
FetchedAt: "2026-04-27T02:00:00Z",
FetchedAt: fetchedAt,
}}, []store.WorkflowRun{{
RepoID: repoID,
RunID: "99",
@ -474,7 +476,7 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string {
CreatedAtGH: "2026-04-27T01:00:00Z",
UpdatedAtGH: "2026-04-27T02:00:00Z",
RawJSON: "{}",
FetchedAt: "2026-04-27T02:00:00Z",
FetchedAt: fetchedAt,
}}); err != nil {
t.Fatalf("seed pr cache: %v", err)
}

View File

@ -0,0 +1,90 @@
package cli
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/config"
)
// resolveGitHubToken returns the GitHub token resolved from cfg when it is
// non-empty. Otherwise it asks githubAuthToken for a token and, on success,
// reports it with the source "gh auth token". When neither yields a token the
// (empty) configured resolution is returned unchanged.
func (a *App) resolveGitHubToken(ctx context.Context, cfg config.Config) config.TokenResolution {
	configured := config.ResolveGitHubToken(cfg)
	if configured.Value != "" {
		return configured
	}
	fallback, err := a.githubAuthToken(ctx)
	if err != nil || fallback == "" {
		return configured
	}
	return config.TokenResolution{Value: fallback, Source: "gh auth token"}
}
// githubAuthToken runs `<gh> auth token` against each usable candidate from
// candidateRealGHPaths and returns the first non-empty trimmed output.
//
// Each invocation is bounded by a 3 second timeout derived from ctx. If ctx
// itself is done, the search stops and ctx's error is returned instead of
// trying the remaining candidates. When no candidate yields a token, the
// error describes the last failure, including the binary that produced it; a
// binary that exits successfully but prints nothing is reported as such
// rather than as "real gh not found". That message is reserved for the case
// where no usable candidate exists at all.
func (a *App) githubAuthToken(ctx context.Context) (string, error) {
	var lastErr error
	for _, candidate := range candidateRealGHPaths() {
		if !usableRealGHPath(candidate) {
			continue
		}
		tokenCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
		out, err := exec.CommandContext(tokenCtx, candidate, "auth", "token").Output()
		cancel()
		if err != nil {
			// A cancelled or expired parent context will fail every
			// remaining candidate as well, so give up immediately.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return "", ctxErr
			}
			lastErr = fmt.Errorf("%s auth token: %w", candidate, err)
			continue
		}
		if token := strings.TrimSpace(string(out)); token != "" {
			return token, nil
		}
		lastErr = fmt.Errorf("%s auth token returned no token", candidate)
	}
	if lastErr != nil {
		return "", lastErr
	}
	return "", fmt.Errorf("real gh not found")
}
// candidateRealGHPaths lists, in priority order, the paths to try when
// looking for the backend GitHub CLI: the GITCRAWL_GH_PATH environment
// variable (if set), three well-known install locations, and finally
// whatever exec.LookPath resolves "gh" to. Entries are whitespace-trimmed;
// empty entries and duplicates are dropped, keeping the first occurrence.
func candidateRealGHPaths() []string {
	raw := []string{
		os.Getenv("GITCRAWL_GH_PATH"),
		"/opt/homebrew/opt/gh/bin/gh",
		"/usr/local/bin/gh",
		"/usr/bin/gh",
	}
	if found, err := exec.LookPath("gh"); err == nil {
		raw = append(raw, found)
	}

	visited := make(map[string]bool, len(raw))
	result := make([]string, 0, len(raw))
	for _, entry := range raw {
		entry = strings.TrimSpace(entry)
		if entry == "" || visited[entry] {
			continue
		}
		visited[entry] = true
		result = append(result, entry)
	}
	return result
}
// usableRealGHPath reports whether path can serve as the backend gh binary.
// The path must exist, must not be a directory, and must have at least one
// execute permission bit set. It is also rejected when, after resolving
// symlinks, it is the currently running executable, so the program never
// invokes itself. If the running executable or either symlink resolution
// cannot be determined, the path is accepted.
func usableRealGHPath(path string) bool {
	stat, statErr := os.Stat(path)
	switch {
	case statErr != nil:
		return false
	case stat.IsDir():
		return false
	case stat.Mode()&0111 == 0:
		return false
	}

	self, err := os.Executable()
	if err != nil {
		return true
	}
	resolvedPath, err := filepath.EvalSymlinks(path)
	if err != nil {
		return true
	}
	resolvedSelf, err := filepath.EvalSymlinks(self)
	if err != nil {
		return true
	}
	return resolvedPath != resolvedSelf
}

View File

@ -0,0 +1,25 @@
package cli
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/openclaw/gitcrawl/internal/config"
)
// TestResolveGitHubTokenFallsBackToGHAuthToken installs a fake gh script that
// prints a token for `auth token`, clears GITHUB_TOKEN, and checks that
// resolveGitHubToken reports that token with the "gh auth token" source.
func TestResolveGitHubTokenFallsBackToGHAuthToken(t *testing.T) {
	const fakeGHScript = "#!/bin/sh\nif [ \"$1\" = auth ] && [ \"$2\" = token ]; then echo gh-fallback-token; exit 0; fi\nexit 1\n"

	fakeGH := filepath.Join(t.TempDir(), "gh")
	if err := os.WriteFile(fakeGH, []byte(fakeGHScript), 0o755); err != nil {
		t.Fatalf("write fake gh: %v", err)
	}
	t.Setenv("GITHUB_TOKEN", "")
	t.Setenv("GITCRAWL_GH_PATH", fakeGH)

	got := New().resolveGitHubToken(context.Background(), config.Default())
	if got.Value != "gh-fallback-token" || got.Source != "gh auth token" {
		t.Fatalf("token = %#v", got)
	}
}