From 7222fef197b41053279e84ab56055e5562f593d2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 5 May 2026 03:26:39 +0100 Subject: [PATCH] feat(gh): cache hydrated PR details --- CHANGELOG.md | 1 + README.md | 4 +- internal/cli/app.go | 62 +++-- internal/cli/gh_shim.go | 38 ++- internal/cli/gh_shim_cache.go | 16 +- internal/cli/gh_shim_detail_test.go | 97 ++++++++ internal/cli/gh_shim_helpers_test.go | 27 ++ internal/cli/gh_shim_prcache.go | 218 ++++++++++++++++ internal/cli/gh_shim_runs.go | 161 ++++++++++++ internal/cli/gh_shim_test.go | 114 +++++---- internal/github/client.go | 45 ++++ internal/github/client_test.go | 18 +- internal/store/pull_requests.go | 358 +++++++++++++++++++++++++++ internal/store/schema.go | 83 +++++++ internal/syncer/pull_details.go | 117 +++++++++ internal/syncer/pull_details_more.go | 65 +++++ internal/syncer/syncer.go | 38 ++- internal/syncer/syncer_test.go | 117 +++++++++ 18 files changed, 1504 insertions(+), 75 deletions(-) create mode 100644 internal/cli/gh_shim_detail_test.go create mode 100644 internal/cli/gh_shim_helpers_test.go create mode 100644 internal/cli/gh_shim_prcache.go create mode 100644 internal/cli/gh_shim_runs.go create mode 100644 internal/store/pull_requests.go create mode 100644 internal/syncer/pull_details.go create mode 100644 internal/syncer/pull_details_more.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 1639553..598060e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Force embedding refreshes when the embedding input rune cap changes, so stale larger-cap vectors are not reused. - Expand the `gh` shim with local list filters, PR diff caching by cached head SHA, xcache GC, hit/miss/write counters, and throttled portable-store refreshes to reduce GitHub API pressure across agent sessions. 
+- Add explicit PR-detail hydration for files, commits, checks, and workflow runs so `gh pr view`, `gh pr checks`, and `gh run list/view` can answer common review reads from the existing SQLite cache. ## 0.1.2 - 2026-05-01 diff --git a/README.md b/README.md index fb8486e..31aeb7a 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ gitcrawl search owner/repo --query "download stalls" gitcrawl search issues "download stalls" -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30 gitcrawl search prs "manifest cache" -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20 gitcrawl search issues "hot loop" -R owner/repo --state open --sync-if-stale 5m --json number,title,url +gitcrawl sync owner/repo --numbers 123 --with pr-details gitcrawl gh search issues "download stalls" -R owner/repo --state open --match comments --json number,title,url gitcrawl gh pr view 123 -R owner/repo --json number,title,state,url gitcrawl gh run view 123456789 -R owner/repo --json status,conclusion @@ -47,8 +48,9 @@ gitcrawl tui owner/repo `gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`. `gitcrawl sync` fetches open issues and pull requests by default. Pass `--state all` or `--state closed` for explicit backfill workflows; incremental open syncs with `--since` also sweep recently closed items so local open state does not rot. Pass `--numbers` to refresh exact issue or pull request rows without relying on list ordering or updated-time windows. +Pass `--with pr-details` or `--include-pr-details` to hydrate pull request files, commits, checks, and workflow runs for local review. `gitcrawl search issues|prs` accepts the common `gh search` shape (` -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. 
It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration. -`gitcrawl gh` is a gh-compatible shim for agent workflows. It answers broad `gh search issues|prs`, `gh issue/pr list`, and supported `gh issue/pr view --json` fields from local SQLite, then falls through to the real GitHub CLI for unsupported commands. Local `gh issue/pr list` supports common filters such as `--author`, `--assignee`, and repeated `--label`. Read-only fallthroughs such as `gh run list/view`, `gh pr diff/checks`, `gh repo view/list`, `gh label list`, and GET-only `gh api` calls use a short persistent cache under `cache/gh-shim`; `gh pr diff` entries are keyed by the cached PR head SHA when available. Mutating commands pass through, increment write counters, and clear that cache. `gh xcache stats|keys|gc|flush` inspects, garbage-collects, or clears the fallthrough cache. Set `GITCRAWL_GH_PATH` to choose the backend `gh`, and symlink or install the binary as `gh`/`gitcrawl-gh` to run the shim directly. +`gitcrawl gh` is a gh-compatible shim for agent workflows. It answers broad `gh search issues|prs`, `gh issue/pr list`, supported `gh issue/pr view --json` fields, hydrated `gh pr checks`, and hydrated `gh run list/view` from local SQLite, then falls through to the real GitHub CLI for unsupported commands. Local `gh issue/pr list` supports common filters such as `--author`, `--assignee`, and repeated `--label`. Read-only fallthroughs such as `gh pr diff`, `gh repo view/list`, `gh label list`, and GET-only `gh api` calls use a short persistent cache under `cache/gh-shim`; `gh pr diff` entries are keyed by the cached PR head SHA when available. Mutating commands pass through, increment write counters, and clear that cache. 
`gh xcache stats|keys|gc|flush` inspects, garbage-collects, or clears the fallthrough cache. Set `GITCRAWL_GH_PATH` to choose the backend `gh`, and symlink or install the binary as `gh`/`gitcrawl-gh` to run the shim directly. The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default, so the first screen is the useful cluster workload instead of singleton noise. Pass `--min-size 1` when you intentionally want singleton clusters. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds. ## Local Defaults diff --git a/internal/cli/app.go b/internal/cli/app.go index c2449c8..1453542 100644 --- a/internal/cli/app.go +++ b/internal/cli/app.go @@ -1717,8 +1717,10 @@ func (a *App) runSync(ctx context.Context, args []string) error { limitRaw := fs.String("limit", "", "maximum issue/PR rows") jsonOut := fs.Bool("json", false, "write JSON output") includeComments := fs.Bool("include-comments", false, "hydrate issue comments, PR reviews, and PR review comments") + includePRDetails := fs.Bool("include-pr-details", false, "hydrate PR files, commits, checks, and workflow runs") + withRaw := fs.String("with", "", "extra hydration: pr-details") fs.Bool("include-code", false, "accepted for compatibility; code hydration is not implemented yet") - if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"numbers": true, "since": true, "state": true, "limit": true})); err != nil { + if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"numbers": true, "since": true, "state": true, 
"limit": true, "with": true})); err != nil { return usageErr(err) } a.applyCommandJSON(*jsonOut) @@ -1737,13 +1739,18 @@ func (a *App) runSync(ctx context.Context, args []string) error { if err != nil { return usageErr(err) } + with, err := parseSyncWith(*withRaw) + if err != nil { + return usageErr(err) + } stats, err := a.syncRepository(ctx, owner, repo, syncOptions{ - Since: strings.TrimSpace(*since), - State: strings.TrimSpace(*state), - Limit: limit, - Numbers: numbers, - IncludeComments: *includeComments, + Since: strings.TrimSpace(*since), + State: strings.TrimSpace(*state), + Limit: limit, + Numbers: numbers, + IncludeComments: *includeComments, + IncludePRDetails: *includePRDetails || with["pr-details"], }) if err != nil { return err @@ -1752,11 +1759,29 @@ func (a *App) runSync(ctx context.Context, args []string) error { } type syncOptions struct { - Since string - State string - Limit int - Numbers []int - IncludeComments bool + Since string + State string + Limit int + Numbers []int + IncludeComments bool + IncludePRDetails bool +} + +func parseSyncWith(value string) (map[string]bool, error) { + out := map[string]bool{} + for _, part := range strings.Split(value, ",") { + name := strings.TrimSpace(part) + if name == "" { + continue + } + switch name { + case "pr-details": + out[name] = true + default: + return nil, fmt.Errorf("unsupported --with value %q", name) + } + } + return out, nil } func (a *App) syncRepository(ctx context.Context, owner, repo string, options syncOptions) (syncer.Stats, error) { @@ -1780,13 +1805,14 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy client := gh.New(gh.Options{Token: token.Value, BaseURL: githubBaseURL()}) service := syncer.New(client, st) stats, err := service.Sync(ctx, syncer.Options{ - Owner: owner, - Repo: repo, - State: strings.TrimSpace(options.State), - Since: strings.TrimSpace(options.Since), - Limit: options.Limit, - Numbers: options.Numbers, - IncludeComments: 
options.IncludeComments, + Owner: owner, + Repo: repo, + State: strings.TrimSpace(options.State), + Since: strings.TrimSpace(options.Since), + Limit: options.Limit, + Numbers: options.Numbers, + IncludeComments: options.IncludeComments, + IncludePRDetails: options.IncludePRDetails, Reporter: func(message string) { fmt.Fprintln(a.Stderr, message) }, diff --git a/internal/cli/gh_shim.go b/internal/cli/gh_shim.go index 0ee70ca..cbdf44f 100644 --- a/internal/cli/gh_shim.go +++ b/internal/cli/gh_shim.go @@ -47,6 +47,17 @@ func (a *App) runGHShim(ctx context.Context, args []string) error { } _ = a.incrementGHXCacheCounter("local_hits") return nil + case "checks": + if args[0] == "pr" { + if err := a.runGHPRChecks(ctx, args[2:]); err != nil { + if isLocalGHUnsupported(err) { + return a.execRealGHMaybeCached(ctx, args) + } + return err + } + _ = a.incrementGHXCacheCounter("local_hits") + return nil + } case "list": if err := a.runGHThreadList(ctx, args[0], args[2:]); err != nil { if isLocalGHUnsupported(err) { @@ -58,6 +69,29 @@ func (a *App) runGHShim(ctx context.Context, args []string) error { return nil } } + case "run": + if len(args) >= 2 { + switch args[1] { + case "list": + if err := a.runGHRunList(ctx, args[2:]); err != nil { + if isLocalGHUnsupported(err) { + return a.execRealGHMaybeCached(ctx, args) + } + return err + } + _ = a.incrementGHXCacheCounter("local_hits") + return nil + case "view": + if err := a.runGHRunView(ctx, args[2:]); err != nil { + if isLocalGHUnsupported(err) { + return a.execRealGHMaybeCached(ctx, args) + } + return err + } + _ = a.incrementGHXCacheCounter("local_hits") + return nil + } + } } return a.execRealGHMaybeCached(ctx, args) } @@ -95,11 +129,11 @@ func (a *App) runGHThreadView(ctx context.Context, resource string, args []strin if jsonFields == "" { jsonFields = "number,title,state,url" } - rows, err := ghSearchJSONRows([]store.Thread{thread}, jsonFields) + row, err := a.ghThreadViewJSONRow(ctx, repoValue, thread, jsonFields) if err 
!= nil { return localGHUnsupported(err) } - return a.writeJSONValue(rows[0], strings.TrimSpace(*jqRaw)) + return a.writeJSONValue(row, strings.TrimSpace(*jqRaw)) } _, err = fmt.Fprintf(a.Stdout, "title:\t%s\nstate:\t%s\nurl:\t%s\n\n%s\n", thread.Title, thread.State, thread.HTMLURL, strings.TrimSpace(thread.Body)) return err diff --git a/internal/cli/gh_shim_cache.go b/internal/cli/gh_shim_cache.go index 15ced16..4697c5f 100644 --- a/internal/cli/gh_shim_cache.go +++ b/internal/cli/gh_shim_cache.go @@ -263,7 +263,21 @@ func (a *App) ghCommandStableIdentity(ctx context.Context, args []string) string if err != nil { return "" } - sha := ghPRHeadSHAFromRawJSON(thread.RawJSON) + sha := "" + owner, repoName, err := parseOwnerRepo(repo) + if err == nil { + if rt, openErr := a.openLocalRuntimeReadOnly(ctx); openErr == nil { + if localRepo, repoErr := rt.repository(ctx, owner, repoName); repoErr == nil { + if cache, cacheErr := rt.Store.PullRequestCache(ctx, localRepo.ID, number); cacheErr == nil { + sha = cache.Detail.HeadSHA + } + } + _ = rt.Store.Close() + } + } + if sha == "" { + sha = ghPRHeadSHAFromRawJSON(thread.RawJSON) + } if sha == "" { return "" } diff --git a/internal/cli/gh_shim_detail_test.go b/internal/cli/gh_shim_detail_test.go new file mode 100644 index 0000000..304c806 --- /dev/null +++ b/internal/cli/gh_shim_detail_test.go @@ -0,0 +1,97 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "testing" +) + +func TestGHShimViewAndListUseLocalCache(t *testing.T) { + ctx := context.Background() + configPath := seedGHShimRepo(t, ctx) + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,title,isDraft,author"}); err != nil { + t.Fatalf("gh pr view: %v", err) + } + var view map[string]any + if err := json.Unmarshal(stdout.Bytes(), &view); err != nil { + t.Fatalf("decode view: %v\n%s", err, stdout.String()) + 
} + if int(view["number"].(float64)) != 12 || view["isDraft"] != true { + t.Fatalf("view = %#v", view) + } + + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,files,commits,statusCheckRollup,headRefOid,headRefName"}); err != nil { + t.Fatalf("gh pr rich view: %v", err) + } + if err := json.Unmarshal(stdout.Bytes(), &view); err != nil { + t.Fatalf("decode rich view: %v\n%s", err, stdout.String()) + } + if view["headRefOid"] != "abc123" || len(view["files"].([]any)) != 1 || len(view["commits"].([]any)) != 1 { + t.Fatalf("rich view = %#v", view) + } + + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "checks", "12", "-R", "openclaw/openclaw", "--json", "name,state,detailsUrl,workflow"}); err != nil { + t.Fatalf("gh pr checks: %v", err) + } + var checks []map[string]any + if err := json.Unmarshal(stdout.Bytes(), &checks); err != nil { + t.Fatalf("decode checks: %v\n%s", err, stdout.String()) + } + if len(checks) != 1 || checks[0]["name"] != "test" || checks[0]["state"] != "SUCCESS" { + t.Fatalf("checks = %#v", checks) + } + + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "run", "list", "-R", "openclaw/openclaw", "--branch", "manifest-cache", "--json", "databaseId,workflowName,status,conclusion,headSha"}); err != nil { + t.Fatalf("gh run list: %v", err) + } + var runs []map[string]any + if err := json.Unmarshal(stdout.Bytes(), &runs); err != nil { + t.Fatalf("decode runs: %v\n%s", err, stdout.String()) + } + if len(runs) != 1 || int(runs[0]["databaseId"].(float64)) != 99 || runs[0]["headSha"] != "abc123" { + t.Fatalf("runs = %#v", runs) + } + + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "run", "view", "99", "-R", "openclaw/openclaw", "--json", "databaseId,url"}); err != nil { + t.Fatalf("gh run view: %v", err) + } + var runView map[string]any + if err := 
json.Unmarshal(stdout.Bytes(), &runView); err != nil { + t.Fatalf("decode run view: %v\n%s", err, stdout.String()) + } + if int(runView["databaseId"].(float64)) != 99 { + t.Fatalf("run view = %#v", runView) + } + + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number,title"}); err != nil { + t.Fatalf("gh issue list: %v", err) + } + var list []map[string]any + if err := json.Unmarshal(stdout.Bytes(), &list); err != nil { + t.Fatalf("decode list: %v\n%s", err, stdout.String()) + } + if len(list) != 1 || int(list[0]["number"].(float64)) != 10 { + t.Fatalf("list = %#v", list) + } + + stdout.Reset() + if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--author", "alice", "--assignee", "peter", "--label", "bug", "--json", "number,title"}); err != nil { + t.Fatalf("gh issue list filtered: %v", err) + } + if err := json.Unmarshal(stdout.Bytes(), &list); err != nil { + t.Fatalf("decode filtered list: %v\n%s", err, stdout.String()) + } + if len(list) != 1 || int(list[0]["number"].(float64)) != 10 { + t.Fatalf("filtered list = %#v", list) + } +} diff --git a/internal/cli/gh_shim_helpers_test.go b/internal/cli/gh_shim_helpers_test.go new file mode 100644 index 0000000..a41df7f --- /dev/null +++ b/internal/cli/gh_shim_helpers_test.go @@ -0,0 +1,27 @@ +package cli + +import ( + "context" + "testing" + + "github.com/openclaw/gitcrawl/internal/store" +) + +func prIDForTest(t *testing.T, ctx context.Context, st *store.Store, repoID int64, number int) int64 { + t.Helper() + threads, err := st.ListThreadsFiltered(ctx, store.ThreadListOptions{ + RepoID: repoID, + IncludeClosed: true, + Numbers: []int{number}, + }) + if err != nil { + t.Fatalf("list PR for test: %v", err) + } + for _, thread := range threads { + if thread.Number == number && thread.Kind == "pull_request" { + return thread.ID + } + } + t.Fatalf("missing PR 
#%d", number) + return 0 +} diff --git a/internal/cli/gh_shim_prcache.go b/internal/cli/gh_shim_prcache.go new file mode 100644 index 0000000..f534c54 --- /dev/null +++ b/internal/cli/gh_shim_prcache.go @@ -0,0 +1,218 @@ +package cli + +import ( + "context" + "flag" + "fmt" + "io" + "strings" + + "github.com/openclaw/gitcrawl/internal/store" +) + +func (a *App) ghThreadViewJSONRow(ctx context.Context, repoValue string, thread store.Thread, fieldsRaw string) (map[string]any, error) { + fields := parseJSONFields(fieldsRaw) + if len(fields) == 0 { + return nil, fmt.Errorf("--json requires at least one field") + } + row := make(map[string]any, len(fields)) + var cache *store.PullRequestCache + for _, field := range fields { + value, err := ghSearchJSONValue(thread, field) + if err == nil { + row[field] = value + continue + } + if thread.Kind != "pull_request" { + return nil, err + } + if cache == nil { + loaded, loadErr := a.localGHPullRequestCache(ctx, repoValue, thread.Number) + if loadErr != nil { + return nil, loadErr + } + cache = &loaded + } + value, err = ghPRDetailJSONValue(thread, *cache, field) + if err != nil { + return nil, err + } + row[field] = value + } + return row, nil +} + +func (a *App) localGHPullRequestCache(ctx context.Context, repoValue string, number int) (store.PullRequestCache, error) { + owner, repoName, err := parseOwnerRepo(repoValue) + if err != nil { + return store.PullRequestCache{}, err + } + rt, err := a.openLocalRuntimeReadOnly(ctx) + if err != nil { + return store.PullRequestCache{}, localGHUnsupported(err) + } + defer rt.Store.Close() + repo, err := rt.repository(ctx, owner, repoName) + if err != nil { + return store.PullRequestCache{}, localGHUnsupported(err) + } + cache, err := rt.Store.PullRequestCache(ctx, repo.ID, number) + if err != nil { + return store.PullRequestCache{}, localGHUnsupported(err) + } + return cache, nil +} + +func ghPRDetailJSONValue(thread store.Thread, cache store.PullRequestCache, field string) (any, error) 
{ + switch field { + case "files": + files := make([]map[string]any, 0, len(cache.Files)) + for _, file := range cache.Files { + files = append(files, map[string]any{ + "path": file.Path, + "additions": file.Additions, + "deletions": file.Deletions, + "status": file.Status, + }) + } + return files, nil + case "commits": + commits := make([]map[string]any, 0, len(cache.Commits)) + for _, commit := range cache.Commits { + headline := commit.Message + if index := strings.IndexByte(headline, '\n'); index >= 0 { + headline = headline[:index] + } + commits = append(commits, map[string]any{ + "oid": commit.SHA, + "messageHeadline": headline, + "messageBody": commit.Message, + "authoredDate": commit.CommittedAt, + "url": commit.HTMLURL, + "authors": []map[string]any{{ + "login": commit.AuthorLogin, + "name": commit.AuthorName, + }}, + }) + } + return commits, nil + case "statusCheckRollup": + return ghStatusCheckRollup(cache.Checks), nil + case "headRefName": + return cache.Detail.HeadRef, nil + case "headRefOid": + return cache.Detail.HeadSHA, nil + case "baseRefOid": + return cache.Detail.BaseSHA, nil + case "headRepositoryOwner": + owner := strings.Split(cache.Detail.HeadRepoFullName, "/")[0] + return map[string]any{"login": owner}, nil + case "headRepository": + return map[string]any{"nameWithOwner": cache.Detail.HeadRepoFullName}, nil + case "mergeStateStatus": + return strings.ToUpper(cache.Detail.MergeableState), nil + case "additions": + return cache.Detail.Additions, nil + case "deletions": + return cache.Detail.Deletions, nil + case "changedFiles": + return cache.Detail.ChangedFiles, nil + case "isDraft": + return thread.IsDraft, nil + default: + return nil, fmt.Errorf("unsupported --json field %q", field) + } +} + +func ghStatusCheckRollup(checks []store.PullRequestCheck) []map[string]any { + out := make([]map[string]any, 0, len(checks)) + for _, check := range checks { + state := strings.ToUpper(firstNonEmpty(check.Conclusion, check.Status)) + out = append(out, 
map[string]any{ + "__typename": "CheckRun", + "name": check.Name, + "status": strings.ToUpper(check.Status), + "conclusion": strings.ToUpper(check.Conclusion), + "state": state, + "detailsUrl": check.DetailsURL, + "workflowName": check.WorkflowName, + "startedAt": check.StartedAt, + "completedAt": check.CompletedAt, + }) + } + return out +} + +func (a *App) runGHPRChecks(ctx context.Context, args []string) error { + if hasAnyGHFlag(args, "--watch", "--web") { + return localGHUnsupported(fmt.Errorf("interactive PR checks flags require live gh")) + } + fs := flag.NewFlagSet("pr checks", flag.ContinueOnError) + fs.SetOutput(io.Discard) + repoShort := fs.String("R", "", "repository") + repoLong := fs.String("repo", "", "repository") + jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields") + jqRaw := fs.String("jq", "", "jq filter") + if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"R": true, "repo": true, "json": true, "jq": true})); err != nil { + return usageErr(err) + } + if fs.NArg() != 1 { + return usageErr(fmt.Errorf("gh pr checks requires a number")) + } + number, err := parseThreadNumber(fs.Arg(0)) + if err != nil { + return usageErr(err) + } + repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*repoShort, *repoLong)) + if err != nil { + return localGHUnsupported(err) + } + cache, err := a.localGHPullRequestCache(ctx, repoValue, number) + if err != nil { + return err + } + if len(cache.Checks) == 0 { + return localGHUnsupported(fmt.Errorf("cached PR checks are empty")) + } + if strings.TrimSpace(*jsonFieldsRaw) != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON { + fields := firstNonEmpty(strings.TrimSpace(*jsonFieldsRaw), "name,state,conclusion,detailsUrl,workflow") + rows := ghPRChecksJSONRows(cache.Checks, fields) + return a.writeJSONValue(rows, strings.TrimSpace(*jqRaw)) + } + for _, check := range cache.Checks { + if _, err := fmt.Fprintf(a.Stdout, "%s\t%s\t%s\t%s\n", check.Name, check.Status, 
check.Conclusion, check.DetailsURL); err != nil { + return err + } + } + return nil +} + +func ghPRChecksJSONRows(checks []store.PullRequestCheck, fieldsRaw string) []map[string]any { + fields := parseJSONFields(fieldsRaw) + rows := make([]map[string]any, 0, len(checks)) + for _, check := range checks { + row := make(map[string]any, len(fields)) + for _, field := range fields { + switch field { + case "name": + row[field] = check.Name + case "state": + row[field] = strings.ToUpper(firstNonEmpty(check.Conclusion, check.Status)) + case "status": + row[field] = check.Status + case "conclusion": + row[field] = check.Conclusion + case "detailsUrl", "link": + row[field] = check.DetailsURL + case "workflow": + row[field] = check.WorkflowName + case "startedAt": + row[field] = check.StartedAt + case "completedAt": + row[field] = check.CompletedAt + } + } + rows = append(rows, row) + } + return rows +} diff --git a/internal/cli/gh_shim_runs.go b/internal/cli/gh_shim_runs.go new file mode 100644 index 0000000..fe5880f --- /dev/null +++ b/internal/cli/gh_shim_runs.go @@ -0,0 +1,161 @@ +package cli + +import ( + "context" + "flag" + "fmt" + "io" + "strconv" + "strings" + + "github.com/openclaw/gitcrawl/internal/store" +) + +func (a *App) runGHRunList(ctx context.Context, args []string) error { + if hasAnyGHFlag(args, "--web") { + return localGHUnsupported(fmt.Errorf("web workflow run flags require live gh")) + } + fs := flag.NewFlagSet("run list", flag.ContinueOnError) + fs.SetOutput(io.Discard) + repoShort := fs.String("R", "", "repository") + repoLong := fs.String("repo", "", "repository") + branchRaw := fs.String("branch", "", "branch") + commitRaw := fs.String("commit", "", "head sha") + limitRaw := fs.String("limit", "", "maximum rows") + limitShortRaw := fs.String("L", "", "maximum rows") + jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields") + jqRaw := fs.String("jq", "", "jq filter") + if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{ + 
"R": true, "repo": true, "branch": true, "commit": true, "limit": true, "L": true, "json": true, "jq": true, + })); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { + return usageErr(fmt.Errorf("unexpected gh run list arguments: %s", strings.Join(fs.Args(), " "))) + } + limit, err := parseGHSearchLimit(*limitRaw, *limitShortRaw) + if err != nil { + return usageErr(err) + } + repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*repoShort, *repoLong)) + if err != nil { + return localGHUnsupported(err) + } + runs, err := a.localGHWorkflowRuns(ctx, repoValue, store.WorkflowRunListOptions{ + Branch: strings.TrimSpace(*branchRaw), + HeadSHA: strings.TrimSpace(*commitRaw), + Limit: limit, + }) + if err != nil { + return err + } + if len(runs) == 0 { + return localGHUnsupported(fmt.Errorf("no cached workflow runs")) + } + if strings.TrimSpace(*jsonFieldsRaw) != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON { + fields := firstNonEmpty(strings.TrimSpace(*jsonFieldsRaw), "databaseId,workflowName,status,conclusion,url,createdAt,updatedAt") + return a.writeJSONValue(ghWorkflowRunJSONRows(runs, fields), strings.TrimSpace(*jqRaw)) + } + for _, run := range runs { + if _, err := fmt.Fprintf(a.Stdout, "%s\t%s\t%s\t%s\n", run.RunID, run.WorkflowName, run.Status, run.HTMLURL); err != nil { + return err + } + } + return nil +} + +func (a *App) runGHRunView(ctx context.Context, args []string) error { + if hasAnyGHFlag(args, "--web", "--log", "--log-failed") { + return localGHUnsupported(fmt.Errorf("workflow run logs require live gh")) + } + fs := flag.NewFlagSet("run view", flag.ContinueOnError) + fs.SetOutput(io.Discard) + repoShort := fs.String("R", "", "repository") + repoLong := fs.String("repo", "", "repository") + jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields") + jqRaw := fs.String("jq", "", "jq filter") + if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"R": true, "repo": true, "json": true, "jq": true})); err != 
nil { + return usageErr(err) + } + if fs.NArg() != 1 { + return usageErr(fmt.Errorf("gh run view requires a run id")) + } + runID := strings.TrimSpace(fs.Arg(0)) + repoValue, err := a.resolveGHRepo(ctx, firstNonEmpty(*repoShort, *repoLong)) + if err != nil { + return localGHUnsupported(err) + } + runs, err := a.localGHWorkflowRuns(ctx, repoValue, store.WorkflowRunListOptions{Limit: 100}) + if err != nil { + return err + } + for _, run := range runs { + if run.RunID != runID { + continue + } + if strings.TrimSpace(*jsonFieldsRaw) != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON { + fields := firstNonEmpty(strings.TrimSpace(*jsonFieldsRaw), "databaseId,workflowName,status,conclusion,url,createdAt,updatedAt") + return a.writeJSONValue(ghWorkflowRunJSONRows([]store.WorkflowRun{run}, fields)[0], strings.TrimSpace(*jqRaw)) + } + _, err := fmt.Fprintf(a.Stdout, "run: %s\nworkflow: %s\nstatus: %s\nurl: %s\n", run.RunID, run.WorkflowName, run.Status, run.HTMLURL) + return err + } + return localGHUnsupported(fmt.Errorf("cached workflow run %s was not found", runID)) +} + +func (a *App) localGHWorkflowRuns(ctx context.Context, repoValue string, options store.WorkflowRunListOptions) ([]store.WorkflowRun, error) { + owner, repoName, err := parseOwnerRepo(repoValue) + if err != nil { + return nil, err + } + rt, err := a.openLocalRuntimeReadOnly(ctx) + if err != nil { + return nil, localGHUnsupported(err) + } + defer rt.Store.Close() + repo, err := rt.repository(ctx, owner, repoName) + if err != nil { + return nil, localGHUnsupported(err) + } + return rt.Store.ListWorkflowRuns(ctx, repo.ID, options) +} + +func ghWorkflowRunJSONRows(runs []store.WorkflowRun, fieldsRaw string) []map[string]any { + fields := parseJSONFields(fieldsRaw) + rows := make([]map[string]any, 0, len(runs)) + for _, run := range runs { + row := make(map[string]any, len(fields)) + for _, field := range fields { + switch field { + case "databaseId", "id": + if id, err := 
strconv.ParseInt(run.RunID, 10, 64); err == nil { + row[field] = id + } else { + row[field] = run.RunID + } + case "number": + row[field] = run.RunNumber + case "workflowName", "name", "displayTitle": + row[field] = run.WorkflowName + case "status": + row[field] = run.Status + case "conclusion": + row[field] = run.Conclusion + case "url": + row[field] = run.HTMLURL + case "event": + row[field] = run.Event + case "headBranch": + row[field] = run.HeadBranch + case "headSha": + row[field] = run.HeadSHA + case "createdAt": + row[field] = run.CreatedAtGH + case "updatedAt": + row[field] = run.UpdatedAtGH + } + } + rows = append(rows, row) + } + return rows +} diff --git a/internal/cli/gh_shim_test.go b/internal/cli/gh_shim_test.go index 2d7f50e..ec19c73 100644 --- a/internal/cli/gh_shim_test.go +++ b/internal/cli/gh_shim_test.go @@ -43,48 +43,6 @@ func TestGHShimSearchAcceptsGHFlags(t *testing.T) { } } -func TestGHShimViewAndListUseLocalCache(t *testing.T) { - ctx := context.Background() - configPath := seedGHShimRepo(t, ctx) - - run := New() - var stdout bytes.Buffer - run.Stdout = &stdout - if err := run.Run(ctx, []string{"--config", configPath, "gh", "pr", "view", "12", "-R", "openclaw/openclaw", "--json", "number,title,isDraft,author"}); err != nil { - t.Fatalf("gh pr view: %v", err) - } - var view map[string]any - if err := json.Unmarshal(stdout.Bytes(), &view); err != nil { - t.Fatalf("decode view: %v\n%s", err, stdout.String()) - } - if int(view["number"].(float64)) != 12 || view["isDraft"] != true { - t.Fatalf("view = %#v", view) - } - - stdout.Reset() - if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number,title"}); err != nil { - t.Fatalf("gh issue list: %v", err) - } - var list []map[string]any - if err := json.Unmarshal(stdout.Bytes(), &list); err != nil { - t.Fatalf("decode list: %v\n%s", err, stdout.String()) - } - if len(list) != 1 || int(list[0]["number"].(float64)) 
!= 10 { - t.Fatalf("list = %#v", list) - } - - stdout.Reset() - if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--author", "alice", "--assignee", "peter", "--label", "bug", "--json", "number,title"}); err != nil { - t.Fatalf("gh issue list filtered: %v", err) - } - if err := json.Unmarshal(stdout.Bytes(), &list); err != nil { - t.Fatalf("decode filtered list: %v\n%s", err, stdout.String()) - } - if len(list) != 1 || int(list[0]["number"].(float64)) != 10 { - t.Fatalf("filtered list = %#v", list) - } -} - func TestGHShimFallsBackForUnsupportedRead(t *testing.T) { ctx := context.Background() configPath := seedGHShimRepo(t, ctx) @@ -254,6 +212,17 @@ echo "diff-$count:$*" }); err != nil { t.Fatalf("update pr head: %v", err) } + if err := st.UpsertPullRequestCache(ctx, store.PullRequestDetail{ + ThreadID: prIDForTest(t, ctx, st, repo.ID, 12), + RepoID: repo.ID, + Number: 12, + HeadSHA: "def456", + RawJSON: `{"head":{"sha":"def456"}}`, + FetchedAt: "2026-04-27T03:00:00Z", + UpdatedAt: "2026-04-27T03:00:00Z", + }, nil, nil, nil, nil); err != nil { + t.Fatalf("update pr cache head: %v", err) + } if err := st.Close(); err != nil { t.Fatalf("close store: %v", err) } @@ -448,6 +417,67 @@ func seedGHShimRepo(t *testing.T, ctx context.Context) string { if _, err := st.UpsertDocument(ctx, store.Document{ThreadID: prID, Title: "Manifest cache update", RawText: "manifest cache refresh", DedupeText: "manifest cache refresh", UpdatedAt: "2026-04-27T02:00:00Z"}); err != nil { t.Fatalf("seed pr document: %v", err) } + if err := st.UpsertPullRequestCache(ctx, store.PullRequestDetail{ + ThreadID: prID, + RepoID: repoID, + Number: 12, + BaseSHA: "base123", + HeadSHA: "abc123", + HeadRef: "manifest-cache", + HeadRepoFullName: "openclaw/openclaw", + MergeableState: "clean", + Additions: 10, + Deletions: 2, + ChangedFiles: 1, + RawJSON: `{"head":{"sha":"abc123"}}`, + FetchedAt: "2026-04-27T02:00:00Z", + UpdatedAt: 
"2026-04-27T02:00:00Z", + }, []store.PullRequestFile{{ + ThreadID: prID, + Path: "internal/cache.go", + Status: "modified", + Additions: 10, + Deletions: 2, + Changes: 12, + RawJSON: "{}", + FetchedAt: "2026-04-27T02:00:00Z", + }}, []store.PullRequestCommit{{ + ThreadID: prID, + SHA: "commit123", + Message: "feat: cache", + AuthorLogin: "alice", + AuthorName: "Alice", + CommittedAt: "2026-04-27T01:00:00Z", + HTMLURL: "https://github.com/openclaw/openclaw/commit/commit123", + RawJSON: "{}", + FetchedAt: "2026-04-27T02:00:00Z", + }}, []store.PullRequestCheck{{ + ThreadID: prID, + Name: "test", + Status: "completed", + Conclusion: "success", + DetailsURL: "https://github.com/openclaw/openclaw/actions/runs/99", + WorkflowName: "CI", + RawJSON: "{}", + FetchedAt: "2026-04-27T02:00:00Z", + }}, []store.WorkflowRun{{ + RepoID: repoID, + RunID: "99", + RunNumber: 7, + HeadBranch: "manifest-cache", + HeadSHA: "abc123", + Status: "completed", + Conclusion: "success", + WorkflowName: "CI", + Event: "pull_request", + HTMLURL: "https://github.com/openclaw/openclaw/actions/runs/99", + CreatedAtGH: "2026-04-27T01:00:00Z", + UpdatedAtGH: "2026-04-27T02:00:00Z", + RawJSON: "{}", + FetchedAt: "2026-04-27T02:00:00Z", + }}); err != nil { + t.Fatalf("seed pr cache: %v", err) + } if err := st.Close(); err != nil { t.Fatalf("close store: %v", err) } diff --git a/internal/github/client.go b/internal/github/client.go index f9c5457..0c8e168 100644 --- a/internal/github/client.go +++ b/internal/github/client.go @@ -37,6 +37,12 @@ type ListIssuesOptions struct { ExpectedTotal int } +type ListWorkflowRunsOptions struct { + Branch string + HeadSHA string + Limit int +} + type RequestError struct { Method string URL string @@ -137,6 +143,45 @@ func (c *Client) ListPullFiles(ctx context.Context, owner, repo string, number i return c.paginate(ctx, path, 0, 0, reporter) } +func (c *Client) ListPullCommits(ctx context.Context, owner, repo string, number int, reporter Reporter) ([]map[string]any, 
error) { + path := fmt.Sprintf("/repos/%s/%s/pulls/%d/commits?per_page=100", pathEscape(owner), pathEscape(repo), number) + return c.paginate(ctx, path, 0, 0, reporter) +} + +func (c *Client) ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter Reporter) ([]map[string]any, error) { + var payload struct { + CheckRuns []map[string]any `json:"check_runs"` + } + path := fmt.Sprintf("/repos/%s/%s/commits/%s/check-runs?per_page=100", pathEscape(owner), pathEscape(repo), pathEscape(ref)) + if err := c.doJSON(ctx, http.MethodGet, path, nil, reporter, &payload); err != nil { + return nil, err + } + return payload.CheckRuns, nil +} + +func (c *Client) ListWorkflowRuns(ctx context.Context, owner, repo string, options ListWorkflowRunsOptions, reporter Reporter) ([]map[string]any, error) { + values := url.Values{} + values.Set("per_page", "100") + if options.Branch != "" { + values.Set("branch", options.Branch) + } + if options.HeadSHA != "" { + values.Set("head_sha", options.HeadSHA) + } + path := fmt.Sprintf("/repos/%s/%s/actions/runs?%s", pathEscape(owner), pathEscape(repo), values.Encode()) + var payload struct { + WorkflowRuns []map[string]any `json:"workflow_runs"` + } + if err := c.doJSON(ctx, http.MethodGet, path, nil, reporter, &payload); err != nil { + return nil, err + } + rows := payload.WorkflowRuns + if options.Limit > 0 && len(rows) > options.Limit { + rows = rows[:options.Limit] + } + return rows, nil +} + func (c *Client) paginate(ctx context.Context, firstPath string, limit int, expectedItems int, reporter Reporter) ([]map[string]any, error) { var out []map[string]any nextPath := firstPath diff --git a/internal/github/client_test.go b/internal/github/client_test.go index fc6c177..d042588 100644 --- a/internal/github/client_test.go +++ b/internal/github/client_test.go @@ -158,8 +158,13 @@ func TestClientSingleResourceAndCollectionEndpoints(t *testing.T) { case "/repos/openclaw/gitcrawl/issues/7/comments", 
"/repos/openclaw/gitcrawl/pulls/8/reviews", "/repos/openclaw/gitcrawl/pulls/8/comments", - "/repos/openclaw/gitcrawl/pulls/8/files": + "/repos/openclaw/gitcrawl/pulls/8/files", + "/repos/openclaw/gitcrawl/pulls/8/commits": _ = json.NewEncoder(w).Encode([]map[string]any{{"id": 1}}) + case "/repos/openclaw/gitcrawl/commits/abc/check-runs": + _ = json.NewEncoder(w).Encode(map[string]any{"check_runs": []map[string]any{{"name": "test"}}}) + case "/repos/openclaw/gitcrawl/actions/runs": + _ = json.NewEncoder(w).Encode(map[string]any{"workflow_runs": []map[string]any{{"id": 99}}}) default: t.Fatalf("unexpected path: %s", r.URL.String()) } @@ -183,14 +188,21 @@ func TestClientSingleResourceAndCollectionEndpoints(t *testing.T) { "review-comments": func() ([]map[string]any, error) { return client.ListPullReviewComments(ctx, "openclaw", "gitcrawl", 8, nil) }, - "files": func() ([]map[string]any, error) { return client.ListPullFiles(ctx, "openclaw", "gitcrawl", 8, nil) }, + "files": func() ([]map[string]any, error) { return client.ListPullFiles(ctx, "openclaw", "gitcrawl", 8, nil) }, + "commits": func() ([]map[string]any, error) { return client.ListPullCommits(ctx, "openclaw", "gitcrawl", 8, nil) }, + "checks": func() ([]map[string]any, error) { + return client.ListCommitCheckRuns(ctx, "openclaw", "gitcrawl", "abc", nil) + }, + "runs": func() ([]map[string]any, error) { + return client.ListWorkflowRuns(ctx, "openclaw", "gitcrawl", ListWorkflowRunsOptions{HeadSHA: "abc"}, nil) + }, } { rows, err := fn() if err != nil || len(rows) != 1 { t.Fatalf("%s rows = %+v err=%v", name, rows, err) } } - if len(requests) != 7 { + if len(requests) != 10 { t.Fatalf("requests = %+v", requests) } } diff --git a/internal/store/pull_requests.go b/internal/store/pull_requests.go new file mode 100644 index 0000000..5c01789 --- /dev/null +++ b/internal/store/pull_requests.go @@ -0,0 +1,358 @@ +package store + +import ( + "context" + "database/sql" + "fmt" + "strings" +) + +type PullRequestDetail 
// PullRequestCheck is one row of the pull_request_checks table: a single
// check attached to a pull request thread.
//
// ID is read back from the table's id column; the insert performed by
// upsertPullRequestCache does not supply it. The optional text columns
// (status, conclusion, details_url, workflow_name, started_at, completed_at)
// are scanned through sql.NullString, so a NULL column surfaces here as "".
type PullRequestCheck struct {
	ID           int64  `json:"id"`
	ThreadID     int64  `json:"thread_id"`
	Name         string `json:"name"`
	Status       string `json:"status,omitempty"`
	Conclusion   string `json:"conclusion,omitempty"`
	DetailsURL   string `json:"details_url,omitempty"`
	WorkflowName string `json:"workflow_name,omitempty"`
	StartedAt    string `json:"started_at,omitempty"`
	CompletedAt  string `json:"completed_at,omitempty"`
	RawJSON      string `json:"raw_json,omitempty"`
	FetchedAt    string `json:"fetched_at"`
}
// upsertPullRequestCache writes one hydrated pull request into the cache
// tables using whatever handle s.q() returns. UpsertPullRequestCache calls it
// directly when s.queries is already set and otherwise wraps it in WithTx.
//
// The detail row is upserted on thread_id. Files, commits, and checks are
// replaced wholesale: every existing row for detail.ThreadID is deleted and the
// supplied slices are inserted, so passing nil for one of them leaves that
// table empty for the thread. Workflow runs are upserted on (repo_id, run_id)
// and are never deleted here.
//
// The first failing statement aborts the function with a wrapped error.
func (s *Store) upsertPullRequestCache(ctx context.Context, detail PullRequestDetail, files []PullRequestFile, commits []PullRequestCommit, checks []PullRequestCheck, runs []WorkflowRun) error {
	// NOTE(review): nullString presumably maps "" to SQL NULL for the optional
	// text columns — confirm against its definition.
	if _, err := s.q().ExecContext(ctx, `
		insert into pull_request_details(thread_id, repo_id, number, base_sha, head_sha, head_ref, head_repo_full_name, mergeable_state, additions, deletions, changed_files, raw_json, fetched_at, updated_at)
		values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		on conflict(thread_id) do update set
			repo_id=excluded.repo_id,
			number=excluded.number,
			base_sha=excluded.base_sha,
			head_sha=excluded.head_sha,
			head_ref=excluded.head_ref,
			head_repo_full_name=excluded.head_repo_full_name,
			mergeable_state=excluded.mergeable_state,
			additions=excluded.additions,
			deletions=excluded.deletions,
			changed_files=excluded.changed_files,
			raw_json=excluded.raw_json,
			fetched_at=excluded.fetched_at,
			updated_at=excluded.updated_at
	`, detail.ThreadID, detail.RepoID, detail.Number, nullString(detail.BaseSHA), nullString(detail.HeadSHA), nullString(detail.HeadRef), nullString(detail.HeadRepoFullName), nullString(detail.MergeableState), detail.Additions, detail.Deletions, detail.ChangedFiles, detail.RawJSON, detail.FetchedAt, detail.UpdatedAt); err != nil {
		return fmt.Errorf("upsert pull request detail: %w", err)
	}
	// Files: clear, then re-insert. Rows are keyed by detail.ThreadID; each
	// file's own ThreadID field is not consulted.
	if _, err := s.q().ExecContext(ctx, `delete from pull_request_files where thread_id = ?`, detail.ThreadID); err != nil {
		return fmt.Errorf("clear pull request files: %w", err)
	}
	for _, file := range files {
		if _, err := s.q().ExecContext(ctx, `
			insert into pull_request_files(thread_id, path, status, additions, deletions, changes, previous_path, patch, raw_json, fetched_at)
			values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		`, detail.ThreadID, file.Path, nullString(file.Status), file.Additions, file.Deletions, file.Changes, nullString(file.PreviousPath), nullString(file.Patch), file.RawJSON, file.FetchedAt); err != nil {
			return fmt.Errorf("upsert pull request file: %w", err)
		}
	}
	// Commits: same clear-and-reinsert treatment. PullRequestCommits reads them
	// back ordered by rowid, i.e. in the order they are inserted here.
	if _, err := s.q().ExecContext(ctx, `delete from pull_request_commits where thread_id = ?`, detail.ThreadID); err != nil {
		return fmt.Errorf("clear pull request commits: %w", err)
	}
	for _, commit := range commits {
		if _, err := s.q().ExecContext(ctx, `
			insert into pull_request_commits(thread_id, sha, message, author_login, author_name, committed_at, html_url, raw_json, fetched_at)
			values(?, ?, ?, ?, ?, ?, ?, ?, ?)
		`, detail.ThreadID, commit.SHA, nullString(commit.Message), nullString(commit.AuthorLogin), nullString(commit.AuthorName), nullString(commit.CommittedAt), nullString(commit.HTMLURL), commit.RawJSON, commit.FetchedAt); err != nil {
			return fmt.Errorf("upsert pull request commit: %w", err)
		}
	}
	// Checks: same clear-and-reinsert treatment. The id column is omitted from
	// the insert, so check.ID is ignored on write.
	if _, err := s.q().ExecContext(ctx, `delete from pull_request_checks where thread_id = ?`, detail.ThreadID); err != nil {
		return fmt.Errorf("clear pull request checks: %w", err)
	}
	for _, check := range checks {
		if _, err := s.q().ExecContext(ctx, `
			insert into pull_request_checks(thread_id, name, status, conclusion, details_url, workflow_name, started_at, completed_at, raw_json, fetched_at)
			values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		`, detail.ThreadID, check.Name, nullString(check.Status), nullString(check.Conclusion), nullString(check.DetailsURL), nullString(check.WorkflowName), nullString(check.StartedAt), nullString(check.CompletedAt), check.RawJSON, check.FetchedAt); err != nil {
			return fmt.Errorf("upsert pull request check: %w", err)
		}
	}
	// Workflow runs are keyed by each run's own RepoID rather than the thread,
	// so they are merged in place instead of being cleared first.
	for _, run := range runs {
		if _, err := s.q().ExecContext(ctx, `
			insert into github_workflow_runs(repo_id, run_id, run_number, head_branch, head_sha, status, conclusion, workflow_name, event, html_url, created_at_gh, updated_at_gh, raw_json, fetched_at)
			values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
			on conflict(repo_id, run_id) do update set
				run_number=excluded.run_number,
				head_branch=excluded.head_branch,
				head_sha=excluded.head_sha,
				status=excluded.status,
				conclusion=excluded.conclusion,
				workflow_name=excluded.workflow_name,
				event=excluded.event,
				html_url=excluded.html_url,
				created_at_gh=excluded.created_at_gh,
				updated_at_gh=excluded.updated_at_gh,
				raw_json=excluded.raw_json,
				fetched_at=excluded.fetched_at
		`, run.RepoID, run.RunID, run.RunNumber, nullString(run.HeadBranch), nullString(run.HeadSHA), nullString(run.Status), nullString(run.Conclusion), nullString(run.WorkflowName), nullString(run.Event), nullString(run.HTMLURL), nullString(run.CreatedAtGH), nullString(run.UpdatedAtGH), run.RawJSON, run.FetchedAt); err != nil {
			return fmt.Errorf("upsert workflow run: %w", err)
		}
	}
	return nil
}
+ `, repoID, number).Scan(&cache.Detail.ThreadID, &cache.Detail.RepoID, &cache.Detail.Number, &baseSHA, &headSHA, &headRef, &headRepo, &mergeable, &cache.Detail.Additions, &cache.Detail.Deletions, &cache.Detail.ChangedFiles, &cache.Detail.RawJSON, &cache.Detail.FetchedAt, &cache.Detail.UpdatedAt) + if err != nil { + return PullRequestCache{}, fmt.Errorf("pull request detail: %w", err) + } + cache.Detail.BaseSHA = baseSHA.String + cache.Detail.HeadSHA = headSHA.String + cache.Detail.HeadRef = headRef.String + cache.Detail.HeadRepoFullName = headRepo.String + cache.Detail.MergeableState = mergeable.String + files, err := s.PullRequestFiles(ctx, cache.Detail.ThreadID) + if err != nil { + return PullRequestCache{}, err + } + cache.Files = files + commits, err := s.PullRequestCommits(ctx, cache.Detail.ThreadID) + if err != nil { + return PullRequestCache{}, err + } + cache.Commits = commits + checks, err := s.PullRequestChecks(ctx, cache.Detail.ThreadID) + if err != nil { + return PullRequestCache{}, err + } + cache.Checks = checks + return cache, nil +} + +func (s *Store) PullRequestFiles(ctx context.Context, threadID int64) ([]PullRequestFile, error) { + rows, err := s.q().QueryContext(ctx, ` + select thread_id, path, status, additions, deletions, changes, previous_path, patch, raw_json, fetched_at + from pull_request_files + where thread_id = ? 
+ order by path + `, threadID) + if err != nil { + return nil, fmt.Errorf("list pull request files: %w", err) + } + defer rows.Close() + var out []PullRequestFile + for rows.Next() { + var file PullRequestFile + var status, previousPath, patch sql.NullString + if err := rows.Scan(&file.ThreadID, &file.Path, &status, &file.Additions, &file.Deletions, &file.Changes, &previousPath, &patch, &file.RawJSON, &file.FetchedAt); err != nil { + return nil, fmt.Errorf("scan pull request file: %w", err) + } + file.Status = status.String + file.PreviousPath = previousPath.String + file.Patch = patch.String + out = append(out, file) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate pull request files: %w", err) + } + return out, nil +} + +func (s *Store) PullRequestCommits(ctx context.Context, threadID int64) ([]PullRequestCommit, error) { + rows, err := s.q().QueryContext(ctx, ` + select thread_id, sha, message, author_login, author_name, committed_at, html_url, raw_json, fetched_at + from pull_request_commits + where thread_id = ? 
+ order by rowid + `, threadID) + if err != nil { + return nil, fmt.Errorf("list pull request commits: %w", err) + } + defer rows.Close() + var out []PullRequestCommit + for rows.Next() { + var commit PullRequestCommit + var message, authorLogin, authorName, committedAt, htmlURL sql.NullString + if err := rows.Scan(&commit.ThreadID, &commit.SHA, &message, &authorLogin, &authorName, &committedAt, &htmlURL, &commit.RawJSON, &commit.FetchedAt); err != nil { + return nil, fmt.Errorf("scan pull request commit: %w", err) + } + commit.Message = message.String + commit.AuthorLogin = authorLogin.String + commit.AuthorName = authorName.String + commit.CommittedAt = committedAt.String + commit.HTMLURL = htmlURL.String + out = append(out, commit) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate pull request commits: %w", err) + } + return out, nil +} + +func (s *Store) PullRequestChecks(ctx context.Context, threadID int64) ([]PullRequestCheck, error) { + rows, err := s.q().QueryContext(ctx, ` + select id, thread_id, name, status, conclusion, details_url, workflow_name, started_at, completed_at, raw_json, fetched_at + from pull_request_checks + where thread_id = ? 
+ order by name + `, threadID) + if err != nil { + return nil, fmt.Errorf("list pull request checks: %w", err) + } + defer rows.Close() + var out []PullRequestCheck + for rows.Next() { + var check PullRequestCheck + var status, conclusion, detailsURL, workflowName, startedAt, completedAt sql.NullString + if err := rows.Scan(&check.ID, &check.ThreadID, &check.Name, &status, &conclusion, &detailsURL, &workflowName, &startedAt, &completedAt, &check.RawJSON, &check.FetchedAt); err != nil { + return nil, fmt.Errorf("scan pull request check: %w", err) + } + check.Status = status.String + check.Conclusion = conclusion.String + check.DetailsURL = detailsURL.String + check.WorkflowName = workflowName.String + check.StartedAt = startedAt.String + check.CompletedAt = completedAt.String + out = append(out, check) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate pull request checks: %w", err) + } + return out, nil +} + +type WorkflowRunListOptions struct { + Branch string + HeadSHA string + Limit int +} + +func (s *Store) ListWorkflowRuns(ctx context.Context, repoID int64, options WorkflowRunListOptions) ([]WorkflowRun, error) { + where := []string{"repo_id = ?"} + args := []any{repoID} + if options.Branch != "" { + where = append(where, "head_branch = ?") + args = append(args, options.Branch) + } + if options.HeadSHA != "" { + where = append(where, "head_sha = ?") + args = append(args, options.HeadSHA) + } + limit := options.Limit + if limit <= 0 { + limit = 20 + } + args = append(args, limit) + rows, err := s.q().QueryContext(ctx, ` + select repo_id, run_id, run_number, head_branch, head_sha, status, conclusion, workflow_name, event, html_url, created_at_gh, updated_at_gh, raw_json, fetched_at + from github_workflow_runs + where `+strings.Join(where, " and ")+` + order by updated_at_gh desc, run_id desc + limit ? + `, args...) 
+ if err != nil { + return nil, fmt.Errorf("list workflow runs: %w", err) + } + defer rows.Close() + var out []WorkflowRun + for rows.Next() { + var run WorkflowRun + var branch, sha, status, conclusion, workflowName, event, htmlURL, createdAt, updatedAt sql.NullString + if err := rows.Scan(&run.RepoID, &run.RunID, &run.RunNumber, &branch, &sha, &status, &conclusion, &workflowName, &event, &htmlURL, &createdAt, &updatedAt, &run.RawJSON, &run.FetchedAt); err != nil { + return nil, fmt.Errorf("scan workflow run: %w", err) + } + run.HeadBranch = branch.String + run.HeadSHA = sha.String + run.Status = status.String + run.Conclusion = conclusion.String + run.WorkflowName = workflowName.String + run.Event = event.String + run.HTMLURL = htmlURL.String + run.CreatedAtGH = createdAt.String + run.UpdatedAtGH = updatedAt.String + out = append(out, run) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate workflow runs: %w", err) + } + return out, nil +} diff --git a/internal/store/schema.go b/internal/store/schema.go index a174713..71cc936 100644 --- a/internal/store/schema.go +++ b/internal/store/schema.go @@ -122,6 +122,84 @@ create table if not exists thread_hunk_signatures ( unique(snapshot_id, path, hunk_hash) ); +create table if not exists pull_request_details ( + thread_id integer primary key references threads(id) on delete cascade, + repo_id integer not null references repositories(id) on delete cascade, + number integer not null, + base_sha text, + head_sha text, + head_ref text, + head_repo_full_name text, + mergeable_state text, + additions integer not null default 0, + deletions integer not null default 0, + changed_files integer not null default 0, + raw_json text not null, + fetched_at text not null, + updated_at text not null, + unique(repo_id, number) +); + +create table if not exists pull_request_files ( + thread_id integer not null references threads(id) on delete cascade, + path text not null, + status text, + additions integer not 
null default 0, + deletions integer not null default 0, + changes integer not null default 0, + previous_path text, + patch text, + raw_json text not null, + fetched_at text not null, + primary key(thread_id, path) +); + +create table if not exists pull_request_commits ( + thread_id integer not null references threads(id) on delete cascade, + sha text not null, + message text, + author_login text, + author_name text, + committed_at text, + html_url text, + raw_json text not null, + fetched_at text not null, + primary key(thread_id, sha) +); + +create table if not exists pull_request_checks ( + id integer primary key, + thread_id integer not null references threads(id) on delete cascade, + name text not null, + status text, + conclusion text, + details_url text, + workflow_name text, + started_at text, + completed_at text, + raw_json text not null, + fetched_at text not null, + unique(thread_id, name, details_url) +); + +create table if not exists github_workflow_runs ( + repo_id integer not null references repositories(id) on delete cascade, + run_id text not null, + run_number integer not null default 0, + head_branch text, + head_sha text, + status text, + conclusion text, + workflow_name text, + event text, + html_url text, + created_at_gh text, + updated_at_gh text, + raw_json text not null, + fetched_at text not null, + primary key(repo_id, run_id) +); + create table if not exists documents ( id integer primary key, thread_id integer not null unique references threads(id) on delete cascade, @@ -391,6 +469,11 @@ create index if not exists idx_threads_repo_updated on threads(repo_id, updated_ create index if not exists idx_comments_thread_type on comments(thread_id, comment_type); create index if not exists idx_thread_revisions_thread_created on thread_revisions(thread_id, created_at); create index if not exists idx_thread_changed_files_path on thread_changed_files(path); +create index if not exists idx_pull_request_details_repo_number on 
pull_request_details(repo_id, number); +create index if not exists idx_pull_request_files_path on pull_request_files(path); +create index if not exists idx_pull_request_checks_thread_status on pull_request_checks(thread_id, status, conclusion); +create index if not exists idx_github_workflow_runs_repo_branch on github_workflow_runs(repo_id, head_branch, run_id); +create index if not exists idx_github_workflow_runs_repo_sha on github_workflow_runs(repo_id, head_sha, run_id); create index if not exists idx_thread_fingerprints_hash on thread_fingerprints(fingerprint_hash); create index if not exists idx_thread_vectors_basis_model on thread_vectors(basis, model); create index if not exists idx_sync_runs_repo_status_id on sync_runs(repo_id, status, id); diff --git a/internal/syncer/pull_details.go b/internal/syncer/pull_details.go new file mode 100644 index 0000000..54e4d38 --- /dev/null +++ b/internal/syncer/pull_details.go @@ -0,0 +1,117 @@ +package syncer + +import ( + "context" + "time" + + gh "github.com/openclaw/gitcrawl/internal/github" + "github.com/openclaw/gitcrawl/internal/store" +) + +type pullDetailStats struct { + files int + commits int + checks int + runs int +} + +func (s *Syncer) syncPullRequestDetails(ctx context.Context, st *store.Store, options Options, thread store.Thread) (pullDetailStats, error) { + fetchedAt := s.now().Format(time.RFC3339Nano) + pull, err := s.client.GetPull(ctx, options.Owner, options.Repo, thread.Number, options.Reporter) + if err != nil { + return pullDetailStats{}, err + } + filesRaw, err := s.client.ListPullFiles(ctx, options.Owner, options.Repo, thread.Number, options.Reporter) + if err != nil { + return pullDetailStats{}, err + } + commitsRaw, err := s.client.ListPullCommits(ctx, options.Owner, options.Repo, thread.Number, options.Reporter) + if err != nil { + return pullDetailStats{}, err + } + headSHA := nestedString(pull, "head", "sha") + var checksRaw []map[string]any + if headSHA != "" { + checksRaw, err = 
s.client.ListCommitCheckRuns(ctx, options.Owner, options.Repo, headSHA, options.Reporter) + if err != nil { + return pullDetailStats{}, err + } + } + runsRaw, err := s.client.ListWorkflowRuns(ctx, options.Owner, options.Repo, gh.ListWorkflowRunsOptions{HeadSHA: headSHA, Limit: 20}, options.Reporter) + if err != nil { + return pullDetailStats{}, err + } + detail := mapPullDetail(thread, pull, fetchedAt) + files := mapPullFiles(thread.ID, filesRaw, fetchedAt) + commits := mapPullCommits(thread.ID, commitsRaw, fetchedAt) + checks := mapPullChecks(thread.ID, checksRaw, fetchedAt) + runs := mapWorkflowRuns(thread.RepoID, runsRaw, fetchedAt) + if err := st.UpsertPullRequestCache(ctx, detail, files, commits, checks, runs); err != nil { + return pullDetailStats{}, err + } + return pullDetailStats{files: len(files), commits: len(commits), checks: len(checks), runs: len(runs)}, nil +} + +func mapPullDetail(thread store.Thread, pull map[string]any, fetchedAt string) store.PullRequestDetail { + return store.PullRequestDetail{ + ThreadID: thread.ID, + RepoID: thread.RepoID, + Number: thread.Number, + BaseSHA: nestedString(pull, "base", "sha"), + HeadSHA: nestedString(pull, "head", "sha"), + HeadRef: nestedString(pull, "head", "ref"), + HeadRepoFullName: nestedString(pull, "head", "repo", "full_name"), + MergeableState: stringValue(pull["mergeable_state"]), + Additions: intValue(pull["additions"]), + Deletions: intValue(pull["deletions"]), + ChangedFiles: intValue(pull["changed_files"]), + RawJSON: mustJSON(pull), + FetchedAt: fetchedAt, + UpdatedAt: fetchedAt, + } +} + +func mapPullFiles(threadID int64, rows []map[string]any, fetchedAt string) []store.PullRequestFile { + out := make([]store.PullRequestFile, 0, len(rows)) + for _, row := range rows { + filename := stringValue(row["filename"]) + if filename == "" { + continue + } + out = append(out, store.PullRequestFile{ + ThreadID: threadID, + Path: filename, + Status: stringValue(row["status"]), + Additions: 
intValue(row["additions"]), + Deletions: intValue(row["deletions"]), + Changes: intValue(row["changes"]), + PreviousPath: stringValue(row["previous_filename"]), + Patch: stringValue(row["patch"]), + RawJSON: mustJSON(row), + FetchedAt: fetchedAt, + }) + } + return out +} + +func mapPullCommits(threadID int64, rows []map[string]any, fetchedAt string) []store.PullRequestCommit { + out := make([]store.PullRequestCommit, 0, len(rows)) + for _, row := range rows { + sha := stringValue(row["sha"]) + if sha == "" { + continue + } + out = append(out, store.PullRequestCommit{ + ThreadID: threadID, + SHA: sha, + Message: nestedString(row, "commit", "message"), + AuthorLogin: nestedString(row, "author", "login"), + AuthorName: nestedString(row, "commit", "author", "name"), + CommittedAt: nestedString(row, "commit", "author", "date"), + HTMLURL: stringValue(row["html_url"]), + RawJSON: mustJSON(row), + FetchedAt: fetchedAt, + }) + } + return out +} diff --git a/internal/syncer/pull_details_more.go b/internal/syncer/pull_details_more.go new file mode 100644 index 0000000..49a71bd --- /dev/null +++ b/internal/syncer/pull_details_more.go @@ -0,0 +1,65 @@ +package syncer + +import "github.com/openclaw/gitcrawl/internal/store" + +func mapPullChecks(threadID int64, rows []map[string]any, fetchedAt string) []store.PullRequestCheck { + out := make([]store.PullRequestCheck, 0, len(rows)) + for _, row := range rows { + name := stringValue(row["name"]) + if name == "" { + continue + } + out = append(out, store.PullRequestCheck{ + ThreadID: threadID, + Name: name, + Status: stringValue(row["status"]), + Conclusion: stringValue(row["conclusion"]), + DetailsURL: stringValue(row["details_url"]), + WorkflowName: nestedString(row, "check_suite", "app", "name"), + StartedAt: stringValue(row["started_at"]), + CompletedAt: stringValue(row["completed_at"]), + RawJSON: mustJSON(row), + FetchedAt: fetchedAt, + }) + } + return out +} + +func mapWorkflowRuns(repoID int64, rows []map[string]any, 
fetchedAt string) []store.WorkflowRun { + out := make([]store.WorkflowRun, 0, len(rows)) + for _, row := range rows { + runID := jsonID(row["id"]) + if runID == "" { + continue + } + out = append(out, store.WorkflowRun{ + RepoID: repoID, + RunID: runID, + RunNumber: intValue(row["run_number"]), + HeadBranch: stringValue(row["head_branch"]), + HeadSHA: stringValue(row["head_sha"]), + Status: stringValue(row["status"]), + Conclusion: stringValue(row["conclusion"]), + WorkflowName: stringValue(row["name"]), + Event: stringValue(row["event"]), + HTMLURL: stringValue(row["html_url"]), + CreatedAtGH: stringValue(row["created_at"]), + UpdatedAtGH: stringValue(row["updated_at"]), + RawJSON: mustJSON(row), + FetchedAt: fetchedAt, + }) + } + return out +} + +func nestedString(row map[string]any, path ...string) string { + var current any = row + for _, key := range path { + typed, ok := current.(map[string]any) + if !ok { + return "" + } + current = typed[key] + } + return stringValue(current) +} diff --git a/internal/syncer/syncer.go b/internal/syncer/syncer.go index 5cf54e2..bb47944 100644 --- a/internal/syncer/syncer.go +++ b/internal/syncer/syncer.go @@ -18,10 +18,15 @@ import ( type GitHubClient interface { GetRepo(ctx context.Context, owner, repo string, reporter gh.Reporter) (map[string]any, error) GetIssue(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error) + GetPull(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error) ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error) ListIssueComments(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) ListPullReviews(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) ListPullReviewComments(ctx context.Context, owner, repo string, number int, reporter 
gh.Reporter) ([]map[string]any, error) + ListPullFiles(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) + ListPullCommits(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) + ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter gh.Reporter) ([]map[string]any, error) + ListWorkflowRuns(ctx context.Context, owner, repo string, options gh.ListWorkflowRunsOptions, reporter gh.Reporter) ([]map[string]any, error) } type Syncer struct { @@ -31,14 +36,15 @@ type Syncer struct { } type Options struct { - Owner string - Repo string - State string - Since string - Limit int - Numbers []int - IncludeComments bool - Reporter gh.Reporter + Owner string + Repo string + State string + Since string + Limit int + Numbers []int + IncludeComments bool + IncludePRDetails bool + Reporter gh.Reporter } type Stats struct { @@ -47,6 +53,11 @@ type Stats struct { IssuesSynced int `json:"issues_synced"` PullRequestsSynced int `json:"pull_requests_synced"` CommentsSynced int `json:"comments_synced"` + PRDetailsSynced int `json:"pr_details_synced"` + PRFilesSynced int `json:"pr_files_synced"` + PRCommitsSynced int `json:"pr_commits_synced"` + PRChecksSynced int `json:"pr_checks_synced"` + WorkflowRunsSynced int `json:"workflow_runs_synced"` ThreadsClosed int `json:"threads_closed"` RequestedSince string `json:"requested_since,omitempty"` Limit int `json:"limit,omitempty"` @@ -138,6 +149,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) { } stats.CommentsSynced += len(comments) } + if options.IncludePRDetails && thread.Kind == "pull_request" { + detailStats, err := s.syncPullRequestDetails(ctx, st, options, thread) + if err != nil { + return err + } + stats.PRDetailsSynced++ + stats.PRFilesSynced += detailStats.files + stats.PRCommitsSynced += detailStats.commits + stats.PRChecksSynced += detailStats.checks + stats.WorkflowRunsSynced += 
detailStats.runs + } if _, err := st.UpsertDocument(ctx, documents.BuildWithComments(thread, comments)); err != nil { return err } diff --git a/internal/syncer/syncer_test.go b/internal/syncer/syncer_test.go index 30af37a..bac0344 100644 --- a/internal/syncer/syncer_test.go +++ b/internal/syncer/syncer_test.go @@ -49,6 +49,18 @@ func (fakeGitHub) GetIssue(ctx context.Context, owner, repo string, number int, }, nil } +func (fakeGitHub) GetPull(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error) { + return map[string]any{ + "number": number, + "head": map[string]any{"sha": "head-sha", "ref": "feature", "repo": map[string]any{"full_name": "openclaw/gitcrawl"}}, + "base": map[string]any{"sha": "base-sha"}, + "mergeable_state": "clean", + "additions": 12, + "deletions": 3, + "changed_files": 2, + }, nil +} + func (fakeGitHub) ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error) { if options.State == "closed" { return nil, nil @@ -105,6 +117,22 @@ func (fakeGitHub) ListPullReviewComments(ctx context.Context, owner, repo string return nil, nil } +func (fakeGitHub) ListPullFiles(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) { + return nil, nil +} + +func (fakeGitHub) ListPullCommits(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) { + return nil, nil +} + +func (fakeGitHub) ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter gh.Reporter) ([]map[string]any, error) { + return nil, nil +} + +func (fakeGitHub) ListWorkflowRuns(ctx context.Context, owner, repo string, options gh.ListWorkflowRunsOptions, reporter gh.Reporter) ([]map[string]any, error) { + return nil, nil +} + type sinceCaptureGitHub struct { fakeGitHub since string @@ -195,6 +223,59 @@ func (pullCommentGitHub) ListPullReviewComments(ctx context.Context, 
owner, repo }}, nil } +type pullDetailsGitHub struct { + fakeGitHub +} + +func (pullDetailsGitHub) ListPullFiles(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) { + return []map[string]any{{ + "filename": "internal/cache.go", + "status": "modified", + "additions": 10, + "deletions": 2, + "changes": 12, + "patch": "@@ cache", + }}, nil +} + +func (pullDetailsGitHub) ListPullCommits(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error) { + return []map[string]any{{ + "sha": "commit-sha", + "html_url": "https://github.com/openclaw/gitcrawl/commit/commit-sha", + "author": map[string]any{"login": "alice"}, + "commit": map[string]any{ + "message": "feat: cache", + "author": map[string]any{"name": "Alice", "date": "2026-04-26T00:00:00Z"}, + }, + }}, nil +} + +func (pullDetailsGitHub) ListCommitCheckRuns(ctx context.Context, owner, repo, ref string, reporter gh.Reporter) ([]map[string]any, error) { + return []map[string]any{{ + "name": "test", + "status": "completed", + "conclusion": "success", + "details_url": "https://github.com/openclaw/gitcrawl/actions/runs/99", + "check_suite": map[string]any{"app": map[string]any{"name": "GitHub Actions"}}, + }}, nil +} + +func (pullDetailsGitHub) ListWorkflowRuns(ctx context.Context, owner, repo string, options gh.ListWorkflowRunsOptions, reporter gh.Reporter) ([]map[string]any, error) { + return []map[string]any{{ + "id": 99, + "run_number": 7, + "head_branch": "feature", + "head_sha": options.HeadSHA, + "status": "completed", + "conclusion": "success", + "name": "CI", + "event": "pull_request", + "html_url": "https://github.com/openclaw/gitcrawl/actions/runs/99", + "created_at": "2026-04-26T00:00:00Z", + "updated_at": "2026-04-26T00:01:00Z", + }}, nil +} + func TestSyncPersistsIssuesAndPullRequests(t *testing.T) { ctx := context.Background() st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db")) @@ -271,6 +352,42 @@ 
func TestSyncHydratesPullReviewComments(t *testing.T) { } } +func TestSyncHydratesPullRequestDetails(t *testing.T) { + ctx := context.Background() + st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db")) + if err != nil { + t.Fatalf("open store: %v", err) + } + defer st.Close() + s := New(pullDetailsGitHub{}, st) + s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) } + stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", Numbers: []int{8}, IncludePRDetails: true}) + if err != nil { + t.Fatalf("sync: %v", err) + } + if stats.PRDetailsSynced != 1 || stats.PRFilesSynced != 1 || stats.PRCommitsSynced != 1 || stats.PRChecksSynced != 1 || stats.WorkflowRunsSynced != 1 { + t.Fatalf("stats = %#v", stats) + } + repo, err := st.RepositoryByFullName(ctx, "openclaw/gitcrawl") + if err != nil { + t.Fatalf("repo: %v", err) + } + cache, err := st.PullRequestCache(ctx, repo.ID, 8) + if err != nil { + t.Fatalf("pr cache: %v", err) + } + if cache.Detail.HeadSHA != "head-sha" || len(cache.Files) != 1 || len(cache.Commits) != 1 || len(cache.Checks) != 1 { + t.Fatalf("cache = %+v", cache) + } + runs, err := st.ListWorkflowRuns(ctx, repo.ID, store.WorkflowRunListOptions{HeadSHA: "head-sha", Limit: 10}) + if err != nil { + t.Fatalf("workflow runs: %v", err) + } + if len(runs) != 1 || runs[0].RunID != "99" { + t.Fatalf("runs = %+v", runs) + } +} + func TestSyncCanTargetIssueNumbers(t *testing.T) { ctx := context.Background() st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))