feat: add targeted issue sync
This commit is contained in:
parent
bae22a73a2
commit
fdef74b910
@ -2,6 +2,7 @@
|
||||
|
||||
## Unreleased
|
||||
|
||||
- Add `gitcrawl sync --numbers` for exact issue and pull request hydration, including comment documents, without relying on list ordering or updated-time windows.
|
||||
- Implement `gitcrawl refresh` and `gitcrawl embed` so synced repositories can generate OpenAI embeddings and rebuild durable clusters end to end.
|
||||
- Add `gitcrawl sync --state open|closed|all` so incremental backups can refresh recently closed issues and pull requests.
|
||||
- Default `gitcrawl sync` to `--state all`, keeping closed issue and pull request state fresh unless a narrower state is requested.
|
||||
|
||||
@ -15,6 +15,7 @@ gitcrawl init
|
||||
gitcrawl doctor
|
||||
gitcrawl sync owner/repo
|
||||
gitcrawl sync owner/repo --state open
|
||||
gitcrawl sync owner/repo --numbers 123,456 --include-comments
|
||||
gitcrawl refresh owner/repo
|
||||
gitcrawl cluster owner/repo --threshold 0.80
|
||||
gitcrawl clusters owner/repo
|
||||
@ -41,6 +42,7 @@ gitcrawl tui owner/repo
|
||||
`gitcrawl cluster` and `gitcrawl refresh` build ghcrawl-shaped durable clusters by default (`--threshold 0.80`, `--min-size 1`, `--max-cluster-size 40`, `--k 16`, `--cross-kind-threshold 0.93`): every active vector-backed thread is represented, singleton rows use `singleton_orphan`, multi-member rows use `duplicate_candidate`, and stable IDs are derived from the representative thread. They also add deterministic GitHub reference evidence for direct issue/PR links such as `#123`, `issues/123`, and `pull/123`. Weak embedding edges need concrete title-token overlap unless their similarity is already high, which keeps generic low-confidence bridges from forming unrelated clusters.
|
||||
`gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`.
|
||||
`gitcrawl sync` fetches open issues and pull requests by default. Pass `--state all` or `--state closed` for explicit backfill workflows; incremental open syncs with `--since` also sweep recently closed items so local open state does not rot.
|
||||
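Pass `--numbers 123,456` (a comma-separated list of issue or pull request numbers) to refresh exactly those rows without relying on list ordering or updated-time windows; add `--include-comments` to hydrate their comment documents as well.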
|
||||
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration.
|
||||
The TUI starts at `--min-size 5` and `--sort size`, like ghcrawl's saved default, so the first screen is the useful cluster workload instead of singleton noise. Pass `--min-size 1` when you intentionally want singleton clusters. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds.
|
||||
|
||||
|
||||
@ -1563,11 +1563,12 @@ func (a *App) runSync(ctx context.Context, args []string) error {
|
||||
fs.SetOutput(io.Discard)
|
||||
since := fs.String("since", "", "GitHub since timestamp")
|
||||
state := fs.String("state", "", "GitHub issue state: open|closed|all; default open")
|
||||
numbersRaw := fs.String("numbers", "", "comma-separated issue or pull request numbers")
|
||||
limitRaw := fs.String("limit", "", "maximum issue/PR rows")
|
||||
jsonOut := fs.Bool("json", false, "write JSON output")
|
||||
includeComments := fs.Bool("include-comments", false, "hydrate issue comments, PR reviews, and PR review comments")
|
||||
fs.Bool("include-code", false, "accepted for compatibility; code hydration is not implemented yet")
|
||||
if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"since": true, "state": true, "limit": true})); err != nil {
|
||||
if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"numbers": true, "since": true, "state": true, "limit": true})); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
a.applyCommandJSON(*jsonOut)
|
||||
@ -1582,11 +1583,16 @@ func (a *App) runSync(ctx context.Context, args []string) error {
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
numbers, err := parseOptionalPositiveIntList(*numbersRaw)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
|
||||
stats, err := a.syncRepository(ctx, owner, repo, syncOptions{
|
||||
Since: strings.TrimSpace(*since),
|
||||
State: strings.TrimSpace(*state),
|
||||
Limit: limit,
|
||||
Numbers: numbers,
|
||||
IncludeComments: *includeComments,
|
||||
})
|
||||
if err != nil {
|
||||
@ -1599,6 +1605,7 @@ type syncOptions struct {
|
||||
Since string
|
||||
State string
|
||||
Limit int
|
||||
Numbers []int
|
||||
IncludeComments bool
|
||||
}
|
||||
|
||||
@ -1628,6 +1635,7 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
|
||||
State: strings.TrimSpace(options.State),
|
||||
Since: strings.TrimSpace(options.Since),
|
||||
Limit: options.Limit,
|
||||
Numbers: options.Numbers,
|
||||
IncludeComments: options.IncludeComments,
|
||||
Reporter: func(message string) {
|
||||
fmt.Fprintln(a.Stderr, message)
|
||||
|
||||
@ -17,6 +17,7 @@ import (
|
||||
|
||||
type GitHubClient interface {
|
||||
GetRepo(ctx context.Context, owner, repo string, reporter gh.Reporter) (map[string]any, error)
|
||||
GetIssue(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error)
|
||||
ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error)
|
||||
ListIssueComments(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
|
||||
ListPullReviews(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) ([]map[string]any, error)
|
||||
@ -35,6 +36,7 @@ type Options struct {
|
||||
State string
|
||||
Since string
|
||||
Limit int
|
||||
Numbers []int
|
||||
IncludeComments bool
|
||||
Reporter gh.Reporter
|
||||
}
|
||||
@ -48,6 +50,7 @@ type Stats struct {
|
||||
ThreadsClosed int `json:"threads_closed"`
|
||||
RequestedSince string `json:"requested_since,omitempty"`
|
||||
Limit int `json:"limit,omitempty"`
|
||||
Numbers []int `json:"numbers,omitempty"`
|
||||
MetadataOnly bool `json:"metadata_only"`
|
||||
StartedAt string `json:"started_at"`
|
||||
FinishedAt string `json:"finished_at"`
|
||||
@ -87,19 +90,33 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
return Stats{}, err
|
||||
}
|
||||
|
||||
rows, err := s.client.ListRepositoryIssues(ctx, options.Owner, options.Repo, gh.ListIssuesOptions{
|
||||
State: state,
|
||||
Since: since,
|
||||
Limit: options.Limit,
|
||||
}, options.Reporter)
|
||||
if err != nil {
|
||||
return Stats{}, err
|
||||
numbers := uniquePositiveNumbers(options.Numbers)
|
||||
rows := make([]map[string]any, 0, len(numbers))
|
||||
if len(numbers) > 0 {
|
||||
for _, number := range numbers {
|
||||
row, err := s.client.GetIssue(ctx, options.Owner, options.Repo, number, options.Reporter)
|
||||
if err != nil {
|
||||
return Stats{}, err
|
||||
}
|
||||
rows = append(rows, row)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
rows, err = s.client.ListRepositoryIssues(ctx, options.Owner, options.Repo, gh.ListIssuesOptions{
|
||||
State: state,
|
||||
Since: since,
|
||||
Limit: options.Limit,
|
||||
}, options.Reporter)
|
||||
if err != nil {
|
||||
return Stats{}, err
|
||||
}
|
||||
}
|
||||
|
||||
stats := Stats{
|
||||
Repository: options.Owner + "/" + options.Repo,
|
||||
RequestedSince: since,
|
||||
Limit: options.Limit,
|
||||
Numbers: numbers,
|
||||
MetadataOnly: !options.IncludeComments,
|
||||
StartedAt: started,
|
||||
}
|
||||
@ -130,7 +147,7 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
stats.IssuesSynced++
|
||||
}
|
||||
}
|
||||
if state == "open" && since != "" && options.Limit <= 0 {
|
||||
if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 {
|
||||
closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -141,7 +158,7 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
if _, err := st.RecordRun(ctx, store.RunRecord{
|
||||
RepoID: repoID,
|
||||
Kind: "sync",
|
||||
Scope: state,
|
||||
Scope: syncRunScope(state, numbers),
|
||||
Status: "success",
|
||||
StartedAt: stats.StartedAt,
|
||||
FinishedAt: stats.FinishedAt,
|
||||
@ -163,6 +180,36 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// uniquePositiveNumbers returns the positive entries of numbers with
// duplicates removed, preserving the order in which each value first appears.
//
// Zero and negative entries are dropped. A nil or empty input yields nil; an
// input that contains only non-positive entries yields an empty, non-nil
// slice, so callers should test the result with len rather than against nil.
func uniquePositiveNumbers(numbers []int) []int {
	if len(numbers) == 0 {
		return nil
	}
	seen := make(map[int]struct{}, len(numbers))
	out := make([]int, 0, len(numbers))
	for _, number := range numbers {
		if number <= 0 {
			continue
		}
		if _, ok := seen[number]; ok {
			continue
		}
		seen[number] = struct{}{}
		out = append(out, number)
	}
	return out
}
|
||||
|
||||
// syncRunScope builds the scope string recorded for a sync run.
//
// When no explicit numbers were requested the scope is simply state. For a
// targeted sync the scope is "numbers:" followed by the requested numbers
// joined with commas, in the order given (for example "numbers:7,8").
func syncRunScope(state string, numbers []int) string {
	if len(numbers) == 0 {
		return state
	}
	parts := make([]string, 0, len(numbers))
	for _, number := range numbers {
		parts = append(parts, strconv.Itoa(number))
	}
	return "numbers:" + strings.Join(parts, ",")
}
|
||||
|
||||
func normalizeState(value string) (string, error) {
|
||||
value = strings.TrimSpace(strings.ToLower(value))
|
||||
if value == "" {
|
||||
|
||||
@ -16,6 +16,38 @@ func (fakeGitHub) GetRepo(ctx context.Context, owner, repo string, reporter gh.R
|
||||
return map[string]any{"id": 123}, nil
|
||||
}
|
||||
|
||||
func (fakeGitHub) GetIssue(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error) {
|
||||
if number == 8 {
|
||||
return map[string]any{
|
||||
"id": 2,
|
||||
"number": 8,
|
||||
"state": "open",
|
||||
"title": "fix sync",
|
||||
"body": "",
|
||||
"html_url": "https://github.com/openclaw/gitcrawl/pull/8",
|
||||
"created_at": "2026-04-26T00:00:00Z",
|
||||
"updated_at": "2026-04-26T00:00:00Z",
|
||||
"labels": []map[string]any{},
|
||||
"assignees": []map[string]any{},
|
||||
"user": map[string]any{"login": "vincentkoc", "type": "User"},
|
||||
"pull_request": map[string]any{"url": "https://api.github.com/repos/openclaw/gitcrawl/pulls/8"},
|
||||
}, nil
|
||||
}
|
||||
return map[string]any{
|
||||
"id": 1,
|
||||
"number": 7,
|
||||
"state": "open",
|
||||
"title": "download stalls",
|
||||
"body": "large file download stalls",
|
||||
"html_url": "https://github.com/openclaw/gitcrawl/issues/7",
|
||||
"created_at": "2026-04-26T00:00:00Z",
|
||||
"updated_at": "2026-04-26T00:00:00Z",
|
||||
"labels": []map[string]any{{"name": "bug"}},
|
||||
"assignees": []map[string]any{},
|
||||
"user": map[string]any{"login": "vincentkoc", "type": "User"},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (fakeGitHub) ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error) {
|
||||
if options.State == "closed" {
|
||||
return nil, nil
|
||||
@ -116,6 +148,22 @@ func (closedSweepGitHub) ListRepositoryIssues(ctx context.Context, owner, repo s
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type targetedGitHub struct {
|
||||
fakeGitHub
|
||||
listCalled bool
|
||||
numbers []int
|
||||
}
|
||||
|
||||
func (f *targetedGitHub) GetIssue(ctx context.Context, owner, repo string, number int, reporter gh.Reporter) (map[string]any, error) {
|
||||
f.numbers = append(f.numbers, number)
|
||||
return f.fakeGitHub.GetIssue(ctx, owner, repo, number, reporter)
|
||||
}
|
||||
|
||||
func (f *targetedGitHub) ListRepositoryIssues(ctx context.Context, owner, repo string, options gh.ListIssuesOptions, reporter gh.Reporter) ([]map[string]any, error) {
|
||||
f.listCalled = true
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
|
||||
@ -163,6 +211,47 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyncCanTargetIssueNumbers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open store: %v", err)
|
||||
}
|
||||
defer st.Close()
|
||||
|
||||
client := &targetedGitHub{}
|
||||
s := New(client, st)
|
||||
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
|
||||
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", Numbers: []int{7, 7, 8}, IncludeComments: true})
|
||||
if err != nil {
|
||||
t.Fatalf("sync: %v", err)
|
||||
}
|
||||
if client.listCalled {
|
||||
t.Fatal("targeted sync should not call repository issue listing")
|
||||
}
|
||||
if got, want := client.numbers, []int{7, 8}; len(got) != len(want) || got[0] != want[0] || got[1] != want[1] {
|
||||
t.Fatalf("targeted numbers: got %#v want %#v", got, want)
|
||||
}
|
||||
if stats.ThreadsSynced != 2 || stats.IssuesSynced != 1 || stats.PullRequestsSynced != 1 {
|
||||
t.Fatalf("unexpected stats: %#v", stats)
|
||||
}
|
||||
if stats.CommentsSynced != 1 {
|
||||
t.Fatalf("comments synced: got %d want 1", stats.CommentsSynced)
|
||||
}
|
||||
|
||||
repo, err := st.RepositoryByFullName(ctx, "openclaw/gitcrawl")
|
||||
if err != nil {
|
||||
t.Fatalf("repo: %v", err)
|
||||
}
|
||||
threads, err := st.ListThreads(ctx, repo.ID, false)
|
||||
if err != nil {
|
||||
t.Fatalf("threads: %v", err)
|
||||
}
|
||||
if len(threads) != 2 {
|
||||
t.Fatalf("threads: got %d want 2", len(threads))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyncNormalizesRelativeSince(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
st, err := store.Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
|
||||
|
||||
Loading…
Reference in New Issue
Block a user