feat: refresh stale cached searches
This commit is contained in:
parent
9b5e168ce5
commit
9699301575
@ -32,6 +32,7 @@ gitcrawl neighbors owner/repo --number 123 --limit 10
|
||||
gitcrawl search owner/repo --query "download stalls"
|
||||
gitcrawl search issues "download stalls" -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30
|
||||
gitcrawl search prs "manifest cache" -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20
|
||||
gitcrawl search issues "hot loop" -R owner/repo --state open --sync-if-stale 5m --json number,title,url
|
||||
gitcrawl tui
|
||||
gitcrawl tui owner/repo
|
||||
```
|
||||
@ -40,7 +41,7 @@ gitcrawl tui owner/repo
|
||||
`gitcrawl cluster` and `gitcrawl refresh` build bounded nearest-neighbor clusters by default (`--max-cluster-size 40`, `--k 16`, `--cross-kind-threshold 0.93`) and add deterministic GitHub reference evidence for direct issue/PR links such as `#123`, `issues/123`, and `pull/123`. Weak embedding edges also need concrete title-token overlap unless their similarity is already high, which keeps generic low-confidence bridges from forming unrelated clusters.
|
||||
`gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`.
|
||||
`gitcrawl sync` fetches issues and pull requests in every GitHub state by default. Pass `--state open` or `--state closed` to limit a sync to one state.
|
||||
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions.
|
||||
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration.
|
||||
The TUI starts at `--min-size 5` so maintainer-significant active clusters are visible first; pass `--min-size 1` to include singletons. Mouse support is built in: click rows, scroll panes with the wheel, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds.
|
||||
|
||||
## Local Defaults
|
||||
|
||||
3
SPEC.md
3
SPEC.md
@ -97,9 +97,10 @@ Public commands:
|
||||
```text
|
||||
gitcrawl search issues <query> -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30
|
||||
gitcrawl search prs <query> -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20
|
||||
gitcrawl search issues <query> -R owner/repo --state open --sync-if-stale 5m --json number,title,url
|
||||
```
|
||||
|
||||
This compatibility path must read only from local SQLite. It avoids GitHub REST search quota and is not a replacement for final live `gh` verification before comments, closes, labels, or merges.
|
||||
This compatibility path reads from local SQLite by default. It avoids GitHub REST search quota and is not a replacement for final live `gh` verification before comments, closes, labels, or merges. `--sync-if-stale <duration>` may run one metadata sync first when the repository mirror is older than the requested max age; the search result itself still comes from SQLite.
|
||||
|
||||
## Config
|
||||
|
||||
|
||||
@ -2,11 +2,16 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/gitcrawl/internal/store"
|
||||
)
|
||||
@ -39,8 +44,9 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
|
||||
limitRaw := fs.String("limit", "", "maximum rows")
|
||||
limitShortRaw := fs.String("L", "", "maximum rows")
|
||||
jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
|
||||
syncIfStaleRaw := fs.String("sync-if-stale", "", "sync owner/repo first when the local mirror is older than this duration")
|
||||
if err := fs.Parse(normalizeCommandArgs(args[1:], map[string]bool{
|
||||
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true,
|
||||
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "sync-if-stale": true,
|
||||
})); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
@ -62,6 +68,15 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
syncIfStale, err := parseGHSearchDuration(*syncIfStaleRaw)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if syncIfStale > 0 {
|
||||
if err := a.syncGHSearchIfStale(ctx, owner, repoName, state, syncIfStale); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
rt, err := a.openLocalRuntimeReadOnly(ctx)
|
||||
if err != nil {
|
||||
@ -110,6 +125,49 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) error {
|
||||
stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, maxAge)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !stale {
|
||||
return nil
|
||||
}
|
||||
if lastSync.IsZero() {
|
||||
fmt.Fprintf(a.Stderr, "gitcrawl: no cached sync found for %s/%s; syncing before search\n", owner, repoName)
|
||||
} else {
|
||||
fmt.Fprintf(a.Stderr, "gitcrawl: cached sync for %s/%s is older than %s; syncing before search\n", owner, repoName, maxAge)
|
||||
}
|
||||
_, err = a.syncRepository(ctx, owner, repoName, syncOptions{State: state})
|
||||
return err
|
||||
}
|
||||
|
||||
func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, maxAge time.Duration) (bool, time.Time, error) {
|
||||
rt, err := a.openLocalRuntimeReadOnly(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return true, time.Time{}, nil
|
||||
}
|
||||
return false, time.Time{}, err
|
||||
}
|
||||
defer rt.Store.Close()
|
||||
repo, err := rt.repository(ctx, owner, repoName)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return true, time.Time{}, nil
|
||||
}
|
||||
return false, time.Time{}, err
|
||||
}
|
||||
lastSync, err := rt.Store.LastSuccessfulSyncAt(ctx, repo.ID)
|
||||
if err != nil {
|
||||
return false, time.Time{}, err
|
||||
}
|
||||
if lastSync.IsZero() {
|
||||
return true, time.Time{}, nil
|
||||
}
|
||||
return time.Since(lastSync) > maxAge, lastSync, nil
|
||||
}
|
||||
|
||||
func parseGHSearchQuery(value string) (query string, repo string, state string) {
|
||||
var queryParts []string
|
||||
for _, part := range strings.Fields(value) {
|
||||
@ -141,6 +199,24 @@ func validateGHSearchState(state string) error {
|
||||
}
|
||||
}
|
||||
|
||||
// parseGHSearchDuration parses the value of the --sync-if-stale flag.
//
// Accepted forms:
//   - empty or whitespace-only: returns 0, nil (flag not set);
//   - a bare positive integer, interpreted as seconds ("60" means one minute);
//   - any positive string accepted by time.ParseDuration ("5m", "1h30m").
//
// Zero, negative, and unparseable values return an error. A bare integer whose
// second count does not fit in a time.Duration is also rejected instead of
// silently wrapping around to an arbitrary (possibly negative) duration.
func parseGHSearchDuration(value string) (time.Duration, error) {
	value = strings.TrimSpace(value)
	if value == "" {
		return 0, nil
	}

	// Largest whole number of seconds representable as a time.Duration
	// (int64 nanoseconds).
	const maxSeconds = int64(1<<63-1) / int64(time.Second)

	// ParseInt with an explicit 64-bit size keeps the accepted range the same
	// on 32-bit and 64-bit platforms.
	if seconds, err := strconv.ParseInt(value, 10, 64); err == nil {
		switch {
		case seconds <= 0:
			return 0, fmt.Errorf("expected positive duration, got %q", value)
		case seconds > maxSeconds:
			return 0, fmt.Errorf("duration out of range, got %q", value)
		}
		return time.Duration(seconds) * time.Second, nil
	}

	duration, err := time.ParseDuration(value)
	if err != nil || duration <= 0 {
		return 0, fmt.Errorf("expected positive duration, got %q", value)
	}
	return duration, nil
}
|
||||
|
||||
func parseGHSearchLimit(longRaw, shortRaw string) (int, error) {
|
||||
if strings.TrimSpace(longRaw) != "" && strings.TrimSpace(shortRaw) != "" && strings.TrimSpace(longRaw) != strings.TrimSpace(shortRaw) {
|
||||
return 0, fmt.Errorf("--limit and -L disagree")
|
||||
|
||||
115
internal/cli/gh_search_test.go
Normal file
115
internal/cli/gh_search_test.go
Normal file
@ -0,0 +1,115 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/gitcrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestParseGHSearchDuration(t *testing.T) {
|
||||
tests := []struct {
|
||||
value string
|
||||
want time.Duration
|
||||
}{
|
||||
{value: "", want: 0},
|
||||
{value: "60", want: time.Minute},
|
||||
{value: "2m", want: 2 * time.Minute},
|
||||
{value: "1h30m", want: 90 * time.Minute},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got, err := parseGHSearchDuration(tt.value)
|
||||
if err != nil {
|
||||
t.Fatalf("parseGHSearchDuration(%q): %v", tt.value, err)
|
||||
}
|
||||
if got != tt.want {
|
||||
t.Fatalf("parseGHSearchDuration(%q) = %s, want %s", tt.value, got, tt.want)
|
||||
}
|
||||
}
|
||||
if _, err := parseGHSearchDuration("-1s"); err == nil {
|
||||
t.Fatal("expected negative duration to fail")
|
||||
}
|
||||
if _, err := parseGHSearchDuration("nope"); err == nil {
|
||||
t.Fatal("expected invalid duration to fail")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
configPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "gitcrawl.db")
|
||||
app := New()
|
||||
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
|
||||
st, err := store.Open(ctx, dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("open store: %v", err)
|
||||
}
|
||||
repoID, err := st.UpsertRepository(ctx, store.Repository{
|
||||
Owner: "openclaw",
|
||||
Name: "openclaw",
|
||||
FullName: "openclaw/openclaw",
|
||||
RawJSON: "{}",
|
||||
UpdatedAt: time.Now().UTC().Format(time.RFC3339Nano),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("repo: %v", err)
|
||||
}
|
||||
finishedAt := time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339Nano)
|
||||
if _, err := st.RecordRun(ctx, store.RunRecord{
|
||||
RepoID: repoID,
|
||||
Kind: "sync",
|
||||
Scope: "open",
|
||||
Status: "success",
|
||||
StartedAt: finishedAt,
|
||||
FinishedAt: finishedAt,
|
||||
}); err != nil {
|
||||
t.Fatalf("record sync: %v", err)
|
||||
}
|
||||
if err := st.Close(); err != nil {
|
||||
t.Fatalf("close store: %v", err)
|
||||
}
|
||||
|
||||
run := New()
|
||||
run.configPath = configPath
|
||||
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 2*time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("freshness check: %v", err)
|
||||
}
|
||||
if stale || lastSync.IsZero() {
|
||||
t.Fatalf("expected cache to be fresh, stale=%v lastSync=%s", stale, lastSync)
|
||||
}
|
||||
stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 30*time.Minute)
|
||||
if err != nil {
|
||||
t.Fatalf("stale freshness check: %v", err)
|
||||
}
|
||||
if !stale {
|
||||
t.Fatal("expected cache to be stale")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGHSearchCacheStaleWhenRepoMissing(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
configPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "gitcrawl.db")
|
||||
app := New()
|
||||
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
|
||||
run := New()
|
||||
run.configPath = configPath
|
||||
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", time.Minute)
|
||||
if err != nil {
|
||||
t.Fatalf("freshness check: %v", err)
|
||||
}
|
||||
if !stale || !lastSync.IsZero() {
|
||||
t.Fatalf("expected missing repo to be stale, stale=%v lastSync=%s", stale, lastSync)
|
||||
}
|
||||
}
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
type RunRecord struct {
|
||||
@ -74,6 +75,25 @@ func (s *Store) ListRuns(ctx context.Context, repoID int64, kind string, limit i
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (s *Store) LastSuccessfulSyncAt(ctx context.Context, repoID int64) (time.Time, error) {
|
||||
var lastSync string
|
||||
if err := s.q().QueryRowContext(ctx, `
|
||||
select coalesce(max(finished_at), '')
|
||||
from sync_runs
|
||||
where repo_id = ? and status in ('success', 'completed')
|
||||
`, repoID).Scan(&lastSync); err != nil {
|
||||
return time.Time{}, fmt.Errorf("read last successful sync: %w", err)
|
||||
}
|
||||
if lastSync == "" {
|
||||
return time.Time{}, nil
|
||||
}
|
||||
parsed, err := time.Parse(time.RFC3339Nano, lastSync)
|
||||
if err != nil {
|
||||
return time.Time{}, fmt.Errorf("parse last successful sync %q: %w", lastSync, err)
|
||||
}
|
||||
return parsed, nil
|
||||
}
|
||||
|
||||
func runTable(kind string) (string, error) {
|
||||
switch kind {
|
||||
case "sync":
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRecordAndListRuns(t *testing.T) {
|
||||
@ -73,3 +74,40 @@ func TestStatusAcceptsCompletedSyncRuns(t *testing.T) {
|
||||
t.Fatalf("expected last sync time, got %#v", status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLastSuccessfulSyncAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open store: %v", err)
|
||||
}
|
||||
defer st.Close()
|
||||
|
||||
repoID, err := st.UpsertRepository(ctx, Repository{
|
||||
Owner: "openclaw", Name: "gitcrawl", FullName: "openclaw/gitcrawl", RawJSON: "{}", UpdatedAt: "2026-04-26T00:00:00Z",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("repo: %v", err)
|
||||
}
|
||||
if _, err := st.RecordRun(ctx, RunRecord{
|
||||
RepoID: repoID, Kind: "sync", Scope: "open", Status: "failed",
|
||||
StartedAt: "2026-04-26T00:00:00Z", FinishedAt: "2026-04-26T00:00:30Z",
|
||||
}); err != nil {
|
||||
t.Fatalf("record failed run: %v", err)
|
||||
}
|
||||
if _, err := st.RecordRun(ctx, RunRecord{
|
||||
RepoID: repoID, Kind: "sync", Scope: "open", Status: "success",
|
||||
StartedAt: "2026-04-26T00:01:00Z", FinishedAt: "2026-04-26T00:01:30Z",
|
||||
}); err != nil {
|
||||
t.Fatalf("record success run: %v", err)
|
||||
}
|
||||
|
||||
lastSync, err := st.LastSuccessfulSyncAt(ctx, repoID)
|
||||
if err != nil {
|
||||
t.Fatalf("last sync: %v", err)
|
||||
}
|
||||
want, _ := time.Parse(time.RFC3339Nano, "2026-04-26T00:01:30Z")
|
||||
if !lastSync.Equal(want) {
|
||||
t.Fatalf("last sync = %s, want %s", lastSync, want)
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user