feat: refresh stale cached searches

This commit is contained in:
Peter Steinberger 2026-04-28 21:59:15 +01:00
parent 9b5e168ce5
commit 9699301575
No known key found for this signature in database
6 changed files with 254 additions and 3 deletions

View File

@ -32,6 +32,7 @@ gitcrawl neighbors owner/repo --number 123 --limit 10
gitcrawl search owner/repo --query "download stalls"
gitcrawl search issues "download stalls" -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30
gitcrawl search prs "manifest cache" -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20
gitcrawl search issues "hot loop" -R owner/repo --state open --sync-if-stale 5m --json number,title,url
gitcrawl tui
gitcrawl tui owner/repo
```
@ -40,7 +41,7 @@ gitcrawl tui owner/repo
`gitcrawl cluster` and `gitcrawl refresh` build bounded nearest-neighbor clusters by default (`--max-cluster-size 40`, `--k 16`, `--cross-kind-threshold 0.93`) and add deterministic GitHub reference evidence for direct issue/PR links such as `#123`, `issues/123`, and `pull/123`. Weak embedding edges also need concrete title-token overlap unless their similarity is already high, which keeps generic low-confidence bridges from forming unrelated clusters.
`gitcrawl tui` infers the most recently updated local repository when `owner/repo` is omitted. `serve` is intentionally not part of `gitcrawl`.
`gitcrawl sync` fetches issues and pull requests in every GitHub state by default. Pass `--state open` or `--state closed` to limit a sync to one state.
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions.
`gitcrawl search issues|prs` accepts the common `gh search` shape (`<query> -R owner/repo --state open --json fields --limit N`) and answers from the local SQLite cache. It is intended for discovery without spending GitHub REST search quota; use `gh` for final live verification and GitHub write actions. Pass `--sync-if-stale 5m` to perform one metadata sync before the cached search when the local repository mirror is older than that duration.
The TUI starts at `--min-size 5` so maintainer-significant active clusters are visible first; pass `--min-size 1` to include singletons. Mouse support is built in: click rows, wheel panes, and right-click for copy, sort, filter, jump, link, neighbor, local close/reopen, and member triage actions. Press `a` to open the same action menu from the keyboard, `#` to jump directly to an issue or PR number, `p` to switch between repositories already present in the local store, or `n` to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds.
## Local Defaults

View File

@ -97,9 +97,10 @@ Public commands:
```text
gitcrawl search issues <query> -R owner/repo --state open --json number,title,state,url,updatedAt,labels --limit 30
gitcrawl search prs <query> -R owner/repo --state open --json number,title,state,url,updatedAt,isDraft,author --limit 20
gitcrawl search issues <query> -R owner/repo --state open --sync-if-stale 5m --json number,title,url
```
This compatibility path must read only from local SQLite. It avoids GitHub REST search quota and is not a replacement for final live `gh` verification before comments, closes, labels, or merges.
This compatibility path reads from local SQLite by default. It avoids GitHub REST search quota and is not a replacement for final live `gh` verification before comments, closes, labels, or merges. `--sync-if-stale <duration>` may run one metadata sync first when the repository mirror is older than the requested max age; the search result itself still comes from SQLite. `<duration>` must be positive and is either a Go duration string such as `5m` or `1h30m`, or a bare integer number of seconds.
## Config

View File

@ -2,11 +2,16 @@ package cli
import (
"context"
"database/sql"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"os"
"strconv"
"strings"
"time"
"github.com/openclaw/gitcrawl/internal/store"
)
@ -39,8 +44,9 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
limitRaw := fs.String("limit", "", "maximum rows")
limitShortRaw := fs.String("L", "", "maximum rows")
jsonFieldsRaw := fs.String("json", "", "comma-separated JSON fields")
syncIfStaleRaw := fs.String("sync-if-stale", "", "sync owner/repo first when the local mirror is older than this duration")
if err := fs.Parse(normalizeCommandArgs(args[1:], map[string]bool{
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true,
"R": true, "repo": true, "state": true, "limit": true, "L": true, "json": true, "sync-if-stale": true,
})); err != nil {
return usageErr(err)
}
@ -62,6 +68,15 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
if err != nil {
return usageErr(err)
}
syncIfStale, err := parseGHSearchDuration(*syncIfStaleRaw)
if err != nil {
return usageErr(err)
}
if syncIfStale > 0 {
if err := a.syncGHSearchIfStale(ctx, owner, repoName, state, syncIfStale); err != nil {
return err
}
}
rt, err := a.openLocalRuntimeReadOnly(ctx)
if err != nil {
@ -110,6 +125,49 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
return nil
}
// syncGHSearchIfStale runs one repository sync ahead of a cached search when
// the local data for owner/repoName is considered stale.
//
// Staleness is decided by ghSearchCacheStale using maxAge. When the cache is
// fresh nothing happens and nil is returned. Otherwise a one-line notice is
// written to a.Stderr (worded differently depending on whether any previous
// successful sync was found) and syncRepository is invoked with the given
// state. Any error from the freshness check or from the sync is returned
// unchanged.
func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) error {
	needsSync, previous, err := a.ghSearchCacheStale(ctx, owner, repoName, maxAge)
	switch {
	case err != nil:
		return err
	case !needsSync:
		return nil
	}

	// A zero timestamp means no successful sync was found at all, as opposed
	// to one that has merely aged past maxAge.
	if previous.IsZero() {
		fmt.Fprintf(a.Stderr, "gitcrawl: no cached sync found for %s/%s; syncing before search\n", owner, repoName)
	} else {
		fmt.Fprintf(a.Stderr, "gitcrawl: cached sync for %s/%s is older than %s; syncing before search\n", owner, repoName, maxAge)
	}

	if _, syncErr := a.syncRepository(ctx, owner, repoName, syncOptions{State: state}); syncErr != nil {
		return syncErr
	}
	return nil
}
// ghSearchCacheStale reports whether the locally cached data for
// owner/repoName is older than maxAge.
//
// It returns (stale, lastSync, err):
//   - stale is true with a zero lastSync when the local runtime cannot be
//     opened because of os.ErrNotExist, when the repository lookup yields
//     sql.ErrNoRows, or when no successful sync has been recorded.
//   - otherwise stale is time.Since(lastSync) > maxAge and lastSync is the
//     time reported by Store.LastSuccessfulSyncAt.
//   - any other failure is returned as err with stale=false and a zero time.
//
// The runtime is opened read-only and its store is closed before returning.
func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, maxAge time.Duration) (bool, time.Time, error) {
	var never time.Time

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	switch {
	case err == nil:
		// Runtime opened; continue with the lookup below.
	case errors.Is(err, os.ErrNotExist):
		return true, never, nil
	default:
		return false, never, err
	}
	defer rt.Store.Close()

	repo, err := rt.repository(ctx, owner, repoName)
	switch {
	case err == nil:
		// Repository is known locally.
	case errors.Is(err, sql.ErrNoRows):
		return true, never, nil
	default:
		return false, never, err
	}

	syncedAt, err := rt.Store.LastSuccessfulSyncAt(ctx, repo.ID)
	if err != nil {
		return false, never, err
	}
	if syncedAt.IsZero() {
		return true, never, nil
	}

	age := time.Since(syncedAt)
	return age > maxAge, syncedAt, nil
}
func parseGHSearchQuery(value string) (query string, repo string, state string) {
var queryParts []string
for _, part := range strings.Fields(value) {
@ -141,6 +199,24 @@ func validateGHSearchState(state string) error {
}
}
// parseGHSearchDuration parses a max-age value such as the one given to
// --sync-if-stale.
//
// Surrounding whitespace is ignored. An empty value returns 0 with a nil
// error. A bare integer is interpreted as a number of seconds; anything else
// is parsed with time.ParseDuration (for example "2m" or "1h30m").
//
// An error is returned when the value cannot be parsed, is zero or negative,
// or is an integer number of seconds too large to represent as a
// time.Duration.
func parseGHSearchDuration(value string) (time.Duration, error) {
	// Largest whole number of seconds that fits in a time.Duration, which is
	// an int64 count of nanoseconds.
	const maxSeconds = int64((1<<63 - 1) / time.Second)

	value = strings.TrimSpace(value)
	if value == "" {
		return 0, nil
	}
	if seconds, err := strconv.Atoi(value); err == nil {
		if seconds <= 0 {
			return 0, fmt.Errorf("expected positive duration, got %q", value)
		}
		// Without this guard the multiplication below wraps around silently
		// and can yield a negative or otherwise meaningless duration.
		if int64(seconds) > maxSeconds {
			return 0, fmt.Errorf("duration %q is too large", value)
		}
		return time.Duration(seconds) * time.Second, nil
	}
	duration, err := time.ParseDuration(value)
	if err != nil || duration <= 0 {
		return 0, fmt.Errorf("expected positive duration, got %q", value)
	}
	return duration, nil
}
func parseGHSearchLimit(longRaw, shortRaw string) (int, error) {
if strings.TrimSpace(longRaw) != "" && strings.TrimSpace(shortRaw) != "" && strings.TrimSpace(longRaw) != strings.TrimSpace(shortRaw) {
return 0, fmt.Errorf("--limit and -L disagree")

View File

@ -0,0 +1,115 @@
package cli
import (
"context"
"path/filepath"
"testing"
"time"
"github.com/openclaw/gitcrawl/internal/store"
)
// TestParseGHSearchDuration covers the accepted spellings of a duration
// (empty, bare seconds, Go duration strings) and checks that negative and
// unparseable values are rejected.
func TestParseGHSearchDuration(t *testing.T) {
	accepted := []struct {
		value string
		want  time.Duration
	}{
		{"", 0},
		{"60", time.Minute},
		{"2m", 2 * time.Minute},
		{"1h30m", 90 * time.Minute},
	}
	for _, tc := range accepted {
		got, err := parseGHSearchDuration(tc.value)
		switch {
		case err != nil:
			t.Fatalf("parseGHSearchDuration(%q): %v", tc.value, err)
		case got != tc.want:
			t.Fatalf("parseGHSearchDuration(%q) = %s, want %s", tc.value, got, tc.want)
		}
	}

	// Each rejected input carries the failure message reported when the
	// parser unexpectedly accepts it.
	rejected := []struct {
		value   string
		failure string
	}{
		{"-1s", "expected negative duration to fail"},
		{"nope", "expected invalid duration to fail"},
	}
	for _, tc := range rejected {
		if _, err := parseGHSearchDuration(tc.value); err == nil {
			t.Fatal(tc.failure)
		}
	}
}
// TestGHSearchCacheStaleUsesRepoSyncRuns seeds a fresh database with one
// repository and one successful sync run that finished an hour ago, then
// checks that ghSearchCacheStale treats it as fresh for a two-hour max age and
// stale for a thirty-minute max age.
func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
	ctx := context.Background()
	workDir := t.TempDir()
	configPath := filepath.Join(workDir, "config.toml")
	dbPath := filepath.Join(workDir, "gitcrawl.db")

	// Initialise config and database through the regular CLI entry point.
	initArgs := []string{"--config", configPath, "init", "--db", dbPath}
	if err := New().Run(ctx, initArgs); err != nil {
		t.Fatalf("init: %v", err)
	}

	// Seed the store directly with the repository and its sync history.
	st, err := store.Open(ctx, dbPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	repository := store.Repository{
		Owner:     "openclaw",
		Name:      "openclaw",
		FullName:  "openclaw/openclaw",
		RawJSON:   "{}",
		UpdatedAt: time.Now().UTC().Format(time.RFC3339Nano),
	}
	repoID, err := st.UpsertRepository(ctx, repository)
	if err != nil {
		t.Fatalf("repo: %v", err)
	}
	anHourAgo := time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339Nano)
	syncRun := store.RunRecord{
		RepoID:     repoID,
		Kind:       "sync",
		Scope:      "open",
		Status:     "success",
		StartedAt:  anHourAgo,
		FinishedAt: anHourAgo,
	}
	if _, err := st.RecordRun(ctx, syncRun); err != nil {
		t.Fatalf("record sync: %v", err)
	}
	if err := st.Close(); err != nil {
		t.Fatalf("close store: %v", err)
	}

	checker := New()
	checker.configPath = configPath

	// One hour old is within a two-hour budget.
	stale, lastSync, err := checker.ghSearchCacheStale(ctx, "openclaw", "openclaw", 2*time.Hour)
	if err != nil {
		t.Fatalf("freshness check: %v", err)
	}
	if stale || lastSync.IsZero() {
		t.Fatalf("expected cache to be fresh, stale=%v lastSync=%s", stale, lastSync)
	}

	// One hour old exceeds a thirty-minute budget.
	stale, _, err = checker.ghSearchCacheStale(ctx, "openclaw", "openclaw", 30*time.Minute)
	if err != nil {
		t.Fatalf("stale freshness check: %v", err)
	}
	if !stale {
		t.Fatal("expected cache to be stale")
	}
}
// TestGHSearchCacheStaleWhenRepoMissing checks that a repository absent from
// an initialised database is reported as stale with a zero last-sync time and
// no error.
func TestGHSearchCacheStaleWhenRepoMissing(t *testing.T) {
	ctx := context.Background()
	workDir := t.TempDir()
	configPath := filepath.Join(workDir, "config.toml")
	dbPath := filepath.Join(workDir, "gitcrawl.db")

	// Create the config and an empty database; no repository is inserted.
	initArgs := []string{"--config", configPath, "init", "--db", dbPath}
	if err := New().Run(ctx, initArgs); err != nil {
		t.Fatalf("init: %v", err)
	}

	checker := New()
	checker.configPath = configPath

	stale, lastSync, err := checker.ghSearchCacheStale(ctx, "openclaw", "missing", time.Minute)
	if err != nil {
		t.Fatalf("freshness check: %v", err)
	}
	if fresh, hasSync := !stale, !lastSync.IsZero(); fresh || hasSync {
		t.Fatalf("expected missing repo to be stale, stale=%v lastSync=%s", stale, lastSync)
	}
}

View File

@ -4,6 +4,7 @@ import (
"context"
"database/sql"
"fmt"
"time"
)
type RunRecord struct {
@ -74,6 +75,25 @@ func (s *Store) ListRuns(ctx context.Context, repoID int64, kind string, limit i
return out, nil
}
// LastSuccessfulSyncAt returns the latest finished_at value among the
// sync_runs rows for repoID whose status is 'success' or 'completed'.
//
// When no such row exists the zero time.Time is returned with a nil error.
// The stored value is parsed with the time.RFC3339Nano layout; query and
// parse failures are returned as wrapped errors.
//
// NOTE(review): max() is applied to the stored value directly, so this
// presumably relies on finished_at strings sorting in chronological order —
// confirm against how runs are recorded.
func (s *Store) LastSuccessfulSyncAt(ctx context.Context, repoID int64) (time.Time, error) {
	const query = `
select coalesce(max(finished_at), '')
from sync_runs
where repo_id = ? and status in ('success', 'completed')
`
	var raw string
	row := s.q().QueryRowContext(ctx, query, repoID)
	if err := row.Scan(&raw); err != nil {
		return time.Time{}, fmt.Errorf("read last successful sync: %w", err)
	}

	// coalesce(..., '') turns "no matching rows" into an empty string.
	if raw == "" {
		return time.Time{}, nil
	}

	finishedAt, err := time.Parse(time.RFC3339Nano, raw)
	if err != nil {
		return time.Time{}, fmt.Errorf("parse last successful sync %q: %w", raw, err)
	}
	return finishedAt, nil
}
func runTable(kind string) (string, error) {
switch kind {
case "sync":

View File

@ -4,6 +4,7 @@ import (
"context"
"path/filepath"
"testing"
"time"
)
func TestRecordAndListRuns(t *testing.T) {
@ -73,3 +74,40 @@ func TestStatusAcceptsCompletedSyncRuns(t *testing.T) {
t.Fatalf("expected last sync time, got %#v", status)
}
}
// TestLastSuccessfulSyncAt records a failed sync run followed by a later
// successful one and checks that LastSuccessfulSyncAt returns the finish time
// of the successful run.
func TestLastSuccessfulSyncAt(t *testing.T) {
	ctx := context.Background()
	dbPath := filepath.Join(t.TempDir(), "gitcrawl.db")
	st, err := Open(ctx, dbPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	defer st.Close()

	repoID, err := st.UpsertRepository(ctx, Repository{
		Owner:     "openclaw",
		Name:      "gitcrawl",
		FullName:  "openclaw/gitcrawl",
		RawJSON:   "{}",
		UpdatedAt: "2026-04-26T00:00:00Z",
	})
	if err != nil {
		t.Fatalf("repo: %v", err)
	}

	failedRun := RunRecord{
		RepoID:     repoID,
		Kind:       "sync",
		Scope:      "open",
		Status:     "failed",
		StartedAt:  "2026-04-26T00:00:00Z",
		FinishedAt: "2026-04-26T00:00:30Z",
	}
	if _, err := st.RecordRun(ctx, failedRun); err != nil {
		t.Fatalf("record failed run: %v", err)
	}

	successRun := RunRecord{
		RepoID:     repoID,
		Kind:       "sync",
		Scope:      "open",
		Status:     "success",
		StartedAt:  "2026-04-26T00:01:00Z",
		FinishedAt: "2026-04-26T00:01:30Z",
	}
	if _, err := st.RecordRun(ctx, successRun); err != nil {
		t.Fatalf("record success run: %v", err)
	}

	got, err := st.LastSuccessfulSyncAt(ctx, repoID)
	if err != nil {
		t.Fatalf("last sync: %v", err)
	}

	// Same instant as the successful run's FinishedAt above.
	want := time.Date(2026, time.April, 26, 0, 1, 30, 0, time.UTC)
	if !got.Equal(want) {
		t.Fatalf("last sync = %s, want %s", got, want)
	}
}