From 7671a6b999f8b94dc2539f0fb893a7e2bc0a45f0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 8 May 2026 09:50:17 +0100 Subject: [PATCH] fix: harden gitcrawl command surface --- internal/cli/app.go | 144 ++++++++++++++++- internal/cli/app_test.go | 41 ++++- internal/cli/gh_path.go | 67 ++++++++ internal/cli/gh_search.go | 29 +++- internal/cli/gh_search_test.go | 18 ++- internal/cli/gh_shim.go | 58 +++++-- internal/cli/gh_shim_cache.go | 16 +- internal/cli/gh_shim_policy_extra_test.go | 12 ++ internal/cli/gh_shim_test.go | 186 ++++++++++++++++++++++ internal/cli/github_token.go | 2 +- internal/cli/runtime.go | 17 +- internal/cli/runtime_extra_test.go | 16 ++ internal/store/runs.go | 45 ++++++ internal/store/runs_test.go | 39 +++++ 14 files changed, 652 insertions(+), 38 deletions(-) create mode 100644 internal/cli/gh_path.go diff --git a/internal/cli/app.go b/internal/cli/app.go index e2ff4a9..a20f31e 100644 --- a/internal/cli/app.go +++ b/internal/cli/app.go @@ -1305,7 +1305,8 @@ func (a *App) runClusterDetail(ctx context.Context, args []string) error { clusterIDRaw := fs.String("id", "", "cluster id") memberLimitRaw := fs.String("member-limit", "", "maximum member rows") bodyCharsRaw := fs.String("body-chars", "", "maximum body snippet characters") - includeClosed := fs.Bool("include-closed", false, "include closed clusters and members") + includeClosed := fs.Bool("include-closed", false, "deprecated; closed cluster members are shown by default") + hideClosed := fs.Bool("hide-closed", false, "hide locally closed members") jsonOut := fs.Bool("json", false, "write JSON output") if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"id": true, "member-limit": true, "body-chars": true})); err != nil { return usageErr(err) @@ -1346,7 +1347,7 @@ func (a *App) runClusterDetail(ctx context.Context, args []string) error { detail, err := rt.Store.ClusterDetail(ctx, store.ClusterDetailOptions{ RepoID: repo.ID, ClusterID: int64(clusterID), - IncludeClosed: *includeClosed, + IncludeClosed: *includeClosed || !*hideClosed, MemberLimit: memberLimit, BodyChars: bodyChars, }) @@ -1849,14 +1850,14 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy if err := config.EnsureRuntimeDirs(cfg); err != nil { return syncer.Stats{}, err } - st, err := store.Open(ctx, cfg.DBPath) + rt, err := a.openLocalRuntime(ctx) if err != nil { return syncer.Stats{}, err } - defer st.Close() + defer rt.Store.Close() client := gh.New(gh.Options{Token: token.Value, BaseURL: githubBaseURL()}) - service := syncer.New(client, st) + service := syncer.New(client, rt.Store) stats, err := service.Sync(ctx, syncer.Options{ Owner: owner, Repo: repo, @@ -2951,7 +2952,14 @@ func (a *App) printUsage() { } func (a *App) printCommandUsage(command string) error { + if text, ok := commandUsageTexts[command]; ok { + fmt.Fprint(a.Stdout, text) + return nil + } switch command { + case "cluster-explain": + fmt.Fprint(a.Stdout, commandUsageTexts["cluster-detail"]) + return nil case "portable": fmt.Fprint(a.Stdout, portableUsageText) return nil @@ -2982,6 +2990,7 @@ Core commands: doctor check config, token, and database readiness sync sync GitHub issue and pull request metadata refresh run sync, enrichment, embedding, and clustering pipeline + embed generate OpenAI embeddings for local thread documents threads list local issue and pull request rows cluster build durable clusters from local thread vectors close-thread locally hide one issue or pull request row @@ -3007,6 +3016,131 @@ Core commands: No API server is provided. There is intentionally no serve command. ` +var commandUsageTexts = map[string]string{ + "metadata": `gitcrawl metadata prints crawlkit control metadata. + +Usage: + gitcrawl metadata [--json] +`, + "status": `gitcrawl status prints fast read-only archive status. + +Usage: + gitcrawl status [--json] +`, + "init": `gitcrawl init creates a local config and SQLite database. + +Usage: + gitcrawl init [--db path] [--portable-store URL] [--json] +`, + "configure": `gitcrawl configure updates model fields in the config. + +Usage: + gitcrawl configure [--summary-model name] [--embed-model name] [--embedding-basis title_original] [--json] +`, + "doctor": `gitcrawl doctor checks config, token, and database readiness. + +Usage: + gitcrawl doctor [--json] +`, + "sync": `gitcrawl sync mirrors GitHub issue and pull request metadata. + +Usage: + gitcrawl sync owner/repo [--state open|closed|all] [--numbers refs] [--with pr-details] [--include-pr-details] [--json] +`, + "refresh": `gitcrawl refresh runs sync, enrichment, embedding, and clustering. + +Usage: + gitcrawl refresh owner/repo [--state open|closed|all] [--sync-if-stale duration] [--no-sync] [--no-embed] [--no-cluster] [--json] +`, + "embed": `gitcrawl embed generates OpenAI embeddings for local thread documents. + +Usage: + gitcrawl embed owner/repo [--number ref] [--limit N] [--force] [--include-closed] [--json] +`, + "threads": `gitcrawl threads lists local issue and pull request rows. + +Usage: + gitcrawl threads owner/repo [--include-closed] [--numbers refs] [--limit N] [--json] +`, + "search": `gitcrawl search queries local thread documents, or accepts gh-shaped issue and PR search. + +Usage: + gitcrawl search owner/repo --query text [--mode keyword|semantic] [--limit N] [--json] + gitcrawl search issues|prs -R owner/repo [--state open|closed|all] [--json fields] [--limit N] +`, + "cluster": `gitcrawl cluster builds durable clusters from local thread vectors. + +Usage: + gitcrawl cluster owner/repo [--threshold N] [--min-size N] [--max-cluster-size N] [--k N] [--cross-kind-threshold N] [--limit N] [--model name] [--basis semantic|references|hybrid] [--include-closed] [--json] +`, + "clusters": `gitcrawl clusters lists latest display clusters with durable fallback. + +Usage: + gitcrawl clusters owner/repo [--sort size|recent|oldest] [--min-size N] [--limit N] [--hide-closed] [--json] +`, + "durable-clusters": `gitcrawl durable-clusters lists governed durable cluster groups. + +Usage: + gitcrawl durable-clusters owner/repo [--include-closed] [--sort size|recent|oldest] [--min-size N] [--limit N] [--json] +`, + "cluster-detail": `gitcrawl cluster-detail dumps one cluster and its member rows. + +Usage: + gitcrawl cluster-detail owner/repo --id N [--member-limit N] [--body-chars N] [--hide-closed] [--json] +`, + "neighbors": `gitcrawl neighbors lists vector-nearest local issue and pull request rows. + +Usage: + gitcrawl neighbors owner/repo --number ref [--limit N] [--json] +`, + "runs": `gitcrawl runs lists local pipeline run history. + +Usage: + gitcrawl runs owner/repo [--kind sync|summary|embedding|cluster] [--limit N] [--json] +`, + "close-thread": `gitcrawl close-thread locally hides one issue or pull request row. + +Usage: + gitcrawl close-thread owner/repo --number ref [--reason text] [--json] +`, + "reopen-thread": `gitcrawl reopen-thread clears a local thread hide. + +Usage: + gitcrawl reopen-thread owner/repo --number ref [--json] +`, + "close-cluster": `gitcrawl close-cluster locally hides one durable cluster. + +Usage: + gitcrawl close-cluster owner/repo --id N [--reason text] [--json] +`, + "reopen-cluster": `gitcrawl reopen-cluster clears a local cluster hide. + +Usage: + gitcrawl reopen-cluster owner/repo --id N [--json] +`, + "exclude-cluster-member": `gitcrawl exclude-cluster-member locally removes one row from a durable cluster. + +Usage: + gitcrawl exclude-cluster-member owner/repo --id N --number ref [--reason text] [--json] +`, + "include-cluster-member": `gitcrawl include-cluster-member restores one row to a durable cluster. + +Usage: + gitcrawl include-cluster-member owner/repo --id N --number ref [--json] +`, + "set-cluster-canonical": `gitcrawl set-cluster-canonical sets the canonical row for a durable cluster. + +Usage: + gitcrawl set-cluster-canonical owner/repo --id N --number ref [--reason text] [--json] +`, + "gh": `gitcrawl gh runs a gh-compatible local cache shim with fallback to real gh. + +Usage: + gitcrawl gh + gitcrawl gh xcache stats|keys|gc|flush|reset|snapshot [--json] +`, +} + const tuiUsageText = `gitcrawl tui opens the local terminal cluster browser. Usage: diff --git a/internal/cli/app_test.go b/internal/cli/app_test.go index 84cc476..f20d109 100644 --- a/internal/cli/app_test.go +++ b/internal/cli/app_test.go @@ -153,6 +153,15 @@ func TestMetadataStatusAndControlStatusJSON(t *testing.T) { if !strings.Contains(helpOut.String(), "cluster browser") { t.Fatalf("tui help output = %q", helpOut.String()) } + for _, topic := range []string{"metadata", "status", "init", "configure", "doctor", "sync", "refresh", "embed", "threads", "search", "cluster", "clusters", "durable-clusters", "cluster-detail", "cluster-explain", "neighbors", "runs", "close-thread", "reopen-thread", "close-cluster", "reopen-cluster", "exclude-cluster-member", "include-cluster-member", "set-cluster-canonical", "gh"} { + helpOut.Reset() + if err := help.printCommandUsage(topic); err != nil { + t.Fatalf("%s help: %v", topic, err) + } + if !strings.Contains(helpOut.String(), "Usage:") { + t.Fatalf("%s help output = %q", topic, helpOut.String()) + } + } if err := New().Run(ctx, []string{"--config", configPath, "status", "extra"}); err == nil { t.Fatal("status extra arg should fail") } @@ -980,7 +989,7 @@ func TestGlobalCommandBranches(t *testing.T) { }{ {args: []string{"--help"}, wantOut: "Usage:"}, {args: []string{"help"}, wantOut: "Usage:"}, - {args: []string{"help", "sync"}, wantErr: true, exitCode: 2}, + {args: []string{"help", "sync"}, wantOut: "gitcrawl sync"}, {args: []string{"--version"}, wantOut: "dev"}, {args: []string{"version"}, wantOut: "dev"}, {args: []string{"--json", "version"}, wantOut: `"version"`}, @@ -2326,6 +2335,36 @@ func TestClustersDefaultShowsActivePrimaryMembers(t *testing.T) { if len(all.Clusters) != 1 || all.Clusters[0].MemberCount != 1 { t.Fatalf("hide-closed should focus active members, got %#v", all.Clusters) } + + stdout.Reset() + detail := New() + detail.Stdout = &stdout + if err := detail.Run(ctx, []string{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", "90"}); err != nil { + t.Fatalf("cluster-detail: %v", err) + } + var detailPayload struct { + Members []store.ClusterMemberDetail `json:"members"` + } + if err := json.Unmarshal(stdout.Bytes(), &detailPayload); err != nil { + t.Fatalf("decode cluster detail: %v\n%s", err, stdout.String()) + } + if len(detailPayload.Members) != 2 { + t.Fatalf("default cluster-detail should match visible cluster members, got %#v", detailPayload.Members) + } + + stdout.Reset() + hideDetail := New() + hideDetail.Stdout = &stdout + if err := hideDetail.Run(ctx, []string{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", "90", "--hide-closed"}); err != nil { + t.Fatalf("cluster-detail hide closed: %v", err) + } + detailPayload.Members = nil + if err := json.Unmarshal(stdout.Bytes(), &detailPayload); err != nil { + t.Fatalf("decode hide-closed cluster detail: %v\n%s", err, stdout.String()) + } + if len(detailPayload.Members) != 1 || detailPayload.Members[0].Thread.Number != 90 { + t.Fatalf("hide-closed cluster-detail should focus open members, got %#v", detailPayload.Members) + } } func TestClusterMemberOverrideCommands(t *testing.T) { diff --git a/internal/cli/gh_path.go b/internal/cli/gh_path.go new file mode 100644 index 0000000..15a37bc --- /dev/null +++ b/internal/cli/gh_path.go @@ -0,0 +1,67 @@ +package cli + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" +) + +func resolveRealGHPath() (string, error) { + envPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH")) + candidates := []string{} + if envPath != "" { + candidates = append(candidates, envPath) + } + candidates = append(candidates, + "/opt/homebrew/opt/gh/bin/gh", + "/usr/local/opt/gh/bin/gh", + "/usr/local/bin/gh", + "/usr/bin/gh", + ) + if lookPath, err := exec.LookPath("gh"); err == nil { + candidates = append(candidates, lookPath) + } + + seen := map[string]bool{} + for _, candidate := range candidates { + candidate = strings.TrimSpace(candidate) + if candidate == "" || seen[candidate] { + continue + } + seen[candidate] = true + info, err := os.Stat(candidate) + if err != nil || info.IsDir() { + if envPath != "" && candidate == envPath { + return "", fmt.Errorf("real gh not found at GITCRAWL_GH_PATH %q", envPath) + } + continue + } + if isGitcrawlShimPath(candidate) { + if envPath != "" && candidate == envPath { + return "", fmt.Errorf("GITCRAWL_GH_PATH points to the gitcrawl shim (%s); set it to the real gh binary", envPath) + } + continue + } + return candidate, nil + } + return "", fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH") +} + +func isGitcrawlShimPath(path string) bool { + if path == "" { + return false + } + resolved := path + if eval, err := filepath.EvalSymlinks(path); err == nil { + resolved = eval + } + for _, value := range []string{path, resolved} { + base := strings.ToLower(filepath.Base(value)) + if base == "gitcrawl" || base == "gitcrawl-gh" { + return true + } + } + return false +} diff --git a/internal/cli/gh_search.go b/internal/cli/gh_search.go index 358eb55..54b4ce4 100644 --- a/internal/cli/gh_search.go +++ b/internal/cli/gh_search.go @@ -104,6 +104,15 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error { if err != nil { return err } + if len(threads) == 0 && ghSearchNeedsLiveEmptyCheck(kind, query, state) { + lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state) + if err != nil { + return err + } + if lastSync.IsZero() { + return localGHUnsupported(fmt.Errorf("empty local %s search has no broad %s sync", args[0], ghDefaultListState(state))) + } + } jsonFields := strings.TrimSpace(*jsonFieldsRaw) if jsonFields != "" || a.format == FormatJSON { @@ -126,7 +135,7 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error { } func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) error { - stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, maxAge) + stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, state, maxAge) if err != nil { return err } @@ -142,7 +151,7 @@ func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state st return err } -func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, maxAge time.Duration) (bool, time.Time, error) { +func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) (bool, time.Time, error) { rt, err := a.openLocalRuntimeReadOnly(ctx) if err != nil { if errors.Is(err, os.ErrNotExist) { @@ -158,7 +167,7 @@ func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, ma } return false, time.Time{}, err } - lastSync, err := rt.Store.LastSuccessfulSyncAt(ctx, repo.ID) + lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state) if err != nil { return false, time.Time{}, err } @@ -168,6 +177,20 @@ func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, ma return time.Since(lastSync) > maxAge, lastSync, nil } +func ghSearchNeedsLiveEmptyCheck(kind, query, state string) bool { + if strings.TrimSpace(query) != "" || kind != "issue" { + return false + } + return ghDefaultListState(state) == "open" +} + +func ghDefaultListState(state string) string { + if strings.TrimSpace(state) == "" { + return "open" + } + return strings.TrimSpace(state) +} + func parseGHSearchQuery(value string) (query string, repo string, state string) { var queryParts []string for _, part := range strings.Fields(value) { diff --git a/internal/cli/gh_search_test.go b/internal/cli/gh_search_test.go index 24b0d23..09d555b 100644 --- a/internal/cli/gh_search_test.go +++ b/internal/cli/gh_search_test.go @@ -66,6 +66,16 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) { t.Fatalf("repo: %v", err) } finishedAt := time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339Nano) + if _, err := st.RecordRun(ctx, store.RunRecord{ + RepoID: repoID, + Kind: "sync", + Scope: "numbers:13", + Status: "success", + StartedAt: time.Now().UTC().Format(time.RFC3339Nano), + FinishedAt: time.Now().UTC().Format(time.RFC3339Nano), + }); err != nil { + t.Fatalf("record targeted sync: %v", err) + } if _, err := st.RecordRun(ctx, store.RunRecord{ RepoID: repoID, Kind: "sync", @@ -74,7 +84,7 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) { StartedAt: finishedAt, FinishedAt: finishedAt, }); err != nil { - t.Fatalf("record sync: %v", err) + t.Fatalf("record broad sync: %v", err) } if err := st.Close(); err != nil { t.Fatalf("close store: %v", err) @@ -82,14 +92,14 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) { run := New() run.configPath = configPath - stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 2*time.Hour) + stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", "open", 2*time.Hour) if err != nil { t.Fatalf("freshness check: %v", err) } if stale || lastSync.IsZero() { t.Fatalf("expected cache to be fresh, stale=%v lastSync=%s", stale, lastSync) } - stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 30*time.Minute) + stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", "open", 30*time.Minute) if err != nil { t.Fatalf("stale freshness check: %v", err) } @@ -110,7 +120,7 @@ func TestGHSearchCacheStaleWhenRepoMissing(t *testing.T) { run := New() run.configPath = configPath - stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", time.Minute) + stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", "open", time.Minute) if err != nil { t.Fatalf("freshness check: %v", err) } diff --git a/internal/cli/gh_shim.go b/internal/cli/gh_shim.go index 85ddf38..be378b3 100644 --- a/internal/cli/gh_shim.go +++ b/internal/cli/gh_shim.go @@ -206,6 +206,22 @@ func (a *App) runGHThreadList(ctx context.Context, resource string, args []strin if err != nil { return err } + if len(threads) == 0 && ghThreadListNeedsLiveEmptyCheck(ghThreadListRequest{ + Kind: ghResourceKind(resource), + State: strings.TrimSpace(*stateRaw), + Query: strings.TrimSpace(*searchRaw), + Author: strings.TrimSpace(*authorRaw), + Assignee: strings.TrimSpace(*assigneeRaw), + Labels: labels.Values(), + }) { + fresh, err := a.localGHThreadListHasBroadSync(ctx, repoValue, strings.TrimSpace(*stateRaw)) + if err != nil { + return err + } + if !fresh { + return localGHUnsupported(fmt.Errorf("empty local %s list has no broad %s sync", resource, ghDefaultListState(*stateRaw))) + } + } jsonFields := strings.TrimSpace(*jsonFieldsRaw) if jsonFields != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON { if jsonFields == "" { @@ -293,6 +309,34 @@ func (a *App) localGHThreads(ctx context.Context, req ghThreadListRequest) ([]st }) } +func ghThreadListNeedsLiveEmptyCheck(req ghThreadListRequest) bool { + if req.Kind != "issue" || strings.TrimSpace(req.Query) != "" || strings.TrimSpace(req.Author) != "" || strings.TrimSpace(req.Assignee) != "" || len(req.Labels) > 0 { + return false + } + return ghDefaultListState(req.State) == "open" +} + +func (a *App) localGHThreadListHasBroadSync(ctx context.Context, repoValue, state string) (bool, error) { + owner, repoName, err := parseOwnerRepo(repoValue) + if err != nil { + return false, err + } + rt, err := a.openLocalRuntimeReadOnly(ctx) + if err != nil { + return false, localGHUnsupported(err) + } + defer rt.Store.Close() + repo, err := rt.repository(ctx, owner, repoName) + if err != nil { + return false, localGHUnsupported(err) + } + lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state) + if err != nil { + return false, err + } + return !lastSync.IsZero(), nil +} + func (a *App) resolveGHRepo(ctx context.Context, explicit string) (string, error) { if strings.TrimSpace(explicit) != "" { return strings.TrimSpace(explicit), nil @@ -313,17 +357,9 @@ func (a *App) resolveGHRepo(ctx context.Context, explicit string) (string, error } func (a *App) execRealGH(ctx context.Context, args []string) error { - ghPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH")) - if ghPath == "" { - if _, err := os.Stat("/opt/homebrew/opt/gh/bin/gh"); err == nil { - ghPath = "/opt/homebrew/opt/gh/bin/gh" - } else { - var err error - ghPath, err = exec.LookPath("gh") - if err != nil { - return fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH") - } - } + ghPath, err := resolveRealGHPath() + if err != nil { + return err } cmd := exec.CommandContext(ctx, ghPath, args...) cmd.Stdin = os.Stdin diff --git a/internal/cli/gh_shim_cache.go b/internal/cli/gh_shim_cache.go index 7142236..059f8de 100644 --- a/internal/cli/gh_shim_cache.go +++ b/internal/cli/gh_shim_cache.go @@ -85,24 +85,16 @@ func cacheGHReadErrors() bool { } func (a *App) captureRealGH(ctx context.Context, args []string) (string, string, int, error) { - ghPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH")) - if ghPath == "" { - if _, err := os.Stat("/opt/homebrew/opt/gh/bin/gh"); err == nil { - ghPath = "/opt/homebrew/opt/gh/bin/gh" - } else { - var err error - ghPath, err = exec.LookPath("gh") - if err != nil { - return "", "", 127, fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH") - } - } + ghPath, err := resolveRealGHPath() + if err != nil { + return "", "", 127, err } var stdout, stderr bytes.Buffer cmd := exec.CommandContext(ctx, ghPath, args...) cmd.Stdin = os.Stdin cmd.Stdout = &stdout cmd.Stderr = &stderr - err := cmd.Run() + err = cmd.Run() exitCode := 0 if err != nil { exitCode = 1 diff --git a/internal/cli/gh_shim_policy_extra_test.go b/internal/cli/gh_shim_policy_extra_test.go index 1cc6c91..db597ce 100644 --- a/internal/cli/gh_shim_policy_extra_test.go +++ b/internal/cli/gh_shim_policy_extra_test.go @@ -228,6 +228,18 @@ func TestGHShimCachePolicyExtraBranches(t *testing.T) { if strings.TrimSpace(ghOut.String()) != "real-gh:" { t.Fatalf("empty gh shim output = %q", ghOut.String()) } + shimPath := filepath.Join(t.TempDir(), "gitcrawl-gh") + if err := os.WriteFile(shimPath, []byte("#!/bin/sh\necho shim\n"), 0o755); err != nil { + t.Fatalf("write fake shim: %v", err) + } + shimLink := filepath.Join(t.TempDir(), "gh") + if err := os.Symlink(shimPath, shimLink); err != nil { + t.Fatalf("symlink fake shim: %v", err) + } + t.Setenv("GITCRAWL_GH_PATH", shimLink) + if _, err := resolveRealGHPath(); err == nil || !strings.Contains(err.Error(), "gitcrawl shim") { + t.Fatalf("shim path should fail fast, err=%v", err) + } t.Setenv("GITCRAWL_GH_STALE_GRACE", "3m") if got := ghCommandCacheStaleGrace([]string{"api", "users/octocat"}); got != 3*time.Minute { t.Fatalf("env stale grace = %s", got) diff --git a/internal/cli/gh_shim_test.go b/internal/cli/gh_shim_test.go index 56c2203..89fda80 100644 --- a/internal/cli/gh_shim_test.go +++ b/internal/cli/gh_shim_test.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "encoding/json" + "net/http" + "net/http/httptest" "os" "path/filepath" "strings" @@ -64,6 +66,122 @@ func TestGHShimFallsBackForUnsupportedRead(t *testing.T) { } } +func TestGHShimFallsBackForEmptyOpenIssueListWithoutBroadSync(t *testing.T) { + ctx := context.Background() + configPath := seedGHShimEmptyRepo(t, ctx) + dir := t.TempDir() + ghPath := filepath.Join(dir, "gh") + if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho fallback:$*\n"), 0o755); err != nil { + t.Fatalf("write fake gh: %v", err) + } + t.Setenv("GITCRAWL_GH_PATH", ghPath) + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number"}); err != nil { + t.Fatalf("fallback: %v", err) + } + if got := strings.TrimSpace(stdout.String()); got != "fallback:issue list -R openclaw/openclaw --state open --json number" { + t.Fatalf("fallback output = %q", got) + } +} + +func TestGHShimSearchFallsBackForEmptyOpenRepoWithoutBroadSync(t *testing.T) { + ctx := context.Background() + configPath := seedGHShimEmptyRepo(t, ctx) + dir := t.TempDir() + ghPath := filepath.Join(dir, "gh") + if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho fallback:$*\n"), 0o755); err != nil { + t.Fatalf("write fake gh: %v", err) + } + t.Setenv("GITCRAWL_GH_PATH", ghPath) + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "gh", "search", "issues", "-R", "openclaw/openclaw", "--state", "open", "--json", "number"}); err != nil { + t.Fatalf("fallback: %v", err) + } + if got := strings.TrimSpace(stdout.String()); got != "fallback:search issues -R openclaw/openclaw --state open --json number" { + t.Fatalf("fallback output = %q", got) + } +} + +func TestGHShimAutoHydratePortableStoreWritesRuntimeMirror(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + remoteDir := filepath.Join(dir, "remote") + checkoutDir := filepath.Join(dir, "checkout") + dbRel := filepath.Join("data", "openclaw__openclaw.sync.db") + if err := os.MkdirAll(filepath.Join(remoteDir, "data"), 0o755); err != nil { + t.Fatalf("mkdir remote data: %v", err) + } + if err := runGit(ctx, remoteDir, "init", "-b", "main"); err != nil { + t.Fatalf("git init: %v", err) + } + seedPortableThread(t, filepath.Join(remoteDir, dbRel), 1, "portable issue") + if err := runGit(ctx, remoteDir, "add", dbRel); err != nil { + t.Fatalf("git add seed: %v", err) + } + if err := runGit(ctx, remoteDir, "-c", "user.email=test@example.com", "-c", "user.name=Test", "commit", "-m", "seed store"); err != nil { + t.Fatalf("git commit seed: %v", err) + } + if _, err := syncPortableStore(ctx, remoteDir, checkoutDir); err != nil { + t.Fatalf("clone portable store: %v", err) + } + + configPath := filepath.Join(dir, "config.toml") + app := New() + if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", filepath.Join(checkoutDir, dbRel)}); err != nil { + t.Fatalf("init config: %v", err) + } + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/repos/openclaw/openclaw": + _ = json.NewEncoder(w).Encode(map[string]any{"id": 101, "full_name": "openclaw/openclaw"}) + case "/repos/openclaw/openclaw/issues/2": + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": 502, + "number": 2, + "state": "open", + "title": "runtime-only issue", + "body": "hydrate into runtime mirror", + "html_url": "https://github.com/openclaw/openclaw/issues/2", + "created_at": "2026-05-08T00:00:00Z", + "updated_at": "2026-05-08T00:00:00Z", + "labels": []map[string]any{}, + "assignees": []map[string]any{}, + "user": map[string]any{"login": "alice", "type": "User"}, + }) + default: + t.Fatalf("unexpected path: %s", r.URL.String()) + } + })) + defer server.Close() + t.Setenv("GITHUB_TOKEN", "test-token") + t.Setenv("GITCRAWL_GITHUB_BASE_URL", server.URL) + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "view", "2", "-R", "openclaw/openclaw", "--json", "number,title"}); err != nil { + t.Fatalf("gh issue view: %v", err) + } + if !strings.Contains(stdout.String(), `"number": 2`) || !strings.Contains(stdout.String(), "runtime-only issue") { + t.Fatalf("view output = %q", stdout.String()) + } + if !gitWorktreeClean(ctx, checkoutDir) { + t.Fatal("auto-hydrate dirtied portable checkout") + } + assertPortableThreadPresence(t, ctx, filepath.Join(checkoutDir, dbRel), 2, false) + mirrorPath, err := run.portableRuntimeDBPath(filepath.Join(checkoutDir, dbRel)) + if err != nil { + t.Fatalf("runtime db path: %v", err) + } + assertPortableThreadPresence(t, ctx, mirrorPath, 2, true) +} + func TestGHShimViewAcceptsFullGitHubURL(t *testing.T) { ctx := context.Background() configPath := seedGHShimRepo(t, ctx) @@ -87,6 +205,74 @@ func TestGHShimViewAcceptsFullGitHubURL(t *testing.T) { } } +func seedGHShimEmptyRepo(t *testing.T, ctx context.Context) string { + t.Helper() + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + dbPath := filepath.Join(dir, "gitcrawl.db") + app := New() + if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil { + t.Fatalf("init: %v", err) + } + cfg, err := config.Load(configPath) + if err != nil { + t.Fatalf("load config: %v", err) + } + cfg.CacheDir = filepath.Join(dir, "cache") + if err := config.Save(configPath, cfg); err != nil { + t.Fatalf("save config: %v", err) + } + st, err := store.Open(ctx, dbPath) + if err != nil { + t.Fatalf("open store: %v", err) + } + repoID, err := st.UpsertRepository(ctx, store.Repository{ + Owner: "openclaw", + Name: "openclaw", + FullName: "openclaw/openclaw", + RawJSON: "{}", + UpdatedAt: "2026-05-08T00:00:00Z", + }) + if err != nil { + t.Fatalf("seed repository: %v", err) + } + if _, err := st.RecordRun(ctx, store.RunRecord{ + RepoID: repoID, + Kind: "sync", + Scope: "numbers:13", + Status: "success", + StartedAt: "2026-05-08T00:00:00Z", + FinishedAt: "2026-05-08T00:00:01Z", + }); err != nil { + t.Fatalf("record targeted sync: %v", err) + } + if err := st.Close(); err != nil { + t.Fatalf("close store: %v", err) + } + return configPath +} + +func assertPortableThreadPresence(t *testing.T, ctx context.Context, dbPath string, number int, want bool) { + t.Helper() + st, err := store.OpenReadOnly(ctx, dbPath) + if err != nil { + t.Fatalf("open store %s: %v", dbPath, err) + } + defer st.Close() + repo, err := st.RepositoryByFullName(ctx, "openclaw/openclaw") + if err != nil { + t.Fatalf("repository %s: %v", dbPath, err) + } + threads, err := st.ListThreadsFiltered(ctx, store.ThreadListOptions{RepoID: repo.ID, IncludeClosed: true, Numbers: []int{number}}) + if err != nil { + t.Fatalf("list threads %s: %v", dbPath, err) + } + got := len(threads) > 0 + if got != want { + t.Fatalf("thread %d presence in %s = %v, want %v", number, dbPath, got, want) + } +} + func seedGHShimRepo(t *testing.T, ctx context.Context) string { t.Helper() dir := t.TempDir() diff --git a/internal/cli/github_token.go b/internal/cli/github_token.go index cee1971..e181fa0 100644 --- a/internal/cli/github_token.go +++ b/internal/cli/github_token.go @@ -64,7 +64,7 @@ func candidateRealGHPaths() []string { seen := map[string]bool{} unique := paths[:0] for _, path := range paths { - if path = strings.TrimSpace(path); path != "" && !seen[path] { + if path = strings.TrimSpace(path); path != "" && !seen[path] && !isGitcrawlShimPath(path) { seen[path] = true unique = append(unique, path) } diff --git a/internal/cli/runtime.go b/internal/cli/runtime.go index 1a64365..74fa697 100644 --- a/internal/cli/runtime.go +++ b/internal/cli/runtime.go @@ -3,6 +3,7 @@ package cli import ( "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -26,6 +27,8 @@ const portableStoreRefreshTimeout = 15 * time.Second const portableStoreRefreshTTL = 2 * time.Minute const portableStoreRefreshFailureBackoff = time.Minute +var errPortableStoreDirty = errors.New("portable store checkout has local changes") + func (a *App) openLocalRuntime(ctx context.Context) (localRuntime, error) { cfg, err := config.Load(a.configPath) if err != nil { @@ -91,7 +94,7 @@ func refreshPortableStoreForDB(ctx context.Context, dbPath string) error { return nil } if !gitWorktreeClean(ctx, root) { - return nil + return errPortableStoreDirty } pullCtx, cancel := context.WithTimeout(ctx, portableStoreRefreshTimeout) defer cancel() @@ -169,6 +172,7 @@ func refreshPortableStoreForDBIfDue(ctx context.Context, sourceDBPath, mirrorPat if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil { return err } + removeStalePortableRefreshLock(lockPath, now) lock, locked := tryGHCommandCacheLock(lockPath) if !locked { return nil @@ -196,6 +200,17 @@ func refreshPortableStoreForDBIfDue(ctx context.Context, sourceDBPath, mirrorPat return writePortableStoreRefreshState(statePath, state) } +func removeStalePortableRefreshLock(path string, now time.Time) { + info, err := os.Stat(path) + if err != nil { + return + } + if now.Sub(info.ModTime()) <= 2*portableStoreRefreshTimeout { + return + } + _ = os.Remove(path) +} + func portableStoreRefreshInterval() time.Duration { if raw := strings.TrimSpace(os.Getenv("GITCRAWL_PORTABLE_REFRESH_TTL")); raw != "" { if duration, err := time.ParseDuration(raw); err == nil && duration >= 0 { diff --git a/internal/cli/runtime_extra_test.go b/internal/cli/runtime_extra_test.go index 0bc94e9..d895a26 100644 --- a/internal/cli/runtime_extra_test.go +++ b/internal/cli/runtime_extra_test.go @@ -66,6 +66,22 @@ func TestPortableRuntimeUtilityBranches(t *testing.T) { if recentPortableRefresh("", now, time.Minute) || recentPortableRefresh("bad", now, time.Minute) || !recentPortableRefresh(now.Format(time.RFC3339Nano), now, time.Minute) { t.Fatal("recent refresh classification mismatch") } + lockPath := filepath.Join(dir, "refresh.lock") + if err := os.WriteFile(lockPath, []byte("123\n"), 0o600); err != nil { + t.Fatalf("write lock: %v", err) + } + removeStalePortableRefreshLock(lockPath, now) + if _, err := os.Stat(lockPath); err != nil { + t.Fatalf("fresh lock should remain: %v", err) + } + old := now.Add(-3 * portableStoreRefreshTimeout) + if err := os.Chtimes(lockPath, old, old); err != nil { + t.Fatalf("age lock: %v", err) + } + removeStalePortableRefreshLock(lockPath, now) + if _, err := os.Stat(lockPath); !os.IsNotExist(err) { + t.Fatalf("stale lock should be removed, err=%v", err) + } t.Setenv("GITCRAWL_PORTABLE_REFRESH_TTL", "0") if got := portableStoreRefreshInterval(); got != 0 { t.Fatalf("zero ttl = %s", got) diff --git a/internal/store/runs.go b/internal/store/runs.go index 613eef3..34685ed 100644 --- a/internal/store/runs.go +++ b/internal/store/runs.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "fmt" + "strings" "time" ) @@ -94,6 +95,50 @@ func (s *Store) LastSuccessfulSyncAt(ctx context.Context, repoID int64) (time.Ti return parsed, nil } +func (s *Store) LastSuccessfulListSyncAt(ctx context.Context, repoID int64, state string) (time.Time, error) { + scopes := listSyncScopesForState(state) + if len(scopes) == 0 { + return time.Time{}, nil + } + placeholders := make([]string, len(scopes)) + args := make([]any, 0, 1+len(scopes)) + args = append(args, repoID) + for i, scope := range scopes { + placeholders[i] = "?" + args = append(args, scope) + } + var lastSync string + err := s.q().QueryRowContext(ctx, ` + select coalesce(max(finished_at), '') + from sync_runs + where repo_id = ? and status in ('success', 'completed') and scope in (`+strings.Join(placeholders, ",")+`) + `, args...).Scan(&lastSync) + if err != nil { + return time.Time{}, fmt.Errorf("read last successful list sync: %w", err) + } + if lastSync == "" { + return time.Time{}, nil + } + parsed, err := time.Parse(time.RFC3339Nano, lastSync) + if err != nil { + return time.Time{}, fmt.Errorf("parse last successful list sync %q: %w", lastSync, err) + } + return parsed, nil +} + +func listSyncScopesForState(state string) []string { + switch strings.TrimSpace(strings.ToLower(state)) { + case "", "open": + return []string{"open", "all"} + case "closed": + return []string{"closed", "all"} + case "all": + return []string{"all"} + default: + return nil + } +} + func runTable(kind string) (string, error) { switch kind { case "sync": diff --git a/internal/store/runs_test.go b/internal/store/runs_test.go index 2d97738..80042fa 100644 --- a/internal/store/runs_test.go +++ b/internal/store/runs_test.go @@ -111,3 +111,42 @@ func TestLastSuccessfulSyncAt(t *testing.T) { t.Fatalf("last sync = %s, want %s", lastSync, want) } } + +func TestLastSuccessfulListSyncAtIgnoresTargetedRuns(t *testing.T) { + ctx := context.Background() + st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db")) + if err != nil { + t.Fatalf("open store: %v", err) + } + defer st.Close() + + repoID, err := st.UpsertRepository(ctx, Repository{ + Owner: "openclaw", Name: "gitcrawl", FullName: "openclaw/gitcrawl", RawJSON: "{}", UpdatedAt: "2026-04-26T00:00:00Z", + }) + if err != nil { + t.Fatalf("repo: %v", err) + } + if _, err := st.RecordRun(ctx, RunRecord{ + RepoID: repoID, Kind: "sync", Scope: "numbers:13", Status: "success", + StartedAt: "2026-04-26T00:03:00Z", FinishedAt: "2026-04-26T00:03:30Z", + }); err != nil { + t.Fatalf("record targeted run: %v", err) + } + if lastSync, err := st.LastSuccessfulListSyncAt(ctx, repoID, "open"); err != nil || !lastSync.IsZero() { + t.Fatalf("targeted run should not count as broad list sync: last=%s err=%v", lastSync, err) + } + if _, err := st.RecordRun(ctx, RunRecord{ + RepoID: repoID, Kind: "sync", Scope: "all", Status: "success", + StartedAt: "2026-04-26T00:04:00Z", FinishedAt: "2026-04-26T00:04:30Z", + }); err != nil { + t.Fatalf("record all run: %v", err) + } + lastSync, err := st.LastSuccessfulListSyncAt(ctx, repoID, "open") + if err != nil { + t.Fatalf("last broad sync: %v", err) + } + want, _ := time.Parse(time.RFC3339Nano, "2026-04-26T00:04:30Z") + if !lastSync.Equal(want) { + t.Fatalf("last broad sync = %s, want %s", lastSync, want) + } +}