fix: harden gitcrawl command surface

This commit is contained in:
Peter Steinberger 2026-05-08 09:50:17 +01:00
parent f2d60276f9
commit 7671a6b999
No known key found for this signature in database
14 changed files with 652 additions and 38 deletions

View File

@ -1305,7 +1305,8 @@ func (a *App) runClusterDetail(ctx context.Context, args []string) error {
clusterIDRaw := fs.String("id", "", "cluster id")
memberLimitRaw := fs.String("member-limit", "", "maximum member rows")
bodyCharsRaw := fs.String("body-chars", "", "maximum body snippet characters")
includeClosed := fs.Bool("include-closed", false, "include closed clusters and members")
includeClosed := fs.Bool("include-closed", false, "deprecated; closed cluster members are shown by default")
hideClosed := fs.Bool("hide-closed", false, "hide locally closed members")
jsonOut := fs.Bool("json", false, "write JSON output")
if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"id": true, "member-limit": true, "body-chars": true})); err != nil {
return usageErr(err)
@ -1346,7 +1347,7 @@ func (a *App) runClusterDetail(ctx context.Context, args []string) error {
detail, err := rt.Store.ClusterDetail(ctx, store.ClusterDetailOptions{
RepoID: repo.ID,
ClusterID: int64(clusterID),
IncludeClosed: *includeClosed,
IncludeClosed: *includeClosed || !*hideClosed,
MemberLimit: memberLimit,
BodyChars: bodyChars,
})
@ -1849,14 +1850,14 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
if err := config.EnsureRuntimeDirs(cfg); err != nil {
return syncer.Stats{}, err
}
st, err := store.Open(ctx, cfg.DBPath)
rt, err := a.openLocalRuntime(ctx)
if err != nil {
return syncer.Stats{}, err
}
defer st.Close()
defer rt.Store.Close()
client := gh.New(gh.Options{Token: token.Value, BaseURL: githubBaseURL()})
service := syncer.New(client, st)
service := syncer.New(client, rt.Store)
stats, err := service.Sync(ctx, syncer.Options{
Owner: owner,
Repo: repo,
@ -2951,7 +2952,14 @@ func (a *App) printUsage() {
}
func (a *App) printCommandUsage(command string) error {
if text, ok := commandUsageTexts[command]; ok {
fmt.Fprint(a.Stdout, text)
return nil
}
switch command {
case "cluster-explain":
fmt.Fprint(a.Stdout, commandUsageTexts["cluster-detail"])
return nil
case "portable":
fmt.Fprint(a.Stdout, portableUsageText)
return nil
@ -2982,6 +2990,7 @@ Core commands:
doctor check config, token, and database readiness
sync sync GitHub issue and pull request metadata
refresh run sync, enrichment, embedding, and clustering pipeline
embed generate OpenAI embeddings for local thread documents
threads list local issue and pull request rows
cluster build durable clusters from local thread vectors
close-thread locally hide one issue or pull request row
@ -3007,6 +3016,131 @@ Core commands:
No API server is provided. There is intentionally no serve command.
`
var commandUsageTexts = map[string]string{
"metadata": `gitcrawl metadata prints crawlkit control metadata.
Usage:
gitcrawl metadata [--json]
`,
"status": `gitcrawl status prints fast read-only archive status.
Usage:
gitcrawl status [--json]
`,
"init": `gitcrawl init creates a local config and SQLite database.
Usage:
gitcrawl init [--db path] [--portable-store URL] [--json]
`,
"configure": `gitcrawl configure updates model fields in the config.
Usage:
gitcrawl configure [--summary-model name] [--embed-model name] [--embedding-basis title_original] [--json]
`,
"doctor": `gitcrawl doctor checks config, token, and database readiness.
Usage:
gitcrawl doctor [--json]
`,
"sync": `gitcrawl sync mirrors GitHub issue and pull request metadata.
Usage:
gitcrawl sync owner/repo [--state open|closed|all] [--numbers refs] [--with pr-details] [--include-pr-details] [--json]
`,
"refresh": `gitcrawl refresh runs sync, enrichment, embedding, and clustering.
Usage:
gitcrawl refresh owner/repo [--state open|closed|all] [--sync-if-stale duration] [--no-sync] [--no-embed] [--no-cluster] [--json]
`,
"embed": `gitcrawl embed generates OpenAI embeddings for local thread documents.
Usage:
gitcrawl embed owner/repo [--number ref] [--limit N] [--force] [--include-closed] [--json]
`,
"threads": `gitcrawl threads lists local issue and pull request rows.
Usage:
gitcrawl threads owner/repo [--include-closed] [--numbers refs] [--limit N] [--json]
`,
"search": `gitcrawl search queries local thread documents, or accepts gh-shaped issue and PR search.
Usage:
gitcrawl search owner/repo --query text [--mode keyword|semantic] [--limit N] [--json]
gitcrawl search issues|prs <query> -R owner/repo [--state open|closed|all] [--json fields] [--limit N]
`,
"cluster": `gitcrawl cluster builds durable clusters from local thread vectors.
Usage:
gitcrawl cluster owner/repo [--threshold N] [--min-size N] [--max-cluster-size N] [--k N] [--cross-kind-threshold N] [--limit N] [--model name] [--basis semantic|references|hybrid] [--include-closed] [--json]
`,
"clusters": `gitcrawl clusters lists latest display clusters with durable fallback.
Usage:
gitcrawl clusters owner/repo [--sort size|recent|oldest] [--min-size N] [--limit N] [--hide-closed] [--json]
`,
"durable-clusters": `gitcrawl durable-clusters lists governed durable cluster groups.
Usage:
gitcrawl durable-clusters owner/repo [--include-closed] [--sort size|recent|oldest] [--min-size N] [--limit N] [--json]
`,
"cluster-detail": `gitcrawl cluster-detail dumps one cluster and its member rows.
Usage:
gitcrawl cluster-detail owner/repo --id N [--member-limit N] [--body-chars N] [--hide-closed] [--json]
`,
"neighbors": `gitcrawl neighbors lists vector-nearest local issue and pull request rows.
Usage:
gitcrawl neighbors owner/repo --number ref [--limit N] [--json]
`,
"runs": `gitcrawl runs lists local pipeline run history.
Usage:
gitcrawl runs owner/repo [--kind sync|summary|embedding|cluster] [--limit N] [--json]
`,
"close-thread": `gitcrawl close-thread locally hides one issue or pull request row.
Usage:
gitcrawl close-thread owner/repo --number ref [--reason text] [--json]
`,
"reopen-thread": `gitcrawl reopen-thread clears a local thread hide.
Usage:
gitcrawl reopen-thread owner/repo --number ref [--json]
`,
"close-cluster": `gitcrawl close-cluster locally hides one durable cluster.
Usage:
gitcrawl close-cluster owner/repo --id N [--reason text] [--json]
`,
"reopen-cluster": `gitcrawl reopen-cluster clears a local cluster hide.
Usage:
gitcrawl reopen-cluster owner/repo --id N [--json]
`,
"exclude-cluster-member": `gitcrawl exclude-cluster-member locally removes one row from a durable cluster.
Usage:
gitcrawl exclude-cluster-member owner/repo --id N --number ref [--reason text] [--json]
`,
"include-cluster-member": `gitcrawl include-cluster-member restores one row to a durable cluster.
Usage:
gitcrawl include-cluster-member owner/repo --id N --number ref [--json]
`,
"set-cluster-canonical": `gitcrawl set-cluster-canonical sets the canonical row for a durable cluster.
Usage:
gitcrawl set-cluster-canonical owner/repo --id N --number ref [--reason text] [--json]
`,
"gh": `gitcrawl gh runs a gh-compatible local cache shim with fallback to real gh.
Usage:
gitcrawl gh <gh command>
gitcrawl gh xcache stats|keys|gc|flush|reset|snapshot [--json]
`,
}
const tuiUsageText = `gitcrawl tui opens the local terminal cluster browser.
Usage:

View File

@ -153,6 +153,15 @@ func TestMetadataStatusAndControlStatusJSON(t *testing.T) {
if !strings.Contains(helpOut.String(), "cluster browser") {
t.Fatalf("tui help output = %q", helpOut.String())
}
for _, topic := range []string{"metadata", "status", "init", "configure", "doctor", "sync", "refresh", "embed", "threads", "search", "cluster", "clusters", "durable-clusters", "cluster-detail", "cluster-explain", "neighbors", "runs", "close-thread", "reopen-thread", "close-cluster", "reopen-cluster", "exclude-cluster-member", "include-cluster-member", "set-cluster-canonical", "gh"} {
helpOut.Reset()
if err := help.printCommandUsage(topic); err != nil {
t.Fatalf("%s help: %v", topic, err)
}
if !strings.Contains(helpOut.String(), "Usage:") {
t.Fatalf("%s help output = %q", topic, helpOut.String())
}
}
if err := New().Run(ctx, []string{"--config", configPath, "status", "extra"}); err == nil {
t.Fatal("status extra arg should fail")
}
@ -980,7 +989,7 @@ func TestGlobalCommandBranches(t *testing.T) {
}{
{args: []string{"--help"}, wantOut: "Usage:"},
{args: []string{"help"}, wantOut: "Usage:"},
{args: []string{"help", "sync"}, wantErr: true, exitCode: 2},
{args: []string{"help", "sync"}, wantOut: "gitcrawl sync"},
{args: []string{"--version"}, wantOut: "dev"},
{args: []string{"version"}, wantOut: "dev"},
{args: []string{"--json", "version"}, wantOut: `"version"`},
@ -2326,6 +2335,36 @@ func TestClustersDefaultShowsActivePrimaryMembers(t *testing.T) {
if len(all.Clusters) != 1 || all.Clusters[0].MemberCount != 1 {
t.Fatalf("hide-closed should focus active members, got %#v", all.Clusters)
}
stdout.Reset()
detail := New()
detail.Stdout = &stdout
if err := detail.Run(ctx, []string{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", "90"}); err != nil {
t.Fatalf("cluster-detail: %v", err)
}
var detailPayload struct {
Members []store.ClusterMemberDetail `json:"members"`
}
if err := json.Unmarshal(stdout.Bytes(), &detailPayload); err != nil {
t.Fatalf("decode cluster detail: %v\n%s", err, stdout.String())
}
if len(detailPayload.Members) != 2 {
t.Fatalf("default cluster-detail should match visible cluster members, got %#v", detailPayload.Members)
}
stdout.Reset()
hideDetail := New()
hideDetail.Stdout = &stdout
if err := hideDetail.Run(ctx, []string{"--config", configPath, "--json", "cluster-detail", "openclaw/openclaw", "--id", "90", "--hide-closed"}); err != nil {
t.Fatalf("cluster-detail hide closed: %v", err)
}
detailPayload.Members = nil
if err := json.Unmarshal(stdout.Bytes(), &detailPayload); err != nil {
t.Fatalf("decode hide-closed cluster detail: %v\n%s", err, stdout.String())
}
if len(detailPayload.Members) != 1 || detailPayload.Members[0].Thread.Number != 90 {
t.Fatalf("hide-closed cluster-detail should focus open members, got %#v", detailPayload.Members)
}
}
func TestClusterMemberOverrideCommands(t *testing.T) {

67
internal/cli/gh_path.go Normal file
View File

@ -0,0 +1,67 @@
package cli
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
)
func resolveRealGHPath() (string, error) {
envPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH"))
candidates := []string{}
if envPath != "" {
candidates = append(candidates, envPath)
}
candidates = append(candidates,
"/opt/homebrew/opt/gh/bin/gh",
"/usr/local/opt/gh/bin/gh",
"/usr/local/bin/gh",
"/usr/bin/gh",
)
if lookPath, err := exec.LookPath("gh"); err == nil {
candidates = append(candidates, lookPath)
}
seen := map[string]bool{}
for _, candidate := range candidates {
candidate = strings.TrimSpace(candidate)
if candidate == "" || seen[candidate] {
continue
}
seen[candidate] = true
info, err := os.Stat(candidate)
if err != nil || info.IsDir() {
if envPath != "" && candidate == envPath {
return "", fmt.Errorf("real gh not found at GITCRAWL_GH_PATH %q", envPath)
}
continue
}
if isGitcrawlShimPath(candidate) {
if envPath != "" && candidate == envPath {
return "", fmt.Errorf("GITCRAWL_GH_PATH points to the gitcrawl shim (%s); set it to the real gh binary", envPath)
}
continue
}
return candidate, nil
}
return "", fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH")
}
func isGitcrawlShimPath(path string) bool {
if path == "" {
return false
}
resolved := path
if eval, err := filepath.EvalSymlinks(path); err == nil {
resolved = eval
}
for _, value := range []string{path, resolved} {
base := strings.ToLower(filepath.Base(value))
if base == "gitcrawl" || base == "gitcrawl-gh" {
return true
}
}
return false
}

View File

@ -104,6 +104,15 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
if err != nil {
return err
}
if len(threads) == 0 && ghSearchNeedsLiveEmptyCheck(kind, query, state) {
lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state)
if err != nil {
return err
}
if lastSync.IsZero() {
return localGHUnsupported(fmt.Errorf("empty local %s search has no broad %s sync", args[0], ghDefaultListState(state)))
}
}
jsonFields := strings.TrimSpace(*jsonFieldsRaw)
if jsonFields != "" || a.format == FormatJSON {
@ -126,7 +135,7 @@ func (a *App) runGHSearch(ctx context.Context, args []string) error {
}
func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) error {
stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, maxAge)
stale, lastSync, err := a.ghSearchCacheStale(ctx, owner, repoName, state, maxAge)
if err != nil {
return err
}
@ -142,7 +151,7 @@ func (a *App) syncGHSearchIfStale(ctx context.Context, owner, repoName, state st
return err
}
func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, maxAge time.Duration) (bool, time.Time, error) {
func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName, state string, maxAge time.Duration) (bool, time.Time, error) {
rt, err := a.openLocalRuntimeReadOnly(ctx)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
@ -158,7 +167,7 @@ func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, ma
}
return false, time.Time{}, err
}
lastSync, err := rt.Store.LastSuccessfulSyncAt(ctx, repo.ID)
lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state)
if err != nil {
return false, time.Time{}, err
}
@ -168,6 +177,20 @@ func (a *App) ghSearchCacheStale(ctx context.Context, owner, repoName string, ma
return time.Since(lastSync) > maxAge, lastSync, nil
}
func ghSearchNeedsLiveEmptyCheck(kind, query, state string) bool {
if strings.TrimSpace(query) != "" || kind != "issue" {
return false
}
return ghDefaultListState(state) == "open"
}
func ghDefaultListState(state string) string {
if strings.TrimSpace(state) == "" {
return "open"
}
return strings.TrimSpace(state)
}
func parseGHSearchQuery(value string) (query string, repo string, state string) {
var queryParts []string
for _, part := range strings.Fields(value) {

View File

@ -66,6 +66,16 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
t.Fatalf("repo: %v", err)
}
finishedAt := time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339Nano)
if _, err := st.RecordRun(ctx, store.RunRecord{
RepoID: repoID,
Kind: "sync",
Scope: "numbers:13",
Status: "success",
StartedAt: time.Now().UTC().Format(time.RFC3339Nano),
FinishedAt: time.Now().UTC().Format(time.RFC3339Nano),
}); err != nil {
t.Fatalf("record targeted sync: %v", err)
}
if _, err := st.RecordRun(ctx, store.RunRecord{
RepoID: repoID,
Kind: "sync",
@ -74,7 +84,7 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
StartedAt: finishedAt,
FinishedAt: finishedAt,
}); err != nil {
t.Fatalf("record sync: %v", err)
t.Fatalf("record broad sync: %v", err)
}
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
@ -82,14 +92,14 @@ func TestGHSearchCacheStaleUsesRepoSyncRuns(t *testing.T) {
run := New()
run.configPath = configPath
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 2*time.Hour)
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "openclaw", "open", 2*time.Hour)
if err != nil {
t.Fatalf("freshness check: %v", err)
}
if stale || lastSync.IsZero() {
t.Fatalf("expected cache to be fresh, stale=%v lastSync=%s", stale, lastSync)
}
stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", 30*time.Minute)
stale, _, err = run.ghSearchCacheStale(ctx, "openclaw", "openclaw", "open", 30*time.Minute)
if err != nil {
t.Fatalf("stale freshness check: %v", err)
}
@ -110,7 +120,7 @@ func TestGHSearchCacheStaleWhenRepoMissing(t *testing.T) {
run := New()
run.configPath = configPath
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", time.Minute)
stale, lastSync, err := run.ghSearchCacheStale(ctx, "openclaw", "missing", "open", time.Minute)
if err != nil {
t.Fatalf("freshness check: %v", err)
}

View File

@ -206,6 +206,22 @@ func (a *App) runGHThreadList(ctx context.Context, resource string, args []strin
if err != nil {
return err
}
if len(threads) == 0 && ghThreadListNeedsLiveEmptyCheck(ghThreadListRequest{
Kind: ghResourceKind(resource),
State: strings.TrimSpace(*stateRaw),
Query: strings.TrimSpace(*searchRaw),
Author: strings.TrimSpace(*authorRaw),
Assignee: strings.TrimSpace(*assigneeRaw),
Labels: labels.Values(),
}) {
fresh, err := a.localGHThreadListHasBroadSync(ctx, repoValue, strings.TrimSpace(*stateRaw))
if err != nil {
return err
}
if !fresh {
return localGHUnsupported(fmt.Errorf("empty local %s list has no broad %s sync", resource, ghDefaultListState(*stateRaw)))
}
}
jsonFields := strings.TrimSpace(*jsonFieldsRaw)
if jsonFields != "" || strings.TrimSpace(*jqRaw) != "" || a.format == FormatJSON {
if jsonFields == "" {
@ -293,6 +309,34 @@ func (a *App) localGHThreads(ctx context.Context, req ghThreadListRequest) ([]st
})
}
func ghThreadListNeedsLiveEmptyCheck(req ghThreadListRequest) bool {
if req.Kind != "issue" || strings.TrimSpace(req.Query) != "" || strings.TrimSpace(req.Author) != "" || strings.TrimSpace(req.Assignee) != "" || len(req.Labels) > 0 {
return false
}
return ghDefaultListState(req.State) == "open"
}
func (a *App) localGHThreadListHasBroadSync(ctx context.Context, repoValue, state string) (bool, error) {
owner, repoName, err := parseOwnerRepo(repoValue)
if err != nil {
return false, err
}
rt, err := a.openLocalRuntimeReadOnly(ctx)
if err != nil {
return false, localGHUnsupported(err)
}
defer rt.Store.Close()
repo, err := rt.repository(ctx, owner, repoName)
if err != nil {
return false, localGHUnsupported(err)
}
lastSync, err := rt.Store.LastSuccessfulListSyncAt(ctx, repo.ID, state)
if err != nil {
return false, err
}
return !lastSync.IsZero(), nil
}
func (a *App) resolveGHRepo(ctx context.Context, explicit string) (string, error) {
if strings.TrimSpace(explicit) != "" {
return strings.TrimSpace(explicit), nil
@ -313,17 +357,9 @@ func (a *App) resolveGHRepo(ctx context.Context, explicit string) (string, error
}
func (a *App) execRealGH(ctx context.Context, args []string) error {
ghPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH"))
if ghPath == "" {
if _, err := os.Stat("/opt/homebrew/opt/gh/bin/gh"); err == nil {
ghPath = "/opt/homebrew/opt/gh/bin/gh"
} else {
var err error
ghPath, err = exec.LookPath("gh")
if err != nil {
return fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH")
}
}
ghPath, err := resolveRealGHPath()
if err != nil {
return err
}
cmd := exec.CommandContext(ctx, ghPath, args...)
cmd.Stdin = os.Stdin

View File

@ -85,24 +85,16 @@ func cacheGHReadErrors() bool {
}
func (a *App) captureRealGH(ctx context.Context, args []string) (string, string, int, error) {
ghPath := strings.TrimSpace(os.Getenv("GITCRAWL_GH_PATH"))
if ghPath == "" {
if _, err := os.Stat("/opt/homebrew/opt/gh/bin/gh"); err == nil {
ghPath = "/opt/homebrew/opt/gh/bin/gh"
} else {
var err error
ghPath, err = exec.LookPath("gh")
if err != nil {
return "", "", 127, fmt.Errorf("real gh not found; set GITCRAWL_GH_PATH")
}
}
ghPath, err := resolveRealGHPath()
if err != nil {
return "", "", 127, err
}
var stdout, stderr bytes.Buffer
cmd := exec.CommandContext(ctx, ghPath, args...)
cmd.Stdin = os.Stdin
cmd.Stdout = &stdout
cmd.Stderr = &stderr
err := cmd.Run()
err = cmd.Run()
exitCode := 0
if err != nil {
exitCode = 1

View File

@ -228,6 +228,18 @@ func TestGHShimCachePolicyExtraBranches(t *testing.T) {
if strings.TrimSpace(ghOut.String()) != "real-gh:" {
t.Fatalf("empty gh shim output = %q", ghOut.String())
}
shimPath := filepath.Join(t.TempDir(), "gitcrawl-gh")
if err := os.WriteFile(shimPath, []byte("#!/bin/sh\necho shim\n"), 0o755); err != nil {
t.Fatalf("write fake shim: %v", err)
}
shimLink := filepath.Join(t.TempDir(), "gh")
if err := os.Symlink(shimPath, shimLink); err != nil {
t.Fatalf("symlink fake shim: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", shimLink)
if _, err := resolveRealGHPath(); err == nil || !strings.Contains(err.Error(), "gitcrawl shim") {
t.Fatalf("shim path should fail fast, err=%v", err)
}
t.Setenv("GITCRAWL_GH_STALE_GRACE", "3m")
if got := ghCommandCacheStaleGrace([]string{"api", "users/octocat"}); got != 3*time.Minute {
t.Fatalf("env stale grace = %s", got)

View File

@ -4,6 +4,8 @@ import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
@ -64,6 +66,122 @@ func TestGHShimFallsBackForUnsupportedRead(t *testing.T) {
}
}
func TestGHShimFallsBackForEmptyOpenIssueListWithoutBroadSync(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimEmptyRepo(t, ctx)
dir := t.TempDir()
ghPath := filepath.Join(dir, "gh")
if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho fallback:$*\n"), 0o755); err != nil {
t.Fatalf("write fake gh: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", ghPath)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "list", "-R", "openclaw/openclaw", "--state", "open", "--json", "number"}); err != nil {
t.Fatalf("fallback: %v", err)
}
if got := strings.TrimSpace(stdout.String()); got != "fallback:issue list -R openclaw/openclaw --state open --json number" {
t.Fatalf("fallback output = %q", got)
}
}
func TestGHShimSearchFallsBackForEmptyOpenRepoWithoutBroadSync(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimEmptyRepo(t, ctx)
dir := t.TempDir()
ghPath := filepath.Join(dir, "gh")
if err := os.WriteFile(ghPath, []byte("#!/bin/sh\necho fallback:$*\n"), 0o755); err != nil {
t.Fatalf("write fake gh: %v", err)
}
t.Setenv("GITCRAWL_GH_PATH", ghPath)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "gh", "search", "issues", "-R", "openclaw/openclaw", "--state", "open", "--json", "number"}); err != nil {
t.Fatalf("fallback: %v", err)
}
if got := strings.TrimSpace(stdout.String()); got != "fallback:search issues -R openclaw/openclaw --state open --json number" {
t.Fatalf("fallback output = %q", got)
}
}
func TestGHShimAutoHydratePortableStoreWritesRuntimeMirror(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
remoteDir := filepath.Join(dir, "remote")
checkoutDir := filepath.Join(dir, "checkout")
dbRel := filepath.Join("data", "openclaw__openclaw.sync.db")
if err := os.MkdirAll(filepath.Join(remoteDir, "data"), 0o755); err != nil {
t.Fatalf("mkdir remote data: %v", err)
}
if err := runGit(ctx, remoteDir, "init", "-b", "main"); err != nil {
t.Fatalf("git init: %v", err)
}
seedPortableThread(t, filepath.Join(remoteDir, dbRel), 1, "portable issue")
if err := runGit(ctx, remoteDir, "add", dbRel); err != nil {
t.Fatalf("git add seed: %v", err)
}
if err := runGit(ctx, remoteDir, "-c", "user.email=test@example.com", "-c", "user.name=Test", "commit", "-m", "seed store"); err != nil {
t.Fatalf("git commit seed: %v", err)
}
if _, err := syncPortableStore(ctx, remoteDir, checkoutDir); err != nil {
t.Fatalf("clone portable store: %v", err)
}
configPath := filepath.Join(dir, "config.toml")
app := New()
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", filepath.Join(checkoutDir, dbRel)}); err != nil {
t.Fatalf("init config: %v", err)
}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/repos/openclaw/openclaw":
_ = json.NewEncoder(w).Encode(map[string]any{"id": 101, "full_name": "openclaw/openclaw"})
case "/repos/openclaw/openclaw/issues/2":
_ = json.NewEncoder(w).Encode(map[string]any{
"id": 502,
"number": 2,
"state": "open",
"title": "runtime-only issue",
"body": "hydrate into runtime mirror",
"html_url": "https://github.com/openclaw/openclaw/issues/2",
"created_at": "2026-05-08T00:00:00Z",
"updated_at": "2026-05-08T00:00:00Z",
"labels": []map[string]any{},
"assignees": []map[string]any{},
"user": map[string]any{"login": "alice", "type": "User"},
})
default:
t.Fatalf("unexpected path: %s", r.URL.String())
}
}))
defer server.Close()
t.Setenv("GITHUB_TOKEN", "test-token")
t.Setenv("GITCRAWL_GITHUB_BASE_URL", server.URL)
run := New()
var stdout bytes.Buffer
run.Stdout = &stdout
if err := run.Run(ctx, []string{"--config", configPath, "gh", "issue", "view", "2", "-R", "openclaw/openclaw", "--json", "number,title"}); err != nil {
t.Fatalf("gh issue view: %v", err)
}
if !strings.Contains(stdout.String(), `"number": 2`) || !strings.Contains(stdout.String(), "runtime-only issue") {
t.Fatalf("view output = %q", stdout.String())
}
if !gitWorktreeClean(ctx, checkoutDir) {
t.Fatal("auto-hydrate dirtied portable checkout")
}
assertPortableThreadPresence(t, ctx, filepath.Join(checkoutDir, dbRel), 2, false)
mirrorPath, err := run.portableRuntimeDBPath(filepath.Join(checkoutDir, dbRel))
if err != nil {
t.Fatalf("runtime db path: %v", err)
}
assertPortableThreadPresence(t, ctx, mirrorPath, 2, true)
}
func TestGHShimViewAcceptsFullGitHubURL(t *testing.T) {
ctx := context.Background()
configPath := seedGHShimRepo(t, ctx)
@ -87,6 +205,74 @@ func TestGHShimViewAcceptsFullGitHubURL(t *testing.T) {
}
}
func seedGHShimEmptyRepo(t *testing.T, ctx context.Context) string {
t.Helper()
dir := t.TempDir()
configPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "gitcrawl.db")
app := New()
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
t.Fatalf("init: %v", err)
}
cfg, err := config.Load(configPath)
if err != nil {
t.Fatalf("load config: %v", err)
}
cfg.CacheDir = filepath.Join(dir, "cache")
if err := config.Save(configPath, cfg); err != nil {
t.Fatalf("save config: %v", err)
}
st, err := store.Open(ctx, dbPath)
if err != nil {
t.Fatalf("open store: %v", err)
}
repoID, err := st.UpsertRepository(ctx, store.Repository{
Owner: "openclaw",
Name: "openclaw",
FullName: "openclaw/openclaw",
RawJSON: "{}",
UpdatedAt: "2026-05-08T00:00:00Z",
})
if err != nil {
t.Fatalf("seed repository: %v", err)
}
if _, err := st.RecordRun(ctx, store.RunRecord{
RepoID: repoID,
Kind: "sync",
Scope: "numbers:13",
Status: "success",
StartedAt: "2026-05-08T00:00:00Z",
FinishedAt: "2026-05-08T00:00:01Z",
}); err != nil {
t.Fatalf("record targeted sync: %v", err)
}
if err := st.Close(); err != nil {
t.Fatalf("close store: %v", err)
}
return configPath
}
func assertPortableThreadPresence(t *testing.T, ctx context.Context, dbPath string, number int, want bool) {
t.Helper()
st, err := store.OpenReadOnly(ctx, dbPath)
if err != nil {
t.Fatalf("open store %s: %v", dbPath, err)
}
defer st.Close()
repo, err := st.RepositoryByFullName(ctx, "openclaw/openclaw")
if err != nil {
t.Fatalf("repository %s: %v", dbPath, err)
}
threads, err := st.ListThreadsFiltered(ctx, store.ThreadListOptions{RepoID: repo.ID, IncludeClosed: true, Numbers: []int{number}})
if err != nil {
t.Fatalf("list threads %s: %v", dbPath, err)
}
got := len(threads) > 0
if got != want {
t.Fatalf("thread %d presence in %s = %v, want %v", number, dbPath, got, want)
}
}
func seedGHShimRepo(t *testing.T, ctx context.Context) string {
t.Helper()
dir := t.TempDir()

View File

@ -64,7 +64,7 @@ func candidateRealGHPaths() []string {
seen := map[string]bool{}
unique := paths[:0]
for _, path := range paths {
if path = strings.TrimSpace(path); path != "" && !seen[path] {
if path = strings.TrimSpace(path); path != "" && !seen[path] && !isGitcrawlShimPath(path) {
seen[path] = true
unique = append(unique, path)
}

View File

@ -3,6 +3,7 @@ package cli
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
@ -26,6 +27,8 @@ const portableStoreRefreshTimeout = 15 * time.Second
const portableStoreRefreshTTL = 2 * time.Minute
const portableStoreRefreshFailureBackoff = time.Minute
var errPortableStoreDirty = errors.New("portable store checkout has local changes")
func (a *App) openLocalRuntime(ctx context.Context) (localRuntime, error) {
cfg, err := config.Load(a.configPath)
if err != nil {
@ -91,7 +94,7 @@ func refreshPortableStoreForDB(ctx context.Context, dbPath string) error {
return nil
}
if !gitWorktreeClean(ctx, root) {
return nil
return errPortableStoreDirty
}
pullCtx, cancel := context.WithTimeout(ctx, portableStoreRefreshTimeout)
defer cancel()
@ -169,6 +172,7 @@ func refreshPortableStoreForDBIfDue(ctx context.Context, sourceDBPath, mirrorPat
if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil {
return err
}
removeStalePortableRefreshLock(lockPath, now)
lock, locked := tryGHCommandCacheLock(lockPath)
if !locked {
return nil
@ -196,6 +200,17 @@ func refreshPortableStoreForDBIfDue(ctx context.Context, sourceDBPath, mirrorPat
return writePortableStoreRefreshState(statePath, state)
}
func removeStalePortableRefreshLock(path string, now time.Time) {
info, err := os.Stat(path)
if err != nil {
return
}
if now.Sub(info.ModTime()) <= 2*portableStoreRefreshTimeout {
return
}
_ = os.Remove(path)
}
func portableStoreRefreshInterval() time.Duration {
if raw := strings.TrimSpace(os.Getenv("GITCRAWL_PORTABLE_REFRESH_TTL")); raw != "" {
if duration, err := time.ParseDuration(raw); err == nil && duration >= 0 {

View File

@ -66,6 +66,22 @@ func TestPortableRuntimeUtilityBranches(t *testing.T) {
if recentPortableRefresh("", now, time.Minute) || recentPortableRefresh("bad", now, time.Minute) || !recentPortableRefresh(now.Format(time.RFC3339Nano), now, time.Minute) {
t.Fatal("recent refresh classification mismatch")
}
lockPath := filepath.Join(dir, "refresh.lock")
if err := os.WriteFile(lockPath, []byte("123\n"), 0o600); err != nil {
t.Fatalf("write lock: %v", err)
}
removeStalePortableRefreshLock(lockPath, now)
if _, err := os.Stat(lockPath); err != nil {
t.Fatalf("fresh lock should remain: %v", err)
}
old := now.Add(-3 * portableStoreRefreshTimeout)
if err := os.Chtimes(lockPath, old, old); err != nil {
t.Fatalf("age lock: %v", err)
}
removeStalePortableRefreshLock(lockPath, now)
if _, err := os.Stat(lockPath); !os.IsNotExist(err) {
t.Fatalf("stale lock should be removed, err=%v", err)
}
t.Setenv("GITCRAWL_PORTABLE_REFRESH_TTL", "0")
if got := portableStoreRefreshInterval(); got != 0 {
t.Fatalf("zero ttl = %s", got)

View File

@ -4,6 +4,7 @@ import (
"context"
"database/sql"
"fmt"
"strings"
"time"
)
@ -94,6 +95,50 @@ func (s *Store) LastSuccessfulSyncAt(ctx context.Context, repoID int64) (time.Ti
return parsed, nil
}
func (s *Store) LastSuccessfulListSyncAt(ctx context.Context, repoID int64, state string) (time.Time, error) {
scopes := listSyncScopesForState(state)
if len(scopes) == 0 {
return time.Time{}, nil
}
placeholders := make([]string, len(scopes))
args := make([]any, 0, 1+len(scopes))
args = append(args, repoID)
for i, scope := range scopes {
placeholders[i] = "?"
args = append(args, scope)
}
var lastSync string
err := s.q().QueryRowContext(ctx, `
select coalesce(max(finished_at), '')
from sync_runs
where repo_id = ? and status in ('success', 'completed') and scope in (`+strings.Join(placeholders, ",")+`)
`, args...).Scan(&lastSync)
if err != nil {
return time.Time{}, fmt.Errorf("read last successful list sync: %w", err)
}
if lastSync == "" {
return time.Time{}, nil
}
parsed, err := time.Parse(time.RFC3339Nano, lastSync)
if err != nil {
return time.Time{}, fmt.Errorf("parse last successful list sync %q: %w", lastSync, err)
}
return parsed, nil
}
func listSyncScopesForState(state string) []string {
switch strings.TrimSpace(strings.ToLower(state)) {
case "", "open":
return []string{"open", "all"}
case "closed":
return []string{"closed", "all"}
case "all":
return []string{"all"}
default:
return nil
}
}
func runTable(kind string) (string, error) {
switch kind {
case "sync":

View File

@ -111,3 +111,42 @@ func TestLastSuccessfulSyncAt(t *testing.T) {
t.Fatalf("last sync = %s, want %s", lastSync, want)
}
}
func TestLastSuccessfulListSyncAtIgnoresTargetedRuns(t *testing.T) {
ctx := context.Background()
st, err := Open(ctx, filepath.Join(t.TempDir(), "gitcrawl.db"))
if err != nil {
t.Fatalf("open store: %v", err)
}
defer st.Close()
repoID, err := st.UpsertRepository(ctx, Repository{
Owner: "openclaw", Name: "gitcrawl", FullName: "openclaw/gitcrawl", RawJSON: "{}", UpdatedAt: "2026-04-26T00:00:00Z",
})
if err != nil {
t.Fatalf("repo: %v", err)
}
if _, err := st.RecordRun(ctx, RunRecord{
RepoID: repoID, Kind: "sync", Scope: "numbers:13", Status: "success",
StartedAt: "2026-04-26T00:03:00Z", FinishedAt: "2026-04-26T00:03:30Z",
}); err != nil {
t.Fatalf("record targeted run: %v", err)
}
if lastSync, err := st.LastSuccessfulListSyncAt(ctx, repoID, "open"); err != nil || !lastSync.IsZero() {
t.Fatalf("targeted run should not count as broad list sync: last=%s err=%v", lastSync, err)
}
if _, err := st.RecordRun(ctx, RunRecord{
RepoID: repoID, Kind: "sync", Scope: "all", Status: "success",
StartedAt: "2026-04-26T00:04:00Z", FinishedAt: "2026-04-26T00:04:30Z",
}); err != nil {
t.Fatalf("record all run: %v", err)
}
lastSync, err := st.LastSuccessfulListSyncAt(ctx, repoID, "open")
if err != nil {
t.Fatalf("last broad sync: %v", err)
}
want, _ := time.Parse(time.RFC3339Nano, "2026-04-26T00:04:30Z")
if !lastSync.Equal(want) {
t.Fatalf("last broad sync = %s, want %s", lastSync, want)
}
}