diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 399a52e..ec9cc7f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,6 +64,16 @@ jobs: - name: Build run: go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=${GITHUB_SHA:0:7}" -o bin/gitcrawl ./cmd/gitcrawl + - name: Smoke test TUI help + run: | + set -euo pipefail + test -n "$(./bin/gitcrawl --version)" + ./bin/gitcrawl metadata --json | grep -q '"schema_version"' + ./bin/gitcrawl status --json | grep -q '"databases"' + output="$(./bin/gitcrawl help tui)" + printf '%s\n' "$output" + printf '%s' "$output" | grep -q "gitcrawl tui" + - name: Snapshot release build uses: goreleaser/goreleaser-action@v7.1.0 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 78bb0ab..26288d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,11 @@ - Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`. - Cache more ghx-style read-only fallthroughs, including release, workflow, secret, variable, project, ruleset, gist, org, and search reads; cache repeat read failures by default; and clear the fallthrough cache after the corresponding mutating `gh` commands. - Promote portable backups to the v2 format: keep compact comments, PR files, commits, checks, and workflow runs while stripping raw JSON, generated documents, vectors, clusters, and run history. +- Add crawlkit control metadata/status surfaces with command-local `metadata --json`, `status --json`, and `doctor --json`. +- Include the primary SQLite database inventory in status JSON so local control surfaces can discover archive storage without opening live stores. +- Route config path handling and SQLite openers through `crawlkit` so GitHub archive tooling shares the same foundation as the Slack, Discord, and Notion crawlers. +- Keep shared crawl app TUI nomenclature aligned while `gitcrawl tui` remains the richer cluster-browser reference implementation. +- Keep the existing `gitcrawl tui` as the family reference terminal interface and add CI smoke coverage for its help surface. ## 0.1.2 - 2026-05-01 diff --git a/README.md b/README.md index a38c448..48cecad 100644 --- a/README.md +++ b/README.md @@ -91,4 +91,5 @@ go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(git de ```bash go test ./... go build ./cmd/gitcrawl +go run ./cmd/gitcrawl help tui ``` diff --git a/go.mod b/go.mod index 18e8e33..8fca390 100644 --- a/go.mod +++ b/go.mod @@ -8,8 +8,7 @@ require ( github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 github.com/charmbracelet/x/ansi v0.11.7 github.com/mattn/go-isatty v0.0.22 - github.com/pelletier/go-toml/v2 v2.3.1 - modernc.org/sqlite v1.50.0 + github.com/vincentkoc/crawlkit v0.4.0 ) require ( @@ -30,6 +29,7 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/pelletier/go-toml/v2 v2.3.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect @@ -38,4 +38,5 @@ require ( modernc.org/libc v1.72.1 // indirect modernc.org/mathutil v1.7.1 // indirect modernc.org/memory v1.11.0 // indirect + modernc.org/sqlite v1.50.0 // indirect ) diff --git a/go.sum b/go.sum index 04a525e..ebd0ef5 100644 --- a/go.sum +++ b/go.sum @@ -56,6 +56,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI= +github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= diff --git a/internal/cli/app.go b/internal/cli/app.go index a52099d..d9611d1 100644 --- a/internal/cli/app.go +++ b/internal/cli/app.go @@ -7,6 +7,7 @@ import ( "flag" "fmt" "io" + "log/slog" "os" "os/exec" "path/filepath" @@ -23,6 +24,7 @@ import ( "github.com/openclaw/gitcrawl/internal/store" "github.com/openclaw/gitcrawl/internal/syncer" "github.com/openclaw/gitcrawl/internal/vector" + "github.com/vincentkoc/crawlkit/control" ) const ( @@ -124,12 +126,16 @@ func (a *App) Run(ctx context.Context, args []string) error { switch rest[0] { case "version": return a.writeOutput("version", map[string]string{"version": version}, false) + case "metadata": + return a.runMetadata(rest[1:]) case "serve": return usageErr(fmt.Errorf("serve is not supported in gitcrawl")) case "init": return a.runInit(ctx, rest[1:]) case "doctor": return a.runDoctor(ctx, rest[1:]) + case "status": + return a.runStatus(ctx, rest[1:]) case "sync": return a.runSync(ctx, rest[1:]) case "threads": @@ -1077,23 +1083,35 @@ func (a *App) runTUI(ctx context.Context, args []string) error { rt, err = a.openLocalRuntimeReadOnly(ctx) } if err != nil { + if !interactive && errors.Is(err, os.ErrNotExist) { + cfg := config.Default() + if cfgErr := cfg.Normalize(); cfgErr != nil { + return cfgErr + } + sort, sortErr := resolveTUISort(*sortMode, cfg) + if sortErr != nil { + return sortErr + } + return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, cfg, cfg.DBPath, sort, minSize, limit, *hideClosed), true) + } return err } defer rt.Store.Close() repo, inferred, err := a.resolveOptionalRepository(ctx, rt, fs.Args()) if err != nil { + if !interactive && len(fs.Args()) == 0 && strings.Contains(err.Error(), "no local repositories found") { + sort, sortErr := resolveTUISort(*sortMode, rt.Config) + if sortErr != nil { + return sortErr + } + return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, rt.Config, rt.SourceDBPath, sort, minSize, limit, *hideClosed), true) + } return err } - sort := strings.TrimSpace(*sortMode) - if sort == "" { - sort = strings.TrimSpace(rt.Config.TUI.DefaultSort) - } - if sort == "" { - sort = "size" - } - if sort != "recent" && sort != "oldest" && sort != "size" { - return usageErr(fmt.Errorf("unsupported sort %q", sort)) + sort, err := resolveTUISort(*sortMode, rt.Config) + if err != nil { + return err } showClosed := !*hideClosed || *includeClosed @@ -1148,6 +1166,38 @@ func (a *App) runTUI(ctx context.Context, args []string) error { return a.runInteractiveTUI(ctx, rt.Store, repo.ID, payload) } +func resolveTUISort(raw string, cfg config.Config) (string, error) { + sort := strings.TrimSpace(raw) + if sort == "" { + sort = strings.TrimSpace(cfg.TUI.DefaultSort) + } + if sort == "" { + sort = "size" + } + if sort != "recent" && sort != "oldest" && sort != "size" { + return "", usageErr(fmt.Errorf("unsupported sort %q", sort)) + } + return sort, nil +} + +func emptyClusterBrowserPayload(ctx context.Context, cfg config.Config, sourceDBPath, sort string, minSize, limit int, hideClosed bool) clusterBrowserPayload { + if strings.TrimSpace(sourceDBPath) == "" { + sourceDBPath = cfg.DBPath + } + return clusterBrowserPayload{ + Mode: "cluster-browser", + DBSource: databaseSourceKind(sourceDBPath), + DBLocation: databaseSourceLocation(ctx, sourceDBPath), + Sort: sort, + MinSize: minSize, + Limit: limit, + HideClosed: hideClosed, + EmbedModel: cfg.OpenAI.EmbedModel, + EmbeddingBasis: cfg.EmbeddingBasis, + Clusters: []store.ClusterSummary{}, + } +} + func databaseSourceKind(dbPath string) string { if _, ok := portableStoreRoot(dbPath); ok { return "remote" @@ -1816,6 +1866,7 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy Reporter: func(message string) { fmt.Fprintln(a.Stderr, message) }, + Logger: progressLogger(a.Stderr), }) if err != nil { return syncer.Stats{}, err @@ -1823,6 +1874,17 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy return stats, nil } +func progressLogger(w io.Writer) *slog.Logger { + return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{ + ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr { + if attr.Key == slog.TimeKey { + return slog.Attr{} + } + return attr + }, + })) +} + func (a *App) runInit(ctx context.Context, args []string) error { fs := flag.NewFlagSet("init", flag.ContinueOnError) fs.SetOutput(io.Discard) @@ -1887,6 +1949,8 @@ func (a *App) runPortable(ctx context.Context, args []string) error { return usageErr(fmt.Errorf("portable requires a subcommand")) } switch args[0] { + case "help", "--help", "-h": + return a.printCommandUsage("portable") case "prune": return a.runPortablePrune(ctx, args[1:]) default: @@ -2197,6 +2261,113 @@ func (a *App) runDoctor(ctx context.Context, args []string) error { }, true) } +func (a *App) runMetadata(args []string) error { + fs := flag.NewFlagSet("metadata", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "write JSON output") + if err := fs.Parse(normalizeCommandArgs(args, nil)); err != nil { + return usageErr(err) + } + a.applyCommandJSON(*jsonOut) + if fs.NArg() != 0 { + return usageErr(fmt.Errorf("metadata takes flags only")) + } + cfg := config.Default() + manifest := control.NewManifest("gitcrawl", "Git Crawl", "gitcrawl") + manifest.Description = "Local-first GitHub issue and pull request crawler." + manifest.Branding = control.Branding{SymbolName: "point.3.connected.trianglepath.dotted", AccentColor: "#2da44e"} + manifest.Paths = control.Paths{ + DefaultConfig: config.ResolvePath(""), + ConfigEnv: config.DefaultConfigEnv, + DefaultDatabase: cfg.DBPath, + DefaultCache: cfg.CacheDir, + DefaultLogs: cfg.LogDir, + } + manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "search", "tui", "portable", "clusters", "embeddings"} + manifest.Privacy = control.Privacy{ContainsPrivateMessages: false, ExportsSecrets: false, LocalOnlyScopes: []string{"github", "sqlite", "portable"}} + manifest.Commands = map[string]control.Command{ + "status": {Title: "Status", Argv: []string{"gitcrawl", "status", "--json"}, JSON: true}, + "doctor": {Title: "Doctor", Argv: []string{"gitcrawl", "doctor", "--json"}, JSON: true}, + "sync": {Title: "Sync repository", Argv: []string{"gitcrawl", "sync", "--json"}, JSON: true, Mutates: true}, + "search": {Title: "Search", Argv: []string{"gitcrawl", "search", "--json"}, JSON: true}, + "tui": {Title: "Terminal cluster browser", Argv: []string{"gitcrawl", "tui"}}, + "tui-json": {Title: "Terminal cluster data", Argv: []string{"gitcrawl", "tui", "--json"}, JSON: true}, + "portable": {Title: "Portable store tools", Argv: []string{"gitcrawl", "portable", "prune", "--json"}, JSON: true, Mutates: true}, + "clusters": {Title: "Clusters", Argv: []string{"gitcrawl", "clusters", "--json"}, JSON: true}, + "legacy-sync-api": {Title: "Legacy sync-status alias", Argv: []string{"gitcrawl", "sync-status"}, Legacy: true, Deprecated: true}, + } + return a.writeOutput("metadata", manifest, false) +} + +func (a *App) runStatus(ctx context.Context, args []string) error { + fs := flag.NewFlagSet("status", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "write JSON output") + if err := fs.Parse(normalizeCommandArgs(args, nil)); err != nil { + return usageErr(err) + } + a.applyCommandJSON(*jsonOut) + if fs.NArg() != 0 { + return usageErr(fmt.Errorf("status takes flags only")) + } + cfg, err := config.Load(a.configPath) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return err + } + cfg = config.Default() + if err := cfg.Normalize(); err != nil { + return err + } + } + status := store.Status{DBPath: cfg.DBPath} + if _, err := os.Stat(cfg.DBPath); err == nil { + st, err := store.OpenReadOnly(ctx, cfg.DBPath) + if err != nil { + return err + } + defer st.Close() + status, err = st.Status(ctx) + if err != nil { + return err + } + } else if !errors.Is(err, os.ErrNotExist) { + return err + } + status.DBPath = cfg.DBPath + return a.writeOutput("status", controlStatus(config.ResolvePath(a.configPath), cfg, status), false) +} + +func controlStatus(configPath string, cfg config.Config, status store.Status) control.Status { + counts := []control.Count{ + control.NewCount("repositories", "Repositories", int64(status.RepositoryCount)), + control.NewCount("threads", "Threads", int64(status.ThreadCount)), + control.NewCount("open_threads", "Open threads", int64(status.OpenThreadCount)), + control.NewCount("clusters", "Clusters", int64(status.ClusterCount)), + } + out := control.NewStatus("gitcrawl", fmt.Sprintf("%d threads across %d repositories", status.ThreadCount, status.RepositoryCount)) + out.State = "current" + out.ConfigPath = configPath + out.DatabasePath = status.DBPath + out.Counts = counts + if !status.LastSyncAt.IsZero() { + out.LastSyncAt = status.LastSyncAt.UTC().Format(time.RFC3339) + } + db := control.SQLiteDatabase("primary", "GitHub archive", "archive", status.DBPath, true, counts) + out.DatabaseBytes = db.Bytes + out.WALBytes = fileSize(status.DBPath + "-wal") + out.Databases = []control.Database{db} + return out +} + +func fileSize(path string) int64 { + info, err := os.Stat(path) + if err != nil { + return 0 + } + return info.Size() +} + func (a *App) applyCommandJSON(enabled bool) { if enabled { a.format = FormatJSON @@ -2683,6 +2854,9 @@ func (a *App) printUsage() { func (a *App) printCommandUsage(command string) error { switch command { + case "portable": + fmt.Fprint(a.Stdout, portableUsageText) + return nil case "tui": fmt.Fprint(a.Stdout, tuiUsageText) return nil @@ -2704,6 +2878,8 @@ Global flags: --version print version Core commands: + metadata print crawlkit control metadata + status print fast read-only archive status init create config, optionally from a portable store doctor check config, token, and database readiness sync sync GitHub issue and pull request metadata @@ -2748,3 +2924,12 @@ Press n to load neighbors for the selected issue or PR. Enter from the members pane also loads neighbors before opening detail. The TUI quietly refreshes from the local store every 15 seconds and leaves the current status alone when nothing changed. ` + +const portableUsageText = `gitcrawl portable manages local portable-store snapshots. + +Usage: + gitcrawl portable prune [--body-chars N] [--no-vacuum] [--json] + +Subcommands: + prune prune volatile payloads from the configured portable store +` diff --git a/internal/cli/app_test.go b/internal/cli/app_test.go index e61349b..7935a92 100644 --- a/internal/cli/app_test.go +++ b/internal/cli/app_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "net/http" "net/http/httptest" @@ -1022,6 +1023,60 @@ func TestTUIInfersRepository(t *testing.T) { } } +func TestTUIJSONUsesDefaultsWhenConfigMissing(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + configPath := filepath.Join(dir, "missing.toml") + t.Setenv("GITCRAWL_DB_PATH", filepath.Join(dir, "missing.db")) + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "tui", "--json"}); err != nil { + t.Fatalf("tui: %v", err) + } + var payload map[string]any + if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil { + t.Fatalf("decode tui payload: %v\n%s", err, stdout.String()) + } + if payload["mode"] != "cluster-browser" { + t.Fatalf("mode = %#v", payload["mode"]) + } + clusters, ok := payload["clusters"].([]any) + if !ok || len(clusters) != 0 { + t.Fatalf("clusters = %#v", payload["clusters"]) + } + if _, err := os.Stat(configPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("config file should not be created, stat err=%v", err) + } +} + +func TestTUIJSONHandlesEmptyStoreWithoutRepository(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + dbPath := filepath.Join(dir, "gitcrawl.db") + app := New() + if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil { + t.Fatalf("init: %v", err) + } + + run := New() + var stdout bytes.Buffer + run.Stdout = &stdout + if err := run.Run(ctx, []string{"--config", configPath, "tui", "--json"}); err != nil { + t.Fatalf("tui: %v", err) + } + var payload map[string]any + if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil { + t.Fatalf("decode tui payload: %v\n%s", err, stdout.String()) + } + clusters, ok := payload["clusters"].([]any) + if !ok || len(clusters) != 0 { + t.Fatalf("clusters = %#v", payload["clusters"]) + } +} + func TestTUIRequiresInteractiveTerminalByDefault(t *testing.T) { ctx := context.Background() dir := t.TempDir() diff --git a/internal/config/config.go b/internal/config/config.go index cd5db41..f8fd11d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -6,7 +6,7 @@ import ( "path/filepath" "strings" - "github.com/pelletier/go-toml/v2" + crawlconfig "github.com/vincentkoc/crawlkit/config" ) const ( @@ -49,15 +49,24 @@ type TokenResolution struct { Source string } +var appConfig = crawlconfig.App{Name: "gitcrawl", ConfigEnv: DefaultConfigEnv} + func Default() Config { - home := homeDir() - base := filepath.Join(home, ".config", "gitcrawl") + paths, err := appConfig.DefaultPaths() + if err != nil { + paths = crawlconfig.Paths{ + DBPath: filepath.Join(homeDir(), ".config", "gitcrawl", "gitcrawl.db"), + CacheDir: filepath.Join(homeDir(), ".config", "gitcrawl", "cache"), + LogDir: filepath.Join(homeDir(), ".config", "gitcrawl", "logs"), + } + } + base := filepath.Dir(paths.DBPath) return Config{ Version: 1, - DBPath: filepath.Join(base, "gitcrawl.db"), - CacheDir: filepath.Join(base, "cache"), + DBPath: paths.DBPath, + CacheDir: paths.CacheDir, VectorDir: filepath.Join(base, "vectors"), - LogDir: filepath.Join(base, "logs"), + LogDir: paths.LogDir, EmbeddingBasis: "title_original", GitHub: GitHubConfig{ TokenEnv: DefaultTokenEnv, @@ -77,26 +86,19 @@ func Default() Config { } func ResolvePath(flagPath string) string { - if strings.TrimSpace(flagPath) != "" { - return expandHome(flagPath) + path, err := appConfig.ResolveConfigPath(flagPath) + if err != nil { + return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml") } - if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" { - return expandHome(envPath) - } - home := homeDir() - return filepath.Join(home, ".config", "gitcrawl", "config.toml") + return path } func Load(path string) (Config, error) { cfg := Default() resolved := ResolvePath(path) - data, err := os.ReadFile(resolved) - if err != nil { + if err := crawlconfig.LoadTOML(resolved, &cfg); err != nil { return Config{}, err } - if err := toml.Unmarshal(data, &cfg); err != nil { - return Config{}, fmt.Errorf("parse config: %w", err) - } if err := cfg.Normalize(); err != nil { return Config{}, err } @@ -108,21 +110,19 @@ func Save(path string, cfg Config) error { return err } resolved := ResolvePath(path) - if err := os.MkdirAll(filepath.Dir(resolved), 0o755); err != nil { - return fmt.Errorf("create config dir: %w", err) - } - data, err := toml.Marshal(cfg) - if err != nil { - return fmt.Errorf("marshal config: %w", err) - } - return os.WriteFile(resolved, data, 0o600) + return crawlconfig.WriteTOML(resolved, cfg, 0o600) } func EnsureRuntimeDirs(cfg Config) error { - for _, path := range []string{cfg.CacheDir, cfg.VectorDir, cfg.LogDir, filepath.Dir(cfg.DBPath)} { - if err := os.MkdirAll(expandHome(path), 0o755); err != nil { - return fmt.Errorf("create runtime dir %s: %w", path, err) - } + if err := crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{ + DBPath: cfg.DBPath, + CacheDir: cfg.CacheDir, + LogDir: cfg.LogDir, + }); err != nil { + return err + } + if err := os.MkdirAll(crawlconfig.ExpandHome(cfg.VectorDir), 0o755); err != nil { + return fmt.Errorf("create runtime dir %s: %w", cfg.VectorDir, err) } return nil } @@ -200,13 +200,7 @@ func envOrDefault(primary, fallback string) string { } func expandHome(path string) string { - if path == "~" { - return homeDir() - } - if strings.HasPrefix(path, "~/") { - return filepath.Join(homeDir(), strings.TrimPrefix(path, "~/")) - } - return path + return crawlconfig.ExpandHome(path) } func homeDir() string { diff --git a/internal/store/store.go b/internal/store/store.go index 0c92cba..0d202d1 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -4,12 +4,9 @@ import ( "context" "database/sql" "fmt" - "os" - "path/filepath" - "runtime" "time" - _ "modernc.org/sqlite" + crawlstore "github.com/vincentkoc/crawlkit/store" ) const ( @@ -39,64 +36,33 @@ type Status struct { } func Open(ctx context.Context, path string) (*Store, error) { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return nil, fmt.Errorf("create db dir: %w", err) - } - if err := ensureDBFile(path); err != nil { - return nil, err - } - dsn := fmt.Sprintf( - "file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)", - path, - ) - db, err := sql.Open("sqlite", dsn) + base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path}) if err != nil { - return nil, fmt.Errorf("open sqlite: %w", err) - } - db.SetMaxOpenConns(1) - db.SetMaxIdleConns(1) - if err := db.PingContext(ctx); err != nil { - _ = db.Close() - return nil, fmt.Errorf("ping sqlite: %w", err) - } - if err := tightenDBFilePerms(path); err != nil { - _ = db.Close() return nil, err } + db := base.DB() st := &Store{db: db, path: path} if err := st.migrate(ctx); err != nil { - _ = db.Close() + _ = base.Close() return nil, err } return st, nil } func OpenReadOnly(ctx context.Context, path string) (*Store, error) { - if _, err := os.Stat(path); err != nil { - return nil, fmt.Errorf("stat db file: %w", err) - } - dsn := fmt.Sprintf( - "file:%s?mode=ro&_pragma=query_only(1)&_pragma=foreign_keys(1)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)", - path, - ) - db, err := sql.Open("sqlite", dsn) + base, err := crawlstore.OpenReadOnly(ctx, path) if err != nil { - return nil, fmt.Errorf("open sqlite readonly: %w", err) - } - db.SetMaxOpenConns(1) - db.SetMaxIdleConns(1) - if err := db.PingContext(ctx); err != nil { - _ = db.Close() - return nil, fmt.Errorf("ping sqlite readonly: %w", err) + return nil, err } + db := base.DB() st := &Store{db: db, path: path} current, err := st.schemaVersion(ctx) if err != nil { - _ = db.Close() + _ = base.Close() return nil, err } if current > schemaVersion { - _ = db.Close() + _ = base.Close() return nil, fmt.Errorf("database schema version %d is newer than supported version %d", current, schemaVersion) } return st, nil @@ -273,31 +239,3 @@ func (s *Store) schemaVersion(ctx context.Context) (int, error) { } return version, nil } - -func ensureDBFile(path string) error { - if _, err := os.Stat(path); err == nil { - return nil - } else if !os.IsNotExist(err) { - return fmt.Errorf("stat db file: %w", err) - } - file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) - if err != nil && !os.IsExist(err) { - return fmt.Errorf("create db file: %w", err) - } - if file != nil { - if err := file.Close(); err != nil { - return fmt.Errorf("close db file: %w", err) - } - } - return nil -} - -func tightenDBFilePerms(path string) error { - if runtime.GOOS == "windows" { - return nil - } - if err := os.Chmod(path, 0o600); err != nil { - return fmt.Errorf("chmod db file: %w", err) - } - return nil -} diff --git a/internal/syncer/syncer.go b/internal/syncer/syncer.go index bb47944..d41c8a3 100644 --- a/internal/syncer/syncer.go +++ b/internal/syncer/syncer.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "log/slog" "strconv" "strings" "time" @@ -13,6 +14,7 @@ import ( "github.com/openclaw/gitcrawl/internal/documents" gh "github.com/openclaw/gitcrawl/internal/github" "github.com/openclaw/gitcrawl/internal/store" + "github.com/vincentkoc/crawlkit/progress" ) type GitHubClient interface { @@ -45,6 +47,7 @@ type Options struct { IncludeComments bool IncludePRDetails bool Reporter gh.Reporter + Logger *slog.Logger } type Stats struct { @@ -132,6 +135,15 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) { MetadataOnly: !options.IncludeComments, StartedAt: started, } + tracker := progress.New(options.Logger, progress.Options{ + Name: "sync", + Unit: "threads", + Total: int64(len(rows)), + Attrs: []any{ + "repository", stats.Repository, + "state", state, + }, + }) persist := func(st *store.Store) error { for _, row := range rows { thread := mapIssueToThread(repoID, row, s.now().Format(time.RFC3339Nano)) @@ -169,6 +181,11 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) { } else { stats.IssuesSynced++ } + tracker.Add(1, + "number", thread.Number, + "kind", thread.Kind, + "thread_state", thread.State, + ) } if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 { closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since) @@ -193,13 +210,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) { } if !options.IncludeComments { if err := s.store.WithTx(ctx, persist); err != nil { + tracker.Finish(err) return Stats{}, err } + tracker.Finish(nil) return stats, nil } if err := persist(s.store); err != nil { + tracker.Finish(err) return Stats{}, err } + tracker.Finish(nil) return stats, nil } diff --git a/internal/syncer/syncer_test.go b/internal/syncer/syncer_test.go index 7a2a606..8ac8c85 100644 --- a/internal/syncer/syncer_test.go +++ b/internal/syncer/syncer_test.go @@ -1,9 +1,12 @@ package syncer import ( + "bytes" "context" "encoding/json" + "log/slog" "path/filepath" + "strings" "testing" "time" @@ -286,7 +289,13 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) { s := New(fakeGitHub{}, st) s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) } - stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", IncludeComments: true}) + var progressLogs bytes.Buffer + stats, err := s.Sync(ctx, Options{ + Owner: "openclaw", + Repo: "gitcrawl", + IncludeComments: true, + Logger: testProgressLogger(&progressLogs), + }) if err != nil { t.Fatalf("sync: %v", err) } @@ -321,6 +330,18 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) { if documentCount != 1 { t.Fatalf("document count: got %d want 1", documentCount) } + for _, want := range []string{ + `msg="sync progress"`, + `state=finished`, + `unit=threads`, + `percent=100.0`, + `completion=100.0%`, + `repository=openclaw/gitcrawl`, + } { + if !strings.Contains(progressLogs.String(), want) { + t.Fatalf("missing %q in progress logs:\n%s", want, progressLogs.String()) + } + } } func TestSyncHydratesPullReviewComments(t *testing.T) { @@ -681,3 +702,14 @@ func TestMappingFallbackBranches(t *testing.T) { t.Fatalf("thread = %+v", thread) } } + +func testProgressLogger(out *bytes.Buffer) *slog.Logger { + return slog.New(slog.NewTextHandler(out, &slog.HandlerOptions{ + ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr { + if attr.Key == slog.TimeKey { + return slog.Attr{} + } + return attr + }, + })) +}