merge: use crawlkit infrastructure

* feat/use-crawlkit: (33 commits)
  fix(tui): allow empty json smoke
  chore(deps): use crawlkit v0.4.0
  fix(tui): use compact-pane crawlkit
  fix(tui): pick up crawlkit renderer
  fix(sync): log thread progress percentages
  chore(deps): bump crawlkit to v0.3.13
  chore(deps): bump crawlkit to v0.3.12
  chore(deps): update crawlkit to v0.3.11
  chore(deps): tidy crawlkit checksums
  chore(deps): update crawlkit to v0.3.10
  chore(deps): tidy crawlkit checksum
  chore(deps): update crawlkit to v0.3.9
  chore(deps): update crawlkit to v0.3.8
  docs(changelog): note TUI alignment
  chore(deps): update crawlkit to v0.3.7
  chore(deps): update crawlkit to v0.3.6
  chore(deps): update crawlkit to v0.3.5
  fix(tui): use crawlkit empty-json fix
  fix(tui): use crawlkit safe renderer
  fix(cli): document portable help
  ...
This commit is contained in:
Vincent Koc 2026-05-05 18:20:49 -07:00
commit 1ca61691c0
No known key found for this signature in database
11 changed files with 364 additions and 120 deletions

View File

@ -64,6 +64,16 @@ jobs:
- name: Build
run: go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=${GITHUB_SHA:0:7}" -o bin/gitcrawl ./cmd/gitcrawl
- name: Smoke test TUI help
run: |
set -euo pipefail
test -n "$(./bin/gitcrawl --version)"
./bin/gitcrawl metadata --json | grep -q '"schema_version"'
./bin/gitcrawl status --json | grep -q '"databases"'
output="$(./bin/gitcrawl help tui)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "gitcrawl tui"
- name: Snapshot release build
uses: goreleaser/goreleaser-action@v7.1.0
with:

View File

@ -15,6 +15,11 @@
- Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`.
- Cache more ghx-style read-only fallthroughs, including release, workflow, secret, variable, project, ruleset, gist, org, and search reads; cache repeat read failures by default; and clear the fallthrough cache after the corresponding mutating `gh` commands.
- Promote portable backups to the v2 format: keep compact comments, PR files, commits, checks, and workflow runs while stripping raw JSON, generated documents, vectors, clusters, and run history.
- Add crawlkit control metadata/status surfaces with command-local `metadata --json`, `status --json`, and `doctor --json`.
- Include the primary SQLite database inventory in status JSON so local control surfaces can discover archive storage without opening live stores.
- Route config path handling and SQLite openers through `crawlkit` so GitHub archive tooling shares the same foundation as the Slack, Discord, and Notion crawlers.
- Keep shared crawl app TUI nomenclature aligned while `gitcrawl tui` remains the richer cluster-browser reference implementation.
- Keep the existing `gitcrawl tui` as the family reference terminal interface and add CI smoke coverage for its help surface.
## 0.1.2 - 2026-05-01

View File

@ -91,4 +91,5 @@ go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(git de
```bash
go test ./...
go build ./cmd/gitcrawl
go run ./cmd/gitcrawl help tui
```

5
go.mod
View File

@ -8,8 +8,7 @@ require (
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
github.com/charmbracelet/x/ansi v0.11.7
github.com/mattn/go-isatty v0.0.22
github.com/pelletier/go-toml/v2 v2.3.1
modernc.org/sqlite v1.50.0
github.com/vincentkoc/crawlkit v0.4.0
)
require (
@ -30,6 +29,7 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
@ -38,4 +38,5 @@ require (
modernc.org/libc v1.72.1 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect
modernc.org/sqlite v1.50.0 // indirect
)

2
go.sum
View File

@ -56,6 +56,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI=
github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=

View File

@ -7,6 +7,7 @@ import (
"flag"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
@ -23,6 +24,7 @@ import (
"github.com/openclaw/gitcrawl/internal/store"
"github.com/openclaw/gitcrawl/internal/syncer"
"github.com/openclaw/gitcrawl/internal/vector"
"github.com/vincentkoc/crawlkit/control"
)
const (
@ -124,12 +126,16 @@ func (a *App) Run(ctx context.Context, args []string) error {
switch rest[0] {
case "version":
return a.writeOutput("version", map[string]string{"version": version}, false)
case "metadata":
return a.runMetadata(rest[1:])
case "serve":
return usageErr(fmt.Errorf("serve is not supported in gitcrawl"))
case "init":
return a.runInit(ctx, rest[1:])
case "doctor":
return a.runDoctor(ctx, rest[1:])
case "status":
return a.runStatus(ctx, rest[1:])
case "sync":
return a.runSync(ctx, rest[1:])
case "threads":
@ -1077,23 +1083,35 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
rt, err = a.openLocalRuntimeReadOnly(ctx)
}
if err != nil {
if !interactive && errors.Is(err, os.ErrNotExist) {
cfg := config.Default()
if cfgErr := cfg.Normalize(); cfgErr != nil {
return cfgErr
}
sort, sortErr := resolveTUISort(*sortMode, cfg)
if sortErr != nil {
return sortErr
}
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, cfg, cfg.DBPath, sort, minSize, limit, *hideClosed), true)
}
return err
}
defer rt.Store.Close()
repo, inferred, err := a.resolveOptionalRepository(ctx, rt, fs.Args())
if err != nil {
if !interactive && len(fs.Args()) == 0 && strings.Contains(err.Error(), "no local repositories found") {
sort, sortErr := resolveTUISort(*sortMode, rt.Config)
if sortErr != nil {
return sortErr
}
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, rt.Config, rt.SourceDBPath, sort, minSize, limit, *hideClosed), true)
}
return err
}
sort := strings.TrimSpace(*sortMode)
if sort == "" {
sort = strings.TrimSpace(rt.Config.TUI.DefaultSort)
}
if sort == "" {
sort = "size"
}
if sort != "recent" && sort != "oldest" && sort != "size" {
return usageErr(fmt.Errorf("unsupported sort %q", sort))
sort, err := resolveTUISort(*sortMode, rt.Config)
if err != nil {
return err
}
showClosed := !*hideClosed || *includeClosed
@ -1148,6 +1166,38 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
return a.runInteractiveTUI(ctx, rt.Store, repo.ID, payload)
}
// resolveTUISort decides which cluster sort order the TUI should use.
//
// The explicit raw value wins; when it is blank the configured
// cfg.TUI.DefaultSort is used, and when that is blank too the order falls
// back to "size". Only "recent", "oldest", and "size" are accepted; any
// other value is reported as a usage error.
func resolveTUISort(raw string, cfg config.Config) (string, error) {
	choice := strings.TrimSpace(raw)
	if choice == "" {
		choice = strings.TrimSpace(cfg.TUI.DefaultSort)
	}
	if choice == "" {
		choice = "size"
	}
	switch choice {
	case "recent", "oldest", "size":
		return choice, nil
	default:
		return "", usageErr(fmt.Errorf("unsupported sort %q", choice))
	}
}
// emptyClusterBrowserPayload builds the cluster-browser payload used when
// there is nothing to list: the requested view settings are echoed back and
// Clusters is an empty, non-nil slice.
//
// A blank sourceDBPath is replaced by cfg.DBPath before the database source
// kind and location are derived from it.
func emptyClusterBrowserPayload(ctx context.Context, cfg config.Config, sourceDBPath, sort string, minSize, limit int, hideClosed bool) clusterBrowserPayload {
	dbPath := sourceDBPath
	if strings.TrimSpace(dbPath) == "" {
		dbPath = cfg.DBPath
	}

	var payload clusterBrowserPayload
	payload.Mode = "cluster-browser"
	payload.DBSource = databaseSourceKind(dbPath)
	payload.DBLocation = databaseSourceLocation(ctx, dbPath)
	payload.Sort = sort
	payload.MinSize = minSize
	payload.Limit = limit
	payload.HideClosed = hideClosed
	payload.EmbedModel = cfg.OpenAI.EmbedModel
	payload.EmbeddingBasis = cfg.EmbeddingBasis
	// Deliberately non-nil: the payload carries an explicit empty list.
	payload.Clusters = []store.ClusterSummary{}
	return payload
}
func databaseSourceKind(dbPath string) string {
if _, ok := portableStoreRoot(dbPath); ok {
return "remote"
@ -1816,6 +1866,7 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
Reporter: func(message string) {
fmt.Fprintln(a.Stderr, message)
},
Logger: progressLogger(a.Stderr),
})
if err != nil {
return syncer.Stats{}, err
@ -1823,6 +1874,17 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
return stats, nil
}
// progressLogger returns a text-format slog.Logger that writes to w with the
// handler's built-in timestamp attribute removed.
//
// ReplaceAttr is also invoked for caller-supplied attributes, so the hook
// only drops the "time" key when no group is open. Without that guard a
// grouped attribute that happens to be keyed "time" would be silently
// discarded along with the timestamp.
func progressLogger(w io.Writer) *slog.Logger {
	return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
		ReplaceAttr: func(groups []string, attr slog.Attr) slog.Attr {
			if len(groups) == 0 && attr.Key == slog.TimeKey {
				return slog.Attr{}
			}
			return attr
		},
	}))
}
func (a *App) runInit(ctx context.Context, args []string) error {
fs := flag.NewFlagSet("init", flag.ContinueOnError)
fs.SetOutput(io.Discard)
@ -1887,6 +1949,8 @@ func (a *App) runPortable(ctx context.Context, args []string) error {
return usageErr(fmt.Errorf("portable requires a subcommand"))
}
switch args[0] {
case "help", "--help", "-h":
return a.printCommandUsage("portable")
case "prune":
return a.runPortablePrune(ctx, args[1:])
default:
@ -2197,6 +2261,113 @@ func (a *App) runDoctor(ctx context.Context, args []string) error {
}, true)
}
// runMetadata implements the "metadata" command: it writes the crawlkit
// control manifest describing this tool (identity, branding, default paths,
// capabilities, privacy flags, and the commands a control surface may run).
//
// The only flag is --json; any positional argument is a usage error. Paths
// come from config.Default() and config.ResolvePath(""); no config file is
// loaded by this command.
func (a *App) runMetadata(args []string) error {
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() != 0 {
		return usageErr(fmt.Errorf("metadata takes flags only"))
	}

	defaults := config.Default()
	m := control.NewManifest("gitcrawl", "Git Crawl", "gitcrawl")
	m.Description = "Local-first GitHub issue and pull request crawler."
	m.Branding = control.Branding{
		SymbolName:  "point.3.connected.trianglepath.dotted",
		AccentColor: "#2da44e",
	}
	m.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
	}
	m.Capabilities = []string{
		"metadata", "status", "doctor", "sync", "search",
		"tui", "portable", "clusters", "embeddings",
	}
	m.Privacy = control.Privacy{
		ContainsPrivateMessages: false,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"github", "sqlite", "portable"},
	}
	m.Commands = map[string]control.Command{
		"status": {
			Title: "Status",
			Argv:  []string{"gitcrawl", "status", "--json"},
			JSON:  true,
		},
		"doctor": {
			Title: "Doctor",
			Argv:  []string{"gitcrawl", "doctor", "--json"},
			JSON:  true,
		},
		"sync": {
			Title:   "Sync repository",
			Argv:    []string{"gitcrawl", "sync", "--json"},
			JSON:    true,
			Mutates: true,
		},
		"search": {
			Title: "Search",
			Argv:  []string{"gitcrawl", "search", "--json"},
			JSON:  true,
		},
		"tui": {
			Title: "Terminal cluster browser",
			Argv:  []string{"gitcrawl", "tui"},
		},
		"tui-json": {
			Title: "Terminal cluster data",
			Argv:  []string{"gitcrawl", "tui", "--json"},
			JSON:  true,
		},
		"portable": {
			Title:   "Portable store tools",
			Argv:    []string{"gitcrawl", "portable", "prune", "--json"},
			JSON:    true,
			Mutates: true,
		},
		"clusters": {
			Title: "Clusters",
			Argv:  []string{"gitcrawl", "clusters", "--json"},
			JSON:  true,
		},
		"legacy-sync-api": {
			Title:      "Legacy sync-status alias",
			Argv:       []string{"gitcrawl", "sync-status"},
			Legacy:     true,
			Deprecated: true,
		},
	}
	return a.writeOutput("metadata", m, false)
}
// runStatus implements the "status" command: a read-only summary of the
// local archive rendered as a crawlkit control status.
//
// The only flag is --json; positional arguments are a usage error. A missing
// config file is not an error: normalized defaults are used instead. When
// the database file does not exist the store is not opened and the reported
// status carries only the database path; otherwise the store is opened
// read-only and queried.
func (a *App) runStatus(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("status", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() != 0 {
		return usageErr(fmt.Errorf("status takes flags only"))
	}

	cfg, loadErr := config.Load(a.configPath)
	if loadErr != nil {
		if !errors.Is(loadErr, os.ErrNotExist) {
			return loadErr
		}
		// No config file yet: fall back to normalized defaults.
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return normErr
		}
	}

	status := store.Status{DBPath: cfg.DBPath}
	switch _, statErr := os.Stat(cfg.DBPath); {
	case statErr == nil:
		archive, openErr := store.OpenReadOnly(ctx, cfg.DBPath)
		if openErr != nil {
			return openErr
		}
		defer archive.Close()
		loaded, statusErr := archive.Status(ctx)
		if statusErr != nil {
			return statusErr
		}
		status = loaded
	case !errors.Is(statErr, os.ErrNotExist):
		return statErr
	}

	// Always report the configured path, whatever the store returned.
	status.DBPath = cfg.DBPath
	return a.writeOutput("status", controlStatus(config.ResolvePath(a.configPath), cfg, status), false)
}
// controlStatus converts a store.Status into the crawlkit control.Status
// shape: a one-line summary, per-entity counts, the config and database
// paths, the last sync time (RFC 3339, UTC, omitted when zero), and a single
// "primary" SQLite database entry. WALBytes is the size of the "<db>-wal"
// file next to the database, as reported by fileSize.
func controlStatus(configPath string, cfg config.Config, status store.Status) control.Status {
	summary := fmt.Sprintf("%d threads across %d repositories", status.ThreadCount, status.RepositoryCount)
	tallies := []control.Count{
		control.NewCount("repositories", "Repositories", int64(status.RepositoryCount)),
		control.NewCount("threads", "Threads", int64(status.ThreadCount)),
		control.NewCount("open_threads", "Open threads", int64(status.OpenThreadCount)),
		control.NewCount("clusters", "Clusters", int64(status.ClusterCount)),
	}

	result := control.NewStatus("gitcrawl", summary)
	result.State = "current"
	result.ConfigPath = configPath
	result.DatabasePath = status.DBPath
	result.Counts = tallies
	if synced := status.LastSyncAt; !synced.IsZero() {
		result.LastSyncAt = synced.UTC().Format(time.RFC3339)
	}

	primary := control.SQLiteDatabase("primary", "GitHub archive", "archive", status.DBPath, true, tallies)
	result.DatabaseBytes = primary.Bytes
	result.WALBytes = fileSize(status.DBPath + "-wal")
	result.Databases = []control.Database{primary}
	return result
}
// fileSize reports the size in bytes of the file at path, or 0 when the
// path cannot be stat'ed (for example because it does not exist).
func fileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}
func (a *App) applyCommandJSON(enabled bool) {
if enabled {
a.format = FormatJSON
@ -2683,6 +2854,9 @@ func (a *App) printUsage() {
func (a *App) printCommandUsage(command string) error {
switch command {
case "portable":
fmt.Fprint(a.Stdout, portableUsageText)
return nil
case "tui":
fmt.Fprint(a.Stdout, tuiUsageText)
return nil
@ -2704,6 +2878,8 @@ Global flags:
--version print version
Core commands:
metadata print crawlkit control metadata
status print fast read-only archive status
init create config, optionally from a portable store
doctor check config, token, and database readiness
sync sync GitHub issue and pull request metadata
@ -2748,3 +2924,12 @@ Press n to load neighbors for the selected issue or PR.
Enter from the members pane also loads neighbors before opening detail.
The TUI quietly refreshes from the local store every 15 seconds and leaves the current status alone when nothing changed.
`
// portableUsageText is the help text for the "portable" command; it is what
// printCommandUsage("portable") writes to stdout.
const portableUsageText = `gitcrawl portable manages local portable-store snapshots.
Usage:
gitcrawl portable prune [--body-chars N] [--no-vacuum] [--json]
Subcommands:
prune prune volatile payloads from the configured portable store
`

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
@ -1022,6 +1023,60 @@ func TestTUIInfersRepository(t *testing.T) {
}
}
// TestTUIJSONUsesDefaultsWhenConfigMissing runs "tui --json" with a config
// path and a database path that do not exist. It expects a cluster-browser
// payload with an empty clusters list, and expects that the config file is
// not created as a side effect.
func TestTUIJSONUsesDefaultsWhenConfigMissing(t *testing.T) {
	tmp := t.TempDir()
	missingConfig := filepath.Join(tmp, "missing.toml")
	t.Setenv("GITCRAWL_DB_PATH", filepath.Join(tmp, "missing.db"))

	var out bytes.Buffer
	app := New()
	app.Stdout = &out
	argv := []string{"--config", missingConfig, "tui", "--json"}
	if err := app.Run(context.Background(), argv); err != nil {
		t.Fatalf("tui: %v", err)
	}

	var payload map[string]any
	if err := json.Unmarshal(out.Bytes(), &payload); err != nil {
		t.Fatalf("decode tui payload: %v\n%s", err, out.String())
	}
	if payload["mode"] != "cluster-browser" {
		t.Fatalf("mode = %#v", payload["mode"])
	}
	if list, isList := payload["clusters"].([]any); !isList || len(list) != 0 {
		t.Fatalf("clusters = %#v", payload["clusters"])
	}

	// The read-only JSON path must not materialize a config file.
	_, statErr := os.Stat(missingConfig)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Fatalf("config file should not be created, stat err=%v", statErr)
	}
}
// TestTUIJSONHandlesEmptyStoreWithoutRepository initializes a fresh config
// and database via "init", then runs "tui --json" without naming a
// repository and expects a payload whose clusters list is empty.
func TestTUIJSONHandlesEmptyStoreWithoutRepository(t *testing.T) {
	ctx := context.Background()
	tmp := t.TempDir()
	cfgFile := filepath.Join(tmp, "config.toml")
	dbFile := filepath.Join(tmp, "gitcrawl.db")

	if err := New().Run(ctx, []string{"--config", cfgFile, "init", "--db", dbFile}); err != nil {
		t.Fatalf("init: %v", err)
	}

	var out bytes.Buffer
	browser := New()
	browser.Stdout = &out
	if err := browser.Run(ctx, []string{"--config", cfgFile, "tui", "--json"}); err != nil {
		t.Fatalf("tui: %v", err)
	}

	var payload map[string]any
	if err := json.Unmarshal(out.Bytes(), &payload); err != nil {
		t.Fatalf("decode tui payload: %v\n%s", err, out.String())
	}
	if list, isList := payload["clusters"].([]any); !isList || len(list) != 0 {
		t.Fatalf("clusters = %#v", payload["clusters"])
	}
}
func TestTUIRequiresInteractiveTerminalByDefault(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()

View File

@ -6,7 +6,7 @@ import (
"path/filepath"
"strings"
"github.com/pelletier/go-toml/v2"
crawlconfig "github.com/vincentkoc/crawlkit/config"
)
const (
@ -49,15 +49,24 @@ type TokenResolution struct {
Source string
}
var appConfig = crawlconfig.App{Name: "gitcrawl", ConfigEnv: DefaultConfigEnv}
func Default() Config {
home := homeDir()
base := filepath.Join(home, ".config", "gitcrawl")
paths, err := appConfig.DefaultPaths()
if err != nil {
paths = crawlconfig.Paths{
DBPath: filepath.Join(homeDir(), ".config", "gitcrawl", "gitcrawl.db"),
CacheDir: filepath.Join(homeDir(), ".config", "gitcrawl", "cache"),
LogDir: filepath.Join(homeDir(), ".config", "gitcrawl", "logs"),
}
}
base := filepath.Dir(paths.DBPath)
return Config{
Version: 1,
DBPath: filepath.Join(base, "gitcrawl.db"),
CacheDir: filepath.Join(base, "cache"),
DBPath: paths.DBPath,
CacheDir: paths.CacheDir,
VectorDir: filepath.Join(base, "vectors"),
LogDir: filepath.Join(base, "logs"),
LogDir: paths.LogDir,
EmbeddingBasis: "title_original",
GitHub: GitHubConfig{
TokenEnv: DefaultTokenEnv,
@ -77,26 +86,19 @@ func Default() Config {
}
func ResolvePath(flagPath string) string {
if strings.TrimSpace(flagPath) != "" {
return expandHome(flagPath)
path, err := appConfig.ResolveConfigPath(flagPath)
if err != nil {
return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml")
}
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
return expandHome(envPath)
}
home := homeDir()
return filepath.Join(home, ".config", "gitcrawl", "config.toml")
return path
}
func Load(path string) (Config, error) {
cfg := Default()
resolved := ResolvePath(path)
data, err := os.ReadFile(resolved)
if err != nil {
if err := crawlconfig.LoadTOML(resolved, &cfg); err != nil {
return Config{}, err
}
if err := toml.Unmarshal(data, &cfg); err != nil {
return Config{}, fmt.Errorf("parse config: %w", err)
}
if err := cfg.Normalize(); err != nil {
return Config{}, err
}
@ -108,21 +110,19 @@ func Save(path string, cfg Config) error {
return err
}
resolved := ResolvePath(path)
if err := os.MkdirAll(filepath.Dir(resolved), 0o755); err != nil {
return fmt.Errorf("create config dir: %w", err)
}
data, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("marshal config: %w", err)
}
return os.WriteFile(resolved, data, 0o600)
return crawlconfig.WriteTOML(resolved, cfg, 0o600)
}
func EnsureRuntimeDirs(cfg Config) error {
for _, path := range []string{cfg.CacheDir, cfg.VectorDir, cfg.LogDir, filepath.Dir(cfg.DBPath)} {
if err := os.MkdirAll(expandHome(path), 0o755); err != nil {
return fmt.Errorf("create runtime dir %s: %w", path, err)
}
if err := crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
DBPath: cfg.DBPath,
CacheDir: cfg.CacheDir,
LogDir: cfg.LogDir,
}); err != nil {
return err
}
if err := os.MkdirAll(crawlconfig.ExpandHome(cfg.VectorDir), 0o755); err != nil {
return fmt.Errorf("create runtime dir %s: %w", cfg.VectorDir, err)
}
return nil
}
@ -200,13 +200,7 @@ func envOrDefault(primary, fallback string) string {
}
func expandHome(path string) string {
if path == "~" {
return homeDir()
}
if strings.HasPrefix(path, "~/") {
return filepath.Join(homeDir(), strings.TrimPrefix(path, "~/"))
}
return path
return crawlconfig.ExpandHome(path)
}
func homeDir() string {

View File

@ -4,12 +4,9 @@ import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"runtime"
"time"
_ "modernc.org/sqlite"
crawlstore "github.com/vincentkoc/crawlkit/store"
)
const (
@ -39,64 +36,33 @@ type Status struct {
}
func Open(ctx context.Context, path string) (*Store, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return nil, fmt.Errorf("create db dir: %w", err)
}
if err := ensureDBFile(path); err != nil {
return nil, err
}
dsn := fmt.Sprintf(
"file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path})
if err != nil {
return nil, fmt.Errorf("open sqlite: %w", err)
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite: %w", err)
}
if err := tightenDBFilePerms(path); err != nil {
_ = db.Close()
return nil, err
}
db := base.DB()
st := &Store{db: db, path: path}
if err := st.migrate(ctx); err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
return st, nil
}
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
if _, err := os.Stat(path); err != nil {
return nil, fmt.Errorf("stat db file: %w", err)
}
dsn := fmt.Sprintf(
"file:%s?mode=ro&_pragma=query_only(1)&_pragma=foreign_keys(1)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.OpenReadOnly(ctx, path)
if err != nil {
return nil, fmt.Errorf("open sqlite readonly: %w", err)
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite readonly: %w", err)
return nil, err
}
db := base.DB()
st := &Store{db: db, path: path}
current, err := st.schemaVersion(ctx)
if err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
if current > schemaVersion {
_ = db.Close()
_ = base.Close()
return nil, fmt.Errorf("database schema version %d is newer than supported version %d", current, schemaVersion)
}
return st, nil
@ -273,31 +239,3 @@ func (s *Store) schemaVersion(ctx context.Context) (int, error) {
}
return version, nil
}
// ensureDBFile makes sure a file exists at path, creating an empty one with
// mode 0600 when it is missing. An existing file is left untouched, and
// losing a creation race to another creator (the exclusive create reporting
// "already exists") is treated as success.
func ensureDBFile(path string) error {
	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		return nil
	case !os.IsNotExist(statErr):
		return fmt.Errorf("stat db file: %w", statErr)
	}

	handle, createErr := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
	if createErr != nil {
		if os.IsExist(createErr) {
			// Someone else created it between the stat and the open.
			return nil
		}
		return fmt.Errorf("create db file: %w", createErr)
	}
	if closeErr := handle.Close(); closeErr != nil {
		return fmt.Errorf("close db file: %w", closeErr)
	}
	return nil
}
// tightenDBFilePerms restricts the file at path to mode 0600. On Windows it
// does nothing and returns nil.
func tightenDBFilePerms(path string) error {
	if runtime.GOOS != "windows" {
		if err := os.Chmod(path, 0o600); err != nil {
			return fmt.Errorf("chmod db file: %w", err)
		}
	}
	return nil
}

View File

@ -6,6 +6,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"log/slog"
"strconv"
"strings"
"time"
@ -13,6 +14,7 @@ import (
"github.com/openclaw/gitcrawl/internal/documents"
gh "github.com/openclaw/gitcrawl/internal/github"
"github.com/openclaw/gitcrawl/internal/store"
"github.com/vincentkoc/crawlkit/progress"
)
type GitHubClient interface {
@ -45,6 +47,7 @@ type Options struct {
IncludeComments bool
IncludePRDetails bool
Reporter gh.Reporter
Logger *slog.Logger
}
type Stats struct {
@ -132,6 +135,15 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
MetadataOnly: !options.IncludeComments,
StartedAt: started,
}
tracker := progress.New(options.Logger, progress.Options{
Name: "sync",
Unit: "threads",
Total: int64(len(rows)),
Attrs: []any{
"repository", stats.Repository,
"state", state,
},
})
persist := func(st *store.Store) error {
for _, row := range rows {
thread := mapIssueToThread(repoID, row, s.now().Format(time.RFC3339Nano))
@ -169,6 +181,11 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
} else {
stats.IssuesSynced++
}
tracker.Add(1,
"number", thread.Number,
"kind", thread.Kind,
"thread_state", thread.State,
)
}
if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 {
closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since)
@ -193,13 +210,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
}
if !options.IncludeComments {
if err := s.store.WithTx(ctx, persist); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}
if err := persist(s.store); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}

View File

@ -1,9 +1,12 @@
package syncer
import (
"bytes"
"context"
"encoding/json"
"log/slog"
"path/filepath"
"strings"
"testing"
"time"
@ -286,7 +289,13 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
s := New(fakeGitHub{}, st)
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", IncludeComments: true})
var progressLogs bytes.Buffer
stats, err := s.Sync(ctx, Options{
Owner: "openclaw",
Repo: "gitcrawl",
IncludeComments: true,
Logger: testProgressLogger(&progressLogs),
})
if err != nil {
t.Fatalf("sync: %v", err)
}
@ -321,6 +330,18 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
if documentCount != 1 {
t.Fatalf("document count: got %d want 1", documentCount)
}
for _, want := range []string{
`msg="sync progress"`,
`state=finished`,
`unit=threads`,
`percent=100.0`,
`completion=100.0%`,
`repository=openclaw/gitcrawl`,
} {
if !strings.Contains(progressLogs.String(), want) {
t.Fatalf("missing %q in progress logs:\n%s", want, progressLogs.String())
}
}
}
func TestSyncHydratesPullReviewComments(t *testing.T) {
@ -681,3 +702,14 @@ func TestMappingFallbackBranches(t *testing.T) {
t.Fatalf("thread = %+v", thread)
}
}
// testProgressLogger returns a text-format slog.Logger that writes into out
// and strips the "time" attribute, so tests can match log lines without
// dealing with timestamps.
func testProgressLogger(out *bytes.Buffer) *slog.Logger {
	stripTime := func(_ []string, attr slog.Attr) slog.Attr {
		if attr.Key != slog.TimeKey {
			return attr
		}
		return slog.Attr{}
	}
	opts := slog.HandlerOptions{ReplaceAttr: stripTime}
	return slog.New(slog.NewTextHandler(out, &opts))
}