Compare commits

...

33 Commits

Author SHA1 Message Date
Vincent Koc
f760523ca0
fix(tui): allow empty json smoke 2026-05-05 02:23:27 -07:00
Vincent Koc
401bda3ec1
chore(deps): use crawlkit v0.4.0 2026-05-05 02:13:35 -07:00
Vincent Koc
79503f59b2
fix(tui): use compact-pane crawlkit 2026-05-05 02:13:35 -07:00
Vincent Koc
521b9f2387
fix(tui): pick up crawlkit renderer 2026-05-05 02:13:34 -07:00
Vincent Koc
01f3824eb8
fix(sync): log thread progress percentages 2026-05-05 02:13:34 -07:00
Vincent Koc
c83a05b9ec
chore(deps): bump crawlkit to v0.3.13 2026-05-05 02:13:34 -07:00
Vincent Koc
730fe9b074
chore(deps): bump crawlkit to v0.3.12 2026-05-05 02:13:34 -07:00
Vincent Koc
da0b6ecfef
chore(deps): update crawlkit to v0.3.11 2026-05-05 02:13:33 -07:00
Vincent Koc
41840d5bd4
chore(deps): tidy crawlkit checksums 2026-05-05 02:13:33 -07:00
Vincent Koc
8d47036601
chore(deps): update crawlkit to v0.3.10 2026-05-05 02:13:33 -07:00
Vincent Koc
a58178ff45
chore(deps): tidy crawlkit checksum 2026-05-05 02:13:33 -07:00
Vincent Koc
7d32e2ae75
chore(deps): update crawlkit to v0.3.9 2026-05-05 02:13:33 -07:00
Vincent Koc
d39276d981
chore(deps): update crawlkit to v0.3.8 2026-05-05 02:13:33 -07:00
Vincent Koc
5624cb90da
docs(changelog): note TUI alignment 2026-05-05 02:13:32 -07:00
Vincent Koc
3db6f84b6a
chore(deps): update crawlkit to v0.3.7 2026-05-05 02:13:32 -07:00
Vincent Koc
b55b8dae75
chore(deps): update crawlkit to v0.3.6 2026-05-05 02:13:32 -07:00
Vincent Koc
5bcba00bb5
chore(deps): update crawlkit to v0.3.5 2026-05-05 02:13:32 -07:00
Vincent Koc
e7e7fd53b9
fix(tui): use crawlkit empty-json fix 2026-05-05 02:13:32 -07:00
Vincent Koc
64cd17269a
fix(tui): use crawlkit safe renderer 2026-05-05 02:13:32 -07:00
Vincent Koc
906b653750
fix(cli): document portable help 2026-05-05 02:13:31 -07:00
Vincent Koc
ed4e868633
chore(deps): tidy crawlkit module sums 2026-05-05 02:13:31 -07:00
Vincent Koc
82e5c818e5
ci: smoke crawlkit control surface 2026-05-05 02:13:31 -07:00
Vincent Koc
ce983c9160
feat(cli): add crawlkit control surface 2026-05-05 02:13:31 -07:00
Vincent Koc
a41cf423b7
chore: bump crawlkit to v0.3.1 2026-05-05 02:13:31 -07:00
Vincent Koc
606105119d
chore: tidy crawlkit module sums 2026-05-05 02:13:30 -07:00
Vincent Koc
4a7c270c8e
refactor: use crawlkit package nouns 2026-05-05 02:13:30 -07:00
Vincent Koc
9c8691e2eb
chore: use crawlkit v0.2.0 2026-05-05 02:13:30 -07:00
Vincent Koc
066486d9b5
docs(tui): mark gitcrawl as browser reference 2026-05-05 02:13:29 -07:00
Vincent Koc
fa71f1a0b2
chore: use crawlkit v0.1.1 2026-05-05 02:13:29 -07:00
Vincent Koc
2d149fa8ea
chore: use crawlkit v0.1.0 2026-05-05 02:13:29 -07:00
Vincent Koc
c0e0e537c9
refactor(store): use crawlkit sqlite openers 2026-05-05 02:13:29 -07:00
Vincent Koc
7112da8675
refactor(config): route paths through crawlkit 2026-05-05 02:13:28 -07:00
Vincent Koc
73a0d3ea5b
chore: add crawlkit module dependency 2026-05-05 02:13:28 -07:00
11 changed files with 364 additions and 120 deletions

View File

@ -60,6 +60,16 @@ jobs:
- name: Build
run: go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=${GITHUB_SHA:0:7}" -o bin/gitcrawl ./cmd/gitcrawl
- name: Smoke test TUI help
run: |
set -euo pipefail
test -n "$(./bin/gitcrawl --version)"
./bin/gitcrawl metadata --json | grep -q '"schema_version"'
./bin/gitcrawl status --json | grep -q '"databases"'
output="$(./bin/gitcrawl help tui)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "gitcrawl tui"
- name: Snapshot release build
uses: goreleaser/goreleaser-action@v7.1.0
with:

View File

@ -13,6 +13,11 @@
- Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`.
- Cache more ghx-style read-only fallthroughs, including release, workflow, secret, variable, project, ruleset, gist, org, and search reads; cache repeat read failures by default; and clear the fallthrough cache after the corresponding mutating `gh` commands.
- Promote portable backups to the v2 format: keep compact comments, PR files, commits, checks, and workflow runs while stripping raw JSON, generated documents, vectors, clusters, and run history.
- Add crawlkit control metadata/status surfaces with command-local `metadata --json`, `status --json`, and `doctor --json`.
- Include the primary SQLite database inventory in status JSON so local control surfaces can discover archive storage without opening live stores.
- Route config path handling and SQLite openers through `crawlkit` so GitHub archive tooling shares the same foundation as the Slack, Discord, and Notion crawlers.
- Keep shared crawl app TUI nomenclature aligned while `gitcrawl tui` remains the richer cluster-browser reference implementation.
- Keep the existing `gitcrawl tui` as the family reference terminal interface and add CI smoke coverage for its help surface.
## 0.1.2 - 2026-05-01

View File

@ -91,4 +91,5 @@ go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(git de
```bash
go test ./...
go build ./cmd/gitcrawl
go run ./cmd/gitcrawl help tui
```

5
go.mod
View File

@ -8,8 +8,7 @@ require (
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
github.com/charmbracelet/x/ansi v0.11.7
github.com/mattn/go-isatty v0.0.22
github.com/pelletier/go-toml/v2 v2.3.1
modernc.org/sqlite v1.50.0
github.com/vincentkoc/crawlkit v0.4.0
)
require (
@ -30,6 +29,7 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
@ -38,4 +38,5 @@ require (
modernc.org/libc v1.72.1 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect
modernc.org/sqlite v1.50.0 // indirect
)

2
go.sum
View File

@ -56,6 +56,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI=
github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=

View File

@ -7,6 +7,7 @@ import (
"flag"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
@ -23,6 +24,7 @@ import (
"github.com/openclaw/gitcrawl/internal/store"
"github.com/openclaw/gitcrawl/internal/syncer"
"github.com/openclaw/gitcrawl/internal/vector"
"github.com/vincentkoc/crawlkit/control"
)
const (
@ -124,12 +126,16 @@ func (a *App) Run(ctx context.Context, args []string) error {
switch rest[0] {
case "version":
return a.writeOutput("version", map[string]string{"version": version}, false)
case "metadata":
return a.runMetadata(rest[1:])
case "serve":
return usageErr(fmt.Errorf("serve is not supported in gitcrawl"))
case "init":
return a.runInit(ctx, rest[1:])
case "doctor":
return a.runDoctor(ctx, rest[1:])
case "status":
return a.runStatus(ctx, rest[1:])
case "sync":
return a.runSync(ctx, rest[1:])
case "threads":
@ -1077,23 +1083,35 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
rt, err = a.openLocalRuntimeReadOnly(ctx)
}
if err != nil {
if !interactive && errors.Is(err, os.ErrNotExist) {
cfg := config.Default()
if cfgErr := cfg.Normalize(); cfgErr != nil {
return cfgErr
}
sort, sortErr := resolveTUISort(*sortMode, cfg)
if sortErr != nil {
return sortErr
}
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, cfg, cfg.DBPath, sort, minSize, limit, *hideClosed), true)
}
return err
}
defer rt.Store.Close()
repo, inferred, err := a.resolveOptionalRepository(ctx, rt, fs.Args())
if err != nil {
if !interactive && len(fs.Args()) == 0 && strings.Contains(err.Error(), "no local repositories found") {
sort, sortErr := resolveTUISort(*sortMode, rt.Config)
if sortErr != nil {
return sortErr
}
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, rt.Config, rt.SourceDBPath, sort, minSize, limit, *hideClosed), true)
}
return err
}
sort := strings.TrimSpace(*sortMode)
if sort == "" {
sort = strings.TrimSpace(rt.Config.TUI.DefaultSort)
}
if sort == "" {
sort = "size"
}
if sort != "recent" && sort != "oldest" && sort != "size" {
return usageErr(fmt.Errorf("unsupported sort %q", sort))
sort, err := resolveTUISort(*sortMode, rt.Config)
if err != nil {
return err
}
showClosed := !*hideClosed || *includeClosed
@ -1148,6 +1166,38 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
return a.runInteractiveTUI(ctx, rt.Store, repo.ID, payload)
}
// resolveTUISort chooses the sort mode used by the cluster browser.
//
// The explicit raw value wins; when it is blank the configured
// cfg.TUI.DefaultSort is used, and when that is blank too the mode falls back
// to "size". Only "recent", "oldest" and "size" are accepted; any other value
// yields a usage error and an empty mode.
func resolveTUISort(raw string, cfg config.Config) (string, error) {
	mode := strings.TrimSpace(raw)
	if mode == "" {
		mode = strings.TrimSpace(cfg.TUI.DefaultSort)
	}
	if mode == "" {
		mode = "size"
	}
	switch mode {
	case "recent", "oldest", "size":
		return mode, nil
	default:
		return "", usageErr(fmt.Errorf("unsupported sort %q", mode))
	}
}
// emptyClusterBrowserPayload builds a cluster-browser payload that carries the
// requested view settings but no clusters.
//
// A blank sourceDBPath is replaced by cfg.DBPath before the database source
// kind and location are derived. Clusters is a non-nil empty slice rather than
// a nil one.
func emptyClusterBrowserPayload(ctx context.Context, cfg config.Config, sourceDBPath, sort string, minSize, limit int, hideClosed bool) clusterBrowserPayload {
	dbPath := sourceDBPath
	if strings.TrimSpace(dbPath) == "" {
		dbPath = cfg.DBPath
	}

	var payload clusterBrowserPayload
	payload.Mode = "cluster-browser"
	payload.DBSource = databaseSourceKind(dbPath)
	payload.DBLocation = databaseSourceLocation(ctx, dbPath)
	payload.Sort = sort
	payload.MinSize = minSize
	payload.Limit = limit
	payload.HideClosed = hideClosed
	payload.EmbedModel = cfg.OpenAI.EmbedModel
	payload.EmbeddingBasis = cfg.EmbeddingBasis
	payload.Clusters = []store.ClusterSummary{}
	return payload
}
func databaseSourceKind(dbPath string) string {
if _, ok := portableStoreRoot(dbPath); ok {
return "remote"
@ -1816,6 +1866,7 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
Reporter: func(message string) {
fmt.Fprintln(a.Stderr, message)
},
Logger: progressLogger(a.Stderr),
})
if err != nil {
return syncer.Stats{}, err
@ -1823,6 +1874,17 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
return stats, nil
}
// progressLogger returns a text-format slog logger that writes to w and omits
// the timestamp attribute from every record.
func progressLogger(w io.Writer) *slog.Logger {
	// Returning the zero Attr tells slog to drop the attribute entirely.
	dropTime := func(_ []string, a slog.Attr) slog.Attr {
		if a.Key != slog.TimeKey {
			return a
		}
		return slog.Attr{}
	}
	opts := slog.HandlerOptions{ReplaceAttr: dropTime}
	return slog.New(slog.NewTextHandler(w, &opts))
}
func (a *App) runInit(ctx context.Context, args []string) error {
fs := flag.NewFlagSet("init", flag.ContinueOnError)
fs.SetOutput(io.Discard)
@ -1887,6 +1949,8 @@ func (a *App) runPortable(ctx context.Context, args []string) error {
return usageErr(fmt.Errorf("portable requires a subcommand"))
}
switch args[0] {
case "help", "--help", "-h":
return a.printCommandUsage("portable")
case "prune":
return a.runPortablePrune(ctx, args[1:])
default:
@ -2197,6 +2261,113 @@ func (a *App) runDoctor(ctx context.Context, args []string) error {
}, true)
}
// runMetadata implements the "metadata" command: it writes the crawlkit
// control manifest describing gitcrawl (branding, default paths, capabilities,
// privacy flags and the commands a control surface may invoke).
//
// The only accepted flag is --json; positional arguments are rejected with a
// usage error. Paths come from config.Default() and config.ResolvePath(""),
// so no config file is loaded here.
func (a *App) runMetadata(args []string) error {
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() != 0 {
		return usageErr(fmt.Errorf("metadata takes flags only"))
	}

	defaults := config.Default()
	manifest := control.NewManifest("gitcrawl", "Git Crawl", "gitcrawl")
	manifest.Description = "Local-first GitHub issue and pull request crawler."
	manifest.Branding = control.Branding{
		SymbolName:  "point.3.connected.trianglepath.dotted",
		AccentColor: "#2da44e",
	}
	manifest.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
	}
	manifest.Capabilities = []string{
		"metadata",
		"status",
		"doctor",
		"sync",
		"search",
		"tui",
		"portable",
		"clusters",
		"embeddings",
	}
	manifest.Privacy = control.Privacy{
		ContainsPrivateMessages: false,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"github", "sqlite", "portable"},
	}

	// Commands advertised in the manifest, keyed by a stable identifier.
	commands := make(map[string]control.Command, 9)
	commands["status"] = control.Command{Title: "Status", Argv: []string{"gitcrawl", "status", "--json"}, JSON: true}
	commands["doctor"] = control.Command{Title: "Doctor", Argv: []string{"gitcrawl", "doctor", "--json"}, JSON: true}
	commands["sync"] = control.Command{Title: "Sync repository", Argv: []string{"gitcrawl", "sync", "--json"}, JSON: true, Mutates: true}
	commands["search"] = control.Command{Title: "Search", Argv: []string{"gitcrawl", "search", "--json"}, JSON: true}
	commands["tui"] = control.Command{Title: "Terminal cluster browser", Argv: []string{"gitcrawl", "tui"}}
	commands["tui-json"] = control.Command{Title: "Terminal cluster data", Argv: []string{"gitcrawl", "tui", "--json"}, JSON: true}
	commands["portable"] = control.Command{Title: "Portable store tools", Argv: []string{"gitcrawl", "portable", "prune", "--json"}, JSON: true, Mutates: true}
	commands["clusters"] = control.Command{Title: "Clusters", Argv: []string{"gitcrawl", "clusters", "--json"}, JSON: true}
	commands["legacy-sync-api"] = control.Command{Title: "Legacy sync-status alias", Argv: []string{"gitcrawl", "sync-status"}, Legacy: true, Deprecated: true}
	manifest.Commands = commands

	return a.writeOutput("metadata", manifest, false)
}
// runStatus implements the "status" command: it reports archive counts and
// database inventory as a crawlkit control status document.
//
// The only accepted flag is --json; positional arguments are rejected with a
// usage error. A missing config file is tolerated by falling back to the
// normalized defaults, and a missing database file is tolerated by reporting
// a zero-valued status for the configured path. Any other load, stat, open or
// query error is returned as-is.
func (a *App) runStatus(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("status", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() != 0 {
		return usageErr(fmt.Errorf("status takes flags only"))
	}

	cfg, loadErr := config.Load(a.configPath)
	switch {
	case loadErr == nil:
		// Loaded config is used unchanged.
	case errors.Is(loadErr, os.ErrNotExist):
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return normErr
		}
	default:
		return loadErr
	}

	status := store.Status{DBPath: cfg.DBPath}
	_, statErr := os.Stat(cfg.DBPath)
	switch {
	case statErr == nil:
		// The database exists: read live counts through a read-only handle.
		st, openErr := store.OpenReadOnly(ctx, cfg.DBPath)
		if openErr != nil {
			return openErr
		}
		defer st.Close()
		loaded, queryErr := st.Status(ctx)
		if queryErr != nil {
			return queryErr
		}
		status = loaded
	case !errors.Is(statErr, os.ErrNotExist):
		return statErr
	}

	// Always report the configured path, whatever the store filled in.
	status.DBPath = cfg.DBPath
	return a.writeOutput("status", controlStatus(config.ResolvePath(a.configPath), cfg, status), false)
}
// controlStatus converts a store.Status into the crawlkit control.Status
// document emitted by the status command.
//
// The summary line and counts cover repositories, threads, open threads and
// clusters. LastSyncAt is set (UTC, RFC 3339) only when the store reports a
// non-zero sync time. A single "primary" SQLite database entry is attached,
// and WALBytes is the size of the "<db>-wal" file, or 0 when it cannot be
// stat'ed.
func controlStatus(configPath string, cfg config.Config, status store.Status) control.Status {
	counts := []control.Count{
		control.NewCount("repositories", "Repositories", int64(status.RepositoryCount)),
		control.NewCount("threads", "Threads", int64(status.ThreadCount)),
		control.NewCount("open_threads", "Open threads", int64(status.OpenThreadCount)),
		control.NewCount("clusters", "Clusters", int64(status.ClusterCount)),
	}

	summary := fmt.Sprintf("%d threads across %d repositories", status.ThreadCount, status.RepositoryCount)
	result := control.NewStatus("gitcrawl", summary)
	result.State = "current"
	result.ConfigPath = configPath
	result.DatabasePath = status.DBPath
	result.Counts = counts
	if synced := status.LastSyncAt; !synced.IsZero() {
		result.LastSyncAt = synced.UTC().Format(time.RFC3339)
	}

	primary := control.SQLiteDatabase("primary", "GitHub archive", "archive", status.DBPath, true, counts)
	result.DatabaseBytes = primary.Bytes
	result.WALBytes = fileSize(status.DBPath + "-wal")
	result.Databases = []control.Database{primary}
	return result
}
// fileSize returns the size in bytes of the file at path, or 0 when the path
// cannot be stat'ed for any reason.
func fileSize(path string) int64 {
	if info, statErr := os.Stat(path); statErr == nil {
		return info.Size()
	}
	return 0
}
func (a *App) applyCommandJSON(enabled bool) {
if enabled {
a.format = FormatJSON
@ -2683,6 +2854,9 @@ func (a *App) printUsage() {
func (a *App) printCommandUsage(command string) error {
switch command {
case "portable":
fmt.Fprint(a.Stdout, portableUsageText)
return nil
case "tui":
fmt.Fprint(a.Stdout, tuiUsageText)
return nil
@ -2704,6 +2878,8 @@ Global flags:
--version print version
Core commands:
metadata print crawlkit control metadata
status print fast read-only archive status
init create config, optionally from a portable store
doctor check config, token, and database readiness
sync sync GitHub issue and pull request metadata
@ -2748,3 +2924,12 @@ Press n to load neighbors for the selected issue or PR.
Enter from the members pane also loads neighbors before opening detail.
The TUI quietly refreshes from the local store every 15 seconds and leaves the current status alone when nothing changed.
`
// portableUsageText is the help text written to stdout by
// printCommandUsage("portable").
const portableUsageText = `gitcrawl portable manages local portable-store snapshots.
Usage:
gitcrawl portable prune [--body-chars N] [--no-vacuum] [--json]
Subcommands:
prune prune volatile payloads from the configured portable store
`

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
@ -1022,6 +1023,60 @@ func TestTUIInfersRepository(t *testing.T) {
}
}
// TestTUIJSONUsesDefaultsWhenConfigMissing checks that `tui --json` succeeds
// when neither the config file nor the database exists: it must emit an empty
// cluster-browser payload and must not create the config file.
func TestTUIJSONUsesDefaultsWhenConfigMissing(t *testing.T) {
	tmp := t.TempDir()
	missingConfig := filepath.Join(tmp, "missing.toml")
	t.Setenv("GITCRAWL_DB_PATH", filepath.Join(tmp, "missing.db"))

	var out bytes.Buffer
	app := New()
	app.Stdout = &out
	cmdline := []string{"--config", missingConfig, "tui", "--json"}
	if err := app.Run(context.Background(), cmdline); err != nil {
		t.Fatalf("tui: %v", err)
	}

	var decoded map[string]any
	if err := json.Unmarshal(out.Bytes(), &decoded); err != nil {
		t.Fatalf("decode tui payload: %v\n%s", err, out.String())
	}
	if decoded["mode"] != "cluster-browser" {
		t.Fatalf("mode = %#v", decoded["mode"])
	}
	if list, isList := decoded["clusters"].([]any); !isList || len(list) != 0 {
		t.Fatalf("clusters = %#v", decoded["clusters"])
	}

	_, statErr := os.Stat(missingConfig)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Fatalf("config file should not be created, stat err=%v", statErr)
	}
}
// TestTUIJSONHandlesEmptyStoreWithoutRepository checks that `tui --json`
// against a freshly initialised store with no repositories succeeds and
// reports an empty clusters array.
func TestTUIJSONHandlesEmptyStoreWithoutRepository(t *testing.T) {
	ctx := context.Background()
	tmp := t.TempDir()
	cfgFile := filepath.Join(tmp, "config.toml")
	dbFile := filepath.Join(tmp, "gitcrawl.db")

	// Create the config and an empty database first.
	if err := New().Run(ctx, []string{"--config", cfgFile, "init", "--db", dbFile}); err != nil {
		t.Fatalf("init: %v", err)
	}

	var out bytes.Buffer
	browser := New()
	browser.Stdout = &out
	if err := browser.Run(ctx, []string{"--config", cfgFile, "tui", "--json"}); err != nil {
		t.Fatalf("tui: %v", err)
	}

	var decoded map[string]any
	if err := json.Unmarshal(out.Bytes(), &decoded); err != nil {
		t.Fatalf("decode tui payload: %v\n%s", err, out.String())
	}
	if list, isList := decoded["clusters"].([]any); !isList || len(list) != 0 {
		t.Fatalf("clusters = %#v", decoded["clusters"])
	}
}
func TestTUIRequiresInteractiveTerminalByDefault(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()

View File

@ -6,7 +6,7 @@ import (
"path/filepath"
"strings"
"github.com/pelletier/go-toml/v2"
crawlconfig "github.com/vincentkoc/crawlkit/config"
)
const (
@ -49,15 +49,24 @@ type TokenResolution struct {
Source string
}
var appConfig = crawlconfig.App{Name: "gitcrawl", ConfigEnv: DefaultConfigEnv}
func Default() Config {
home := homeDir()
base := filepath.Join(home, ".config", "gitcrawl")
paths, err := appConfig.DefaultPaths()
if err != nil {
paths = crawlconfig.Paths{
DBPath: filepath.Join(homeDir(), ".config", "gitcrawl", "gitcrawl.db"),
CacheDir: filepath.Join(homeDir(), ".config", "gitcrawl", "cache"),
LogDir: filepath.Join(homeDir(), ".config", "gitcrawl", "logs"),
}
}
base := filepath.Dir(paths.DBPath)
return Config{
Version: 1,
DBPath: filepath.Join(base, "gitcrawl.db"),
CacheDir: filepath.Join(base, "cache"),
DBPath: paths.DBPath,
CacheDir: paths.CacheDir,
VectorDir: filepath.Join(base, "vectors"),
LogDir: filepath.Join(base, "logs"),
LogDir: paths.LogDir,
EmbeddingBasis: "title_original",
GitHub: GitHubConfig{
TokenEnv: DefaultTokenEnv,
@ -77,26 +86,19 @@ func Default() Config {
}
func ResolvePath(flagPath string) string {
if strings.TrimSpace(flagPath) != "" {
return expandHome(flagPath)
path, err := appConfig.ResolveConfigPath(flagPath)
if err != nil {
return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml")
}
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
return expandHome(envPath)
}
home := homeDir()
return filepath.Join(home, ".config", "gitcrawl", "config.toml")
return path
}
func Load(path string) (Config, error) {
cfg := Default()
resolved := ResolvePath(path)
data, err := os.ReadFile(resolved)
if err != nil {
if err := crawlconfig.LoadTOML(resolved, &cfg); err != nil {
return Config{}, err
}
if err := toml.Unmarshal(data, &cfg); err != nil {
return Config{}, fmt.Errorf("parse config: %w", err)
}
if err := cfg.Normalize(); err != nil {
return Config{}, err
}
@ -108,21 +110,19 @@ func Save(path string, cfg Config) error {
return err
}
resolved := ResolvePath(path)
if err := os.MkdirAll(filepath.Dir(resolved), 0o755); err != nil {
return fmt.Errorf("create config dir: %w", err)
}
data, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("marshal config: %w", err)
}
return os.WriteFile(resolved, data, 0o600)
return crawlconfig.WriteTOML(resolved, cfg, 0o600)
}
func EnsureRuntimeDirs(cfg Config) error {
for _, path := range []string{cfg.CacheDir, cfg.VectorDir, cfg.LogDir, filepath.Dir(cfg.DBPath)} {
if err := os.MkdirAll(expandHome(path), 0o755); err != nil {
return fmt.Errorf("create runtime dir %s: %w", path, err)
}
if err := crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
DBPath: cfg.DBPath,
CacheDir: cfg.CacheDir,
LogDir: cfg.LogDir,
}); err != nil {
return err
}
if err := os.MkdirAll(crawlconfig.ExpandHome(cfg.VectorDir), 0o755); err != nil {
return fmt.Errorf("create runtime dir %s: %w", cfg.VectorDir, err)
}
return nil
}
@ -200,13 +200,7 @@ func envOrDefault(primary, fallback string) string {
}
func expandHome(path string) string {
if path == "~" {
return homeDir()
}
if strings.HasPrefix(path, "~/") {
return filepath.Join(homeDir(), strings.TrimPrefix(path, "~/"))
}
return path
return crawlconfig.ExpandHome(path)
}
func homeDir() string {

View File

@ -4,12 +4,9 @@ import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"runtime"
"time"
_ "modernc.org/sqlite"
crawlstore "github.com/vincentkoc/crawlkit/store"
)
const (
@ -39,64 +36,33 @@ type Status struct {
}
func Open(ctx context.Context, path string) (*Store, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return nil, fmt.Errorf("create db dir: %w", err)
}
if err := ensureDBFile(path); err != nil {
return nil, err
}
dsn := fmt.Sprintf(
"file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path})
if err != nil {
return nil, fmt.Errorf("open sqlite: %w", err)
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite: %w", err)
}
if err := tightenDBFilePerms(path); err != nil {
_ = db.Close()
return nil, err
}
db := base.DB()
st := &Store{db: db, path: path}
if err := st.migrate(ctx); err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
return st, nil
}
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
if _, err := os.Stat(path); err != nil {
return nil, fmt.Errorf("stat db file: %w", err)
}
dsn := fmt.Sprintf(
"file:%s?mode=ro&_pragma=query_only(1)&_pragma=foreign_keys(1)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.OpenReadOnly(ctx, path)
if err != nil {
return nil, fmt.Errorf("open sqlite readonly: %w", err)
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite readonly: %w", err)
return nil, err
}
db := base.DB()
st := &Store{db: db, path: path}
current, err := st.schemaVersion(ctx)
if err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
if current > schemaVersion {
_ = db.Close()
_ = base.Close()
return nil, fmt.Errorf("database schema version %d is newer than supported version %d", current, schemaVersion)
}
return st, nil
@ -273,31 +239,3 @@ func (s *Store) schemaVersion(ctx context.Context) (int, error) {
}
return version, nil
}
// ensureDBFile makes sure a file exists at path, creating an empty one with
// mode 0o600 when it is missing.
//
// An existing file is left untouched. Losing a creation race (the file
// appearing between the stat and the exclusive create) is treated as success.
// Stat, create and close failures are returned wrapped with the failing step.
func ensureDBFile(path string) error {
	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		return nil
	case !os.IsNotExist(statErr):
		return fmt.Errorf("stat db file: %w", statErr)
	}

	handle, openErr := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
	if openErr != nil {
		if os.IsExist(openErr) {
			// Someone else created it first; nothing to close.
			return nil
		}
		return fmt.Errorf("create db file: %w", openErr)
	}
	if handle == nil {
		return nil
	}
	if closeErr := handle.Close(); closeErr != nil {
		return fmt.Errorf("close db file: %w", closeErr)
	}
	return nil
}
// tightenDBFilePerms restricts the file at path to mode 0o600. It does
// nothing on Windows. A chmod failure is returned wrapped with
// "chmod db file".
func tightenDBFilePerms(path string) error {
	if runtime.GOOS != "windows" {
		if chmodErr := os.Chmod(path, 0o600); chmodErr != nil {
			return fmt.Errorf("chmod db file: %w", chmodErr)
		}
	}
	return nil
}

View File

@ -6,6 +6,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"log/slog"
"strconv"
"strings"
"time"
@ -13,6 +14,7 @@ import (
"github.com/openclaw/gitcrawl/internal/documents"
gh "github.com/openclaw/gitcrawl/internal/github"
"github.com/openclaw/gitcrawl/internal/store"
"github.com/vincentkoc/crawlkit/progress"
)
type GitHubClient interface {
@ -45,6 +47,7 @@ type Options struct {
IncludeComments bool
IncludePRDetails bool
Reporter gh.Reporter
Logger *slog.Logger
}
type Stats struct {
@ -132,6 +135,15 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
MetadataOnly: !options.IncludeComments,
StartedAt: started,
}
tracker := progress.New(options.Logger, progress.Options{
Name: "sync",
Unit: "threads",
Total: int64(len(rows)),
Attrs: []any{
"repository", stats.Repository,
"state", state,
},
})
persist := func(st *store.Store) error {
for _, row := range rows {
thread := mapIssueToThread(repoID, row, s.now().Format(time.RFC3339Nano))
@ -169,6 +181,11 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
} else {
stats.IssuesSynced++
}
tracker.Add(1,
"number", thread.Number,
"kind", thread.Kind,
"thread_state", thread.State,
)
}
if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 {
closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since)
@ -193,13 +210,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
}
if !options.IncludeComments {
if err := s.store.WithTx(ctx, persist); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}
if err := persist(s.store); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}

View File

@ -1,9 +1,12 @@
package syncer
import (
"bytes"
"context"
"encoding/json"
"log/slog"
"path/filepath"
"strings"
"testing"
"time"
@ -286,7 +289,13 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
s := New(fakeGitHub{}, st)
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", IncludeComments: true})
var progressLogs bytes.Buffer
stats, err := s.Sync(ctx, Options{
Owner: "openclaw",
Repo: "gitcrawl",
IncludeComments: true,
Logger: testProgressLogger(&progressLogs),
})
if err != nil {
t.Fatalf("sync: %v", err)
}
@ -321,6 +330,18 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
if documentCount != 1 {
t.Fatalf("document count: got %d want 1", documentCount)
}
for _, want := range []string{
`msg="sync progress"`,
`state=finished`,
`unit=threads`,
`percent=100.0`,
`completion=100.0%`,
`repository=openclaw/gitcrawl`,
} {
if !strings.Contains(progressLogs.String(), want) {
t.Fatalf("missing %q in progress logs:\n%s", want, progressLogs.String())
}
}
}
func TestSyncHydratesPullReviewComments(t *testing.T) {
@ -644,3 +665,14 @@ func TestMappingHelperBranches(t *testing.T) {
t.Fatalf("comment = %+v", comment)
}
}
// testProgressLogger returns a text-format slog logger that writes into out
// with the timestamp attribute removed, so tests can match log lines exactly.
func testProgressLogger(out *bytes.Buffer) *slog.Logger {
	// Returning the zero Attr tells slog to drop the attribute entirely.
	stripTime := func(_ []string, a slog.Attr) slog.Attr {
		if a.Key != slog.TimeKey {
			return a
		}
		return slog.Attr{}
	}
	handler := slog.NewTextHandler(out, &slog.HandlerOptions{ReplaceAttr: stripTime})
	return slog.New(handler)
}