merge: use crawlkit infrastructure
* feat/use-crawlkit: (33 commits) fix(tui): allow empty json smoke chore(deps): use crawlkit v0.4.0 fix(tui): use compact-pane crawlkit fix(tui): pick up crawlkit renderer fix(sync): log thread progress percentages chore(deps): bump crawlkit to v0.3.13 chore(deps): bump crawlkit to v0.3.12 chore(deps): update crawlkit to v0.3.11 chore(deps): tidy crawlkit checksums chore(deps): update crawlkit to v0.3.10 chore(deps): tidy crawlkit checksum chore(deps): update crawlkit to v0.3.9 chore(deps): update crawlkit to v0.3.8 docs(changelog): note TUI alignment chore(deps): update crawlkit to v0.3.7 chore(deps): update crawlkit to v0.3.6 chore(deps): update crawlkit to v0.3.5 fix(tui): use crawlkit empty-json fix fix(tui): use crawlkit safe renderer fix(cli): document portable help ...
This commit is contained in:
commit
1ca61691c0
10
.github/workflows/ci.yml
vendored
10
.github/workflows/ci.yml
vendored
@ -64,6 +64,16 @@ jobs:
|
||||
- name: Build
|
||||
run: go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=${GITHUB_SHA:0:7}" -o bin/gitcrawl ./cmd/gitcrawl
|
||||
|
||||
- name: Smoke test TUI help
|
||||
run: |
|
||||
set -euo pipefail
|
||||
test -n "$(./bin/gitcrawl --version)"
|
||||
./bin/gitcrawl metadata --json | grep -q '"schema_version"'
|
||||
./bin/gitcrawl status --json | grep -q '"databases"'
|
||||
output="$(./bin/gitcrawl help tui)"
|
||||
printf '%s\n' "$output"
|
||||
printf '%s' "$output" | grep -q "gitcrawl tui"
|
||||
|
||||
- name: Snapshot release build
|
||||
uses: goreleaser/goreleaser-action@v7.1.0
|
||||
with:
|
||||
|
||||
@ -15,6 +15,11 @@
|
||||
- Auto-hydrate one exact pull request when local PR detail reads miss or check/run data is stale, using `gh auth token` if `GITHUB_TOKEN` is absent, then retry from SQLite before falling back to live `gh`.
|
||||
- Cache more ghx-style read-only fallthroughs, including release, workflow, secret, variable, project, ruleset, gist, org, and search reads; cache repeat read failures by default; and clear the fallthrough cache after the corresponding mutating `gh` commands.
|
||||
- Promote portable backups to the v2 format: keep compact comments, PR files, commits, checks, and workflow runs while stripping raw JSON, generated documents, vectors, clusters, and run history.
|
||||
- Add crawlkit control metadata/status surfaces with command-local `metadata --json`, `status --json`, and `doctor --json`.
|
||||
- Include the primary SQLite database inventory in status JSON so local control surfaces can discover archive storage without opening live stores.
|
||||
- Route config path handling and SQLite openers through `crawlkit` so GitHub archive tooling shares the same foundation as the Slack, Discord, and Notion crawlers.
|
||||
- Keep shared crawl app TUI nomenclature aligned while `gitcrawl tui` remains the richer cluster-browser reference implementation.
|
||||
- Keep the existing `gitcrawl tui` as the family reference terminal interface and add CI smoke coverage for its help surface.
|
||||
|
||||
## 0.1.2 - 2026-05-01
|
||||
|
||||
|
||||
@ -91,4 +91,5 @@ go build -ldflags "-X github.com/openclaw/gitcrawl/internal/cli.version=$(git de
|
||||
```bash
|
||||
go test ./...
|
||||
go build ./cmd/gitcrawl
|
||||
go run ./cmd/gitcrawl help tui
|
||||
```
|
||||
|
||||
5
go.mod
5
go.mod
@ -8,8 +8,7 @@ require (
|
||||
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
|
||||
github.com/charmbracelet/x/ansi v0.11.7
|
||||
github.com/mattn/go-isatty v0.0.22
|
||||
github.com/pelletier/go-toml/v2 v2.3.1
|
||||
modernc.org/sqlite v1.50.0
|
||||
github.com/vincentkoc/crawlkit v0.4.0
|
||||
)
|
||||
|
||||
require (
|
||||
@ -30,6 +29,7 @@ require (
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
@ -38,4 +38,5 @@ require (
|
||||
modernc.org/libc v1.72.1 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
modernc.org/sqlite v1.50.0 // indirect
|
||||
)
|
||||
|
||||
2
go.sum
2
go.sum
@ -56,6 +56,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI=
|
||||
github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@ -23,6 +24,7 @@ import (
|
||||
"github.com/openclaw/gitcrawl/internal/store"
|
||||
"github.com/openclaw/gitcrawl/internal/syncer"
|
||||
"github.com/openclaw/gitcrawl/internal/vector"
|
||||
"github.com/vincentkoc/crawlkit/control"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -124,12 +126,16 @@ func (a *App) Run(ctx context.Context, args []string) error {
|
||||
switch rest[0] {
|
||||
case "version":
|
||||
return a.writeOutput("version", map[string]string{"version": version}, false)
|
||||
case "metadata":
|
||||
return a.runMetadata(rest[1:])
|
||||
case "serve":
|
||||
return usageErr(fmt.Errorf("serve is not supported in gitcrawl"))
|
||||
case "init":
|
||||
return a.runInit(ctx, rest[1:])
|
||||
case "doctor":
|
||||
return a.runDoctor(ctx, rest[1:])
|
||||
case "status":
|
||||
return a.runStatus(ctx, rest[1:])
|
||||
case "sync":
|
||||
return a.runSync(ctx, rest[1:])
|
||||
case "threads":
|
||||
@ -1077,23 +1083,35 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
|
||||
rt, err = a.openLocalRuntimeReadOnly(ctx)
|
||||
}
|
||||
if err != nil {
|
||||
if !interactive && errors.Is(err, os.ErrNotExist) {
|
||||
cfg := config.Default()
|
||||
if cfgErr := cfg.Normalize(); cfgErr != nil {
|
||||
return cfgErr
|
||||
}
|
||||
sort, sortErr := resolveTUISort(*sortMode, cfg)
|
||||
if sortErr != nil {
|
||||
return sortErr
|
||||
}
|
||||
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, cfg, cfg.DBPath, sort, minSize, limit, *hideClosed), true)
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer rt.Store.Close()
|
||||
|
||||
repo, inferred, err := a.resolveOptionalRepository(ctx, rt, fs.Args())
|
||||
if err != nil {
|
||||
if !interactive && len(fs.Args()) == 0 && strings.Contains(err.Error(), "no local repositories found") {
|
||||
sort, sortErr := resolveTUISort(*sortMode, rt.Config)
|
||||
if sortErr != nil {
|
||||
return sortErr
|
||||
}
|
||||
return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, rt.Config, rt.SourceDBPath, sort, minSize, limit, *hideClosed), true)
|
||||
}
|
||||
return err
|
||||
}
|
||||
sort := strings.TrimSpace(*sortMode)
|
||||
if sort == "" {
|
||||
sort = strings.TrimSpace(rt.Config.TUI.DefaultSort)
|
||||
}
|
||||
if sort == "" {
|
||||
sort = "size"
|
||||
}
|
||||
if sort != "recent" && sort != "oldest" && sort != "size" {
|
||||
return usageErr(fmt.Errorf("unsupported sort %q", sort))
|
||||
sort, err := resolveTUISort(*sortMode, rt.Config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
showClosed := !*hideClosed || *includeClosed
|
||||
|
||||
@ -1148,6 +1166,38 @@ func (a *App) runTUI(ctx context.Context, args []string) error {
|
||||
return a.runInteractiveTUI(ctx, rt.Store, repo.ID, payload)
|
||||
}
|
||||
|
||||
// resolveTUISort picks the cluster sort mode for the tui command.
//
// The trimmed raw value wins. When it is empty the trimmed
// cfg.TUI.DefaultSort is used, and when that is empty as well the mode falls
// back to "size". Only "recent", "oldest", and "size" are accepted; any other
// value yields an empty string and a usage error ("unsupported sort ...").
func resolveTUISort(raw string, cfg config.Config) (string, error) {
|
||||
sort := strings.TrimSpace(raw)
|
||||
if sort == "" {
|
||||
// No explicit value was given: fall back to the configured default.
sort = strings.TrimSpace(cfg.TUI.DefaultSort)
|
||||
}
|
||||
if sort == "" {
|
||||
// Neither the argument nor the config named a sort mode.
sort = "size"
|
||||
}
|
||||
if sort != "recent" && sort != "oldest" && sort != "size" {
|
||||
return "", usageErr(fmt.Errorf("unsupported sort %q", sort))
|
||||
}
|
||||
return sort, nil
|
||||
}
|
||||
|
||||
// emptyClusterBrowserPayload builds a "cluster-browser" payload that carries
// the requested view settings but no clusters.
//
// sourceDBPath identifies the database the payload is attributed to; when it
// is blank (after trimming) cfg.DBPath is used instead. sort, minSize, limit,
// and hideClosed are copied into the payload unchanged, and the embedding
// fields are taken from cfg.
func emptyClusterBrowserPayload(ctx context.Context, cfg config.Config, sourceDBPath, sort string, minSize, limit int, hideClosed bool) clusterBrowserPayload {
|
||||
if strings.TrimSpace(sourceDBPath) == "" {
|
||||
sourceDBPath = cfg.DBPath
|
||||
}
|
||||
return clusterBrowserPayload{
|
||||
Mode: "cluster-browser",
|
||||
DBSource: databaseSourceKind(sourceDBPath),
|
||||
DBLocation: databaseSourceLocation(ctx, sourceDBPath),
|
||||
Sort: sort,
|
||||
MinSize: minSize,
|
||||
Limit: limit,
|
||||
HideClosed: hideClosed,
|
||||
EmbedModel: cfg.OpenAI.EmbedModel,
|
||||
EmbeddingBasis: cfg.EmbeddingBasis,
|
||||
// Deliberately an empty, non-nil slice rather than nil.
// NOTE(review): presumably so JSON output carries an array instead of null — confirm against the payload's JSON tags.
Clusters: []store.ClusterSummary{},
|
||||
}
|
||||
}
|
||||
|
||||
func databaseSourceKind(dbPath string) string {
|
||||
if _, ok := portableStoreRoot(dbPath); ok {
|
||||
return "remote"
|
||||
@ -1816,6 +1866,7 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
|
||||
Reporter: func(message string) {
|
||||
fmt.Fprintln(a.Stderr, message)
|
||||
},
|
||||
Logger: progressLogger(a.Stderr),
|
||||
})
|
||||
if err != nil {
|
||||
return syncer.Stats{}, err
|
||||
@ -1823,6 +1874,17 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// progressLogger returns a slog text logger that writes to w and omits the
// timestamp attribute from every record.
func progressLogger(w io.Writer) *slog.Logger {
|
||||
return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
|
||||
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
|
||||
if attr.Key == slog.TimeKey {
|
||||
// Returning the zero Attr makes the handler drop the attribute.
return slog.Attr{}
|
||||
}
|
||||
return attr
|
||||
},
|
||||
}))
|
||||
}
|
||||
|
||||
func (a *App) runInit(ctx context.Context, args []string) error {
|
||||
fs := flag.NewFlagSet("init", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
@ -1887,6 +1949,8 @@ func (a *App) runPortable(ctx context.Context, args []string) error {
|
||||
return usageErr(fmt.Errorf("portable requires a subcommand"))
|
||||
}
|
||||
switch args[0] {
|
||||
case "help", "--help", "-h":
|
||||
return a.printCommandUsage("portable")
|
||||
case "prune":
|
||||
return a.runPortablePrune(ctx, args[1:])
|
||||
default:
|
||||
@ -2197,6 +2261,113 @@ func (a *App) runDoctor(ctx context.Context, args []string) error {
|
||||
}, true)
|
||||
}
|
||||
|
||||
// runMetadata implements the "metadata" command.
//
// It accepts only the --json flag (positional arguments are a usage error),
// builds a crawlkit control manifest describing gitcrawl, and writes it through
// writeOutput under the "metadata" key. The path fields come from
// config.Default() and config.ResolvePath(""); the user's config file is not
// loaded here.
func (a *App) runMetadata(args []string) error {
|
||||
fs := flag.NewFlagSet("metadata", flag.ContinueOnError)
|
||||
// Flag parse errors are returned as usage errors instead of being printed by the flag package.
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "write JSON output")
|
||||
if err := fs.Parse(normalizeCommandArgs(args, nil)); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
a.applyCommandJSON(*jsonOut)
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(fmt.Errorf("metadata takes flags only"))
|
||||
}
|
||||
cfg := config.Default()
|
||||
manifest := control.NewManifest("gitcrawl", "Git Crawl", "gitcrawl")
|
||||
manifest.Description = "Local-first GitHub issue and pull request crawler."
|
||||
manifest.Branding = control.Branding{SymbolName: "point.3.connected.trianglepath.dotted", AccentColor: "#2da44e"}
|
||||
manifest.Paths = control.Paths{
|
||||
DefaultConfig: config.ResolvePath(""),
|
||||
ConfigEnv: config.DefaultConfigEnv,
|
||||
DefaultDatabase: cfg.DBPath,
|
||||
DefaultCache: cfg.CacheDir,
|
||||
DefaultLogs: cfg.LogDir,
|
||||
}
|
||||
manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "search", "tui", "portable", "clusters", "embeddings"}
|
||||
manifest.Privacy = control.Privacy{ContainsPrivateMessages: false, ExportsSecrets: false, LocalOnlyScopes: []string{"github", "sqlite", "portable"}}
|
||||
// Each entry maps a command key to the argv that invokes it, flagging JSON-capable, mutating, and legacy commands.
manifest.Commands = map[string]control.Command{
|
||||
"status": {Title: "Status", Argv: []string{"gitcrawl", "status", "--json"}, JSON: true},
|
||||
"doctor": {Title: "Doctor", Argv: []string{"gitcrawl", "doctor", "--json"}, JSON: true},
|
||||
"sync": {Title: "Sync repository", Argv: []string{"gitcrawl", "sync", "--json"}, JSON: true, Mutates: true},
|
||||
"search": {Title: "Search", Argv: []string{"gitcrawl", "search", "--json"}, JSON: true},
|
||||
"tui": {Title: "Terminal cluster browser", Argv: []string{"gitcrawl", "tui"}},
|
||||
"tui-json": {Title: "Terminal cluster data", Argv: []string{"gitcrawl", "tui", "--json"}, JSON: true},
|
||||
"portable": {Title: "Portable store tools", Argv: []string{"gitcrawl", "portable", "prune", "--json"}, JSON: true, Mutates: true},
|
||||
"clusters": {Title: "Clusters", Argv: []string{"gitcrawl", "clusters", "--json"}, JSON: true},
|
||||
"legacy-sync-api": {Title: "Legacy sync-status alias", Argv: []string{"gitcrawl", "sync-status"}, Legacy: true, Deprecated: true},
|
||||
}
|
||||
return a.writeOutput("metadata", manifest, false)
|
||||
}
|
||||
|
||||
// runStatus implements the "status" command.
//
// It accepts only the --json flag (positional arguments are a usage error),
// loads the config, reads archive counts from the database when the database
// file exists, and writes the resulting control status through writeOutput
// under the "status" key.
//
// A missing config file is not an error: normalized defaults are used instead.
// A missing database file is not an error either: the status then carries only
// the database path. Any other config, stat, open, or query error is returned.
func (a *App) runStatus(ctx context.Context, args []string) error {
|
||||
fs := flag.NewFlagSet("status", flag.ContinueOnError)
|
||||
// Flag parse errors are returned as usage errors instead of being printed by the flag package.
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "write JSON output")
|
||||
if err := fs.Parse(normalizeCommandArgs(args, nil)); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
a.applyCommandJSON(*jsonOut)
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(fmt.Errorf("status takes flags only"))
|
||||
}
|
||||
cfg, err := config.Load(a.configPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
// No config file on disk: report status against normalized defaults.
cfg = config.Default()
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
status := store.Status{DBPath: cfg.DBPath}
|
||||
// Only open the store when the database file already exists; the store is opened read-only.
if _, err := os.Stat(cfg.DBPath); err == nil {
|
||||
st, err := store.OpenReadOnly(ctx, cfg.DBPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer st.Close()
|
||||
status, err = st.Status(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
// st.Status replaces the whole struct above, so the configured path is set again here.
status.DBPath = cfg.DBPath
|
||||
return a.writeOutput("status", controlStatus(config.ResolvePath(a.configPath), cfg, status), false)
|
||||
}
|
||||
|
||||
// controlStatus converts a store.Status into the crawlkit control.Status shape.
//
// The result is named "gitcrawl", has a summary line with the thread and
// repository counts, always reports State "current", and lists a single
// "primary" database entry for status.DBPath. LastSyncAt is set only when
// status.LastSyncAt is non-zero, formatted as RFC 3339 in UTC. The cfg
// parameter is not read by this function.
func controlStatus(configPath string, cfg config.Config, status store.Status) control.Status {
|
||||
counts := []control.Count{
|
||||
control.NewCount("repositories", "Repositories", int64(status.RepositoryCount)),
|
||||
control.NewCount("threads", "Threads", int64(status.ThreadCount)),
|
||||
control.NewCount("open_threads", "Open threads", int64(status.OpenThreadCount)),
|
||||
control.NewCount("clusters", "Clusters", int64(status.ClusterCount)),
|
||||
}
|
||||
out := control.NewStatus("gitcrawl", fmt.Sprintf("%d threads across %d repositories", status.ThreadCount, status.RepositoryCount))
|
||||
out.State = "current"
|
||||
out.ConfigPath = configPath
|
||||
out.DatabasePath = status.DBPath
|
||||
out.Counts = counts
|
||||
if !status.LastSyncAt.IsZero() {
|
||||
out.LastSyncAt = status.LastSyncAt.UTC().Format(time.RFC3339)
|
||||
}
|
||||
// NOTE(review): the meaning of the boolean argument and how db.Bytes is populated are defined by crawlkit, not visible here — confirm.
db := control.SQLiteDatabase("primary", "GitHub archive", "archive", status.DBPath, true, counts)
|
||||
out.DatabaseBytes = db.Bytes
|
||||
// The write-ahead log is looked up next to the database file under the "-wal" suffix.
out.WALBytes = fileSize(status.DBPath + "-wal")
|
||||
out.Databases = []control.Database{db}
|
||||
return out
|
||||
}
|
||||
|
||||
// fileSize reports the size in bytes of the regular file at path.
//
// It returns 0 when the path cannot be stat'ed (for example because it does
// not exist) and also when it names a directory or another non-regular file:
// FileInfo.Size is only defined for regular files and is system-dependent for
// everything else, so it would be meaningless as a byte count.
func fileSize(path string) int64 {
	info, err := os.Stat(path)
	if err != nil || !info.Mode().IsRegular() {
		return 0
	}
	return info.Size()
}
|
||||
|
||||
func (a *App) applyCommandJSON(enabled bool) {
|
||||
if enabled {
|
||||
a.format = FormatJSON
|
||||
@ -2683,6 +2854,9 @@ func (a *App) printUsage() {
|
||||
|
||||
func (a *App) printCommandUsage(command string) error {
|
||||
switch command {
|
||||
case "portable":
|
||||
fmt.Fprint(a.Stdout, portableUsageText)
|
||||
return nil
|
||||
case "tui":
|
||||
fmt.Fprint(a.Stdout, tuiUsageText)
|
||||
return nil
|
||||
@ -2704,6 +2878,8 @@ Global flags:
|
||||
--version print version
|
||||
|
||||
Core commands:
|
||||
metadata print crawlkit control metadata
|
||||
status print fast read-only archive status
|
||||
init create config, optionally from a portable store
|
||||
doctor check config, token, and database readiness
|
||||
sync sync GitHub issue and pull request metadata
|
||||
@ -2748,3 +2924,12 @@ Press n to load neighbors for the selected issue or PR.
|
||||
Enter from the members pane also loads neighbors before opening detail.
|
||||
The TUI quietly refreshes from the local store every 15 seconds and leaves the current status alone when nothing changed.
|
||||
`
|
||||
|
||||
// portableUsageText is the help text for the "portable" command; it is what
// printCommandUsage writes to stdout for that command.
const portableUsageText = `gitcrawl portable manages local portable-store snapshots.
|
||||
|
||||
Usage:
|
||||
gitcrawl portable prune [--body-chars N] [--no-vacuum] [--json]
|
||||
|
||||
Subcommands:
|
||||
prune prune volatile payloads from the configured portable store
|
||||
`
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@ -1022,6 +1023,60 @@ func TestTUIInfersRepository(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTUIJSONUsesDefaultsWhenConfigMissing runs "tui --json" against a config
// path that does not exist and checks that the command succeeds, emits a
// "cluster-browser" payload with an empty clusters array, and does not create
// the config file as a side effect.
func TestTUIJSONUsesDefaultsWhenConfigMissing(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
configPath := filepath.Join(dir, "missing.toml")
|
||||
// Point the database at a path inside the temp dir that is never created.
// NOTE(review): presumably read during config normalization — confirm in the config package.
t.Setenv("GITCRAWL_DB_PATH", filepath.Join(dir, "missing.db"))
|
||||
|
||||
run := New()
|
||||
var stdout bytes.Buffer
|
||||
run.Stdout = &stdout
|
||||
if err := run.Run(ctx, []string{"--config", configPath, "tui", "--json"}); err != nil {
|
||||
t.Fatalf("tui: %v", err)
|
||||
}
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode tui payload: %v\n%s", err, stdout.String())
|
||||
}
|
||||
if payload["mode"] != "cluster-browser" {
|
||||
t.Fatalf("mode = %#v", payload["mode"])
|
||||
}
|
||||
// The type assertion fails for a JSON null, so this also requires clusters to be an array.
clusters, ok := payload["clusters"].([]any)
|
||||
if !ok || len(clusters) != 0 {
|
||||
t.Fatalf("clusters = %#v", payload["clusters"])
|
||||
}
|
||||
if _, err := os.Stat(configPath); !errors.Is(err, os.ErrNotExist) {
|
||||
t.Fatalf("config file should not be created, stat err=%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTUIJSONHandlesEmptyStoreWithoutRepository initializes a config and
// database with "init", then runs "tui --json" without naming a repository and
// checks that the command succeeds with an empty clusters array.
func TestTUIJSONHandlesEmptyStoreWithoutRepository(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
configPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "gitcrawl.db")
|
||||
app := New()
|
||||
if err := app.Run(ctx, []string{"--config", configPath, "init", "--db", dbPath}); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
|
||||
// A fresh App is used for the command under test.
run := New()
|
||||
var stdout bytes.Buffer
|
||||
run.Stdout = &stdout
|
||||
if err := run.Run(ctx, []string{"--config", configPath, "tui", "--json"}); err != nil {
|
||||
t.Fatalf("tui: %v", err)
|
||||
}
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode tui payload: %v\n%s", err, stdout.String())
|
||||
}
|
||||
// The type assertion fails for a JSON null, so this also requires clusters to be an array.
clusters, ok := payload["clusters"].([]any)
|
||||
if !ok || len(clusters) != 0 {
|
||||
t.Fatalf("clusters = %#v", payload["clusters"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestTUIRequiresInteractiveTerminalByDefault(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
|
||||
@ -6,7 +6,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
crawlconfig "github.com/vincentkoc/crawlkit/config"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -49,15 +49,24 @@ type TokenResolution struct {
|
||||
Source string
|
||||
}
|
||||
|
||||
var appConfig = crawlconfig.App{Name: "gitcrawl", ConfigEnv: DefaultConfigEnv}
|
||||
|
||||
func Default() Config {
|
||||
home := homeDir()
|
||||
base := filepath.Join(home, ".config", "gitcrawl")
|
||||
paths, err := appConfig.DefaultPaths()
|
||||
if err != nil {
|
||||
paths = crawlconfig.Paths{
|
||||
DBPath: filepath.Join(homeDir(), ".config", "gitcrawl", "gitcrawl.db"),
|
||||
CacheDir: filepath.Join(homeDir(), ".config", "gitcrawl", "cache"),
|
||||
LogDir: filepath.Join(homeDir(), ".config", "gitcrawl", "logs"),
|
||||
}
|
||||
}
|
||||
base := filepath.Dir(paths.DBPath)
|
||||
return Config{
|
||||
Version: 1,
|
||||
DBPath: filepath.Join(base, "gitcrawl.db"),
|
||||
CacheDir: filepath.Join(base, "cache"),
|
||||
DBPath: paths.DBPath,
|
||||
CacheDir: paths.CacheDir,
|
||||
VectorDir: filepath.Join(base, "vectors"),
|
||||
LogDir: filepath.Join(base, "logs"),
|
||||
LogDir: paths.LogDir,
|
||||
EmbeddingBasis: "title_original",
|
||||
GitHub: GitHubConfig{
|
||||
TokenEnv: DefaultTokenEnv,
|
||||
@ -77,26 +86,19 @@ func Default() Config {
|
||||
}
|
||||
|
||||
func ResolvePath(flagPath string) string {
|
||||
if strings.TrimSpace(flagPath) != "" {
|
||||
return expandHome(flagPath)
|
||||
path, err := appConfig.ResolveConfigPath(flagPath)
|
||||
if err != nil {
|
||||
return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml")
|
||||
}
|
||||
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
|
||||
return expandHome(envPath)
|
||||
}
|
||||
home := homeDir()
|
||||
return filepath.Join(home, ".config", "gitcrawl", "config.toml")
|
||||
return path
|
||||
}
|
||||
|
||||
func Load(path string) (Config, error) {
|
||||
cfg := Default()
|
||||
resolved := ResolvePath(path)
|
||||
data, err := os.ReadFile(resolved)
|
||||
if err != nil {
|
||||
if err := crawlconfig.LoadTOML(resolved, &cfg); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if err := toml.Unmarshal(data, &cfg); err != nil {
|
||||
return Config{}, fmt.Errorf("parse config: %w", err)
|
||||
}
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
@ -108,21 +110,19 @@ func Save(path string, cfg Config) error {
|
||||
return err
|
||||
}
|
||||
resolved := ResolvePath(path)
|
||||
if err := os.MkdirAll(filepath.Dir(resolved), 0o755); err != nil {
|
||||
return fmt.Errorf("create config dir: %w", err)
|
||||
}
|
||||
data, err := toml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal config: %w", err)
|
||||
}
|
||||
return os.WriteFile(resolved, data, 0o600)
|
||||
return crawlconfig.WriteTOML(resolved, cfg, 0o600)
|
||||
}
|
||||
|
||||
func EnsureRuntimeDirs(cfg Config) error {
|
||||
for _, path := range []string{cfg.CacheDir, cfg.VectorDir, cfg.LogDir, filepath.Dir(cfg.DBPath)} {
|
||||
if err := os.MkdirAll(expandHome(path), 0o755); err != nil {
|
||||
return fmt.Errorf("create runtime dir %s: %w", path, err)
|
||||
}
|
||||
if err := crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
|
||||
DBPath: cfg.DBPath,
|
||||
CacheDir: cfg.CacheDir,
|
||||
LogDir: cfg.LogDir,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(crawlconfig.ExpandHome(cfg.VectorDir), 0o755); err != nil {
|
||||
return fmt.Errorf("create runtime dir %s: %w", cfg.VectorDir, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -200,13 +200,7 @@ func envOrDefault(primary, fallback string) string {
|
||||
}
|
||||
|
||||
func expandHome(path string) string {
|
||||
if path == "~" {
|
||||
return homeDir()
|
||||
}
|
||||
if strings.HasPrefix(path, "~/") {
|
||||
return filepath.Join(homeDir(), strings.TrimPrefix(path, "~/"))
|
||||
}
|
||||
return path
|
||||
return crawlconfig.ExpandHome(path)
|
||||
}
|
||||
|
||||
func homeDir() string {
|
||||
|
||||
@ -4,12 +4,9 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
_ "modernc.org/sqlite"
|
||||
crawlstore "github.com/vincentkoc/crawlkit/store"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -39,64 +36,33 @@ type Status struct {
|
||||
}
|
||||
|
||||
func Open(ctx context.Context, path string) (*Store, error) {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
return nil, fmt.Errorf("create db dir: %w", err)
|
||||
}
|
||||
if err := ensureDBFile(path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dsn := fmt.Sprintf(
|
||||
"file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
|
||||
path,
|
||||
)
|
||||
db, err := sql.Open("sqlite", dsn)
|
||||
base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open sqlite: %w", err)
|
||||
}
|
||||
db.SetMaxOpenConns(1)
|
||||
db.SetMaxIdleConns(1)
|
||||
if err := db.PingContext(ctx); err != nil {
|
||||
_ = db.Close()
|
||||
return nil, fmt.Errorf("ping sqlite: %w", err)
|
||||
}
|
||||
if err := tightenDBFilePerms(path); err != nil {
|
||||
_ = db.Close()
|
||||
return nil, err
|
||||
}
|
||||
db := base.DB()
|
||||
st := &Store{db: db, path: path}
|
||||
if err := st.migrate(ctx); err != nil {
|
||||
_ = db.Close()
|
||||
_ = base.Close()
|
||||
return nil, err
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return nil, fmt.Errorf("stat db file: %w", err)
|
||||
}
|
||||
dsn := fmt.Sprintf(
|
||||
"file:%s?mode=ro&_pragma=query_only(1)&_pragma=foreign_keys(1)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
|
||||
path,
|
||||
)
|
||||
db, err := sql.Open("sqlite", dsn)
|
||||
base, err := crawlstore.OpenReadOnly(ctx, path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open sqlite readonly: %w", err)
|
||||
}
|
||||
db.SetMaxOpenConns(1)
|
||||
db.SetMaxIdleConns(1)
|
||||
if err := db.PingContext(ctx); err != nil {
|
||||
_ = db.Close()
|
||||
return nil, fmt.Errorf("ping sqlite readonly: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
db := base.DB()
|
||||
st := &Store{db: db, path: path}
|
||||
current, err := st.schemaVersion(ctx)
|
||||
if err != nil {
|
||||
_ = db.Close()
|
||||
_ = base.Close()
|
||||
return nil, err
|
||||
}
|
||||
if current > schemaVersion {
|
||||
_ = db.Close()
|
||||
_ = base.Close()
|
||||
return nil, fmt.Errorf("database schema version %d is newer than supported version %d", current, schemaVersion)
|
||||
}
|
||||
return st, nil
|
||||
@ -273,31 +239,3 @@ func (s *Store) schemaVersion(ctx context.Context) (int, error) {
|
||||
}
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func ensureDBFile(path string) error {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return nil
|
||||
} else if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("stat db file: %w", err)
|
||||
}
|
||||
file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("create db file: %w", err)
|
||||
}
|
||||
if file != nil {
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("close db file: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tightenDBFilePerms(path string) error {
|
||||
if runtime.GOOS == "windows" {
|
||||
return nil
|
||||
}
|
||||
if err := os.Chmod(path, 0o600); err != nil {
|
||||
return fmt.Errorf("chmod db file: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -6,6 +6,7 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@ -13,6 +14,7 @@ import (
|
||||
"github.com/openclaw/gitcrawl/internal/documents"
|
||||
gh "github.com/openclaw/gitcrawl/internal/github"
|
||||
"github.com/openclaw/gitcrawl/internal/store"
|
||||
"github.com/vincentkoc/crawlkit/progress"
|
||||
)
|
||||
|
||||
type GitHubClient interface {
|
||||
@ -45,6 +47,7 @@ type Options struct {
|
||||
IncludeComments bool
|
||||
IncludePRDetails bool
|
||||
Reporter gh.Reporter
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
@ -132,6 +135,15 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
MetadataOnly: !options.IncludeComments,
|
||||
StartedAt: started,
|
||||
}
|
||||
tracker := progress.New(options.Logger, progress.Options{
|
||||
Name: "sync",
|
||||
Unit: "threads",
|
||||
Total: int64(len(rows)),
|
||||
Attrs: []any{
|
||||
"repository", stats.Repository,
|
||||
"state", state,
|
||||
},
|
||||
})
|
||||
persist := func(st *store.Store) error {
|
||||
for _, row := range rows {
|
||||
thread := mapIssueToThread(repoID, row, s.now().Format(time.RFC3339Nano))
|
||||
@ -169,6 +181,11 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
} else {
|
||||
stats.IssuesSynced++
|
||||
}
|
||||
tracker.Add(1,
|
||||
"number", thread.Number,
|
||||
"kind", thread.Kind,
|
||||
"thread_state", thread.State,
|
||||
)
|
||||
}
|
||||
if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 {
|
||||
closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since)
|
||||
@ -193,13 +210,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
|
||||
}
|
||||
if !options.IncludeComments {
|
||||
if err := s.store.WithTx(ctx, persist); err != nil {
|
||||
tracker.Finish(err)
|
||||
return Stats{}, err
|
||||
}
|
||||
tracker.Finish(nil)
|
||||
return stats, nil
|
||||
}
|
||||
if err := persist(s.store); err != nil {
|
||||
tracker.Finish(err)
|
||||
return Stats{}, err
|
||||
}
|
||||
tracker.Finish(nil)
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,12 @@
|
||||
package syncer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -286,7 +289,13 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
|
||||
|
||||
s := New(fakeGitHub{}, st)
|
||||
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
|
||||
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", IncludeComments: true})
|
||||
var progressLogs bytes.Buffer
|
||||
stats, err := s.Sync(ctx, Options{
|
||||
Owner: "openclaw",
|
||||
Repo: "gitcrawl",
|
||||
IncludeComments: true,
|
||||
Logger: testProgressLogger(&progressLogs),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("sync: %v", err)
|
||||
}
|
||||
@ -321,6 +330,18 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
|
||||
if documentCount != 1 {
|
||||
t.Fatalf("document count: got %d want 1", documentCount)
|
||||
}
|
||||
for _, want := range []string{
|
||||
`msg="sync progress"`,
|
||||
`state=finished`,
|
||||
`unit=threads`,
|
||||
`percent=100.0`,
|
||||
`completion=100.0%`,
|
||||
`repository=openclaw/gitcrawl`,
|
||||
} {
|
||||
if !strings.Contains(progressLogs.String(), want) {
|
||||
t.Fatalf("missing %q in progress logs:\n%s", want, progressLogs.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyncHydratesPullReviewComments(t *testing.T) {
|
||||
@ -681,3 +702,14 @@ func TestMappingFallbackBranches(t *testing.T) {
|
||||
t.Fatalf("thread = %+v", thread)
|
||||
}
|
||||
}
|
||||
|
||||
// testProgressLogger returns a slog text logger that writes into out with the
// timestamp attribute removed, so tests can match log lines as fixed strings.
func testProgressLogger(out *bytes.Buffer) *slog.Logger {
|
||||
return slog.New(slog.NewTextHandler(out, &slog.HandlerOptions{
|
||||
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
|
||||
if attr.Key == slog.TimeKey {
|
||||
// Returning the zero Attr makes the handler drop the attribute.
return slog.Attr{}
|
||||
}
|
||||
return attr
|
||||
},
|
||||
}))
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user