feat(mirror): add safer share repo helpers

This commit is contained in:
Vincent Koc 2026-05-05 17:18:53 -07:00
parent 59c0033fc7
commit 8cda2498b2
No known key found for this signature in database
3 changed files with 285 additions and 5 deletions

View File

@ -5,6 +5,8 @@
- Initial `crawlkit` module scaffold.
- Document the `crawlkit` versus crawl-app boundary for embeddings, search,
inference, sync state, snapshots, SQLite, and git mirrors.
- Add safer `mirror` helpers for origin updates, existing-origin pulls,
path-scoped commits, and portable SQLite sidecar cleanup.
- Add `tui`, a shared Bubble Tea terminal archive browser used by the crawl apps for consistent `tui` command behavior.
- Improve `tui` rows with compact column rendering, pane-specific scrolling, and full-height pane borders.
- Tune `tui` pane colors and mouse-wheel buffering to better match the `gitcrawl` terminal browser feel.

View File

@ -23,6 +23,9 @@ func EnsureRepo(ctx context.Context, opts Options) error {
return errors.New("repo path is required")
}
if _, err := os.Stat(filepath.Join(opts.RepoPath, ".git")); err == nil {
if opts.Remote != "" {
return setOrigin(ctx, opts)
}
return nil
}
if opts.Remote != "" {
@ -49,6 +52,17 @@ func EnsureRepo(ctx context.Context, opts Options) error {
return nil
}
// EnsureRemote requires a non-empty remote, makes sure the repository exists
// via EnsureRepo, and then points origin at that remote via setOrigin.
//
// It returns an error when the normalized options carry no remote, or when
// either EnsureRepo or setOrigin fails.
func EnsureRemote(ctx context.Context, opts Options) error {
	normalized := normalize(opts)
	if normalized.Remote == "" {
		return errors.New("remote is required")
	}
	err := EnsureRepo(ctx, normalized)
	if err == nil {
		// Only touch origin once the repository is known to be in place.
		err = setOrigin(ctx, normalized)
	}
	return err
}
func Pull(ctx context.Context, opts Options) error {
opts = normalize(opts)
if opts.Remote == "" {
@ -67,19 +81,51 @@ func Pull(ctx context.Context, opts Options) error {
return run(ctx, opts.RepoPath, opts.Git, "checkout", "-B", opts.Branch, "origin/"+opts.Branch)
}
// PullCurrent brings opts.Branch up to date from the repository's existing
// origin. When opts.Remote is set it simply delegates to Pull.
//
// Without a remote it ensures the repo exists, fetches origin with pruning,
// and then either creates/resets the local branch from origin/<branch> (when
// the local branch ref cannot be resolved) or checks the branch out and
// fast-forwards it from origin.
func PullCurrent(ctx context.Context, opts Options) error {
	opts = normalize(opts)
	if opts.Remote != "" {
		return Pull(ctx, opts)
	}
	if err := EnsureRepo(ctx, opts); err != nil {
		return err
	}

	// git runs one git subcommand inside the repository.
	git := func(args ...string) error {
		return run(ctx, opts.RepoPath, opts.Git, args...)
	}

	if err := git("fetch", "--prune", "origin"); err != nil {
		return err
	}

	localRef := "refs/heads/" + opts.Branch
	remoteRef := "origin/" + opts.Branch
	_, lookupErr := output(ctx, opts.RepoPath, opts.Git, "rev-parse", "--verify", localRef)
	if lookupErr != nil {
		// The local branch could not be verified: (re)create it from origin.
		return git("checkout", "-B", opts.Branch, remoteRef)
	}

	if err := git("checkout", opts.Branch); err != nil {
		return err
	}
	return git("pull", "--ff-only", "origin", opts.Branch)
}
// Commit delegates to CommitPaths with the single path ".", and returns
// whatever CommitPaths returns.
func Commit(ctx context.Context, opts Options, message string) (bool, error) {
	wholeRepo := []string{"."}
	return CommitPaths(ctx, opts, message, wholeRepo)
}
func CommitPaths(ctx context.Context, opts Options, message string, paths []string) (bool, error) {
opts = normalize(opts)
if message == "" {
message = "archive: update snapshot"
}
if err := run(ctx, opts.RepoPath, opts.Git, "add", "."); err != nil {
return false, err
}
dirty, err := Dirty(ctx, opts)
pathspecs, err := cleanPathspecs(paths)
if err != nil {
return false, err
}
if !dirty {
if len(pathspecs) == 0 {
return false, nil
}
args := append([]string{"add", "--"}, pathspecs...)
if err := run(ctx, opts.RepoPath, opts.Git, args...); err != nil {
return false, err
}
staged, err := staged(ctx, opts)
if err != nil {
return false, err
}
if !staged {
return false, nil
}
if err := run(ctx, opts.RepoPath, opts.Git,
@ -117,6 +163,37 @@ func Dirty(ctx context.Context, opts Options) (bool, error) {
return strings.TrimSpace(out) != "", nil
}
// CleanSQLiteSidecars walks rootDir and removes every non-directory entry
// that isSQLiteSidecar recognizes, skipping any directory named ".git".
//
// It returns the number of files removed. On failure the count reflects the
// removals completed before the error, and the error is wrapped with
// "clean sqlite sidecars". A blank rootDir is rejected up front.
func CleanSQLiteSidecars(rootDir string) (int, error) {
	root := strings.TrimSpace(rootDir)
	if root == "" {
		return 0, errors.New("root dir is required")
	}

	removed := 0
	visit := func(path string, entry os.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			// Checked first: entry is only consulted once the walk itself succeeded.
			return walkErr
		case entry.IsDir():
			if entry.Name() == ".git" {
				return filepath.SkipDir
			}
			return nil
		case !isSQLiteSidecar(path):
			return nil
		}
		if err := os.Remove(path); err != nil {
			return fmt.Errorf("remove sqlite sidecar %s: %w", path, err)
		}
		removed++
		return nil
	}

	if err := filepath.WalkDir(root, visit); err != nil {
		return removed, fmt.Errorf("clean sqlite sidecars: %w", err)
	}
	return removed, nil
}
func normalize(opts Options) Options {
opts.RepoPath = strings.TrimSpace(opts.RepoPath)
opts.Remote = strings.TrimSpace(opts.Remote)
@ -131,6 +208,63 @@ func normalize(opts Options) Options {
return opts
}
// setOrigin makes the repository's origin remote point at opts.Remote.
//
// Any failure to read the current origin URL is treated as "no origin yet"
// and answered with `git remote add`. If the trimmed current URL already
// equals opts.Remote nothing is changed; otherwise the URL is rewritten with
// `git remote set-url`.
func setOrigin(ctx context.Context, opts Options) error {
	git := func(args ...string) error {
		return run(ctx, opts.RepoPath, opts.Git, args...)
	}

	existing, err := output(ctx, opts.RepoPath, opts.Git, "remote", "get-url", "origin")
	switch {
	case err != nil:
		return git("remote", "add", "origin", opts.Remote)
	case strings.TrimSpace(existing) == opts.Remote:
		return nil
	default:
		return git("remote", "set-url", "origin", opts.Remote)
	}
}
// cleanPathspecs normalizes caller-supplied commit paths into slash-separated,
// repo-relative entries.
//
// Blank entries are dropped. Each remaining entry is whitespace-trimmed and
// cleaned with filepath.Clean. An absolute path, or a cleaned path that is
// ".." or starts with "../", yields a nil slice and an error quoting the
// trimmed input. The result is nil when no usable paths were supplied.
func cleanPathspecs(paths []string) ([]string, error) {
	var specs []string
	parentPrefix := ".." + string(filepath.Separator)

	for _, raw := range paths {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if filepath.IsAbs(trimmed) {
			return nil, fmt.Errorf("commit path %q must be relative", trimmed)
		}

		cleaned := filepath.Clean(trimmed)
		switch {
		case cleaned == ".":
			specs = append(specs, ".")
		case cleaned == "..", strings.HasPrefix(cleaned, parentPrefix):
			// The cleaned path climbs out of the repository root.
			return nil, fmt.Errorf("commit path %q must stay inside the repo", trimmed)
		default:
			specs = append(specs, filepath.ToSlash(cleaned))
		}
	}
	return specs, nil
}
// staged reports whether the index holds changes, based on the exit status
// of `git diff --cached --quiet`.
//
// A clean exit means nothing is staged. An exec.ExitError with exit code 1
// means something is staged. Any other failure is returned as an error that
// includes the trimmed command output.
func staged(ctx context.Context, opts Options) (bool, error) {
	opts = normalize(opts)
	out, err := output(ctx, opts.RepoPath, opts.Git, "diff", "--cached", "--quiet")
	if err != nil {
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) || exitErr.ExitCode() != 1 {
			return false, fmt.Errorf("git diff --cached --quiet: %w\n%s", err, strings.TrimSpace(out))
		}
		return true, nil
	}
	return false, nil
}
// isSQLiteSidecar reports whether the base name of path ends in a "-wal" or
// "-shm" suffix attached to a ".db", ".sqlite", or ".sqlite3" extension.
func isSQLiteSidecar(path string) bool {
	base := filepath.Base(path)
	for _, ext := range []string{".db", ".sqlite", ".sqlite3"} {
		for _, kind := range []string{"-wal", "-shm"} {
			if strings.HasSuffix(base, ext+kind) {
				return true
			}
		}
	}
	return false
}
func run(ctx context.Context, dir, git string, args ...string) error {
out, err := output(ctx, dir, git, args...)
if err != nil {

View File

@ -4,6 +4,7 @@ import (
"context"
"os"
"path/filepath"
"strings"
"testing"
)
@ -42,3 +43,146 @@ func TestEnsureRepoCommitDirty(t *testing.T) {
t.Fatal("repo should be clean after commit")
}
}
// TestEnsureRepoUpdatesExistingOrigin checks that calling EnsureRepo with a
// Remote on an already-initialized repo rewrites a differing origin URL.
func TestEnsureRepoUpdatesExistingOrigin(t *testing.T) {
	const (
		oldURL = "https://example.invalid/old.git"
		newURL = "https://example.invalid/new.git"
	)
	ctx := context.Background()
	repo := filepath.Join(t.TempDir(), "share")
	opts := Options{RepoPath: repo, Branch: "main"}

	// Create the repo without a remote, then add a stale origin by hand.
	err := EnsureRepo(ctx, opts)
	if err != nil {
		t.Fatal(err)
	}
	err = run(ctx, repo, "git", "remote", "add", "origin", oldURL)
	if err != nil {
		t.Fatal(err)
	}

	opts.Remote = newURL
	err = EnsureRepo(ctx, opts)
	if err != nil {
		t.Fatal(err)
	}

	raw, err := output(ctx, repo, "git", "remote", "get-url", "origin")
	if err != nil {
		t.Fatal(err)
	}
	got := strings.TrimSpace(raw)
	if got != opts.Remote {
		t.Fatalf("origin = %q, want %q", got, opts.Remote)
	}
}
// TestCommitPathsDoesNotStageUnrelatedFiles checks that CommitPaths commits
// only the requested path and leaves another new file untracked.
func TestCommitPathsDoesNotStageUnrelatedFiles(t *testing.T) {
	ctx := context.Background()
	repo := filepath.Join(t.TempDir(), "share")
	opts := Options{RepoPath: repo, Branch: "main"}
	if err := EnsureRepo(ctx, opts); err != nil {
		t.Fatal(err)
	}

	fixtures := []struct{ name, body string }{
		{"manifest.json", "{}\n"},
		{"notes.txt", "local draft\n"},
	}
	for _, f := range fixtures {
		if err := os.WriteFile(filepath.Join(repo, f.name), []byte(f.body), 0o600); err != nil {
			t.Fatal(err)
		}
	}

	committed, err := CommitPaths(ctx, opts, "archive: manifest", []string{"manifest.json"})
	switch {
	case err != nil:
		t.Fatal(err)
	case !committed:
		t.Fatal("expected commit")
	}

	// gitOut runs a git subcommand in the repo and fails the test on error.
	gitOut := func(args ...string) string {
		t.Helper()
		out, err := output(ctx, repo, "git", args...)
		if err != nil {
			t.Fatal(err)
		}
		return out
	}

	tree := gitOut("ls-tree", "--name-only", "HEAD")
	if !strings.Contains(tree, "manifest.json") {
		t.Fatalf("manifest was not committed: %q", tree)
	}
	if strings.Contains(tree, "notes.txt") {
		t.Fatalf("unrelated file was committed: %q", tree)
	}

	status := strings.TrimSpace(gitOut("status", "--porcelain"))
	if status != "?? notes.txt" {
		t.Fatalf("status = %q, want only untracked notes.txt", status)
	}
}
// TestPullCurrentUsesExistingOrigin checks that PullCurrent, called without
// a Remote, picks up a new commit through the origin that an earlier Pull
// configured.
func TestPullCurrentUsesExistingOrigin(t *testing.T) {
	ctx := context.Background()
	dir := t.TempDir()
	remote := filepath.Join(dir, "remote.git")
	seed := filepath.Join(dir, "seed")
	repo := filepath.Join(dir, "share")

	// git runs a git subcommand in workDir and fails the test on error.
	git := func(workDir string, args ...string) {
		t.Helper()
		if err := run(ctx, workDir, "git", args...); err != nil {
			t.Fatal(err)
		}
	}
	// commitManifest rewrites manifest.json in the seed clone and commits it.
	commitManifest := func(content, message string) {
		t.Helper()
		if err := os.WriteFile(filepath.Join(seed, "manifest.json"), []byte(content), 0o600); err != nil {
			t.Fatal(err)
		}
		git(seed, "add", "manifest.json")
		git(seed, "-c", "commit.gpgsign=false", "-c", "user.name=test", "-c", "user.email=test@example.invalid", "commit", "-m", message)
	}

	// Publish a first commit to a bare remote through the seed clone.
	git("", "init", "--bare", remote)
	git("", "clone", remote, seed)
	git(seed, "checkout", "-B", "main")
	commitManifest("one\n", "one")
	git(seed, "push", "-u", "origin", "main")

	if err := Pull(ctx, Options{RepoPath: repo, Remote: remote, Branch: "main"}); err != nil {
		t.Fatal(err)
	}

	// Publish a second commit, then pull without naming the remote.
	commitManifest("two\n", "two")
	git(seed, "push", "origin", "main")

	if err := PullCurrent(ctx, Options{RepoPath: repo, Branch: "main"}); err != nil {
		t.Fatal(err)
	}

	data, err := os.ReadFile(filepath.Join(repo, "manifest.json"))
	if err != nil {
		t.Fatal(err)
	}
	if string(data) != "two\n" {
		t.Fatalf("manifest = %q, want updated content", data)
	}
}
// TestCleanSQLiteSidecars checks that CleanSQLiteSidecars removes the -wal
// and -shm files, reports their count, and leaves other files in place.
func TestCleanSQLiteSidecars(t *testing.T) {
	dir := t.TempDir()
	// statErr returns the os.Stat error for a file name inside dir.
	statErr := func(name string) error {
		_, err := os.Stat(filepath.Join(dir, name))
		return err
	}

	for _, name := range []string{"archive.db", "archive.db-wal", "archive.db-shm", "notes.txt"} {
		if err := os.WriteFile(filepath.Join(dir, name), []byte(name), 0o600); err != nil {
			t.Fatal(err)
		}
	}

	removed, err := CleanSQLiteSidecars(dir)
	if err != nil {
		t.Fatal(err)
	}
	if removed != 2 {
		t.Fatalf("removed = %d, want 2", removed)
	}

	for _, name := range []string{"archive.db-wal", "archive.db-shm"} {
		if err := statErr(name); !os.IsNotExist(err) {
			t.Fatalf("%s should have been removed, err=%v", name, err)
		}
	}
	for _, name := range []string{"archive.db", "notes.txt"} {
		if err := statErr(name); err != nil {
			t.Fatalf("%s should remain: %v", name, err)
		}
	}
}