From 8cda2498b2d7cf6af4668d90289fd55439e528fb Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 5 May 2026 17:18:53 -0700 Subject: [PATCH] feat(mirror): add safer share repo helpers --- CHANGELOG.md | 2 + mirror/mirror.go | 144 ++++++++++++++++++++++++++++++++++++++++-- mirror/mirror_test.go | 144 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 285 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d288d55..94515df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ - Initial `crawlkit` module scaffold. - Document the `crawlkit` versus crawl-app boundary for embeddings, search, inference, sync state, snapshots, SQLite, and git mirrors. +- Add safer `mirror` helpers for origin updates, existing-origin pulls, + path-scoped commits, and portable SQLite sidecar cleanup. - Add `tui`, a shared Bubble Tea terminal archive browser used by the crawl apps for consistent `tui` command behavior. - Improve `tui` rows with compact column rendering, pane-specific scrolling, and full-height pane borders. - Tune `tui` pane colors and mouse-wheel buffering to better match the `gitcrawl` terminal browser feel. diff --git a/mirror/mirror.go b/mirror/mirror.go index 347625f..b5542f1 100644 --- a/mirror/mirror.go +++ b/mirror/mirror.go @@ -23,6 +23,9 @@ func EnsureRepo(ctx context.Context, opts Options) error { return errors.New("repo path is required") } if _, err := os.Stat(filepath.Join(opts.RepoPath, ".git")); err == nil { + if opts.Remote != "" { + return setOrigin(ctx, opts) + } return nil } if opts.Remote != "" { @@ -49,6 +52,17 @@ func EnsureRepo(ctx context.Context, opts Options) error { return nil } +func EnsureRemote(ctx context.Context, opts Options) error { + opts = normalize(opts) + if opts.Remote == "" { + return errors.New("remote is required") + } + if err := EnsureRepo(ctx, opts); err != nil { + return err + } + return setOrigin(ctx, opts) +} + func Pull(ctx context.Context, opts Options) error { opts = normalize(opts) if opts.Remote == "" { @@ -67,19 +81,51 @@ func Pull(ctx context.Context, opts Options) error { return run(ctx, opts.RepoPath, opts.Git, "checkout", "-B", opts.Branch, "origin/"+opts.Branch) } +func PullCurrent(ctx context.Context, opts Options) error { + opts = normalize(opts) + if opts.Remote != "" { + return Pull(ctx, opts) + } + if err := EnsureRepo(ctx, opts); err != nil { + return err + } + if err := run(ctx, opts.RepoPath, opts.Git, "fetch", "--prune", "origin"); err != nil { + return err + } + if _, err := output(ctx, opts.RepoPath, opts.Git, "rev-parse", "--verify", "refs/heads/"+opts.Branch); err != nil { + return run(ctx, opts.RepoPath, opts.Git, "checkout", "-B", opts.Branch, "origin/"+opts.Branch) + } + if err := run(ctx, opts.RepoPath, opts.Git, "checkout", opts.Branch); err != nil { + return err + } + return run(ctx, opts.RepoPath, opts.Git, "pull", "--ff-only", "origin", opts.Branch) +} + func Commit(ctx context.Context, opts Options, message string) (bool, error) { + return CommitPaths(ctx, opts, message, []string{"."}) +} + +func CommitPaths(ctx context.Context, opts Options, message string, paths []string) (bool, error) { opts = normalize(opts) if message == "" { message = "archive: update snapshot" } - if err := run(ctx, opts.RepoPath, opts.Git, "add", "."); err != nil { - return false, err - } - dirty, err := Dirty(ctx, opts) + pathspecs, err := cleanPathspecs(paths) if err != nil { return false, err } - if !dirty { + if len(pathspecs) == 0 { + return false, nil + } + args := append([]string{"add", "--"}, pathspecs...) + if err := run(ctx, opts.RepoPath, opts.Git, args...); err != nil { + return false, err + } + staged, err := staged(ctx, opts) + if err != nil { + return false, err + } + if !staged { return false, nil } if err := run(ctx, opts.RepoPath, opts.Git, @@ -117,6 +163,37 @@ func Dirty(ctx context.Context, opts Options) (bool, error) { return strings.TrimSpace(out) != "", nil } +func CleanSQLiteSidecars(rootDir string) (int, error) { + rootDir = strings.TrimSpace(rootDir) + if rootDir == "" { + return 0, errors.New("root dir is required") + } + count := 0 + err := filepath.WalkDir(rootDir, func(path string, entry os.DirEntry, err error) error { + if err != nil { + return err + } + if entry.IsDir() { + if entry.Name() == ".git" { + return filepath.SkipDir + } + return nil + } + if !isSQLiteSidecar(path) { + return nil + } + if err := os.Remove(path); err != nil { + return fmt.Errorf("remove sqlite sidecar %s: %w", path, err) + } + count++ + return nil + }) + if err != nil { + return count, fmt.Errorf("clean sqlite sidecars: %w", err) + } + return count, nil +} + func normalize(opts Options) Options { opts.RepoPath = strings.TrimSpace(opts.RepoPath) opts.Remote = strings.TrimSpace(opts.Remote) @@ -131,6 +208,63 @@ func normalize(opts Options) Options { return opts } +func setOrigin(ctx context.Context, opts Options) error { + current, err := output(ctx, opts.RepoPath, opts.Git, "remote", "get-url", "origin") + if err != nil { + return run(ctx, opts.RepoPath, opts.Git, "remote", "add", "origin", opts.Remote) + } + if strings.TrimSpace(current) == opts.Remote { + return nil + } + return run(ctx, opts.RepoPath, opts.Git, "remote", "set-url", "origin", opts.Remote) +} + +func cleanPathspecs(paths []string) ([]string, error) { + var out []string + for _, path := range paths { + path = strings.TrimSpace(path) + if path == "" { + continue + } + if filepath.IsAbs(path) { + return nil, fmt.Errorf("commit path %q must be relative", path) + } + clean := filepath.Clean(path) + if clean == "." { + out = append(out, ".") + continue + } + if clean == ".." || strings.HasPrefix(clean, ".."+string(filepath.Separator)) { + return nil, fmt.Errorf("commit path %q must stay inside the repo", path) + } + out = append(out, filepath.ToSlash(clean)) + } + return out, nil +} + +func staged(ctx context.Context, opts Options) (bool, error) { + opts = normalize(opts) + out, err := output(ctx, opts.RepoPath, opts.Git, "diff", "--cached", "--quiet") + if err == nil { + return false, nil + } + var exitErr *exec.ExitError + if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { + return true, nil + } + return false, fmt.Errorf("git diff --cached --quiet: %w\n%s", err, strings.TrimSpace(out)) +} + +func isSQLiteSidecar(path string) bool { + name := filepath.Base(path) + return strings.HasSuffix(name, ".db-wal") || + strings.HasSuffix(name, ".db-shm") || + strings.HasSuffix(name, ".sqlite-wal") || + strings.HasSuffix(name, ".sqlite-shm") || + strings.HasSuffix(name, ".sqlite3-wal") || + strings.HasSuffix(name, ".sqlite3-shm") +} + func run(ctx context.Context, dir, git string, args ...string) error { out, err := output(ctx, dir, git, args...) if err != nil { diff --git a/mirror/mirror_test.go b/mirror/mirror_test.go index ed31d65..6909199 100644 --- a/mirror/mirror_test.go +++ b/mirror/mirror_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "path/filepath" + "strings" "testing" ) @@ -42,3 +43,146 @@ func TestEnsureRepoCommitDirty(t *testing.T) { t.Fatal("repo should be clean after commit") } } + +func TestEnsureRepoUpdatesExistingOrigin(t *testing.T) { + ctx := context.Background() + repo := filepath.Join(t.TempDir(), "share") + opts := Options{RepoPath: repo, Branch: "main"} + if err := EnsureRepo(ctx, opts); err != nil { + t.Fatal(err) + } + if err := run(ctx, repo, "git", "remote", "add", "origin", "https://example.invalid/old.git"); err != nil { + t.Fatal(err) + } + opts.Remote = "https://example.invalid/new.git" + if err := EnsureRepo(ctx, opts); err != nil { + t.Fatal(err) + } + out, err := output(ctx, repo, "git", "remote", "get-url", "origin") + if err != nil { + t.Fatal(err) + } + if strings.TrimSpace(out) != opts.Remote { + t.Fatalf("origin = %q, want %q", strings.TrimSpace(out), opts.Remote) + } +} + +func TestCommitPathsDoesNotStageUnrelatedFiles(t *testing.T) { + ctx := context.Background() + repo := filepath.Join(t.TempDir(), "share") + opts := Options{RepoPath: repo, Branch: "main"} + if err := EnsureRepo(ctx, opts); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(repo, "manifest.json"), []byte("{}\n"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(repo, "notes.txt"), []byte("local draft\n"), 0o600); err != nil { + t.Fatal(err) + } + committed, err := CommitPaths(ctx, opts, "archive: manifest", []string{"manifest.json"}) + if err != nil { + t.Fatal(err) + } + if !committed { + t.Fatal("expected commit") + } + tree, err := output(ctx, repo, "git", "ls-tree", "--name-only", "HEAD") + if err != nil { + t.Fatal(err) + } + if !strings.Contains(tree, "manifest.json") { + t.Fatalf("manifest was not committed: %q", tree) + } + if strings.Contains(tree, "notes.txt") { + t.Fatalf("unrelated file was committed: %q", tree) + } + status, err := output(ctx, repo, "git", "status", "--porcelain") + if err != nil { + t.Fatal(err) + } + if strings.TrimSpace(status) != "?? notes.txt" { + t.Fatalf("status = %q, want only untracked notes.txt", strings.TrimSpace(status)) + } +} + +func TestPullCurrentUsesExistingOrigin(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + remote := filepath.Join(dir, "remote.git") + seed := filepath.Join(dir, "seed") + repo := filepath.Join(dir, "share") + if err := run(ctx, "", "git", "init", "--bare", remote); err != nil { + t.Fatal(err) + } + if err := run(ctx, "", "git", "clone", remote, seed); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "checkout", "-B", "main"); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(seed, "manifest.json"), []byte("one\n"), 0o600); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "add", "manifest.json"); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "-c", "commit.gpgsign=false", "-c", "user.name=test", "-c", "user.email=test@example.invalid", "commit", "-m", "one"); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "push", "-u", "origin", "main"); err != nil { + t.Fatal(err) + } + if err := Pull(ctx, Options{RepoPath: repo, Remote: remote, Branch: "main"}); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(seed, "manifest.json"), []byte("two\n"), 0o600); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "add", "manifest.json"); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "-c", "commit.gpgsign=false", "-c", "user.name=test", "-c", "user.email=test@example.invalid", "commit", "-m", "two"); err != nil { + t.Fatal(err) + } + if err := run(ctx, seed, "git", "push", "origin", "main"); err != nil { + t.Fatal(err) + } + if err := PullCurrent(ctx, Options{RepoPath: repo, Branch: "main"}); err != nil { + t.Fatal(err) + } + data, err := os.ReadFile(filepath.Join(repo, "manifest.json")) + if err != nil { + t.Fatal(err) + } + if string(data) != "two\n" { + t.Fatalf("manifest = %q, want updated content", data) + } +} + +func TestCleanSQLiteSidecars(t *testing.T) { + dir := t.TempDir() + files := []string{"archive.db", "archive.db-wal", "archive.db-shm", "notes.txt"} + for _, file := range files { + if err := os.WriteFile(filepath.Join(dir, file), []byte(file), 0o600); err != nil { + t.Fatal(err) + } + } + removed, err := CleanSQLiteSidecars(dir) + if err != nil { + t.Fatal(err) + } + if removed != 2 { + t.Fatalf("removed = %d, want 2", removed) + } + for _, file := range []string{"archive.db-wal", "archive.db-shm"} { + if _, err := os.Stat(filepath.Join(dir, file)); !os.IsNotExist(err) { + t.Fatalf("%s should have been removed, err=%v", file, err) + } + } + for _, file := range []string{"archive.db", "notes.txt"} { + if _, err := os.Stat(filepath.Join(dir, file)); err != nil { + t.Fatalf("%s should remain: %v", file, err) + } + } +}