From cdc32f65d76a1e713cca2fce595cb83558db6ead Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 5 May 2026 02:29:43 +0100 Subject: [PATCH] feat: add crabboxignore sync excludes --- CHANGELOG.md | 4 +++ docs/commands/run.md | 4 +-- docs/commands/sync-plan.md | 7 ++-- docs/features/sync.md | 9 ++++- internal/cli/repo.go | 46 +++++++++++++++++++++--- internal/cli/repo_test.go | 73 ++++++++++++++++++++++++++++++++++++++ internal/cli/run.go | 10 ++++-- internal/cli/sync_plan.go | 6 +++- 8 files changed, 145 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1defe72..3f3787a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## 0.5.1 - Unreleased +### Added + +- Added `.crabboxignore` for repo-local sync-only exclude patterns shared by `run` and `sync-plan`. + ### Fixed - Fixed managed AWS Windows WSL2 bootstrap by using the current Ubuntu WSL rootfs URL, downloading large rootfs files through `curl.exe`, and retrying empty or partial rootfs downloads instead of reusing a poisoned tarball. Thanks @vincentkoc. diff --git a/docs/commands/run.md b/docs/commands/run.md index 1a325fe..4968df3 100644 --- a/docs/commands/run.md +++ b/docs/commands/run.md @@ -46,7 +46,7 @@ metadata exists and SSH is reachable, `tailscale` fails if the tailnet path is not available, and `public` forces the provider host. See [Tailscale](../features/tailscale.md). -Sync uses `git ls-files --cached --others --exclude-standard` to build a file manifest, then feeds that manifest to rsync over SSH. That means tracked files plus nonignored untracked files sync, while `.git`, ignored local build output, dependency folders, and common caches stay out of the transfer. Crabbox records a local/remote sync fingerprint and skips rsync when the tracked commit plus manifest and dirty metadata have not changed. Use `--checksum` when you need a paranoid checksum scan, and `--debug` to print sync timing, progress, and itemized rsync output. +Sync uses `git ls-files --cached --others --exclude-standard` to build a file manifest, then feeds that manifest to rsync over SSH. That means tracked files plus nonignored untracked files sync, while `.git`, ignored local build output, dependency folders, `.crabboxignore` patterns, `sync.exclude` patterns, and common caches stay out of the transfer. Crabbox records a local/remote sync fingerprint and skips rsync when the tracked commit plus manifest and dirty metadata have not changed. Use `--checksum` when you need a paranoid checksum scan, and `--debug` to print sync timing, progress, and itemized rsync output. For `provider=ssh`, `target=macos` and `target=windows windows.mode=wsl2` use the same POSIX rsync flow. Native Windows mode uses PowerShell over OpenSSH @@ -63,7 +63,7 @@ At the end of every command, `run` prints a one-line summary with sync duration, Use `--timing-json` to emit a final JSON timing record with provider, lease ID, sync phases, command duration, total duration, exit code, and Actions run URL when available. In `blacksmith-testbox` mode, sync is reported as delegated in the same schema. -Before the first rsync into a Git checkout, Crabbox tries to seed the remote worktree from the local `origin` remote so the first sync is a dirty-tree overlay instead of a full source upload. Project-specific excludes, env forwarding, and base ref belong in `crabbox.yaml` or `.crabbox.yaml`. +Before the first rsync into a Git checkout, Crabbox tries to seed the remote worktree from the local `origin` remote so the first sync is a dirty-tree overlay instead of a full source upload. Project-specific excludes can live in `.crabboxignore` or `sync.exclude` in `crabbox.yaml` / `.crabbox.yaml`; env forwarding and base ref belong in config. After sync, Crabbox runs a remote sanity check. If the remote checkout reports at least 200 tracked deletions, Crabbox fails before running tests unless local `CRABBOX_ALLOW_MASS_DELETIONS=1` is set. diff --git a/docs/commands/sync-plan.md b/docs/commands/sync-plan.md index 4c91a83..fe48d91 100644 --- a/docs/commands/sync-plan.md +++ b/docs/commands/sync-plan.md @@ -7,14 +7,17 @@ crabbox sync-plan crabbox sync-plan --limit 10 ``` -It uses the same Git file-list manifest and excludes as `crabbox run`, then prints: +It uses the same Git file-list manifest, `.crabboxignore`, and config excludes +as `crabbox run`, then prints: - candidate file count and total bytes; - tracked deletes that would be applied remotely; - largest files; - largest first or second-level directories. -Use it before a cold sync when the preflight estimate looks too large. +Use it before a cold sync when the preflight estimate looks too large, or after +editing `.crabboxignore` to confirm that local artifacts dropped out of the +manifest. Related docs: diff --git a/docs/features/sync.md b/docs/features/sync.md index ff24dad..616e2f0 100644 --- a/docs/features/sync.md +++ b/docs/features/sync.md @@ -11,7 +11,8 @@ It syncs the Git-managed working set, not the whole directory tree: - tracked files from `git ls-files --cached`; - nonignored untracked files from `git ls-files --others --exclude-standard`; -- repo-local `sync.exclude` patterns and Crabbox's default cache/build excludes. +- root `.crabboxignore` patterns, repo-local `sync.exclude` patterns, and + Crabbox's default cache/build excludes. Ignored build output, dependency folders, `.git`, and common local caches stay out of the transfer. This keeps first syncs close to the code that CI would see while still letting agents test uncommitted edits. @@ -67,6 +68,12 @@ Use `crabbox sync-plan` to inspect the local manifest before leasing a box. It p Repo-local config should hold project-specific excludes and env allowlists. Secrets must not be passed as command-line arguments or broad env globs. +Use `.crabboxignore` when you only need repo-local sync exclusions. The file is +read from the repository root. Blank lines and lines starting with `#` are +ignored; remaining lines are appended to `sync.exclude` and use the same matcher +as config excludes. Crabbox intentionally supports only `.crabboxignore`; there +is no short alias. + Related docs: - [CLI](../cli.md) diff --git a/internal/cli/repo.go b/internal/cli/repo.go index e6657c1..bdd9f16 100644 --- a/internal/cli/repo.go +++ b/internal/cli/repo.go @@ -70,6 +70,38 @@ func configuredExcludes(cfg Config) []string { return appendUniqueStrings(defaultExcludes(), cfg.Sync.Excludes...) } +func syncExcludes(root string, cfg Config) ([]string, error) { + excludes := configuredExcludes(cfg) + ignore, err := readCrabboxIgnore(root) + if err != nil { + return nil, err + } + return appendUniqueStrings(excludes, ignore...), nil +} + +func readCrabboxIgnore(root string) ([]string, error) { + if root == "" { + return nil, nil + } + data, err := os.ReadFile(filepath.Join(root, ".crabboxignore")) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, exit(2, "read .crabboxignore: %v", err) + } + lines := strings.Split(string(data), "\n") + patterns := make([]string, 0, len(lines)) + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + patterns = append(patterns, line) + } + return patterns, nil +} + func allowedEnv(allow []string) map[string]string { out := map[string]string{} for _, env := range os.Environ() { @@ -133,18 +165,22 @@ func defaultBaseRef(root string) string { } func syncFingerprint(repo Repo, cfg Config) (string, error) { - manifest, err := syncManifest(repo.Root, configuredExcludes(cfg)) + excludes, err := syncExcludes(repo.Root, cfg) if err != nil { return "", err } - return syncFingerprintForManifest(repo, cfg, manifest) + manifest, err := syncManifest(repo.Root, excludes) + if err != nil { + return "", err + } + return syncFingerprintForManifest(repo, cfg, manifest, excludes) } -func syncFingerprintForManifest(repo Repo, cfg Config, manifest SyncManifest) (string, error) { +func syncFingerprintForManifest(repo Repo, cfg Config, manifest SyncManifest, excludes []string) (string, error) { if repo.Head == "" { return "", nil } - paths, err := changedSyncPaths(repo.Root, configuredExcludes(cfg)) + paths, err := changedSyncPaths(repo.Root, excludes) if err != nil { return "", err } @@ -153,7 +189,7 @@ func syncFingerprintForManifest(repo Repo, cfg Config, manifest SyncManifest) (s fmt.Fprintf(h, "delete=%t\nchecksum=%t\n", cfg.Sync.Delete, cfg.Sync.Checksum) fmt.Fprintf(h, "manifest=%x\n", sha256.Sum256(manifest.NUL())) fmt.Fprintf(h, "deleted=%x\n", sha256.Sum256(manifest.DeletedNUL())) - for _, exclude := range configuredExcludes(cfg) { + for _, exclude := range excludes { fmt.Fprintf(h, "exclude=%s\n", exclude) } for _, rel := range paths { diff --git a/internal/cli/repo_test.go b/internal/cli/repo_test.go index b6a4d78..1b8f0f1 100644 --- a/internal/cli/repo_test.go +++ b/internal/cli/repo_test.go @@ -89,6 +89,79 @@ func TestSyncManifestPrunesAppleDoubleSidecars(t *testing.T) { } } +func TestCrabboxIgnoreExtendsSyncExcludes(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + runGit(t, dir, "config", "user.email", "test@example.com") + runGit(t, dir, "config", "user.name", "Test") + writeFile(t, filepath.Join(dir, ".crabboxignore"), "# local-only artifacts\nlocal-artifacts\n*.tmp\n\n") + writeFile(t, filepath.Join(dir, "src", "main.go"), "package main\n") + writeFile(t, filepath.Join(dir, "local-artifacts", "cache.bin"), "cache") + writeFile(t, filepath.Join(dir, "notes.tmp"), "tmp") + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-m", "init") + + excludes, err := syncExcludes(dir, baseConfig()) + if err != nil { + t.Fatal(err) + } + manifest, err := syncManifest(dir, excludes) + if err != nil { + t.Fatal(err) + } + got := strings.Join(manifest.Files, ",") + if !strings.Contains(got, "src/main.go") { + t.Fatalf("manifest missing source file: %q", got) + } + for _, notWant := range []string{"local-artifacts/cache.bin", "notes.tmp"} { + if strings.Contains(got, notWant) { + t.Fatalf("manifest %q should exclude .crabboxignore pattern %q", got, notWant) + } + } +} + +func TestCrabboxIgnorePrunesDeletedPaths(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + runGit(t, dir, "config", "user.email", "test@example.com") + runGit(t, dir, "config", "user.name", "Test") + writeFile(t, filepath.Join(dir, ".crabboxignore"), "generated.bin\n") + writeFile(t, filepath.Join(dir, "generated.bin"), "old") + writeFile(t, filepath.Join(dir, "deleted.txt"), "old") + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-m", "init") + if err := os.Remove(filepath.Join(dir, "generated.bin")); err != nil { + t.Fatal(err) + } + if err := os.Remove(filepath.Join(dir, "deleted.txt")); err != nil { + t.Fatal(err) + } + + excludes, err := syncExcludes(dir, baseConfig()) + if err != nil { + t.Fatal(err) + } + manifest, err := syncManifest(dir, excludes) + if err != nil { + t.Fatal(err) + } + if strings.Join(manifest.Deleted, ",") != "deleted.txt" { + t.Fatalf("deleted manifest should omit .crabboxignore patterns: %v", manifest.Deleted) + } +} + +func TestReadCrabboxIgnoreSkipsBlankAndCommentLines(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, ".crabboxignore"), "\n# comment\n build-output \n*.tmp\r\n") + got, err := readCrabboxIgnore(dir) + if err != nil { + t.Fatal(err) + } + if strings.Join(got, ",") != "build-output,*.tmp" { + t.Fatalf("patterns=%q", got) + } +} + func TestSyncManifestRecordsTrackedDeletes(t *testing.T) { dir := t.TempDir() runGit(t, dir, "init") diff --git a/internal/cli/run.go b/internal/cli/run.go index e6fdf45..c656036 100644 --- a/internal/cli/run.go +++ b/internal/cli/run.go @@ -425,6 +425,10 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { } recorder.Event("sync.started", "sync", "") timings.syncSteps.sshReady = time.Since(stepStart) + excludes, err := syncExcludes(repo.Root, cfg) + if err != nil { + return recordFailure(err) + } if isWindowsNativeTarget(target) { stepStart = time.Now() if err := runSSHQuiet(ctx, target, windowsRemoteMkdir(workdir)); err != nil { @@ -433,7 +437,7 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { timings.syncSteps.mkdir = time.Since(stepStart) } stepStart = time.Now() - manifest, err := syncManifest(repo.Root, configuredExcludes(cfg)) + manifest, err := syncManifest(repo.Root, excludes) if err != nil { return recordFailure(exit(6, "build sync file list: %v", err)) } @@ -457,7 +461,7 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { fingerprint := "" if cfg.Sync.Fingerprint { stepStart = time.Now() - fingerprint, err = syncFingerprintForManifest(repo, cfg, manifest) + fingerprint, err = syncFingerprintForManifest(repo, cfg, manifest, excludes) timings.syncSteps.fingerprintLocal = time.Since(stepStart) if err != nil { fmt.Fprintf(a.Stderr, "warning: sync fingerprint failed: %v\n", err) @@ -496,7 +500,7 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { timings.syncSteps.prune = time.Since(stepStart) } stepStart = time.Now() - if err := rsync(ctx, target, repo.Root, workdir, configuredExcludes(cfg), a.Stdout, a.Stderr, rsyncOptions{Debug: *debugSync, Delete: cfg.Sync.Delete, Checksum: cfg.Sync.Checksum, UseFilesFrom: true, FilesFrom: manifestData, Timeout: cfg.Sync.Timeout, HeartbeatInterval: 15 * time.Second}); err != nil { + if err := rsync(ctx, target, repo.Root, workdir, excludes, a.Stdout, a.Stderr, rsyncOptions{Debug: *debugSync, Delete: cfg.Sync.Delete, Checksum: cfg.Sync.Checksum, UseFilesFrom: true, FilesFrom: manifestData, Timeout: cfg.Sync.Timeout, HeartbeatInterval: 15 * time.Second}); err != nil { return recordFailure(exit(6, "rsync failed: %v", err)) } timings.syncSteps.rsync = time.Since(stepStart) diff --git a/internal/cli/sync_plan.go b/internal/cli/sync_plan.go index 264878d..170cf64 100644 --- a/internal/cli/sync_plan.go +++ b/internal/cli/sync_plan.go @@ -32,7 +32,11 @@ func (a App) syncPlan(ctx context.Context, args []string) error { if err != nil { return err } - manifest, err := syncManifest(repo.Root, configuredExcludes(cfg)) + excludes, err := syncExcludes(repo.Root, cfg) + if err != nil { + return err + } + manifest, err := syncManifest(repo.Root, excludes) if err != nil { return exit(6, "build sync file list: %v", err) }