From f26af3adba6d549e852f77d42686d9b0e80f980d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 29 Apr 2026 03:35:18 +0100 Subject: [PATCH] feat(safety): add baked safety profiles (#536) * feat(safety): add baked safety profiles Co-authored-by: Drew Burchfield <1084679+drewburchfield@users.noreply.github.com> * fix(safety): narrow readonly profile parent allows * fix(safety): verify basename safe-build outputs * fix(backup): promote Gmail checkpoints into final manifest * docs(safety): explain baked safety profiles * feat(safety): filter profiled help and schema * fix(safety): avoid help filter shadow warnings * fix(backup): make plaintext export resilient * docs(changelog): mention safety help filtering * fix(backup): satisfy export lint checks --------- Co-authored-by: Drew Burchfield <1084679+drewburchfield@users.noreply.github.com> --- .gitignore | 1 + CHANGELOG.md | 7 + Makefile | 5 +- README.md | 29 +- build-safe.sh | 79 ++++++ cmd/bake-safety-profile/main.go | 40 +++ docs/backup.md | 8 +- docs/safety-profiles.md | 201 ++++++++++++++ go.mod | 2 +- internal/backup/backup.go | 98 +++++-- internal/backup/backup_test.go | 54 ++++ internal/backup/read.go | 37 ++- internal/cmd/backup_export.go | 96 ++++--- internal/cmd/backup_export_gmail.go | 22 +- internal/cmd/backup_export_gmail_test.go | 47 ++++ internal/cmd/backup_gmail.go | 221 +++++++++++++-- internal/cmd/backup_test.go | 195 ++++++++++++++ internal/cmd/help_printer.go | 74 ++++- internal/cmd/help_printer_test.go | 12 + internal/cmd/root.go | 4 + internal/cmd/safety_profile.go | 261 ++++++++++++++++++ internal/cmd/safety_profile_default.go | 5 + internal/cmd/safety_profile_test.go | 217 +++++++++++++++ internal/cmd/schema.go | 18 +- safety-profiles/agent-safe.yaml | 322 ++++++++++++++++++++++ safety-profiles/full.yaml | 4 + safety-profiles/readonly.yaml | 327 +++++++++++++++++++++++ 27 files changed, 2271 insertions(+), 115 deletions(-) create mode 100755 build-safe.sh create mode 100644 
cmd/bake-safety-profile/main.go create mode 100644 docs/safety-profiles.md create mode 100644 internal/cmd/safety_profile.go create mode 100644 internal/cmd/safety_profile_default.go create mode 100644 internal/cmd/safety_profile_test.go create mode 100644 safety-profiles/agent-safe.yaml create mode 100644 safety-profiles/full.yaml create mode 100644 safety-profiles/readonly.yaml diff --git a/.gitignore b/.gitignore index b2b0327..f2157e2 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ go.work.sum # Local build output bin/ /gog +internal/cmd/safety_profile_baked_gen.go # Node (optional dev scripts) node_modules/ diff --git a/CHANGELOG.md b/CHANGELOG.md index a2d8e1e..521a298 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ ## 0.15.0 - Unreleased +### Added +- Agent safety: add baked safety-profile builds for fail-closed agent binaries, with `agent-safe`, `readonly`, and `full` profiles, filtered help/schema output, docs, and build tooling. (#366, #239) — thanks @drewburchfield. + +### Fixed +- Backup: promote completed Gmail checkpoint shards into the final manifest and byte-split fallback Gmail message shards so full-mailbox runs do not create GitHub-rejected blobs or giant final pushes. +- Backup export: stream decrypted shards one at a time, preserve resumable Gmail Markdown mirrors, handle very large JSONL rows, and write Markdown fallbacks for malformed MIME messages instead of aborting full-mailbox exports. + ## 0.14.0 - 2026-04-28 ### Added diff --git a/Makefile b/Makefile index e2e031d..233aa48 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ SHELL := /bin/bash # `make` should build the binary by default. 
.DEFAULT_GOAL := build -.PHONY: build gog gogcli gog-help gogcli-help help fmt fmt-check lint test ci tools docs-commands +.PHONY: build build-safe gog gogcli gog-help gogcli-help help fmt fmt-check lint test ci tools docs-commands .PHONY: worker-ci BIN_DIR := $(CURDIR)/bin @@ -38,6 +38,9 @@ build: @mkdir -p $(BIN_DIR) @go build -ldflags "$(LDFLAGS)" -o $(BIN) $(CMD) +build-safe: + @./build-safe.sh $${PROFILE:-safety-profiles/agent-safe.yaml} -o $${OUTPUT:-$(BIN_DIR)/gog-safe} + gog: build @if [ -n "$(RUN_ARGS)" ]; then \ $(BIN) $(RUN_ARGS); \ diff --git a/README.md b/README.md index 4d6cbc5..dd461d0 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Fast, script-friendly CLI for Gmail, Calendar, Chat, Classroom, Drive, Docs, Sli - **Groups** - list groups you belong to, view group members (Google Workspace) - **Local time** - quick local/UTC time display for scripts and agents - **Multiple accounts** - manage multiple Google accounts simultaneously, with account aliases and per-client OAuth buckets -- **Command allowlist** - restrict top-level commands for sandboxed/agent runs +- **Command allowlist + baked safety profiles** - restrict commands at runtime or build a fail-closed agent binary - **Secure credential storage** using OS keyring or encrypted on-disk keyring (configurable) - **Auto-refreshing tokens** - authenticate once, use indefinitely - **Flexible auth** - OAuth refresh tokens, ADC, direct access tokens, service accounts, manual/remote flows, `--extra-scopes`, and proxy-safe callbacks @@ -584,6 +584,17 @@ gog tasks list gog --gmail-no-send gmail send --to someone@example.com --subject Test --body Test gog config no-send set agent@example.com ``` + +For stronger isolation, build a dedicated binary with an embedded safety profile: + +```bash +./build-safe.sh safety-profiles/agent-safe.yaml -o bin/gog-agent-safe +./build-safe.sh safety-profiles/readonly.yaml -o bin/gog-readonly +``` + +Baked profiles are checked after CLI parsing and before any 
command runs. They are +fail-closed and cannot be changed by config, environment variables, or runtime +allowlist flags. See `docs/safety-profiles.md`. ## Security @@ -788,9 +799,10 @@ For a bounded first run: gog backup push --services gmail --account you@gmail.com --query 'newer_than:7d' --max 25 ``` -Backups use age-encrypted JSONL gzip shards under `data/`. `gog` stores the -private age identity locally at `~/.gog/age.key`; GitHub only receives public -`age1...` recipients, `manifest.json`, and encrypted `*.jsonl.gz.age` payloads. +Backups use age-encrypted JSONL gzip shards under `data/` and completed Gmail +checkpoint shards under `checkpoints/`. `gog` stores the private age identity +locally at `~/.gog/age.key`; GitHub only receives public `age1...` recipients, +`manifest.json`, and encrypted `*.jsonl.gz.age` payloads. The private `AGE-SECRET-KEY-...` value must stay local or in a password manager. Supported backup services are `gmail`, `gmail-settings`, `calendar`, @@ -810,9 +822,12 @@ by default (`--gmail-checkpoint-rows`, `--gmail-checkpoint-interval`, conservative plaintext byte ceiling to avoid GitHub blob rejections. Checkpoint commits push through a single ordered background queue so cached Gmail fetching can continue while GitHub uploads run; the final completed backup waits for the -queue to drain before updating the authoritative manifest. Checkpoints live -under `checkpoints/` and do not replace the authoritative `manifest.json` until -the final backup completes. Use `--gmail-refresh-cache` to force a refetch. +queue to drain before updating the authoritative manifest. When a cached Gmail +run completes, the final manifest promotes the completed checkpoint message +shards instead of re-encrypting the mailbox into a second giant Git push. +Checkpoints live under `checkpoints/` and do not become authoritative until the +final `manifest.json` references them. Use `--gmail-refresh-cache` to force a +refetch. 
Workspace inventories Docs/Sheets/Slides and backs up Forms/responses discovered through Drive; add `--workspace-native` for full native Docs/Sheets/Slides API JSON. diff --git a/build-safe.sh b/build-safe.sh new file mode 100755 index 0000000..aa6dd57 --- /dev/null +++ b/build-safe.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd "$(dirname "$0")" + +usage() { + cat >&2 <<'USAGE' +Usage: ./build-safe.sh <profile.yaml> [-o output] + +Examples: + ./build-safe.sh safety-profiles/readonly.yaml + ./build-safe.sh safety-profiles/agent-safe.yaml -o /usr/local/bin/gog-safe +USAGE +} + +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + usage + exit 0 +fi + +if [[ -z "${1:-}" || "${1:-}" == -* ]]; then + usage + exit 2 +fi + +PROFILE="$1" +shift + +OUTPUT="bin/gog-safe" +while [[ $# -gt 0 ]]; do + case "$1" in + -o|--output) + if [[ -z "${2:-}" ]]; then + echo "error: $1 requires a path" >&2 + exit 2 + fi + OUTPUT="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "error: unknown flag: $1" >&2 + usage + exit 2 + ;; + esac +done + +if [[ ! 
-f "$PROFILE" ]]; then + echo "error: profile not found: $PROFILE" >&2 + exit 1 +fi + +GEN_FILE="internal/cmd/safety_profile_baked_gen.go" +cleanup() { + rm -f "$GEN_FILE" +} +trap cleanup EXIT + +cleanup +go run ./cmd/bake-safety-profile "$PROFILE" "$GEN_FILE" + +VERSION=$(git describe --tags --always --dirty 2>/dev/null || echo dev) +COMMIT=$(git rev-parse --short=12 HEAD 2>/dev/null || echo "") +DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ) +LDFLAGS="-X github.com/steipete/gogcli/internal/cmd.version=${VERSION}-safe -X github.com/steipete/gogcli/internal/cmd.commit=${COMMIT} -X github.com/steipete/gogcli/internal/cmd.date=${DATE}" + +mkdir -p "$(dirname "$OUTPUT")" +go build -tags safety_profile -ldflags "$LDFLAGS" -o "$OUTPUT" ./cmd/gog +RUN_OUTPUT="$OUTPUT" +if [[ "$RUN_OUTPUT" != */* ]]; then + RUN_OUTPUT="./$RUN_OUTPUT" +fi +"$RUN_OUTPUT" --version + +echo "built $OUTPUT with baked safety profile $PROFILE" diff --git a/cmd/bake-safety-profile/main.go b/cmd/bake-safety-profile/main.go new file mode 100644 index 0000000..9dc22bb --- /dev/null +++ b/cmd/bake-safety-profile/main.go @@ -0,0 +1,40 @@ +package main + +import ( + "bytes" + "fmt" + "os" + "strconv" + + "github.com/steipete/gogcli/internal/cmd" +) + +func main() { + if len(os.Args) != 3 { + _, _ = fmt.Fprintln(os.Stderr, "usage: bake-safety-profile <profile.yaml> <output.go>") + os.Exit(2) + } + + raw, err := os.ReadFile(os.Args[1]) // #nosec G304 G703 -- build helper intentionally reads the requested profile path. 
+ if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "read profile: %v\n", err) + os.Exit(1) + } + if err := cmd.ValidateSafetyProfile(string(raw)); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "parse profile: %v\n", err) + os.Exit(1) + } + + var out bytes.Buffer + out.WriteString("// Code generated by cmd/bake-safety-profile; DO NOT EDIT.\n") + out.WriteString("//go:build safety_profile\n\n") + out.WriteString("package cmd\n\n") + out.WriteString("var bakedSafetyProfileYAML = ") + out.WriteString(strconv.Quote(string(raw))) + out.WriteString("\n") + + if err := os.WriteFile(os.Args[2], out.Bytes(), 0o600); err != nil { // #nosec G306 G703 -- build helper intentionally writes the requested generated Go path. + _, _ = fmt.Fprintf(os.Stderr, "write output: %v\n", err) + os.Exit(1) + } +} diff --git a/docs/backup.md b/docs/backup.md index e200986..94104fb 100644 --- a/docs/backup.md +++ b/docs/backup.md @@ -295,8 +295,12 @@ single ordered background queue: `gog` records the exact commit SHA, continues cached Gmail fetching, and pushes queued SHAs to the current branch one at a time. Transient push failures are retried; GitHub hard rejections stop later checkpoints because descendants would inherit the rejected object. The final -completed backup waits for the queue to drain before writing and pushing the -root manifest. Tune the commit cadence with `--gmail-checkpoint-rows` / +completed backup waits for the queue to drain, then promotes the completed +checkpoint message shards into the root manifest instead of re-encrypting the +same mailbox into a second multi-GB final push. If no complete matching +checkpoint exists, final Gmail message shards still split by row count and the +same conservative plaintext byte ceiling. 
Tune the commit cadence with +`--gmail-checkpoint-rows` / `--gmail-checkpoint-interval` on `gog backup push`, or `--checkpoint-rows` / `--checkpoint-interval` on `gog backup gmail push`; set the interval or rows to `0` to disable that trigger, or use `--no-gmail-checkpoints` / diff --git a/docs/safety-profiles.md b/docs/safety-profiles.md new file mode 100644 index 0000000..2034e11 --- /dev/null +++ b/docs/safety-profiles.md @@ -0,0 +1,201 @@ +# Safety Profiles + +Safety profiles build a dedicated `gog` binary with an embedded command policy. +Use them when `gog` is available to an agent, CI job, sandbox, or other caller +that should not be able to change its own command permissions at runtime. + +Runtime guards such as `--enable-commands`, `--disable-commands`, and +`--gmail-no-send` are still useful for normal scripting. A baked safety profile is +stronger: the policy is compiled into the binary and cannot be changed with +flags, environment variables, config files, or shell arguments. + +## Quick Start + +Build an agent-safe binary: + +```bash +./build-safe.sh safety-profiles/agent-safe.yaml -o bin/gog-agent-safe +``` + +Build a read-only binary: + +```bash +./build-safe.sh safety-profiles/readonly.yaml -o bin/gog-readonly +``` + +Use the built binary exactly like `gog`: + +```bash +bin/gog-agent-safe gmail search 'from:me newer_than:7d' +bin/gog-agent-safe gmail drafts create --to you@example.com --subject "Review" --body "Draft only" +bin/gog-agent-safe gmail drafts send draft-id +``` + +The final command fails before the Gmail send handler runs: + +```text +command "gmail drafts send" is blocked by baked safety profile "agent-safe" +``` + +## How It Works + +`build-safe.sh` performs a normal Go build with one extra generated file: + +1. Validates the YAML profile. +2. Generates `internal/cmd/safety_profile_baked_gen.go` with the profile content. +3. Builds with `-tags safety_profile`. +4. Runs the built binary with `--version` as a smoke test. +5. 
Deletes the generated file on exit. + +Normal `go build` does not include a profile, so the stock `gog` binary is +unchanged. + +At runtime, `gog` parses the command with Kong first. After parsing and before +any command handler or Google API call, it checks the baked profile: + +1. Explicit deny rules win. +2. Allow rules permit matching commands. +3. If the profile has allow rules, everything not allowed is blocked. + +That means a caller cannot re-enable a blocked baked command: + +```bash +bin/gog-readonly --enable-commands gmail.send gmail send \ + --to a@example.com --subject Test --body Test +``` + +The command still fails because the baked policy is checked before runtime +allowlists. + +## Preset Profiles + +`safety-profiles/agent-safe.yaml` + +Allows reading, searching, drafting, labeling, archiving, organizing files, and +other low-risk recoverable actions. Blocks sends, deletes, sharing changes, admin +operations, and auth writes. + +Good for: + +- inbox triage agents +- draft reply generation +- summarization/reporting jobs that may organize labels or files +- workflows where a human should review before anything is sent + +`safety-profiles/readonly.yaml` + +Allows read/list/search/get style commands only. Blocks mutations, sends, deletes, +sharing changes, auth writes, and local config writes. + +Good for: + +- reporting +- audits +- monitoring +- read-only agent context gathering + +`safety-profiles/full.yaml` + +Allows everything. This is mostly useful for smoke testing the build path or for +creating a `-safe` binary with the same command surface as stock `gog`. + +## Profile Syntax + +Profiles are YAML maps that mirror command paths: + +```yaml +name: agent-safe + +gmail: + search: true + send: false + drafts: + create: true + send: false + +aliases: + send: false +``` + +Rules: + +- `true` allows a command path. +- `false` blocks a command path. +- blocked rules override allowed parent rules. 
+- unlisted commands are blocked when the profile has any allow rules. +- command names are written as dot paths internally, such as `gmail.drafts.create`. +- `aliases:` controls root shortcuts such as `send`, `ls`, `search`, and `upload`. + +Parent rules are prefix matches. For example, `drive: true` allows every `drive` +subcommand unless a child is explicitly blocked. For restrictive profiles, prefer +listing leaf commands so a parent allow does not accidentally include future +mutating subcommands: + +```yaml +gmail: + messages: + search: true + modify: false +``` + +## Choosing A Profile + +Use `readonly` when the caller should never change Google or local `gog` state. + +Use `agent-safe` when the caller may prepare work but should not perform +externally visible or hard-to-reverse actions. For example, it may create a Gmail +draft but cannot send it. + +Use a custom profile when the preset is too broad or too narrow: + +```bash +cp safety-profiles/readonly.yaml /tmp/my-agent.yaml +editor /tmp/my-agent.yaml +./build-safe.sh /tmp/my-agent.yaml -o bin/gog-my-agent +``` + +## Verifying A Safe Binary + +Build and smoke test: + +```bash +./build-safe.sh safety-profiles/readonly.yaml -o gog-readonly +./gog-readonly version +``` + +Check blocked commands: + +```bash +./gog-readonly gmail messages modify msg-1 --add Label_1 +./gog-readonly calendar alias set work abc123@group.calendar.google.com +./gog-readonly --enable-commands gmail.send gmail send \ + --to a@example.com --subject Test --body Test +``` + +Each should fail with exit code 2 before any handler or Google API call runs. + +Check allowed commands: + +```bash +./gog-readonly gmail search 'newer_than:1d' +./gog-readonly auth services +``` + +## Help And Schema Output + +Safety-profiled binaries filter help and schema output to the baked profile. +Blocked commands are not listed in parent help menus or `gog schema` output. + +For example, `agent-safe` shows `gmail drafts create` but not `gmail drafts send`. 
+If you ask for help for a blocked leaf command directly, the binary prints the +same baked-profile block message instead of the command documentation. + +## Security Boundary + +Help and schema filtering are usability layers for humans and tool-discovering +agents. The security boundary remains the pre-execution profile check: blocked +commands fail before any command handler or Google API call runs. + +Safety profiles also do not replace OAuth scopes, account separation, or Google +Workspace policy. Use the narrowest practical OAuth scopes and account access, +then use a baked profile as an additional local execution guard. diff --git a/go.mod b/go.mod index 68132a5..9627539 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( golang.org/x/term v0.42.0 golang.org/x/text v0.36.0 google.golang.org/api v0.276.0 + gopkg.in/yaml.v3 v3.0.1 ) require ( @@ -55,5 +56,4 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4 // indirect google.golang.org/grpc v1.80.0 // indirect google.golang.org/protobuf v1.36.11 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/internal/backup/backup.go b/internal/backup/backup.go index 97da527..5986f13 100644 --- a/internal/backup/backup.go +++ b/internal/backup/backup.go @@ -2,7 +2,6 @@ package backup import ( - "bufio" "bytes" "context" "crypto/sha256" @@ -71,13 +70,15 @@ type ShardEntry struct { } type PlainShard struct { - Service string - Kind string - Account string - Path string - Rows int - Plaintext []byte - PlaintextPath string + Service string + Kind string + Account string + Path string + Rows int + Plaintext []byte + PlaintextPath string + Existing *ShardEntry + ExistingRecipients []string } type Snapshot struct { @@ -240,10 +241,7 @@ func Verify(ctx context.Context, opts Options) (Result, error) { if got := sha256Hex(plaintext); got != shard.SHA256 { return Result{}, fmt.Errorf("backup shard hash mismatch for %s", shard.Path) } - rows, err := 
countJSONLLines(plaintext) - if err != nil { - return Result{}, fmt.Errorf("count rows in %s: %w", shard.Path, err) - } + rows := countJSONLLines(plaintext) if rows != shard.Rows { return Result{}, fmt.Errorf("backup shard row count mismatch for %s: got %d, want %d", shard.Path, rows, shard.Rows) } @@ -286,6 +284,22 @@ func NewJSONLShard(service, kind, account, rel string, rows any) (PlainShard, er }, nil } +func ExistingShard(entry ShardEntry, recipients []string) PlainShard { + return PlainShard{ + Service: strings.TrimSpace(entry.Service), + Kind: strings.TrimSpace(entry.Kind), + Account: strings.TrimSpace(entry.Account), + Path: filepath.ToSlash(entry.Path), + Rows: entry.Rows, + Existing: &entry, + ExistingRecipients: append([]string(nil), recipients...), + } +} + +func ReadCheckpointManifest(repo, rel string) (CheckpointManifest, error) { + return readCheckpointManifest(repo, rel) +} + func writeCheckpoint(ctx context.Context, cfg Config, snapshot Snapshot, checkpoint Checkpoint) (CheckpointManifest, error) { checkpoint.Service = safePathPart(checkpoint.Service) checkpoint.Account = safePathPart(checkpoint.Account) @@ -456,6 +470,40 @@ func writeShard(cfg Config, old Manifest, shard PlainShard, reuseEncrypted bool) if strings.TrimSpace(shard.Service) == "" { return ShardEntry{}, fmt.Errorf("backup shard service is required") } + if shard.Existing != nil { + if len(shard.ExistingRecipients) > 0 && !sameStrings(shard.ExistingRecipients, cfg.Recipients) { + return ShardEntry{}, fmt.Errorf("backup shard %s was encrypted for different recipients", shard.Existing.Path) + } + entry := *shard.Existing + if strings.TrimSpace(entry.Service) == "" { + entry.Service = shard.Service + } + if strings.TrimSpace(entry.Kind) == "" { + entry.Kind = shard.Kind + } + if strings.TrimSpace(entry.Account) == "" { + entry.Account = shard.Account + } + if strings.TrimSpace(entry.Path) == "" { + entry.Path = shard.Path + } + if entry.Rows == 0 { + entry.Rows = shard.Rows + } + path, 
err := resolveShardPath(cfg.Repo, entry.Path) + if err != nil { + return ShardEntry{}, err + } + info, err := os.Stat(path) + if err != nil { + return ShardEntry{}, fmt.Errorf("reuse encrypted backup shard %s: %w", entry.Path, err) + } + if entry.Bytes > 0 && info.Size() != entry.Bytes { + return ShardEntry{}, fmt.Errorf("reuse encrypted backup shard %s: size changed from %d to %d", entry.Path, entry.Bytes, info.Size()) + } + entry.Bytes = info.Size() + return entry, nil + } hash, err := shardPlaintextHash(shard) if err != nil { return ShardEntry{}, err @@ -572,28 +620,32 @@ func encodeJSONL(rows any) ([]byte, int, error) { } func DecodeJSONL[T any](plaintext []byte, out *[]T) error { - scanner := bufio.NewScanner(bytes.NewReader(plaintext)) - scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024) - for scanner.Scan() { + for _, line := range jsonlLines(plaintext) { + line = bytes.TrimSpace(line) + if len(line) == 0 { + continue + } var value T - if err := json.Unmarshal(scanner.Bytes(), &value); err != nil { + if err := json.Unmarshal(line, &value); err != nil { return err } *out = append(*out, value) } - return scanner.Err() + return nil } -func countJSONLLines(plaintext []byte) (int, error) { - scanner := bufio.NewScanner(bytes.NewReader(plaintext)) - scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024) +func countJSONLLines(plaintext []byte) int { count := 0 - for scanner.Scan() { - if len(bytes.TrimSpace(scanner.Bytes())) > 0 { + for _, line := range jsonlLines(plaintext) { + if len(bytes.TrimSpace(line)) > 0 { count++ } } - return count, scanner.Err() + return count +} + +func jsonlLines(plaintext []byte) [][]byte { + return bytes.Split(plaintext, []byte{'\n'}) } func readManifest(repo string) (Manifest, error) { diff --git a/internal/backup/backup_test.go b/internal/backup/backup_test.go index 1635742..3e31b9e 100644 --- a/internal/backup/backup_test.go +++ b/internal/backup/backup_test.go @@ -142,6 +142,44 @@ func 
TestPushCheckpointWritesIncompleteManifestOutsideMainSnapshot(t *testing.T) } } +func TestPushSnapshotCanReferenceExistingCheckpointShard(t *testing.T) { + ctx, repo, config, _ := initTestBackup(t) + checkpointShard := mustGmailMessageShard(t, "checkpoints/gmail/acct/run-one/messages/part-000001.jsonl.gz.age", []map[string]string{ + {"id": "m1", "raw": "checkpoint final"}, + }) + if _, err := PushCheckpoint(ctx, Snapshot{ + Services: []string{"gmail"}, + Accounts: []string{"acct"}, + Counts: map[string]int{"gmail.messages": 1}, + Shards: []PlainShard{checkpointShard}, + }, Checkpoint{RunID: "run-one", Service: "gmail", Account: "acct", Done: 1, Total: 1}, Options{ConfigPath: config, Push: false}); err != nil { + t.Fatalf("PushCheckpoint: %v", err) + } + checkpointManifest, err := readCheckpointManifest(repo, "checkpoints/gmail/acct/run-one/manifest.json") + if err != nil { + t.Fatalf("readCheckpointManifest: %v", err) + } + if _, err := PushSnapshot(ctx, Snapshot{ + Services: []string{"gmail"}, + Accounts: []string{"acct"}, + Counts: map[string]int{"gmail.messages": 1}, + Shards: []PlainShard{ExistingShard(checkpointManifest.Shards[0], checkpointManifest.Recipients)}, + }, Options{ConfigPath: config, Push: false}); err != nil { + t.Fatalf("PushSnapshot existing checkpoint shard: %v", err) + } + + manifest := readTestManifest(t, repo) + if len(manifest.Shards) != 1 || manifest.Shards[0].Path != checkpointManifest.Shards[0].Path { + t.Fatalf("root manifest did not reference checkpoint shard: %+v", manifest.Shards) + } + if _, err := Verify(ctx, Options{ConfigPath: config}); err != nil { + t.Fatalf("Verify: %v", err) + } + if _, err := Cat(ctx, Options{ConfigPath: config}, checkpointManifest.Shards[0].Path); err != nil { + t.Fatalf("Cat checkpoint shard from root manifest: %v", err) + } +} + func TestAsyncCheckpointPushDrainsBeforeFinalSnapshot(t *testing.T) { ctx := context.Background() dir := t.TempDir() @@ -373,6 +411,22 @@ func 
TestVerifyDetectsManifestRowCountMismatch(t *testing.T) { } } +func TestJSONLHelpersHandleLargeRows(t *testing.T) { + large := strings.Repeat("x", 17*1024*1024) + plaintext := []byte(`{"id":"large","raw":"` + large + "\"}\n") + rows := countJSONLLines(plaintext) + if rows != 1 { + t.Fatalf("rows = %d, want 1", rows) + } + var decoded []map[string]string + if err := DecodeJSONL(plaintext, &decoded); err != nil { + t.Fatalf("DecodeJSONL: %v", err) + } + if len(decoded) != 1 || decoded[0]["raw"] != large { + t.Fatalf("decoded large row mismatch: len=%d", len(decoded)) + } +} + func TestPushReusesEncryptedShardWhenPlaintextAndRecipientsMatch(t *testing.T) { ctx, repo, config, _ := initTestBackup(t) shardPath := "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age" diff --git a/internal/backup/read.go b/internal/backup/read.go index 0253ac7..e805aab 100644 --- a/internal/backup/read.go +++ b/internal/backup/read.go @@ -37,39 +37,51 @@ func Cat(ctx context.Context, opts Options, shardPath string) (PlainShard, error } func DecryptSnapshot(ctx context.Context, opts Options) (Manifest, string, []PlainShard, error) { + shards := []PlainShard{} + manifest, repo, err := WalkSnapshot(ctx, opts, func(_ Manifest, _ string, shard PlainShard) error { + shards = append(shards, shard) + return nil + }) + return manifest, repo, shards, err +} + +func WalkSnapshot(ctx context.Context, opts Options, visit func(Manifest, string, PlainShard) error) (Manifest, string, error) { cfg, err := ResolveOptions(opts) if err != nil { - return Manifest{}, "", nil, err + return Manifest{}, "", err } if !opts.SkipPull { repoErr := ensureRepo(ctx, cfg) if repoErr != nil { - return Manifest{}, "", nil, repoErr + return Manifest{}, "", repoErr } } else if strings.TrimSpace(cfg.Repo) == "" { - return Manifest{}, "", nil, fmt.Errorf("backup repo path is required") + return Manifest{}, "", fmt.Errorf("backup repo path is required") } manifest, err := readManifest(cfg.Repo) if err != nil { - return 
Manifest{}, "", nil, err + return Manifest{}, "", err } if manifest.Format != formatVersion { - return Manifest{}, "", nil, fmt.Errorf("unsupported backup format %d", manifest.Format) + return Manifest{}, "", fmt.Errorf("unsupported backup format %d", manifest.Format) } - shards := make([]PlainShard, 0, len(manifest.Shards)) for _, shard := range manifest.Shards { select { case <-ctx.Done(): - return Manifest{}, "", nil, ctx.Err() + return Manifest{}, "", ctx.Err() default: } plain, err := decryptManifestShard(cfg, shard) if err != nil { - return Manifest{}, "", nil, err + return Manifest{}, "", err + } + if visit != nil { + if err := visit(manifest, cfg.Repo, plain); err != nil { + return Manifest{}, "", err + } } - shards = append(shards, plain) } - return manifest, cfg.Repo, shards, nil + return manifest, cfg.Repo, nil } func decryptManifestShard(cfg Config, shard ShardEntry) (PlainShard, error) { @@ -94,10 +106,7 @@ func verifyPlainShard(shard ShardEntry, plaintext []byte) error { if got := sha256Hex(plaintext); got != shard.SHA256 { return fmt.Errorf("backup shard hash mismatch for %s", shard.Path) } - rows, err := countJSONLLines(plaintext) - if err != nil { - return fmt.Errorf("count rows in %s: %w", shard.Path, err) - } + rows := countJSONLLines(plaintext) if rows != shard.Rows { return fmt.Errorf("backup shard row count mismatch for %s: got %d, want %d", shard.Path, rows, shard.Rows) } diff --git a/internal/cmd/backup_export.go b/internal/cmd/backup_export.go index dd3c4ab..b8fef9a 100644 --- a/internal/cmd/backup_export.go +++ b/internal/cmd/backup_export.go @@ -1,7 +1,6 @@ package cmd import ( - "bufio" "bytes" "context" "encoding/base64" @@ -76,36 +75,55 @@ func (c *BackupExportCmd) Run(ctx context.Context) error { if err != nil { return err } - manifest, repo, shards, err := backup.DecryptSnapshot(ctx, c.options()) - if err != nil { - return err - } - if exportErr := ensureExportOutsideRepo(outDir, repo); exportErr != nil { - return exportErr - } - 
result := backupExportResult{ - Out: outDir, - Repo: repo, - ManifestExport: manifest.Exported, - Counts: map[string]int{}, - } - if mkdirErr := os.MkdirAll(outDir, 0o700); mkdirErr != nil { - return mkdirErr - } - if readmeErr := writeBackupExportReadme(outDir); readmeErr != nil { - return readmeErr - } - if manifestErr := writeJSONFile(filepath.Join(outDir, "manifest.json"), manifest); manifestErr != nil { - return manifestErr - } exportOpts := backupExportOptions{ GmailFormat: c.GmailFormat, GmailAttachments: c.GmailAttachments, } - if resetErr := resetExportTargets(outDir, shards); resetErr != nil { - return resetErr + result := backupExportResult{ + Out: outDir, + Counts: map[string]int{}, } - for _, shard := range shards { + initialized := false + shardIndex := 0 + u := ui.FromContext(ctx) + initExport := func(manifest backup.Manifest, repo string) error { + if initialized { + return nil + } + if exportErr := ensureExportOutsideRepo(outDir, repo); exportErr != nil { + return exportErr + } + result.Repo = repo + result.ManifestExport = manifest.Exported + if mkdirErr := os.MkdirAll(outDir, 0o700); mkdirErr != nil { + return mkdirErr + } + if readmeErr := writeBackupExportReadme(outDir); readmeErr != nil { + return readmeErr + } + if manifestErr := writeJSONFile(filepath.Join(outDir, "manifest.json"), manifest); manifestErr != nil { + return manifestErr + } + if resetErr := resetExportTargets(outDir, manifest.Shards); resetErr != nil { + return resetErr + } + initialized = true + return nil + } + var manifest backup.Manifest + var repo string + manifest, repo, err = backup.WalkSnapshot(ctx, c.options(), func(snapshot backup.Manifest, snapshotRepo string, shard backup.PlainShard) error { + if initErr := initExport(snapshot, snapshotRepo); initErr != nil { + return initErr + } + shardIndex++ + if u != nil { + key := shard.Service + if strings.TrimSpace(shard.Kind) != "" { + key += "." 
+ shard.Kind + } + u.Err().Printf("export\t%d/%d\t%s\trows=%d", shardIndex, len(snapshot.Shards), key, shard.Rows) + } _, count, shardErr := exportPlainShard(outDir, shard, exportOpts) if shardErr != nil { return shardErr @@ -115,6 +133,15 @@ func (c *BackupExportCmd) Run(ctx context.Context) error { key += "." + shard.Kind } result.Counts[key] += count + return nil + }) + if err != nil { + return err + } + if !initialized { + if initErr := initExport(manifest, repo); initErr != nil { + return initErr + } } files, err := countExportFiles(outDir) if err != nil { @@ -124,7 +151,6 @@ func (c *BackupExportCmd) Run(ctx context.Context) error { if outfmt.IsJSON(ctx) { return outfmt.WriteJSON(ctx, os.Stdout, result) } - u := ui.FromContext(ctx) u.Out().Printf("out\t%s", result.Out) u.Out().Printf("repo\t%s", result.Repo) u.Out().Printf("files\t%d", result.Files) @@ -140,16 +166,14 @@ func (c *BackupExportCmd) Run(ctx context.Context) error { } func prettyJSONL(data []byte) ([]byte, error) { - scanner := bufio.NewScanner(bytes.NewReader(data)) - scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024) var out bytes.Buffer - for scanner.Scan() { - line := bytes.TrimSpace(scanner.Bytes()) - if len(line) == 0 { + for _, rawLine := range bytes.Split(data, []byte{'\n'}) { + trimmedLine := bytes.TrimSpace(rawLine) + if len(trimmedLine) == 0 { continue } var pretty bytes.Buffer - if err := json.Indent(&pretty, line, "", " "); err != nil { + if err := json.Indent(&pretty, trimmedLine, "", " "); err != nil { return nil, err } if _, err := pretty.WriteTo(&out); err != nil { @@ -159,7 +183,7 @@ func prettyJSONL(data []byte) ([]byte, error) { return nil, err } } - return out.Bytes(), scanner.Err() + return out.Bytes(), nil } func expandUserPath(path string) (string, error) { @@ -206,13 +230,13 @@ func ensureExportOutsideRepo(outDir, repo string) error { return nil } -func resetExportTargets(outDir string, shards []backup.PlainShard) error { +func resetExportTargets(outDir string, shards 
[]backup.ShardEntry) error { seen := map[string]struct{}{} for _, shard := range shards { target := "" switch { case shard.Service == backupServiceGmail && shard.Kind == "messages": - target = filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages") + target = filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages", "index.jsonl") case shard.Service == backupServiceDrive && shard.Kind == "contents": target = filepath.Join(outDir, backupServiceDrive, sanitizeFilePart(shard.Account), "files", "index.jsonl") } diff --git a/internal/cmd/backup_export_gmail.go b/internal/cmd/backup_export_gmail.go index ea57fc1..7a3d991 100644 --- a/internal/cmd/backup_export_gmail.go +++ b/internal/cmd/backup_export_gmail.go @@ -43,6 +43,7 @@ type backupEmail struct { Date string TextBody string HTMLBody string + ParseError string Attachments []backupEmailAttachment } @@ -96,7 +97,7 @@ func exportGmailMessages(outDir string, shard backup.PlainShard, opts backupExpo } parsed, parseErr := parseBackupEmail(rawMIME) if parseErr != nil && gmailFormat != "eml" { - return files, 0, fmt.Errorf("parse Gmail MIME %s: %w", message.ID, parseErr) + parsed.ParseError = parseErr.Error() } entry := gmailExportIndexEntry{ ID: message.ID, @@ -272,6 +273,7 @@ func renderGmailMessageMarkdown(message gmailBackupMessage, parsed backupEmail, writeYAMLList(&b, "cc", parsed.Cc) writeYAMLScalar(&b, "subject", parsed.Subject) writeYAMLList(&b, "labels", message.LabelIDs) + writeYAMLScalar(&b, "parse_error", parsed.ParseError) if message.SizeEstimate > 0 { fmt.Fprintf(&b, "size_estimate: %d\n", message.SizeEstimate) } @@ -282,10 +284,18 @@ func renderGmailMessageMarkdown(message gmailBackupMessage, parsed backupEmail, b.WriteString(markdownHeadingText(parsed.Subject)) b.WriteString("\n\n") } - if strings.TrimSpace(body) != "" { - b.WriteString(strings.TrimSpace(body)) + trimmedBody := strings.TrimSpace(body) + parseError := 
strings.TrimSpace(parsed.ParseError) + switch { + case trimmedBody != "": + b.WriteString(trimmedBody) b.WriteString("\n") - } else { + case parseError != "": + b.WriteString("_MIME parse failed: ") + b.WriteString(markdownHeadingText(parseError)) + b.WriteString("._\n\n") + b.WriteString("_Raw MIME remains available in the encrypted backup._\n") + default: b.WriteString("_No text body found._\n") } if len(attachmentRels) > 0 { @@ -317,10 +327,10 @@ func parseBackupEmail(rawMIME []byte) (backupEmail, error) { } body, err := io.ReadAll(msg.Body) if err != nil { - return backupEmail{}, err + return out, err } if err := parseBackupEmailEntity(body, string(msg.Header.Get("Content-Type")), string(msg.Header.Get("Content-Transfer-Encoding")), &out); err != nil { - return backupEmail{}, err + return out, err } return out, nil } diff --git a/internal/cmd/backup_export_gmail_test.go b/internal/cmd/backup_export_gmail_test.go index 7f49c40..09536b3 100644 --- a/internal/cmd/backup_export_gmail_test.go +++ b/internal/cmd/backup_export_gmail_test.go @@ -135,6 +135,53 @@ func TestExportGmailMessagesWritesMarkdownAndAttachments(t *testing.T) { } } +func TestExportGmailMessagesWritesMarkdownFallbackForMalformedMIME(t *testing.T) { + outDir := t.TempDir() + payload := strings.Join([]string{ + "Subject: Broken", + "From: Alice ", + "MIME-Version: 1.0", + `Content-Type: multipart/mixed; boundary="b1"`, + "", + "--b1", + "Content-Type: text/plain; charset=utf-8", + "", + "incomplete body", + }, "\r\n") + message := gmailBackupMessage{ + ID: "broken", + InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"), + Raw: base64.RawURLEncoding.EncodeToString([]byte(payload)), + } + shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{message}) + if err != nil { + t.Fatalf("NewJSONLShard: %v", err) + } + + files, count, err := exportGmailMessages(outDir, shard, backupExportOptions{GmailFormat: 
"markdown", GmailAttachments: "extract"}) + if err != nil { + t.Fatalf("exportGmailMessages: %v", err) + } + if files != 2 || count != 1 { + t.Fatalf("files,count = %d,%d want 2,1", files, count) + } + mdRel := filepath.ToSlash(filepath.Join(backupExportMessageDir("acct_hash", message, "Broken"), "message.md")) + md := readText(t, filepath.Join(outDir, filepath.FromSlash(mdRel))) + for _, want := range []string{ + `subject: "Broken"`, + "parse_error:", + "MIME parse failed", + } { + if !strings.Contains(md, want) { + t.Fatalf("markdown missing %q:\n%s", want, md) + } + } + index := readText(t, filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl")) + if !strings.Contains(index, `"markdown":"`+mdRel+`"`) { + t.Fatalf("index missing markdown fallback: %s", index) + } +} + func TestBackupEmailMarkdownBodyCleansHTMLFragments(t *testing.T) { got := backupEmailMarkdownBody(backupEmail{TextBody: "

Hello Peter

"}) if got != "Hello Peter" { diff --git a/internal/cmd/backup_gmail.go b/internal/cmd/backup_gmail.go index b6569ca..e811e43 100644 --- a/internal/cmd/backup_gmail.go +++ b/internal/cmd/backup_gmail.go @@ -5,6 +5,7 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "errors" "fmt" "os" "path/filepath" @@ -105,14 +106,20 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB if listErr != nil { return backup.Snapshot{}, listErr } - opts.CheckpointRunID = gmailBackupCheckpointRunID(opts, ids) + opts.CheckpointRunID = gmailBackupResolvedCheckpointRunID(ctx, opts, ids) if cacheErr := ensureGmailBackupMessageCache(ctx, svc, opts, ids); cacheErr != nil { return backup.Snapshot{}, cacheErr } - messageShards, shardErr := buildGmailMessageShardsFromCache(ctx, opts, ids) + messageShards, promoted, shardErr := buildGmailMessageShardsFromCheckpoint(ctx, opts, ids) if shardErr != nil { return backup.Snapshot{}, shardErr } + if !promoted { + messageShards, shardErr = buildGmailMessageShardsFromCache(ctx, opts, ids) + if shardErr != nil { + return backup.Snapshot{}, shardErr + } + } shards = append(shards, messageShards...) 
messageCount = len(ids) } else { @@ -353,7 +360,10 @@ type gmailBackupCheckpointer struct { const gmailCheckpointShardMaxRows = 250 -var gmailCheckpointShardMaxPlaintextBytes int64 = 32 * 1024 * 1024 +var ( + gmailCheckpointShardMaxPlaintextBytes int64 = 32 * 1024 * 1024 + gmailMessageShardMaxPlaintextBytes int64 = 32 * 1024 * 1024 +) func newGmailBackupCheckpointer(ctx context.Context, opts gmailBackupOptions, total int) *gmailBackupCheckpointer { enabled := opts.Checkpoints && @@ -683,9 +693,14 @@ func gmailBackupProgressf(ctx context.Context, format string, args ...any) { type gmailBackupMessageRef struct { ID string InternalDate int64 + LineBytes int64 } func buildGmailMessageShardsFromCache(ctx context.Context, opts gmailBackupOptions, ids []string) ([]backup.PlainShard, error) { + return buildGmailMessageShardsFromCacheWithLimit(ctx, opts, ids, gmailMessageShardMaxPlaintextBytes) +} + +func buildGmailMessageShardsFromCacheWithLimit(ctx context.Context, opts gmailBackupOptions, ids []string, maxPlaintextBytes int64) ([]backup.PlainShard, error) { if opts.ShardMaxRows <= 0 { opts.ShardMaxRows = 1000 } @@ -708,8 +723,12 @@ func buildGmailMessageShardsFromCache(ctx context.Context, opts gmailBackupOptio if !ok { return nil, fmt.Errorf("gmail message %s missing from backup cache", id) } + lineBytes, err := gmailBackupMessageJSONLSize(msg) + if err != nil { + return nil, err + } key := gmailBackupMessageMonthKey(msg.InternalDate) - buckets[key] = append(buckets[key], gmailBackupMessageRef{ID: msg.ID, InternalDate: msg.InternalDate}) + buckets[key] = append(buckets[key], gmailBackupMessageRef{ID: msg.ID, InternalDate: msg.InternalDate, LineBytes: lineBytes}) done := i + 1 if done == len(ids) || done%5000 == 0 { gmailBackupProgressf(ctx, "backup gmail shard-index\t%d/%d", done, len(ids)) @@ -730,19 +749,27 @@ func buildGmailMessageShardsFromCache(ctx context.Context, opts gmailBackupOptio } return refs[i].InternalDate < refs[j].InternalDate }) - for part, start := 1, 
0; start < len(refs); part, start = part+1, start+opts.ShardMaxRows { - end := start + opts.ShardMaxRows - if end > len(refs) { - end = len(refs) + for part, start := 1, 0; start < len(refs); part++ { + chunkStart := start + var chunkBytes int64 + for start < len(refs) { + lineBytes := refs[start].LineBytes + overRows := start-chunkStart >= opts.ShardMaxRows + overBytes := maxPlaintextBytes > 0 && start > chunkStart && chunkBytes+lineBytes > maxPlaintextBytes + if overRows || overBytes { + break + } + chunkBytes += lineBytes + start++ } rel := fmt.Sprintf("data/gmail/%s/messages/%s/part-%04d.jsonl.gz.age", opts.AccountHash, key, part) - shard, err := buildGmailMessageShardFromCache(opts.AccountHash, rel, tempDir, refs[start:end]) + shard, err := buildGmailMessageShardFromCache(opts.AccountHash, rel, tempDir, refs[chunkStart:start]) if err != nil { return nil, err } shards = append(shards, shard) shardCount++ - if shardCount%25 == 0 || end == len(refs) { + if shardCount%25 == 0 || start == len(refs) { gmailBackupProgressf(ctx, "backup gmail shard-build\tshards=%d\tmessages=%d/%d", shardCount, countGmailShardRows(shards), len(ids)) } } @@ -911,12 +938,11 @@ func buildGmailCheckpointShardsFromCache(accountHash, runID string, firstPart in cleanup() return nil, fmt.Errorf("gmail message %s missing from backup cache", id) } - line, err := json.Marshal(msg) + lineBytes, err := gmailBackupMessageJSONLSize(msg) if err != nil { cleanup() - return nil, fmt.Errorf("encode gmail backup checkpoint shard estimate: %w", err) + return nil, err } - lineBytes := int64(len(line) + 1) overRows := len(chunk) >= gmailCheckpointShardMaxRows overBytes := gmailCheckpointShardMaxPlaintextBytes > 0 && len(chunk) > 0 && chunkBytes+lineBytes > gmailCheckpointShardMaxPlaintextBytes if overRows || overBytes { @@ -933,6 +959,51 @@ func buildGmailCheckpointShardsFromCache(accountHash, runID string, firstPart in return shards, nil } +func buildGmailMessageShardsFromCheckpoint(ctx context.Context, 
opts gmailBackupOptions, ids []string) ([]backup.PlainShard, bool, error) { + if !opts.CacheMessages || !opts.Checkpoints || strings.TrimSpace(opts.AccountHash) == "" || strings.TrimSpace(opts.CheckpointRunID) == "" { + return nil, false, nil + } + cfg, err := backup.ResolveOptions(opts.BackupOptions) + if err != nil { + return nil, false, err + } + if len(cfg.Recipients) == 0 { + recipient, recipientErr := backup.RecipientFromIdentity(cfg.Identity) + if recipientErr != nil { + return nil, false, recipientErr + } + cfg.Recipients = []string{recipient} + } + manifest, err := backup.ReadCheckpointManifest(cfg.Repo, gmailBackupCheckpointManifestRel(opts.AccountHash, opts.CheckpointRunID)) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, false, nil + } + return nil, false, err + } + if !gmailBackupCheckpointCompleteForSelection(manifest, opts, ids) { + return nil, false, nil + } + if !sameBackupRecipients(manifest.Recipients, cfg.Recipients) { + gmailBackupProgressf(ctx, "backup gmail checkpoint-promote\tskip=recipients-changed\trun=%s", opts.CheckpointRunID) + return nil, false, nil + } + shards := make([]backup.PlainShard, 0, len(manifest.Shards)) + rows := 0 + for _, entry := range manifest.Shards { + if entry.Service != backupServiceGmail || entry.Kind != "messages" || entry.Account != opts.AccountHash { + return nil, false, fmt.Errorf("gmail checkpoint %s contains unexpected shard %s/%s/%s", opts.CheckpointRunID, entry.Service, entry.Kind, entry.Account) + } + shards = append(shards, backup.ExistingShard(entry, manifest.Recipients)) + rows += entry.Rows + } + if rows != len(ids) { + return nil, false, fmt.Errorf("gmail checkpoint %s row count = %d, want %d", opts.CheckpointRunID, rows, len(ids)) + } + gmailBackupProgressf(ctx, "backup gmail checkpoint-promote\trun=%s\tshards=%d\tmessages=%d", opts.CheckpointRunID, len(shards), rows) + return shards, true, nil +} + func gmailBackupCheckpointTempShardDir(accountHash, runID string) (string, bool) 
{ accountHash = strings.TrimSpace(accountHash) runID = strings.TrimSpace(runID) @@ -947,6 +1018,10 @@ func gmailBackupCheckpointTempShardDir(accountHash, runID string) (string, bool) } func gmailBackupCheckpointRunID(opts gmailBackupOptions, ids []string) string { + return time.Now().UTC().Format("20060102T150405Z") + "-" + gmailBackupCheckpointRunIDSuffix(opts, ids) +} + +func gmailBackupCheckpointRunIDSuffix(opts gmailBackupOptions, ids []string) string { key := struct { AccountHash string `json:"accountHash"` Query string `json:"query,omitempty"` @@ -962,7 +1037,92 @@ func gmailBackupCheckpointRunID(opts gmailBackupOptions, ids []string) string { } data, _ := json.Marshal(key) sum := sha256.Sum256(data) - return time.Now().UTC().Format("20060102T150405Z") + "-" + hex.EncodeToString(sum[:6]) + return hex.EncodeToString(sum[:6]) +} + +func gmailBackupResolvedCheckpointRunID(ctx context.Context, opts gmailBackupOptions, ids []string) string { + generated := gmailBackupCheckpointRunID(opts, ids) + if !opts.Checkpoints || !opts.CacheMessages || strings.TrimSpace(opts.AccountHash) == "" { + return generated + } + suffix := gmailBackupCheckpointRunIDSuffix(opts, ids) + cfg, err := backup.ResolveOptions(opts.BackupOptions) + if err != nil { + return generated + } + root := filepath.Join(cfg.Repo, "checkpoints", "gmail", opts.AccountHash) + entries, err := os.ReadDir(root) + if err != nil { + return generated + } + runIDs := make([]string, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() && strings.HasSuffix(entry.Name(), "-"+suffix) { + runIDs = append(runIDs, entry.Name()) + } + } + sort.Sort(sort.Reverse(sort.StringSlice(runIDs))) + for _, runID := range runIDs { + manifest, err := backup.ReadCheckpointManifest(cfg.Repo, gmailBackupCheckpointManifestRel(opts.AccountHash, runID)) + if err != nil { + continue + } + if !gmailBackupCheckpointMatchesSelection(manifest, opts, ids) { + continue + } + gmailBackupProgressf(ctx, "backup gmail 
checkpoint\treuse=%s\tdone=%d/%d", runID, manifest.Done, manifest.Total) + return runID + } + return generated +} + +func gmailBackupCheckpointManifestRel(accountHash, runID string) string { + return fmt.Sprintf("checkpoints/gmail/%s/%s/manifest.json", accountHash, runID) +} + +func gmailBackupCheckpointMatchesSelection(manifest backup.CheckpointManifest, opts gmailBackupOptions, ids []string) bool { + return manifest.Service == backupServiceGmail && + manifest.Account == opts.AccountHash && + manifest.Total == len(ids) && + strings.TrimSpace(manifest.RunID) != "" +} + +func gmailBackupCheckpointCompleteForSelection(manifest backup.CheckpointManifest, opts gmailBackupOptions, ids []string) bool { + return gmailBackupCheckpointMatchesSelection(manifest, opts, ids) && + manifest.Done == len(ids) && + manifest.Total == len(ids) +} + +func sameBackupRecipients(a, b []string) bool { + a = normalizedBackupStrings(a) + b = normalizedBackupStrings(b) + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +func normalizedBackupStrings(values []string) []string { + seen := map[string]struct{}{} + out := make([]string, 0, len(values)) + for _, value := range values { + value = strings.TrimSpace(value) + if value == "" { + continue + } + if _, ok := seen[value]; ok { + continue + } + seen[value] = struct{}{} + out = append(out, value) + } + sort.Strings(out) + return out } func gmailBackupMessageMonthKey(internalDate int64) string { @@ -974,6 +1134,10 @@ func gmailBackupMessageMonthKey(internalDate int64) string { } func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage, shardMaxRows int) ([]backup.PlainShard, error) { + return buildGmailMessageShardsWithLimit(accountHash, messages, shardMaxRows, gmailMessageShardMaxPlaintextBytes) +} + +func buildGmailMessageShardsWithLimit(accountHash string, messages []gmailBackupMessage, shardMaxRows int, maxPlaintextBytes int64) 
([]backup.PlainShard, error) { if shardMaxRows <= 0 { shardMaxRows = 1000 } @@ -996,13 +1160,24 @@ func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage, } return values[i].InternalDate < values[j].InternalDate }) - for part, start := 1, 0; start < len(values); part, start = part+1, start+shardMaxRows { - end := start + shardMaxRows - if end > len(values) { - end = len(values) + for part, start := 1, 0; start < len(values); part++ { + chunkStart := start + var chunkBytes int64 + for start < len(values) { + lineBytes, err := gmailBackupMessageJSONLSize(values[start]) + if err != nil { + return nil, err + } + overRows := start-chunkStart >= shardMaxRows + overBytes := maxPlaintextBytes > 0 && start > chunkStart && chunkBytes+lineBytes > maxPlaintextBytes + if overRows || overBytes { + break + } + chunkBytes += lineBytes + start++ } rel := fmt.Sprintf("data/gmail/%s/messages/%s/part-%04d.jsonl.gz.age", accountHash, key, part) - shard, err := backup.NewJSONLShard(backupServiceGmail, "messages", accountHash, rel, values[start:end]) + shard, err := backup.NewJSONLShard(backupServiceGmail, "messages", accountHash, rel, values[chunkStart:start]) if err != nil { return nil, err } @@ -1011,3 +1186,11 @@ func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage, } return shards, nil } + +func gmailBackupMessageJSONLSize(message gmailBackupMessage) (int64, error) { + line, err := json.Marshal(message) + if err != nil { + return 0, fmt.Errorf("encode gmail backup shard estimate: %w", err) + } + return int64(len(line) + 1), nil +} diff --git a/internal/cmd/backup_test.go b/internal/cmd/backup_test.go index 81dca07..2dbab4d 100644 --- a/internal/cmd/backup_test.go +++ b/internal/cmd/backup_test.go @@ -90,6 +90,32 @@ func TestBuildGmailMessageShardsBucketsSortsAndChunks(t *testing.T) { } } +func TestBuildGmailMessageShardsSplitsByPlaintextSize(t *testing.T) { + accountHash := "accthash" + messages := []gmailBackupMessage{ + {ID: "m1", 
InternalDate: mustUnixMilli(t, "2026-04-01T10:00:00Z"), Raw: strings.Repeat("raw-1", 8)}, + {ID: "m2", InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"), Raw: strings.Repeat("raw-2", 8)}, + {ID: "m3", InternalDate: mustUnixMilli(t, "2026-04-03T10:00:00Z"), Raw: strings.Repeat("raw-3", 8)}, + } + + shards, err := buildGmailMessageShardsWithLimit(accountHash, messages, 100, 1) + if err != nil { + t.Fatalf("buildGmailMessageShardsWithLimit: %v", err) + } + if len(shards) != 3 { + t.Fatalf("len(shards) = %d, want 3", len(shards)) + } + for i, shard := range shards { + if shard.Rows != 1 { + t.Fatalf("shards[%d].Rows = %d, want 1", i, shard.Rows) + } + want := fmt.Sprintf("part-%04d.jsonl.gz.age", i+1) + if !strings.HasSuffix(shard.Path, want) { + t.Fatalf("shards[%d].Path = %q, want suffix %q", i, shard.Path, want) + } + } +} + func TestMergeBackupSnapshotsKeepsCountsAndShardOrder(t *testing.T) { left := backup.Snapshot{ Services: []string{"gmail"}, @@ -404,6 +430,94 @@ func TestEnsureGmailBackupMessageCacheWritesEncryptedCheckpoints(t *testing.T) { } } +func TestBuildGmailMessageShardsFromCheckpointPromotesCompleteRun(t *testing.T) { + t.Setenv("HOME", t.TempDir()) + ctx := context.Background() + repo, config, recipients := newBackupConfigForCmdTest(t) + checkpointShard, err := backup.NewJSONLShard(backupServiceGmail, "messages", "accthash", "checkpoints/gmail/accthash/run-test/messages/part-000001.jsonl.gz.age", []gmailBackupMessage{ + {ID: "m1", Raw: "raw-1"}, + {ID: "m2", Raw: "raw-2"}, + }) + if err != nil { + t.Fatalf("NewJSONLShard: %v", err) + } + if _, pushErr := backup.PushCheckpoint(ctx, backup.Snapshot{ + Services: []string{backupServiceGmail}, + Accounts: []string{"accthash"}, + Counts: map[string]int{"gmail.messages": 2}, + Shards: []backup.PlainShard{checkpointShard}, + }, backup.Checkpoint{ + RunID: "run-test", + Service: backupServiceGmail, + Account: "accthash", + Done: 2, + Total: 2, + }, backup.Options{ConfigPath: config, Push: false}); pushErr 
!= nil { + t.Fatalf("PushCheckpoint: %v", pushErr) + } + + shards, promoted, err := buildGmailMessageShardsFromCheckpoint(ctx, gmailBackupOptions{ + AccountHash: "accthash", + CacheMessages: true, + Checkpoints: true, + CheckpointRunID: "run-test", + BackupOptions: backup.Options{ConfigPath: config}, + }, []string{"m1", "m2"}) + if err != nil { + t.Fatalf("buildGmailMessageShardsFromCheckpoint: %v", err) + } + if !promoted || len(shards) != 1 || shards[0].Existing == nil { + t.Fatalf("expected promoted existing shard, promoted=%t shards=%+v", promoted, shards) + } + if shards[0].Path != "checkpoints/gmail/accthash/run-test/messages/part-000001.jsonl.gz.age" { + t.Fatalf("promoted path = %q", shards[0].Path) + } + if !sameBackupRecipients(shards[0].ExistingRecipients, recipients) { + t.Fatalf("promoted recipients = %v, want %v", shards[0].ExistingRecipients, recipients) + } + if _, err := os.Stat(filepath.Join(repo, filepath.FromSlash(shards[0].Path))); err != nil { + t.Fatalf("checkpoint shard missing: %v", err) + } +} + +func TestGmailBackupResolvedCheckpointRunIDReusesSelectionRun(t *testing.T) { + ctx := context.Background() + _, config, _ := newBackupConfigForCmdTest(t) + ids := []string{"m1"} + opts := gmailBackupOptions{ + AccountHash: "accthash", + CacheMessages: true, + Checkpoints: true, + IncludeSpamTrash: true, + BackupOptions: backup.Options{ConfigPath: config}, + } + runID := "20260428T010203Z-" + gmailBackupCheckpointRunIDSuffix(opts, ids) + checkpointShard, err := backup.NewJSONLShard(backupServiceGmail, "messages", "accthash", fmt.Sprintf("checkpoints/gmail/accthash/%s/messages/part-000001.jsonl.gz.age", runID), []gmailBackupMessage{ + {ID: "m1", Raw: "raw-1"}, + }) + if err != nil { + t.Fatalf("NewJSONLShard: %v", err) + } + if _, err := backup.PushCheckpoint(ctx, backup.Snapshot{ + Services: []string{backupServiceGmail}, + Accounts: []string{"accthash"}, + Counts: map[string]int{"gmail.messages": 1}, + Shards: []backup.PlainShard{checkpointShard}, 
+ }, backup.Checkpoint{ + RunID: runID, + Service: backupServiceGmail, + Account: "accthash", + Done: 1, + Total: 1, + }, backup.Options{ConfigPath: config, Push: false}); err != nil { + t.Fatalf("PushCheckpoint: %v", err) + } + + if got := gmailBackupResolvedCheckpointRunID(ctx, opts, ids); got != runID { + t.Fatalf("resolved run ID = %q, want %q", got, runID) + } +} + func TestBuildGmailCheckpointShardFromCacheWritesPlaintextPath(t *testing.T) { t.Setenv("HOME", t.TempDir()) accountHash := "accthash" @@ -494,6 +608,40 @@ func TestBuildGmailCheckpointShardsFromCacheSplitsByPlaintextSize(t *testing.T) } } +func TestBuildGmailMessageShardsFromCacheSplitsByPlaintextSize(t *testing.T) { + t.Setenv("HOME", t.TempDir()) + accountHash := "accthash" + ids := []string{"m1", "m2", "m3"} + for _, id := range ids { + if err := writeGmailBackupMessageCache(accountHash, gmailBackupMessage{ + ID: id, + InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"), + Raw: strings.Repeat("raw-"+id, 8), + }); err != nil { + t.Fatalf("writeGmailBackupMessageCache: %v", err) + } + } + shards, err := buildGmailMessageShardsFromCacheWithLimit(context.Background(), gmailBackupOptions{ + AccountHash: accountHash, + ShardMaxRows: 100, + }, ids, 1) + if err != nil { + t.Fatalf("buildGmailMessageShardsFromCacheWithLimit: %v", err) + } + if len(shards) != 3 { + t.Fatalf("len(shards) = %d, want 3", len(shards)) + } + for i, shard := range shards { + if shard.Rows != 1 { + t.Fatalf("shards[%d].Rows = %d, want 1", i, shard.Rows) + } + want := fmt.Sprintf("part-%04d.jsonl.gz.age", i+1) + if !strings.HasSuffix(shard.Path, want) { + t.Fatalf("shards[%d].Path = %q, want suffix %q", i, shard.Path, want) + } + } +} + func TestBuildGmailMessageShardsFromCacheWritesPlaintextPaths(t *testing.T) { t.Setenv("HOME", t.TempDir()) accountHash := "accthash" @@ -689,6 +837,53 @@ func TestEnsureExportOutsideRepoRejectsNestedPlaintext(t *testing.T) { } } +func TestResetExportTargetsKeepsGmailMessageFiles(t *testing.T) 
{ + outDir := t.TempDir() + messagePath := filepath.Join(outDir, "gmail", "acct_hash", "messages", "2026", "04", "message.md") + indexPath := filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl") + if err := os.MkdirAll(filepath.Dir(messagePath), 0o700); err != nil { + t.Fatalf("mkdir message dir: %v", err) + } + if err := os.WriteFile(messagePath, []byte("keep"), 0o600); err != nil { + t.Fatalf("write message: %v", err) + } + if err := os.WriteFile(indexPath, []byte("reset"), 0o600); err != nil { + t.Fatalf("write index: %v", err) + } + + err := resetExportTargets(outDir, []backup.ShardEntry{{ + Service: backupServiceGmail, + Kind: "messages", + Account: "acct/hash", + }}) + if err != nil { + t.Fatalf("resetExportTargets: %v", err) + } + if got := readText(t, messagePath); got != "keep" { + t.Fatalf("message file = %q, want keep", got) + } + if _, err := os.Stat(indexPath); !os.IsNotExist(err) { + t.Fatalf("index still exists or stat failed: %v", err) + } +} + +func newBackupConfigForCmdTest(t *testing.T) (string, string, []string) { + t.Helper() + dir := t.TempDir() + repo := filepath.Join(dir, "repo") + identity := filepath.Join(dir, "age.key") + config := filepath.Join(dir, "backup.json") + recipient, err := backup.EnsureIdentity(identity) + if err != nil { + t.Fatalf("EnsureIdentity: %v", err) + } + recipients := []string{recipient} + if err := backup.SaveConfig(config, backup.Config{Repo: repo, Identity: identity, Recipients: recipients}); err != nil { + t.Fatalf("SaveConfig: %v", err) + } + return repo, config, recipients +} + func mustUnixMilli(t *testing.T, value string) int64 { t.Helper() parsed, err := time.Parse(time.RFC3339, value) diff --git a/internal/cmd/help_printer.go b/internal/cmd/help_printer.go index 83cd0f8..7f7183e 100644 --- a/internal/cmd/help_printer.go +++ b/internal/cmd/help_printer.go @@ -25,6 +25,19 @@ func helpOptions() kong.HelpOptions { func helpPrinter(options kong.HelpOptions, ctx *kong.Context) error { 
origStdout := ctx.Stdout origStderr := ctx.Stderr + profile, err := loadBakedSafetyProfile() + if err != nil { + return usagef("invalid baked safety profile: %v", err) + } + if profile.commandNodeBlockedForHelp(ctx.Selected()) { + path := commandNodePath(ctx.Selected()) + if blockErr := profile.commandPathError(path); blockErr != nil { + _, _ = fmt.Fprintln(origStdout, blockErr) + } + return nil + } + restoreVisibility := applySafetyProfileVisibility(ctx.Model.Node, profile) + defer restoreVisibility() width := guessColumns(origStdout) @@ -43,14 +56,15 @@ func helpPrinter(options kong.HelpOptions, ctx *kong.Context) error { ctx.Stderr = origStderr defer func() { ctx.Stdout = origStdout }() - if err := kong.DefaultHelpPrinter(options, ctx); err != nil { - return err + if helpErr := kong.DefaultHelpPrinter(options, ctx); helpErr != nil { + return helpErr } out := rewriteCommandSummaries(buf.String(), ctx.Selected()) + out = removeEmptyCommandGroups(out) out = injectBuildLine(out) out = colorizeHelp(out, helpProfile(origStdout, helpColorMode(ctx.Args))) - _, err := io.WriteString(origStdout, out) + _, err = io.WriteString(origStdout, out) return err } @@ -167,6 +181,60 @@ func colorizeHelp(out string, profile termenv.Profile) string { return strings.Join(lines, "\n") } +func removeEmptyCommandGroups(out string) string { + lines := strings.Split(out, "\n") + skip := map[int]bool{} + for i, line := range lines { + if !isHelpCommandGroup(line) { + continue + } + if !helpGroupHasCommand(lines, i+1) { + skip[i] = true + } + } + if len(skip) == 0 { + return out + } + kept := make([]string, 0, len(lines)-len(skip)) + for i, line := range lines { + if skip[i] { + continue + } + kept = append(kept, line) + } + return strings.Join(kept, "\n") +} + +func helpGroupHasCommand(lines []string, start int) bool { + for i := start; i < len(lines); i++ { + line := lines[i] + if isHelpCommandGroup(line) || isHelpSection(line) { + return false + } + if isHelpCommandSummaryLine(line) { + 
return true + } + } + return false +} + +func isHelpCommandGroup(line string) bool { + switch line { + case "Read", "Write", "Organize", "Admin": + return true + default: + return false + } +} + +func isHelpSection(line string) bool { + return line == "Usage:" || strings.HasPrefix(line, "Usage:") || line == "Flags:" || line == "Commands:" || line == "Arguments:" || strings.HasPrefix(line, "Build:") || line == "Config:" +} + +func isHelpCommandSummaryLine(line string) bool { + return strings.HasPrefix(line, " ") && (len(line) < 3 || line[2] != ' ') && strings.TrimSpace(line) != "" +} + func colorizeCommandSummaryLine(line string, cmdName func(string) string, dim func(string) string) string { if !strings.HasPrefix(line, " ") { return line diff --git a/internal/cmd/help_printer_test.go b/internal/cmd/help_printer_test.go index 291ace9..9617f5f 100644 --- a/internal/cmd/help_printer_test.go +++ b/internal/cmd/help_printer_test.go @@ -4,6 +4,7 @@ import ( "bytes" "io" "os" + "strings" "testing" "github.com/alecthomas/kong" @@ -134,3 +135,14 @@ func TestColorizeHelp(t *testing.T) { t.Fatalf("expected colorized output") } } + +func TestRemoveEmptyCommandGroups(t *testing.T) { + in := "Read\n search [flags]\n Search\n\nOrganize\n" + out := removeEmptyCommandGroups(in) + if strings.Contains(out, "Organize") { + t.Fatalf("expected empty group removed, got: %q", out) + } + if !strings.Contains(out, "Read") || !strings.Contains(out, "search") { + t.Fatalf("expected non-empty group retained, got: %q", out) + } +} diff --git a/internal/cmd/root.go b/internal/cmd/root.go index 37911d0..fb7a92a 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -125,6 +125,10 @@ func Execute(args []string) (err error) { return parsedErr } + if err = enforceBakedSafetyProfile(kctx); err != nil { + _, _ = fmt.Fprintln(os.Stderr, errfmt.Format(err)) + return err + } if err = enforceEnabledCommands(kctx, cli.EnableCommands); err != nil { _, _ = fmt.Fprintln(os.Stderr, errfmt.Format(err)) 
return err diff --git a/internal/cmd/safety_profile.go b/internal/cmd/safety_profile.go new file mode 100644 index 0000000..55ebcfc --- /dev/null +++ b/internal/cmd/safety_profile.go @@ -0,0 +1,261 @@ +package cmd + +import ( + "fmt" + "strings" + + "github.com/alecthomas/kong" + "gopkg.in/yaml.v3" +) + +type bakedSafetyProfile struct { + enabled bool + name string + allow map[string]bool + deny map[string]bool +} + +func enforceBakedSafetyProfile(kctx *kong.Context) error { + profile, err := loadBakedSafetyProfile() + if err != nil { + return usagef("invalid baked safety profile: %v", err) + } + if !profile.enabled { + return nil + } + + path := commandPath(kctx.Command()) + if len(path) == 0 { + return nil + } + if !profile.allowsCommandPath(path) { + return profile.commandPathError(path) + } + return nil +} + +func bakedSafetyProfileError(path []string, profileName string, included bool) error { + command := strings.Join(path, " ") + if included { + return usagef("command %q is blocked by baked safety profile %q", command, profileName) + } + return usagef("command %q is not included in baked safety profile %q", command, profileName) +} + +func loadBakedSafetyProfile() (bakedSafetyProfile, error) { + raw := strings.TrimSpace(bakedSafetyProfileYAML) + if raw == "" { + return bakedSafetyProfile{}, nil + } + profile, err := parseSafetyProfile(raw) + if err != nil { + return bakedSafetyProfile{}, err + } + return *profile, nil +} + +func ValidateSafetyProfile(raw string) error { + _, err := parseSafetyProfile(raw) + return err +} + +func (p bakedSafetyProfile) allowsCommandPath(path []string) bool { + if !p.enabled || len(path) == 0 { + return true + } + if commandPathMatches(p.deny, path) { + return false + } + if len(p.allow) == 0 { + return true + } + return commandPathMatches(p.allow, path) +} + +func (p bakedSafetyProfile) commandPathError(path []string) error { + if commandPathMatches(p.deny, path) { + return bakedSafetyProfileError(path, p.name, true) + } + 
return bakedSafetyProfileError(path, p.name, false) +} + +func (p bakedSafetyProfile) commandNodeVisible(node *kong.Node) bool { + if !p.enabled || node == nil { + return true + } + if node.Type == kong.ApplicationNode { + return true + } + path := commandNodePath(node) + if len(path) > 0 && p.allowsCommandPath(path) { + return true + } + return p.commandNodeHasVisibleChildren(node) +} + +func (p bakedSafetyProfile) commandNodeBlockedForHelp(node *kong.Node) bool { + if !p.enabled || node == nil || node.Type != kong.CommandNode { + return false + } + path := commandNodePath(node) + if len(path) == 0 || p.allowsCommandPath(path) { + return false + } + return !p.commandNodeHasVisibleChildren(node) +} + +func (p bakedSafetyProfile) commandNodeHasVisibleChildren(node *kong.Node) bool { + for _, child := range node.Children { + if child == nil || child.Type != kong.CommandNode { + continue + } + if p.commandNodeVisible(child) { + return true + } + } + return false +} + +func commandNodePath(node *kong.Node) []string { + if node == nil { + return nil + } + var rev []string + for cur := node; cur != nil && cur.Type != kong.ApplicationNode; cur = cur.Parent { + if cur.Type == kong.CommandNode && strings.TrimSpace(cur.Name) != "" { + rev = append(rev, strings.ToLower(strings.TrimSpace(cur.Name))) + } + } + path := make([]string, 0, len(rev)) + for i := len(rev) - 1; i >= 0; i-- { + path = append(path, rev[i]) + } + return path +} + +func applySafetyProfileVisibility(root *kong.Node, profile bakedSafetyProfile) func() { + if !profile.enabled || root == nil { + return func() {} + } + type hiddenState struct { + node *kong.Node + hidden bool + } + restore := []hiddenState{} + var walk func(*kong.Node) + walk = func(node *kong.Node) { + for _, child := range node.Children { + if child == nil || child.Type != kong.CommandNode { + continue + } + restore = append(restore, hiddenState{node: child, hidden: child.Hidden}) + if !profile.commandNodeVisible(child) { + child.Hidden = 
true + } + walk(child) + } + } + walk(root) + return func() { + for i := len(restore) - 1; i >= 0; i-- { + restore[i].node.Hidden = restore[i].hidden + } + } +} + +func parseSafetyProfile(raw string) (*bakedSafetyProfile, error) { + var root map[string]any + if err := yaml.Unmarshal([]byte(raw), &root); err != nil { + return nil, err + } + + profile := &bakedSafetyProfile{ + enabled: true, + name: "unnamed", + allow: map[string]bool{}, + deny: map[string]bool{}, + } + + if name, ok := root["name"].(string); ok && strings.TrimSpace(name) != "" { + profile.name = strings.TrimSpace(name) + } + if err := addSafetyProfileList(profile.allow, root["allow"]); err != nil { + return nil, fmt.Errorf("allow: %w", err) + } + if err := addSafetyProfileList(profile.deny, root["deny"]); err != nil { + return nil, fmt.Errorf("deny: %w", err) + } + + for key, value := range root { + switch key { + case "name", "description", "allow", "deny": + continue + } + prefix := []string{key} + if key == "aliases" { + prefix = nil + } + if err := flattenSafetyProfileNode(profile, prefix, value); err != nil { + return nil, err + } + } + + if len(profile.allow) == 0 && len(profile.deny) == 0 { + return nil, fmt.Errorf("profile has no allow or deny entries") + } + return profile, nil +} + +func addSafetyProfileList(out map[string]bool, value any) error { + if value == nil { + return nil + } + items, ok := value.([]any) + if !ok { + return fmt.Errorf("expected list") + } + for _, item := range items { + s, ok := item.(string) + if !ok { + return fmt.Errorf("expected string item") + } + rule := normalizeSafetyProfileRule(s) + if rule != "" { + out[rule] = true + } + } + return nil +} + +func flattenSafetyProfileNode(profile *bakedSafetyProfile, prefix []string, value any) error { + switch typed := value.(type) { + case bool: + rule := normalizeSafetyProfileRule(strings.Join(prefix, ".")) + if rule == "" { + return fmt.Errorf("empty safety profile command path") + } + if typed { + 
// normalizeSafetyProfileRule canonicalizes a safety-profile rule into a
// lowercase, dot-separated command path, e.g. "Gmail Send" -> "gmail.send".
//
// Both "." and any run of whitespace act as segment separators, and empty
// segments are dropped. This keeps rules such as "gmail  send" (two spaces),
// "gmail\tsend" or "gmail. send" from turning into paths with empty
// segments ("gmail..send"), which could never equal a real command path and
// would leave the rule silently ineffective.
//
// It returns "" when rule contains no segments at all.
func normalizeSafetyProfileRule(rule string) string {
	// Turn dots into spaces so strings.Fields can split on every separator
	// at once and discard the empty pieces.
	spaced := strings.ReplaceAll(strings.ToLower(rule), ".", " ")
	return strings.Join(strings.Fields(spaced), ".")
}
+func TestBakedSafetyProfileBlocksBeforeRuntimeAllowlist(t *testing.T) { + setTestConfigHome(t) + withBakedSafetyProfile(t, ` +name: test +allow: + - version +deny: + - gmail.send + - send +`) + + err := Execute([]string{"--enable-commands", "gmail.send", "gmail", "send", "--to", "a@example.com", "--subject", "S", "--body", "B"}) + if err == nil { + t.Fatalf("expected baked safety profile block") + } + if got := err.Error(); !strings.Contains(got, "baked safety profile") || !strings.Contains(got, "gmail send") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestBakedSafetyProfileFailsClosed(t *testing.T) { + setTestConfigHome(t) + withBakedSafetyProfile(t, ` +name: readonly +allow: + - version +`) + + err := Execute([]string{"tasks", "list", "task-list-1"}) + if err == nil { + t.Fatalf("expected fail-closed safety profile block") + } + if got := err.Error(); !strings.Contains(got, "not included") || !strings.Contains(got, "tasks list") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestBakedSafetyProfileAllowsListedCommand(t *testing.T) { + setTestConfigHome(t) + withBakedSafetyProfile(t, ` +name: test +allow: + - version +`) + + if err := Execute([]string{"version"}); err != nil { + t.Fatalf("expected allowed command, got %v", err) + } +} + +func TestReadonlySafetyProfileBlocksNestedMutations(t *testing.T) { + setTestConfigHome(t) + raw, err := os.ReadFile(filepath.Join("..", "..", "safety-profiles", "readonly.yaml")) + if err != nil { + t.Fatalf("read readonly profile: %v", err) + } + withBakedSafetyProfile(t, string(raw)) + + tests := [][]string{ + {"gmail", "messages", "modify", "msg-1", "--add", "Label_1"}, + {"calendar", "alias", "set", "work", "abc123@group.calendar.google.com"}, + {"calendar", "alias", "unset", "work"}, + } + for _, args := range tests { + err := Execute(args) + if err == nil { + t.Fatalf("expected readonly profile block for %v", args) + } + if got := err.Error(); !strings.Contains(got, "baked safety profile") { + 
t.Fatalf("unexpected error for %v: %v", args, err) + } + } +} + +func TestReadonlySafetyProfileFiltersHelp(t *testing.T) { + setTestConfigHome(t) + raw, err := os.ReadFile(filepath.Join("..", "..", "safety-profiles", "readonly.yaml")) + if err != nil { + t.Fatalf("read readonly profile: %v", err) + } + withBakedSafetyProfile(t, string(raw)) + + out := captureStdout(t, func() { + _ = captureStderr(t, func() { + if err := Execute([]string{"gmail", "messages", "--help"}); err != nil { + t.Fatalf("Execute: %v", err) + } + }) + }) + if !strings.Contains(out, "\n search") { + t.Fatalf("expected search in filtered help, got: %q", out) + } + if strings.Contains(out, "\n modify") { + t.Fatalf("expected modify to be hidden from readonly help, got: %q", out) + } + if strings.Contains(out, "\nOrganize\n") { + t.Fatalf("expected empty command group to be hidden from readonly help, got: %q", out) + } + + out = captureStdout(t, func() { + _ = captureStderr(t, func() { + if err := Execute([]string{"calendar", "alias", "--help"}); err != nil { + t.Fatalf("Execute: %v", err) + } + }) + }) + if !strings.Contains(out, "\n list") { + t.Fatalf("expected list in filtered help, got: %q", out) + } + if strings.Contains(out, "\n set ") || strings.Contains(out, "\n unset ") { + t.Fatalf("expected alias writes to be hidden from readonly help, got: %q", out) + } +} + +func TestAgentSafeProfileFiltersHelp(t *testing.T) { + setTestConfigHome(t) + raw, err := os.ReadFile(filepath.Join("..", "..", "safety-profiles", "agent-safe.yaml")) + if err != nil { + t.Fatalf("read agent-safe profile: %v", err) + } + withBakedSafetyProfile(t, string(raw)) + + out := captureStdout(t, func() { + _ = captureStderr(t, func() { + if err := Execute([]string{"gmail", "drafts", "--help"}); err != nil { + t.Fatalf("Execute: %v", err) + } + }) + }) + if !strings.Contains(out, "\n create") { + t.Fatalf("expected create in filtered help, got: %q", out) + } + if strings.Contains(out, "\n send ") { + t.Fatalf("expected 
send to be hidden from agent-safe help, got: %q", out) + } + + blocked := captureStdout(t, func() { + _ = captureStderr(t, func() { + if err := Execute([]string{"gmail", "drafts", "send", "--help"}); err != nil { + t.Fatalf("Execute: %v", err) + } + }) + }) + if !strings.Contains(blocked, `command "gmail drafts send" is blocked by baked safety profile "agent-safe"`) { + t.Fatalf("expected blocked help message, got: %q", blocked) + } + if strings.Contains(blocked, "Send a draft") { + t.Fatalf("expected blocked command docs to be hidden, got: %q", blocked) + } +} + +func TestSafetyProfileFiltersSchema(t *testing.T) { + setTestConfigHome(t) + raw, err := os.ReadFile(filepath.Join("..", "..", "safety-profiles", "agent-safe.yaml")) + if err != nil { + t.Fatalf("read agent-safe profile: %v", err) + } + withBakedSafetyProfile(t, string(raw)) + + out := captureStdout(t, func() { + _ = captureStderr(t, func() { + if err := Execute([]string{"schema", "gmail drafts"}); err != nil { + t.Fatalf("Execute: %v", err) + } + }) + }) + if !strings.Contains(out, `"name": "create"`) { + t.Fatalf("expected create in filtered schema, got: %q", out) + } + if strings.Contains(out, `"name": "send"`) { + t.Fatalf("expected send to be hidden from filtered schema, got: %q", out) + } +} diff --git a/internal/cmd/schema.go b/internal/cmd/schema.go index d8b9003..da98494 100644 --- a/internal/cmd/schema.go +++ b/internal/cmd/schema.go @@ -84,11 +84,18 @@ func (c *SchemaCmd) Run(ctx context.Context, kctx *kong.Context) error { } hide := !c.IncludeHidden + profile, err := loadBakedSafetyProfile() + if err != nil { + return usagef("invalid baked safety profile: %v", err) + } + if profile.commandNodeBlockedForHelp(node) { + return profile.commandPathError(commandNodePath(node)) + } doc := schemaDoc{ SchemaVersion: 1, Build: VersionString(), - Command: buildSchemaNode(node, hide), + Command: buildSchemaNode(node, hide, profile), } return outfmt.WriteJSON(ctx, os.Stdout, doc) @@ -145,7 +152,7 @@ func 
findChildCommand(parent *kong.Node, token string) *kong.Node { return nil } -func buildSchemaNode(node *kong.Node, hide bool) *schemaNode { +func buildSchemaNode(node *kong.Node, hide bool, profile bakedSafetyProfile) *schemaNode { if node == nil { return nil } @@ -177,12 +184,17 @@ func buildSchemaNode(node *kong.Node, hide bool) *schemaNode { if hide && child.Hidden { continue } + if !profile.commandNodeVisible(child) { + continue + } children = append(children, child) } sort.Slice(children, func(i, j int) bool { return children[i].Name < children[j].Name }) for _, child := range children { - out.Subcommands = append(out.Subcommands, buildSchemaNode(child, hide)) + if childNode := buildSchemaNode(child, hide, profile); childNode != nil { + out.Subcommands = append(out.Subcommands, childNode) + } } return out diff --git a/safety-profiles/agent-safe.yaml b/safety-profiles/agent-safe.yaml new file mode 100644 index 0000000..e213cbc --- /dev/null +++ b/safety-profiles/agent-safe.yaml @@ -0,0 +1,322 @@ +name: agent-safe +description: Agent workflow profile. Allows reading, drafting, organizing, and recoverable local work; blocks sends, deletes, sharing, admin, and auth writes. 
+ +version: true +schema: true +exit-codes: true +agent: + exit-codes: true + +gmail: + search: true + get: true + messages: true + attachment: true + url: true + history: true + thread: + get: true + modify: true + attachments: true + labels: + list: true + get: true + create: true + rename: true + modify: true + delete: false + style: true + batch: + modify: true + delete: false + archive: true + mark-read: true + unread: true + trash: false + send: false + autoreply: false + track: false + drafts: + list: true + get: true + create: true + update: true + delete: false + send: false + settings: false + watch: false + autoforward: false + delegates: false + filters: false + forwarding: false + sendas: false + vacation: false + forward: false + +calendar: + calendars: true + subscribe: false + acl: true + alias: true + events: true + event: true + create: true + update: true + delete: false + freebusy: true + respond: false + propose-time: true + colors: true + conflicts: true + search: true + time: true + users: true + team: true + focus-time: true + out-of-office: false + working-location: false + create-calendar: true + +drive: + ls: true + search: true + get: true + download: true + upload: true + mkdir: true + copy: true + delete: false + move: true + rename: true + share: false + unshare: false + permissions: true + url: true + drives: true + comments: + list: true + get: true + create: true + update: true + delete: false + reply: true + +contacts: + search: true + list: true + get: true + export: true + create: false + update: false + delete: false + directory: + list: true + search: true + other: + list: true + search: true + delete: false + +tasks: + lists: + list: true + create: true + list: true + get: true + add: true + update: true + done: true + undo: true + delete: false + clear: false + +docs: + export: true + info: true + cat: true + list-tabs: true + create: false + copy: false + write: false + insert: false + delete: false + find-replace: false + 
update: false + edit: false + sed: false + clear: false + structure: true + comments: + list: true + get: true + add: true + reply: true + resolve: false + delete: false + +sheets: + get: true + metadata: true + notes: true + update-note: false + links: true + named-ranges: + list: true + get: true + add: false + update: false + delete: false + read-format: true + export: true + update: false + append: false + insert: false + clear: false + format: false + merge: false + unmerge: false + number-format: false + freeze: false + resize-columns: false + resize-rows: false + find-replace: false + create: false + copy: false + add-tab: true + rename-tab: true + delete-tab: false + chart: + list: true + get: true + create: true + update: true + delete: false + +slides: + export: true + info: true + list-slides: true + read-slide: true + thumbnail: true + create: false + create-from-markdown: false + create-from-template: false + copy: false + add-slide: false + delete-slide: false + update-notes: false + replace-slide: false + insert-text: false + replace-text: false + +chat: + spaces: + list: true + find: true + create: false + messages: + list: true + send: false + react: true + reactions: false + threads: + list: true + dm: + send: false + space: false + +forms: + get: true + create: false + update: false + add-question: false + delete-question: false + move-question: true + responses: + list: true + get: true + watch: false + +appscript: + get: true + content: true + run: false + create: false + +people: + me: true + get: true + search: true + relations: true + +groups: + list: true + members: true + +keep: + list: true + get: true + search: true + create: true + delete: false + attachment: true + +auth: + credentials: + list: true + set: false + remove: false + services: true + list: true + doctor: true + alias: + list: true + set: false + unset: false + status: true + keyring: false + add: false + remove: false + tokens: + list: true + delete: false + export: false 
+ import: false + manage: false + service-account: + status: true + set: false + unset: false + keep: false + +config: + get: true + keys: true + list: true + path: true + set: false + unset: false + no-send: + list: true + set: false + remove: false + +time: true +classroom: false +admin: false +backup: false +completion: false +__complete: false + +aliases: + send: false + ls: true + search: true + open: true + download: true + upload: true + login: false + logout: false + status: true + me: true + whoami: true diff --git a/safety-profiles/full.yaml b/safety-profiles/full.yaml new file mode 100644 index 0000000..fb2193b --- /dev/null +++ b/safety-profiles/full.yaml @@ -0,0 +1,4 @@ +name: full +description: Stock gog behavior. Every command is allowed. +allow: + - all diff --git a/safety-profiles/readonly.yaml b/safety-profiles/readonly.yaml new file mode 100644 index 0000000..c326bf0 --- /dev/null +++ b/safety-profiles/readonly.yaml @@ -0,0 +1,327 @@ +name: readonly +description: Read/list/search/get commands only. Mutations, sends, deletes, sharing, and auth writes are blocked. 
+ +version: true +schema: true +exit-codes: true +agent: + exit-codes: true + +gmail: + search: true + get: true + messages: + search: true + modify: false + attachment: true + url: true + history: true + thread: + get: true + modify: false + attachments: true + labels: + list: true + get: true + create: false + rename: false + modify: false + delete: false + style: false + batch: + modify: false + delete: false + archive: false + mark-read: false + unread: false + trash: false + send: false + autoreply: false + track: false + drafts: + list: true + get: true + create: false + update: false + delete: false + send: false + settings: false + watch: false + autoforward: false + delegates: false + filters: false + forwarding: false + sendas: false + vacation: false + forward: false + +calendar: + calendars: true + subscribe: false + acl: true + alias: + list: true + set: false + unset: false + events: true + event: true + create: false + update: false + delete: false + freebusy: true + respond: false + propose-time: false + colors: true + conflicts: true + search: true + time: true + users: true + team: true + focus-time: false + out-of-office: false + working-location: false + create-calendar: false + +drive: + ls: true + search: true + get: true + download: true + upload: false + mkdir: false + copy: false + delete: false + move: false + rename: false + share: false + unshare: false + permissions: true + url: true + drives: true + comments: + list: true + get: true + create: false + update: false + delete: false + reply: false + +contacts: + search: true + list: true + get: true + export: true + create: false + update: false + delete: false + directory: + list: true + search: true + other: + list: true + search: true + delete: false + +tasks: + lists: + list: true + create: false + list: true + get: true + add: false + update: false + done: false + undo: false + delete: false + clear: false + +docs: + export: true + info: true + cat: true + list-tabs: true + create: 
false + copy: false + write: false + insert: false + delete: false + find-replace: false + update: false + edit: false + sed: false + clear: false + structure: true + comments: + list: true + get: true + add: false + reply: false + resolve: false + delete: false + +sheets: + get: true + metadata: true + notes: true + update-note: false + links: true + named-ranges: + list: true + get: true + add: false + update: false + delete: false + read-format: true + export: true + update: false + append: false + insert: false + clear: false + format: false + merge: false + unmerge: false + number-format: false + freeze: false + resize-columns: false + resize-rows: false + find-replace: false + create: false + copy: false + add-tab: false + rename-tab: false + delete-tab: false + chart: + list: true + get: true + create: false + update: false + delete: false + +slides: + export: true + info: true + list-slides: true + read-slide: true + thumbnail: true + create: false + create-from-markdown: false + create-from-template: false + copy: false + add-slide: false + delete-slide: false + update-notes: false + replace-slide: false + insert-text: false + replace-text: false + +chat: + spaces: + list: true + find: true + create: false + messages: + list: true + send: false + react: false + reactions: false + threads: + list: true + dm: + send: false + space: false + +forms: + get: true + create: false + update: false + add-question: false + delete-question: false + move-question: false + responses: + list: true + get: true + watch: false + +appscript: + get: true + content: true + run: false + create: false + +people: + me: true + get: true + search: true + relations: true + +groups: + list: true + members: true + +keep: + list: true + get: true + search: true + create: false + delete: false + attachment: true + +auth: + credentials: + list: true + set: false + remove: false + services: true + list: true + doctor: true + alias: + list: true + set: false + unset: false + status: true + 
keyring: false + add: false + remove: false + tokens: + list: true + delete: false + export: false + import: false + manage: false + service-account: + status: true + set: false + unset: false + keep: false + +config: + get: true + keys: true + list: true + path: true + set: false + unset: false + no-send: + list: true + set: false + remove: false + +time: true +classroom: false +admin: false +backup: false +completion: false +__complete: false + +aliases: + send: false + ls: true + search: true + open: true + download: true + upload: false + login: false + logout: false + status: true + me: true + whoami: true