diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f89cf4..cc6b5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,12 @@ ## 0.14.0 - Unreleased ### Added -- Backup: add `gog backup` with age-encrypted Git shards, Gmail labels/raw message export, manifest status, full decrypt-and-verify, docs, and security-focused regression coverage. +- Backup: add `gog backup` with age-encrypted Git shards, Gmail labels/raw message export, manifest status, full decrypt-and-verify, shard `cat`, local plaintext export, docs, and security-focused regression coverage. + +### Fixed +- Drive: include `driveId` in `drive ls`, `drive search`, and `drive get` field masks so Shared Drive files can be identified in JSON output. (#524) — thanks @LeanSheng. +- Gmail: expose reply threading headers in default `gmail get --format metadata` output and fail explicit reply targets that cannot provide a `Message-ID`. (#528, #512) — thanks @solomonneas. +- Docs: include available tab names when `docs cat --tab` / structure lookup cannot find the requested tab. (#532) — thanks @johnbenjaminlewis. ## 0.13.0 - 2026-04-20 diff --git a/README.md b/README.md index 5029631..a6fedc6 100644 --- a/README.md +++ b/README.md @@ -728,6 +728,8 @@ gog backup init --repo ~/Projects/backup-gog --remote https://github.com/steipet gog backup push --services gmail --account you@gmail.com gog backup status gog backup verify +gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty +gog backup export --out ~/Documents/gog-backup-export ``` For a bounded first run: @@ -741,6 +743,12 @@ private age identity locally at `~/.gog/age.key`; GitHub only receives public `age1...` recipients, `manifest.json`, and encrypted `*.jsonl.gz.age` payloads. The private `AGE-SECRET-KEY-...` value must stay local or in a password manager. +Use `gog backup cat` to decrypt one shard as JSONL, or `gog backup export` to +write a local plaintext copy. 
The export writes Gmail messages as `.eml` files, +plus `gmail//messages/index.jsonl` and pretty `labels.json`. +That export is intentionally unencrypted; keep it out of Git, shared folders, +and cloud sync unless that is intentional. + `manifest.json` is intentionally cleartext for cheap status and verification. It exposes metadata: export time, service names, account hashes, shard paths, row counts, encrypted byte sizes, plaintext verification hashes, backup cadence, diff --git a/docs/backup.md b/docs/backup.md index f5858a9..682e736 100644 --- a/docs/backup.md +++ b/docs/backup.md @@ -48,6 +48,18 @@ Decrypt every shard and verify hashes and row counts: gog backup verify ``` +Decrypt one shard to stdout: + +```bash +gog backup cat data/gmail//labels.jsonl.gz.age --pretty +``` + +Write an unencrypted local copy for easy reading on the Mac: + +```bash +gog backup export --out ~/Documents/gog-backup-export +``` + Use `--no-push` on `init` or `push` to commit locally without pushing to the remote. @@ -75,6 +87,23 @@ counts, encrypted byte sizes, and plaintext hashes used for verification. It does not contain email subjects, senders, recipients, bodies, raw message IDs, or labels. +Plaintext export directory: + +```text +README.md +manifest.json +gmail//labels.json +gmail//messages/index.jsonl +gmail//messages/YYYY/MM/-.eml +raw//... +``` + +`gog backup export` decrypts and verifies the manifest-backed shards before +writing files. Gmail messages become `.eml` files that open in Mail and other +mail clients. The export is not encrypted; do not place it inside the backup +Git repository, and keep it out of synced/shared folders unless that is +intentional. + ## Encryption Backups use the Go `filippo.io/age` library with X25519 age identities. There @@ -98,6 +127,8 @@ For each shard, `gog backup push`: `gog backup verify` decrypts each shard with the local age identity, gunzips it, checks the plaintext SHA-256 hash from the manifest, and verifies row counts. 
+`gog backup cat` and `gog backup export` use the same verification path before +returning plaintext. ## Security Boundary diff --git a/docs/commands.generated.md b/docs/commands.generated.md index 28bbaff..0cdbd47 100644 --- a/docs/commands.generated.md +++ b/docs/commands.generated.md @@ -49,6 +49,8 @@ Generated from `gog schema --json`. - `gog auth tokens import ` - Import a refresh token file into keyring (contains secrets) - `gog auth tokens list` - List stored tokens (by key only) - `gog backup [flags]` - Encrypted Google account backups +- `gog backup cat [flags]` - Decrypt one backup shard to stdout +- `gog backup export [flags]` - Write a local plaintext export - `gog backup gmail ` - Gmail backup operations - `gog backup gmail push [flags]` - Export Gmail into encrypted backup shards - `gog backup init [flags]` - Initialize encrypted backup config and repository diff --git a/internal/backup/backup.go b/internal/backup/backup.go index f6eb50b..678d295 100644 --- a/internal/backup/backup.go +++ b/internal/backup/backup.go @@ -479,80 +479,3 @@ func removeStaleShards(repo string, shards []ShardEntry) error { } return nil } - -func writeBackupReadme(repo string) error { - path := filepath.Join(repo, "README.md") - if _, err := os.Stat(path); err == nil { - return nil - } - const body = `# backup-gog - -Encrypted Git backup for Google account data exported by gog. - -This repository is written by ` + "`gog backup push`" + `. It is safe to keep on -GitHub because service payloads are encrypted before Git sees them. - -## Layout - -` + "```text" + ` -README.md -manifest.json -data///... -` + "```" + ` - -` + "`manifest.json`" + ` is cleartext and contains format version, export time, -public age recipients, service names, account hashes, shard paths, row counts, -encrypted byte sizes, and plaintext hashes used for verification. 
Email bodies, -subjects, senders, Drive filenames, contacts, event titles, and other private -Google data stay inside encrypted ` + "`*.jsonl.gz.age`" + ` shards. - -## Security Model - -Shard contents are deterministic JSONL, gzip-compressed with a fixed timestamp, -and encrypted with age for every configured public recipient. The local -` + "`~/.gog/age.key`" + ` identity is required to decrypt. - -Git can still see manifest metadata: export time, public recipients, service -names, account hashes, shard paths, encrypted byte sizes, plaintext shard -hashes, backup cadence, and which encrypted shards changed. Git cannot read -Google content without an age identity. - -Anyone who can push to this repository can replace encrypted backup data with -different data encrypted to your public recipient. Keep repository write access -restricted and review unexpected backup commits. If an age identity is -compromised, remove its public recipient and push a new backup; old Git history -may still contain shards decryptable by the compromised key. - -## Push - -` + "```bash" + ` -gog backup push --services gmail -` + "```" + ` - -The command pulls/rebases this checkout, exports selected Google services, -writes encrypted shards, updates the manifest, commits, and pushes this -repository. - -## Verify - -` + "```bash" + ` -gog backup verify -` + "```" + ` - -` + "`verify`" + ` decrypts every shard with the local age identity and verifies the -manifest hashes and row counts. It does not restore or write Google data. - -## Recovery - -Install gog, clone this repo to the path in ` + "`~/.gog/backup.json`" + `, -restore the local age identity file, then run: - -` + "```bash" + ` -gog backup verify -` + "```" + ` - -Do not commit the age identity. Only public ` + "`age1...`" + ` recipients belong in -config; ` + "`AGE-SECRET-KEY-...`" + ` values must stay local or in a password manager. 
-` - return os.WriteFile(path, []byte(body), 0o600) -} diff --git a/internal/backup/backup_test.go b/internal/backup/backup_test.go index 1449e40..999211e 100644 --- a/internal/backup/backup_test.go +++ b/internal/backup/backup_test.go @@ -58,6 +58,53 @@ func TestPushSnapshotAndVerify(t *testing.T) { } } +func TestCatAndDecryptSnapshotVerifyPlaintext(t *testing.T) { + ctx, repo, config, _ := initTestBackup(t) + shardPath := "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age" + pushSingleShard(t, ctx, config, mustGmailMessageShard(t, shardPath, []map[string]string{{ + "id": "m1", + "raw": "plain marker", + }})) + + cat, err := Cat(ctx, Options{ConfigPath: config}, shardPath) + if err != nil { + t.Fatalf("Cat: %v", err) + } + if cat.Path != shardPath || cat.Service != "gmail" || cat.Kind != "messages" || !strings.Contains(string(cat.Plaintext), "plain marker") { + t.Fatalf("unexpected cat shard: %+v plaintext=%q", cat, cat.Plaintext) + } + + absPath := filepath.Join(repo, filepath.FromSlash(shardPath)) + catAbs, err := Cat(ctx, Options{ConfigPath: config}, absPath) + if err != nil { + t.Fatalf("Cat absolute: %v", err) + } + if string(catAbs.Plaintext) != string(cat.Plaintext) { + t.Fatalf("absolute Cat plaintext mismatch") + } + + manifest, gotRepo, shards, err := DecryptSnapshot(ctx, Options{ConfigPath: config}) + if err != nil { + t.Fatalf("DecryptSnapshot: %v", err) + } + if gotRepo != repo || len(manifest.Shards) != 1 || len(shards) != 1 || string(shards[0].Plaintext) != string(cat.Plaintext) { + t.Fatalf("unexpected decrypt snapshot repo=%s manifest=%+v shards=%+v", gotRepo, manifest, shards) + } +} + +func TestCatRejectsShardOutsideManifest(t *testing.T) { + ctx, _, config, _ := initTestBackup(t) + pushSingleShard(t, ctx, config, mustGmailMessageShard(t, "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []map[string]string{{"id": "m1"}})) + + for _, ref := range []string{"../data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", 
"data/gmail/acct/messages/2026/05/part-0001.jsonl.gz.age"} { + t.Run(ref, func(t *testing.T) { + if _, err := Cat(ctx, Options{ConfigPath: config}, ref); err == nil { + t.Fatal("expected Cat to reject missing or escaping shard") + } + }) + } +} + func TestIdentityAndConfigArePrivate(t *testing.T) { _, _, config, identity := initTestBackup(t) diff --git a/internal/backup/read.go b/internal/backup/read.go new file mode 100644 index 0000000..b1a0f3a --- /dev/null +++ b/internal/backup/read.go @@ -0,0 +1,125 @@ +//nolint:err113,wrapcheck,wsl_v5 // Contextual errors keep backup call sites readable. +package backup + +import ( + "context" + "fmt" + "path" + "path/filepath" + "strings" +) + +func Cat(ctx context.Context, opts Options, shardPath string) (PlainShard, error) { + cfg, err := ResolveOptions(opts) + if err != nil { + return PlainShard{}, err + } + if repoErr := ensureRepo(ctx, cfg); repoErr != nil { + return PlainShard{}, repoErr + } + manifest, err := readManifest(cfg.Repo) + if err != nil { + return PlainShard{}, err + } + if manifest.Format != formatVersion { + return PlainShard{}, fmt.Errorf("unsupported backup format %d", manifest.Format) + } + shard, err := findManifestShard(manifest, cfg.Repo, shardPath) + if err != nil { + return PlainShard{}, err + } + return decryptManifestShard(cfg, shard) +} + +func DecryptSnapshot(ctx context.Context, opts Options) (Manifest, string, []PlainShard, error) { + cfg, err := ResolveOptions(opts) + if err != nil { + return Manifest{}, "", nil, err + } + if repoErr := ensureRepo(ctx, cfg); repoErr != nil { + return Manifest{}, "", nil, repoErr + } + manifest, err := readManifest(cfg.Repo) + if err != nil { + return Manifest{}, "", nil, err + } + if manifest.Format != formatVersion { + return Manifest{}, "", nil, fmt.Errorf("unsupported backup format %d", manifest.Format) + } + shards := make([]PlainShard, 0, len(manifest.Shards)) + for _, shard := range manifest.Shards { + select { + case <-ctx.Done(): + return 
// normalizeShardRef turns a user-supplied shard reference into the clean,
// slash-separated, repo-relative form used for manifest lookups.
//
// Surrounding whitespace is ignored. An absolute ref is first made relative to
// repo. The result is rejected when it is empty, resolves to the root itself
// ("."), climbs above the root (".." or "../..."), or is still absolute.
func normalizeShardRef(repo, ref string) (string, error) {
	candidate := strings.TrimSpace(ref)
	if candidate == "" {
		return "", fmt.Errorf("backup shard path is required")
	}

	if filepath.IsAbs(candidate) {
		relative, err := filepath.Rel(repo, candidate)
		if err != nil {
			return "", err
		}
		candidate = relative
	}

	clean := path.Clean(filepath.ToSlash(candidate))
	switch {
	case clean == ".", clean == "..", strings.HasPrefix(clean, "../"), path.IsAbs(clean):
		return "", fmt.Errorf("backup shard path escapes backup root: %s", candidate)
	}
	return clean, nil
}
If an age identity is +compromised, remove its public recipient and push a new backup; old Git history +may still contain shards decryptable by the compromised key. + +## Push + +` + "```bash" + ` +gog backup push --services gmail +` + "```" + ` + +The command pulls/rebases this checkout, exports selected Google services, +writes encrypted shards, updates the manifest, commits, and pushes this +repository. + +## Verify + +` + "```bash" + ` +gog backup verify +` + "```" + ` + +` + "`verify`" + ` decrypts every shard with the local age identity and verifies the +manifest hashes and row counts. It does not restore or write Google data. + +## Recovery + +Install gog, clone this repo to the path in ` + "`~/.gog/backup.json`" + `, +restore the local age identity file, then run: + +` + "```bash" + ` +gog backup verify +` + "```" + ` + +Do not commit the age identity. Only public ` + "`age1...`" + ` recipients belong in +config; ` + "`AGE-SECRET-KEY-...`" + ` values must stay local or in a password manager. 
+` + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + return fmt.Errorf("write backup readme: %w", err) + } + + return nil +} diff --git a/internal/cmd/backup.go b/internal/cmd/backup.go index fa7b0ff..7c49557 100644 --- a/internal/cmd/backup.go +++ b/internal/cmd/backup.go @@ -24,6 +24,8 @@ type BackupCmd struct { Push BackupPushCmd `cmd:"" name:"push" help:"Export services into encrypted backup shards"` Status BackupStatusCmd `cmd:"" name:"status" help:"Inspect backup manifest without decrypting shards"` Verify BackupVerifyCmd `cmd:"" name:"verify" help:"Decrypt and verify all backup shards"` + Cat BackupCatCmd `cmd:"" name:"cat" help:"Decrypt one backup shard to stdout"` + Export BackupExportCmd `cmd:"" name:"export" help:"Write a local plaintext export"` Gmail BackupGmailCmd `cmd:"" name:"gmail" help:"Gmail backup operations"` } @@ -31,6 +33,8 @@ type BackupGmailCmd struct { Push BackupGmailPushCmd `cmd:"" name:"push" help:"Export Gmail into encrypted backup shards"` } +const backupServiceGmail = "gmail" + type backupFlags struct { Config string `name:"config" help:"Backup config path" default:""` Repo string `name:"repo" help:"Local backup repository path"` @@ -51,6 +55,23 @@ func (f backupFlags) options() backup.Options { } } +type backupReadFlags struct { + Config string `name:"config" help:"Backup config path" default:""` + Repo string `name:"repo" help:"Local backup repository path"` + Remote string `name:"remote" help:"Backup Git remote URL"` + Identity string `name:"identity" help:"Local age identity path"` +} + +func (f backupReadFlags) options() backup.Options { + return backup.Options{ + ConfigPath: f.Config, + Repo: f.Repo, + Remote: f.Remote, + Identity: f.Identity, + Push: false, + } +} + type BackupInitCmd struct { backupFlags } @@ -93,7 +114,7 @@ func (c *BackupPushCmd) Run(ctx context.Context, flags *RootFlags) error { var snapshots []backup.Snapshot for _, service := range services { switch 
strings.ToLower(strings.TrimSpace(service)) { - case "gmail": + case backupServiceGmail: snapshot, err := buildGmailBackupSnapshot(ctx, flags, gmailBackupOptions{ Query: c.Query, Max: c.Max, @@ -233,7 +254,7 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB return backup.Snapshot{}, err } shards := make([]backup.PlainShard, 0, 1) - labelShard, err := backup.NewJSONLShard("gmail", "labels", accountHash, fmt.Sprintf("data/gmail/%s/labels.jsonl.gz.age", accountHash), labels) + labelShard, err := backup.NewJSONLShard(backupServiceGmail, "labels", accountHash, fmt.Sprintf("data/gmail/%s/labels.jsonl.gz.age", accountHash), labels) if err != nil { return backup.Snapshot{}, err } @@ -244,7 +265,7 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB } shards = append(shards, messageShards...) return backup.Snapshot{ - Services: []string{"gmail"}, + Services: []string{backupServiceGmail}, Accounts: []string{accountHash}, Counts: map[string]int{ "gmail.labels": len(labels), @@ -422,7 +443,7 @@ func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage, end = len(values) } rel := fmt.Sprintf("data/gmail/%s/messages/%s/part-%04d.jsonl.gz.age", accountHash, key, part) - shard, err := backup.NewJSONLShard("gmail", "messages", accountHash, rel, values[start:end]) + shard, err := backup.NewJSONLShard(backupServiceGmail, "messages", accountHash, rel, values[start:end]) if err != nil { return nil, err } diff --git a/internal/cmd/backup_export.go b/internal/cmd/backup_export.go new file mode 100644 index 0000000..82a2a91 --- /dev/null +++ b/internal/cmd/backup_export.go @@ -0,0 +1,394 @@ +package cmd + +import ( + "bufio" + "bytes" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/steipete/gogcli/internal/backup" + "github.com/steipete/gogcli/internal/outfmt" + "github.com/steipete/gogcli/internal/ui" +) + +type 
BackupCatCmd struct { + backupReadFlags + Shard string `arg:"" name:"shard" help:"Manifest shard path, or absolute path under the backup repo"` + Pretty bool `name:"pretty" help:"Pretty-print each JSONL row"` + Out string `name:"out" help:"Write decrypted JSONL to this file instead of stdout"` +} + +func (c *BackupCatCmd) Run(ctx context.Context) error { + shard, err := backup.Cat(ctx, c.options(), c.Shard) + if err != nil { + return err + } + data := shard.Plaintext + if c.Pretty { + data, err = prettyJSONL(data) + if err != nil { + return fmt.Errorf("pretty-print shard: %w", err) + } + } + if strings.TrimSpace(c.Out) != "" { + out, expandErr := expandUserPath(c.Out) + if expandErr != nil { + return expandErr + } + if mkdirErr := os.MkdirAll(filepath.Dir(out), 0o700); mkdirErr != nil { + return mkdirErr + } + return os.WriteFile(out, data, 0o600) + } + _, err = os.Stdout.Write(data) + return err +} + +type BackupExportCmd struct { + backupReadFlags + Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"` +} + +type backupExportResult struct { + Out string `json:"out"` + Repo string `json:"repo"` + ManifestExport time.Time `json:"manifestExported"` + Files int `json:"files"` + Counts map[string]int `json:"counts"` +} + +type gmailExportIndexEntry struct { + ID string `json:"id"` + ThreadID string `json:"threadId,omitempty"` + HistoryID string `json:"historyId,omitempty"` + InternalDate int64 `json:"internalDate,omitempty"` + LabelIDs []string `json:"labelIds,omitempty"` + SizeEstimate int64 `json:"sizeEstimate,omitempty"` + EML string `json:"eml"` +} + +func (c *BackupExportCmd) Run(ctx context.Context) error { + outDir, err := expandUserPath(c.Out) + if err != nil { + return err + } + manifest, repo, shards, err := backup.DecryptSnapshot(ctx, c.options()) + if err != nil { + return err + } + if exportErr := ensureExportOutsideRepo(outDir, repo); exportErr != nil { + return exportErr + } + result := backupExportResult{ + 
// prettyJSONL re-indents every non-blank row of a JSONL document.
//
// Each input line is trimmed, skipped when empty, indented with json.Indent,
// and terminated by a single newline in the output. The first row that is not
// valid JSON aborts the conversion; on any error the returned slice is nil.
func prettyJSONL(data []byte) ([]byte, error) {
	const initialBuffer = 64 * 1024

	scanner := bufio.NewScanner(bytes.NewReader(data))
	// A row can never be longer than the input itself, so derive the token limit
	// from len(data) instead of a fixed cap. A fixed 16 MiB limit made large rows
	// (for example a big base64 raw message) fail with bufio.ErrTooLong.
	scanner.Buffer(make([]byte, 0, initialBuffer), len(data)+1)

	var out bytes.Buffer
	for scanner.Scan() {
		row := bytes.TrimSpace(scanner.Bytes())
		if len(row) == 0 {
			continue
		}
		if err := json.Indent(&out, row, "", " "); err != nil {
			return nil, err
		}
		out.WriteByte('\n')
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// ensureExportOutsideRepo rejects a plaintext export directory that is the
// backup repository itself or lies anywhere beneath it.
//
// Both paths are made absolute and cleaned before being compared lexically; the
// file system is not consulted.
func ensureExportOutsideRepo(outDir, repo string) error {
	absOut, err := filepath.Abs(outDir)
	if err != nil {
		return err
	}
	absRepo, err := filepath.Abs(repo)
	if err != nil {
		return err
	}
	cleanOut, cleanRepo := filepath.Clean(absOut), filepath.Clean(absRepo)

	rel, err := filepath.Rel(cleanRepo, cleanOut)
	if err != nil {
		return err
	}

	// The export is outside only when the relative path climbs out of the repo
	// (".." or "../...") or could not be expressed relatively at all.
	parentPrefix := ".." + string(filepath.Separator)
	outside := rel == ".." || strings.HasPrefix(rel, parentPrefix) || filepath.IsAbs(rel)
	if rel == "." || !outside {
		return fmt.Errorf("plaintext export directory must be outside backup repo: %s", cleanOut)
	}
	return nil
}
// writeJSONFile encodes value as indented JSON followed by a newline and writes
// it to path with mode 0600, creating missing parent directories with mode 0700.
//
// Encoding happens first, so a value that cannot be encoded leaves the file
// system untouched.
func writeJSONFile(path string, value any) error {
	var encoded bytes.Buffer
	enc := json.NewEncoder(&encoded)
	enc.SetIndent("", " ")
	if err := enc.Encode(value); err != nil {
		return err
	}

	if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
		return err
	}
	return os.WriteFile(path, encoded.Bytes(), 0o600)
}
+ if err != nil { + return 0, 0, err + } + defer indexFile.Close() + enc := json.NewEncoder(indexFile) + enc.SetEscapeHTML(false) + files := 0 + for _, message := range messages { + mime, err := decodeGmailRaw(message.Raw) + if err != nil { + return files, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, err) + } + rel := backupExportMessagePath(account, message) + path := filepath.Join(outDir, filepath.FromSlash(rel)) + if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil { + return files, 0, err + } + if err := os.WriteFile(path, mime, 0o600); err != nil { + return files, 0, err + } + files++ + if err := enc.Encode(gmailExportIndexEntry{ + ID: message.ID, + ThreadID: message.ThreadID, + HistoryID: message.HistoryID, + InternalDate: message.InternalDate, + LabelIDs: message.LabelIDs, + SizeEstimate: message.SizeEstimate, + EML: rel, + }); err != nil { + return files, 0, err + } + } + return files + 1, len(messages), nil +} + +func exportRawShard(outDir string, shard backup.PlainShard) (int, int, error) { + rel := strings.TrimSuffix(shard.Path, ".gz.age") + path := filepath.Join(outDir, "raw", filepath.FromSlash(rel)) + if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil { + return 0, 0, err + } + if err := os.WriteFile(path, shard.Plaintext, 0o600); err != nil { + return 0, 0, err + } + return 1, shard.Rows, nil +} + +func countExportFiles(outDir string) (int, error) { + count := 0 + err := filepath.WalkDir(outDir, func(_ string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if d != nil && !d.IsDir() { + count++ + } + return nil + }) + return count, err +} + +func decodeGmailRaw(raw string) ([]byte, error) { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil, fmt.Errorf("empty raw payload") + } + if data, err := base64.RawURLEncoding.DecodeString(raw); err == nil { + return data, nil + } + return base64.URLEncoding.DecodeString(raw) +} + +func backupExportMessagePath(account string, message 
gmailBackupMessage) string { + timestamp := trackingUnknown + yearMonth := trackingUnknown + if message.InternalDate > 0 { + t := time.UnixMilli(message.InternalDate).UTC() + timestamp = t.Format("20060102T150405Z") + yearMonth = filepath.Join(fmt.Sprintf("%04d", t.Year()), fmt.Sprintf("%02d", int(t.Month()))) + } + name := timestamp + "-" + sanitizeFilePart(message.ID) + ".eml" + return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", yearMonth, name)) +} + +func sanitizeFilePart(value string) string { + value = strings.TrimSpace(value) + if value == "" { + return trackingUnknown + } + var b strings.Builder + for _, r := range value { + switch { + case r >= 'a' && r <= 'z': + b.WriteRune(r) + case r >= 'A' && r <= 'Z': + b.WriteRune(r) + case r >= '0' && r <= '9': + b.WriteRune(r) + case r == '.', r == '-', r == '_': + b.WriteRune(r) + default: + b.WriteByte('_') + } + } + out := strings.Trim(b.String(), "._-") + if out == "" { + return trackingUnknown + } + return out +} diff --git a/internal/cmd/backup_test.go b/internal/cmd/backup_test.go index 9a600a0..09ba7de 100644 --- a/internal/cmd/backup_test.go +++ b/internal/cmd/backup_test.go @@ -1,6 +1,9 @@ package cmd import ( + "encoding/base64" + "os" + "path/filepath" "strings" "testing" "time" @@ -88,6 +91,77 @@ func TestMergeBackupSnapshotsKeepsCountsAndShardOrder(t *testing.T) { } } +func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) { + payload := []byte("Subject: Hello\r\n\r\nBody") + raw := base64.RawURLEncoding.EncodeToString(payload) + got, err := decodeGmailRaw(raw) + if err != nil { + t.Fatalf("decodeGmailRaw raw: %v", err) + } + if string(got) != string(payload) { + t.Fatalf("raw decoded = %q, want %q", got, payload) + } + + padded := base64.URLEncoding.EncodeToString(payload) + got, err = decodeGmailRaw(padded) + if err != nil { + t.Fatalf("decodeGmailRaw padded: %v", err) + } + if string(got) != string(payload) { + t.Fatalf("padded decoded = %q, want %q", got, 
payload) + } +} + +func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) { + outDir := t.TempDir() + payload := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody") + message := gmailBackupMessage{ + ID: "msg/one", + ThreadID: "thread-1", + InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"), + LabelIDs: []string{"INBOX"}, + Raw: base64.RawURLEncoding.EncodeToString(payload), + } + shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{message}) + if err != nil { + t.Fatalf("NewJSONLShard: %v", err) + } + + files, count, err := exportGmailMessages(outDir, shard) + if err != nil { + t.Fatalf("exportGmailMessages: %v", err) + } + if files != 2 || count != 1 { + t.Fatalf("files,count = %d,%d want 2,1", files, count) + } + + emlRel := backupExportMessagePath("acct_hash", message) + eml, err := os.ReadFile(filepath.Join(outDir, filepath.FromSlash(emlRel))) + if err != nil { + t.Fatalf("read eml: %v", err) + } + if string(eml) != string(payload) { + t.Fatalf("eml = %q, want %q", eml, payload) + } + index := readText(t, filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl")) + if !strings.Contains(index, `"id":"msg/one"`) || !strings.Contains(index, `"eml":"`+emlRel+`"`) { + t.Fatalf("index missing expected fields: %s", index) + } +} + +func TestEnsureExportOutsideRepoRejectsNestedPlaintext(t *testing.T) { + repo := filepath.Join(t.TempDir(), "repo") + if err := os.MkdirAll(filepath.Join(repo, "data"), 0o700); err != nil { + t.Fatalf("mkdir repo: %v", err) + } + if err := ensureExportOutsideRepo(filepath.Join(repo, "plaintext"), repo); err == nil { + t.Fatal("expected nested export dir to be rejected") + } + if err := ensureExportOutsideRepo(filepath.Join(filepath.Dir(repo), "export"), repo); err != nil { + t.Fatalf("outside export rejected: %v", err) + } +} + func mustUnixMilli(t *testing.T, value string) int64 { t.Helper() parsed, err := 
// readText returns the content of the file at path as a string and fails the
// test immediately when the file cannot be read.
func readText(t *testing.T, path string) string {
	t.Helper()

	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatalf("read %s: %v", path, readErr)
	}
	return string(raw)
}