feat(backup): add plaintext read and export commands

This commit is contained in:
Peter Steinberger 2026-04-27 10:15:26 +01:00
parent 922ca38b3b
commit ee9b552dcd
No known key found for this signature in database
11 changed files with 810 additions and 82 deletions

View File

@ -3,7 +3,12 @@
## 0.14.0 - Unreleased
### Added
- Backup: add `gog backup` with age-encrypted Git shards, Gmail labels/raw message export, manifest status, full decrypt-and-verify, docs, and security-focused regression coverage.
- Backup: add `gog backup` with age-encrypted Git shards, Gmail labels/raw message export, manifest status, full decrypt-and-verify, shard `cat`, local plaintext export, docs, and security-focused regression coverage.
### Fixed
- Drive: include `driveId` in `drive ls`, `drive search`, and `drive get` field masks so Shared Drive files can be identified in JSON output. (#524) — thanks @LeanSheng.
- Gmail: expose reply threading headers in default `gmail get --format metadata` output and fail explicit reply targets that cannot provide a `Message-ID`. (#528, #512) — thanks @solomonneas.
- Docs: include available tab names when `docs cat --tab` / structure lookup cannot find the requested tab. (#532) — thanks @johnbenjaminlewis.
## 0.13.0 - 2026-04-20

View File

@ -728,6 +728,8 @@ gog backup init --repo ~/Projects/backup-gog --remote https://github.com/steipet
gog backup push --services gmail --account you@gmail.com
gog backup status
gog backup verify
gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty
gog backup export --out ~/Documents/gog-backup-export
```
For a bounded first run:
@ -741,6 +743,12 @@ private age identity locally at `~/.gog/age.key`; GitHub only receives public
`age1...` recipients, `manifest.json`, and encrypted `*.jsonl.gz.age` payloads.
The private `AGE-SECRET-KEY-...` value must stay local or in a password manager.
Use `gog backup cat` to decrypt one shard as JSONL, or `gog backup export` to
write a local plaintext copy. The export writes Gmail messages as `.eml` files,
plus `gmail/<account-hash>/messages/index.jsonl` and pretty `labels.json`.
That export is unencrypted by design; keep it out of Git, shared folders,
and cloud sync unless you explicitly want a plaintext copy there.
`manifest.json` is intentionally cleartext for cheap status and verification.
It exposes metadata: export time, service names, account hashes, shard paths,
row counts, encrypted byte sizes, plaintext verification hashes, backup cadence,

View File

@ -48,6 +48,18 @@ Decrypt every shard and verify hashes and row counts:
gog backup verify
```
Decrypt one shard to stdout:
```bash
gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty
```
Write an unencrypted local copy for easy reading on the Mac:
```bash
gog backup export --out ~/Documents/gog-backup-export
```
Use `--no-push` on `init` or `push` to commit locally without pushing to the
remote.
@ -75,6 +87,23 @@ counts, encrypted byte sizes, and plaintext hashes used for verification. It
does not contain email subjects, senders, recipients, bodies, raw message IDs,
or labels.
Plaintext export directory:
```text
README.md
manifest.json
gmail/<account-hash>/labels.json
gmail/<account-hash>/messages/index.jsonl
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<message-id>.eml
raw/<service>/...
```
`gog backup export` decrypts and verifies the manifest-backed shards before
writing files. Gmail messages become `.eml` files that open in Mail and other
mail clients. The export is not encrypted; do not place it inside the backup
Git repository, and keep it out of synced/shared folders unless that is
intentional.
## Encryption
Backups use the Go `filippo.io/age` library with X25519 age identities. There
@ -98,6 +127,8 @@ For each shard, `gog backup push`:
`gog backup verify` decrypts each shard with the local age identity, gunzips it,
checks the plaintext SHA-256 hash from the manifest, and verifies row counts.
`gog backup cat` and `gog backup export` use the same verification path before
returning plaintext.
## Security Boundary

View File

@ -49,6 +49,8 @@ Generated from `gog schema --json`.
- `gog auth tokens import <inPath>` - Import a refresh token file into keyring (contains secrets)
- `gog auth tokens list` - List stored tokens (by key only)
- `gog backup <command> [flags]` - Encrypted Google account backups
- `gog backup cat <shard> [flags]` - Decrypt one backup shard to stdout
- `gog backup export [flags]` - Write a local plaintext export
- `gog backup gmail <command>` - Gmail backup operations
- `gog backup gmail push [flags]` - Export Gmail into encrypted backup shards
- `gog backup init [flags]` - Initialize encrypted backup config and repository

View File

@ -479,80 +479,3 @@ func removeStaleShards(repo string, shards []ShardEntry) error {
}
return nil
}
// writeBackupReadme writes README.md into the backup repository directory.
// When os.Stat succeeds for the README path the file is left untouched and nil
// is returned; any Stat failure (not only "does not exist") falls through to
// the write. The generated text describes the repository layout, the security
// model, and the push, verify, and recovery commands. The os.WriteFile error,
// if any, is returned as-is.
func writeBackupReadme(repo string) error {
path := filepath.Join(repo, "README.md")
// A successful Stat means something already exists at the path; keep it.
if _, err := os.Stat(path); err == nil {
return nil
}
const body = `# backup-gog
Encrypted Git backup for Google account data exported by gog.
This repository is written by ` + "`gog backup push`" + `. It is safe to keep on
GitHub because service payloads are encrypted before Git sees them.
## Layout
` + "```text" + `
README.md
manifest.json
data/<service>/<account-hash>/...
` + "```" + `
` + "`manifest.json`" + ` is cleartext and contains format version, export time,
public age recipients, service names, account hashes, shard paths, row counts,
encrypted byte sizes, and plaintext hashes used for verification. Email bodies,
subjects, senders, Drive filenames, contacts, event titles, and other private
Google data stay inside encrypted ` + "`*.jsonl.gz.age`" + ` shards.
## Security Model
Shard contents are deterministic JSONL, gzip-compressed with a fixed timestamp,
and encrypted with age for every configured public recipient. The local
` + "`~/.gog/age.key`" + ` identity is required to decrypt.
Git can still see manifest metadata: export time, public recipients, service
names, account hashes, shard paths, encrypted byte sizes, plaintext shard
hashes, backup cadence, and which encrypted shards changed. Git cannot read
Google content without an age identity.
Anyone who can push to this repository can replace encrypted backup data with
different data encrypted to your public recipient. Keep repository write access
restricted and review unexpected backup commits. If an age identity is
compromised, remove its public recipient and push a new backup; old Git history
may still contain shards decryptable by the compromised key.
## Push
` + "```bash" + `
gog backup push --services gmail
` + "```" + `
The command pulls/rebases this checkout, exports selected Google services,
writes encrypted shards, updates the manifest, commits, and pushes this
repository.
## Verify
` + "```bash" + `
gog backup verify
` + "```" + `
` + "`verify`" + ` decrypts every shard with the local age identity and verifies the
manifest hashes and row counts. It does not restore or write Google data.
## Recovery
Install gog, clone this repo to the path in ` + "`~/.gog/backup.json`" + `,
restore the local age identity file, then run:
` + "```bash" + `
gog backup verify
` + "```" + `
Do not commit the age identity. Only public ` + "`age1...`" + ` recipients belong in
config; ` + "`AGE-SECRET-KEY-...`" + ` values must stay local or in a password manager.
`
// Mode 0o600 requests owner-only read/write permissions for a newly created file.
return os.WriteFile(path, []byte(body), 0o600)
}

View File

@ -58,6 +58,53 @@ func TestPushSnapshotAndVerify(t *testing.T) {
}
}
// TestCatAndDecryptSnapshotVerifyPlaintext pushes a single Gmail message shard
// and checks that Cat (by manifest-relative and by absolute path) and
// DecryptSnapshot all return the same plaintext for it.
func TestCatAndDecryptSnapshotVerifyPlaintext(t *testing.T) {
	ctx, repo, config, _ := initTestBackup(t)
	opts := Options{ConfigPath: config}
	relShard := "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age"
	rows := []map[string]string{{
		"id":  "m1",
		"raw": "plain marker",
	}}
	pushSingleShard(t, ctx, config, mustGmailMessageShard(t, relShard, rows))

	// Manifest-relative reference.
	cat, err := Cat(ctx, opts, relShard)
	if err != nil {
		t.Fatalf("Cat: %v", err)
	}
	switch {
	case cat.Path != relShard,
		cat.Service != "gmail",
		cat.Kind != "messages",
		!strings.Contains(string(cat.Plaintext), "plain marker"):
		t.Fatalf("unexpected cat shard: %+v plaintext=%q", cat, cat.Plaintext)
	}

	// Absolute reference pointing at the same shard inside the repository.
	catAbs, err := Cat(ctx, opts, filepath.Join(repo, filepath.FromSlash(relShard)))
	if err != nil {
		t.Fatalf("Cat absolute: %v", err)
	}
	if string(catAbs.Plaintext) != string(cat.Plaintext) {
		t.Fatalf("absolute Cat plaintext mismatch")
	}

	// Whole-snapshot decryption must agree with the single-shard result.
	manifest, gotRepo, shards, err := DecryptSnapshot(ctx, opts)
	if err != nil {
		t.Fatalf("DecryptSnapshot: %v", err)
	}
	switch {
	case gotRepo != repo,
		len(manifest.Shards) != 1,
		len(shards) != 1,
		string(shards[0].Plaintext) != string(cat.Plaintext):
		t.Fatalf("unexpected decrypt snapshot repo=%s manifest=%+v shards=%+v", gotRepo, manifest, shards)
	}
}
// TestCatRejectsShardOutsideManifest checks that Cat returns an error for a
// reference that climbs out of the backup root and for a path that is not
// listed in the manifest.
func TestCatRejectsShardOutsideManifest(t *testing.T) {
	ctx, _, config, _ := initTestBackup(t)
	known := mustGmailMessageShard(t, "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []map[string]string{{"id": "m1"}})
	pushSingleShard(t, ctx, config, known)

	rejected := []string{
		"../data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age",
		"data/gmail/acct/messages/2026/05/part-0001.jsonl.gz.age",
	}
	for i := range rejected {
		ref := rejected[i]
		t.Run(ref, func(t *testing.T) {
			_, err := Cat(ctx, Options{ConfigPath: config}, ref)
			if err == nil {
				t.Fatal("expected Cat to reject missing or escaping shard")
			}
		})
	}
}
func TestIdentityAndConfigArePrivate(t *testing.T) {
_, _, config, identity := initTestBackup(t)

125
internal/backup/read.go Normal file
View File

@ -0,0 +1,125 @@
//nolint:err113,wrapcheck,wsl_v5 // Contextual errors keep backup call sites readable.
package backup
import (
"context"
"fmt"
"path"
"path/filepath"
"strings"
)
// Cat resolves the backup configuration, loads the manifest, looks up the
// shard referenced by shardPath, and returns its decrypted and verified
// plaintext. It fails when the manifest format differs from formatVersion or
// when the reference does not resolve to a manifest entry.
func Cat(ctx context.Context, opts Options, shardPath string) (PlainShard, error) {
	var none PlainShard

	cfg, err := ResolveOptions(opts)
	if err != nil {
		return none, err
	}
	if e := ensureRepo(ctx, cfg); e != nil {
		return none, e
	}

	manifest, err := readManifest(cfg.Repo)
	if err != nil {
		return none, err
	}
	if got := manifest.Format; got != formatVersion {
		return none, fmt.Errorf("unsupported backup format %d", got)
	}

	entry, err := findManifestShard(manifest, cfg.Repo, shardPath)
	if err != nil {
		return none, err
	}
	return decryptManifestShard(cfg, entry)
}
// DecryptSnapshot decrypts and verifies every shard listed in the manifest.
// It returns the manifest, the resolved repository path, and the plaintext
// shards in manifest order. Cancellation of ctx is checked before each shard;
// the first failure aborts the whole operation.
func DecryptSnapshot(ctx context.Context, opts Options) (Manifest, string, []PlainShard, error) {
	fail := func(cause error) (Manifest, string, []PlainShard, error) {
		return Manifest{}, "", nil, cause
	}

	cfg, err := ResolveOptions(opts)
	if err != nil {
		return fail(err)
	}
	if e := ensureRepo(ctx, cfg); e != nil {
		return fail(e)
	}

	manifest, err := readManifest(cfg.Repo)
	if err != nil {
		return fail(err)
	}
	if got := manifest.Format; got != formatVersion {
		return fail(fmt.Errorf("unsupported backup format %d", got))
	}

	decrypted := make([]PlainShard, 0, len(manifest.Shards))
	for i := range manifest.Shards {
		// Non-blocking cancellation check between shards.
		select {
		case <-ctx.Done():
			return fail(ctx.Err())
		default:
		}

		plain, shardErr := decryptManifestShard(cfg, manifest.Shards[i])
		if shardErr != nil {
			return fail(shardErr)
		}
		decrypted = append(decrypted, plain)
	}
	return manifest, cfg.Repo, decrypted, nil
}
// decryptManifestShard decrypts the file behind one manifest entry, verifies
// the plaintext against that entry, and wraps it in a PlainShard that carries
// the entry's service, kind, account, path, and row count.
func decryptManifestShard(cfg Config, shard ShardEntry) (PlainShard, error) {
	data, err := decryptShardFile(cfg, shard)
	if err == nil {
		// Only verify when decryption produced plaintext.
		err = verifyPlainShard(shard, data)
	}
	if err != nil {
		return PlainShard{}, err
	}

	var out PlainShard
	out.Service = shard.Service
	out.Kind = shard.Kind
	out.Account = shard.Account
	out.Path = shard.Path
	out.Rows = shard.Rows
	out.Plaintext = data
	return out, nil
}
// verifyPlainShard checks decrypted shard bytes against the manifest entry:
// first the SHA-256 hex digest, then the JSONL row count. It returns nil only
// when both match.
func verifyPlainShard(shard ShardEntry, plaintext []byte) error {
	if sha256Hex(plaintext) != shard.SHA256 {
		return fmt.Errorf("backup shard hash mismatch for %s", shard.Path)
	}

	got, err := countJSONLLines(plaintext)
	switch {
	case err != nil:
		return fmt.Errorf("count rows in %s: %w", shard.Path, err)
	case got != shard.Rows:
		return fmt.Errorf("backup shard row count mismatch for %s: got %d, want %d", shard.Path, got, shard.Rows)
	}
	return nil
}
// findManifestShard normalizes shardPath relative to repo and returns the
// manifest entry stored under the normalized path. The error for an unknown
// shard quotes the reference exactly as the caller supplied it.
func findManifestShard(manifest Manifest, repo, shardPath string) (ShardEntry, error) {
	normalized, err := normalizeShardRef(repo, shardPath)
	if err != nil {
		return ShardEntry{}, err
	}

	entry, found := manifest.entry(normalized)
	if !found {
		return ShardEntry{}, fmt.Errorf("backup shard not found in manifest: %s", shardPath)
	}
	return entry, nil
}
// normalizeShardRef turns a user-supplied shard reference into a clean,
// slash-separated path relative to the backup root. Absolute references are
// first made relative to repo. Empty references and references that resolve
// to the root itself or to anything above it are rejected.
func normalizeShardRef(repo, ref string) (string, error) {
	candidate := strings.TrimSpace(ref)
	if candidate == "" {
		return "", fmt.Errorf("backup shard path is required")
	}

	if filepath.IsAbs(candidate) {
		relative, err := filepath.Rel(repo, candidate)
		if err != nil {
			return "", err
		}
		candidate = relative
	}

	normalized := path.Clean(filepath.ToSlash(candidate))
	switch {
	case normalized == ".",
		normalized == "..",
		strings.HasPrefix(normalized, "../"),
		path.IsAbs(normalized):
		return "", fmt.Errorf("backup shard path escapes backup root: %s", candidate)
	}
	return normalized, nil
}

89
internal/backup/readme.go Normal file
View File

@ -0,0 +1,89 @@
package backup
import (
"fmt"
"os"
"path/filepath"
)
// writeBackupReadme writes README.md into the backup repository directory.
// When os.Stat succeeds for the README path the file is left untouched and nil
// is returned; any Stat failure (not only "does not exist") falls through to
// the write. The generated text describes the repository layout, the security
// model, and the push, verify, and recovery commands. A write failure is
// returned wrapped with the "write backup readme" context.
func writeBackupReadme(repo string) error {
path := filepath.Join(repo, "README.md")
// A successful Stat means something already exists at the path; keep it.
if _, err := os.Stat(path); err == nil {
return nil
}
const body = `# backup-gog
Encrypted Git backup for Google account data exported by gog.
This repository is written by ` + "`gog backup push`" + `. It is safe to keep on
GitHub because service payloads are encrypted before Git sees them.
## Layout
` + "```text" + `
README.md
manifest.json
data/<service>/<account-hash>/...
` + "```" + `
` + "`manifest.json`" + ` is cleartext and contains format version, export time,
public age recipients, service names, account hashes, shard paths, row counts,
encrypted byte sizes, and plaintext hashes used for verification. Email bodies,
subjects, senders, Drive filenames, contacts, event titles, and other private
Google data stay inside encrypted ` + "`*.jsonl.gz.age`" + ` shards.
## Security Model
Shard contents are deterministic JSONL, gzip-compressed with a fixed timestamp,
and encrypted with age for every configured public recipient. The local
` + "`~/.gog/age.key`" + ` identity is required to decrypt.
Git can still see manifest metadata: export time, public recipients, service
names, account hashes, shard paths, encrypted byte sizes, plaintext shard
hashes, backup cadence, and which encrypted shards changed. Git cannot read
Google content without an age identity.
Anyone who can push to this repository can replace encrypted backup data with
different data encrypted to your public recipient. Keep repository write access
restricted and review unexpected backup commits. If an age identity is
compromised, remove its public recipient and push a new backup; old Git history
may still contain shards decryptable by the compromised key.
## Push
` + "```bash" + `
gog backup push --services gmail
` + "```" + `
The command pulls/rebases this checkout, exports selected Google services,
writes encrypted shards, updates the manifest, commits, and pushes this
repository.
## Verify
` + "```bash" + `
gog backup verify
` + "```" + `
` + "`verify`" + ` decrypts every shard with the local age identity and verifies the
manifest hashes and row counts. It does not restore or write Google data.
## Recovery
Install gog, clone this repo to the path in ` + "`~/.gog/backup.json`" + `,
restore the local age identity file, then run:
` + "```bash" + `
gog backup verify
` + "```" + `
Do not commit the age identity. Only public ` + "`age1...`" + ` recipients belong in
config; ` + "`AGE-SECRET-KEY-...`" + ` values must stay local or in a password manager.
`
// Mode 0o600 requests owner-only read/write permissions for a newly created file.
if err := os.WriteFile(path, []byte(body), 0o600); err != nil {
return fmt.Errorf("write backup readme: %w", err)
}
return nil
}

View File

@ -24,6 +24,8 @@ type BackupCmd struct {
Push BackupPushCmd `cmd:"" name:"push" help:"Export services into encrypted backup shards"`
Status BackupStatusCmd `cmd:"" name:"status" help:"Inspect backup manifest without decrypting shards"`
Verify BackupVerifyCmd `cmd:"" name:"verify" help:"Decrypt and verify all backup shards"`
Cat BackupCatCmd `cmd:"" name:"cat" help:"Decrypt one backup shard to stdout"`
Export BackupExportCmd `cmd:"" name:"export" help:"Write a local plaintext export"`
Gmail BackupGmailCmd `cmd:"" name:"gmail" help:"Gmail backup operations"`
}
@ -31,6 +33,8 @@ type BackupGmailCmd struct {
Push BackupGmailPushCmd `cmd:"" name:"push" help:"Export Gmail into encrypted backup shards"`
}
// backupServiceGmail is the service name used for Gmail in backup shards,
// snapshot service lists, and the `--services` switch of the push command.
const backupServiceGmail = "gmail"
type backupFlags struct {
Config string `name:"config" help:"Backup config path" default:""`
Repo string `name:"repo" help:"Local backup repository path"`
@ -51,6 +55,23 @@ func (f backupFlags) options() backup.Options {
}
}
// backupReadFlags is the flag set embedded by the read-only backup commands
// (cat and export). Each field maps to one CLI flag via its struct tag.
type backupReadFlags struct {
// Config is the backup config path; the default is the empty string.
Config string `name:"config" help:"Backup config path" default:""`
// Repo is the local backup repository path.
Repo string `name:"repo" help:"Local backup repository path"`
// Remote is the backup Git remote URL.
Remote string `name:"remote" help:"Backup Git remote URL"`
// Identity is the path of the local age identity.
Identity string `name:"identity" help:"Local age identity path"`
}
// options converts the read flags into backup.Options. Push is always set to
// false for the read-only commands.
func (f backupReadFlags) options() backup.Options {
	var opts backup.Options
	opts.ConfigPath = f.Config
	opts.Repo = f.Repo
	opts.Remote = f.Remote
	opts.Identity = f.Identity
	opts.Push = false
	return opts
}
// BackupInitCmd carries only the shared backupFlags.
// NOTE(review): presumably this backs `gog backup init`; its registration is
// not visible in this hunk — confirm against BackupCmd.
type BackupInitCmd struct {
backupFlags
}
@ -93,7 +114,7 @@ func (c *BackupPushCmd) Run(ctx context.Context, flags *RootFlags) error {
var snapshots []backup.Snapshot
for _, service := range services {
switch strings.ToLower(strings.TrimSpace(service)) {
case "gmail":
case backupServiceGmail:
snapshot, err := buildGmailBackupSnapshot(ctx, flags, gmailBackupOptions{
Query: c.Query,
Max: c.Max,
@ -233,7 +254,7 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB
return backup.Snapshot{}, err
}
shards := make([]backup.PlainShard, 0, 1)
labelShard, err := backup.NewJSONLShard("gmail", "labels", accountHash, fmt.Sprintf("data/gmail/%s/labels.jsonl.gz.age", accountHash), labels)
labelShard, err := backup.NewJSONLShard(backupServiceGmail, "labels", accountHash, fmt.Sprintf("data/gmail/%s/labels.jsonl.gz.age", accountHash), labels)
if err != nil {
return backup.Snapshot{}, err
}
@ -244,7 +265,7 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB
}
shards = append(shards, messageShards...)
return backup.Snapshot{
Services: []string{"gmail"},
Services: []string{backupServiceGmail},
Accounts: []string{accountHash},
Counts: map[string]int{
"gmail.labels": len(labels),
@ -422,7 +443,7 @@ func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage,
end = len(values)
}
rel := fmt.Sprintf("data/gmail/%s/messages/%s/part-%04d.jsonl.gz.age", accountHash, key, part)
shard, err := backup.NewJSONLShard("gmail", "messages", accountHash, rel, values[start:end])
shard, err := backup.NewJSONLShard(backupServiceGmail, "messages", accountHash, rel, values[start:end])
if err != nil {
return nil, err
}

View File

@ -0,0 +1,394 @@
package cmd
import (
"bufio"
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/steipete/gogcli/internal/backup"
"github.com/steipete/gogcli/internal/outfmt"
"github.com/steipete/gogcli/internal/ui"
)
// BackupCatCmd holds the flags and argument for `gog backup cat`, which
// decrypts one backup shard to stdout or to a file.
type BackupCatCmd struct {
backupReadFlags
// Shard is the positional shard reference: a manifest shard path, or an
// absolute path under the backup repo.
Shard string `arg:"" name:"shard" help:"Manifest shard path, or absolute path under the backup repo"`
// Pretty pretty-prints each JSONL row of the decrypted shard.
Pretty bool `name:"pretty" help:"Pretty-print each JSONL row"`
// Out, when non-blank, is the file the decrypted JSONL is written to
// instead of stdout.
Out string `name:"out" help:"Write decrypted JSONL to this file instead of stdout"`
}
// Run decrypts the requested shard and emits its JSONL, optionally
// pretty-printed. With a non-blank --out the data is written to that file
// (parent directories created with mode 0o700, file mode 0o600); otherwise it
// is written to stdout.
func (c *BackupCatCmd) Run(ctx context.Context) error {
	shard, err := backup.Cat(ctx, c.options(), c.Shard)
	if err != nil {
		return err
	}

	payload := shard.Plaintext
	if c.Pretty {
		if payload, err = prettyJSONL(payload); err != nil {
			return fmt.Errorf("pretty-print shard: %w", err)
		}
	}

	// No output file requested: stream straight to stdout.
	if strings.TrimSpace(c.Out) == "" {
		_, err = os.Stdout.Write(payload)
		return err
	}

	dest, err := expandUserPath(c.Out)
	if err != nil {
		return err
	}
	if err = os.MkdirAll(filepath.Dir(dest), 0o700); err != nil {
		return err
	}
	return os.WriteFile(dest, payload, 0o600)
}
// BackupExportCmd holds the flags for `gog backup export`, which writes a
// local plaintext export.
type BackupExportCmd struct {
backupReadFlags
// Out is the plaintext export directory; the default lives under the
// user's Documents folder.
Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"`
}
// backupExportResult is the summary reported by the export command, either as
// JSON or as tab-separated text lines.
type backupExportResult struct {
// Out is the resolved export directory.
Out string `json:"out"`
// Repo is the backup repository the export was read from.
Repo string `json:"repo"`
// ManifestExport is the manifest's export time (serialized as
// "manifestExported").
ManifestExport time.Time `json:"manifestExported"`
// Files is the number of non-directory entries found under Out after the
// export finished.
Files int `json:"files"`
// Counts sums exported rows per "service" or "service.kind" key.
Counts map[string]int `json:"counts"`
}
// gmailExportIndexEntry is one row of the exported Gmail index.jsonl. It
// copies metadata from a backed-up message and records where that message's
// .eml file was written.
type gmailExportIndexEntry struct {
ID string `json:"id"`
ThreadID string `json:"threadId,omitempty"`
HistoryID string `json:"historyId,omitempty"`
// InternalDate is copied from the backup message; the export path code
// treats it as Unix milliseconds.
InternalDate int64 `json:"internalDate,omitempty"`
LabelIDs []string `json:"labelIds,omitempty"`
SizeEstimate int64 `json:"sizeEstimate,omitempty"`
// EML is the slash-separated path of the .eml file relative to the export
// directory.
EML string `json:"eml"`
}
// Run decrypts and verifies the whole backup snapshot, refuses to export into
// the backup repository itself, then writes the plaintext export (README,
// manifest.json, and one set of files per shard) under the output directory.
// It finishes by reporting the output directory, repository, file count, and
// per-kind row counts as JSON or as tab-separated lines.
func (c *BackupExportCmd) Run(ctx context.Context) error {
	target, err := expandUserPath(c.Out)
	if err != nil {
		return err
	}
	manifest, repo, shards, err := backup.DecryptSnapshot(ctx, c.options())
	if err != nil {
		return err
	}
	if err = ensureExportOutsideRepo(target, repo); err != nil {
		return err
	}

	summary := backupExportResult{
		Out:            target,
		Repo:           repo,
		ManifestExport: manifest.Exported,
		Counts:         map[string]int{},
	}

	// Preparation steps run strictly in this order; the first failure aborts.
	prepare := []func() error{
		func() error { return os.MkdirAll(target, 0o700) },
		func() error { return writeBackupExportReadme(target) },
		func() error { return writeJSONFile(filepath.Join(target, "manifest.json"), manifest) },
		func() error { return resetGmailExportIndexes(target, shards) },
	}
	for _, step := range prepare {
		if err = step(); err != nil {
			return err
		}
	}

	for i := range shards {
		_, rows, shardErr := exportPlainShard(target, shards[i])
		if shardErr != nil {
			return shardErr
		}
		// Counts are keyed "service.kind", or just "service" when kind is blank.
		label := shards[i].Service
		if kind := shards[i].Kind; strings.TrimSpace(kind) != "" {
			label += "." + kind
		}
		summary.Counts[label] += rows
	}

	if summary.Files, err = countExportFiles(target); err != nil {
		return err
	}

	if outfmt.IsJSON(ctx) {
		return outfmt.WriteJSON(ctx, os.Stdout, summary)
	}

	term := ui.FromContext(ctx)
	term.Out().Printf("out\t%s", summary.Out)
	term.Out().Printf("repo\t%s", summary.Repo)
	term.Out().Printf("files\t%d", summary.Files)

	// Sort the keys so the text output is deterministic.
	labels := make([]string, 0, len(summary.Counts))
	for label := range summary.Counts {
		labels = append(labels, label)
	}
	sort.Strings(labels)
	for _, label := range labels {
		term.Out().Printf("count.%s\t%d", label, summary.Counts[label])
	}
	return nil
}
// prettyJSONL re-indents every non-blank line of a JSONL document and returns
// the result, with each pretty-printed value terminated by a newline. Blank
// lines are dropped. It returns a nil slice and an error when a line is not
// valid JSON.
func prettyJSONL(data []byte) ([]byte, error) {
	scanner := bufio.NewScanner(bytes.NewReader(data))
	// A single row can be far larger than any fixed cap (a Gmail row embeds the
	// whole base64-encoded message), so let a token grow up to the size of the
	// input itself instead of failing with bufio.ErrTooLong.
	scanner.Buffer(make([]byte, 0, 64*1024), len(data)+1)

	var out bytes.Buffer
	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}
		if err := json.Indent(&out, line, "", " "); err != nil {
			return nil, err
		}
		// bytes.Buffer.WriteByte always returns a nil error.
		out.WriteByte('\n')
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// expandUserPath trims the given path, substitutes the default export
// directory when it is blank, expands a leading "~" or "~/" to the user's home
// directory, and returns the cleaned absolute form.
func expandUserPath(path string) (string, error) {
	resolved := strings.TrimSpace(path)
	if len(resolved) == 0 {
		resolved = "~/Documents/gog-backup-export"
	}

	bareHome := resolved == "~"
	if bareHome || strings.HasPrefix(resolved, "~/") {
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}
		if bareHome {
			resolved = home
		} else {
			// Drop the "~/" marker and anchor the remainder at the home directory.
			resolved = filepath.Join(home, resolved[len("~/"):])
		}
	}

	absolute, err := filepath.Abs(resolved)
	if err != nil {
		return "", err
	}
	return filepath.Clean(absolute), nil
}
// ensureExportOutsideRepo returns an error when outDir is the backup
// repository itself or lies anywhere beneath it. Both paths are made absolute
// and cleaned before they are compared; symlinks are not resolved.
func ensureExportOutsideRepo(outDir, repo string) error {
	absOut, err := filepath.Abs(outDir)
	if err != nil {
		return err
	}
	absRepo, err := filepath.Abs(repo)
	if err != nil {
		return err
	}
	absOut, absRepo = filepath.Clean(absOut), filepath.Clean(absRepo)

	rel, err := filepath.Rel(absRepo, absOut)
	if err != nil {
		return err
	}

	// The export is outside the repo only when the relative path climbs out
	// of it (".." or "../…") or could not be expressed relatively at all.
	climbsOut := rel == ".." ||
		strings.HasPrefix(rel, ".."+string(filepath.Separator)) ||
		filepath.IsAbs(rel)
	if rel != "." && climbsOut {
		return nil
	}
	return fmt.Errorf("plaintext export directory must be outside backup repo: %s", absOut)
}
// resetGmailExportIndexes removes the existing messages/index.jsonl for every
// account that has a Gmail messages shard. The message export appends to that
// file, so it has to start empty. A missing index is not an error.
func resetGmailExportIndexes(outDir string, shards []backup.PlainShard) error {
	cleared := make(map[string]struct{})
	for i := range shards {
		if shards[i].Service != backupServiceGmail || shards[i].Kind != "messages" {
			continue
		}
		indexPath := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shards[i].Account), "messages", "index.jsonl")
		if _, done := cleared[indexPath]; done {
			continue
		}
		cleared[indexPath] = struct{}{}

		err := os.Remove(indexPath)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		return err
	}
	return nil
}
// writeBackupExportReadme writes (or overwrites) README.md in the export
// directory with a short warning that the directory holds unencrypted data and
// a description of the Gmail layout.
func writeBackupExportReadme(outDir string) error {
	lines := []string{
		"# gog backup plaintext export",
		"",
		"This directory is an unencrypted local copy created by `gog backup export`.",
		"Keep it out of Git, shared folders, and cloud sync unless that is intentional.",
		"",
		"Gmail messages are written as `.eml` files that can be opened by Mail and many",
		"mail clients. `gmail/<account>/messages/index.jsonl` maps backup message IDs",
		"to the exported `.eml` files. Labels are written as pretty JSON.",
	}
	// Every line, including the last, ends with a newline.
	contents := strings.Join(lines, "\n") + "\n"
	readme := filepath.Join(outDir, "README.md")
	return os.WriteFile(readme, []byte(contents), 0o600)
}
// writeJSONFile encodes value as indented JSON followed by a newline and
// writes it to path with mode 0o600, creating parent directories with mode
// 0o700. Encoding happens first, so an unencodable value touches nothing on
// disk.
func writeJSONFile(path string, value any) error {
	encoded, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		return err
	}
	if err = os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
		return err
	}
	return os.WriteFile(path, append(encoded, '\n'), 0o600)
}
// exportPlainShard dispatches one decrypted shard to the matching exporter:
// Gmail labels, Gmail messages, or the raw JSONL fallback for everything else.
// It forwards the exporter's file count, row count, and error.
func exportPlainShard(outDir string, shard backup.PlainShard) (int, int, error) {
	if shard.Service == backupServiceGmail {
		switch shard.Kind {
		case "labels":
			return exportGmailLabels(outDir, shard)
		case "messages":
			return exportGmailMessages(outDir, shard)
		}
	}
	return exportRawShard(outDir, shard)
}
// exportGmailLabels decodes a Gmail labels shard and writes the labels as
// indented JSON to gmail/<account>/labels.json under outDir. It returns one
// file and the number of labels written.
func exportGmailLabels(outDir string, shard backup.PlainShard) (int, int, error) {
	var decoded []gmailBackupLabel
	if err := backup.DecodeJSONL(shard.Plaintext, &decoded); err != nil {
		return 0, 0, err
	}

	accountDir := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account))
	if err := writeJSONFile(filepath.Join(accountDir, "labels.json"), decoded); err != nil {
		return 0, 0, err
	}
	return 1, len(decoded), nil
}
// exportGmailMessages writes every message of a Gmail messages shard as an
// .eml file under outDir and appends one row per message to
// gmail/<account>/messages/index.jsonl.
//
// It returns the number of files touched (the .eml files plus the index) and
// the number of messages in the shard. On failure the row count is zero and
// the file count covers the .eml files written before the error. The index is
// opened in append mode, so rows written for other shards of the same account
// are kept. A failure to close the index is reported instead of being
// silently dropped, because a lost close error can hide unwritten index rows.
func exportGmailMessages(outDir string, shard backup.PlainShard) (files, rows int, err error) {
	var messages []gmailBackupMessage
	if decodeErr := backup.DecodeJSONL(shard.Plaintext, &messages); decodeErr != nil {
		return 0, 0, decodeErr
	}

	account := sanitizeFilePart(shard.Account)
	indexPath := filepath.Join(outDir, backupServiceGmail, account, "messages", "index.jsonl")
	if mkdirErr := os.MkdirAll(filepath.Dir(indexPath), 0o700); mkdirErr != nil {
		return 0, 0, mkdirErr
	}
	indexFile, openErr := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) // #nosec G304 -- path is confined to caller-selected export dir and sanitized account.
	if openErr != nil {
		return 0, 0, openErr
	}
	defer func() {
		// Surface a close failure only when nothing else already failed.
		if closeErr := indexFile.Close(); closeErr != nil && err == nil {
			rows = 0
			err = fmt.Errorf("close Gmail export index %s: %w", indexPath, closeErr)
		}
	}()

	enc := json.NewEncoder(indexFile)
	// Keep characters such as '<' and '&' readable in the index.
	enc.SetEscapeHTML(false)

	for _, message := range messages {
		mime, rawErr := decodeGmailRaw(message.Raw)
		if rawErr != nil {
			return files, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, rawErr)
		}
		rel := backupExportMessagePath(account, message)
		emlPath := filepath.Join(outDir, filepath.FromSlash(rel))
		if mkdirErr := os.MkdirAll(filepath.Dir(emlPath), 0o700); mkdirErr != nil {
			return files, 0, mkdirErr
		}
		if writeErr := os.WriteFile(emlPath, mime, 0o600); writeErr != nil {
			return files, 0, writeErr
		}
		files++

		entry := gmailExportIndexEntry{
			ID:           message.ID,
			ThreadID:     message.ThreadID,
			HistoryID:    message.HistoryID,
			InternalDate: message.InternalDate,
			LabelIDs:     message.LabelIDs,
			SizeEstimate: message.SizeEstimate,
			EML:          rel,
		}
		if encodeErr := enc.Encode(entry); encodeErr != nil {
			return files, 0, encodeErr
		}
	}
	// The index file itself counts as one more written file.
	return files + 1, len(messages), nil
}
// exportRawShard writes a shard's plaintext JSONL unchanged to
// raw/<shard path without the ".gz.age" suffix> under outDir. It returns one
// file and the shard's row count.
//
// The shard path comes from the backup manifest, so it is checked to stay
// inside the raw/ directory before anything is written; a path that climbs out
// (for example through ".." segments) is rejected.
func exportRawShard(outDir string, shard backup.PlainShard) (int, int, error) {
	rawRoot := filepath.Join(outDir, "raw")
	rel := strings.TrimSuffix(shard.Path, ".gz.age")
	target := filepath.Join(rawRoot, filepath.FromSlash(rel))

	// Confine the destination to rawRoot: it must be a strict descendant.
	inside, relErr := filepath.Rel(rawRoot, target)
	if relErr != nil || inside == "." || inside == ".." || strings.HasPrefix(inside, ".."+string(filepath.Separator)) {
		return 0, 0, fmt.Errorf("backup shard path escapes export directory: %s", shard.Path)
	}

	if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
		return 0, 0, err
	}
	if err := os.WriteFile(target, shard.Plaintext, 0o600); err != nil {
		return 0, 0, err
	}
	return 1, shard.Rows, nil
}
// countExportFiles walks outDir and returns the number of non-directory
// entries beneath it. A walk error stops the traversal and is returned along
// with the count reached so far.
func countExportFiles(outDir string) (int, error) {
	var total int
	visit := func(_ string, entry os.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case entry == nil, entry.IsDir():
			// Nothing to count for directories or missing entries.
			return nil
		}
		total++
		return nil
	}
	err := filepath.WalkDir(outDir, visit)
	return total, err
}
// decodeGmailRaw decodes a base64url message payload. Surrounding whitespace
// is ignored; the unpadded alphabet is tried first and the padded one is the
// fallback. A blank payload is an error.
func decodeGmailRaw(raw string) ([]byte, error) {
	encoded := strings.TrimSpace(raw)
	if len(encoded) == 0 {
		return nil, fmt.Errorf("empty raw payload")
	}

	decoded, err := base64.RawURLEncoding.DecodeString(encoded)
	if err != nil {
		// Not valid unpadded base64url; retry as the padded variant.
		return base64.URLEncoding.DecodeString(encoded)
	}
	return decoded, nil
}
// backupExportMessagePath builds the slash-separated export path of one
// message's .eml file:
// gmail/<account>/messages/<YYYY>/<MM>/<timestamp>-<sanitized id>.eml.
// A positive InternalDate is interpreted as Unix milliseconds in UTC;
// otherwise trackingUnknown stands in for both the timestamp and the
// year/month directory.
func backupExportMessagePath(account string, message gmailBackupMessage) string {
	stamp, bucket := trackingUnknown, trackingUnknown
	if millis := message.InternalDate; millis > 0 {
		when := time.UnixMilli(millis).UTC()
		stamp = when.Format("20060102T150405Z")
		year := fmt.Sprintf("%04d", when.Year())
		month := fmt.Sprintf("%02d", int(when.Month()))
		bucket = filepath.Join(year, month)
	}

	fileName := stamp + "-" + sanitizeFilePart(message.ID) + ".eml"
	full := filepath.Join(backupServiceGmail, account, "messages", bucket, fileName)
	return filepath.ToSlash(full)
}
// sanitizeFilePart makes value safe to use as a single path component. ASCII
// letters, digits, '.', '-', and '_' are kept; every other rune becomes '_'.
// Leading and trailing '.', '_', and '-' are then stripped. A blank input or
// an empty result yields trackingUnknown.
func sanitizeFilePart(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return trackingUnknown
	}

	replaced := strings.Map(func(r rune) rune {
		isLetter := (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z')
		isDigit := r >= '0' && r <= '9'
		if isLetter || isDigit || r == '.' || r == '-' || r == '_' {
			return r
		}
		return '_'
	}, trimmed)

	if cleaned := strings.Trim(replaced, "._-"); cleaned != "" {
		return cleaned
	}
	return trackingUnknown
}

View File

@ -1,6 +1,9 @@
package cmd
import (
"encoding/base64"
"os"
"path/filepath"
"strings"
"testing"
"time"
@ -88,6 +91,77 @@ func TestMergeBackupSnapshotsKeepsCountsAndShardOrder(t *testing.T) {
}
}
// TestDecodeGmailRawAcceptsBase64URLVariants checks that decodeGmailRaw
// round-trips the same payload from both the unpadded and the padded base64url
// encodings.
func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) {
	payload := []byte("Subject: Hello\r\n\r\nBody")
	unpadded := base64.RawURLEncoding.EncodeToString(payload)
	padded := base64.URLEncoding.EncodeToString(payload)

	// Unpadded variant.
	decoded, err := decodeGmailRaw(unpadded)
	if err != nil {
		t.Fatalf("decodeGmailRaw raw: %v", err)
	}
	if string(decoded) != string(payload) {
		t.Fatalf("raw decoded = %q, want %q", decoded, payload)
	}

	// Padded variant.
	decoded, err = decodeGmailRaw(padded)
	if err != nil {
		t.Fatalf("decodeGmailRaw padded: %v", err)
	}
	if string(decoded) != string(payload) {
		t.Fatalf("padded decoded = %q, want %q", decoded, payload)
	}
}
// TestExportGmailMessagesWritesReadableEMLAndIndex exports a single-message
// shard and checks the returned counts, the decoded .eml contents, and the
// index row that points at the .eml file. The message ID and account contain
// '/' so the sanitized names are exercised.
func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) {
	exportRoot := t.TempDir()
	mimeBytes := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody")
	msg := gmailBackupMessage{
		ID:           "msg/one",
		ThreadID:     "thread-1",
		InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
		LabelIDs:     []string{"INBOX"},
		Raw:          base64.RawURLEncoding.EncodeToString(mimeBytes),
	}
	shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{msg})
	if err != nil {
		t.Fatalf("NewJSONLShard: %v", err)
	}

	files, count, err := exportGmailMessages(exportRoot, shard)
	if err != nil {
		t.Fatalf("exportGmailMessages: %v", err)
	}
	if !(files == 2 && count == 1) {
		t.Fatalf("files,count = %d,%d want 2,1", files, count)
	}

	emlRel := backupExportMessagePath("acct_hash", msg)
	emlBytes, err := os.ReadFile(filepath.Join(exportRoot, filepath.FromSlash(emlRel)))
	if err != nil {
		t.Fatalf("read eml: %v", err)
	}
	if string(emlBytes) != string(mimeBytes) {
		t.Fatalf("eml = %q, want %q", emlBytes, mimeBytes)
	}

	index := readText(t, filepath.Join(exportRoot, "gmail", "acct_hash", "messages", "index.jsonl"))
	hasID := strings.Contains(index, `"id":"msg/one"`)
	hasEML := strings.Contains(index, `"eml":"`+emlRel+`"`)
	if !hasID || !hasEML {
		t.Fatalf("index missing expected fields: %s", index)
	}
}
// TestEnsureExportOutsideRepoRejectsNestedPlaintext checks that an export
// directory inside the repository is refused while a sibling directory is
// accepted.
func TestEnsureExportOutsideRepoRejectsNestedPlaintext(t *testing.T) {
	repo := filepath.Join(t.TempDir(), "repo")
	if err := os.MkdirAll(filepath.Join(repo, "data"), 0o700); err != nil {
		t.Fatalf("mkdir repo: %v", err)
	}

	nested := filepath.Join(repo, "plaintext")
	if ensureExportOutsideRepo(nested, repo) == nil {
		t.Fatal("expected nested export dir to be rejected")
	}

	sibling := filepath.Join(filepath.Dir(repo), "export")
	if err := ensureExportOutsideRepo(sibling, repo); err != nil {
		t.Fatalf("outside export rejected: %v", err)
	}
}
func mustUnixMilli(t *testing.T, value string) int64 {
t.Helper()
parsed, err := time.Parse(time.RFC3339, value)
@ -96,3 +170,12 @@ func mustUnixMilli(t *testing.T, value string) int64 {
}
return parsed.UnixMilli()
}
// readText returns the contents of the file at path as a string and fails the
// test immediately when the file cannot be read.
func readText(t *testing.T, path string) string {
	t.Helper()

	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatalf("read %s: %v", path, readErr)
	}
	return string(contents)
}