feat(backup): add plaintext read and export commands
This commit is contained in:
parent
922ca38b3b
commit
ee9b552dcd
@ -3,7 +3,12 @@
|
||||
## 0.14.0 - Unreleased
|
||||
|
||||
### Added
|
||||
- Backup: add `gog backup` with age-encrypted Git shards, Gmail labels/raw message export, manifest status, full decrypt-and-verify, docs, and security-focused regression coverage.
|
||||
- Backup: add `gog backup` with age-encrypted Git shards, Gmail labels/raw message export, manifest status, full decrypt-and-verify, shard `cat`, local plaintext export, docs, and security-focused regression coverage.
|
||||
|
||||
### Fixed
|
||||
- Drive: include `driveId` in `drive ls`, `drive search`, and `drive get` field masks so Shared Drive files can be identified in JSON output. (#524) — thanks @LeanSheng.
|
||||
- Gmail: expose reply threading headers in default `gmail get --format metadata` output and fail explicit reply targets that cannot provide a `Message-ID`. (#528, #512) — thanks @solomonneas.
|
||||
- Docs: include available tab names when `docs cat --tab` / structure lookup cannot find the requested tab. (#532) — thanks @johnbenjaminlewis.
|
||||
|
||||
## 0.13.0 - 2026-04-20
|
||||
|
||||
|
||||
@ -728,6 +728,8 @@ gog backup init --repo ~/Projects/backup-gog --remote https://github.com/steipet
|
||||
gog backup push --services gmail --account you@gmail.com
|
||||
gog backup status
|
||||
gog backup verify
|
||||
gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty
|
||||
gog backup export --out ~/Documents/gog-backup-export
|
||||
```
|
||||
|
||||
For a bounded first run:
|
||||
@ -741,6 +743,12 @@ private age identity locally at `~/.gog/age.key`; GitHub only receives public
|
||||
`age1...` recipients, `manifest.json`, and encrypted `*.jsonl.gz.age` payloads.
|
||||
The private `AGE-SECRET-KEY-...` value must stay local or in a password manager.
|
||||
|
||||
Use `gog backup cat` to decrypt one shard as JSONL, or `gog backup export` to
|
||||
write a local plaintext copy. The export writes Gmail messages as `.eml` files,
|
||||
plus `gmail/<account-hash>/messages/index.jsonl` and pretty `labels.json`.
|
||||
That export is intentionally unencrypted; keep it out of Git, shared folders,
|
||||
and cloud sync unless that is intentional.
|
||||
|
||||
`manifest.json` is intentionally cleartext for cheap status and verification.
|
||||
It exposes metadata: export time, service names, account hashes, shard paths,
|
||||
row counts, encrypted byte sizes, plaintext verification hashes, backup cadence,
|
||||
|
||||
@ -48,6 +48,18 @@ Decrypt every shard and verify hashes and row counts:
|
||||
gog backup verify
|
||||
```
|
||||
|
||||
Decrypt one shard to stdout:
|
||||
|
||||
```bash
|
||||
gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty
|
||||
```
|
||||
|
||||
Write an unencrypted local copy for easy reading on the Mac:
|
||||
|
||||
```bash
|
||||
gog backup export --out ~/Documents/gog-backup-export
|
||||
```
|
||||
|
||||
Use `--no-push` on `init` or `push` to commit locally without pushing to the
|
||||
remote.
|
||||
|
||||
@ -75,6 +87,23 @@ counts, encrypted byte sizes, and plaintext hashes used for verification. It
|
||||
does not contain email subjects, senders, recipients, bodies, raw message IDs,
|
||||
or labels.
|
||||
|
||||
Plaintext export directory:
|
||||
|
||||
```text
|
||||
README.md
|
||||
manifest.json
|
||||
gmail/<account-hash>/labels.json
|
||||
gmail/<account-hash>/messages/index.jsonl
|
||||
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<message-id>.eml
|
||||
raw/<service>/...
|
||||
```
|
||||
|
||||
`gog backup export` decrypts and verifies the manifest-backed shards before
|
||||
writing files. Gmail messages become `.eml` files that open in Mail and other
|
||||
mail clients. The export is not encrypted; do not place it inside the backup
|
||||
Git repository, and keep it out of synced/shared folders unless that is
|
||||
intentional.
|
||||
|
||||
## Encryption
|
||||
|
||||
Backups use the Go `filippo.io/age` library with X25519 age identities. There
|
||||
@ -98,6 +127,8 @@ For each shard, `gog backup push`:
|
||||
|
||||
`gog backup verify` decrypts each shard with the local age identity, gunzips it,
|
||||
checks the plaintext SHA-256 hash from the manifest, and verifies row counts.
|
||||
`gog backup cat` and `gog backup export` use the same verification path before
|
||||
returning plaintext.
|
||||
|
||||
## Security Boundary
|
||||
|
||||
|
||||
@ -49,6 +49,8 @@ Generated from `gog schema --json`.
|
||||
- `gog auth tokens import <inPath>` - Import a refresh token file into keyring (contains secrets)
|
||||
- `gog auth tokens list` - List stored tokens (by key only)
|
||||
- `gog backup <command> [flags]` - Encrypted Google account backups
|
||||
- `gog backup cat <shard> [flags]` - Decrypt one backup shard to stdout
|
||||
- `gog backup export [flags]` - Write a local plaintext export
|
||||
- `gog backup gmail <command>` - Gmail backup operations
|
||||
- `gog backup gmail push [flags]` - Export Gmail into encrypted backup shards
|
||||
- `gog backup init [flags]` - Initialize encrypted backup config and repository
|
||||
|
||||
@ -479,80 +479,3 @@ func removeStaleShards(repo string, shards []ShardEntry) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeBackupReadme(repo string) error {
|
||||
path := filepath.Join(repo, "README.md")
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return nil
|
||||
}
|
||||
const body = `# backup-gog
|
||||
|
||||
Encrypted Git backup for Google account data exported by gog.
|
||||
|
||||
This repository is written by ` + "`gog backup push`" + `. It is safe to keep on
|
||||
GitHub because service payloads are encrypted before Git sees them.
|
||||
|
||||
## Layout
|
||||
|
||||
` + "```text" + `
|
||||
README.md
|
||||
manifest.json
|
||||
data/<service>/<account-hash>/...
|
||||
` + "```" + `
|
||||
|
||||
` + "`manifest.json`" + ` is cleartext and contains format version, export time,
|
||||
public age recipients, service names, account hashes, shard paths, row counts,
|
||||
encrypted byte sizes, and plaintext hashes used for verification. Email bodies,
|
||||
subjects, senders, Drive filenames, contacts, event titles, and other private
|
||||
Google data stay inside encrypted ` + "`*.jsonl.gz.age`" + ` shards.
|
||||
|
||||
## Security Model
|
||||
|
||||
Shard contents are deterministic JSONL, gzip-compressed with a fixed timestamp,
|
||||
and encrypted with age for every configured public recipient. The local
|
||||
` + "`~/.gog/age.key`" + ` identity is required to decrypt.
|
||||
|
||||
Git can still see manifest metadata: export time, public recipients, service
|
||||
names, account hashes, shard paths, encrypted byte sizes, plaintext shard
|
||||
hashes, backup cadence, and which encrypted shards changed. Git cannot read
|
||||
Google content without an age identity.
|
||||
|
||||
Anyone who can push to this repository can replace encrypted backup data with
|
||||
different data encrypted to your public recipient. Keep repository write access
|
||||
restricted and review unexpected backup commits. If an age identity is
|
||||
compromised, remove its public recipient and push a new backup; old Git history
|
||||
may still contain shards decryptable by the compromised key.
|
||||
|
||||
## Push
|
||||
|
||||
` + "```bash" + `
|
||||
gog backup push --services gmail
|
||||
` + "```" + `
|
||||
|
||||
The command pulls/rebases this checkout, exports selected Google services,
|
||||
writes encrypted shards, updates the manifest, commits, and pushes this
|
||||
repository.
|
||||
|
||||
## Verify
|
||||
|
||||
` + "```bash" + `
|
||||
gog backup verify
|
||||
` + "```" + `
|
||||
|
||||
` + "`verify`" + ` decrypts every shard with the local age identity and verifies the
|
||||
manifest hashes and row counts. It does not restore or write Google data.
|
||||
|
||||
## Recovery
|
||||
|
||||
Install gog, clone this repo to the path in ` + "`~/.gog/backup.json`" + `,
|
||||
restore the local age identity file, then run:
|
||||
|
||||
` + "```bash" + `
|
||||
gog backup verify
|
||||
` + "```" + `
|
||||
|
||||
Do not commit the age identity. Only public ` + "`age1...`" + ` recipients belong in
|
||||
config; ` + "`AGE-SECRET-KEY-...`" + ` values must stay local or in a password manager.
|
||||
`
|
||||
return os.WriteFile(path, []byte(body), 0o600)
|
||||
}
|
||||
|
||||
@ -58,6 +58,53 @@ func TestPushSnapshotAndVerify(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCatAndDecryptSnapshotVerifyPlaintext(t *testing.T) {
|
||||
ctx, repo, config, _ := initTestBackup(t)
|
||||
shardPath := "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age"
|
||||
pushSingleShard(t, ctx, config, mustGmailMessageShard(t, shardPath, []map[string]string{{
|
||||
"id": "m1",
|
||||
"raw": "plain marker",
|
||||
}}))
|
||||
|
||||
cat, err := Cat(ctx, Options{ConfigPath: config}, shardPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Cat: %v", err)
|
||||
}
|
||||
if cat.Path != shardPath || cat.Service != "gmail" || cat.Kind != "messages" || !strings.Contains(string(cat.Plaintext), "plain marker") {
|
||||
t.Fatalf("unexpected cat shard: %+v plaintext=%q", cat, cat.Plaintext)
|
||||
}
|
||||
|
||||
absPath := filepath.Join(repo, filepath.FromSlash(shardPath))
|
||||
catAbs, err := Cat(ctx, Options{ConfigPath: config}, absPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Cat absolute: %v", err)
|
||||
}
|
||||
if string(catAbs.Plaintext) != string(cat.Plaintext) {
|
||||
t.Fatalf("absolute Cat plaintext mismatch")
|
||||
}
|
||||
|
||||
manifest, gotRepo, shards, err := DecryptSnapshot(ctx, Options{ConfigPath: config})
|
||||
if err != nil {
|
||||
t.Fatalf("DecryptSnapshot: %v", err)
|
||||
}
|
||||
if gotRepo != repo || len(manifest.Shards) != 1 || len(shards) != 1 || string(shards[0].Plaintext) != string(cat.Plaintext) {
|
||||
t.Fatalf("unexpected decrypt snapshot repo=%s manifest=%+v shards=%+v", gotRepo, manifest, shards)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCatRejectsShardOutsideManifest(t *testing.T) {
|
||||
ctx, _, config, _ := initTestBackup(t)
|
||||
pushSingleShard(t, ctx, config, mustGmailMessageShard(t, "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []map[string]string{{"id": "m1"}}))
|
||||
|
||||
for _, ref := range []string{"../data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", "data/gmail/acct/messages/2026/05/part-0001.jsonl.gz.age"} {
|
||||
t.Run(ref, func(t *testing.T) {
|
||||
if _, err := Cat(ctx, Options{ConfigPath: config}, ref); err == nil {
|
||||
t.Fatal("expected Cat to reject missing or escaping shard")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIdentityAndConfigArePrivate(t *testing.T) {
|
||||
_, _, config, identity := initTestBackup(t)
|
||||
|
||||
|
||||
125
internal/backup/read.go
Normal file
125
internal/backup/read.go
Normal file
@ -0,0 +1,125 @@
|
||||
//nolint:err113,wrapcheck,wsl_v5 // Contextual errors keep backup call sites readable.
|
||||
package backup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func Cat(ctx context.Context, opts Options, shardPath string) (PlainShard, error) {
|
||||
cfg, err := ResolveOptions(opts)
|
||||
if err != nil {
|
||||
return PlainShard{}, err
|
||||
}
|
||||
if repoErr := ensureRepo(ctx, cfg); repoErr != nil {
|
||||
return PlainShard{}, repoErr
|
||||
}
|
||||
manifest, err := readManifest(cfg.Repo)
|
||||
if err != nil {
|
||||
return PlainShard{}, err
|
||||
}
|
||||
if manifest.Format != formatVersion {
|
||||
return PlainShard{}, fmt.Errorf("unsupported backup format %d", manifest.Format)
|
||||
}
|
||||
shard, err := findManifestShard(manifest, cfg.Repo, shardPath)
|
||||
if err != nil {
|
||||
return PlainShard{}, err
|
||||
}
|
||||
return decryptManifestShard(cfg, shard)
|
||||
}
|
||||
|
||||
func DecryptSnapshot(ctx context.Context, opts Options) (Manifest, string, []PlainShard, error) {
|
||||
cfg, err := ResolveOptions(opts)
|
||||
if err != nil {
|
||||
return Manifest{}, "", nil, err
|
||||
}
|
||||
if repoErr := ensureRepo(ctx, cfg); repoErr != nil {
|
||||
return Manifest{}, "", nil, repoErr
|
||||
}
|
||||
manifest, err := readManifest(cfg.Repo)
|
||||
if err != nil {
|
||||
return Manifest{}, "", nil, err
|
||||
}
|
||||
if manifest.Format != formatVersion {
|
||||
return Manifest{}, "", nil, fmt.Errorf("unsupported backup format %d", manifest.Format)
|
||||
}
|
||||
shards := make([]PlainShard, 0, len(manifest.Shards))
|
||||
for _, shard := range manifest.Shards {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return Manifest{}, "", nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
plain, err := decryptManifestShard(cfg, shard)
|
||||
if err != nil {
|
||||
return Manifest{}, "", nil, err
|
||||
}
|
||||
shards = append(shards, plain)
|
||||
}
|
||||
return manifest, cfg.Repo, shards, nil
|
||||
}
|
||||
|
||||
func decryptManifestShard(cfg Config, shard ShardEntry) (PlainShard, error) {
|
||||
plaintext, err := decryptShardFile(cfg, shard)
|
||||
if err != nil {
|
||||
return PlainShard{}, err
|
||||
}
|
||||
if err := verifyPlainShard(shard, plaintext); err != nil {
|
||||
return PlainShard{}, err
|
||||
}
|
||||
return PlainShard{
|
||||
Service: shard.Service,
|
||||
Kind: shard.Kind,
|
||||
Account: shard.Account,
|
||||
Path: shard.Path,
|
||||
Rows: shard.Rows,
|
||||
Plaintext: plaintext,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func verifyPlainShard(shard ShardEntry, plaintext []byte) error {
|
||||
if got := sha256Hex(plaintext); got != shard.SHA256 {
|
||||
return fmt.Errorf("backup shard hash mismatch for %s", shard.Path)
|
||||
}
|
||||
rows, err := countJSONLLines(plaintext)
|
||||
if err != nil {
|
||||
return fmt.Errorf("count rows in %s: %w", shard.Path, err)
|
||||
}
|
||||
if rows != shard.Rows {
|
||||
return fmt.Errorf("backup shard row count mismatch for %s: got %d, want %d", shard.Path, rows, shard.Rows)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func findManifestShard(manifest Manifest, repo, shardPath string) (ShardEntry, error) {
|
||||
ref, err := normalizeShardRef(repo, shardPath)
|
||||
if err != nil {
|
||||
return ShardEntry{}, err
|
||||
}
|
||||
if shard, ok := manifest.entry(ref); ok {
|
||||
return shard, nil
|
||||
}
|
||||
return ShardEntry{}, fmt.Errorf("backup shard not found in manifest: %s", shardPath)
|
||||
}
|
||||
|
||||
// normalizeShardRef turns a user-supplied shard reference into a cleaned,
// slash-separated path relative to the backup root.
//
// Surrounding whitespace is trimmed. An absolute ref is first made relative to
// repo. The result is rejected when it is empty, resolves to the root itself,
// climbs above the root, or is still absolute after cleaning.
func normalizeShardRef(repo, ref string) (string, error) {
	candidate := strings.TrimSpace(ref)
	if candidate == "" {
		return "", fmt.Errorf("backup shard path is required")
	}

	if filepath.IsAbs(candidate) {
		rel, err := filepath.Rel(repo, candidate)
		if err != nil {
			return "", err
		}
		candidate = rel
	}

	cleaned := path.Clean(filepath.ToSlash(candidate))
	switch {
	case cleaned == ".",
		cleaned == "..",
		strings.HasPrefix(cleaned, "../"),
		path.IsAbs(cleaned):
		return "", fmt.Errorf("backup shard path escapes backup root: %s", candidate)
	}
	return cleaned, nil
}
|
||||
89
internal/backup/readme.go
Normal file
89
internal/backup/readme.go
Normal file
@ -0,0 +1,89 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// writeBackupReadme creates README.md in repo describing the encrypted backup
// layout, security model, and the push/verify/recovery commands.
//
// When os.Stat succeeds on the README path the file is left untouched.
// Otherwise the README is written with mode 0600 and any write failure is
// wrapped with context.
func writeBackupReadme(repo string) error {
	readmePath := filepath.Join(repo, "README.md")
	_, statErr := os.Stat(readmePath)
	if statErr == nil {
		// Never overwrite a README that is already there.
		return nil
	}

	const body = `# backup-gog

Encrypted Git backup for Google account data exported by gog.

This repository is written by ` + "`gog backup push`" + `. It is safe to keep on
GitHub because service payloads are encrypted before Git sees them.

## Layout

` + "```text" + `
README.md
manifest.json
data/<service>/<account-hash>/...
` + "```" + `

` + "`manifest.json`" + ` is cleartext and contains format version, export time,
public age recipients, service names, account hashes, shard paths, row counts,
encrypted byte sizes, and plaintext hashes used for verification. Email bodies,
subjects, senders, Drive filenames, contacts, event titles, and other private
Google data stay inside encrypted ` + "`*.jsonl.gz.age`" + ` shards.

## Security Model

Shard contents are deterministic JSONL, gzip-compressed with a fixed timestamp,
and encrypted with age for every configured public recipient. The local
` + "`~/.gog/age.key`" + ` identity is required to decrypt.

Git can still see manifest metadata: export time, public recipients, service
names, account hashes, shard paths, encrypted byte sizes, plaintext shard
hashes, backup cadence, and which encrypted shards changed. Git cannot read
Google content without an age identity.

Anyone who can push to this repository can replace encrypted backup data with
different data encrypted to your public recipient. Keep repository write access
restricted and review unexpected backup commits. If an age identity is
compromised, remove its public recipient and push a new backup; old Git history
may still contain shards decryptable by the compromised key.

## Push

` + "```bash" + `
gog backup push --services gmail
` + "```" + `

The command pulls/rebases this checkout, exports selected Google services,
writes encrypted shards, updates the manifest, commits, and pushes this
repository.

## Verify

` + "```bash" + `
gog backup verify
` + "```" + `

` + "`verify`" + ` decrypts every shard with the local age identity and verifies the
manifest hashes and row counts. It does not restore or write Google data.

## Recovery

Install gog, clone this repo to the path in ` + "`~/.gog/backup.json`" + `,
restore the local age identity file, then run:

` + "```bash" + `
gog backup verify
` + "```" + `

Do not commit the age identity. Only public ` + "`age1...`" + ` recipients belong in
config; ` + "`AGE-SECRET-KEY-...`" + ` values must stay local or in a password manager.
`

	writeErr := os.WriteFile(readmePath, []byte(body), 0o600)
	if writeErr == nil {
		return nil
	}
	return fmt.Errorf("write backup readme: %w", writeErr)
}
|
||||
@ -24,6 +24,8 @@ type BackupCmd struct {
|
||||
Push BackupPushCmd `cmd:"" name:"push" help:"Export services into encrypted backup shards"`
|
||||
Status BackupStatusCmd `cmd:"" name:"status" help:"Inspect backup manifest without decrypting shards"`
|
||||
Verify BackupVerifyCmd `cmd:"" name:"verify" help:"Decrypt and verify all backup shards"`
|
||||
Cat BackupCatCmd `cmd:"" name:"cat" help:"Decrypt one backup shard to stdout"`
|
||||
Export BackupExportCmd `cmd:"" name:"export" help:"Write a local plaintext export"`
|
||||
Gmail BackupGmailCmd `cmd:"" name:"gmail" help:"Gmail backup operations"`
|
||||
}
|
||||
|
||||
@ -31,6 +33,8 @@ type BackupGmailCmd struct {
|
||||
Push BackupGmailPushCmd `cmd:"" name:"push" help:"Export Gmail into encrypted backup shards"`
|
||||
}
|
||||
|
||||
const backupServiceGmail = "gmail"
|
||||
|
||||
type backupFlags struct {
|
||||
Config string `name:"config" help:"Backup config path" default:""`
|
||||
Repo string `name:"repo" help:"Local backup repository path"`
|
||||
@ -51,6 +55,23 @@ func (f backupFlags) options() backup.Options {
|
||||
}
|
||||
}
|
||||
|
||||
type backupReadFlags struct {
|
||||
Config string `name:"config" help:"Backup config path" default:""`
|
||||
Repo string `name:"repo" help:"Local backup repository path"`
|
||||
Remote string `name:"remote" help:"Backup Git remote URL"`
|
||||
Identity string `name:"identity" help:"Local age identity path"`
|
||||
}
|
||||
|
||||
func (f backupReadFlags) options() backup.Options {
|
||||
return backup.Options{
|
||||
ConfigPath: f.Config,
|
||||
Repo: f.Repo,
|
||||
Remote: f.Remote,
|
||||
Identity: f.Identity,
|
||||
Push: false,
|
||||
}
|
||||
}
|
||||
|
||||
type BackupInitCmd struct {
|
||||
backupFlags
|
||||
}
|
||||
@ -93,7 +114,7 @@ func (c *BackupPushCmd) Run(ctx context.Context, flags *RootFlags) error {
|
||||
var snapshots []backup.Snapshot
|
||||
for _, service := range services {
|
||||
switch strings.ToLower(strings.TrimSpace(service)) {
|
||||
case "gmail":
|
||||
case backupServiceGmail:
|
||||
snapshot, err := buildGmailBackupSnapshot(ctx, flags, gmailBackupOptions{
|
||||
Query: c.Query,
|
||||
Max: c.Max,
|
||||
@ -233,7 +254,7 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB
|
||||
return backup.Snapshot{}, err
|
||||
}
|
||||
shards := make([]backup.PlainShard, 0, 1)
|
||||
labelShard, err := backup.NewJSONLShard("gmail", "labels", accountHash, fmt.Sprintf("data/gmail/%s/labels.jsonl.gz.age", accountHash), labels)
|
||||
labelShard, err := backup.NewJSONLShard(backupServiceGmail, "labels", accountHash, fmt.Sprintf("data/gmail/%s/labels.jsonl.gz.age", accountHash), labels)
|
||||
if err != nil {
|
||||
return backup.Snapshot{}, err
|
||||
}
|
||||
@ -244,7 +265,7 @@ func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailB
|
||||
}
|
||||
shards = append(shards, messageShards...)
|
||||
return backup.Snapshot{
|
||||
Services: []string{"gmail"},
|
||||
Services: []string{backupServiceGmail},
|
||||
Accounts: []string{accountHash},
|
||||
Counts: map[string]int{
|
||||
"gmail.labels": len(labels),
|
||||
@ -422,7 +443,7 @@ func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage,
|
||||
end = len(values)
|
||||
}
|
||||
rel := fmt.Sprintf("data/gmail/%s/messages/%s/part-%04d.jsonl.gz.age", accountHash, key, part)
|
||||
shard, err := backup.NewJSONLShard("gmail", "messages", accountHash, rel, values[start:end])
|
||||
shard, err := backup.NewJSONLShard(backupServiceGmail, "messages", accountHash, rel, values[start:end])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
394
internal/cmd/backup_export.go
Normal file
394
internal/cmd/backup_export.go
Normal file
@ -0,0 +1,394 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/steipete/gogcli/internal/backup"
|
||||
"github.com/steipete/gogcli/internal/outfmt"
|
||||
"github.com/steipete/gogcli/internal/ui"
|
||||
)
|
||||
|
||||
type BackupCatCmd struct {
|
||||
backupReadFlags
|
||||
Shard string `arg:"" name:"shard" help:"Manifest shard path, or absolute path under the backup repo"`
|
||||
Pretty bool `name:"pretty" help:"Pretty-print each JSONL row"`
|
||||
Out string `name:"out" help:"Write decrypted JSONL to this file instead of stdout"`
|
||||
}
|
||||
|
||||
func (c *BackupCatCmd) Run(ctx context.Context) error {
|
||||
shard, err := backup.Cat(ctx, c.options(), c.Shard)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
data := shard.Plaintext
|
||||
if c.Pretty {
|
||||
data, err = prettyJSONL(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("pretty-print shard: %w", err)
|
||||
}
|
||||
}
|
||||
if strings.TrimSpace(c.Out) != "" {
|
||||
out, expandErr := expandUserPath(c.Out)
|
||||
if expandErr != nil {
|
||||
return expandErr
|
||||
}
|
||||
if mkdirErr := os.MkdirAll(filepath.Dir(out), 0o700); mkdirErr != nil {
|
||||
return mkdirErr
|
||||
}
|
||||
return os.WriteFile(out, data, 0o600)
|
||||
}
|
||||
_, err = os.Stdout.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
type BackupExportCmd struct {
|
||||
backupReadFlags
|
||||
Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"`
|
||||
}
|
||||
|
||||
// backupExportResult is the summary reported after an export, either as JSON
// or as tab-separated text lines.
type backupExportResult struct {
	// Out is the export directory and Repo the backup repository path.
	Out  string `json:"out"`
	Repo string `json:"repo"`

	// ManifestExport carries the manifest's Exported timestamp.
	ManifestExport time.Time `json:"manifestExported"`

	// Files is the file count reported for the export directory.
	Files int `json:"files"`

	// Counts accumulates exported row counts keyed by "service" or
	// "service.kind".
	Counts map[string]int `json:"counts"`
}
|
||||
|
||||
// gmailExportIndexEntry is one row of the exported messages index.jsonl. It
// carries a message's metadata plus the path of the .eml file written for it.
type gmailExportIndexEntry struct {
	ID        string `json:"id"`
	ThreadID  string `json:"threadId,omitempty"`
	HistoryID string `json:"historyId,omitempty"`

	InternalDate int64    `json:"internalDate,omitempty"`
	LabelIDs     []string `json:"labelIds,omitempty"`
	SizeEstimate int64    `json:"sizeEstimate,omitempty"`

	// EML is the path of the exported .eml file for this message.
	EML string `json:"eml"`
}
|
||||
|
||||
func (c *BackupExportCmd) Run(ctx context.Context) error {
|
||||
outDir, err := expandUserPath(c.Out)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
manifest, repo, shards, err := backup.DecryptSnapshot(ctx, c.options())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if exportErr := ensureExportOutsideRepo(outDir, repo); exportErr != nil {
|
||||
return exportErr
|
||||
}
|
||||
result := backupExportResult{
|
||||
Out: outDir,
|
||||
Repo: repo,
|
||||
ManifestExport: manifest.Exported,
|
||||
Counts: map[string]int{},
|
||||
}
|
||||
if mkdirErr := os.MkdirAll(outDir, 0o700); mkdirErr != nil {
|
||||
return mkdirErr
|
||||
}
|
||||
if readmeErr := writeBackupExportReadme(outDir); readmeErr != nil {
|
||||
return readmeErr
|
||||
}
|
||||
if manifestErr := writeJSONFile(filepath.Join(outDir, "manifest.json"), manifest); manifestErr != nil {
|
||||
return manifestErr
|
||||
}
|
||||
if resetErr := resetGmailExportIndexes(outDir, shards); resetErr != nil {
|
||||
return resetErr
|
||||
}
|
||||
for _, shard := range shards {
|
||||
_, count, shardErr := exportPlainShard(outDir, shard)
|
||||
if shardErr != nil {
|
||||
return shardErr
|
||||
}
|
||||
key := shard.Service
|
||||
if strings.TrimSpace(shard.Kind) != "" {
|
||||
key += "." + shard.Kind
|
||||
}
|
||||
result.Counts[key] += count
|
||||
}
|
||||
files, err := countExportFiles(outDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
result.Files = files
|
||||
if outfmt.IsJSON(ctx) {
|
||||
return outfmt.WriteJSON(ctx, os.Stdout, result)
|
||||
}
|
||||
u := ui.FromContext(ctx)
|
||||
u.Out().Printf("out\t%s", result.Out)
|
||||
u.Out().Printf("repo\t%s", result.Repo)
|
||||
u.Out().Printf("files\t%d", result.Files)
|
||||
keys := make([]string, 0, len(result.Counts))
|
||||
for key := range result.Counts {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, key := range keys {
|
||||
u.Out().Printf("count.%s\t%d", key, result.Counts[key])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// prettyJSONL re-indents every non-blank line of a JSONL document and returns
// the indented values, each followed by a newline.
//
// Lines are split directly from the in-memory buffer, so there is no
// per-line size limit; a bufio.Scanner with a fixed maximum would reject a
// shard row that holds a large message. Blank and whitespace-only lines are
// skipped. If a line is not valid JSON the function returns a nil slice and an
// error naming the 1-based line number.
func prettyJSONL(data []byte) ([]byte, error) {
	var out bytes.Buffer
	rest := data
	for lineNo := 1; len(rest) > 0; lineNo++ {
		var line []byte
		line, rest, _ = bytes.Cut(rest, []byte{'\n'})
		line = bytes.TrimSpace(line) // also drops a trailing '\r'
		if len(line) == 0 {
			continue
		}
		if err := json.Indent(&out, line, "", " "); err != nil {
			return nil, fmt.Errorf("line %d: %w", lineNo, err)
		}
		out.WriteByte('\n') // bytes.Buffer writes never return an error
	}
	return out.Bytes(), nil
}
|
||||
|
||||
// expandUserPath resolves a user-supplied path to a cleaned absolute path.
//
// Surrounding whitespace is trimmed; an empty path falls back to
// "~/Documents/gog-backup-export". A bare "~" or a leading "~/" is replaced
// with the current user's home directory. Any other path is made absolute
// relative to the working directory.
func expandUserPath(path string) (string, error) {
	target := strings.TrimSpace(path)
	if target == "" {
		target = "~/Documents/gog-backup-export"
	}

	switch {
	case target == "~":
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}
		target = home
	case strings.HasPrefix(target, "~/"):
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}
		target = filepath.Join(home, strings.TrimPrefix(target, "~/"))
	}

	absolute, err := filepath.Abs(target)
	if err != nil {
		return "", err
	}
	return filepath.Clean(absolute), nil
}
|
||||
|
||||
// ensureExportOutsideRepo rejects an export directory that is the backup
// repository itself or lies anywhere beneath it.
//
// Both paths are made absolute and cleaned, then compared lexically via
// filepath.Rel; symbolic links are not resolved. It returns nil when outDir is
// outside repo.
func ensureExportOutsideRepo(outDir, repo string) error {
	absOut, err := filepath.Abs(outDir)
	if err != nil {
		return err
	}
	absRepo, err := filepath.Abs(repo)
	if err != nil {
		return err
	}
	cleanOut, cleanRepo := filepath.Clean(absOut), filepath.Clean(absRepo)

	rel, err := filepath.Rel(cleanRepo, cleanOut)
	if err != nil {
		return err
	}

	// Outside means the relative path has to climb out of the repo (".." or
	// "../…") or could not be expressed relatively at all.
	climbsOut := rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator))
	if climbsOut || filepath.IsAbs(rel) {
		return nil
	}
	return fmt.Errorf("plaintext export directory must be outside backup repo: %s", cleanOut)
}
|
||||
|
||||
func resetGmailExportIndexes(outDir string, shards []backup.PlainShard) error {
|
||||
seen := map[string]struct{}{}
|
||||
for _, shard := range shards {
|
||||
if shard.Service != backupServiceGmail || shard.Kind != "messages" {
|
||||
continue
|
||||
}
|
||||
index := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages", "index.jsonl")
|
||||
if _, ok := seen[index]; ok {
|
||||
continue
|
||||
}
|
||||
seen[index] = struct{}{}
|
||||
if err := os.Remove(index); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeBackupExportReadme writes README.md into outDir (mode 0600), replacing
// any existing file. The text warns that the directory is an unencrypted copy
// and explains the Gmail .eml / index.jsonl / labels layout.
func writeBackupExportReadme(outDir string) error {
	lines := []string{
		"# gog backup plaintext export",
		"",
		"This directory is an unencrypted local copy created by `gog backup export`.",
		"Keep it out of Git, shared folders, and cloud sync unless that is intentional.",
		"",
		"Gmail messages are written as `.eml` files that can be opened by Mail and many",
		"mail clients. `gmail/<account>/messages/index.jsonl` maps backup message IDs",
		"to the exported `.eml` files. Labels are written as pretty JSON.",
	}
	content := strings.Join(lines, "\n") + "\n"
	target := filepath.Join(outDir, "README.md")
	return os.WriteFile(target, []byte(content), 0o600)
}
|
||||
|
||||
// writeJSONFile marshals value as indented JSON followed by a newline and
// writes it to path with mode 0600, creating missing parent directories with
// mode 0700. Nothing is created on disk when marshalling fails.
func writeJSONFile(path string, value any) error {
	encoded, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		return err
	}

	parent := filepath.Dir(path)
	if mkdirErr := os.MkdirAll(parent, 0o700); mkdirErr != nil {
		return mkdirErr
	}
	return os.WriteFile(path, append(encoded, '\n'), 0o600)
}
|
||||
|
||||
func exportPlainShard(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
switch {
|
||||
case shard.Service == backupServiceGmail && shard.Kind == "labels":
|
||||
return exportGmailLabels(outDir, shard)
|
||||
case shard.Service == backupServiceGmail && shard.Kind == "messages":
|
||||
return exportGmailMessages(outDir, shard)
|
||||
default:
|
||||
return exportRawShard(outDir, shard)
|
||||
}
|
||||
}
|
||||
|
||||
func exportGmailLabels(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
var labels []gmailBackupLabel
|
||||
if err := backup.DecodeJSONL(shard.Plaintext, &labels); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
path := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "labels.json")
|
||||
if err := writeJSONFile(path, labels); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return 1, len(labels), nil
|
||||
}
|
||||
|
||||
func exportGmailMessages(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
var messages []gmailBackupMessage
|
||||
if err := backup.DecodeJSONL(shard.Plaintext, &messages); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
account := sanitizeFilePart(shard.Account)
|
||||
indexPath := filepath.Join(outDir, backupServiceGmail, account, "messages", "index.jsonl")
|
||||
if err := os.MkdirAll(filepath.Dir(indexPath), 0o700); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
indexFile, err := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) // #nosec G304 -- path is confined to caller-selected export dir and sanitized account.
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer indexFile.Close()
|
||||
enc := json.NewEncoder(indexFile)
|
||||
enc.SetEscapeHTML(false)
|
||||
files := 0
|
||||
for _, message := range messages {
|
||||
mime, err := decodeGmailRaw(message.Raw)
|
||||
if err != nil {
|
||||
return files, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, err)
|
||||
}
|
||||
rel := backupExportMessagePath(account, message)
|
||||
path := filepath.Join(outDir, filepath.FromSlash(rel))
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
if err := os.WriteFile(path, mime, 0o600); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
files++
|
||||
if err := enc.Encode(gmailExportIndexEntry{
|
||||
ID: message.ID,
|
||||
ThreadID: message.ThreadID,
|
||||
HistoryID: message.HistoryID,
|
||||
InternalDate: message.InternalDate,
|
||||
LabelIDs: message.LabelIDs,
|
||||
SizeEstimate: message.SizeEstimate,
|
||||
EML: rel,
|
||||
}); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
}
|
||||
return files + 1, len(messages), nil
|
||||
}
|
||||
|
||||
func exportRawShard(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
rel := strings.TrimSuffix(shard.Path, ".gz.age")
|
||||
path := filepath.Join(outDir, "raw", filepath.FromSlash(rel))
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if err := os.WriteFile(path, shard.Plaintext, 0o600); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return 1, shard.Rows, nil
|
||||
}
|
||||
|
||||
// countExportFiles walks outDir recursively and returns how many entries are
// not directories. The walk stops at the first error, which is returned
// together with the count gathered up to that point.
func countExportFiles(outDir string) (int, error) {
	var total int
	visit := func(_ string, entry os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry == nil || entry.IsDir() {
			return nil
		}
		total++
		return nil
	}
	// Run the walk before reading total so the returned count is final.
	err := filepath.WalkDir(outDir, visit)
	return total, err
}
|
||||
|
||||
// decodeGmailRaw decodes a base64url-encoded raw message payload.
// Surrounding whitespace is ignored and an empty payload is an error.
// Unpadded base64url is tried first; if that fails, the padded variant is
// tried and its result (or error) is returned.
func decodeGmailRaw(raw string) ([]byte, error) {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return nil, fmt.Errorf("empty raw payload")
	}
	decoded, err := base64.RawURLEncoding.DecodeString(trimmed)
	if err != nil {
		// Not valid unpadded input; fall back to the padded alphabet.
		return base64.URLEncoding.DecodeString(trimmed)
	}
	return decoded, nil
}
|
||||
|
||||
func backupExportMessagePath(account string, message gmailBackupMessage) string {
|
||||
timestamp := trackingUnknown
|
||||
yearMonth := trackingUnknown
|
||||
if message.InternalDate > 0 {
|
||||
t := time.UnixMilli(message.InternalDate).UTC()
|
||||
timestamp = t.Format("20060102T150405Z")
|
||||
yearMonth = filepath.Join(fmt.Sprintf("%04d", t.Year()), fmt.Sprintf("%02d", int(t.Month())))
|
||||
}
|
||||
name := timestamp + "-" + sanitizeFilePart(message.ID) + ".eml"
|
||||
return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", yearMonth, name))
|
||||
}
|
||||
|
||||
func sanitizeFilePart(value string) string {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
return trackingUnknown
|
||||
}
|
||||
var b strings.Builder
|
||||
for _, r := range value {
|
||||
switch {
|
||||
case r >= 'a' && r <= 'z':
|
||||
b.WriteRune(r)
|
||||
case r >= 'A' && r <= 'Z':
|
||||
b.WriteRune(r)
|
||||
case r >= '0' && r <= '9':
|
||||
b.WriteRune(r)
|
||||
case r == '.', r == '-', r == '_':
|
||||
b.WriteRune(r)
|
||||
default:
|
||||
b.WriteByte('_')
|
||||
}
|
||||
}
|
||||
out := strings.Trim(b.String(), "._-")
|
||||
if out == "" {
|
||||
return trackingUnknown
|
||||
}
|
||||
return out
|
||||
}
|
||||
@ -1,6 +1,9 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@ -88,6 +91,77 @@ func TestMergeBackupSnapshotsKeepsCountsAndShardOrder(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) {
|
||||
payload := []byte("Subject: Hello\r\n\r\nBody")
|
||||
raw := base64.RawURLEncoding.EncodeToString(payload)
|
||||
got, err := decodeGmailRaw(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeGmailRaw raw: %v", err)
|
||||
}
|
||||
if string(got) != string(payload) {
|
||||
t.Fatalf("raw decoded = %q, want %q", got, payload)
|
||||
}
|
||||
|
||||
padded := base64.URLEncoding.EncodeToString(payload)
|
||||
got, err = decodeGmailRaw(padded)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeGmailRaw padded: %v", err)
|
||||
}
|
||||
if string(got) != string(payload) {
|
||||
t.Fatalf("padded decoded = %q, want %q", got, payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) {
|
||||
outDir := t.TempDir()
|
||||
payload := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody")
|
||||
message := gmailBackupMessage{
|
||||
ID: "msg/one",
|
||||
ThreadID: "thread-1",
|
||||
InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
|
||||
LabelIDs: []string{"INBOX"},
|
||||
Raw: base64.RawURLEncoding.EncodeToString(payload),
|
||||
}
|
||||
shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{message})
|
||||
if err != nil {
|
||||
t.Fatalf("NewJSONLShard: %v", err)
|
||||
}
|
||||
|
||||
files, count, err := exportGmailMessages(outDir, shard)
|
||||
if err != nil {
|
||||
t.Fatalf("exportGmailMessages: %v", err)
|
||||
}
|
||||
if files != 2 || count != 1 {
|
||||
t.Fatalf("files,count = %d,%d want 2,1", files, count)
|
||||
}
|
||||
|
||||
emlRel := backupExportMessagePath("acct_hash", message)
|
||||
eml, err := os.ReadFile(filepath.Join(outDir, filepath.FromSlash(emlRel)))
|
||||
if err != nil {
|
||||
t.Fatalf("read eml: %v", err)
|
||||
}
|
||||
if string(eml) != string(payload) {
|
||||
t.Fatalf("eml = %q, want %q", eml, payload)
|
||||
}
|
||||
index := readText(t, filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl"))
|
||||
if !strings.Contains(index, `"id":"msg/one"`) || !strings.Contains(index, `"eml":"`+emlRel+`"`) {
|
||||
t.Fatalf("index missing expected fields: %s", index)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureExportOutsideRepoRejectsNestedPlaintext(t *testing.T) {
|
||||
repo := filepath.Join(t.TempDir(), "repo")
|
||||
if err := os.MkdirAll(filepath.Join(repo, "data"), 0o700); err != nil {
|
||||
t.Fatalf("mkdir repo: %v", err)
|
||||
}
|
||||
if err := ensureExportOutsideRepo(filepath.Join(repo, "plaintext"), repo); err == nil {
|
||||
t.Fatal("expected nested export dir to be rejected")
|
||||
}
|
||||
if err := ensureExportOutsideRepo(filepath.Join(filepath.Dir(repo), "export"), repo); err != nil {
|
||||
t.Fatalf("outside export rejected: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func mustUnixMilli(t *testing.T, value string) int64 {
|
||||
t.Helper()
|
||||
parsed, err := time.Parse(time.RFC3339, value)
|
||||
@ -96,3 +170,12 @@ func mustUnixMilli(t *testing.T, value string) int64 {
|
||||
}
|
||||
return parsed.UnixMilli()
|
||||
}
|
||||
|
||||
// readText is a test helper that returns the contents of path as a string and
// fails the calling test immediately if the file cannot be read.
func readText(t *testing.T, path string) string {
	t.Helper()
	contents, err := os.ReadFile(path)
	if err == nil {
		return string(contents)
	}
	t.Fatalf("read %s: %v", path, err)
	return "" // not reached: Fatalf stops the test goroutine
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user