feat(backup): add markdown Gmail export
This commit is contained in:
parent
d51e94a8ca
commit
1eaad2556c
@ -10,6 +10,7 @@
|
||||
- Backup: add Gmail message-list checkpoints, streaming shard construction, and stderr progress counters so full-mailbox backups can resume cleanly after interruption without keeping every raw message in RAM.
|
||||
- Backup: push encrypted incomplete Gmail checkpoint commits during long cached fetches so day-scale mailbox backups have offsite progress before the final manifest is committed.
|
||||
- Backup: push Gmail checkpoint commits through a single ordered background queue so cached fetches continue while GitHub uploads run.
|
||||
- Backup: add `gog backup export --gmail-format markdown` for local readable Gmail mirrors with Markdown notes and extracted attachment files.
|
||||
- Calendar: add `--start-timezone` / `--end-timezone` to `calendar create` and `calendar update` for preserving named IANA event timezones when RFC3339 inputs only carry numeric offsets. (#422)
|
||||
- Drive: add `drive search --drive` and `--parent` for scoping search to a shared drive or folder. (#525) — thanks @LeanSheng.
|
||||
- Docs: add experimental `docs export --tab` / `drive download --tab` to export a single Google Docs tab as PDF, DOCX, text, Markdown, or HTML. (#535) — thanks @johnbenjaminlewis.
|
||||
|
||||
17
README.md
17
README.md
@ -749,6 +749,7 @@ gog backup status
|
||||
gog backup verify
|
||||
gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty
|
||||
gog backup export --out ~/Documents/gog-backup-export
|
||||
gog backup export --no-pull --out ~/Library/CloudStorage/Dropbox/backup/gog --gmail-format markdown
|
||||
```
|
||||
|
||||
For a bounded first run:
|
||||
@ -789,12 +790,16 @@ Optional Workspace-only services use `--best-effort` by default, recording
|
||||
permission/auth errors as encrypted error shards instead of stopping the run.
|
||||
|
||||
Use `gog backup cat` to decrypt one shard as JSONL, or `gog backup export` to
|
||||
write a local plaintext copy. The export writes Gmail messages as `.eml` files,
|
||||
plus `gmail/<account-hash>/messages/index.jsonl` and pretty `labels.json`.
|
||||
Drive contents export as normal files under `drive/<account-hash>/files/` with
|
||||
an `index.jsonl`; other services export as verified JSONL under `raw/`.
|
||||
That export is intentionally unencrypted; keep it out of Git, shared folders,
|
||||
and cloud sync unless that is intentional.
|
||||
write a local plaintext copy. By default Gmail messages export as `.eml` files.
|
||||
Use `--gmail-format markdown` for a readable mirror with `message.md` files and
|
||||
extracted `attachments/` folders, or `--gmail-format both` to keep Markdown and
|
||||
`.eml` side by side. `--gmail-attachments none` keeps Markdown notes without
|
||||
writing attachment files. Drive contents export as normal files under
|
||||
`drive/<account-hash>/files/` with an `index.jsonl`; other services export as
|
||||
verified JSONL under `raw/`. That export is intentionally unencrypted; keep it
|
||||
out of Git, shared folders, and cloud sync unless that is intentional.
|
||||
Use `--no-pull` when exporting from a local backup repository that another
|
||||
process is already updating.
|
||||
|
||||
`manifest.json` is intentionally cleartext for cheap status and verification.
|
||||
It exposes metadata: export time, service names, account hashes, shard paths,
|
||||
|
||||
@ -64,6 +64,7 @@ Write an unencrypted local copy for easy reading on the Mac:
|
||||
|
||||
```bash
|
||||
gog backup export --out ~/Documents/gog-backup-export
|
||||
gog backup export --no-pull --out ~/Library/CloudStorage/Dropbox/backup/gog --gmail-format markdown
|
||||
```
|
||||
|
||||
Use `--no-push` on `init` or `push` to commit locally without pushing to the
|
||||
@ -166,17 +167,24 @@ manifest.json
|
||||
gmail/<account-hash>/labels.json
|
||||
gmail/<account-hash>/messages/index.jsonl
|
||||
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<message-id>.eml
|
||||
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<subject>-<message-id>/message.md
|
||||
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<subject>-<message-id>/attachments/<filename>
|
||||
drive/<account-hash>/files/index.jsonl
|
||||
drive/<account-hash>/files/<file-id>/<exported-file>
|
||||
raw/<service>/...
|
||||
```
|
||||
|
||||
`gog backup export` decrypts and verifies the manifest-backed shards before
|
||||
writing files. Gmail messages become `.eml` files that open in Mail and other
|
||||
mail clients. Drive content shards become normal files plus an index. Other
|
||||
writing files. Gmail messages become `.eml` files by default. Use
|
||||
`--gmail-format markdown` for `message.md` files with YAML metadata and
|
||||
extracted `attachments/` folders, or `--gmail-format both` to write Markdown and
|
||||
`.eml` side by side. `--gmail-attachments none` keeps Markdown notes but skips
|
||||
attachment files. Drive content shards become normal files plus an index. Other
|
||||
services are written as verified JSONL under `raw/`. The export is not
|
||||
encrypted; do not place it inside the backup Git repository, and keep it out of
|
||||
synced/shared folders unless that is intentional.
|
||||
Use `--no-pull` when exporting from a local backup repository that another
|
||||
process is already updating.
|
||||
|
||||
## Encryption
|
||||
|
||||
|
||||
@ -28,6 +28,7 @@ type Options struct {
|
||||
Identity string
|
||||
Recipients []string
|
||||
Push bool
|
||||
SkipPull bool
|
||||
AsyncPush bool
|
||||
PushQueueLimit int
|
||||
Progress func(format string, args ...any)
|
||||
|
||||
@ -14,8 +14,13 @@ func Cat(ctx context.Context, opts Options, shardPath string) (PlainShard, error
|
||||
if err != nil {
|
||||
return PlainShard{}, err
|
||||
}
|
||||
if repoErr := ensureRepo(ctx, cfg); repoErr != nil {
|
||||
return PlainShard{}, repoErr
|
||||
if !opts.SkipPull {
|
||||
repoErr := ensureRepo(ctx, cfg)
|
||||
if repoErr != nil {
|
||||
return PlainShard{}, repoErr
|
||||
}
|
||||
} else if strings.TrimSpace(cfg.Repo) == "" {
|
||||
return PlainShard{}, fmt.Errorf("backup repo path is required")
|
||||
}
|
||||
manifest, err := readManifest(cfg.Repo)
|
||||
if err != nil {
|
||||
@ -36,8 +41,13 @@ func DecryptSnapshot(ctx context.Context, opts Options) (Manifest, string, []Pla
|
||||
if err != nil {
|
||||
return Manifest{}, "", nil, err
|
||||
}
|
||||
if repoErr := ensureRepo(ctx, cfg); repoErr != nil {
|
||||
return Manifest{}, "", nil, repoErr
|
||||
if !opts.SkipPull {
|
||||
repoErr := ensureRepo(ctx, cfg)
|
||||
if repoErr != nil {
|
||||
return Manifest{}, "", nil, repoErr
|
||||
}
|
||||
} else if strings.TrimSpace(cfg.Repo) == "" {
|
||||
return Manifest{}, "", nil, fmt.Errorf("backup repo path is required")
|
||||
}
|
||||
manifest, err := readManifest(cfg.Repo)
|
||||
if err != nil {
|
||||
|
||||
@ -71,6 +71,7 @@ type backupReadFlags struct {
|
||||
Repo string `name:"repo" help:"Local backup repository path"`
|
||||
Remote string `name:"remote" help:"Backup Git remote URL"`
|
||||
Identity string `name:"identity" help:"Local age identity path"`
|
||||
NoPull bool `name:"no-pull" help:"Use local backup repository state without pulling first"`
|
||||
}
|
||||
|
||||
func (f backupReadFlags) options() backup.Options {
|
||||
@ -80,6 +81,7 @@ func (f backupReadFlags) options() backup.Options {
|
||||
Remote: f.Remote,
|
||||
Identity: f.Identity,
|
||||
Push: false,
|
||||
SkipPull: f.NoPull,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -53,7 +53,9 @@ func (c *BackupCatCmd) Run(ctx context.Context) error {
|
||||
|
||||
type BackupExportCmd struct {
|
||||
backupReadFlags
|
||||
Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"`
|
||||
Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"`
|
||||
GmailFormat string `name:"gmail-format" help:"Gmail message export format: eml, markdown, or both" default:"eml" enum:"eml,markdown,both"`
|
||||
GmailAttachments string `name:"gmail-attachments" help:"Gmail attachment export mode for markdown/both: extract or none" default:"extract" enum:"extract,none"`
|
||||
}
|
||||
|
||||
type backupExportResult struct {
|
||||
@ -64,14 +66,9 @@ type backupExportResult struct {
|
||||
Counts map[string]int `json:"counts"`
|
||||
}
|
||||
|
||||
type gmailExportIndexEntry struct {
|
||||
ID string `json:"id"`
|
||||
ThreadID string `json:"threadId,omitempty"`
|
||||
HistoryID string `json:"historyId,omitempty"`
|
||||
InternalDate int64 `json:"internalDate,omitempty"`
|
||||
LabelIDs []string `json:"labelIds,omitempty"`
|
||||
SizeEstimate int64 `json:"sizeEstimate,omitempty"`
|
||||
EML string `json:"eml"`
|
||||
type backupExportOptions struct {
|
||||
GmailFormat string
|
||||
GmailAttachments string
|
||||
}
|
||||
|
||||
func (c *BackupExportCmd) Run(ctx context.Context) error {
|
||||
@ -101,11 +98,15 @@ func (c *BackupExportCmd) Run(ctx context.Context) error {
|
||||
if manifestErr := writeJSONFile(filepath.Join(outDir, "manifest.json"), manifest); manifestErr != nil {
|
||||
return manifestErr
|
||||
}
|
||||
if resetErr := resetExportIndexes(outDir, shards); resetErr != nil {
|
||||
exportOpts := backupExportOptions{
|
||||
GmailFormat: c.GmailFormat,
|
||||
GmailAttachments: c.GmailAttachments,
|
||||
}
|
||||
if resetErr := resetExportTargets(outDir, shards); resetErr != nil {
|
||||
return resetErr
|
||||
}
|
||||
for _, shard := range shards {
|
||||
_, count, shardErr := exportPlainShard(outDir, shard)
|
||||
_, count, shardErr := exportPlainShard(outDir, shard, exportOpts)
|
||||
if shardErr != nil {
|
||||
return shardErr
|
||||
}
|
||||
@ -205,24 +206,24 @@ func ensureExportOutsideRepo(outDir, repo string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func resetExportIndexes(outDir string, shards []backup.PlainShard) error {
|
||||
func resetExportTargets(outDir string, shards []backup.PlainShard) error {
|
||||
seen := map[string]struct{}{}
|
||||
for _, shard := range shards {
|
||||
index := ""
|
||||
target := ""
|
||||
switch {
|
||||
case shard.Service == backupServiceGmail && shard.Kind == "messages":
|
||||
index = filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages", "index.jsonl")
|
||||
target = filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages")
|
||||
case shard.Service == backupServiceDrive && shard.Kind == "contents":
|
||||
index = filepath.Join(outDir, backupServiceDrive, sanitizeFilePart(shard.Account), "files", "index.jsonl")
|
||||
target = filepath.Join(outDir, backupServiceDrive, sanitizeFilePart(shard.Account), "files", "index.jsonl")
|
||||
}
|
||||
if index == "" {
|
||||
if target == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[index]; ok {
|
||||
if _, ok := seen[target]; ok {
|
||||
continue
|
||||
}
|
||||
seen[index] = struct{}{}
|
||||
if err := os.Remove(index); err != nil && !os.IsNotExist(err) {
|
||||
seen[target] = struct{}{}
|
||||
if err := os.RemoveAll(target); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -235,9 +236,10 @@ func writeBackupExportReadme(outDir string) error {
|
||||
"This directory is an unencrypted local copy created by `gog backup export`.\n" +
|
||||
"Keep it out of Git, shared folders, and cloud sync unless that is intentional.\n" +
|
||||
"\n" +
|
||||
"Gmail messages are written as `.eml` files that can be opened by Mail and many\n" +
|
||||
"mail clients. `gmail/<account>/messages/index.jsonl` maps backup message IDs\n" +
|
||||
"to the exported `.eml` files. Labels are written as pretty JSON.\n"
|
||||
"Gmail messages are written according to `--gmail-format`: `.eml` by default,\n" +
|
||||
"Markdown notes with extracted attachment files when `--gmail-format markdown`,\n" +
|
||||
"or both when `--gmail-format both`. `gmail/<account>/messages/index.jsonl`\n" +
|
||||
"maps backup message IDs to exported files. Labels are written as pretty JSON.\n"
|
||||
return os.WriteFile(filepath.Join(outDir, "README.md"), []byte(body), 0o600)
|
||||
}
|
||||
|
||||
@ -253,14 +255,14 @@ func writeJSONFile(path string, value any) error {
|
||||
return os.WriteFile(path, data, 0o600)
|
||||
}
|
||||
|
||||
func exportPlainShard(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
func exportPlainShard(outDir string, shard backup.PlainShard, opts backupExportOptions) (int, int, error) {
|
||||
switch {
|
||||
case shard.Service == backupServiceDrive && shard.Kind == "contents":
|
||||
return exportDriveContents(outDir, shard)
|
||||
case shard.Service == backupServiceGmail && shard.Kind == "labels":
|
||||
return exportGmailLabels(outDir, shard)
|
||||
case shard.Service == backupServiceGmail && shard.Kind == "messages":
|
||||
return exportGmailMessages(outDir, shard)
|
||||
return exportGmailMessages(outDir, shard, opts)
|
||||
default:
|
||||
return exportRawShard(outDir, shard)
|
||||
}
|
||||
@ -321,65 +323,6 @@ func exportDriveContents(outDir string, shard backup.PlainShard) (int, int, erro
|
||||
return files + 1, len(rows), nil
|
||||
}
|
||||
|
||||
func exportGmailLabels(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
var labels []gmailBackupLabel
|
||||
if err := backup.DecodeJSONL(shard.Plaintext, &labels); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
path := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "labels.json")
|
||||
if err := writeJSONFile(path, labels); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return 1, len(labels), nil
|
||||
}
|
||||
|
||||
func exportGmailMessages(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
var messages []gmailBackupMessage
|
||||
if err := backup.DecodeJSONL(shard.Plaintext, &messages); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
account := sanitizeFilePart(shard.Account)
|
||||
indexPath := filepath.Join(outDir, backupServiceGmail, account, "messages", "index.jsonl")
|
||||
if err := os.MkdirAll(filepath.Dir(indexPath), 0o700); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
indexFile, err := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) // #nosec G304 -- path is confined to caller-selected export dir and sanitized account.
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer indexFile.Close()
|
||||
enc := json.NewEncoder(indexFile)
|
||||
enc.SetEscapeHTML(false)
|
||||
files := 0
|
||||
for _, message := range messages {
|
||||
mime, err := decodeGmailRaw(message.Raw)
|
||||
if err != nil {
|
||||
return files, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, err)
|
||||
}
|
||||
rel := backupExportMessagePath(account, message)
|
||||
path := filepath.Join(outDir, filepath.FromSlash(rel))
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
if err := os.WriteFile(path, mime, 0o600); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
files++
|
||||
if err := enc.Encode(gmailExportIndexEntry{
|
||||
ID: message.ID,
|
||||
ThreadID: message.ThreadID,
|
||||
HistoryID: message.HistoryID,
|
||||
InternalDate: message.InternalDate,
|
||||
LabelIDs: message.LabelIDs,
|
||||
SizeEstimate: message.SizeEstimate,
|
||||
EML: rel,
|
||||
}); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
}
|
||||
return files + 1, len(messages), nil
|
||||
}
|
||||
|
||||
func exportRawShard(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
rel := strings.TrimSuffix(shard.Path, ".gz.age")
|
||||
path := filepath.Join(outDir, "raw", filepath.FromSlash(rel))
|
||||
@ -406,29 +349,6 @@ func countExportFiles(outDir string) (int, error) {
|
||||
return count, err
|
||||
}
|
||||
|
||||
func decodeGmailRaw(raw string) ([]byte, error) {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
return nil, fmt.Errorf("empty raw payload")
|
||||
}
|
||||
if data, err := base64.RawURLEncoding.DecodeString(raw); err == nil {
|
||||
return data, nil
|
||||
}
|
||||
return base64.URLEncoding.DecodeString(raw)
|
||||
}
|
||||
|
||||
func backupExportMessagePath(account string, message gmailBackupMessage) string {
|
||||
timestamp := trackingUnknown
|
||||
yearMonth := trackingUnknown
|
||||
if message.InternalDate > 0 {
|
||||
t := time.UnixMilli(message.InternalDate).UTC()
|
||||
timestamp = t.Format("20060102T150405Z")
|
||||
yearMonth = filepath.Join(fmt.Sprintf("%04d", t.Year()), fmt.Sprintf("%02d", int(t.Month())))
|
||||
}
|
||||
name := timestamp + "-" + sanitizeFilePart(message.ID) + ".eml"
|
||||
return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", yearMonth, name))
|
||||
}
|
||||
|
||||
func sanitizeFilePart(value string) string {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
|
||||
500
internal/cmd/backup_export_gmail.go
Normal file
500
internal/cmd/backup_export_gmail.go
Normal file
@ -0,0 +1,500 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
stdhtml "html"
|
||||
"io"
|
||||
"mime"
|
||||
"mime/multipart"
|
||||
"net/mail"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/steipete/gogcli/internal/backup"
|
||||
)
|
||||
|
||||
// gmailExportIndexEntry is one JSON record written to an account's Gmail
// messages index. It carries the backup metadata of a message, the headers
// parsed from its MIME source, and the export-relative paths of the files
// written for it. Every field except ID is omitted from the JSON when empty.
type gmailExportIndexEntry struct {
	ID           string   `json:"id"`
	ThreadID     string   `json:"threadId,omitempty"`
	HistoryID    string   `json:"historyId,omitempty"`
	InternalDate int64    `json:"internalDate,omitempty"`
	LabelIDs     []string `json:"labelIds,omitempty"`
	SizeEstimate int64    `json:"sizeEstimate,omitempty"`
	// Header values taken from the parsed message.
	Subject string   `json:"subject,omitempty"`
	From    string   `json:"from,omitempty"`
	To      []string `json:"to,omitempty"`
	Cc      []string `json:"cc,omitempty"`
	Date    string   `json:"date,omitempty"`
	// Slash-separated paths, relative to the export directory, of the files
	// written for this message; each is set only when that file was written.
	EML         string   `json:"eml,omitempty"`
	Markdown    string   `json:"markdown,omitempty"`
	Attachments []string `json:"attachments,omitempty"`
}
|
||||
|
||||
// backupEmail holds what parseBackupEmail extracts from one MIME message:
// decoded headers, the first non-empty plain-text and HTML bodies, and any
// attachments found in its multipart structure.
type backupEmail struct {
	Subject     string
	From        string
	To          []string
	Cc          []string
	Date        string
	TextBody    string
	HTMLBody    string
	Attachments []backupEmailAttachment
}
|
||||
|
||||
// backupEmailAttachment is one attachment extracted from a MIME message: the
// filename declared by the part (or a fallback name) and its decoded content.
type backupEmailAttachment struct {
	Filename string
	Data     []byte
}
|
||||
|
||||
func exportGmailLabels(outDir string, shard backup.PlainShard) (int, int, error) {
|
||||
var labels []gmailBackupLabel
|
||||
if err := backup.DecodeJSONL(shard.Plaintext, &labels); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
path := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "labels.json")
|
||||
if err := writeJSONFile(path, labels); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return 1, len(labels), nil
|
||||
}
|
||||
|
||||
func exportGmailMessages(outDir string, shard backup.PlainShard, opts backupExportOptions) (int, int, error) {
|
||||
var messages []gmailBackupMessage
|
||||
if err := backup.DecodeJSONL(shard.Plaintext, &messages); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
gmailFormat := strings.ToLower(strings.TrimSpace(opts.GmailFormat))
|
||||
if gmailFormat == "" {
|
||||
gmailFormat = "eml"
|
||||
}
|
||||
attachmentsMode := strings.ToLower(strings.TrimSpace(opts.GmailAttachments))
|
||||
if attachmentsMode == "" {
|
||||
attachmentsMode = "extract"
|
||||
}
|
||||
account := sanitizeFilePart(shard.Account)
|
||||
indexPath := filepath.Join(outDir, backupServiceGmail, account, "messages", "index.jsonl")
|
||||
if err := os.MkdirAll(filepath.Dir(indexPath), 0o700); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
indexFile, err := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) // #nosec G304 -- path is confined to caller-selected export dir and sanitized account.
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer indexFile.Close()
|
||||
enc := json.NewEncoder(indexFile)
|
||||
enc.SetEscapeHTML(false)
|
||||
files := 0
|
||||
for _, message := range messages {
|
||||
rawMIME, err := decodeGmailRaw(message.Raw)
|
||||
if err != nil {
|
||||
return files, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, err)
|
||||
}
|
||||
parsed, parseErr := parseBackupEmail(rawMIME)
|
||||
if parseErr != nil && gmailFormat != "eml" {
|
||||
return files, 0, fmt.Errorf("parse Gmail MIME %s: %w", message.ID, parseErr)
|
||||
}
|
||||
entry := gmailExportIndexEntry{
|
||||
ID: message.ID,
|
||||
ThreadID: message.ThreadID,
|
||||
HistoryID: message.HistoryID,
|
||||
InternalDate: message.InternalDate,
|
||||
LabelIDs: message.LabelIDs,
|
||||
SizeEstimate: message.SizeEstimate,
|
||||
Subject: parsed.Subject,
|
||||
From: parsed.From,
|
||||
To: parsed.To,
|
||||
Cc: parsed.Cc,
|
||||
Date: parsed.Date,
|
||||
}
|
||||
if gmailFormat == "eml" || gmailFormat == "both" {
|
||||
rel := backupExportMessageEMLPath(account, message)
|
||||
path := filepath.Join(outDir, filepath.FromSlash(rel))
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
if err := os.WriteFile(path, rawMIME, 0o600); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
files++
|
||||
entry.EML = rel
|
||||
}
|
||||
if gmailFormat == "markdown" || gmailFormat == "both" {
|
||||
rel, attachmentRels, written, err := exportGmailMarkdownMessage(outDir, account, message, parsed, attachmentsMode == "extract")
|
||||
if err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
files += written
|
||||
entry.Markdown = rel
|
||||
entry.Attachments = attachmentRels
|
||||
}
|
||||
if err := enc.Encode(entry); err != nil {
|
||||
return files, 0, err
|
||||
}
|
||||
}
|
||||
return files + 1, len(messages), nil
|
||||
}
|
||||
|
||||
// decodeGmailRaw decodes the base64url payload of a Gmail raw message.
//
// Surrounding whitespace is ignored. The unpadded URL alphabet is tried first
// and the padded one second, so both forms are accepted. An empty payload is
// reported as an error.
func decodeGmailRaw(raw string) ([]byte, error) {
	payload := strings.TrimSpace(raw)
	if len(payload) == 0 {
		return nil, fmt.Errorf("empty raw payload")
	}
	decoded, err := base64.RawURLEncoding.DecodeString(payload)
	if err != nil {
		// Not valid unpadded input; fall back to the padded variant and let
		// its result (or error) stand.
		return base64.URLEncoding.DecodeString(payload)
	}
	return decoded, nil
}
|
||||
|
||||
func backupExportMessageEMLPath(account string, message gmailBackupMessage) string {
|
||||
timestamp := trackingUnknown
|
||||
yearMonth := trackingUnknown
|
||||
if message.InternalDate > 0 {
|
||||
t := time.UnixMilli(message.InternalDate).UTC()
|
||||
timestamp = t.Format("20060102T150405Z")
|
||||
yearMonth = filepath.Join(fmt.Sprintf("%04d", t.Year()), fmt.Sprintf("%02d", int(t.Month())))
|
||||
}
|
||||
name := timestamp + "-" + sanitizeFilePart(message.ID) + ".eml"
|
||||
return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", yearMonth, name))
|
||||
}
|
||||
|
||||
func backupExportMessageDir(account string, message gmailBackupMessage, subject string) string {
|
||||
timestamp := trackingUnknown
|
||||
yearMonth := trackingUnknown
|
||||
if message.InternalDate > 0 {
|
||||
t := time.UnixMilli(message.InternalDate).UTC()
|
||||
timestamp = t.Format("20060102T150405Z")
|
||||
yearMonth = filepath.Join(fmt.Sprintf("%04d", t.Year()), fmt.Sprintf("%02d", int(t.Month())))
|
||||
}
|
||||
subjectPart := truncateFilePart(sanitizeFilePart(subject), 72)
|
||||
if subjectPart == trackingUnknown {
|
||||
subjectPart = "no-subject"
|
||||
}
|
||||
name := timestamp + "-" + subjectPart + "-" + sanitizeFilePart(message.ID)
|
||||
return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", yearMonth, name))
|
||||
}
|
||||
|
||||
func exportGmailMarkdownMessage(outDir, account string, message gmailBackupMessage, parsed backupEmail, extractAttachments bool) (string, []string, int, error) {
|
||||
messageDirRel := backupExportMessageDir(account, message, parsed.Subject)
|
||||
messageDir := filepath.Join(outDir, filepath.FromSlash(messageDirRel))
|
||||
if err := os.MkdirAll(messageDir, 0o700); err != nil {
|
||||
return "", nil, 0, err
|
||||
}
|
||||
var attachmentRels []string
|
||||
files := 0
|
||||
if extractAttachments {
|
||||
seen := map[string]int{}
|
||||
for i, attachment := range parsed.Attachments {
|
||||
filename := sanitizeBackupAttachmentFilename(attachment.Filename, i+1)
|
||||
filename = uniqueExportFilename(seen, filename)
|
||||
rel := filepath.ToSlash(filepath.Join(messageDirRel, "attachments", filename))
|
||||
path := filepath.Join(outDir, filepath.FromSlash(rel))
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return "", nil, files, err
|
||||
}
|
||||
if err := os.WriteFile(path, attachment.Data, 0o600); err != nil {
|
||||
return "", nil, files, err
|
||||
}
|
||||
attachmentRels = append(attachmentRels, rel)
|
||||
files++
|
||||
}
|
||||
}
|
||||
body := backupEmailMarkdownBody(parsed)
|
||||
md := renderGmailMessageMarkdown(message, parsed, body, attachmentRels)
|
||||
rel := filepath.ToSlash(filepath.Join(messageDirRel, "message.md"))
|
||||
path := filepath.Join(outDir, filepath.FromSlash(rel))
|
||||
if err := os.WriteFile(path, []byte(md), 0o600); err != nil {
|
||||
return "", nil, files, err
|
||||
}
|
||||
files++
|
||||
return rel, attachmentRels, files, nil
|
||||
}
|
||||
|
||||
func backupEmailMarkdownBody(parsed backupEmail) string {
|
||||
if strings.TrimSpace(parsed.TextBody) != "" {
|
||||
return backupEmailMarkdownText(parsed.TextBody)
|
||||
}
|
||||
if strings.TrimSpace(parsed.HTMLBody) != "" {
|
||||
return cleanBackupHTMLBody(parsed.HTMLBody)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func backupEmailMarkdownText(value string) string {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
return ""
|
||||
}
|
||||
if looksLikeHTML(value) || looksLikeHTMLFragment(value) {
|
||||
return cleanBackupHTMLBody(value)
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func cleanBackupHTMLBody(value string) string {
|
||||
cleaned := stdhtml.UnescapeString(stripHTMLTags(value))
|
||||
return strings.Join(strings.Fields(cleaned), " ")
|
||||
}
|
||||
|
||||
// looksLikeHTMLFragment reports whether value appears to contain HTML markup
// even without a full document wrapper. Matching is case-insensitive.
//
// A marker only counts when the tag name is complete, meaning it is followed
// by whitespace, '>', '/', or the end of the input. Without that check,
// angle-bracketed addresses in plain-text mail such as "<paul@example.com>"
// or "<brian@example.com>" match "<p" and "<br", and the body is then
// flattened as if it were HTML.
func looksLikeHTMLFragment(value string) bool {
	lowered := strings.ToLower(strings.TrimSpace(value))
	if lowered == "" {
		return false
	}
	// An HTML comment opener has no tag name, so a substring match is enough.
	if strings.Contains(lowered, "<!--") {
		return true
	}
	for _, marker := range []string{
		"<p", "</p", "<br", "<div", "</div", "<span", "</span", "<table", "</table",
		"<tr", "</tr", "<td", "</td", "<section", "</section", "<blockquote",
		"</blockquote", "<a", "</a", "<img", "<font", "</font", "<style",
	} {
		if hasHTMLTagMarker(lowered, marker) {
			return true
		}
	}
	return false
}

// hasHTMLTagMarker reports whether text contains marker (such as "<div" or
// "</p") at a position where the tag name ends right after it.
func hasHTMLTagMarker(text, marker string) bool {
	rest := text
	for {
		idx := strings.Index(rest, marker)
		if idx < 0 {
			return false
		}
		rest = rest[idx+len(marker):]
		if rest == "" {
			// Marker runs to the end of the input; treat it as a cut-off tag.
			return true
		}
		switch rest[0] {
		case ' ', '\t', '\n', '\r', '>', '/':
			return true
		}
	}
}
|
||||
|
||||
func renderGmailMessageMarkdown(message gmailBackupMessage, parsed backupEmail, body string, attachmentRels []string) string {
|
||||
var b strings.Builder
|
||||
b.WriteString("---\n")
|
||||
writeYAMLScalar(&b, "gmail_id", message.ID)
|
||||
writeYAMLScalar(&b, "thread_id", message.ThreadID)
|
||||
writeYAMLScalar(&b, "history_id", message.HistoryID)
|
||||
if message.InternalDate > 0 {
|
||||
writeYAMLScalar(&b, "internal_date", time.UnixMilli(message.InternalDate).UTC().Format(time.RFC3339))
|
||||
}
|
||||
writeYAMLScalar(&b, "date", parsed.Date)
|
||||
writeYAMLScalar(&b, "from", parsed.From)
|
||||
writeYAMLList(&b, "to", parsed.To)
|
||||
writeYAMLList(&b, "cc", parsed.Cc)
|
||||
writeYAMLScalar(&b, "subject", parsed.Subject)
|
||||
writeYAMLList(&b, "labels", message.LabelIDs)
|
||||
if message.SizeEstimate > 0 {
|
||||
fmt.Fprintf(&b, "size_estimate: %d\n", message.SizeEstimate)
|
||||
}
|
||||
writeYAMLList(&b, "attachments", attachmentRels)
|
||||
b.WriteString("---\n\n")
|
||||
if strings.TrimSpace(parsed.Subject) != "" {
|
||||
b.WriteString("# ")
|
||||
b.WriteString(markdownHeadingText(parsed.Subject))
|
||||
b.WriteString("\n\n")
|
||||
}
|
||||
if strings.TrimSpace(body) != "" {
|
||||
b.WriteString(strings.TrimSpace(body))
|
||||
b.WriteString("\n")
|
||||
} else {
|
||||
b.WriteString("_No text body found._\n")
|
||||
}
|
||||
if len(attachmentRels) > 0 {
|
||||
b.WriteString("\n## Attachments\n\n")
|
||||
for _, rel := range attachmentRels {
|
||||
name := filepath.Base(rel)
|
||||
b.WriteString("- [")
|
||||
b.WriteString(markdownLinkText(name))
|
||||
b.WriteString("](")
|
||||
b.WriteString("attachments/")
|
||||
b.WriteString(markdownLinkTarget(name))
|
||||
b.WriteString(")\n")
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func parseBackupEmail(rawMIME []byte) (backupEmail, error) {
|
||||
msg, err := mail.ReadMessage(bytes.NewReader(rawMIME))
|
||||
if err != nil {
|
||||
return backupEmail{}, err
|
||||
}
|
||||
out := backupEmail{
|
||||
Subject: decodeMIMEHeader(msg.Header.Get("Subject")),
|
||||
From: decodeMIMEHeader(msg.Header.Get("From")),
|
||||
Date: decodeMIMEHeader(msg.Header.Get("Date")),
|
||||
To: parseAddressHeader(msg.Header.Get("To")),
|
||||
Cc: parseAddressHeader(msg.Header.Get("Cc")),
|
||||
}
|
||||
body, err := io.ReadAll(msg.Body)
|
||||
if err != nil {
|
||||
return backupEmail{}, err
|
||||
}
|
||||
if err := parseBackupEmailEntity(body, string(msg.Header.Get("Content-Type")), string(msg.Header.Get("Content-Transfer-Encoding")), &out); err != nil {
|
||||
return backupEmail{}, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func parseBackupEmailEntity(body []byte, contentType, transferEncoding string, out *backupEmail) error {
|
||||
mediaType, params, err := mime.ParseMediaType(contentType)
|
||||
if err != nil || strings.TrimSpace(mediaType) == "" {
|
||||
mediaType = "text/plain"
|
||||
}
|
||||
mediaType = strings.ToLower(mediaType)
|
||||
if strings.HasPrefix(mediaType, "multipart/") {
|
||||
boundary := params["boundary"]
|
||||
if strings.TrimSpace(boundary) == "" {
|
||||
return nil
|
||||
}
|
||||
reader := multipart.NewReader(bytes.NewReader(body), boundary)
|
||||
for {
|
||||
part, partErr := reader.NextPart()
|
||||
if partErr == io.EOF {
|
||||
break
|
||||
}
|
||||
if partErr != nil {
|
||||
return partErr
|
||||
}
|
||||
partBody, readErr := io.ReadAll(part)
|
||||
_ = part.Close()
|
||||
if readErr != nil {
|
||||
return readErr
|
||||
}
|
||||
partContentType := part.Header.Get("Content-Type")
|
||||
partEncoding := part.Header.Get("Content-Transfer-Encoding")
|
||||
if isBackupEmailAttachment(part.Header.Get("Content-Disposition"), partContentType) {
|
||||
decoded := decodeTransferEncoding(partBody, partEncoding)
|
||||
filename := backupAttachmentFilename(part.Header.Get("Content-Disposition"), partContentType)
|
||||
out.Attachments = append(out.Attachments, backupEmailAttachment{
|
||||
Filename: filename,
|
||||
Data: decoded,
|
||||
})
|
||||
continue
|
||||
}
|
||||
if err := parseBackupEmailEntity(partBody, partContentType, partEncoding, out); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
decoded := decodeTransferEncoding(body, transferEncoding)
|
||||
decoded = decodeBodyCharset(decoded, contentType)
|
||||
switch mediaType {
|
||||
case "text/plain":
|
||||
if strings.TrimSpace(out.TextBody) == "" {
|
||||
out.TextBody = string(decoded)
|
||||
}
|
||||
case "text/html":
|
||||
if strings.TrimSpace(out.HTMLBody) == "" {
|
||||
out.HTMLBody = string(decoded)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isBackupEmailAttachment reports whether a MIME part should be exported as an
// attachment, judged from its Content-Disposition and Content-Type headers.
//
// A part counts as an attachment when its disposition is "attachment", when it
// is "inline" with a non-blank filename parameter, or when its Content-Type
// carries a non-blank name parameter. Header parse errors are ignored and
// treated as "no information".
func isBackupEmailAttachment(contentDisposition, contentType string) bool {
	kind, dispositionParams, _ := mime.ParseMediaType(contentDisposition)
	switch {
	case strings.EqualFold(kind, "attachment"):
		return true
	case strings.EqualFold(kind, "inline") && strings.TrimSpace(dispositionParams["filename"]) != "":
		return true
	}

	_, typeParams, _ := mime.ParseMediaType(contentType)
	declaredName := strings.TrimSpace(typeParams["name"])
	return declaredName != ""
}
|
||||
|
||||
func backupAttachmentFilename(contentDisposition, contentType string) string {
|
||||
_, dispParams, _ := mime.ParseMediaType(contentDisposition)
|
||||
if filename := decodeMIMEHeader(dispParams["filename"]); strings.TrimSpace(filename) != "" {
|
||||
return filename
|
||||
}
|
||||
_, typeParams, _ := mime.ParseMediaType(contentType)
|
||||
if filename := decodeMIMEHeader(typeParams["name"]); strings.TrimSpace(filename) != "" {
|
||||
return filename
|
||||
}
|
||||
return "attachment"
|
||||
}
|
||||
|
||||
// decodeMIMEHeader trims value and decodes any RFC 2047 encoded-words in it.
//
// A blank input yields "". If decoding fails, the trimmed but otherwise
// undecoded input is returned so the caller always gets something usable.
func decodeMIMEHeader(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return ""
	}

	var dec mime.WordDecoder
	if text, err := dec.DecodeHeader(trimmed); err == nil {
		return strings.TrimSpace(text)
	}
	// Decoding failed: keep the raw header text rather than losing it.
	return trimmed
}
|
||||
|
||||
func parseAddressHeader(value string) []string {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
return nil
|
||||
}
|
||||
addrs, err := mail.ParseAddressList(value)
|
||||
if err != nil {
|
||||
return []string{decodeMIMEHeader(value)}
|
||||
}
|
||||
out := make([]string, 0, len(addrs))
|
||||
for _, addr := range addrs {
|
||||
out = append(out, addr.String())
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// writeYAMLScalar appends a `key: "value"` line to b, quoting value with %q.
// Nothing is written when value is empty or whitespace only.
func writeYAMLScalar(b *strings.Builder, key, value string) {
	if strings.TrimSpace(value) != "" {
		fmt.Fprintf(b, "%s: %q\n", key, value)
	}
}
|
||||
|
||||
// writeYAMLList appends a `key:` line to b followed by one %q-quoted list
// item line per entry in values. An empty slice writes nothing at all, so the
// key is omitted rather than emitted with no items.
func writeYAMLList(b *strings.Builder, key string, values []string) {
	if len(values) == 0 {
		return
	}

	b.WriteString(key + ":\n")
	for i := range values {
		fmt.Fprintf(b, " - %q\n", values[i])
	}
}
|
||||
|
||||
// markdownHeadingText flattens value onto a single line: every carriage
// return and line feed becomes a space, then surrounding whitespace is
// trimmed.
func markdownHeadingText(value string) string {
	singleLine := strings.NewReplacer("\r", " ", "\n", " ").Replace(value)
	return strings.TrimSpace(singleLine)
}
|
||||
|
||||
// markdownLinkText backslash-escapes square brackets in value so that it can
// be placed between the [ and ] of a Markdown link. No other characters are
// altered.
func markdownLinkText(value string) string {
	return strings.NewReplacer("[", "\\[", "]", "\\]").Replace(value)
}
|
||||
|
||||
// markdownLinkTarget percent-encodes spaces and parentheses in value so that
// it can be placed between the ( and ) of a Markdown link. No other
// characters are altered.
func markdownLinkTarget(value string) string {
	return strings.NewReplacer(
		" ", "%20",
		"(", "%28",
		")", "%29",
	).Replace(value)
}
|
||||
|
||||
func sanitizeBackupAttachmentFilename(value string, fallbackIndex int) string {
|
||||
value = filepath.Base(strings.TrimSpace(value))
|
||||
if value == "" || value == "." || value == ".." {
|
||||
value = fmt.Sprintf("attachment-%03d", fallbackIndex)
|
||||
}
|
||||
return sanitizeFilePart(value)
|
||||
}
|
||||
|
||||
// uniqueExportFilename returns a filename that has not yet been handed out
// for the given seen map, and records it there.
//
// An empty filename is treated as "attachment". The first request for a name
// returns it unchanged. Later requests for the same name return
// "<base>-<n><ext>" with n starting at 2 and increasing until the candidate
// is itself unused. Every returned name, including generated ones, is
// recorded in seen, so a generated "a-2.txt" can never collide with an
// attachment that is literally named "a-2.txt".
//
// seen must be non-nil; it is mutated on every call.
func uniqueExportFilename(seen map[string]int, filename string) string {
	if filename == "" {
		filename = "attachment"
	}
	if seen[filename] == 0 {
		seen[filename] = 1
		return filename
	}

	ext := filepath.Ext(filename)
	base := strings.TrimSuffix(filename, ext)
	// Resume numbering after the last suffix used for this name and skip any
	// candidate that was already returned, literally or as a generated name.
	for n := seen[filename] + 1; ; n++ {
		candidate := fmt.Sprintf("%s-%d%s", base, n, ext)
		if seen[candidate] != 0 {
			continue
		}
		seen[filename] = n
		seen[candidate] = 1
		return candidate
	}
}
|
||||
|
||||
// truncateFilePart shortens value to at most limit bytes and then strips any
// leading or trailing '.', '_' and '-' characters from the shortened result.
//
// A non-positive limit, or a value that already fits, returns value
// unchanged (no trimming is applied in that case). The cut is moved back to
// the nearest rune boundary at or before limit so a multi-byte UTF-8
// character is never split, which would leave invalid bytes in a filename.
func truncateFilePart(value string, limit int) string {
	if limit <= 0 || len(value) <= limit {
		return value
	}

	// Ranging over a string yields the byte offset at which each rune starts;
	// keep the last such offset that does not exceed limit.
	cut := 0
	for i := range value {
		if i > limit {
			break
		}
		cut = i
	}
	return strings.Trim(value[:cut], "._-")
}
|
||||
148
internal/cmd/backup_export_gmail_test.go
Normal file
148
internal/cmd/backup_export_gmail_test.go
Normal file
@ -0,0 +1,148 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/steipete/gogcli/internal/backup"
|
||||
)
|
||||
|
||||
func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) {
|
||||
payload := []byte("Subject: Hello\r\n\r\nBody")
|
||||
raw := base64.RawURLEncoding.EncodeToString(payload)
|
||||
got, err := decodeGmailRaw(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeGmailRaw raw: %v", err)
|
||||
}
|
||||
if string(got) != string(payload) {
|
||||
t.Fatalf("raw decoded = %q, want %q", got, payload)
|
||||
}
|
||||
|
||||
padded := base64.URLEncoding.EncodeToString(payload)
|
||||
got, err = decodeGmailRaw(padded)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeGmailRaw padded: %v", err)
|
||||
}
|
||||
if string(got) != string(payload) {
|
||||
t.Fatalf("padded decoded = %q, want %q", got, payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) {
|
||||
outDir := t.TempDir()
|
||||
payload := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody")
|
||||
message := gmailBackupMessage{
|
||||
ID: "msg/one",
|
||||
ThreadID: "thread-1",
|
||||
InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
|
||||
LabelIDs: []string{"INBOX"},
|
||||
Raw: base64.RawURLEncoding.EncodeToString(payload),
|
||||
}
|
||||
shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{message})
|
||||
if err != nil {
|
||||
t.Fatalf("NewJSONLShard: %v", err)
|
||||
}
|
||||
|
||||
files, count, err := exportGmailMessages(outDir, shard, backupExportOptions{GmailFormat: "eml"})
|
||||
if err != nil {
|
||||
t.Fatalf("exportGmailMessages: %v", err)
|
||||
}
|
||||
if files != 2 || count != 1 {
|
||||
t.Fatalf("files,count = %d,%d want 2,1", files, count)
|
||||
}
|
||||
|
||||
emlRel := backupExportMessageEMLPath("acct_hash", message)
|
||||
eml, err := os.ReadFile(filepath.Join(outDir, filepath.FromSlash(emlRel)))
|
||||
if err != nil {
|
||||
t.Fatalf("read eml: %v", err)
|
||||
}
|
||||
if string(eml) != string(payload) {
|
||||
t.Fatalf("eml = %q, want %q", eml, payload)
|
||||
}
|
||||
index := readText(t, filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl"))
|
||||
if !strings.Contains(index, `"id":"msg/one"`) || !strings.Contains(index, `"eml":"`+emlRel+`"`) {
|
||||
t.Fatalf("index missing expected fields: %s", index)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportGmailMessagesWritesMarkdownAndAttachments(t *testing.T) {
|
||||
outDir := t.TempDir()
|
||||
payload := strings.Join([]string{
|
||||
"Subject: Report",
|
||||
"From: Alice <alice@example.com>",
|
||||
"To: Peter <peter@example.com>",
|
||||
"Date: Thu, 02 Apr 2026 10:00:00 +0000",
|
||||
"MIME-Version: 1.0",
|
||||
`Content-Type: multipart/mixed; boundary="b1"`,
|
||||
"",
|
||||
"--b1",
|
||||
"Content-Type: text/plain; charset=utf-8",
|
||||
"",
|
||||
"Body text.",
|
||||
"--b1",
|
||||
"Content-Type: application/pdf",
|
||||
"Content-Transfer-Encoding: base64",
|
||||
`Content-Disposition: attachment; filename="report.pdf"`,
|
||||
"",
|
||||
base64.StdEncoding.EncodeToString([]byte("pdf bytes")),
|
||||
"--b1--",
|
||||
"",
|
||||
}, "\r\n")
|
||||
message := gmailBackupMessage{
|
||||
ID: "msg/one",
|
||||
ThreadID: "thread-1",
|
||||
InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
|
||||
LabelIDs: []string{"INBOX"},
|
||||
Raw: base64.RawURLEncoding.EncodeToString([]byte(payload)),
|
||||
}
|
||||
shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{message})
|
||||
if err != nil {
|
||||
t.Fatalf("NewJSONLShard: %v", err)
|
||||
}
|
||||
|
||||
files, count, err := exportGmailMessages(outDir, shard, backupExportOptions{GmailFormat: "markdown", GmailAttachments: "extract"})
|
||||
if err != nil {
|
||||
t.Fatalf("exportGmailMessages: %v", err)
|
||||
}
|
||||
if files != 3 || count != 1 {
|
||||
t.Fatalf("files,count = %d,%d want 3,1", files, count)
|
||||
}
|
||||
messageDir := backupExportMessageDir("acct_hash", message, "Report")
|
||||
mdRel := filepath.ToSlash(filepath.Join(messageDir, "message.md"))
|
||||
md := readText(t, filepath.Join(outDir, filepath.FromSlash(mdRel)))
|
||||
for _, want := range []string{
|
||||
`subject: "Report"`,
|
||||
"# Report",
|
||||
"Body text.",
|
||||
"- [report.pdf](attachments/report.pdf)",
|
||||
} {
|
||||
if !strings.Contains(md, want) {
|
||||
t.Fatalf("markdown missing %q:\n%s", want, md)
|
||||
}
|
||||
}
|
||||
attachment := readText(t, filepath.Join(outDir, filepath.FromSlash(filepath.Join(messageDir, "attachments", "report.pdf"))))
|
||||
if attachment != "pdf bytes" {
|
||||
t.Fatalf("attachment = %q", attachment)
|
||||
}
|
||||
index := readText(t, filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl"))
|
||||
if !strings.Contains(index, `"markdown":"`+mdRel+`"`) ||
|
||||
!strings.Contains(index, `"attachments":["`+filepath.ToSlash(filepath.Join(messageDir, "attachments", "report.pdf"))+`"]`) ||
|
||||
strings.Contains(index, `"eml"`) {
|
||||
t.Fatalf("index missing expected markdown-only fields: %s", index)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupEmailMarkdownBodyCleansHTMLFragments(t *testing.T) {
|
||||
got := backupEmailMarkdownBody(backupEmail{TextBody: "<p>Hello <b>Peter</b></p>"})
|
||||
if got != "Hello Peter" {
|
||||
t.Fatalf("body = %q, want %q", got, "Hello Peter")
|
||||
}
|
||||
|
||||
got = backupEmailMarkdownBody(backupEmail{HTMLBody: "<html><body><p>Hi<br>there</p></body></html>"})
|
||||
if got != "Hi there" {
|
||||
t.Fatalf("html body = %q, want %q", got, "Hi there")
|
||||
}
|
||||
}
|
||||
@ -37,6 +37,16 @@ func TestBackupAccountHashStableAndOpaque(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupReadFlagsOptionsSkipPull(t *testing.T) {
|
||||
opts := backupReadFlags{NoPull: true}.options()
|
||||
if !opts.SkipPull {
|
||||
t.Fatal("SkipPull = false, want true")
|
||||
}
|
||||
if opts.Push {
|
||||
t.Fatal("Push = true, want false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildGmailMessageShardsBucketsSortsAndChunks(t *testing.T) {
|
||||
accountHash := "accthash"
|
||||
messages := []gmailBackupMessage{
|
||||
@ -632,64 +642,6 @@ func TestDownloadDriveBackupContentHonorsTimeout(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) {
|
||||
payload := []byte("Subject: Hello\r\n\r\nBody")
|
||||
raw := base64.RawURLEncoding.EncodeToString(payload)
|
||||
got, err := decodeGmailRaw(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeGmailRaw raw: %v", err)
|
||||
}
|
||||
if string(got) != string(payload) {
|
||||
t.Fatalf("raw decoded = %q, want %q", got, payload)
|
||||
}
|
||||
|
||||
padded := base64.URLEncoding.EncodeToString(payload)
|
||||
got, err = decodeGmailRaw(padded)
|
||||
if err != nil {
|
||||
t.Fatalf("decodeGmailRaw padded: %v", err)
|
||||
}
|
||||
if string(got) != string(payload) {
|
||||
t.Fatalf("padded decoded = %q, want %q", got, payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) {
|
||||
outDir := t.TempDir()
|
||||
payload := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody")
|
||||
message := gmailBackupMessage{
|
||||
ID: "msg/one",
|
||||
ThreadID: "thread-1",
|
||||
InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
|
||||
LabelIDs: []string{"INBOX"},
|
||||
Raw: base64.RawURLEncoding.EncodeToString(payload),
|
||||
}
|
||||
shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{message})
|
||||
if err != nil {
|
||||
t.Fatalf("NewJSONLShard: %v", err)
|
||||
}
|
||||
|
||||
files, count, err := exportGmailMessages(outDir, shard)
|
||||
if err != nil {
|
||||
t.Fatalf("exportGmailMessages: %v", err)
|
||||
}
|
||||
if files != 2 || count != 1 {
|
||||
t.Fatalf("files,count = %d,%d want 2,1", files, count)
|
||||
}
|
||||
|
||||
emlRel := backupExportMessagePath("acct_hash", message)
|
||||
eml, err := os.ReadFile(filepath.Join(outDir, filepath.FromSlash(emlRel)))
|
||||
if err != nil {
|
||||
t.Fatalf("read eml: %v", err)
|
||||
}
|
||||
if string(eml) != string(payload) {
|
||||
t.Fatalf("eml = %q, want %q", eml, payload)
|
||||
}
|
||||
index := readText(t, filepath.Join(outDir, "gmail", "acct_hash", "messages", "index.jsonl"))
|
||||
if !strings.Contains(index, `"id":"msg/one"`) || !strings.Contains(index, `"eml":"`+emlRel+`"`) {
|
||||
t.Fatalf("index missing expected fields: %s", index)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportDriveContentsWritesReadableFilesAndIndex(t *testing.T) {
|
||||
outDir := t.TempDir()
|
||||
row := driveBackupContent{
|
||||
|
||||
Loading…
Reference in New Issue
Block a user