feat(backup): add markdown Gmail export

This commit is contained in:
Peter Steinberger 2026-04-28 07:29:27 +01:00
parent d51e94a8ca
commit 1eaad2556c
No known key found for this signature in database
10 changed files with 723 additions and 176 deletions

View File

@ -10,6 +10,7 @@
- Backup: add Gmail message-list checkpoints, streaming shard construction, and stderr progress counters so full-mailbox backups can resume cleanly after interruption without keeping every raw message in RAM.
- Backup: push encrypted incomplete Gmail checkpoint commits during long cached fetches so day-scale mailbox backups have offsite progress before the final manifest is committed.
- Backup: push Gmail checkpoint commits through a single ordered background queue so cached fetches continue while GitHub uploads run.
- Backup: add `gog backup export --gmail-format markdown` for local readable Gmail mirrors with Markdown notes and extracted attachment files.
- Calendar: add `--start-timezone` / `--end-timezone` to `calendar create` and `calendar update` for preserving named IANA event timezones when RFC3339 inputs only carry numeric offsets. (#422)
- Drive: add `drive search --drive` and `--parent` for scoping search to a shared drive or folder. (#525) — thanks @LeanSheng.
- Docs: add experimental `docs export --tab` / `drive download --tab` to export a single Google Docs tab as PDF, DOCX, text, Markdown, or HTML. (#535) — thanks @johnbenjaminlewis.

View File

@ -749,6 +749,7 @@ gog backup status
gog backup verify
gog backup cat data/gmail/<account-hash>/labels.jsonl.gz.age --pretty
gog backup export --out ~/Documents/gog-backup-export
gog backup export --no-pull --out ~/Library/CloudStorage/Dropbox/backup/gog --gmail-format markdown
```
For a bounded first run:
@ -789,12 +790,16 @@ Optional Workspace-only services use `--best-effort` by default, recording
permission/auth errors as encrypted error shards instead of stopping the run.
Use `gog backup cat` to decrypt one shard as JSONL, or `gog backup export` to
write a local plaintext copy. The export writes Gmail messages as `.eml` files,
plus `gmail/<account-hash>/messages/index.jsonl` and pretty `labels.json`.
Drive contents export as normal files under `drive/<account-hash>/files/` with
an `index.jsonl`; other services export as verified JSONL under `raw/`.
That export is intentionally unencrypted; keep it out of Git, shared folders,
and cloud sync unless that is intentional.
write a local plaintext copy. By default Gmail messages export as `.eml` files.
Use `--gmail-format markdown` for a readable mirror with `message.md` files and
extracted `attachments/` folders, or `--gmail-format both` to keep Markdown and
`.eml` side by side. `--gmail-attachments none` keeps Markdown notes without
writing attachment files. Drive contents export as normal files under
`drive/<account-hash>/files/` with an `index.jsonl`; other services export as
verified JSONL under `raw/`. That export is intentionally unencrypted; keep it
out of Git, shared folders, and cloud sync unless that is intentional.
Use `--no-pull` when exporting from a local backup repository that another
process is already updating.
`manifest.json` is intentionally cleartext for cheap status and verification.
It exposes metadata: export time, service names, account hashes, shard paths,

View File

@ -64,6 +64,7 @@ Write an unencrypted local copy for easy reading on the Mac:
```bash
gog backup export --out ~/Documents/gog-backup-export
gog backup export --no-pull --out ~/Library/CloudStorage/Dropbox/backup/gog --gmail-format markdown
```
Use `--no-push` on `init` or `push` to commit locally without pushing to the
@ -166,17 +167,24 @@ manifest.json
gmail/<account-hash>/labels.json
gmail/<account-hash>/messages/index.jsonl
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<message-id>.eml
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<subject>-<message-id>/message.md
gmail/<account-hash>/messages/YYYY/MM/<timestamp>-<subject>-<message-id>/attachments/<filename>
drive/<account-hash>/files/index.jsonl
drive/<account-hash>/files/<file-id>/<exported-file>
raw/<service>/...
```
`gog backup export` decrypts and verifies the manifest-backed shards before
writing files. Gmail messages become `.eml` files that open in Mail and other
mail clients. Drive content shards become normal files plus an index. Other
writing files. Gmail messages become `.eml` files by default. Use
`--gmail-format markdown` for `message.md` files with YAML metadata and
extracted `attachments/` folders, or `--gmail-format both` to write Markdown and
`.eml` side by side. `--gmail-attachments none` keeps Markdown notes but skips
attachment files. Drive content shards become normal files plus an index. Other
services are written as verified JSONL under `raw/`. The export is not
encrypted; do not place it inside the backup Git repository, and keep it out of
synced/shared folders unless that is intentional.
Use `--no-pull` when exporting from a local backup repository that another
process is already updating.
## Encryption

View File

@ -28,6 +28,7 @@ type Options struct {
Identity string
Recipients []string
Push bool
SkipPull bool
AsyncPush bool
PushQueueLimit int
Progress func(format string, args ...any)

View File

@ -14,8 +14,13 @@ func Cat(ctx context.Context, opts Options, shardPath string) (PlainShard, error
if err != nil {
return PlainShard{}, err
}
if repoErr := ensureRepo(ctx, cfg); repoErr != nil {
return PlainShard{}, repoErr
if !opts.SkipPull {
repoErr := ensureRepo(ctx, cfg)
if repoErr != nil {
return PlainShard{}, repoErr
}
} else if strings.TrimSpace(cfg.Repo) == "" {
return PlainShard{}, fmt.Errorf("backup repo path is required")
}
manifest, err := readManifest(cfg.Repo)
if err != nil {
@ -36,8 +41,13 @@ func DecryptSnapshot(ctx context.Context, opts Options) (Manifest, string, []Pla
if err != nil {
return Manifest{}, "", nil, err
}
if repoErr := ensureRepo(ctx, cfg); repoErr != nil {
return Manifest{}, "", nil, repoErr
if !opts.SkipPull {
repoErr := ensureRepo(ctx, cfg)
if repoErr != nil {
return Manifest{}, "", nil, repoErr
}
} else if strings.TrimSpace(cfg.Repo) == "" {
return Manifest{}, "", nil, fmt.Errorf("backup repo path is required")
}
manifest, err := readManifest(cfg.Repo)
if err != nil {

View File

@ -71,6 +71,7 @@ type backupReadFlags struct {
Repo string `name:"repo" help:"Local backup repository path"`
Remote string `name:"remote" help:"Backup Git remote URL"`
Identity string `name:"identity" help:"Local age identity path"`
NoPull bool `name:"no-pull" help:"Use local backup repository state without pulling first"`
}
func (f backupReadFlags) options() backup.Options {
@ -80,6 +81,7 @@ func (f backupReadFlags) options() backup.Options {
Remote: f.Remote,
Identity: f.Identity,
Push: false,
SkipPull: f.NoPull,
}
}

View File

@ -53,7 +53,9 @@ func (c *BackupCatCmd) Run(ctx context.Context) error {
type BackupExportCmd struct {
backupReadFlags
Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"`
Out string `name:"out" help:"Plaintext export directory" default:"~/Documents/gog-backup-export"`
GmailFormat string `name:"gmail-format" help:"Gmail message export format: eml, markdown, or both" default:"eml" enum:"eml,markdown,both"`
GmailAttachments string `name:"gmail-attachments" help:"Gmail attachment export mode for markdown/both: extract or none" default:"extract" enum:"extract,none"`
}
type backupExportResult struct {
@ -64,14 +66,9 @@ type backupExportResult struct {
Counts map[string]int `json:"counts"`
}
type gmailExportIndexEntry struct {
ID string `json:"id"`
ThreadID string `json:"threadId,omitempty"`
HistoryID string `json:"historyId,omitempty"`
InternalDate int64 `json:"internalDate,omitempty"`
LabelIDs []string `json:"labelIds,omitempty"`
SizeEstimate int64 `json:"sizeEstimate,omitempty"`
EML string `json:"eml"`
type backupExportOptions struct {
GmailFormat string
GmailAttachments string
}
func (c *BackupExportCmd) Run(ctx context.Context) error {
@ -101,11 +98,15 @@ func (c *BackupExportCmd) Run(ctx context.Context) error {
if manifestErr := writeJSONFile(filepath.Join(outDir, "manifest.json"), manifest); manifestErr != nil {
return manifestErr
}
if resetErr := resetExportIndexes(outDir, shards); resetErr != nil {
exportOpts := backupExportOptions{
GmailFormat: c.GmailFormat,
GmailAttachments: c.GmailAttachments,
}
if resetErr := resetExportTargets(outDir, shards); resetErr != nil {
return resetErr
}
for _, shard := range shards {
_, count, shardErr := exportPlainShard(outDir, shard)
_, count, shardErr := exportPlainShard(outDir, shard, exportOpts)
if shardErr != nil {
return shardErr
}
@ -205,24 +206,24 @@ func ensureExportOutsideRepo(outDir, repo string) error {
return nil
}
func resetExportIndexes(outDir string, shards []backup.PlainShard) error {
func resetExportTargets(outDir string, shards []backup.PlainShard) error {
seen := map[string]struct{}{}
for _, shard := range shards {
index := ""
target := ""
switch {
case shard.Service == backupServiceGmail && shard.Kind == "messages":
index = filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages", "index.jsonl")
target = filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "messages")
case shard.Service == backupServiceDrive && shard.Kind == "contents":
index = filepath.Join(outDir, backupServiceDrive, sanitizeFilePart(shard.Account), "files", "index.jsonl")
target = filepath.Join(outDir, backupServiceDrive, sanitizeFilePart(shard.Account), "files", "index.jsonl")
}
if index == "" {
if target == "" {
continue
}
if _, ok := seen[index]; ok {
if _, ok := seen[target]; ok {
continue
}
seen[index] = struct{}{}
if err := os.Remove(index); err != nil && !os.IsNotExist(err) {
seen[target] = struct{}{}
if err := os.RemoveAll(target); err != nil && !os.IsNotExist(err) {
return err
}
}
@ -235,9 +236,10 @@ func writeBackupExportReadme(outDir string) error {
"This directory is an unencrypted local copy created by `gog backup export`.\n" +
"Keep it out of Git, shared folders, and cloud sync unless that is intentional.\n" +
"\n" +
"Gmail messages are written as `.eml` files that can be opened by Mail and many\n" +
"mail clients. `gmail/<account>/messages/index.jsonl` maps backup message IDs\n" +
"to the exported `.eml` files. Labels are written as pretty JSON.\n"
"Gmail messages are written according to `--gmail-format`: `.eml` by default,\n" +
"Markdown notes with extracted attachment files when `--gmail-format markdown`,\n" +
"or both when `--gmail-format both`. `gmail/<account>/messages/index.jsonl`\n" +
"maps backup message IDs to exported files. Labels are written as pretty JSON.\n"
return os.WriteFile(filepath.Join(outDir, "README.md"), []byte(body), 0o600)
}
@ -253,14 +255,14 @@ func writeJSONFile(path string, value any) error {
return os.WriteFile(path, data, 0o600)
}
func exportPlainShard(outDir string, shard backup.PlainShard) (int, int, error) {
func exportPlainShard(outDir string, shard backup.PlainShard, opts backupExportOptions) (int, int, error) {
switch {
case shard.Service == backupServiceDrive && shard.Kind == "contents":
return exportDriveContents(outDir, shard)
case shard.Service == backupServiceGmail && shard.Kind == "labels":
return exportGmailLabels(outDir, shard)
case shard.Service == backupServiceGmail && shard.Kind == "messages":
return exportGmailMessages(outDir, shard)
return exportGmailMessages(outDir, shard, opts)
default:
return exportRawShard(outDir, shard)
}
@ -321,65 +323,6 @@ func exportDriveContents(outDir string, shard backup.PlainShard) (int, int, erro
return files + 1, len(rows), nil
}
func exportGmailLabels(outDir string, shard backup.PlainShard) (int, int, error) {
var labels []gmailBackupLabel
if err := backup.DecodeJSONL(shard.Plaintext, &labels); err != nil {
return 0, 0, err
}
path := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account), "labels.json")
if err := writeJSONFile(path, labels); err != nil {
return 0, 0, err
}
return 1, len(labels), nil
}
func exportGmailMessages(outDir string, shard backup.PlainShard) (int, int, error) {
var messages []gmailBackupMessage
if err := backup.DecodeJSONL(shard.Plaintext, &messages); err != nil {
return 0, 0, err
}
account := sanitizeFilePart(shard.Account)
indexPath := filepath.Join(outDir, backupServiceGmail, account, "messages", "index.jsonl")
if err := os.MkdirAll(filepath.Dir(indexPath), 0o700); err != nil {
return 0, 0, err
}
indexFile, err := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) // #nosec G304 -- path is confined to caller-selected export dir and sanitized account.
if err != nil {
return 0, 0, err
}
defer indexFile.Close()
enc := json.NewEncoder(indexFile)
enc.SetEscapeHTML(false)
files := 0
for _, message := range messages {
mime, err := decodeGmailRaw(message.Raw)
if err != nil {
return files, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, err)
}
rel := backupExportMessagePath(account, message)
path := filepath.Join(outDir, filepath.FromSlash(rel))
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
return files, 0, err
}
if err := os.WriteFile(path, mime, 0o600); err != nil {
return files, 0, err
}
files++
if err := enc.Encode(gmailExportIndexEntry{
ID: message.ID,
ThreadID: message.ThreadID,
HistoryID: message.HistoryID,
InternalDate: message.InternalDate,
LabelIDs: message.LabelIDs,
SizeEstimate: message.SizeEstimate,
EML: rel,
}); err != nil {
return files, 0, err
}
}
return files + 1, len(messages), nil
}
func exportRawShard(outDir string, shard backup.PlainShard) (int, int, error) {
rel := strings.TrimSuffix(shard.Path, ".gz.age")
path := filepath.Join(outDir, "raw", filepath.FromSlash(rel))
@ -406,29 +349,6 @@ func countExportFiles(outDir string) (int, error) {
return count, err
}
func decodeGmailRaw(raw string) ([]byte, error) {
raw = strings.TrimSpace(raw)
if raw == "" {
return nil, fmt.Errorf("empty raw payload")
}
if data, err := base64.RawURLEncoding.DecodeString(raw); err == nil {
return data, nil
}
return base64.URLEncoding.DecodeString(raw)
}
func backupExportMessagePath(account string, message gmailBackupMessage) string {
timestamp := trackingUnknown
yearMonth := trackingUnknown
if message.InternalDate > 0 {
t := time.UnixMilli(message.InternalDate).UTC()
timestamp = t.Format("20060102T150405Z")
yearMonth = filepath.Join(fmt.Sprintf("%04d", t.Year()), fmt.Sprintf("%02d", int(t.Month())))
}
name := timestamp + "-" + sanitizeFilePart(message.ID) + ".eml"
return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", yearMonth, name))
}
func sanitizeFilePart(value string) string {
value = strings.TrimSpace(value)
if value == "" {

View File

@ -0,0 +1,500 @@
package cmd
import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
stdhtml "html"
"io"
"mime"
"mime/multipart"
"net/mail"
"os"
"path/filepath"
"strings"
"time"
"github.com/steipete/gogcli/internal/backup"
)
// gmailExportIndexEntry is one JSON line of a Gmail export's
// messages/index.jsonl. It combines the backed-up message metadata, a summary
// of the parsed headers, and the export-relative paths of the files written for
// the message.
type gmailExportIndexEntry struct {
// Metadata copied from the backed-up Gmail message.
ID string `json:"id"`
ThreadID string `json:"threadId,omitempty"`
HistoryID string `json:"historyId,omitempty"`
InternalDate int64 `json:"internalDate,omitempty"`
LabelIDs []string `json:"labelIds,omitempty"`
SizeEstimate int64 `json:"sizeEstimate,omitempty"`
// Header summary taken from the parsed MIME message; these stay empty when
// the message could not be parsed.
Subject string `json:"subject,omitempty"`
From string `json:"from,omitempty"`
To []string `json:"to,omitempty"`
Cc []string `json:"cc,omitempty"`
Date string `json:"date,omitempty"`
// EML is the slash-separated path of the written .eml file, if one was written.
EML string `json:"eml,omitempty"`
// Markdown is the slash-separated path of the written message.md, if one was written.
Markdown string `json:"markdown,omitempty"`
// Attachments lists the slash-separated paths of extracted attachment files.
Attachments []string `json:"attachments,omitempty"`
}
// backupEmail is the subset of a parsed MIME message that the Gmail export
// needs: decoded header values, the message bodies, and attachment payloads.
type backupEmail struct {
// Decoded Subject, From and Date header values.
Subject string
From string
// To and Cc hold one entry per parsed address, or the whole decoded header
// as a single entry when the address list cannot be parsed.
To []string
Cc []string
Date string
// TextBody and HTMLBody hold the first non-blank text/plain and text/html
// parts found while walking the message.
TextBody string
HTMLBody string
// Attachments are the decoded attachment parts in the order they were found.
Attachments []backupEmailAttachment
}
// backupEmailAttachment is one attachment extracted from a MIME message.
type backupEmailAttachment struct {
// Filename is the name advertised by the part's headers (not yet sanitized
// for use on disk), or "attachment" when the part carries no name.
Filename string
// Data is the attachment content after transfer-encoding decoding.
Data []byte
}
// exportGmailLabels decodes a Gmail labels shard and writes it with
// writeJSONFile to gmail/<account>/labels.json below outDir.
//
// It returns the number of files written (1 on success), the number of labels
// in the shard, and any decode or write error.
func exportGmailLabels(outDir string, shard backup.PlainShard) (int, int, error) {
	var labels []gmailBackupLabel
	decodeErr := backup.DecodeJSONL(shard.Plaintext, &labels)
	if decodeErr != nil {
		return 0, 0, decodeErr
	}

	accountDir := filepath.Join(outDir, backupServiceGmail, sanitizeFilePart(shard.Account))
	if writeErr := writeJSONFile(filepath.Join(accountDir, "labels.json"), labels); writeErr != nil {
		return 0, 0, writeErr
	}
	return 1, len(labels), nil
}
// exportGmailMessages writes every message of a Gmail messages shard below
// outDir and appends one line per message to gmail/<account>/messages/index.jsonl.
//
// opts.GmailFormat selects the output ("eml", "markdown" or "both"; blank means
// "eml") and opts.GmailAttachments selects whether Markdown exports also
// extract attachment files ("extract", the default when blank) or skip them.
// A message that cannot be parsed as MIME is an error for every format except
// "eml", where the raw bytes are still written and the header summary in the
// index is left empty.
//
// It returns the number of files written (including the index once the whole
// shard succeeded), the number of messages in the shard, and the first error.
func exportGmailMessages(outDir string, shard backup.PlainShard, opts backupExportOptions) (int, int, error) {
	var messages []gmailBackupMessage
	if err := backup.DecodeJSONL(shard.Plaintext, &messages); err != nil {
		return 0, 0, err
	}

	format := strings.ToLower(strings.TrimSpace(opts.GmailFormat))
	if format == "" {
		format = "eml"
	}
	attachments := strings.ToLower(strings.TrimSpace(opts.GmailAttachments))
	if attachments == "" {
		attachments = "extract"
	}
	writeEML := format == "eml" || format == "both"
	writeMarkdown := format == "markdown" || format == "both"
	extract := attachments == "extract"

	account := sanitizeFilePart(shard.Account)
	messagesDir := filepath.Join(outDir, backupServiceGmail, account, "messages")
	if err := os.MkdirAll(messagesDir, 0o700); err != nil {
		return 0, 0, err
	}
	indexPath := filepath.Join(messagesDir, "index.jsonl")
	indexFile, err := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) // #nosec G304 -- path is confined to caller-selected export dir and sanitized account.
	if err != nil {
		return 0, 0, err
	}
	defer indexFile.Close()

	index := json.NewEncoder(indexFile)
	index.SetEscapeHTML(false)

	written := 0
	for _, message := range messages {
		rawMIME, decodeErr := decodeGmailRaw(message.Raw)
		if decodeErr != nil {
			return written, 0, fmt.Errorf("decode Gmail raw %s: %w", message.ID, decodeErr)
		}

		// Only the plain "eml" export tolerates unparseable MIME.
		parsed, parseErr := parseBackupEmail(rawMIME)
		if parseErr != nil && format != "eml" {
			return written, 0, fmt.Errorf("parse Gmail MIME %s: %w", message.ID, parseErr)
		}

		entry := gmailExportIndexEntry{
			ID:           message.ID,
			ThreadID:     message.ThreadID,
			HistoryID:    message.HistoryID,
			InternalDate: message.InternalDate,
			LabelIDs:     message.LabelIDs,
			SizeEstimate: message.SizeEstimate,
			Subject:      parsed.Subject,
			From:         parsed.From,
			To:           parsed.To,
			Cc:           parsed.Cc,
			Date:         parsed.Date,
		}

		if writeEML {
			emlRel := backupExportMessageEMLPath(account, message)
			emlPath := filepath.Join(outDir, filepath.FromSlash(emlRel))
			if mkErr := os.MkdirAll(filepath.Dir(emlPath), 0o700); mkErr != nil {
				return written, 0, mkErr
			}
			if writeErr := os.WriteFile(emlPath, rawMIME, 0o600); writeErr != nil {
				return written, 0, writeErr
			}
			written++
			entry.EML = emlRel
		}

		if writeMarkdown {
			mdRel, attachmentRels, count, mdErr := exportGmailMarkdownMessage(outDir, account, message, parsed, extract)
			if mdErr != nil {
				return written, 0, mdErr
			}
			written += count
			entry.Markdown = mdRel
			entry.Attachments = attachmentRels
		}

		if encErr := index.Encode(entry); encErr != nil {
			return written, 0, encErr
		}
	}
	return written + 1, len(messages), nil
}
// decodeGmailRaw decodes a Gmail "raw" payload, which is URL-safe base64.
// Surrounding whitespace is ignored and both the unpadded and the padded
// variants are accepted; a blank payload is an error.
func decodeGmailRaw(raw string) ([]byte, error) {
	trimmed := strings.TrimSpace(raw)
	if len(trimmed) == 0 {
		return nil, fmt.Errorf("empty raw payload")
	}
	// Try the unpadded alphabet first, then fall back to the padded form.
	data, err := base64.RawURLEncoding.DecodeString(trimmed)
	if err != nil {
		return base64.URLEncoding.DecodeString(trimmed)
	}
	return data, nil
}
// backupExportMessageEMLPath returns the slash-separated, export-relative path
// of a message's .eml file:
// gmail/<account>/messages/<YYYY>/<MM>/<timestamp>-<message-id>.eml.
// Messages without a positive InternalDate use trackingUnknown for both the
// month directory and the timestamp.
func backupExportMessageEMLPath(account string, message gmailBackupMessage) string {
	stamp, bucket := trackingUnknown, trackingUnknown
	if message.InternalDate > 0 {
		when := time.UnixMilli(message.InternalDate).UTC()
		year, month, _ := when.Date()
		stamp = when.Format("20060102T150405Z")
		bucket = filepath.Join(fmt.Sprintf("%04d", year), fmt.Sprintf("%02d", int(month)))
	}
	fileName := stamp + "-" + sanitizeFilePart(message.ID) + ".eml"
	return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", bucket, fileName))
}
// backupExportMessageDir returns the slash-separated, export-relative directory
// that holds a message's Markdown note and attachments:
// gmail/<account>/messages/<YYYY>/<MM>/<timestamp>-<subject>-<message-id>.
// The subject is sanitized and truncated to 72 bytes; a subject that sanitizes
// to trackingUnknown becomes "no-subject". Messages without a positive
// InternalDate use trackingUnknown for the month directory and the timestamp.
func backupExportMessageDir(account string, message gmailBackupMessage, subject string) string {
	stamp, bucket := trackingUnknown, trackingUnknown
	if message.InternalDate > 0 {
		when := time.UnixMilli(message.InternalDate).UTC()
		year, month, _ := when.Date()
		stamp = when.Format("20060102T150405Z")
		bucket = filepath.Join(fmt.Sprintf("%04d", year), fmt.Sprintf("%02d", int(month)))
	}

	slug := truncateFilePart(sanitizeFilePart(subject), 72)
	if slug == trackingUnknown {
		slug = "no-subject"
	}

	dirName := strings.Join([]string{stamp, slug, sanitizeFilePart(message.ID)}, "-")
	return filepath.ToSlash(filepath.Join(backupServiceGmail, account, "messages", bucket, dirName))
}
// exportGmailMarkdownMessage writes one message as <message-dir>/message.md
// and, when extractAttachments is set, its attachments as
// <message-dir>/attachments/<filename>.
//
// It returns the export-relative path of message.md, the export-relative paths
// of the attachment files (in message order), the number of files written, and
// the first filesystem error. On error the paths are empty but the file count
// still reflects what was written before the failure.
func exportGmailMarkdownMessage(outDir, account string, message gmailBackupMessage, parsed backupEmail, extractAttachments bool) (string, []string, int, error) {
	dirRel := backupExportMessageDir(account, message, parsed.Subject)
	if err := os.MkdirAll(filepath.Join(outDir, filepath.FromSlash(dirRel)), 0o700); err != nil {
		return "", nil, 0, err
	}

	var attachmentRels []string
	written := 0
	if extractAttachments {
		used := map[string]int{}
		for idx := range parsed.Attachments {
			// Fallback names are numbered from 1 by position in the message.
			name := sanitizeBackupAttachmentFilename(parsed.Attachments[idx].Filename, idx+1)
			name = uniqueExportFilename(used, name)
			attachmentRel := filepath.ToSlash(filepath.Join(dirRel, "attachments", name))
			target := filepath.Join(outDir, filepath.FromSlash(attachmentRel))
			if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
				return "", nil, written, err
			}
			if err := os.WriteFile(target, parsed.Attachments[idx].Data, 0o600); err != nil {
				return "", nil, written, err
			}
			attachmentRels = append(attachmentRels, attachmentRel)
			written++
		}
	}

	noteRel := filepath.ToSlash(filepath.Join(dirRel, "message.md"))
	note := renderGmailMessageMarkdown(message, parsed, backupEmailMarkdownBody(parsed), attachmentRels)
	if err := os.WriteFile(filepath.Join(outDir, filepath.FromSlash(noteRel)), []byte(note), 0o600); err != nil {
		return "", nil, written, err
	}
	return noteRel, attachmentRels, written + 1, nil
}
// backupEmailMarkdownBody picks the body text for a Markdown note: the plain
// text part when it is non-blank, otherwise the HTML part reduced to text,
// otherwise the empty string.
func backupEmailMarkdownBody(parsed backupEmail) string {
	switch {
	case strings.TrimSpace(parsed.TextBody) != "":
		return backupEmailMarkdownText(parsed.TextBody)
	case strings.TrimSpace(parsed.HTMLBody) != "":
		return cleanBackupHTMLBody(parsed.HTMLBody)
	default:
		return ""
	}
}
// backupEmailMarkdownText returns the trimmed plain-text body. Text parts that
// actually contain HTML (a full document or a recognizable fragment) are
// reduced to text with cleanBackupHTMLBody instead.
func backupEmailMarkdownText(value string) string {
	text := strings.TrimSpace(value)
	switch {
	case text == "":
		return ""
	case looksLikeHTML(text) || looksLikeHTMLFragment(text):
		return cleanBackupHTMLBody(text)
	}
	return text
}
// cleanBackupHTMLBody reduces an HTML body to a single line of text: tags are
// removed with stripHTMLTags, HTML entities are unescaped, and every run of
// whitespace is collapsed to one space.
func cleanBackupHTMLBody(value string) string {
	text := stripHTMLTags(value)
	text = stdhtml.UnescapeString(text)
	words := strings.Fields(text)
	return strings.Join(words, " ")
}
// looksLikeHTMLFragment reports whether value contains, case-insensitively, any
// of a fixed set of common HTML tag openers/closers or an HTML comment opener.
// Blank input never matches.
func looksLikeHTMLFragment(value string) bool {
	markers := [...]string{
		"<p", "</p", "<br", "<div", "</div", "<span", "</span", "<table", "</table",
		"<tr", "</tr", "<td", "</td", "<section", "</section", "<blockquote",
		"</blockquote", "<a ", "</a", "<img", "<font", "</font", "<style", "<!--",
	}
	// A blank string contains none of the markers, so no separate empty check
	// is needed.
	lowered := strings.ToLower(strings.TrimSpace(value))
	for i := range markers {
		if strings.Contains(lowered, markers[i]) {
			return true
		}
	}
	return false
}
// renderGmailMessageMarkdown renders one message as a Markdown note: a YAML
// front-matter block with the message metadata, an H1 with the subject (when
// present), the body (or a placeholder when it is blank), and an "Attachments"
// section linking each extracted file relative to the note.
func renderGmailMessageMarkdown(message gmailBackupMessage, parsed backupEmail, body string, attachmentRels []string) string {
	var doc strings.Builder

	// Front matter; blank scalars and empty lists are omitted by the helpers.
	doc.WriteString("---\n")
	writeYAMLScalar(&doc, "gmail_id", message.ID)
	writeYAMLScalar(&doc, "thread_id", message.ThreadID)
	writeYAMLScalar(&doc, "history_id", message.HistoryID)
	if message.InternalDate > 0 {
		internal := time.UnixMilli(message.InternalDate).UTC()
		writeYAMLScalar(&doc, "internal_date", internal.Format(time.RFC3339))
	}
	writeYAMLScalar(&doc, "date", parsed.Date)
	writeYAMLScalar(&doc, "from", parsed.From)
	writeYAMLList(&doc, "to", parsed.To)
	writeYAMLList(&doc, "cc", parsed.Cc)
	writeYAMLScalar(&doc, "subject", parsed.Subject)
	writeYAMLList(&doc, "labels", message.LabelIDs)
	if message.SizeEstimate > 0 {
		fmt.Fprintf(&doc, "size_estimate: %d\n", message.SizeEstimate)
	}
	writeYAMLList(&doc, "attachments", attachmentRels)
	doc.WriteString("---\n\n")

	if strings.TrimSpace(parsed.Subject) != "" {
		doc.WriteString("# " + markdownHeadingText(parsed.Subject) + "\n\n")
	}

	if text := strings.TrimSpace(body); text != "" {
		doc.WriteString(text + "\n")
	} else {
		doc.WriteString("_No text body found._\n")
	}

	if len(attachmentRels) == 0 {
		return doc.String()
	}
	doc.WriteString("\n## Attachments\n\n")
	for _, rel := range attachmentRels {
		// Links are relative to message.md, which sits next to attachments/.
		name := filepath.Base(rel)
		fmt.Fprintf(&doc, "- [%s](attachments/%s)\n", markdownLinkText(name), markdownLinkTarget(name))
	}
	return doc.String()
}
// parseBackupEmail parses a raw RFC 5322 message into a backupEmail: decoded
// Subject/From/Date headers, To/Cc address lists, and the bodies and
// attachments found by walking the MIME structure.
//
// Any failure (unreadable headers, unreadable body, or a MIME walk error)
// returns the zero backupEmail together with the error.
func parseBackupEmail(rawMIME []byte) (backupEmail, error) {
	msg, err := mail.ReadMessage(bytes.NewReader(rawMIME))
	if err != nil {
		return backupEmail{}, err
	}
	header := msg.Header

	var out backupEmail
	out.Subject = decodeMIMEHeader(header.Get("Subject"))
	out.From = decodeMIMEHeader(header.Get("From"))
	out.Date = decodeMIMEHeader(header.Get("Date"))
	out.To = parseAddressHeader(header.Get("To"))
	out.Cc = parseAddressHeader(header.Get("Cc"))

	body, err := io.ReadAll(msg.Body)
	if err != nil {
		return backupEmail{}, err
	}
	walkErr := parseBackupEmailEntity(body, header.Get("Content-Type"), header.Get("Content-Transfer-Encoding"), &out)
	if walkErr != nil {
		return backupEmail{}, walkErr
	}
	return out, nil
}
// parseBackupEmailEntity walks one MIME entity and records what it finds in out.
//
// body is the entity's content, and contentType / transferEncoding are its
// Content-Type and Content-Transfer-Encoding header values. Multipart entities
// are walked part by part: parts accepted by isBackupEmailAttachment are
// decoded and appended to out.Attachments, and every other part is parsed
// recursively as a nested entity. For non-multipart entities only text/plain
// and text/html are kept, and only the first non-blank body of each kind wins;
// all other media types are ignored.
//
// The only errors returned come from reading multipart parts.
func parseBackupEmailEntity(body []byte, contentType, transferEncoding string, out *backupEmail) error {
mediaType, params, err := mime.ParseMediaType(contentType)
// A missing or unparseable Content-Type is treated as text/plain.
if err != nil || strings.TrimSpace(mediaType) == "" {
mediaType = "text/plain"
}
mediaType = strings.ToLower(mediaType)
if strings.HasPrefix(mediaType, "multipart/") {
boundary := params["boundary"]
// A multipart entity without a boundary cannot be split; it is skipped
// silently rather than reported as an error.
if strings.TrimSpace(boundary) == "" {
return nil
}
reader := multipart.NewReader(bytes.NewReader(body), boundary)
for {
part, partErr := reader.NextPart()
if partErr == io.EOF {
break
}
if partErr != nil {
return partErr
}
// Read the whole part up front; the close error is deliberately ignored
// because the read error below already reports a failed part.
partBody, readErr := io.ReadAll(part)
_ = part.Close()
if readErr != nil {
return readErr
}
partContentType := part.Header.Get("Content-Type")
partEncoding := part.Header.Get("Content-Transfer-Encoding")
if isBackupEmailAttachment(part.Header.Get("Content-Disposition"), partContentType) {
decoded := decodeTransferEncoding(partBody, partEncoding)
filename := backupAttachmentFilename(part.Header.Get("Content-Disposition"), partContentType)
out.Attachments = append(out.Attachments, backupEmailAttachment{
Filename: filename,
Data: decoded,
})
continue
}
// Not an attachment: it may be a body part or a nested multipart.
if err := parseBackupEmailEntity(partBody, partContentType, partEncoding, out); err != nil {
return err
}
}
return nil
}
// Leaf entity: undo the transfer encoding, then the charset.
decoded := decodeTransferEncoding(body, transferEncoding)
decoded = decodeBodyCharset(decoded, contentType)
switch mediaType {
case "text/plain":
// Keep the first non-blank plain-text body only.
if strings.TrimSpace(out.TextBody) == "" {
out.TextBody = string(decoded)
}
case "text/html":
// Keep the first non-blank HTML body only.
if strings.TrimSpace(out.HTMLBody) == "" {
out.HTMLBody = string(decoded)
}
}
return nil
}
// isBackupEmailAttachment reports whether a MIME part should be exported as an
// attachment file. A part qualifies when its Content-Disposition is
// "attachment", when it is "inline" with a filename, or when its Content-Type
// carries a non-blank "name" parameter. Unparseable headers count as absent.
func isBackupEmailAttachment(contentDisposition, contentType string) bool {
	kind, dispositionParams, _ := mime.ParseMediaType(contentDisposition)
	switch {
	case strings.EqualFold(kind, "attachment"):
		return true
	case strings.EqualFold(kind, "inline") && strings.TrimSpace(dispositionParams["filename"]) != "":
		return true
	}
	_, typeParams, _ := mime.ParseMediaType(contentType)
	return strings.TrimSpace(typeParams["name"]) != ""
}
// backupAttachmentFilename returns the name a MIME part advertises for itself:
// the Content-Disposition "filename" parameter if non-blank, otherwise the
// Content-Type "name" parameter, otherwise the literal "attachment". Each
// candidate is passed through decodeMIMEHeader first.
func backupAttachmentFilename(contentDisposition, contentType string) string {
	_, dispositionParams, _ := mime.ParseMediaType(contentDisposition)
	_, typeParams, _ := mime.ParseMediaType(contentType)

	// Candidates in priority order.
	for _, candidate := range []string{dispositionParams["filename"], typeParams["name"]} {
		if name := decodeMIMEHeader(candidate); strings.TrimSpace(name) != "" {
			return name
		}
	}
	return "attachment"
}
// decodeMIMEHeader trims a header value and decodes any RFC 2047 encoded-words
// in it. When decoding fails the trimmed input is returned unchanged.
func decodeMIMEHeader(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return ""
	}
	var decoder mime.WordDecoder
	decoded, err := decoder.DecodeHeader(trimmed)
	if err != nil {
		return trimmed
	}
	return strings.TrimSpace(decoded)
}
// parseAddressHeader turns an address-list header value (To, Cc, ...) into one
// human-readable string per address.
//
// A blank header yields nil. A header that cannot be parsed as an address list
// yields a single entry holding the whole decoded header, so the information
// is never dropped.
func parseAddressHeader(value string) []string {
	value = strings.TrimSpace(value)
	if value == "" {
		return nil
	}
	addrs, err := mail.ParseAddressList(value)
	if err != nil {
		return []string{decodeMIMEHeader(value)}
	}
	out := make([]string, 0, len(addrs))
	for _, addr := range addrs {
		// mail.Address.String renders non-ASCII display names as RFC 2047
		// encoded-words ("=?utf-8?q?...?="), which is right for the wire but
		// unreadable in the Markdown front matter and the index. Decode the
		// formatted address so names read the same way the From header does.
		// Addresses with ASCII names contain no encoded-words and pass through
		// unchanged.
		out = append(out, decodeMIMEHeader(addr.String()))
	}
	return out
}
func writeYAMLScalar(b *strings.Builder, key, value string) {
if strings.TrimSpace(value) == "" {
return
}
fmt.Fprintf(b, "%s: %q\n", key, value)
}
func writeYAMLList(b *strings.Builder, key string, values []string) {
if len(values) == 0 {
return
}
fmt.Fprintf(b, "%s:\n", key)
for _, value := range values {
fmt.Fprintf(b, " - %q\n", value)
}
}
// markdownHeadingText flattens a subject for use on a single Markdown heading
// line: every CR and LF becomes a space and the result is trimmed.
func markdownHeadingText(value string) string {
	flattened := strings.NewReplacer("\r", " ", "\n", " ").Replace(value)
	return strings.TrimSpace(flattened)
}
// markdownLinkText escapes square brackets so value can be used as the text of
// a Markdown link.
func markdownLinkText(value string) string {
	return strings.NewReplacer("[", "\\[", "]", "\\]").Replace(value)
}
// markdownLinkTarget percent-encodes the characters that would break a
// Markdown link destination: spaces and parentheses.
func markdownLinkTarget(value string) string {
	return strings.NewReplacer(" ", "%20", "(", "%28", ")", "%29").Replace(value)
}
// sanitizeBackupAttachmentFilename reduces an attachment's advertised name to a
// single path element that is safe to create below the attachments directory.
// Only the last path element of the trimmed name is kept; when nothing usable
// remains, "attachment-NNN" (zero-padded fallbackIndex) is used. The result is
// finally passed through sanitizeFilePart.
func sanitizeBackupAttachmentFilename(value string, fallbackIndex int) string {
	name := filepath.Base(strings.TrimSpace(value))
	switch name {
	case "", ".", "..":
		name = fmt.Sprintf("attachment-%03d", fallbackIndex)
	}
	return sanitizeFilePart(name)
}
// uniqueExportFilename returns a name based on filename that has not been
// handed out before for the same seen map, and records it as used.
//
// seen maps every name already handed out (and every requested name) to a
// counter; callers pass the same map for all files of one directory. The first
// request for a name returns it unchanged; later requests get "-2", "-3", ...
// inserted before the extension. An empty filename is treated as "attachment".
//
// Generated names are checked against, and recorded in, seen as well, so a
// later attachment that is literally called e.g. "a-2.pdf" cannot collide with
// the "a-2.pdf" generated for a duplicate "a.pdf" and overwrite it.
func uniqueExportFilename(seen map[string]int, filename string) string {
	if filename == "" {
		filename = "attachment"
	}
	if seen[filename] == 0 {
		seen[filename] = 1
		return filename
	}
	ext := filepath.Ext(filename)
	base := strings.TrimSuffix(filename, ext)
	for {
		// The counter for the requested name only ever grows, so each
		// iteration proposes a new candidate and the loop terminates.
		seen[filename]++
		candidate := fmt.Sprintf("%s-%d%s", base, seen[filename], ext)
		if seen[candidate] == 0 {
			seen[candidate] = 1
			return candidate
		}
	}
}
// truncateFilePart shortens value to at most limit bytes and then strips any
// leading or trailing '.', '_' and '-' left over by the cut. Values that
// already fit, and non-positive limits, return value unchanged.
//
// The cut is moved back to the nearest rune boundary so a multi-byte UTF-8
// character is never split in half, which would leave invalid UTF-8 in the
// resulting file name. For ASCII input the cut is exactly at limit.
func truncateFilePart(value string, limit int) string {
	if limit <= 0 || len(value) <= limit {
		return value
	}
	cut := 0
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed limit.
	for i := range value {
		if i > limit {
			break
		}
		cut = i
	}
	return strings.Trim(value[:cut], "._-")
}

View File

@ -0,0 +1,148 @@
package cmd
import (
"encoding/base64"
"os"
"path/filepath"
"strings"
"testing"
"github.com/steipete/gogcli/internal/backup"
)
// TestDecodeGmailRawAcceptsBase64URLVariants checks that decodeGmailRaw accepts
// both the unpadded and the padded URL-safe base64 form of the same payload.
func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) {
	payload := []byte("Subject: Hello\r\n\r\nBody")
	variants := []struct {
		label   string
		encoded string
	}{
		{"raw", base64.RawURLEncoding.EncodeToString(payload)},
		{"padded", base64.URLEncoding.EncodeToString(payload)},
	}
	for _, variant := range variants {
		got, err := decodeGmailRaw(variant.encoded)
		if err != nil {
			t.Fatalf("decodeGmailRaw %s: %v", variant.label, err)
		}
		if string(got) != string(payload) {
			t.Fatalf("%s decoded = %q, want %q", variant.label, got, payload)
		}
	}
}
// TestExportGmailMessagesWritesReadableEMLAndIndex exports a single message in
// "eml" format and checks the file count, the byte-exact .eml content, and the
// id/eml fields of the index line.
func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) {
	exportDir := t.TempDir()
	rawMessage := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody")
	msg := gmailBackupMessage{
		ID:           "msg/one",
		ThreadID:     "thread-1",
		InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
		LabelIDs:     []string{"INBOX"},
		Raw:          base64.RawURLEncoding.EncodeToString(rawMessage),
	}
	shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age", []gmailBackupMessage{msg})
	if err != nil {
		t.Fatalf("NewJSONLShard: %v", err)
	}

	// One .eml plus the index file.
	files, count, err := exportGmailMessages(exportDir, shard, backupExportOptions{GmailFormat: "eml"})
	if err != nil {
		t.Fatalf("exportGmailMessages: %v", err)
	}
	if files != 2 || count != 1 {
		t.Fatalf("files,count = %d,%d want 2,1", files, count)
	}

	// The account hash is sanitized in on-disk paths.
	emlRel := backupExportMessageEMLPath("acct_hash", msg)
	written, err := os.ReadFile(filepath.Join(exportDir, filepath.FromSlash(emlRel)))
	if err != nil {
		t.Fatalf("read eml: %v", err)
	}
	if string(written) != string(rawMessage) {
		t.Fatalf("eml = %q, want %q", written, rawMessage)
	}

	index := readText(t, filepath.Join(exportDir, "gmail", "acct_hash", "messages", "index.jsonl"))
	for _, field := range []string{`"id":"msg/one"`, `"eml":"` + emlRel + `"`} {
		if !strings.Contains(index, field) {
			t.Fatalf("index missing expected fields: %s", index)
		}
	}
}
// TestExportGmailMessagesWritesMarkdownAndAttachments exports a multipart
// message (text/plain body plus a base64 PDF attachment) with GmailFormat
// "markdown" and GmailAttachments "extract". It checks the returned counts,
// the contents of message.md, the decoded attachment file, and that
// index.jsonl lists the markdown and attachment paths without an "eml" key.
func TestExportGmailMessagesWritesMarkdownAndAttachments(t *testing.T) {
	root := t.TempDir()

	pdfPart := base64.StdEncoding.EncodeToString([]byte("pdf bytes"))
	mimeLines := []string{
		"Subject: Report",
		"From: Alice <alice@example.com>",
		"To: Peter <peter@example.com>",
		"Date: Thu, 02 Apr 2026 10:00:00 +0000",
		"MIME-Version: 1.0",
		`Content-Type: multipart/mixed; boundary="b1"`,
		"",
		"--b1",
		"Content-Type: text/plain; charset=utf-8",
		"",
		"Body text.",
		"--b1",
		"Content-Type: application/pdf",
		"Content-Transfer-Encoding: base64",
		`Content-Disposition: attachment; filename="report.pdf"`,
		"",
		pdfPart,
		"--b1--",
		"",
	}
	rawEmail := strings.Join(mimeLines, "\r\n")

	msg := gmailBackupMessage{
		ID:           "msg/one",
		ThreadID:     "thread-1",
		InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
		LabelIDs:     []string{"INBOX"},
		Raw:          base64.RawURLEncoding.EncodeToString([]byte(rawEmail)),
	}

	const shardPath = "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age"
	shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", shardPath, []gmailBackupMessage{msg})
	if err != nil {
		t.Fatalf("NewJSONLShard: %v", err)
	}

	exportOpts := backupExportOptions{GmailFormat: "markdown", GmailAttachments: "extract"}
	written, exported, err := exportGmailMessages(root, shard, exportOpts)
	if err != nil {
		t.Fatalf("exportGmailMessages: %v", err)
	}
	if written != 3 || exported != 1 {
		t.Fatalf("files,count = %d,%d want 3,1", written, exported)
	}

	messageDir := backupExportMessageDir("acct_hash", msg, "Report")
	mdRel := filepath.ToSlash(filepath.Join(messageDir, "message.md"))
	attachmentPath := filepath.Join(messageDir, "attachments", "report.pdf")

	// The Markdown note must carry the subject, the body, and a relative
	// link to the extracted attachment.
	md := readText(t, filepath.Join(root, filepath.FromSlash(mdRel)))
	wantedInMarkdown := []string{
		`subject: "Report"`,
		"# Report",
		"Body text.",
		"- [report.pdf](attachments/report.pdf)",
	}
	for _, fragment := range wantedInMarkdown {
		if !strings.Contains(md, fragment) {
			t.Fatalf("markdown missing %q:\n%s", fragment, md)
		}
	}

	// The attachment must be written decoded, not as base64 text.
	extracted := readText(t, filepath.Join(root, filepath.FromSlash(attachmentPath)))
	if extracted != "pdf bytes" {
		t.Fatalf("attachment = %q", extracted)
	}

	index := readText(t, filepath.Join(root, "gmail", "acct_hash", "messages", "index.jsonl"))
	hasMarkdown := strings.Contains(index, `"markdown":"`+mdRel+`"`)
	hasAttachments := strings.Contains(index, `"attachments":["`+filepath.ToSlash(attachmentPath)+`"]`)
	hasEML := strings.Contains(index, `"eml"`)
	if !(hasMarkdown && hasAttachments) || hasEML {
		t.Fatalf("index missing expected markdown-only fields: %s", index)
	}
}
// TestBackupEmailMarkdownBodyCleansHTMLFragments checks that
// backupEmailMarkdownBody yields plain text for HTML markup found in
// TextBody and for a full HTML document in HTMLBody.
func TestBackupEmailMarkdownBodyCleansHTMLFragments(t *testing.T) {
	const (
		wantFromText = "Hello Peter"
		wantFromHTML = "Hi there"
	)

	// HTML fragments inside the text body: tags dropped, &nbsp; becomes a space.
	if got := backupEmailMarkdownBody(backupEmail{TextBody: "<p>Hello&nbsp;<b>Peter</b></p>"}); got != wantFromText {
		t.Fatalf("body = %q, want %q", got, wantFromText)
	}

	// HTML-only message: <br> is rendered as a single space.
	if got := backupEmailMarkdownBody(backupEmail{HTMLBody: "<html><body><p>Hi<br>there</p></body></html>"}); got != wantFromHTML {
		t.Fatalf("html body = %q, want %q", got, wantFromHTML)
	}
}

View File

@ -37,6 +37,16 @@ func TestBackupAccountHashStableAndOpaque(t *testing.T) {
}
}
// TestBackupReadFlagsOptionsSkipPull checks that backupReadFlags with NoPull
// set produces options with SkipPull true and Push false.
func TestBackupReadFlagsOptionsSkipPull(t *testing.T) {
	flags := backupReadFlags{NoPull: true}
	got := flags.options()

	switch {
	case !got.SkipPull:
		t.Fatal("SkipPull = false, want true")
	case got.Push:
		t.Fatal("Push = true, want false")
	}
}
func TestBuildGmailMessageShardsBucketsSortsAndChunks(t *testing.T) {
accountHash := "accthash"
messages := []gmailBackupMessage{
@ -632,64 +642,6 @@ func TestDownloadDriveBackupContentHonorsTimeout(t *testing.T) {
}
}
// TestDecodeGmailRawAcceptsBase64URLVariants checks that decodeGmailRaw
// decodes the same payload from URL-safe base64 with and without padding.
func TestDecodeGmailRawAcceptsBase64URLVariants(t *testing.T) {
	payload := []byte("Subject: Hello\r\n\r\nBody")

	// base64.RawURLEncoding emits no trailing '=' padding.
	unpadded, err := decodeGmailRaw(base64.RawURLEncoding.EncodeToString(payload))
	if err != nil {
		t.Fatalf("decodeGmailRaw raw: %v", err)
	}
	if string(unpadded) != string(payload) {
		t.Fatalf("raw decoded = %q, want %q", unpadded, payload)
	}

	// base64.URLEncoding pads the output with '='.
	withPadding, err := decodeGmailRaw(base64.URLEncoding.EncodeToString(payload))
	if err != nil {
		t.Fatalf("decodeGmailRaw padded: %v", err)
	}
	if string(withPadding) != string(payload) {
		t.Fatalf("padded decoded = %q, want %q", withPadding, payload)
	}
}
// TestExportGmailMessagesWritesReadableEMLAndIndex exports a one-message
// shard and checks the returned file/message counts, the bytes written at
// the path from backupExportMessagePath, and the id and eml entries in
// index.jsonl.
func TestExportGmailMessagesWritesReadableEMLAndIndex(t *testing.T) {
	root := t.TempDir()
	rawEmail := []byte("Subject: Hello\r\nFrom: a@example.com\r\n\r\nBody")
	msg := gmailBackupMessage{
		ID:           "msg/one",
		ThreadID:     "thread-1",
		InternalDate: mustUnixMilli(t, "2026-04-02T10:00:00Z"),
		LabelIDs:     []string{"INBOX"},
		Raw:          base64.RawURLEncoding.EncodeToString(rawEmail),
	}

	const shardPath = "data/gmail/acct/messages/2026/04/part-0001.jsonl.gz.age"
	shard, err := backup.NewJSONLShard("gmail", "messages", "acct/hash", shardPath, []gmailBackupMessage{msg})
	if err != nil {
		t.Fatalf("NewJSONLShard: %v", err)
	}

	written, exported, err := exportGmailMessages(root, shard)
	if err != nil {
		t.Fatalf("exportGmailMessages: %v", err)
	}
	if written != 2 || exported != 1 {
		t.Fatalf("files,count = %d,%d want 2,1", written, exported)
	}

	// The shard is created for account "acct/hash"; the exported files are
	// looked up under "acct_hash".
	emlRel := backupExportMessagePath("acct_hash", msg)
	emlBytes, err := os.ReadFile(filepath.Join(root, filepath.FromSlash(emlRel)))
	if err != nil {
		t.Fatalf("read eml: %v", err)
	}
	if string(emlBytes) != string(rawEmail) {
		t.Fatalf("eml = %q, want %q", emlBytes, rawEmail)
	}

	index := readText(t, filepath.Join(root, "gmail", "acct_hash", "messages", "index.jsonl"))
	hasID := strings.Contains(index, `"id":"msg/one"`)
	hasEML := strings.Contains(index, `"eml":"`+emlRel+`"`)
	if !(hasID && hasEML) {
		t.Fatalf("index missing expected fields: %s", index)
	}
}
func TestExportDriveContentsWritesReadableFilesAndIndex(t *testing.T) {
outDir := t.TempDir()
row := driveBackupContent{