feat(backup): resume full gmail backups

This commit is contained in:
Peter Steinberger 2026-04-27 13:02:43 +01:00
parent eb6886633d
commit ff5e85b159
No known key found for this signature in database
5 changed files with 296 additions and 12 deletions

View File

@ -7,6 +7,7 @@
- Backup: expand `gog backup push --services all` with Drive content export/download, Gmail settings, native Workspace Docs/Sheets/Slides/Form data, Apps Script projects, Chat, Classroom, best-effort optional service error shards, and plaintext Drive file export.
- Backup: extend `--services all` with Drive permissions/comments/revisions, Calendar ACL/settings/colors, contact groups, Cloud Identity groups, Workspace Admin Directory users/groups/members, Keep notes, and local Gmail message caching for resumable full-mailbox fetches.
- Backup: bound individual Drive content exports with `--drive-content-timeout` so one stuck Google export records an encrypted error row instead of blocking the full backup.
- Backup: add Gmail message-list checkpoints and stderr progress counters so full-mailbox backups can resume cleanly after interruption.
### Fixed
- Gmail: auto-fill draft reply subjects from the original message when `gmail drafts create --reply-to-message-id` omits `--subject`. (#488) — thanks @jbowerbir.

View File

@ -753,9 +753,10 @@ metadata, permissions, comments, revisions, and exported Google-native file
content by default. Non-Google binary Drive files are metadata-only unless
`--drive-binary-contents` is set. `--drive-content-timeout` turns a stuck
per-file export into an encrypted error row instead of wedging the run. Gmail
raw-message fetches use a local cache by default so interrupted full-mailbox
backups can resume; use
`--gmail-refresh-cache` to force a refetch. Workspace inventories
raw-message fetches and message-list pages use a local cache by default so
interrupted full-mailbox backups can resume; progress is written to stderr
while stdout stays parseable. Use `--gmail-refresh-cache` to force a refetch.
Workspace inventories
Docs/Sheets/Slides and backs up Forms/responses discovered through Drive; add
`--workspace-native` for full native Docs/Sheets/Slides API JSON.
Optional Workspace-only services use `--best-effort` by default, recording

View File

@ -259,13 +259,15 @@ Raw message payloads stay base64url encoded inside encrypted JSONL. This
preserves the RFC 2822 message content while keeping the shard format text
friendly.
By default, each fetched raw message is also cached locally under the OS user
cache directory (`gogcli/backup/gmail/<account-hash>/raw-v1/`). The cache stores
the same raw message row that will be encrypted into shards and is keyed by a
SHA-256 of the Gmail message ID, so rerunning after an interruption can reuse
already fetched messages. `--gmail-refresh-cache` forces a refetch. The cache is
plaintext local data; clear it if the machine should not retain local mail
copies outside the encrypted backup/export locations.
By default, Gmail backup state is cached locally under the OS user cache
directory (`gogcli/backup/gmail/<account-hash>/`). Message-list page checkpoints
live under `list-v1/`, and fetched raw messages live under `raw-v1/`. Raw-message
cache files store the same row that will be encrypted into shards and are keyed
by a SHA-256 of the Gmail message ID, so rerunning after an interruption can
reuse already fetched messages. Long Gmail runs report list/fetch counters to
stderr while stdout stays parseable. `--gmail-refresh-cache` ignores any saved
message-list checkpoint and cached raw messages, forcing a fresh listing and
refetch. The cache is plaintext local data (list queries, message IDs, and raw
messages); clear it if the machine should not retain local mail copies outside
the encrypted backup/export locations.
`--include-spam-trash` defaults to true. Use `--query` and `--max` for bounded
test exports; omit them for a full mailbox scan.

View File

@ -16,6 +16,7 @@ import (
"google.golang.org/api/gmail/v1"
"github.com/steipete/gogcli/internal/backup"
"github.com/steipete/gogcli/internal/ui"
)
type gmailBackupOptions struct {
@ -50,6 +51,18 @@ type gmailBackupLabel struct {
ThreadsUnread int64 `json:"threadsUnread,omitempty"`
}
// gmailBackupListState is the JSON checkpoint persisted while listing Gmail
// message IDs. It records the listing options it was written for, the IDs
// gathered so far, and where to continue, so an interrupted listing can be
// resumed instead of restarted.
type gmailBackupListState struct {
// Version is the checkpoint format version. The writer always stores 1 and
// the reader treats any other value as "no usable checkpoint".
Version int `json:"version"`
// AccountHash is the account hash from the options used for the listing.
AccountHash string `json:"accountHash"`
// Query is the whitespace-trimmed Gmail search query of the listing.
Query string `json:"query,omitempty"`
// Max is the message limit from the listing options.
Max int64 `json:"max,omitempty"`
// IncludeSpamTrash mirrors the listing option of the same name.
IncludeSpamTrash bool `json:"includeSpamTrash"`
// PageToken is the token of the next page to request when resuming a
// partial listing; it is written empty for a completed listing.
PageToken string `json:"pageToken,omitempty"`
// IDs holds the message IDs collected up to this checkpoint.
IDs []string `json:"ids"`
// Complete reports that the listing finished, so IDs can be reused as the
// full result without issuing further list requests.
Complete bool `json:"complete"`
// Updated is the UTC time at which the checkpoint was written.
Updated time.Time `json:"updated"`
}
func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailBackupOptions) (backup.Snapshot, error) {
if opts.ShardMaxRows <= 0 {
opts.ShardMaxRows = 1000
@ -125,11 +138,13 @@ func fetchGmailBackupMessages(ctx context.Context, svc *gmail.Service, opts gmai
if err != nil {
return nil, err
}
gmailBackupProgressf(ctx, "backup gmail fetch\tqueued=%d", len(ids))
const maxConcurrency = 8
sem := make(chan struct{}, maxConcurrency)
type result struct {
index int
msg gmailBackupMessage
cache bool
err error
}
results := make(chan result, len(ids))
@ -152,7 +167,7 @@ func fetchGmailBackupMessages(ctx context.Context, svc *gmail.Service, opts gmai
return
}
if ok {
results <- result{index: index, msg: msg}
results <- result{index: index, msg: msg, cache: true}
return
}
}
@ -193,11 +208,23 @@ func fetchGmailBackupMessages(ctx context.Context, svc *gmail.Service, opts gmai
}()
ordered := make([]gmailBackupMessage, len(ids))
var firstErr error
done := 0
cacheHits := 0
fetched := 0
for res := range results {
if res.err != nil && firstErr == nil {
firstErr = res.err
}
ordered[res.index] = res.msg
done++
if res.cache {
cacheHits++
} else if res.err == nil {
fetched++
}
if done == len(ids) || done%100 == 0 {
gmailBackupProgressf(ctx, "backup gmail fetch\t%d/%d\tfetched=%d\tcache=%d", done, len(ids), fetched, cacheHits)
}
}
if firstErr != nil {
return nil, firstErr
@ -288,6 +315,23 @@ func gmailBackupMessageCachePath(accountHash, messageID string) (string, bool) {
func listGmailBackupMessageIDs(ctx context.Context, svc *gmail.Service, opts gmailBackupOptions) ([]string, error) {
var ids []string
pageToken := ""
statePath, hasStatePath := gmailBackupListStatePath(opts)
if opts.CacheMessages && !opts.RefreshCache && hasStatePath {
state, ok, err := readGmailBackupListState(statePath)
if err != nil {
return nil, err
}
if ok {
if state.Complete {
gmailBackupProgressf(ctx, "backup gmail list\tresume=complete\tmessages=%d", len(state.IDs))
return append([]string(nil), state.IDs...), nil
}
ids = append(ids, state.IDs...)
pageToken = state.PageToken
gmailBackupProgressf(ctx, "backup gmail list\tresume=partial\tmessages=%d", len(ids))
}
}
gmailBackupProgressf(ctx, "backup gmail list\tstart\tmessages=%d", len(ids))
for {
maxResults := int64(500)
if opts.Max > 0 {
@ -319,14 +363,123 @@ func listGmailBackupMessageIDs(ctx context.Context, svc *gmail.Service, opts gma
ids = append(ids, message.Id)
}
}
if resp.NextPageToken == "" {
gmailBackupProgressf(ctx, "backup gmail list\tmessages=%d", len(ids))
complete := resp.NextPageToken == "" || (opts.Max > 0 && int64(len(ids)) >= opts.Max)
if complete {
if opts.CacheMessages && hasStatePath {
if err := writeGmailBackupListState(statePath, opts, ids, "", true); err != nil {
return nil, err
}
}
break
}
pageToken = resp.NextPageToken
if opts.CacheMessages && hasStatePath {
if err := writeGmailBackupListState(statePath, opts, ids, pageToken, false); err != nil {
return nil, err
}
}
}
return ids, nil
}
// readGmailBackupListState loads a message-list checkpoint from path.
//
// The boolean result reports whether a usable checkpoint was found. A missing
// file or a checkpoint whose version is not 1 yields (zero, false, nil); any
// other read failure or undecodable JSON is returned as an error.
func readGmailBackupListState(path string) (gmailBackupListState, bool, error) {
	var none gmailBackupListState

	raw, readErr := os.ReadFile(path) //nolint:gosec // path is derived from the OS cache dir and query hash.
	switch {
	case readErr == nil:
		// File read; fall through to decoding.
	case os.IsNotExist(readErr):
		// No checkpoint yet is the normal first-run case, not an error.
		return none, false, nil
	default:
		return none, false, fmt.Errorf("read gmail backup list state %s: %w", path, readErr)
	}

	loaded := gmailBackupListState{}
	if decodeErr := json.Unmarshal(raw, &loaded); decodeErr != nil {
		return none, false, fmt.Errorf("decode gmail backup list state %s: %w", path, decodeErr)
	}
	if loaded.Version == 1 {
		return loaded, true, nil
	}
	// Unknown format version: behave as if nothing was cached.
	return none, false, nil
}
// writeGmailBackupListState persists a message-list checkpoint at path.
//
// The checkpoint captures the listing options from opts (query trimmed), a
// copy of ids, the page token to resume from, and whether the listing is
// complete. The parent directory is created with mode 0700 if needed. The JSON
// is written to a temporary file in that directory, set to mode 0600, and then
// renamed over path so readers never observe a partially written checkpoint.
// On any failure after the temporary file exists, it is removed.
func writeGmailBackupListState(path string, opts gmailBackupOptions, ids []string, pageToken string, complete bool) error {
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o700); err != nil {
		return fmt.Errorf("create gmail backup list state dir: %w", err)
	}

	var snapshot gmailBackupListState
	snapshot.Version = 1
	snapshot.AccountHash = opts.AccountHash
	snapshot.Query = strings.TrimSpace(opts.Query)
	snapshot.Max = opts.Max
	snapshot.IncludeSpamTrash = opts.IncludeSpamTrash
	snapshot.PageToken = pageToken
	snapshot.IDs = append([]string(nil), ids...)
	snapshot.Complete = complete
	snapshot.Updated = time.Now().UTC()

	payload, err := json.Marshal(snapshot)
	if err != nil {
		return fmt.Errorf("encode gmail backup list state: %w", err)
	}

	staging, err := os.CreateTemp(dir, ".list-*.json")
	if err != nil {
		return fmt.Errorf("create gmail backup list state temp: %w", err)
	}
	stagingPath := staging.Name()

	// Until the rename succeeds the staging file is garbage; drop it on every
	// early return below.
	published := false
	defer func() {
		if !published {
			_ = os.Remove(stagingPath)
		}
	}()

	if _, err := staging.Write(payload); err != nil {
		_ = staging.Close()
		return fmt.Errorf("write gmail backup list state temp: %w", err)
	}
	if err := staging.Close(); err != nil {
		return fmt.Errorf("close gmail backup list state temp: %w", err)
	}
	if err := os.Chmod(stagingPath, 0o600); err != nil {
		return fmt.Errorf("chmod gmail backup list state temp: %w", err)
	}
	if err := os.Rename(stagingPath, path); err != nil {
		return fmt.Errorf("replace gmail backup list state %s: %w", path, err)
	}
	published = true
	return nil
}
// gmailBackupListStatePath returns the checkpoint file location for the
// listing described by opts, and whether such a location could be determined.
//
// The file lives under <user cache dir>/gogcli/backup/gmail/<account hash>/list-v1/
// and is named after the hex SHA-256 of the JSON-encoded listing parameters
// (trimmed query, max, include-spam-trash), so each distinct listing gets its
// own checkpoint. It reports false when the account hash is blank, the user
// cache directory is unavailable, or the key cannot be encoded.
func gmailBackupListStatePath(opts gmailBackupOptions) (string, bool) {
	hash := strings.TrimSpace(opts.AccountHash)
	if hash == "" {
		return "", false
	}

	cacheRoot, err := os.UserCacheDir()
	if err != nil || strings.TrimSpace(cacheRoot) == "" {
		return "", false
	}

	// Only the parameters that change which messages are listed take part in
	// the file name.
	type listKey struct {
		Query            string `json:"query,omitempty"`
		Max              int64  `json:"max,omitempty"`
		IncludeSpamTrash bool   `json:"includeSpamTrash"`
	}
	encoded, err := json.Marshal(listKey{
		Query:            strings.TrimSpace(opts.Query),
		Max:              opts.Max,
		IncludeSpamTrash: opts.IncludeSpamTrash,
	})
	if err != nil {
		return "", false
	}

	digest := sha256.Sum256(encoded)
	fileName := hex.EncodeToString(digest[:]) + ".json"
	return filepath.Join(cacheRoot, "gogcli", "backup", "gmail", hash, "list-v1", fileName), true
}
// gmailBackupProgressf formats a progress message and prints it through the
// error-stream printer of the UI attached to ctx. When ctx carries no UI the
// message is silently dropped.
func gmailBackupProgressf(ctx context.Context, format string, args ...any) {
	if printer := ui.FromContext(ctx); printer != nil {
		printer.Err().Printf(format, args...)
	}
}
func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage, shardMaxRows int) ([]backup.PlainShard, error) {
if shardMaxRows <= 0 {
shardMaxRows = 1000

View File

@ -1,9 +1,11 @@
package cmd
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"os"
@ -16,6 +18,7 @@ import (
"google.golang.org/api/option"
"github.com/steipete/gogcli/internal/backup"
"github.com/steipete/gogcli/internal/ui"
)
func TestBackupAccountHashStableAndOpaque(t *testing.T) {
@ -174,6 +177,130 @@ func TestGmailBackupMessageCacheRejectsWrongID(t *testing.T) {
}
}
// TestListGmailBackupMessageIDsResumesFromCheckpoint seeds a partial list
// checkpoint (one ID plus page token "p2") and verifies that listing continues
// from that token with a single request, returns the merged IDs, reports
// progress on stderr, and rewrites the checkpoint as complete.
func TestListGmailBackupMessageIDsResumesFromCheckpoint(t *testing.T) {
	// os.UserCacheDir prefers XDG_CACHE_HOME on Unix and uses LocalAppData on
	// Windows, so redirecting HOME alone can still touch the real user cache.
	t.Setenv("HOME", t.TempDir())
	t.Setenv("XDG_CACHE_HOME", t.TempDir())
	t.Setenv("LocalAppData", t.TempDir())

	opts := gmailBackupOptions{
		AccountHash:      "accthash",
		IncludeSpamTrash: true,
		CacheMessages:    true,
	}
	path, ok := gmailBackupListStatePath(opts)
	if !ok {
		t.Fatal("expected list state path")
	}
	if err := writeGmailBackupListState(path, opts, []string{"m1"}, "p2", false); err != nil {
		t.Fatalf("writeGmailBackupListState: %v", err)
	}

	requests := 0
	svc, cleanup := newGmailServiceForTest(t, func(w http.ResponseWriter, r *http.Request) {
		requests++
		// The handler is presumably served off the test goroutine, where
		// FailNow (t.Fatalf) is not allowed; record the failure with Errorf
		// and still answer the request.
		if got := r.URL.Query().Get("pageToken"); got != "p2" {
			t.Errorf("pageToken = %q, want p2", got)
		}
		_ = json.NewEncoder(w).Encode(map[string]any{
			"messages": []map[string]string{{"id": "m2"}},
		})
	})
	defer cleanup()

	var stderr bytes.Buffer
	u, err := ui.New(ui.Options{Stdout: io.Discard, Stderr: &stderr, Color: "never"})
	if err != nil {
		t.Fatalf("ui.New: %v", err)
	}

	ids, err := listGmailBackupMessageIDs(ui.WithUI(context.Background(), u), svc, opts)
	if err != nil {
		t.Fatalf("listGmailBackupMessageIDs: %v", err)
	}
	if strings.Join(ids, ",") != "m1,m2" {
		t.Fatalf("ids = %v, want [m1 m2]", ids)
	}
	if requests != 1 {
		t.Fatalf("requests = %d, want 1", requests)
	}
	if !strings.Contains(stderr.String(), "resume=partial") || !strings.Contains(stderr.String(), "messages=2") {
		t.Fatalf("stderr missing progress: %s", stderr.String())
	}

	state, ok, err := readGmailBackupListState(path)
	if err != nil {
		t.Fatalf("readGmailBackupListState: %v", err)
	}
	if !ok || !state.Complete || strings.Join(state.IDs, ",") != "m1,m2" {
		t.Fatalf("state = %#v ok=%t", state, ok)
	}
}
// TestListGmailBackupMessageIDsReusesCompleteCheckpoint seeds a completed list
// checkpoint and verifies that its IDs are returned without issuing any list
// request.
func TestListGmailBackupMessageIDsReusesCompleteCheckpoint(t *testing.T) {
	// os.UserCacheDir prefers XDG_CACHE_HOME on Unix and uses LocalAppData on
	// Windows, so redirecting HOME alone can still touch the real user cache.
	t.Setenv("HOME", t.TempDir())
	t.Setenv("XDG_CACHE_HOME", t.TempDir())
	t.Setenv("LocalAppData", t.TempDir())

	opts := gmailBackupOptions{
		AccountHash:      "accthash",
		IncludeSpamTrash: true,
		CacheMessages:    true,
	}
	path, ok := gmailBackupListStatePath(opts)
	if !ok {
		t.Fatal("expected list state path")
	}
	if err := writeGmailBackupListState(path, opts, []string{"m1", "m2"}, "", true); err != nil {
		t.Fatalf("writeGmailBackupListState: %v", err)
	}

	// Any request reaching the server is counted and answered with 404, so an
	// unexpected API call shows up in the counter or as a listing error.
	requests := 0
	svc, cleanup := newGmailServiceForTest(t, func(w http.ResponseWriter, r *http.Request) {
		requests++
		http.NotFound(w, r)
	})
	defer cleanup()

	ids, err := listGmailBackupMessageIDs(context.Background(), svc, opts)
	if err != nil {
		t.Fatalf("listGmailBackupMessageIDs: %v", err)
	}
	if strings.Join(ids, ",") != "m1,m2" {
		t.Fatalf("ids = %v, want [m1 m2]", ids)
	}
	if requests != 0 {
		t.Fatalf("requests = %d, want 0", requests)
	}
}
// TestListGmailBackupMessageIDsMarksMaxLimitedRunComplete verifies that a
// listing which reaches its Max limit is checkpointed as complete with no page
// token, even though the server advertised a further page.
func TestListGmailBackupMessageIDsMarksMaxLimitedRunComplete(t *testing.T) {
	// os.UserCacheDir prefers XDG_CACHE_HOME on Unix and uses LocalAppData on
	// Windows, so redirecting HOME alone can still touch the real user cache,
	// where a stale checkpoint could satisfy this test without hitting the
	// server.
	t.Setenv("HOME", t.TempDir())
	t.Setenv("XDG_CACHE_HOME", t.TempDir())
	t.Setenv("LocalAppData", t.TempDir())

	opts := gmailBackupOptions{
		AccountHash:      "accthash",
		Max:              1,
		IncludeSpamTrash: true,
		CacheMessages:    true,
	}
	svc, cleanup := newGmailServiceForTest(t, func(w http.ResponseWriter, r *http.Request) {
		// Always advertise another page; the Max limit must stop the listing.
		_ = json.NewEncoder(w).Encode(map[string]any{
			"messages":      []map[string]string{{"id": "m1"}},
			"nextPageToken": "p2",
		})
	})
	defer cleanup()

	ids, err := listGmailBackupMessageIDs(context.Background(), svc, opts)
	if err != nil {
		t.Fatalf("listGmailBackupMessageIDs: %v", err)
	}
	if strings.Join(ids, ",") != "m1" {
		t.Fatalf("ids = %v, want [m1]", ids)
	}

	path, ok := gmailBackupListStatePath(opts)
	if !ok {
		t.Fatal("expected list state path")
	}
	state, ok, err := readGmailBackupListState(path)
	if err != nil {
		t.Fatalf("readGmailBackupListState: %v", err)
	}
	if !ok || !state.Complete || state.PageToken != "" {
		t.Fatalf("state = %#v ok=%t", state, ok)
	}
}
func TestFetchBackupDriveCollaborationCollectsMetadataAndErrors(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")