feat(backup): resume full gmail backups
This commit is contained in:
parent
eb6886633d
commit
ff5e85b159
@ -7,6 +7,7 @@
|
||||
- Backup: expand `gog backup push --services all` with Drive content export/download, Gmail settings, native Workspace Docs/Sheets/Slides/Form data, Apps Script projects, Chat, Classroom, best-effort optional service error shards, and plaintext Drive file export.
|
||||
- Backup: extend `--services all` with Drive permissions/comments/revisions, Calendar ACL/settings/colors, contact groups, Cloud Identity groups, Workspace Admin Directory users/groups/members, Keep notes, and local Gmail message caching for resumable full-mailbox fetches.
|
||||
- Backup: bound individual Drive content exports with `--drive-content-timeout` so one stuck Google export records an encrypted error row instead of blocking the full backup.
|
||||
- Backup: add Gmail message-list checkpoints and stderr progress counters so full-mailbox backups can resume cleanly after interruption.
|
||||
|
||||
### Fixed
|
||||
- Gmail: auto-fill draft reply subjects from the original message when `gmail drafts create --reply-to-message-id` omits `--subject`. (#488) — thanks @jbowerbir.
|
||||
|
||||
@ -753,9 +753,10 @@ metadata, permissions, comments, revisions, and exported Google-native file
|
||||
content by default. Non-Google binary Drive files are metadata-only unless
|
||||
`--drive-binary-contents` is set. `--drive-content-timeout` turns a stuck
|
||||
per-file export into an encrypted error row instead of wedging the run. Gmail
|
||||
raw-message fetches use a local cache by default so interrupted full-mailbox
|
||||
backups can resume; use
|
||||
`--gmail-refresh-cache` to force a refetch. Workspace inventories
|
||||
raw-message fetches and message-list pages use a local cache by default so
|
||||
interrupted full-mailbox backups can resume; progress is written to stderr
|
||||
while stdout stays parseable. Use `--gmail-refresh-cache` to force a refetch.
|
||||
Workspace inventories
|
||||
Docs/Sheets/Slides and backs up Forms/responses discovered through Drive; add
|
||||
`--workspace-native` for full native Docs/Sheets/Slides API JSON.
|
||||
Optional Workspace-only services use `--best-effort` by default, recording
|
||||
|
||||
@ -259,13 +259,15 @@ Raw message payloads stay base64url encoded inside encrypted JSONL. This
|
||||
preserves the RFC 2822 message content while keeping the shard format text
|
||||
friendly.
|
||||
|
||||
By default, each fetched raw message is also cached locally under the OS user
|
||||
cache directory (`gogcli/backup/gmail/<account-hash>/raw-v1/`). The cache stores
|
||||
the same raw message row that will be encrypted into shards and is keyed by a
|
||||
SHA-256 of the Gmail message ID, so rerunning after an interruption can reuse
|
||||
already fetched messages. `--gmail-refresh-cache` forces a refetch. The cache is
|
||||
plaintext local data; clear it if the machine should not retain local mail
|
||||
copies outside the encrypted backup/export locations.
|
||||
By default, Gmail backup state is cached locally under the OS user cache
|
||||
directory (`gogcli/backup/gmail/<account-hash>/`). Message-list page checkpoints
|
||||
live under `list-v1/`, and fetched raw messages live under `raw-v1/`. Raw-message
|
||||
cache files store the same row that will be encrypted into shards and are keyed
|
||||
by a SHA-256 of the Gmail message ID, so rerunning after an interruption can
|
||||
reuse already fetched messages. Long Gmail runs report list/fetch counters to
|
||||
stderr while stdout stays parseable. `--gmail-refresh-cache` forces a refetch.
|
||||
The cache is plaintext local data; clear it if the machine should not retain
|
||||
local mail copies outside the encrypted backup/export locations.
|
||||
|
||||
`--include-spam-trash` defaults to true. Use `--query` and `--max` for bounded
|
||||
test exports; omit them for a full mailbox scan.
|
||||
|
||||
@ -16,6 +16,7 @@ import (
|
||||
"google.golang.org/api/gmail/v1"
|
||||
|
||||
"github.com/steipete/gogcli/internal/backup"
|
||||
"github.com/steipete/gogcli/internal/ui"
|
||||
)
|
||||
|
||||
type gmailBackupOptions struct {
|
||||
@ -50,6 +51,18 @@ type gmailBackupLabel struct {
|
||||
ThreadsUnread int64 `json:"threadsUnread,omitempty"`
|
||||
}
|
||||
|
||||
// gmailBackupListState is the JSON checkpoint persisted while Gmail message
// IDs are being listed for a backup.
//
// Version identifies the checkpoint format. AccountHash, Query, Max and
// IncludeSpamTrash record the listing parameters the checkpoint was written
// for. IDs holds the message IDs collected so far, PageToken is the token to
// continue listing from, Complete reports whether the listing finished, and
// Updated is the time the checkpoint was written.
type gmailBackupListState struct {
	Version          int       `json:"version"`
	AccountHash      string    `json:"accountHash"`
	Query            string    `json:"query,omitempty"`
	Max              int64     `json:"max,omitempty"`
	IncludeSpamTrash bool      `json:"includeSpamTrash"`
	PageToken        string    `json:"pageToken,omitempty"`
	IDs              []string  `json:"ids"`
	Complete         bool      `json:"complete"`
	Updated          time.Time `json:"updated"`
}
|
||||
|
||||
func buildGmailBackupSnapshot(ctx context.Context, flags *RootFlags, opts gmailBackupOptions) (backup.Snapshot, error) {
|
||||
if opts.ShardMaxRows <= 0 {
|
||||
opts.ShardMaxRows = 1000
|
||||
@ -125,11 +138,13 @@ func fetchGmailBackupMessages(ctx context.Context, svc *gmail.Service, opts gmai
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
gmailBackupProgressf(ctx, "backup gmail fetch\tqueued=%d", len(ids))
|
||||
const maxConcurrency = 8
|
||||
sem := make(chan struct{}, maxConcurrency)
|
||||
type result struct {
|
||||
index int
|
||||
msg gmailBackupMessage
|
||||
cache bool
|
||||
err error
|
||||
}
|
||||
results := make(chan result, len(ids))
|
||||
@ -152,7 +167,7 @@ func fetchGmailBackupMessages(ctx context.Context, svc *gmail.Service, opts gmai
|
||||
return
|
||||
}
|
||||
if ok {
|
||||
results <- result{index: index, msg: msg}
|
||||
results <- result{index: index, msg: msg, cache: true}
|
||||
return
|
||||
}
|
||||
}
|
||||
@ -193,11 +208,23 @@ func fetchGmailBackupMessages(ctx context.Context, svc *gmail.Service, opts gmai
|
||||
}()
|
||||
ordered := make([]gmailBackupMessage, len(ids))
|
||||
var firstErr error
|
||||
done := 0
|
||||
cacheHits := 0
|
||||
fetched := 0
|
||||
for res := range results {
|
||||
if res.err != nil && firstErr == nil {
|
||||
firstErr = res.err
|
||||
}
|
||||
ordered[res.index] = res.msg
|
||||
done++
|
||||
if res.cache {
|
||||
cacheHits++
|
||||
} else if res.err == nil {
|
||||
fetched++
|
||||
}
|
||||
if done == len(ids) || done%100 == 0 {
|
||||
gmailBackupProgressf(ctx, "backup gmail fetch\t%d/%d\tfetched=%d\tcache=%d", done, len(ids), fetched, cacheHits)
|
||||
}
|
||||
}
|
||||
if firstErr != nil {
|
||||
return nil, firstErr
|
||||
@ -288,6 +315,23 @@ func gmailBackupMessageCachePath(accountHash, messageID string) (string, bool) {
|
||||
func listGmailBackupMessageIDs(ctx context.Context, svc *gmail.Service, opts gmailBackupOptions) ([]string, error) {
|
||||
var ids []string
|
||||
pageToken := ""
|
||||
statePath, hasStatePath := gmailBackupListStatePath(opts)
|
||||
if opts.CacheMessages && !opts.RefreshCache && hasStatePath {
|
||||
state, ok, err := readGmailBackupListState(statePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ok {
|
||||
if state.Complete {
|
||||
gmailBackupProgressf(ctx, "backup gmail list\tresume=complete\tmessages=%d", len(state.IDs))
|
||||
return append([]string(nil), state.IDs...), nil
|
||||
}
|
||||
ids = append(ids, state.IDs...)
|
||||
pageToken = state.PageToken
|
||||
gmailBackupProgressf(ctx, "backup gmail list\tresume=partial\tmessages=%d", len(ids))
|
||||
}
|
||||
}
|
||||
gmailBackupProgressf(ctx, "backup gmail list\tstart\tmessages=%d", len(ids))
|
||||
for {
|
||||
maxResults := int64(500)
|
||||
if opts.Max > 0 {
|
||||
@ -319,14 +363,123 @@ func listGmailBackupMessageIDs(ctx context.Context, svc *gmail.Service, opts gma
|
||||
ids = append(ids, message.Id)
|
||||
}
|
||||
}
|
||||
if resp.NextPageToken == "" {
|
||||
gmailBackupProgressf(ctx, "backup gmail list\tmessages=%d", len(ids))
|
||||
complete := resp.NextPageToken == "" || (opts.Max > 0 && int64(len(ids)) >= opts.Max)
|
||||
if complete {
|
||||
if opts.CacheMessages && hasStatePath {
|
||||
if err := writeGmailBackupListState(statePath, opts, ids, "", true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
pageToken = resp.NextPageToken
|
||||
if opts.CacheMessages && hasStatePath {
|
||||
if err := writeGmailBackupListState(statePath, opts, ids, pageToken, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
func readGmailBackupListState(path string) (gmailBackupListState, bool, error) {
|
||||
data, err := os.ReadFile(path) //nolint:gosec // path is derived from the OS cache dir and query hash.
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return gmailBackupListState{}, false, nil
|
||||
}
|
||||
return gmailBackupListState{}, false, fmt.Errorf("read gmail backup list state %s: %w", path, err)
|
||||
}
|
||||
var state gmailBackupListState
|
||||
if err := json.Unmarshal(data, &state); err != nil {
|
||||
return gmailBackupListState{}, false, fmt.Errorf("decode gmail backup list state %s: %w", path, err)
|
||||
}
|
||||
if state.Version != 1 {
|
||||
return gmailBackupListState{}, false, nil
|
||||
}
|
||||
return state, true, nil
|
||||
}
|
||||
|
||||
func writeGmailBackupListState(path string, opts gmailBackupOptions, ids []string, pageToken string, complete bool) error {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return fmt.Errorf("create gmail backup list state dir: %w", err)
|
||||
}
|
||||
state := gmailBackupListState{
|
||||
Version: 1,
|
||||
AccountHash: opts.AccountHash,
|
||||
Query: strings.TrimSpace(opts.Query),
|
||||
Max: opts.Max,
|
||||
IncludeSpamTrash: opts.IncludeSpamTrash,
|
||||
PageToken: pageToken,
|
||||
IDs: append([]string(nil), ids...),
|
||||
Complete: complete,
|
||||
Updated: time.Now().UTC(),
|
||||
}
|
||||
data, err := json.Marshal(state)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encode gmail backup list state: %w", err)
|
||||
}
|
||||
tmp, err := os.CreateTemp(filepath.Dir(path), ".list-*.json")
|
||||
if err != nil {
|
||||
return fmt.Errorf("create gmail backup list state temp: %w", err)
|
||||
}
|
||||
tmpPath := tmp.Name()
|
||||
if _, err := tmp.Write(data); err != nil {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmpPath)
|
||||
return fmt.Errorf("write gmail backup list state temp: %w", err)
|
||||
}
|
||||
if err := tmp.Close(); err != nil {
|
||||
_ = os.Remove(tmpPath)
|
||||
return fmt.Errorf("close gmail backup list state temp: %w", err)
|
||||
}
|
||||
if err := os.Chmod(tmpPath, 0o600); err != nil {
|
||||
_ = os.Remove(tmpPath)
|
||||
return fmt.Errorf("chmod gmail backup list state temp: %w", err)
|
||||
}
|
||||
if err := os.Rename(tmpPath, path); err != nil {
|
||||
_ = os.Remove(tmpPath)
|
||||
return fmt.Errorf("replace gmail backup list state %s: %w", path, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func gmailBackupListStatePath(opts gmailBackupOptions) (string, bool) {
|
||||
accountHash := strings.TrimSpace(opts.AccountHash)
|
||||
if accountHash == "" {
|
||||
return "", false
|
||||
}
|
||||
dir, err := os.UserCacheDir()
|
||||
if err != nil || strings.TrimSpace(dir) == "" {
|
||||
return "", false
|
||||
}
|
||||
key := struct {
|
||||
Query string `json:"query,omitempty"`
|
||||
Max int64 `json:"max,omitempty"`
|
||||
IncludeSpamTrash bool `json:"includeSpamTrash"`
|
||||
}{
|
||||
Query: strings.TrimSpace(opts.Query),
|
||||
Max: opts.Max,
|
||||
IncludeSpamTrash: opts.IncludeSpamTrash,
|
||||
}
|
||||
data, err := json.Marshal(key)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
sum := sha256.Sum256(data)
|
||||
name := hex.EncodeToString(sum[:]) + ".json"
|
||||
return filepath.Join(dir, "gogcli", "backup", "gmail", accountHash, "list-v1", name), true
|
||||
}
|
||||
|
||||
func gmailBackupProgressf(ctx context.Context, format string, args ...any) {
|
||||
u := ui.FromContext(ctx)
|
||||
if u == nil {
|
||||
return
|
||||
}
|
||||
u.Err().Printf(format, args...)
|
||||
}
|
||||
|
||||
func buildGmailMessageShards(accountHash string, messages []gmailBackupMessage, shardMaxRows int) ([]backup.PlainShard, error) {
|
||||
if shardMaxRows <= 0 {
|
||||
shardMaxRows = 1000
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
@ -16,6 +18,7 @@ import (
|
||||
"google.golang.org/api/option"
|
||||
|
||||
"github.com/steipete/gogcli/internal/backup"
|
||||
"github.com/steipete/gogcli/internal/ui"
|
||||
)
|
||||
|
||||
func TestBackupAccountHashStableAndOpaque(t *testing.T) {
|
||||
@ -174,6 +177,130 @@ func TestGmailBackupMessageCacheRejectsWrongID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestListGmailBackupMessageIDsResumesFromCheckpoint(t *testing.T) {
|
||||
t.Setenv("HOME", t.TempDir())
|
||||
opts := gmailBackupOptions{
|
||||
AccountHash: "accthash",
|
||||
IncludeSpamTrash: true,
|
||||
CacheMessages: true,
|
||||
}
|
||||
path, ok := gmailBackupListStatePath(opts)
|
||||
if !ok {
|
||||
t.Fatal("expected list state path")
|
||||
}
|
||||
if err := writeGmailBackupListState(path, opts, []string{"m1"}, "p2", false); err != nil {
|
||||
t.Fatalf("writeGmailBackupListState: %v", err)
|
||||
}
|
||||
|
||||
requests := 0
|
||||
svc, cleanup := newGmailServiceForTest(t, func(w http.ResponseWriter, r *http.Request) {
|
||||
requests++
|
||||
if got := r.URL.Query().Get("pageToken"); got != "p2" {
|
||||
t.Fatalf("pageToken = %q, want p2", got)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"messages": []map[string]string{{"id": "m2"}},
|
||||
})
|
||||
})
|
||||
defer cleanup()
|
||||
|
||||
var stderr bytes.Buffer
|
||||
u, err := ui.New(ui.Options{Stdout: io.Discard, Stderr: &stderr, Color: "never"})
|
||||
if err != nil {
|
||||
t.Fatalf("ui.New: %v", err)
|
||||
}
|
||||
ids, err := listGmailBackupMessageIDs(ui.WithUI(context.Background(), u), svc, opts)
|
||||
if err != nil {
|
||||
t.Fatalf("listGmailBackupMessageIDs: %v", err)
|
||||
}
|
||||
if strings.Join(ids, ",") != "m1,m2" {
|
||||
t.Fatalf("ids = %v, want [m1 m2]", ids)
|
||||
}
|
||||
if requests != 1 {
|
||||
t.Fatalf("requests = %d, want 1", requests)
|
||||
}
|
||||
if !strings.Contains(stderr.String(), "resume=partial") || !strings.Contains(stderr.String(), "messages=2") {
|
||||
t.Fatalf("stderr missing progress: %s", stderr.String())
|
||||
}
|
||||
state, ok, err := readGmailBackupListState(path)
|
||||
if err != nil {
|
||||
t.Fatalf("readGmailBackupListState: %v", err)
|
||||
}
|
||||
if !ok || !state.Complete || strings.Join(state.IDs, ",") != "m1,m2" {
|
||||
t.Fatalf("state = %#v ok=%t", state, ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListGmailBackupMessageIDsReusesCompleteCheckpoint(t *testing.T) {
|
||||
t.Setenv("HOME", t.TempDir())
|
||||
opts := gmailBackupOptions{
|
||||
AccountHash: "accthash",
|
||||
IncludeSpamTrash: true,
|
||||
CacheMessages: true,
|
||||
}
|
||||
path, ok := gmailBackupListStatePath(opts)
|
||||
if !ok {
|
||||
t.Fatal("expected list state path")
|
||||
}
|
||||
if err := writeGmailBackupListState(path, opts, []string{"m1", "m2"}, "", true); err != nil {
|
||||
t.Fatalf("writeGmailBackupListState: %v", err)
|
||||
}
|
||||
|
||||
requests := 0
|
||||
svc, cleanup := newGmailServiceForTest(t, func(w http.ResponseWriter, r *http.Request) {
|
||||
requests++
|
||||
http.NotFound(w, r)
|
||||
})
|
||||
defer cleanup()
|
||||
|
||||
ids, err := listGmailBackupMessageIDs(context.Background(), svc, opts)
|
||||
if err != nil {
|
||||
t.Fatalf("listGmailBackupMessageIDs: %v", err)
|
||||
}
|
||||
if strings.Join(ids, ",") != "m1,m2" {
|
||||
t.Fatalf("ids = %v, want [m1 m2]", ids)
|
||||
}
|
||||
if requests != 0 {
|
||||
t.Fatalf("requests = %d, want 0", requests)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListGmailBackupMessageIDsMarksMaxLimitedRunComplete(t *testing.T) {
|
||||
t.Setenv("HOME", t.TempDir())
|
||||
opts := gmailBackupOptions{
|
||||
AccountHash: "accthash",
|
||||
Max: 1,
|
||||
IncludeSpamTrash: true,
|
||||
CacheMessages: true,
|
||||
}
|
||||
svc, cleanup := newGmailServiceForTest(t, func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"messages": []map[string]string{{"id": "m1"}},
|
||||
"nextPageToken": "p2",
|
||||
})
|
||||
})
|
||||
defer cleanup()
|
||||
|
||||
ids, err := listGmailBackupMessageIDs(context.Background(), svc, opts)
|
||||
if err != nil {
|
||||
t.Fatalf("listGmailBackupMessageIDs: %v", err)
|
||||
}
|
||||
if strings.Join(ids, ",") != "m1" {
|
||||
t.Fatalf("ids = %v, want [m1]", ids)
|
||||
}
|
||||
path, ok := gmailBackupListStatePath(opts)
|
||||
if !ok {
|
||||
t.Fatal("expected list state path")
|
||||
}
|
||||
state, ok, err := readGmailBackupListState(path)
|
||||
if err != nil {
|
||||
t.Fatalf("readGmailBackupListState: %v", err)
|
||||
}
|
||||
if !ok || !state.Complete || state.PageToken != "" {
|
||||
t.Fatalf("state = %#v ok=%t", state, ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchBackupDriveCollaborationCollectsMetadataAndErrors(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user