gogcli/internal/cmd/gmail_thread.go
Alex Hillman d08771e66d
feat(gmail): add --body-format flag to messages search
Supports "text" (default, existing behavior) and "html" to prefer
the HTML MIME part over plaintext. Useful for newsletter ingestion
where the HTML body contains the rich formatted content.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
(cherry picked from commit cb5499ed7b4d20a863de1aef07c58c94ecd9a9fe)
2026-04-27 22:38:57 +01:00

721 lines
19 KiB
Go

package cmd
import (
"bytes"
"context"
"encoding/base64"
"errors"
"fmt"
"io"
"mime"
"mime/quotedprintable"
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"unicode/utf8"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding/ianaindex"
"google.golang.org/api/gmail/v1"
"github.com/steipete/gogcli/internal/config"
"github.com/steipete/gogcli/internal/outfmt"
"github.com/steipete/gogcli/internal/ui"
)
// Precompiled expressions used to reduce an HTML body to readable plain text.
// They are compiled once at package initialisation.
var (
	// whitespacePattern matches any run of whitespace, newlines included.
	whitespacePattern = regexp.MustCompile(`\s+`)

	// htmlTagPattern matches one tag: "<", any characters except ">", then ">".
	htmlTagPattern = regexp.MustCompile(`<[^>]*>`)

	// stylePattern matches a whole <style> element together with its content
	// (case-insensitive, and "." also matches newlines).
	stylePattern = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)

	// scriptPattern matches a whole <script> element together with its content
	// (case-insensitive, and "." also matches newlines).
	scriptPattern = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
)
func stripHTMLTags(s string) string {
// First remove script and style blocks entirely
s = scriptPattern.ReplaceAllString(s, "")
s = stylePattern.ReplaceAllString(s, "")
// Then remove remaining HTML tags
s = htmlTagPattern.ReplaceAllString(s, " ")
// Collapse whitespace
s = whitespacePattern.ReplaceAllString(s, " ")
return strings.TrimSpace(s)
}
// GmailThreadCmd groups the thread-level subcommands: get, modify and
// attachments. Each field's struct tag carries that subcommand's name,
// aliases and help text.
type GmailThreadCmd struct {
// Get is tagged default:"withargs"; it retrieves a thread and its messages.
Get GmailThreadGetCmd `cmd:"" name:"get" aliases:"info,show" default:"withargs" help:"Get a thread with all messages (optionally download attachments)"`
// Modify changes labels on a thread.
Modify GmailThreadModifyCmd `cmd:"" name:"modify" aliases:"update,edit,set" help:"Modify labels on all messages in a thread"`
// Attachments lists (and optionally downloads) a thread's attachments.
Attachments GmailThreadAttachmentsCmd `cmd:"" name:"attachments" aliases:"files" help:"List all attachments in a thread"`
}
// GmailThreadGetCmd holds the argument and flags of the thread "get"
// subcommand; its Run method fetches the thread and prints it or emits JSON.
type GmailThreadGetCmd struct {
// ThreadID is the positional thread identifier; Run trims and normalizes it.
ThreadID string `arg:"" name:"threadId" help:"Thread ID"`
// Download makes Run download the attachments of each message.
Download bool `name:"download" help:"Download attachments"`
// Full disables the 500-rune body truncation Run applies in text output.
Full bool `name:"full" help:"Show full message bodies"`
// OutputDir supplies the download directory; Run uses "." when its Dir is blank.
OutputDir OutputDirFlag `embed:""`
}
// Run fetches the thread in "full" format and renders it.
//
// In JSON mode the raw thread is written to stdout together with a summary of
// any downloaded attachments. In text mode each message is printed with its
// From/To/Subject/Date headers, a body preview (HTML is stripped to text and
// the preview is cut to 500 runes unless --full is set) and its attachment
// list. With --download, attachments are saved to the output directory, or to
// the current directory when none was given.
func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
	u := ui.FromContext(ctx)

	account, err := requireAccount(flags)
	if err != nil {
		return err
	}

	threadID := normalizeGmailThreadID(strings.TrimSpace(c.ThreadID))
	if threadID == "" {
		return usage("empty threadId")
	}

	svc, err := newGmailService(ctx, account)
	if err != nil {
		return err
	}

	thread, err := svc.Users.Threads.Get("me", threadID).Format("full").Context(ctx).Do()
	if err != nil {
		return err
	}

	// The download directory is resolved only when downloads were requested.
	// It defaults to the current directory, not the gogcli config dir.
	attachDir := ""
	if c.Download {
		attachDir = "."
		if strings.TrimSpace(c.OutputDir.Dir) != "" {
			expanded, expandErr := config.ExpandPath(c.OutputDir.Dir)
			if expandErr != nil {
				return expandErr
			}
			attachDir = filepath.Clean(expanded)
		}
	}

	if outfmt.IsJSON(ctx) {
		var summaries []attachmentDownloadSummary
		if thread != nil && c.Download {
			for _, m := range thread.Messages {
				// Messages without an ID cannot be downloaded from; skip them.
				if m != nil && m.Id != "" {
					downloads, dlErr := downloadAttachmentOutputs(ctx, svc, m.Id, collectAttachments(m.Payload), attachDir)
					if dlErr != nil {
						return dlErr
					}
					summaries = append(summaries, attachmentDownloadSummaries(downloads)...)
				}
			}
		}
		return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
			"thread":     thread,
			"downloaded": summaries,
		})
	}

	if thread == nil || len(thread.Messages) == 0 {
		u.Err().Println("Empty thread")
		return nil
	}

	// Announce the message count first so readers know what to expect.
	total := len(thread.Messages)
	u.Out().Printf("Thread contains %d message(s)", total)
	u.Out().Println("")

	for i, msg := range thread.Messages {
		if msg == nil {
			continue
		}

		u.Out().Printf("=== Message %d/%d: %s ===", i+1, total, msg.Id)
		u.Out().Printf("From: %s", headerValue(msg.Payload, "From"))
		u.Out().Printf("To: %s", headerValue(msg.Payload, "To"))
		u.Out().Printf("Subject: %s", headerValue(msg.Payload, "Subject"))
		u.Out().Printf("Date: %s", headerValue(msg.Payload, "Date"))
		u.Out().Println("")

		if text, isHTML := bestBodyForDisplay(msg.Payload); text != "" {
			if isHTML {
				// Reduce markup to plain text for terminal output.
				text = stripHTMLTags(text)
			}
			if !c.Full {
				// Truncate on rune boundaries so multi-byte characters stay intact.
				if runes := []rune(text); len(runes) > 500 {
					text = string(runes[:500]) + "... [truncated]"
				}
			}
			u.Out().Println(text)
			u.Out().Println("")
		}

		attachments := collectAttachments(msg.Payload)
		printAttachmentSection(u.Out(), attachments)

		if !c.Download || len(attachments) == 0 {
			continue
		}

		downloads, err := downloadAttachmentOutputs(ctx, svc, msg.Id, attachments, attachDir)
		if err != nil {
			return err
		}
		for _, d := range downloads {
			switch {
			case d.Cached:
				u.Out().Printf("Cached: %s", d.Path)
			default:
				u.Out().Successf("Saved: %s", d.Path)
			}
		}
		u.Out().Println("")
	}

	return nil
}
// GmailThreadModifyCmd holds the argument and flags of the thread "modify"
// subcommand. Run requires at least one of Add or Remove to be non-empty.
type GmailThreadModifyCmd struct {
// ThreadID is the positional thread identifier; Run trims and normalizes it.
ThreadID string `arg:"" name:"threadId" help:"Thread ID"`
// Add is a comma-separated list of labels (name or ID) to add.
Add string `name:"add" help:"Labels to add (comma-separated, name or ID)"`
// Remove is a comma-separated list of labels (name or ID) to remove.
Remove string `name:"remove" help:"Labels to remove (comma-separated, name or ID)"`
}
// Run adds and/or removes labels on a thread through the Gmail Threads.Modify
// call.
//
// Arguments are validated and the dry-run hook is consulted before an account
// is resolved or a service is created. The given labels are resolved to label
// IDs via resolveModifyLabelIDs, and the outcome is reported as JSON or as a
// one-line confirmation.
func (c *GmailThreadModifyCmd) Run(ctx context.Context, flags *RootFlags) error {
	u := ui.FromContext(ctx)

	threadID := normalizeGmailThreadID(strings.TrimSpace(c.ThreadID))
	if threadID == "" {
		return usage("empty threadId")
	}

	toAdd, toRemove := splitCSV(c.Add), splitCSV(c.Remove)
	if len(toAdd) == 0 && len(toRemove) == 0 {
		return usage("must specify --add and/or --remove")
	}

	// Describe the planned change to the dry-run hook before doing any work.
	plan := map[string]any{
		"thread_id": threadID,
		"add":       toAdd,
		"remove":    toRemove,
	}
	if err := dryRunExit(ctx, flags, "gmail.thread.modify", plan); err != nil {
		return err
	}

	account, err := requireAccount(flags)
	if err != nil {
		return err
	}

	svc, err := newGmailService(ctx, account)
	if err != nil {
		return err
	}

	addIDs, removeIDs, err := resolveModifyLabelIDs(svc, toAdd, toRemove)
	if err != nil {
		return err
	}

	// A single Threads.Modify call applies the change to the whole thread.
	request := &gmail.ModifyThreadRequest{
		AddLabelIds:    addIDs,
		RemoveLabelIds: removeIDs,
	}
	if _, err := svc.Users.Threads.Modify("me", threadID, request).Context(ctx).Do(); err != nil {
		return err
	}

	if !outfmt.IsJSON(ctx) {
		u.Out().Printf("Modified thread %s", threadID)
		return nil
	}

	return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
		"modified":      threadID,
		"addedLabels":   addIDs,
		"removedLabels": removeIDs,
	})
}
// GmailThreadAttachmentsCmd lists all attachments in a thread and, when
// Download is set, saves them to disk.
type GmailThreadAttachmentsCmd struct {
// ThreadID is the positional thread identifier; Run trims and normalizes it.
ThreadID string `arg:"" name:"threadId" help:"Thread ID"`
// Download makes Run fetch every attachment instead of only listing it.
Download bool `name:"download" help:"Download all attachments"`
// OutputDir supplies the download directory; Run uses "." when its Dir is blank.
OutputDir OutputDirFlag `embed:""`
}
// Run collects the attachments of every message in the thread and reports
// them, downloading each one first when --download is set.
//
// An empty thread yields an empty "attachments" array in JSON mode and an
// "Empty thread" notice on the error stream otherwise. Downloads go to the
// output directory, or to the current directory when none was given.
func (c *GmailThreadAttachmentsCmd) Run(ctx context.Context, flags *RootFlags) error {
	u := ui.FromContext(ctx)

	account, err := requireAccount(flags)
	if err != nil {
		return err
	}

	threadID := normalizeGmailThreadID(strings.TrimSpace(c.ThreadID))
	if threadID == "" {
		return usage("empty threadId")
	}

	svc, err := newGmailService(ctx, account)
	if err != nil {
		return err
	}

	thread, err := svc.Users.Threads.Get("me", threadID).Format("full").Context(ctx).Do()
	if err != nil {
		return err
	}

	if thread == nil || len(thread.Messages) == 0 {
		if !outfmt.IsJSON(ctx) {
			u.Err().Println("Empty thread")
			return nil
		}
		return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
			"threadId":    threadID,
			"attachments": []any{},
		})
	}

	// The download directory is resolved only when downloads were requested.
	attachDir := ""
	if c.Download {
		attachDir = "."
		if strings.TrimSpace(c.OutputDir.Dir) != "" {
			expanded, expandErr := config.ExpandPath(c.OutputDir.Dir)
			if expandErr != nil {
				return expandErr
			}
			attachDir = filepath.Clean(expanded)
		}
	}

	var collected []attachmentDownloadOutput
	for _, msg := range thread.Messages {
		if msg == nil {
			continue
		}
		found := collectAttachments(msg.Payload)
		if !c.Download {
			// Listing only: convert metadata without fetching anything.
			collected = append(collected, attachmentDownloadOutputsFromInfo(msg.Id, found)...)
			continue
		}
		downloads, err := downloadAttachmentOutputs(ctx, svc, msg.Id, found, attachDir)
		if err != nil {
			return err
		}
		collected = append(collected, downloads...)
	}

	if outfmt.IsJSON(ctx) {
		return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
			"threadId":    threadID,
			"attachments": collected,
		})
	}

	if len(collected) == 0 {
		u.Out().Println("No attachments found")
		return nil
	}

	u.Out().Printf("Found %d attachment(s):", len(collected))
	if !c.Download {
		printAttachmentLines(u.Out(), attachmentOutputsFromDownloads(collected))
		return nil
	}

	for _, item := range collected {
		label := "Saved"
		if item.Cached {
			label = "Cached"
		}
		u.Out().Printf(" %s: %s (%s) - %s", label, item.Filename, item.SizeHuman, item.Path)
	}
	return nil
}
// GmailURLCmd holds the positional thread IDs for which Run prints Gmail web
// URLs.
type GmailURLCmd struct {
// ThreadIDs are the thread identifiers; Run normalizes each before use.
ThreadIDs []string `arg:"" name:"threadId" help:"Thread IDs"`
}
// Run prints a Gmail web URL for every given thread ID.
//
// Each ID is normalized first. The account is query-escaped into the
// "authuser" parameter and the thread ID is appended to the "#all/" fragment.
// Text mode prints "id<TAB>url" per line; JSON mode writes a "urls" array of
// {"id", "url"} objects.
func (c *GmailURLCmd) Run(ctx context.Context, flags *RootFlags) error {
	u := ui.FromContext(ctx)

	account, err := requireAccount(flags)
	if err != nil {
		return err
	}

	// link builds the web URL for one normalized thread ID.
	link := func(id string) string {
		return fmt.Sprintf("https://mail.google.com/mail/?authuser=%s#all/%s", url.QueryEscape(account), id)
	}

	if !outfmt.IsJSON(ctx) {
		for _, raw := range c.ThreadIDs {
			id := normalizeGmailThreadID(raw)
			u.Out().Printf("%s\t%s", id, link(id))
		}
		return nil
	}

	urls := make([]map[string]string, 0, len(c.ThreadIDs))
	for _, raw := range c.ThreadIDs {
		id := normalizeGmailThreadID(raw)
		urls = append(urls, map[string]string{
			"id":  id,
			"url": link(id),
		})
	}
	return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{"urls": urls})
}
// bestBodyText returns the decoded text/plain body found in p or its sub-parts,
// falling back to the text/html body when no non-empty plain part exists.
// It returns "" for a nil part or when neither is found.
func bestBodyText(p *gmail.MessagePart) string {
	if p == nil {
		return ""
	}
	if text := findPartBody(p, "text/plain"); text != "" {
		return text
	}
	return findPartBody(p, "text/html")
}
// bestBodyHTML returns the decoded text/html body found in p or its sub-parts,
// falling back to the text/plain body when no non-empty HTML part exists.
// It returns "" for a nil part or when neither is found.
func bestBodyHTML(p *gmail.MessagePart) string {
	if p == nil {
		return ""
	}
	if markup := findPartBody(p, "text/html"); markup != "" {
		return markup
	}
	return findPartBody(p, "text/plain")
}
// bestBodyForDisplay picks the body to show and reports whether it should be
// treated as HTML.
//
// A non-empty text/plain body wins; it is flagged as HTML when looksLikeHTML
// says its content is markup. Otherwise the text/html body is returned and
// flagged as HTML. A nil part or a message with neither body yields ("", false).
func bestBodyForDisplay(p *gmail.MessagePart) (string, bool) {
	if p == nil {
		return "", false
	}
	if plain := findPartBody(p, "text/plain"); plain != "" {
		// Some senders put markup in the text/plain part.
		return plain, looksLikeHTML(plain)
	}
	markup := findPartBody(p, "text/html")
	return markup, markup != ""
}
// findPartBody searches p and then its sub-parts, depth-first and in order,
// for a part of the given MIME type and returns its decoded body.
//
// A matching part whose body fails to decode is ignored and the search
// continues in its children. It returns "" when nothing is found.
func findPartBody(p *gmail.MessagePart, mimeType string) string {
	if p == nil {
		return ""
	}

	hasData := p.Body != nil && p.Body.Data != ""
	if hasData && mimeTypeMatches(p.MimeType, mimeType) {
		if text, err := decodePartBody(p); err == nil {
			return text
		}
	}

	for _, child := range p.Parts {
		if text := findPartBody(child, mimeType); text != "" {
			return text
		}
	}
	return ""
}
// mimeTypeMatches reports whether partType and want name the same media type
// once both have been passed through normalizeMimeType.
func mimeTypeMatches(partType string, want string) bool {
	actual, expected := normalizeMimeType(partType), normalizeMimeType(want)
	return actual == expected
}
// normalizeMimeType reduces a Content-Type style value to its bare, lowercase
// media type (for example "Text/HTML; charset=UTF-8" becomes "text/html").
//
// When the value cannot be parsed as a media type, everything from the first
// ";" onward is dropped instead; a value with no ";" is returned lowercased
// and trimmed. Blank input yields "".
func normalizeMimeType(value string) string {
	cleaned := strings.TrimSpace(strings.ToLower(value))
	if cleaned == "" {
		return ""
	}

	if mediaType, _, err := mime.ParseMediaType(cleaned); err == nil && mediaType != "" {
		return strings.ToLower(mediaType)
	}

	// Fallback for malformed values: keep only what precedes the parameters.
	head, _, _ := strings.Cut(cleaned, ";")
	return strings.TrimSpace(head)
}
// looksLikeHTML reports whether value appears to be an HTML document: after
// lowercasing and trimming it either starts with a typical document-level tag
// or contains "<html" anywhere. Blank input is never HTML.
func looksLikeHTML(value string) bool {
	lowered := strings.TrimSpace(strings.ToLower(value))
	if lowered == "" {
		return false
	}
	for _, prefix := range []string{"<!doctype", "<html", "<head", "<body", "<meta"} {
		if strings.HasPrefix(lowered, prefix) {
			return true
		}
	}
	return strings.Contains(lowered, "<html")
}
// decodePartBody returns the body of a single message part as a string.
//
// The part's Body.Data is base64-decoded, then passed through the part's
// Content-Transfer-Encoding (when that header is present) and finally through
// charset decoding driven by the Content-Type header, or by the part's
// MimeType when the header is absent. A nil part or one without body data
// yields ("", nil); only the initial base64 decoding can return an error.
func decodePartBody(p *gmail.MessagePart) (string, error) {
	if p == nil || p.Body == nil || p.Body.Data == "" {
		return "", nil
	}

	payload, err := decodeBase64URLBytes(p.Body.Data)
	if err != nil {
		return "", err
	}

	if cte := strings.TrimSpace(headerValue(p, "Content-Transfer-Encoding")); cte != "" {
		payload = decodeTransferEncoding(payload, cte)
	}

	contentType := strings.TrimSpace(headerValue(p, "Content-Type"))
	if contentType == "" {
		contentType = strings.TrimSpace(p.MimeType)
	}
	if contentType == "" {
		return string(payload), nil
	}
	return string(decodeBodyCharset(payload, contentType)), nil
}
// decodeTransferEncoding undoes a base64 or quoted-printable
// Content-Transfer-Encoding, but only when data still looks encoded.
//
// The input is returned unchanged for any other encoding, when it does not
// look like the named encoding, or when decoding fails.
func decodeTransferEncoding(data []byte, encoding string) []byte {
	kind := strings.ToLower(strings.TrimSpace(encoding))

	if kind == "base64" && looksLikeBase64(data) {
		if out, err := decodeAnyBase64(data); err == nil {
			return out
		}
		return data
	}

	if kind == "quoted-printable" && looksLikeQuotedPrintable(data) {
		reader := quotedprintable.NewReader(bytes.NewReader(data))
		if out, err := io.ReadAll(reader); err == nil {
			return out
		}
	}

	return data
}
// decodeBodyCharset converts data from the charset named in contentType to
// UTF-8. The input is returned unchanged when no charset is declared, when the
// charset is UTF-8, when the bytes are already usable as UTF-8, or when no
// decoder succeeds.
func decodeBodyCharset(data []byte, contentType string) []byte {
	label := charsetLabelFromContentType(contentType)
	if label == "" {
		return data
	}

	canonical := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(label), "_", "-"))
	switch canonical {
	case "utf-8", "utf8":
		return data
	}

	// The Gmail API may hand back body.data already normalized to UTF-8 while
	// the MIME header still names the original charset. Valid UTF-8 is
	// therefore left alone rather than decoded a second time. ISO-2022
	// payloads are the exception: they are plain ASCII (hence valid UTF-8) but
	// carry ESC shift sequences that still require charset decoding.
	if utf8.Valid(data) {
		needsShiftDecoding := strings.HasPrefix(canonical, "iso-2022-") && bytes.ContainsRune(data, '\x1b')
		if !needsShiftDecoding {
			return data
		}
	}

	if out, ok := decodeWithCharsetLabel(data, label); ok {
		return out
	}
	return data
}
// charsetLabelFromContentType extracts the charset parameter from a
// Content-Type value, or returns "" when none can be found.
//
// Well-formed values are handled by mime.ParseMediaType. Values it rejects, or
// that yield no charset, fall back to a case-insensitive scan for "charset=":
// the label is what follows, up to the next ';', space or tab, with
// surrounding quotes removed.
func charsetLabelFromContentType(contentType string) string {
	if _, params, err := mime.ParseMediaType(contentType); err == nil {
		if label := strings.TrimSpace(params["charset"]); label != "" {
			return label
		}
	}

	// Scan the original string directly instead of searching a lowercased
	// copy: strings.ToLower can change the byte length (invalid UTF-8 bytes
	// become U+FFFD, and some letters change width), so an index found in the
	// copy does not line up with contentType and could yield a wrong label or
	// an out-of-range slice. A fixed-width EqualFold window can only match
	// ASCII spellings of "charset=".
	const key = "charset="
	start := -1
	for i := 0; i+len(key) <= len(contentType); i++ {
		if strings.EqualFold(contentType[i:i+len(key)], key) {
			start = i + len(key)
			break
		}
	}
	if start == -1 {
		return ""
	}

	label := strings.TrimLeft(contentType[start:], " \t")
	if cut := strings.IndexAny(label, "; \t"); cut != -1 {
		label = label[:cut]
	}
	return strings.Trim(label, "\"'")
}
// decodeWithCharsetLabel decodes data using the named charset. The trimmed
// label is tried first; if it contains underscores, a variant with "-" in
// their place is tried next. The boolean is false when the label is blank or
// no attempt succeeds.
func decodeWithCharsetLabel(data []byte, charsetLabel string) ([]byte, bool) {
	label := strings.TrimSpace(charsetLabel)
	if label == "" {
		return nil, false
	}

	candidates := []string{label}
	if strings.Contains(label, "_") {
		candidates = append(candidates, strings.ReplaceAll(label, "_", "-"))
	}

	for _, name := range candidates {
		if out, ok := decodeWithEncodingIndex(data, name); ok {
			return out, true
		}
	}
	return nil, false
}
// decodeWithEncodingIndex decodes data with the encoding registered under
// charsetLabel. It first looks the label up in ianaindex.MIME; if that lookup
// or the decode fails, it falls back to charset.NewReaderLabel. The boolean
// reports whether either route produced a result.
func decodeWithEncodingIndex(data []byte, charsetLabel string) ([]byte, bool) {
	enc, lookupErr := ianaindex.MIME.Encoding(charsetLabel)
	if lookupErr == nil && enc != nil {
		if out, err := enc.NewDecoder().Bytes(data); err == nil {
			return out, true
		}
	}

	reader, err := charset.NewReaderLabel(charsetLabel, bytes.NewReader(data))
	if err != nil {
		return nil, false
	}
	if out, err := io.ReadAll(reader); err == nil {
		return out, true
	}
	return nil, false
}
// looksLikeBase64 reports whether data, once trimmed, is non-empty and made up
// solely of characters from the standard or URL-safe base64 alphabets, padding
// ('=') and whitespace (newline, carriage return, tab, space).
func looksLikeBase64(data []byte) bool {
	body := bytes.TrimSpace(data)
	if len(body) == 0 {
		return false
	}
	for _, c := range body {
		isAlnum := ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
		if isAlnum {
			continue
		}
		switch c {
		case '+', '/', '=', '-', '_', '\n', '\r', '\t', ' ':
			continue
		}
		return false
	}
	return true
}
// looksLikeQuotedPrintable reports whether data appears to contain
// quoted-printable encoded sequences. This prevents double-decoding when the
// Gmail API has already decoded the content.
//
// Detection is intentionally conservative to avoid corrupting URLs such as
// "?a=1&b=2". Only these markers count:
//  1. Soft line breaks ('=' directly followed by CR or LF), including one
//     that ends the data
//  2. Escaped equals (=3D / =3d)
//  3. Chained hex escapes (=XX=YY...), common in UTF-8 quoted-printable text
func looksLikeQuotedPrintable(data []byte) bool {
	// Stop one byte before the end: every marker needs at least one byte after
	// the '='. (Stopping two bytes early would miss a trailing "=\n".)
	for i := 0; i+1 < len(data); i++ {
		if data[i] != '=' {
			continue
		}
		// A soft line break is a definitive QP marker.
		if data[i+1] == '\r' || data[i+1] == '\n' {
			return true
		}
		// A hex escape needs two bytes after the '='.
		if i+2 >= len(data) || !isHexDigit(data[i+1]) || !isHexDigit(data[i+2]) {
			continue
		}
		// =3D (case-insensitive) encodes a literal '=' and is a strong marker.
		if isHexPair(data[i+1], data[i+2], '3', 'D') {
			return true
		}
		// Chained escapes like =E2=82=AC are common in real QP bodies.
		if i+3 < len(data) && data[i+3] == '=' {
			return true
		}
	}
	return false
}

// isHexDigit reports whether b is an ASCII hexadecimal digit of either case.
func isHexDigit(b byte) bool {
	return (b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')
}

// isHexPair reports whether the two bytes a and b spell the hex pair hi, lo.
// Letters given in upper case ('A'..'F') for hi/lo also match in lower case.
func isHexPair(a, b, hi, lo byte) bool {
	return equalFoldHexNibble(a, hi) && equalFoldHexNibble(b, lo)
}

// equalFoldHexNibble reports whether a equals b, additionally accepting the
// lowercase form of a when b is an uppercase hex letter ('A'..'F').
func equalFoldHexNibble(a, b byte) bool {
	if a == b {
		return true
	}
	if b >= 'A' && b <= 'F' {
		return a == b+('a'-'A')
	}
	return false
}
// decodeAnyBase64 decodes data after removing whitespace, trying in order the
// padded standard, unpadded standard, padded URL-safe and unpadded URL-safe
// alphabets. The first success wins; if all fail, the result and error of the
// last attempt are returned.
func decodeAnyBase64(data []byte) ([]byte, error) {
	text := string(stripBase64Whitespace(data))

	var (
		out []byte
		err error
	)
	for _, enc := range []*base64.Encoding{
		base64.StdEncoding,
		base64.RawStdEncoding,
		base64.URLEncoding,
		base64.RawURLEncoding,
	} {
		if out, err = enc.DecodeString(text); err == nil {
			return out, nil
		}
	}
	return out, err
}
// stripBase64Whitespace returns a copy of data without newline, carriage
// return, tab and space bytes. All other bytes are kept in order. The result
// is always a fresh, non-nil slice.
func stripBase64Whitespace(data []byte) []byte {
	kept := make([]byte, 0, len(data))
	for i := range data {
		if c := data[i]; c != '\n' && c != '\r' && c != '\t' && c != ' ' {
			kept = append(kept, c)
		}
	}
	return kept
}
// decodeBase64URLBytes decodes s, trying in order the unpadded URL-safe,
// padded URL-safe, unpadded standard and padded standard base64 alphabets.
// The first success wins; if all fail, the result and error of the last
// attempt are returned.
func decodeBase64URLBytes(s string) ([]byte, error) {
	var (
		out []byte
		err error
	)
	for _, enc := range []*base64.Encoding{
		base64.RawURLEncoding,
		base64.URLEncoding,
		base64.RawStdEncoding,
		base64.StdEncoding,
	} {
		if out, err = enc.DecodeString(s); err == nil {
			return out, nil
		}
	}
	return out, err
}
// decodeBase64URL is the string-returning form of decodeBase64URLBytes. On
// failure it returns "" together with the decoding error.
func decodeBase64URL(s string) (string, error) {
	raw, err := decodeBase64URLBytes(s)
	if err == nil {
		return string(raw), nil
	}
	return "", err
}
// downloadAttachment saves one attachment of a message into dir and returns
// the path it was written to and whether downloadAttachmentToPath reported it
// as cached.
//
// The file is named "<messageID>_<first 8 bytes of attachment ID>_<filename>".
// Only the base name of the attachment's filename is used; empty, "." and ".."
// names become "attachment". A blank dir means the current directory. A blank
// messageID or attachment ID is an error.
func downloadAttachment(ctx context.Context, svc *gmail.Service, messageID string, a attachmentInfo, dir string) (string, bool, error) {
	if strings.TrimSpace(messageID) == "" || strings.TrimSpace(a.AttachmentID) == "" {
		return "", false, errors.New("missing messageID/attachmentID")
	}

	targetDir := dir
	if strings.TrimSpace(targetDir) == "" {
		targetDir = "."
	}

	// A short ID prefix keeps file names readable while still telling apart
	// attachments that share a filename.
	idPrefix := a.AttachmentID
	if len(idPrefix) > 8 {
		idPrefix = idPrefix[:8]
	}

	// Keep only the final path element so a crafted filename cannot point
	// outside targetDir.
	baseName := filepath.Base(a.Filename)
	switch baseName {
	case "", ".", "..":
		baseName = "attachment"
	}

	outPath := filepath.Join(targetDir, fmt.Sprintf("%s_%s_%s", messageID, idPrefix, baseName))
	savedPath, cached, _, err := downloadAttachmentToPath(ctx, svc, messageID, a.AttachmentID, outPath, a.Size)
	if err != nil {
		return "", false, err
	}
	return savedPath, cached, nil
}