gogcli/internal/cmd/gmail_thread_helpers_test.go
2026-03-07 17:51:02 +00:00

387 lines
12 KiB
Go

package cmd
import (
"context"
"encoding/base64"
"os"
"path/filepath"
"testing"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/simplifiedchinese"
"google.golang.org/api/gmail/v1"
)
func TestStripHTMLTags_More(t *testing.T) {
input := "<div>Hello <b>World</b><script>bad()</script><style>.x{}</style></div>"
out := stripHTMLTags(input)
if out != "Hello World" {
t.Fatalf("unexpected stripped output: %q", out)
}
}
func TestFormatBytes(t *testing.T) {
if got := formatBytes(500); got != "500 B" {
t.Fatalf("unexpected bytes format: %q", got)
}
if got := formatBytes(2048); got != "2.0 KB" {
t.Fatalf("unexpected KB format: %q", got)
}
if got := formatBytes(5 * 1024 * 1024); got != "5.0 MB" {
t.Fatalf("unexpected MB format: %q", got)
}
if got := formatBytes(3 * 1024 * 1024 * 1024); got != "3.0 GB" {
t.Fatalf("unexpected GB format: %q", got)
}
}
func TestCollectAttachments_More(t *testing.T) {
part := &gmail.MessagePart{
Parts: []*gmail.MessagePart{
{
Filename: "file.txt",
MimeType: "text/plain",
Body: &gmail.MessagePartBody{
AttachmentId: "a1",
Size: 12,
},
},
{
Parts: []*gmail.MessagePart{
{
MimeType: "image/png",
Body: &gmail.MessagePartBody{
AttachmentId: "a2",
Size: 34,
},
},
},
},
},
}
attachments := collectAttachments(part)
if len(attachments) != 2 {
t.Fatalf("expected 2 attachments, got %d", len(attachments))
}
if attachments[0].Filename != "file.txt" || attachments[1].AttachmentID != "a2" {
t.Fatalf("unexpected attachments: %#v", attachments)
}
}
func TestAttachmentLine(t *testing.T) {
att := attachmentOutput{
Filename: "file.txt",
Size: 12,
SizeHuman: formatBytes(12),
MimeType: "text/plain",
AttachmentID: "a1",
}
if got := attachmentLine(att); got != "attachment\tfile.txt\t12 B\ttext/plain\ta1" {
t.Fatalf("unexpected attachment line: %q", got)
}
}
func TestBestBodySelection(t *testing.T) {
plain := base64.RawURLEncoding.EncodeToString([]byte("plain"))
html := base64.RawURLEncoding.EncodeToString([]byte("<b>html</b>"))
part := &gmail.MessagePart{
Parts: []*gmail.MessagePart{
{
MimeType: "text/plain",
Body: &gmail.MessagePartBody{Data: plain},
},
{
MimeType: "text/html",
Body: &gmail.MessagePartBody{Data: html},
},
},
}
if got := bestBodyText(part); got != "plain" {
t.Fatalf("unexpected best body text: %q", got)
}
body, isHTML := bestBodyForDisplay(part)
if body != "plain" || isHTML {
t.Fatalf("unexpected body display: %q html=%v", body, isHTML)
}
}
func TestFindPartBodyHTML(t *testing.T) {
html := base64.RawURLEncoding.EncodeToString([]byte("<p>hi</p>"))
part := &gmail.MessagePart{
MimeType: "multipart/alternative",
Parts: []*gmail.MessagePart{
{
MimeType: "text/html; charset=UTF-8",
Body: &gmail.MessagePartBody{Data: html},
},
},
}
got := findPartBody(part, "text/html")
if got != "<p>hi</p>" {
t.Fatalf("unexpected html body: %q", got)
}
}
func TestBestBodyForDisplay_DetectsHTMLInPlainPart(t *testing.T) {
html := base64.RawURLEncoding.EncodeToString([]byte("<html><body>hi</body></html>"))
part := &gmail.MessagePart{
Parts: []*gmail.MessagePart{
{
MimeType: "text/plain",
Body: &gmail.MessagePartBody{Data: html},
},
},
}
body, isHTML := bestBodyForDisplay(part)
if body == "" || !isHTML {
t.Fatalf("expected HTML detection, got body=%q html=%v", body, isHTML)
}
}
func TestFindPartBody_DecodesQuotedPrintable(t *testing.T) {
qp := "Precio =E2=82=AC99.99"
encoded := base64.RawURLEncoding.EncodeToString([]byte(qp))
part := &gmail.MessagePart{
MimeType: "text/plain",
Headers: []*gmail.MessagePartHeader{
{Name: "Content-Transfer-Encoding", Value: "quoted-printable"},
{Name: "Content-Type", Value: "text/plain; charset=utf-8"},
},
Body: &gmail.MessagePartBody{Data: encoded},
}
got := findPartBody(part, "text/plain")
if got != "Precio €99.99" {
t.Fatalf("unexpected decoded body: %q", got)
}
}
func TestFindPartBody_PreservesURLsWhenAlreadyDecoded(t *testing.T) {
// Gmail API sometimes returns already-decoded content even when
// Content-Transfer-Encoding header says quoted-printable.
// URLs with = should be preserved, not corrupted to U+FFFD.
// See: https://github.com/steipete/gogcli/issues/159
url := "https://example.com/auth?token_hash=ABCD12&type=magiclink"
encoded := base64.RawURLEncoding.EncodeToString([]byte(url))
part := &gmail.MessagePart{
MimeType: "text/plain",
Headers: []*gmail.MessagePartHeader{
{Name: "Content-Transfer-Encoding", Value: "quoted-printable"},
{Name: "Content-Type", Value: "text/plain; charset=utf-8"},
},
Body: &gmail.MessagePartBody{Data: encoded},
}
got := findPartBody(part, "text/plain")
if got != url {
t.Fatalf("URL corrupted: expected %q, got %q", url, got)
}
}
func TestLooksLikeQuotedPrintable(t *testing.T) {
tests := []struct {
name string
input string
want bool
}{
{"actual QP uppercase chain", "Price =E2=82=AC99", true},
{"actual QP lowercase chain", "Price =e2=82=ac99", true},
{"soft line break CRLF", "line=\r\ncontinued", true},
{"soft line break LF", "line=\ncontinued", true},
{"plain URL lowercase", "https://example.com?foo=bar", false},
{"URL with multiple params", "https://example.com?a=b1&c=d2", false},
{"URL with uppercase hex token", "https://example.com?token=ABCD12", false},
{"lowercase hex sequence", "test=ab", false},
{"uppercase hex sequence", "test=AB", false},
{"mixed case hex", "test=Ab", false},
{"plain text", "Hello World", false},
{"equals at end", "foo=", false},
{"short input", "=", false},
{"QP encoded equals uppercase", "foo=3Dbar", true},
{"QP encoded equals lowercase", "foo=3dbar", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := looksLikeQuotedPrintable([]byte(tt.input))
if got != tt.want {
t.Errorf("looksLikeQuotedPrintable(%q) = %v, want %v", tt.input, got, tt.want)
}
})
}
}
func TestFindPartBody_DecodesBase64Transfer(t *testing.T) {
inner := base64.StdEncoding.EncodeToString([]byte("plain body"))
encoded := base64.RawURLEncoding.EncodeToString([]byte(inner))
part := &gmail.MessagePart{
MimeType: "text/plain",
Headers: []*gmail.MessagePartHeader{
{Name: "Content-Transfer-Encoding", Value: "base64"},
{Name: "Content-Type", Value: "text/plain; charset=utf-8"},
},
Body: &gmail.MessagePartBody{Data: encoded},
}
got := findPartBody(part, "text/plain")
if got != "plain body" {
t.Fatalf("unexpected decoded body: %q", got)
}
}
func TestDecodeTransferEncoding_Base64Whitespace(t *testing.T) {
encoded := []byte("cGxhaW4gYm9keQ==\n")
got := decodeTransferEncoding(encoded, "base64")
if string(got) != "plain body" {
t.Fatalf("unexpected decoded body: %q", got)
}
}
func TestDecodeBodyCharset_ISO88591(t *testing.T) {
input := []byte{0x63, 0x61, 0x66, 0xe9} // "café" in ISO-8859-1
got := decodeBodyCharset(input, "text/plain; charset=iso-8859-1")
if string(got) != "café" {
t.Fatalf("unexpected decoded charset: %q", string(got))
}
}
func TestDecodeBodyCharset_ISO2022JP(t *testing.T) {
source := "\u65e5\u672c\u8a9e\u30c6\u30b9\u30c8"
encoded, err := japanese.ISO2022JP.NewEncoder().Bytes([]byte(source))
if err != nil {
t.Fatalf("encode iso-2022-jp: %v", err)
}
got := decodeBodyCharset(encoded, "text/plain; charset=iso-2022-jp")
if string(got) != source {
t.Fatalf("unexpected decoded charset: %q", string(got))
}
}
func TestDecodeBodyCharset_ISO2022JP_MixedASCIIAndJapanese(t *testing.T) {
// Test mixed ASCII and Japanese text (e.g., "Hello こんにちは World")
source := "Hello \u3053\u3093\u306b\u3061\u306f World"
encoded, err := japanese.ISO2022JP.NewEncoder().Bytes([]byte(source))
if err != nil {
t.Fatalf("encode iso-2022-jp: %v", err)
}
got := decodeBodyCharset(encoded, "text/plain; charset=iso-2022-jp")
if string(got) != source {
t.Fatalf("unexpected decoded charset: expected %q, got %q", source, string(got))
}
}
func TestDecodeBodyCharset_ISO2022JP_EmptyContent(t *testing.T) {
// Test empty content with ISO-2022-JP charset header
got := decodeBodyCharset([]byte{}, "text/plain; charset=iso-2022-jp")
if len(got) != 0 {
t.Fatalf("expected empty result for empty input, got %q", string(got))
}
}
func TestDecodeBodyCharset_ISO2022JP_MalformedSequence(t *testing.T) {
// Test malformed ISO-2022-JP sequences - should gracefully return original data
// ISO-2022-JP uses escape sequences like ESC $ B for switching to JIS X 0208
// This creates an invalid sequence: starts escape but doesn't complete properly
malformed := []byte{0x1b, 0x24, 0x42, 0xff, 0xfe, 0x1b, 0x28, 0x42} // ESC $ B + invalid bytes + ESC ( B
got := decodeBodyCharset(malformed, "text/plain; charset=iso-2022-jp")
// The decoder should either return the original malformed data or a decoded version
// (graceful degradation means it shouldn't panic or error)
if got == nil {
t.Fatalf("expected non-nil result for malformed input")
}
}
func TestDecodeBodyCharset_ISO2022JP_TruncatedEscapeSequence(t *testing.T) {
// Test truncated escape sequence - incomplete ISO-2022-JP escape
// ESC $ without the final byte is incomplete
truncated := []byte{0x1b, 0x24}
got := decodeBodyCharset(truncated, "text/plain; charset=iso-2022-jp")
// Should gracefully handle and return something (original or partial decode)
if got == nil {
t.Fatalf("expected non-nil result for truncated escape sequence")
}
}
func TestDecodeBodyCharset_GBK(t *testing.T) {
source := "您的阿里云账户已欠费即将停服提醒"
enc, err := ianaindex.MIME.Encoding("gbk")
if err != nil || enc == nil {
t.Fatalf("lookup gbk encoding: %v", err)
}
encoded, err := enc.NewEncoder().Bytes([]byte(source))
if err != nil {
t.Fatalf("encode gbk: %v", err)
}
got := decodeBodyCharset(encoded, "text/plain; charset=gbk")
if string(got) != source {
t.Fatalf("unexpected decoded charset: expected %q, got %q", source, string(got))
}
}
func TestFindPartBody_UsesMimeTypeCharsetWhenHeaderMissing(t *testing.T) {
source := "您的阿里云账户已欠费即将停服提醒"
encodedBody, err := simplifiedchinese.GBK.NewEncoder().Bytes([]byte(source))
if err != nil {
t.Fatalf("encode gb2312: %v", err)
}
part := &gmail.MessagePart{
MimeType: "text/plain; charset=gb2312",
Body: &gmail.MessagePartBody{
Data: base64.RawURLEncoding.EncodeToString(encodedBody),
},
}
got := findPartBody(part, "text/plain")
if got != source {
t.Fatalf("unexpected decoded body: expected %q, got %q", source, got)
}
}
func TestMimeTypeMatches(t *testing.T) {
if !mimeTypeMatches("Text/Plain; charset=UTF-8", "text/plain") {
t.Fatalf("expected mime match")
}
if mimeTypeMatches("application/json", "text/plain") {
t.Fatalf("unexpected mime match")
}
if normalizeMimeType("text/plain; charset=utf-8") != "text/plain" {
t.Fatalf("unexpected normalized mime type")
}
if normalizeMimeType("") != "" {
t.Fatalf("expected empty normalized mime type")
}
}
func TestDecodeBase64URL_Padded(t *testing.T) {
encoded := base64.URLEncoding.EncodeToString([]byte("hello"))
decoded, err := decodeBase64URL(encoded)
if err != nil {
t.Fatalf("decodeBase64URL: %v", err)
}
if decoded != "hello" {
t.Fatalf("unexpected decode: %q", decoded)
}
}
func TestDownloadAttachment_Cached(t *testing.T) {
dir := t.TempDir()
messageID := "msg1"
attachmentID := "att123456"
filename := "file.txt"
shortID := attachmentID[:8]
outPath := filepath.Join(dir, messageID+"_"+shortID+"_"+filename)
if err := os.WriteFile(outPath, []byte("abc"), 0o600); err != nil {
t.Fatalf("write file: %v", err)
}
info := attachmentInfo{
Filename: filename,
AttachmentID: attachmentID,
Size: 3,
}
gotPath, cached, err := downloadAttachment(context.Background(), nil, messageID, info, dir)
if err != nil {
t.Fatalf("downloadAttachment: %v", err)
}
if !cached || gotPath != outPath {
t.Fatalf("expected cached path %q, got %q cached=%v", outPath, gotPath, cached)
}
}