feat(gmail): add sanitized content reads

Co-authored-by: urasmutlu <urasmutlu@gmail.com>
This commit is contained in:
Peter Steinberger 2026-05-04 07:04:25 +01:00
parent d37caabdd7
commit 33284f03bb
No known key found for this signature in database
6 changed files with 442 additions and 19 deletions

View File

@ -4,6 +4,7 @@
### Added
- Install: publish a GHCR Docker image for release tags, with a non-root runtime image and file-keyring docs for container automation. (#539, #444) — thanks @HuckOps and @rdehuyss.
- Gmail: add `--sanitize-content` (`--safe`) to `gmail get` and `gmail thread get` for agent-oriented sanitized content output without raw Gmail payloads in JSON. (#238, #220) — thanks @urasmutlu.
- Agent safety: add baked safety-profile builds for fail-closed agent binaries, with `agent-safe`, `readonly`, and `full` profiles, filtered help/schema output, docs, and build tooling. (#366, #239) — thanks @drewburchfield.
- Calendar: add `--with-meet` to `calendar update` for adding Google Meet conferencing to existing events. (#538) — thanks @alexisperumal.
- Calendar: add `calendar move` / `calendar transfer` to move an event to another calendar and change its organizer. (#448) — thanks @markusbkoch.

View File

@ -742,8 +742,10 @@ gog gmail search 'newer_than:7d' --max 10
gog gmail thread get <threadId>
gog gmail thread get <threadId> --download # Download attachments to current dir
gog gmail thread get <threadId> --download --out-dir ./attachments
gog gmail thread get <threadId> --sanitize-content # Agent-oriented sanitized content output
gog gmail get <messageId>
gog gmail get <messageId> --format metadata
gog gmail get <messageId> --sanitize-content # Agent-oriented sanitized content output
gog gmail attachment <messageId> <attachmentId>
gog gmail attachment <messageId> <attachmentId> --out ./attachment.bin
gog gmail url <threadId> # Print Gmail web URL
@ -830,6 +832,16 @@ Gmail watch (Pub/Sub push):
- `watch serve --fetch-delay` defaults to `3s` and helps avoid Gmail History indexing races after push delivery.
- `watch serve --exclude-labels` defaults to `SPAM,TRASH`; IDs are case-sensitive.
Sanitized Gmail content (`--sanitize-content`, aliases `--sanitize` and `--safe`):
- Converts HTML bodies to text with an HTML parser and removes script/style content.
- Replaces HTTP(S) URLs with `[url removed]` after decoding HTML entities.
- Omits raw Gmail `payload`/RFC822 data and unsubscribe links from sanitized JSON envelopes.
- Rejects `gmail get --format raw`, because raw output cannot be sanitized.
This reduces prompt-injection, phishing-link, and tracking-link exposure for agents.
It is not a sandbox; use command guards or baked safety profiles for command
boundaries.
### Encrypted Backup
```bash

View File

@ -12,9 +12,10 @@ import (
)
type GmailGetCmd struct {
MessageID string `arg:"" name:"messageId" help:"Message ID"`
Format string `name:"format" help:"Message format: full|metadata|raw" default:"full"`
Headers string `name:"headers" help:"Metadata headers (comma-separated; only for --format=metadata)"`
MessageID string `arg:"" name:"messageId" help:"Message ID"`
Format string `name:"format" help:"Message format: full|metadata|raw" default:"full"`
Headers string `name:"headers" help:"Metadata headers (comma-separated; only for --format=metadata)"`
SanitizeContent bool `name:"sanitize-content" aliases:"sanitize,safe" help:"Emit agent-oriented sanitized content: strip HTML, remove HTTP(S) URLs, and omit raw Gmail payloads from JSON"`
}
const (
@ -44,6 +45,9 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error {
default:
return fmt.Errorf("invalid --format: %q (expected full|metadata|raw)", format)
}
if c.SanitizeContent && format == gmailFormatRaw {
return usage("--sanitize-content cannot be used with --format raw")
}
svc, err := newGmailService(ctx, account)
if err != nil {
@ -68,6 +72,17 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error {
unsubscribe := bestUnsubscribeLink(msg.Payload)
if outfmt.IsJSON(ctx) {
if c.SanitizeContent {
output := sanitizedGmailMessage(msg, format == gmailFormatFull)
payload := map[string]any{
"message": output,
"headers": output.Headers,
}
if format == gmailFormatFull && output.Body != "" {
payload["body"] = output.Body
}
return outfmt.WriteJSON(ctx, os.Stdout, payload)
}
// Include a flattened headers map for easier querying
// (e.g., jq '.headers.to' instead of complex nested queries)
headers := map[string]string{
@ -120,13 +135,20 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error {
u.Out().Println(string(decoded))
return nil
case gmailFormatMetadata, gmailFormatFull:
u.Out().Printf("from\t%s", headerValue(msg.Payload, "From"))
u.Out().Printf("to\t%s", headerValue(msg.Payload, "To"))
u.Out().Printf("cc\t%s", headerValue(msg.Payload, "Cc"))
u.Out().Printf("bcc\t%s", headerValue(msg.Payload, "Bcc"))
u.Out().Printf("subject\t%s", headerValue(msg.Payload, "Subject"))
u.Out().Printf("date\t%s", headerValue(msg.Payload, "Date"))
if unsubscribe != "" {
header := func(name string) string {
value := headerValue(msg.Payload, name)
if c.SanitizeContent {
return sanitizeGmailText(value)
}
return value
}
u.Out().Printf("from\t%s", header("From"))
u.Out().Printf("to\t%s", header("To"))
u.Out().Printf("cc\t%s", header("Cc"))
u.Out().Printf("bcc\t%s", header("Bcc"))
u.Out().Printf("subject\t%s", header("Subject"))
u.Out().Printf("date\t%s", header("Date"))
if unsubscribe != "" && !c.SanitizeContent {
u.Out().Printf("unsubscribe\t%s", unsubscribe)
}
attachments := attachmentOutputs(collectAttachments(msg.Payload))
@ -137,6 +159,10 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error {
if format == gmailFormatFull {
body := bestBodyText(msg.Payload)
if body != "" {
if c.SanitizeContent {
displayBody, isHTML := bestBodyForDisplay(msg.Payload)
body = sanitizeGmailBody(displayBody, isHTML)
}
u.Out().Println("")
u.Out().Println(body)
}

View File

@ -0,0 +1,149 @@
package cmd
import (
htmlpkg "html"
"regexp"
"strings"
"golang.org/x/net/html"
"google.golang.org/api/gmail/v1"
)
var (
	// sanitizeURLPattern matches an http:// or https:// URL and everything
	// after the scheme up to (but not including) the first whitespace
	// character, angle bracket, single/double quote, backtick, ']' or ')'.
	//
	// The (?i) flag makes the scheme match case-insensitively: URL schemes are
	// case-insensitive, so "HTTPS://" or "Http://" must be removed exactly like
	// their lowercase forms instead of slipping through the sanitizer.
	sanitizeURLPattern = regexp.MustCompile(`(?i)https?://[^\s<>"'` + "`" + `\]\)]+`)

	// sanitizeBlockTags lists the HTML tags that are treated as block-level
	// separators when HTML is flattened to text: a space is emitted at each of
	// these tags so that text from adjacent blocks does not run together.
	sanitizeBlockTags = map[string]bool{
		"article": true, "blockquote": true, "br": true, "dd": true, "div": true,
		"dl": true, "dt": true, "footer": true, "h1": true, "h2": true,
		"h3": true, "h4": true, "h5": true, "h6": true, "header": true,
		"hr": true, "li": true, "ol": true, "p": true, "pre": true,
		"section": true, "table": true, "tr": true, "ul": true,
	}
)
// gmailSanitizedThreadOutput is the JSON shape used for a thread when
// sanitized content is requested. Values are produced by sanitizedGmailThread.
type gmailSanitizedThreadOutput struct {
// ID is copied from the thread's Id; the key is omitted when empty.
ID string `json:"id,omitempty"`
// Messages carries one sanitized entry per non-nil message. The tag has no
// omitempty, so the "messages" key is always present in the output.
Messages []gmailSanitizedMessageOutput `json:"messages"`
}
// gmailSanitizedMessageOutput is the JSON shape used for a single message when
// sanitized content is requested. It has no field for the raw message payload.
// Values are produced by sanitizedGmailMessage.
type gmailSanitizedMessageOutput struct {
// ID, ThreadID and LabelIDs are copied from the message unchanged.
ID string `json:"id,omitempty"`
ThreadID string `json:"threadId,omitempty"`
LabelIDs []string `json:"labelIds,omitempty"`
// Snippet is the message snippet after sanitizeGmailText.
Snippet string `json:"snippet,omitempty"`
// InternalDate and SizeEstimate are copied from the message unchanged.
InternalDate int64 `json:"internalDate,omitempty"`
SizeEstimate int64 `json:"sizeEstimate,omitempty"`
// Headers is the map built by sanitizedGmailHeaders. The tag has no
// omitempty, so the "headers" key is always present in the output.
Headers map[string]string `json:"headers"`
// Body is the sanitized body text; it is only filled in when the caller
// asks for the body, and the key is omitted when empty.
Body string `json:"body,omitempty"`
// Attachments is the result of attachmentOutputs(collectAttachments(...)).
// NOTE(review): attachment fields are not passed through sanitizeGmailText
// here — confirm whether attachmentOutput can carry sender-controlled text.
Attachments []attachmentOutput `json:"attachments,omitempty"`
}
// sanitizeGmailText decodes HTML entities in value and then replaces every
// match of sanitizeURLPattern with the literal "[url removed]".
//
// Decoding runs first so that an entity-obfuscated scheme such as
// "&#104;ttps://" is visible to the pattern before matching.
func sanitizeGmailText(value string) string {
	const placeholder = "[url removed]"

	decoded := htmlpkg.UnescapeString(value)
	return sanitizeURLPattern.ReplaceAllString(decoded, placeholder)
}
// sanitizeGmailBody turns a message body into sanitized single-spaced text.
//
// An empty body yields "". When isHTML is true the body is first flattened to
// text by extractSanitizedHTMLText. The text is then passed through
// sanitizeGmailText, every whitespacePattern match is replaced by one space,
// and leading/trailing whitespace is trimmed.
func sanitizeGmailBody(body string, isHTML bool) string {
	if body == "" {
		return ""
	}
	if isHTML {
		body = extractSanitizedHTMLText(body)
	}
	collapsed := whitespacePattern.ReplaceAllString(sanitizeGmailText(body), " ")
	return strings.TrimSpace(collapsed)
}
// extractSanitizedHTMLText tokenizes value as HTML and returns only its text.
//
// Text tokens seen while a <script> or <style> element is open are dropped.
// Each start, self-closing or end tag listed in sanitizeBlockTags contributes a
// single space so neighbouring blocks do not fuse. Token kinds other than
// text and tags are ignored. When the tokenizer reports an error token (which
// includes end of input) the collected text has every whitespacePattern match
// replaced by one space and is trimmed before being returned.
func extractSanitizedHTMLText(value string) string {
	z := html.NewTokenizer(strings.NewReader(value))

	var visible strings.Builder
	hidden := 0 // number of currently open <script>/<style> elements

	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.TextToken {
			if hidden == 0 {
				visible.Write(z.Text())
			}
			continue
		}

		opening := kind == html.StartTagToken || kind == html.SelfClosingTagToken
		if !opening && kind != html.EndTagToken {
			continue
		}

		rawName, _ := z.TagName()
		tag := strings.ToLower(string(rawName))

		if tag == "script" || tag == "style" {
			if opening {
				hidden++
			} else if hidden > 0 {
				// Guarded so a stray closing tag cannot drive the counter negative.
				hidden--
			}
		}
		if sanitizeBlockTags[tag] {
			visible.WriteByte(' ')
		}
	}

	flattened := whitespacePattern.ReplaceAllString(visible.String(), " ")
	return strings.TrimSpace(flattened)
}
// sanitizedGmailHeaders builds the flattened header map for sanitized output.
//
// Each listed header is read with headerValue and passed through
// sanitizeGmailText; headers whose sanitized value is empty are left out.
// The returned map is never nil.
func sanitizedGmailHeaders(p *gmail.MessagePart) map[string]string {
	// Output key -> header name, in the order the values are looked up.
	fields := [...]struct{ key, name string }{
		{"from", "From"},
		{"to", "To"},
		{"cc", "Cc"},
		{"bcc", "Bcc"},
		{"subject", "Subject"},
		{"date", "Date"},
		{"message_id", "Message-ID"},
		{"in_reply_to", "In-Reply-To"},
		{"references", "References"},
	}

	headers := make(map[string]string, len(fields))
	for _, f := range fields {
		if cleaned := sanitizeGmailText(headerValue(p, f.name)); cleaned != "" {
			headers[f.key] = cleaned
		}
	}
	return headers
}
// sanitizedGmailMessage converts msg into its sanitized output form.
//
// A nil msg yields a value whose only populated field is an empty, non-nil
// Headers map. Otherwise identifiers, labels, internal date and size estimate
// are copied as-is, the snippet goes through sanitizeGmailText, headers come
// from sanitizedGmailHeaders and attachments from
// attachmentOutputs(collectAttachments(...)). The body is only computed when
// includeBody is true, using bestBodyForDisplay and sanitizeGmailBody.
func sanitizedGmailMessage(msg *gmail.Message, includeBody bool) gmailSanitizedMessageOutput {
	var result gmailSanitizedMessageOutput
	if msg == nil {
		result.Headers = map[string]string{}
		return result
	}

	result.ID = msg.Id
	result.ThreadID = msg.ThreadId
	result.LabelIDs = msg.LabelIds
	result.Snippet = sanitizeGmailText(msg.Snippet)
	result.InternalDate = msg.InternalDate
	result.SizeEstimate = msg.SizeEstimate
	result.Headers = sanitizedGmailHeaders(msg.Payload)
	result.Attachments = attachmentOutputs(collectAttachments(msg.Payload))

	if !includeBody {
		return result
	}
	rawBody, isHTML := bestBodyForDisplay(msg.Payload)
	result.Body = sanitizeGmailBody(rawBody, isHTML)
	return result
}
// sanitizedGmailThread converts thread into its sanitized output form.
//
// A nil thread yields an output with an empty, non-nil Messages slice. For a
// non-nil thread, nil entries in thread.Messages are skipped and every other
// message is converted with sanitizedGmailMessage, forwarding includeBody.
func sanitizedGmailThread(thread *gmail.Thread, includeBody bool) gmailSanitizedThreadOutput {
	if thread == nil {
		return gmailSanitizedThreadOutput{Messages: []gmailSanitizedMessageOutput{}}
	}

	converted := make([]gmailSanitizedMessageOutput, 0, len(thread.Messages))
	for i := range thread.Messages {
		if m := thread.Messages[i]; m != nil {
			converted = append(converted, sanitizedGmailMessage(m, includeBody))
		}
	}
	return gmailSanitizedThreadOutput{ID: thread.Id, Messages: converted}
}

View File

@ -0,0 +1,219 @@
package cmd
import (
"context"
"encoding/base64"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"google.golang.org/api/gmail/v1"
"google.golang.org/api/option"
)
// TestSanitizeGmailBody checks sanitizeGmailBody against HTML and plain-text
// inputs: script/style content is dropped, visible and entity-obfuscated URLs
// are replaced, and link text survives while the href target does not.
func TestSanitizeGmailBody(t *testing.T) {
	type testCase struct {
		name   string
		body   string
		isHTML bool
		want   string
	}

	cases := []testCase{
		{
			name:   "html strips scripts and visible urls",
			body:   `<script>fetch("https://tracker.example/open")</script><p>Hello https://phish.example/login</p>`,
			isHTML: true,
			want:   "Hello [url removed]",
		},
		{
			name:   "plain decodes entity-obfuscated url",
			body:   `open &#104;ttps://evil.example/path now`,
			isHTML: false,
			want:   "open [url removed] now",
		},
		{
			name:   "html keeps link text but drops href target",
			body:   `<p>Click <a href="https://evil.example">here</a></p>`,
			isHTML: true,
			want:   "Click here",
		},
		{
			name:   "style block removed",
			body:   `<style>body{background:url(https://tracker.example)}</style><p>Visible</p>`,
			isHTML: true,
			want:   "Visible",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := sanitizeGmailBody(tc.body, tc.isHTML)
			if got != tc.want {
				t.Fatalf("sanitizeGmailBody() = %q, want %q", got, tc.want)
			}
		})
	}
}
// TestGmailGetCmd_SanitizeContent_JSONUsesSafeEnvelope runs
// `gmail get m1 --sanitize-content --json` against a stub Gmail server and
// verifies that the JSON output contains no URLs, no tracker host, no encoded
// body data, no "payload"/"unsubscribe" text, and that body and subject are
// sanitized.
func TestGmailGetCmd_SanitizeContent_JSONUsesSafeEnvelope(t *testing.T) {
	savedFactory := newGmailService
	t.Cleanup(func() { newGmailService = savedFactory })

	const rawHTML = `<html><body><script>fetch("https://tracker.example/open")</script><p>Hello https://phish.example/login</p></body></html>`
	htmlBody := base64.RawURLEncoding.EncodeToString([]byte(rawHTML))

	// Canned message returned by the stub server for any messages path.
	messageResp := map[string]any{
		"id":           "m1",
		"threadId":     "t1",
		"labelIds":     []string{"INBOX"},
		"snippet":      "snippet https://snippet.example",
		"internalDate": "1766743200000",
		"payload": map[string]any{
			"mimeType": "text/html",
			"body":     map[string]any{"data": htmlBody},
			"headers": []map[string]any{
				{"name": "From", "value": "a@example.com"},
				{"name": "To", "value": "b@example.com"},
				{"name": "Subject", "value": "Visit https://evil.example now"},
				{"name": "Date", "value": "Fri, 26 Dec 2025 10:00:00 +0000"},
				{"name": "List-Unsubscribe", "value": "<https://unsub.example.com>"},
			},
		},
	}

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if !strings.Contains(r.URL.Path, "/gmail/v1/users/me/messages/") {
			http.NotFound(w, r)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(messageResp)
	}))
	defer srv.Close()

	svc, err := gmail.NewService(context.Background(),
		option.WithoutAuthentication(),
		option.WithHTTPClient(srv.Client()),
		option.WithEndpoint(srv.URL+"/"),
	)
	if err != nil {
		t.Fatalf("NewService: %v", err)
	}
	newGmailService = func(context.Context, string) (*gmail.Service, error) { return svc, nil }

	args := []string{"--json", "--account", "a@b.com", "gmail", "get", "m1", "--sanitize-content"}
	out := captureStdout(t, func() {
		_ = captureStderr(t, func() {
			if err := Execute(args); err != nil {
				t.Fatalf("Execute: %v", err)
			}
		})
	})

	for _, unsafe := range []string{"https://", "tracker.example", htmlBody} {
		if strings.Contains(out, unsafe) {
			t.Fatalf("sanitized JSON leaked unsafe content: %s", out)
		}
	}
	for _, rawKey := range []string{"payload", "unsubscribe"} {
		if strings.Contains(out, rawKey) {
			t.Fatalf("sanitized JSON should not expose raw Gmail payload/unsubscribe: %s", out)
		}
	}

	type messageEnvelope struct {
		ID      string            `json:"id"`
		Headers map[string]string `json:"headers"`
	}
	var parsed struct {
		Body    string          `json:"body"`
		Message messageEnvelope `json:"message"`
	}
	if err := json.Unmarshal([]byte(out), &parsed); err != nil {
		t.Fatalf("decode JSON: %v", err)
	}
	if got := parsed.Body; got != "Hello [url removed]" {
		t.Fatalf("unexpected body: %q", got)
	}
	if got := parsed.Message.Headers["subject"]; got != "Visit [url removed] now" {
		t.Fatalf("unexpected sanitized subject: %#v", parsed.Message.Headers)
	}
}
// TestGmailGetCmd_SanitizeContentRejectsRaw verifies that combining
// --format raw with --sanitize-content fails with the dedicated usage error.
func TestGmailGetCmd_SanitizeContentRejectsRaw(t *testing.T) {
	const wantMsg = "--sanitize-content cannot be used with --format raw"
	args := []string{"--account", "a@b.com", "gmail", "get", "m1", "--format", "raw", "--sanitize-content"}

	err := Execute(args)
	if err != nil && strings.Contains(err.Error(), wantMsg) {
		return
	}
	t.Fatalf("expected raw/sanitize usage error, got: %v", err)
}
// TestGmailThreadGet_SanitizeContent_JSONUsesSafeEnvelope runs
// `gmail thread get t1 --sanitize-content --json` against a stub Gmail server
// and verifies that the JSON output contains no URLs, no tracker host, no
// encoded body data, no "payload"/"unsubscribe" text, and that the single
// message body is sanitized.
func TestGmailThreadGet_SanitizeContent_JSONUsesSafeEnvelope(t *testing.T) {
	savedFactory := newGmailService
	t.Cleanup(func() { newGmailService = savedFactory })

	const rawHTML = `<style>.x{background:url(https://tracker.example)}</style><p>Hello https://phish.example/login</p>`
	htmlBody := base64.RawURLEncoding.EncodeToString([]byte(rawHTML))

	// Canned single-message thread served for GET /users/me/threads/t1.
	message := map[string]any{
		"id":       "m1",
		"threadId": "t1",
		"payload": map[string]any{
			"headers": []map[string]any{
				{"name": "From", "value": "a@example.com"},
				{"name": "To", "value": "b@example.com"},
				{"name": "Subject", "value": "Check https://evil.example now"},
				{"name": "Date", "value": "Mon, 1 Jan 2025 00:00:00 +0000"},
				{"name": "List-Unsubscribe", "value": "<https://unsub.example.com>"},
			},
			"mimeType": "text/html",
			"body":     map[string]any{"data": htmlBody},
		},
	}
	threadResp := map[string]any{
		"id":       "t1",
		"messages": []map[string]any{message},
	}

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		path := strings.TrimPrefix(r.URL.Path, "/gmail/v1")
		if r.Method != http.MethodGet || path != "/users/me/threads/t1" {
			http.NotFound(w, r)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(threadResp)
	}))
	defer srv.Close()

	svc, err := gmail.NewService(context.Background(),
		option.WithoutAuthentication(),
		option.WithHTTPClient(srv.Client()),
		option.WithEndpoint(srv.URL+"/"),
	)
	if err != nil {
		t.Fatalf("NewService: %v", err)
	}
	newGmailService = func(context.Context, string) (*gmail.Service, error) { return svc, nil }

	args := []string{"--json", "--account", "a@b.com", "gmail", "thread", "get", "t1", "--sanitize-content"}
	out := captureStdout(t, func() {
		_ = captureStderr(t, func() {
			if err := Execute(args); err != nil {
				t.Fatalf("Execute: %v", err)
			}
		})
	})

	for _, unsafe := range []string{"https://", "tracker.example", htmlBody} {
		if strings.Contains(out, unsafe) {
			t.Fatalf("sanitized thread JSON leaked unsafe content: %s", out)
		}
	}
	for _, rawKey := range []string{"payload", "unsubscribe"} {
		if strings.Contains(out, rawKey) {
			t.Fatalf("sanitized thread JSON should not expose raw Gmail payload/unsubscribe: %s", out)
		}
	}

	type threadEnvelope struct {
		Messages []gmailSanitizedMessageOutput `json:"messages"`
	}
	var parsed struct {
		Thread threadEnvelope `json:"thread"`
	}
	if err := json.Unmarshal([]byte(out), &parsed); err != nil {
		t.Fatalf("decode JSON: %v", err)
	}

	messages := parsed.Thread.Messages
	if len(messages) != 1 {
		t.Fatalf("unexpected messages: %#v", messages)
	}
	if got := messages[0].Body; got != "Hello [url removed]" {
		t.Fatalf("unexpected body: %q", got)
	}
}

View File

@ -55,10 +55,11 @@ type GmailThreadCmd struct {
}
type GmailThreadGetCmd struct {
ThreadID string `arg:"" name:"threadId" help:"Thread ID"`
Download bool `name:"download" help:"Download attachments"`
Full bool `name:"full" help:"Show full message bodies"`
OutputDir OutputDirFlag `embed:""`
ThreadID string `arg:"" name:"threadId" help:"Thread ID"`
Download bool `name:"download" help:"Download attachments"`
Full bool `name:"full" help:"Show full message bodies"`
SanitizeContent bool `name:"sanitize-content" aliases:"sanitize,safe" help:"Emit agent-oriented sanitized content: strip HTML, remove HTTP(S) URLs, and omit raw Gmail payloads from JSON"`
OutputDir OutputDirFlag `embed:""`
}
func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
@ -111,6 +112,12 @@ func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
downloadedFiles = append(downloadedFiles, attachmentDownloadSummaries(downloads)...)
}
}
if c.SanitizeContent {
return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
"thread": sanitizedGmailThread(thread, true),
"downloaded": downloadedFiles,
})
}
return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
"thread": thread,
"downloaded": downloadedFiles,
@ -130,16 +137,25 @@ func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
continue
}
u.Out().Printf("=== Message %d/%d: %s ===", i+1, len(thread.Messages), msg.Id)
u.Out().Printf("From: %s", headerValue(msg.Payload, "From"))
u.Out().Printf("To: %s", headerValue(msg.Payload, "To"))
u.Out().Printf("Subject: %s", headerValue(msg.Payload, "Subject"))
u.Out().Printf("Date: %s", headerValue(msg.Payload, "Date"))
header := func(name string) string {
value := headerValue(msg.Payload, name)
if c.SanitizeContent {
return sanitizeGmailText(value)
}
return value
}
u.Out().Printf("From: %s", header("From"))
u.Out().Printf("To: %s", header("To"))
u.Out().Printf("Subject: %s", header("Subject"))
u.Out().Printf("Date: %s", header("Date"))
u.Out().Println("")
body, isHTML := bestBodyForDisplay(msg.Payload)
if body != "" {
cleanBody := body
if isHTML {
if c.SanitizeContent {
cleanBody = sanitizeGmailBody(body, isHTML)
} else if isHTML {
// Strip HTML tags for cleaner text output
cleanBody = stripHTMLTags(body)
}