diff --git a/CHANGELOG.md b/CHANGELOG.md index 24f98b1..f127c53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ - Backup: split Gmail checkpoint commits by row count and plaintext byte size so large messages stay below GitHub's blob limit. - Auth: keep `gog auth list` and `gog auth tokens list` useful when one file-keyring token cannot be decrypted; unreadable entries are now reported instead of aborting the whole listing. (#377) - Email tracking: deduplicate repeated pixel opens and cap recorded opens per IP per hour to reduce D1 abuse from replay or high-volume requests. (#294) +- Email tracking: add daily Worker retention cleanup for open rows older than 90 days and cap admin `/opens` responses at 500 rows. (#292) - Auth: time out Linux D-Bus keyring write operations and report when OAuth completed but saving the refresh token failed, so manual auth no longer looks like a stuck paste when token persistence is blocked. (#130) - Install docs: document Windows release ZIP/PATH setup and clarify that source builds require the Go version declared in `go.mod`, not Ubuntu 24.04's Go 1.22 package. (#157, #135) - CI: pin GitHub Actions workflow dependencies to immutable commit SHAs. (#288) diff --git a/docs/email-tracking.md b/docs/email-tracking.md index 234d1a5..5a79d83 100644 --- a/docs/email-tracking.md +++ b/docs/email-tracking.md @@ -20,7 +20,9 @@ Abuse controls: Privacy note: - Tracking is inherently sensitive. Treat this as *instrumentation you opt into per email*. -- The Worker stores IP + user-agent and can derive coarse geo (depending on CF headers/config). +- The Worker stores recipient email, subject hash, sent/open timestamps, IP, user-agent, bot classification, and coarse geo from Cloudflare request metadata when available. +- The deployed Worker includes a daily cron trigger that deletes open rows older than 90 days. +- Admin `/opens` queries default to 100 rows and are capped at 500 rows per request. 
## Setup (local) @@ -76,6 +78,8 @@ Update `wrangler.toml` to reference the D1 `database_id`, then deploy: pnpm exec wrangler deploy ``` +`wrangler.toml` includes a daily cron trigger for retention cleanup. After deploy, Cloudflare calls the Worker once per day and the Worker deletes open rows older than 90 days. + ## Send tracked mail Tracked email constraints: diff --git a/internal/tracking/worker/src/index.test.ts b/internal/tracking/worker/src/index.test.ts index a70c1ee..4be84e3 100644 --- a/internal/tracking/worker/src/index.test.ts +++ b/internal/tracking/worker/src/index.test.ts @@ -12,6 +12,12 @@ interface OpenRow { opened_at: string; ip: string; user_agent: string; + is_bot?: number; + bot_type?: string | null; + city?: string | null; + region?: string | null; + country?: string | null; + timezone?: string | null; } class FakeD1 { @@ -53,6 +59,13 @@ class FakeStatement { } async run(): Promise<void> { + if (this.sql.includes('DELETE FROM opens')) { + const cutoff = new Date(); + cutoff.setUTCDate(cutoff.getUTCDate() - 90); + this.db.rows = this.db.rows.filter(row => new Date(row.opened_at) >= cutoff); + return; + } + if (!this.sql.includes('INSERT INTO opens')) { return; } @@ -77,6 +90,17 @@ class FakeStatement { user_agent: String(userAgent), }); } + + async all(): Promise<{ results: OpenRow[] }> { + if (this.sql.includes('SELECT * FROM opens')) { + const limit = Number(this.params[this.params.length - 1] || 100); + return { + results: this.db.rows.slice(0, limit), + }; + } + + return { results: [] }; + } } async function pixelRequest(blob: string, ip = '203.0.113.10', userAgent = 'Mozilla/5.0'): Promise { @@ -127,3 +151,55 @@ describe('tracking worker pixel rate limiting', () => { expect(db.rows).toHaveLength(100); }); }); + +describe('tracking worker retention', () => { + it('purges opens older than 90 days from scheduled cron', async () => { + const db = new FakeD1(); + const env = { DB: db as unknown as D1Database, TRACKING_KEY: testKey, ADMIN_KEY: 'admin' 
}; + db.rows.push({ + tracking_id: 'old', + recipient: 'old@example.com', + subject_hash: 'old', + sent_at: new Date().toISOString(), + opened_at: '2020-01-01T00:00:00.000Z', + ip: '203.0.113.10', + user_agent: 'old-ua', + }); + db.rows.push({ + tracking_id: 'fresh', + recipient: 'fresh@example.com', + subject_hash: 'fresh', + sent_at: new Date().toISOString(), + opened_at: new Date().toISOString(), + ip: '203.0.113.10', + user_agent: 'fresh-ua', + }); + + await worker.scheduled({} as ScheduledEvent, env); + + expect(db.rows.map(row => row.tracking_id)).toEqual(['fresh']); + }); + + it('clamps admin opens limit', async () => { + const db = new FakeD1(); + const env = { DB: db as unknown as D1Database, TRACKING_KEY: testKey, ADMIN_KEY: 'admin' }; + for (let i = 0; i < 600; i++) { + db.rows.push({ + tracking_id: `open-${i}`, + recipient: 'recipient@example.com', + subject_hash: 'hash', + sent_at: new Date().toISOString(), + opened_at: new Date().toISOString(), + ip: '203.0.113.10', + user_agent: `ua-${i}`, + }); + } + + const response = await worker.fetch(new Request('https://tracker.example.com/opens?limit=999999', { + headers: { Authorization: 'Bearer admin' }, + }), env); + const body = await response.json() as { opens: unknown[] }; + + expect(body.opens).toHaveLength(500); + }); +}); diff --git a/internal/tracking/worker/src/index.ts b/internal/tracking/worker/src/index.ts index 9ae887f..2f553bb 100644 --- a/internal/tracking/worker/src/index.ts +++ b/internal/tracking/worker/src/index.ts @@ -6,6 +6,9 @@ import { pixelResponse } from './pixel'; const OPEN_DEDUP_WINDOW = '-1 hour'; const IP_RATE_WINDOW = '-1 hour'; const MAX_OPENS_PER_IP_PER_HOUR = 100; +const OPEN_RETENTION_WINDOW = '-90 days'; +const DEFAULT_ADMIN_LIMIT = 100; +const MAX_ADMIN_LIMIT = 500; export default { async fetch(request: Request, env: Env): Promise<Response> { @@ -39,6 +42,10 @@ export default { return new Response('Internal Error', { status: 500 }); } }, + + async scheduled(_event: ScheduledEvent, 
env: Env): Promise<void> { + await purgeExpiredOpens(env); + }, }; async function handlePixel(request: Request, env: Env, path: string): Promise<Response> { @@ -141,6 +148,15 @@ async function shouldSkipOpen(env: Env, trackingId: string, ip: string, userAgen } } +async function purgeExpiredOpens(env: Env): Promise<void> { + await env.DB.prepare(` + DELETE FROM opens + WHERE opened_at < datetime('now', ?) + `).bind( + OPEN_RETENTION_WINDOW + ).run(); +} + async function handleQuery(request: Request, env: Env, path: string): Promise<Response> { const blob = path.slice(3); // Remove '/q/' @@ -197,7 +213,7 @@ async function handleAdminOpens(request: Request, env: Env, url: URL): Promise