fix(tracking): expire old open records

This commit is contained in:
Peter Steinberger 2026-04-28 09:32:26 +01:00
parent dfc5b75f86
commit 9ce77eff13
No known key found for this signature in database
5 changed files with 111 additions and 2 deletions

View File

@ -26,6 +26,7 @@
- Backup: split Gmail checkpoint commits by row count and plaintext byte size so large messages stay below GitHub's blob limit.
- Auth: keep `gog auth list` and `gog auth tokens list` useful when one file-keyring token cannot be decrypted; unreadable entries are now reported instead of aborting the whole listing. (#377)
- Email tracking: deduplicate repeated pixel opens and cap recorded opens per IP per hour to reduce D1 abuse from replay or high-volume requests. (#294)
- Email tracking: add daily Worker retention cleanup for open rows older than 90 days and cap admin `/opens` responses at 500 rows. (#292)
- Auth: time out Linux D-Bus keyring write operations and report when OAuth completed but saving the refresh token failed, so manual auth no longer looks like a stuck paste when token persistence is blocked. (#130)
- Install docs: document Windows release ZIP/PATH setup and clarify that source builds require the Go version declared in `go.mod`, not Ubuntu 24.04's Go 1.22 package. (#157, #135)
- CI: pin GitHub Actions workflow dependencies to immutable commit SHAs. (#288)

View File

@ -20,7 +20,9 @@ Abuse controls:
Privacy note:
- Tracking is inherently sensitive. Treat this as *instrumentation you opt into per email*.
- The Worker stores IP + user-agent and can derive coarse geo (depending on CF headers/config).
- The Worker stores recipient email, subject hash, sent/open timestamps, IP, user-agent, bot classification, and coarse geo from Cloudflare request metadata when available.
- The deployed Worker includes a daily cron trigger that deletes rows from the `opens` table whose `opened_at` is more than 90 days old.
- Admin `/opens` queries return 100 rows by default (also used when `limit` is missing, non-numeric, or not positive) and are capped at 500 rows per request.
## Setup (local)
@ -76,6 +78,8 @@ Update `wrangler.toml` to reference the D1 `database_id`, then deploy:
pnpm exec wrangler deploy
```
`wrangler.toml` includes a daily cron trigger (`0 2 * * *`) for retention cleanup. After deploy, Cloudflare invokes the Worker's `scheduled` handler once per day, and the handler deletes `opens` rows older than 90 days.
## Send tracked mail
Tracked email constraints:

View File

@ -12,6 +12,12 @@ interface OpenRow {
opened_at: string;
ip: string;
user_agent: string;
is_bot?: number;
bot_type?: string | null;
city?: string | null;
region?: string | null;
country?: string | null;
timezone?: string | null;
}
class FakeD1 {
@ -53,6 +59,13 @@ class FakeStatement {
}
async run(): Promise<void> {
if (this.sql.includes('DELETE FROM opens')) {
const cutoff = new Date();
cutoff.setUTCDate(cutoff.getUTCDate() - 90);
this.db.rows = this.db.rows.filter(row => new Date(row.opened_at) >= cutoff);
return;
}
if (!this.sql.includes('INSERT INTO opens')) {
return;
}
@ -77,6 +90,17 @@ class FakeStatement {
user_agent: String(userAgent),
});
}
/**
 * Test double for a statement's `all()` call.
 *
 * Only `SELECT * FROM opens` statements yield rows; anything else resolves
 * to an empty result set. The last bound parameter is read as the row limit
 * (falling back to 100 when it is falsy), and that many rows are taken from
 * the start of the fake table. No filtering or ordering is applied.
 */
async all(): Promise<{ results: OpenRow[] }> {
  if (!this.sql.includes('SELECT * FROM opens')) {
    return { results: [] };
  }

  // NOTE(review): assumes the query under test binds LIMIT as its final parameter — confirm against handleAdminOpens.
  const lastParam = this.params[this.params.length - 1];
  const rowCap = Number(lastParam || 100);
  const results = this.db.rows.slice(0, rowCap);
  return { results };
}
}
async function pixelRequest(blob: string, ip = '203.0.113.10', userAgent = 'Mozilla/5.0'): Promise<Request> {
@ -127,3 +151,55 @@ describe('tracking worker pixel rate limiting', () => {
expect(db.rows).toHaveLength(100);
});
});
describe('tracking worker retention', () => {
  // Builds the worker environment around a fake D1 database.
  const envFor = (db: FakeD1) => ({
    DB: db as unknown as D1Database,
    TRACKING_KEY: testKey,
    ADMIN_KEY: 'admin',
  });

  // Creates one open row; `sent_at` is always "now" and the IP is fixed.
  const openRow = (
    trackingId: string,
    recipient: string,
    subjectHash: string,
    openedAt: string,
    userAgent: string,
  ): OpenRow => ({
    tracking_id: trackingId,
    recipient,
    subject_hash: subjectHash,
    sent_at: new Date().toISOString(),
    opened_at: openedAt,
    ip: '203.0.113.10',
    user_agent: userAgent,
  });

  it('purges opens older than 90 days from scheduled cron', async () => {
    const db = new FakeD1();
    const env = envFor(db);

    // One row far beyond the retention window, one opened right now.
    db.rows.push(openRow('old', 'old@example.com', 'old', '2020-01-01T00:00:00.000Z', 'old-ua'));
    db.rows.push(openRow('fresh', 'fresh@example.com', 'fresh', new Date().toISOString(), 'fresh-ua'));

    await worker.scheduled({} as ScheduledEvent, env);

    const remainingIds = db.rows.map(row => row.tracking_id);
    expect(remainingIds).toEqual(['fresh']);
  });

  it('clamps admin opens limit', async () => {
    const db = new FakeD1();
    const env = envFor(db);

    // Seed more rows than the admin endpoint is allowed to return.
    const seeded = Array.from({ length: 600 }, (_, i) =>
      openRow(`open-${i}`, 'recipient@example.com', 'hash', new Date().toISOString(), `ua-${i}`),
    );
    for (const row of seeded) {
      db.rows.push(row);
    }

    const request = new Request('https://tracker.example.com/opens?limit=999999', {
      headers: { Authorization: 'Bearer admin' },
    });
    const response = await worker.fetch(request, env);
    const body = await response.json() as { opens: unknown[] };

    expect(body.opens).toHaveLength(500);
  });
});

View File

@ -6,6 +6,9 @@ import { pixelResponse } from './pixel';
// NOTE(review): the next three constants are used outside the visible hunks;
// by name they look like SQLite datetime modifiers / a per-IP hourly cap for
// pixel-open dedup and rate limiting — confirm at their call sites.
const OPEN_DEDUP_WINDOW = '-1 hour';
const IP_RATE_WINDOW = '-1 hour';
const MAX_OPENS_PER_IP_PER_HOUR = 100;
// Modifier bound into datetime('now', ?) by purgeExpiredOpens: rows with an
// opened_at earlier than this cutoff are deleted.
const OPEN_RETENTION_WINDOW = '-90 days';
// Row limit parseAdminLimit returns when the requested limit is missing,
// unparsable, or not positive.
const DEFAULT_ADMIN_LIMIT = 100;
// Upper bound parseAdminLimit applies to any requested limit.
const MAX_ADMIN_LIMIT = 500;
export default {
async fetch(request: Request, env: Env): Promise<Response> {
@ -39,6 +42,10 @@ export default {
return new Response('Internal Error', { status: 500 });
}
},
/**
 * Scheduled (cron) entry point of the Worker.
 *
 * Ignores the event payload and runs the retention cleanup, resolving once
 * the purge of expired open rows has completed.
 */
async scheduled(_event: ScheduledEvent, env: Env): Promise<void> {
await purgeExpiredOpens(env);
},
};
async function handlePixel(request: Request, env: Env, path: string): Promise<Response> {
@ -141,6 +148,15 @@ async function shouldSkipOpen(env: Env, trackingId: string, ip: string, userAgen
}
}
/**
 * Removes expired rows from the `opens` table.
 *
 * Issues a single parameterized DELETE for every row whose `opened_at` sorts
 * before `datetime('now', OPEN_RETENTION_WINDOW)`. The retention window is
 * bound as a parameter and not interpolated into the SQL text.
 *
 * @param env Worker environment providing the `DB` binding.
 */
async function purgeExpiredOpens(env: Env): Promise<void> {
  const sql = `
DELETE FROM opens
WHERE opened_at < datetime('now', ?)
`;
  const statement = env.DB.prepare(sql).bind(OPEN_RETENTION_WINDOW);
  await statement.run();
}
async function handleQuery(request: Request, env: Env, path: string): Promise<Response> {
const blob = path.slice(3); // Remove '/q/'
@ -197,7 +213,7 @@ async function handleAdminOpens(request: Request, env: Env, url: URL): Promise<R
const recipient = url.searchParams.get('recipient');
const since = url.searchParams.get('since');
const limit = parseInt(url.searchParams.get('limit') || '100', 10);
const limit = parseAdminLimit(url.searchParams.get('limit'));
let query = 'SELECT * FROM opens WHERE 1=1';
const params: any[] = [];
@ -234,3 +250,12 @@ async function handleAdminOpens(request: Request, env: Env, url: URL): Promise<R
})),
});
}
/**
 * Turns the raw `limit` query value into a safe row limit.
 *
 * @param raw The `limit` query-string value, or `null` when absent.
 * @returns `DEFAULT_ADMIN_LIMIT` when the value is missing, does not parse
 *   as a base-10 integer, or is zero/negative; otherwise the parsed value
 *   capped at `MAX_ADMIN_LIMIT`.
 */
function parseAdminLimit(raw: string | null): number {
  const requested = Number.parseInt(raw || '', 10);
  // NaN (empty or non-numeric input) fails the finiteness check and falls back to the default.
  const usable = Number.isFinite(requested) && requested > 0;
  return usable ? Math.min(requested, MAX_ADMIN_LIMIT) : DEFAULT_ADMIN_LIMIT;
}

View File

@ -2,6 +2,9 @@ name = "gog-email-tracker"
main = "src/index.ts"
compatibility_date = "2024-12-01"
# Daily cron trigger (minute 0, hour 2) that invokes the Worker's scheduled
# handler, which performs the retention cleanup of old open rows.
[triggers]
crons = ["0 2 * * *"]
[[d1_databases]]
binding = "DB"
database_name = "gog-email-tracker"