fix(tracking): expire old open records
This commit is contained in:
parent
dfc5b75f86
commit
9ce77eff13
@ -26,6 +26,7 @@
|
||||
- Backup: split Gmail checkpoint commits by row count and plaintext byte size so large messages stay below GitHub's blob limit.
|
||||
- Auth: keep `gog auth list` and `gog auth tokens list` useful when one file-keyring token cannot be decrypted; unreadable entries are now reported instead of aborting the whole listing. (#377)
|
||||
- Email tracking: deduplicate repeated pixel opens and cap recorded opens per IP per hour to reduce D1 abuse from replay or high-volume requests. (#294)
|
||||
- Email tracking: add daily Worker retention cleanup for open rows older than 90 days and cap admin `/opens` responses at 500 rows. (#292)
|
||||
- Auth: time out Linux D-Bus keyring write operations and report when OAuth completed but saving the refresh token failed, so manual auth no longer looks like a stuck paste when token persistence is blocked. (#130)
|
||||
- Install docs: document Windows release ZIP/PATH setup and clarify that source builds require the Go version declared in `go.mod`, not Ubuntu 24.04's Go 1.22 package. (#157, #135)
|
||||
- CI: pin GitHub Actions workflow dependencies to immutable commit SHAs. (#288)
|
||||
|
||||
@ -20,7 +20,9 @@ Abuse controls:
|
||||
|
||||
Privacy note:
|
||||
- Tracking is inherently sensitive. Treat this as *instrumentation you opt into per email*.
|
||||
- The Worker stores IP + user-agent and can derive coarse geo (depending on CF headers/config).
|
||||
- The Worker stores recipient email, subject hash, sent/open timestamps, IP, user-agent, bot classification, and coarse geo from Cloudflare request metadata when available.
|
||||
- The deployed Worker includes a daily cron trigger that deletes open rows older than 90 days.
|
||||
- Admin `/opens` queries default to 100 rows and are capped at 500 rows per request.
|
||||
|
||||
## Setup (local)
|
||||
|
||||
@ -76,6 +78,8 @@ Update `wrangler.toml` to reference the D1 `database_id`, then deploy:
|
||||
pnpm exec wrangler deploy
|
||||
```
|
||||
|
||||
`wrangler.toml` includes a daily cron trigger for retention cleanup. After deploy, Cloudflare calls the Worker once per day and the Worker deletes open rows older than 90 days.
|
||||
|
||||
## Send tracked mail
|
||||
|
||||
Tracked email constraints:
|
||||
|
||||
@ -12,6 +12,12 @@ interface OpenRow {
|
||||
opened_at: string;
|
||||
ip: string;
|
||||
user_agent: string;
|
||||
is_bot?: number;
|
||||
bot_type?: string | null;
|
||||
city?: string | null;
|
||||
region?: string | null;
|
||||
country?: string | null;
|
||||
timezone?: string | null;
|
||||
}
|
||||
|
||||
class FakeD1 {
|
||||
@ -53,6 +59,13 @@ class FakeStatement {
|
||||
}
|
||||
|
||||
async run(): Promise<void> {
|
||||
if (this.sql.includes('DELETE FROM opens')) {
|
||||
const cutoff = new Date();
|
||||
cutoff.setUTCDate(cutoff.getUTCDate() - 90);
|
||||
this.db.rows = this.db.rows.filter(row => new Date(row.opened_at) >= cutoff);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.sql.includes('INSERT INTO opens')) {
|
||||
return;
|
||||
}
|
||||
@ -77,6 +90,17 @@ class FakeStatement {
|
||||
user_agent: String(userAgent),
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Test double for a prepared statement's `all()`.
 *
 * Only statements whose SQL contains `SELECT * FROM opens` return data; every
 * other statement resolves to an empty result set. No filtering is applied:
 * the fake simply returns the first N stored rows, where N is taken from the
 * last entry of `this.params` (falling back to 100 when that entry is falsy).
 */
async all(): Promise<{ results: OpenRow[] }> {
  const isOpensSelect = this.sql.includes('SELECT * FROM opens');
  if (!isOpensSelect) {
    return { results: [] };
  }

  // The last parameter is read as the row limit.
  const lastParam = this.params[this.params.length - 1];
  const rowCap = Number(lastParam || 100);
  const results = this.db.rows.slice(0, rowCap);
  return { results };
}
|
||||
}
|
||||
|
||||
async function pixelRequest(blob: string, ip = '203.0.113.10', userAgent = 'Mozilla/5.0'): Promise<Request> {
|
||||
@ -127,3 +151,55 @@ describe('tracking worker pixel rate limiting', () => {
|
||||
expect(db.rows).toHaveLength(100);
|
||||
});
|
||||
});
|
||||
|
||||
describe('tracking worker retention', () => {
  // Fresh fake database plus the env bindings handed to the worker.
  const setup = () => {
    const db = new FakeD1();
    const env = { DB: db as unknown as D1Database, TRACKING_KEY: testKey, ADMIN_KEY: 'admin' };
    return { db, env };
  };

  // Current time as an ISO string; evaluated anew for every field that uses it.
  const nowIso = (): string => new Date().toISOString();

  it('purges opens older than 90 days from scheduled cron', async () => {
    const { db, env } = setup();

    // One row far beyond the retention window, one row opened just now.
    db.rows.push(
      {
        tracking_id: 'old',
        recipient: 'old@example.com',
        subject_hash: 'old',
        sent_at: nowIso(),
        opened_at: '2020-01-01T00:00:00.000Z',
        ip: '203.0.113.10',
        user_agent: 'old-ua',
      },
      {
        tracking_id: 'fresh',
        recipient: 'fresh@example.com',
        subject_hash: 'fresh',
        sent_at: nowIso(),
        opened_at: nowIso(),
        ip: '203.0.113.10',
        user_agent: 'fresh-ua',
      },
    );

    await worker.scheduled({} as ScheduledEvent, env);

    const remainingIds = db.rows.map(row => row.tracking_id);
    expect(remainingIds).toEqual(['fresh']);
  });

  it('clamps admin opens limit', async () => {
    const { db, env } = setup();

    // Seed more rows than the admin endpoint is allowed to return.
    const seeded = Array.from({ length: 600 }, (_, i) => ({
      tracking_id: `open-${i}`,
      recipient: 'recipient@example.com',
      subject_hash: 'hash',
      sent_at: nowIso(),
      opened_at: nowIso(),
      ip: '203.0.113.10',
      user_agent: `ua-${i}`,
    }));
    db.rows.push(...seeded);

    // Ask for far more rows than the cap allows.
    const request = new Request('https://tracker.example.com/opens?limit=999999', {
      headers: { Authorization: 'Bearer admin' },
    });
    const response = await worker.fetch(request, env);
    const body = await response.json() as { opens: unknown[] };

    expect(body.opens).toHaveLength(500);
  });
});
|
||||
|
||||
@ -6,6 +6,9 @@ import { pixelResponse } from './pixel';
|
||||
// Relative time modifier for open de-duplication.
// NOTE(review): its use is outside this view; presumably passed to SQLite datetime('now', ?) — confirm.
const OPEN_DEDUP_WINDOW = '-1 hour';
|
||||
// Relative time modifier for the per-IP rate-limit look-back.
// NOTE(review): its use is outside this view — confirm.
const IP_RATE_WINDOW = '-1 hour';
|
||||
// Cap on recorded opens per IP within the rate window.
// NOTE(review): enforcement code is outside this view — confirm.
const MAX_OPENS_PER_IP_PER_HOUR = 100;
|
||||
// Modifier bound into datetime('now', ?) by purgeExpiredOpens; rows with an earlier opened_at are deleted.
const OPEN_RETENTION_WINDOW = '-90 days';
|
||||
// Row limit parseAdminLimit returns when the requested limit is missing, non-numeric, or not positive.
const DEFAULT_ADMIN_LIMIT = 100;
|
||||
// Upper bound parseAdminLimit applies to any requested limit.
const MAX_ADMIN_LIMIT = 500;
|
||||
|
||||
export default {
|
||||
async fetch(request: Request, env: Env): Promise<Response> {
|
||||
@ -39,6 +42,10 @@ export default {
|
||||
return new Response('Internal Error', { status: 500 });
|
||||
}
|
||||
},
|
||||
|
||||
/**
 * Scheduled (cron) entry point of the Worker.
 *
 * Ignores the event payload and delegates to purgeExpiredOpens, awaiting it so
 * that a failed purge rejects the handler's promise.
 */
async scheduled(_event: ScheduledEvent, env: Env): Promise<void> {
|
||||
await purgeExpiredOpens(env);
|
||||
},
|
||||
};
|
||||
|
||||
async function handlePixel(request: Request, env: Env, path: string): Promise<Response> {
|
||||
@ -141,6 +148,15 @@ async function shouldSkipOpen(env: Env, trackingId: string, ip: string, userAgen
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deletes expired rows from the `opens` table.
 *
 * Runs a single DELETE for rows whose `opened_at` is earlier than
 * `datetime('now', OPEN_RETENTION_WINDOW)`; the window is bound as a statement
 * parameter rather than interpolated into the SQL. There is no error handling
 * here, so a failing statement rejects the returned promise.
 *
 * NOTE(review): the `<` comparison is between the stored `opened_at` value and
 * the output of SQLite `datetime()`. If `opened_at` is stored in a different
 * text format (for example ISO 8601 with `T`/`Z`), rows close to the cutoff may
 * compare differently than expected — confirm against the INSERT/schema.
 *
 * @param env Worker bindings; only `env.DB` is used.
 */
async function purgeExpiredOpens(env: Env): Promise<void> {
|
||||
await env.DB.prepare(`
|
||||
DELETE FROM opens
|
||||
WHERE opened_at < datetime('now', ?)
|
||||
`).bind(
|
||||
OPEN_RETENTION_WINDOW
|
||||
).run();
|
||||
}
|
||||
|
||||
async function handleQuery(request: Request, env: Env, path: string): Promise<Response> {
|
||||
const blob = path.slice(3); // Remove '/q/'
|
||||
|
||||
@ -197,7 +213,7 @@ async function handleAdminOpens(request: Request, env: Env, url: URL): Promise<R
|
||||
|
||||
const recipient = url.searchParams.get('recipient');
|
||||
const since = url.searchParams.get('since');
|
||||
const limit = parseInt(url.searchParams.get('limit') || '100', 10);
|
||||
const limit = parseAdminLimit(url.searchParams.get('limit'));
|
||||
|
||||
let query = 'SELECT * FROM opens WHERE 1=1';
|
||||
const params: any[] = [];
|
||||
@ -234,3 +250,12 @@ async function handleAdminOpens(request: Request, env: Env, url: URL): Promise<R
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Converts the raw `limit` query value into a row count for admin queries.
 *
 * The value is read as a base-10 integer prefix (so `'12abc'` yields 12 and
 * `'7.9'` yields 7). A missing, non-numeric, zero or negative value falls back
 * to DEFAULT_ADMIN_LIMIT; anything larger than MAX_ADMIN_LIMIT is clamped to it.
 *
 * @param raw The `limit` query parameter, or `null` when it was not supplied.
 * @returns A positive integer no greater than MAX_ADMIN_LIMIT.
 */
function parseAdminLimit(raw: string | null): number {
  const requested = Number.parseInt(raw ?? '', 10);
  const usable = Number.isFinite(requested) && requested > 0;
  return usable ? Math.min(requested, MAX_ADMIN_LIMIT) : DEFAULT_ADMIN_LIMIT;
}
|
||||
|
||||
@ -2,6 +2,9 @@ name = "gog-email-tracker"
|
||||
main = "src/index.ts"
|
||||
compatibility_date = "2024-12-01"
|
||||
|
||||
# Cron trigger that invokes the Worker's scheduled() handler once a day at 02:00
# (Cloudflare evaluates cron expressions in UTC) to run the retention cleanup.
[triggers]
|
||||
crons = ["0 2 * * *"]
|
||||
|
||||
[[d1_databases]]
|
||||
binding = "DB"
|
||||
database_name = "gog-email-tracker"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user