Harden attachment file ref-counting when reusing files on disk

This commit is contained in:
trevor-signal 2026-06-17 12:52:25 -04:00 committed by GitHub
parent 7b1db7325c
commit cfe2193db0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 182 additions and 170 deletions

View File

@ -215,11 +215,20 @@ CREATE TABLE attachment_downloads_backup_stats (
```sql
CREATE TABLE attachments_protected_from_deletion (
path TEXT NOT NULL,
messageId TEXT NOT NULL,
PRIMARY KEY (path, messageId)
reuseToken TEXT NOT NULL,
PRIMARY KEY (path, reuseToken)
) STRICT
```
<details>
<summary>Index: attachments_protected_from_deletion → attachments_protected_from_deletion_reuseToken</summary>
```sql
CREATE INDEX attachments_protected_from_deletion_reuseToken ON attachments_protected_from_deletion (reuseToken)
```
</details>
<details>
<summary>Index: attachments_protected_from_deletion → sqlite_autoindex_attachments_protected_from_deletion_1</summary>
@ -1169,53 +1178,6 @@ CREATE INDEX message_attachments_thumbnailPath ON message_attachments (thumbnail
</details>
<details>
<summary>Trigger: message_attachments → stop_protecting_attachments_after_insert</summary>
```sql
CREATE TRIGGER stop_protecting_attachments_after_insert AFTER INSERT ON message_attachments BEGIN
DELETE FROM attachments_protected_from_deletion
WHERE
messageId IS NEW.messageId
AND path IN (
NEW.path,
NEW.thumbnailPath,
NEW.screenshotPath,
NEW.backupThumbnailPath
);
END
```
</details>
<details>
<summary>Trigger: message_attachments → stop_protecting_attachments_after_update</summary>
```sql
CREATE TRIGGER stop_protecting_attachments_after_update AFTER
UPDATE OF path,
thumbnailPath,
screenshotPath,
backupThumbnailPath ON message_attachments WHEN OLD.path IS NOT NEW.path
OR OLD.thumbnailPath IS NOT NEW.thumbnailPath
OR OLD.screenshotPath IS NOT NEW.screenshotPath
OR OLD.backupThumbnailPath IS NOT NEW.backupThumbnailPath BEGIN
DELETE FROM attachments_protected_from_deletion
WHERE
messageId IS NEW.messageId
AND path IN (
NEW.path,
NEW.thumbnailPath,
NEW.screenshotPath,
NEW.backupThumbnailPath
);
END
```
</details>
---
</details>

View File

@ -893,7 +893,6 @@ export async function runDownloadAttachmentJobInner({
const existingAttachmentData = await getExistingAttachmentDataForReuse({
plaintextHash: downloadedAttachment.plaintextHash,
contentType: attachment.contentType,
messageId,
logId,
});

View File

@ -1484,20 +1484,12 @@ type WritableInterface = {
plaintextHash,
version,
contentType,
messageId,
}: {
plaintextHash: string;
version: number;
contentType: MIMEType;
messageId: string;
}) => ExistingAttachmentData | undefined;
_protectAttachmentPathFromDeletion: ({
path,
messageId,
}: {
path: string;
messageId: string;
}) => void;
}) => (ExistingAttachmentData & { reuseToken: string }) | undefined;
_protectAttachmentPathFromDeletion: ({ path }: { path: string }) => string;
resetProtectedAttachmentPaths: () => void;
removeAll: () => void;

View File

@ -302,7 +302,10 @@ import type {
} from '../types/Colors.std.ts';
import { sqlLogger } from './sqlLogger.node.ts';
import { permissiveMessageAttachmentSchema } from './server/messageAttachments.std.ts';
import { getFilePathsReferencedByMessage } from '../util/messageFilePaths.std.ts';
import {
getFilePathsReferencedByAttachment,
getFilePathsReferencedByMessage,
} from '../util/messageFilePaths.std.ts';
import { createMessagesOnInsertTrigger } from './migrations/1500-search-polls.std.ts';
import { isValidPlaintextHash } from '../types/Crypto.std.ts';
import { Emoji } from '../axo/emoji.std.ts';
@ -2916,6 +2919,10 @@ function saveMessageAttachment({
logger.info('Recovered from invalid message_attachment save');
}
if (attachment.reuseToken != null) {
releaseAttachmentPathProtections(db, attachment);
}
}
function getAndProtectExistingAttachmentPath(
@ -2924,14 +2931,12 @@ function getAndProtectExistingAttachmentPath(
plaintextHash,
version,
contentType,
messageId,
}: {
plaintextHash: string;
version: number;
contentType: string;
messageId: string;
}
): ExistingAttachmentData | undefined {
): (ExistingAttachmentData & { reuseToken: string }) | undefined {
if (!isValidPlaintextHash(plaintextHash)) {
logger.error('getAndProtectExistingAttachmentPath: Invalid plaintextHash');
return;
@ -2971,40 +2976,71 @@ function getAndProtectExistingAttachmentPath(
LIMIT 1;
`;
const existingData = db.prepare(query).get<ExistingAttachmentData>(params);
return db.transaction(() => {
const existingData = db.prepare(query).get<ExistingAttachmentData>(params);
if (!existingData) {
return undefined;
}
if (!existingData) {
return undefined;
}
const [protectQuery, protectParams] = sql`
const reuseToken = randomBytes(16).toString('hex');
const [protectQuery, protectParams] = sql`
WITH existingMessageAttachmentPaths(path) AS (
VALUES
(${existingData.path}),
(${existingData.thumbnailPath}),
(${existingData.screenshotPath})
)
INSERT OR REPLACE INTO attachments_protected_from_deletion(path, messageId)
SELECT path, ${messageId}
INSERT OR REPLACE INTO attachments_protected_from_deletion(path, reuseToken)
SELECT path, ${reuseToken}
FROM existingMessageAttachmentPaths
WHERE path IS NOT NULL;
`;
db.prepare(protectQuery).run(protectParams);
db.prepare(protectQuery).run(protectParams);
return existingData;
return { ...existingData, reuseToken };
})();
}
function _protectAttachmentPathFromDeletion(
db: WritableDB,
{ path, messageId }: { path: string; messageId: string }
): void {
{ path }: { path: string }
): string {
const reuseToken = randomBytes(16).toString('hex');
const [protectQuery, protectParams] = sql`
INSERT OR REPLACE INTO attachments_protected_from_deletion
(path, messageId)
(path, reuseToken)
VALUES
(${path}, ${messageId});
(${path}, ${reuseToken});
`;
db.prepare(protectQuery).run(protectParams);
return reuseToken;
}
function releaseAttachmentPathProtections(
db: WritableDB,
attachment: AttachmentType
): void {
const { reuseToken } = attachment;
if (!reuseToken) {
return;
}
const { externalAttachments } =
getFilePathsReferencedByAttachment(attachment);
if (!externalAttachments.size) {
return;
}
const [query, params] = sql`
DELETE FROM attachments_protected_from_deletion
WHERE reuseToken = ${reuseToken}
AND path IN (${sqlJoin([...externalAttachments])});
`;
db.prepare(query).run(params);
}
function resetProtectedAttachmentPaths(db: WritableDB): void {
@ -3013,7 +3049,7 @@ function resetProtectedAttachmentPaths(db: WritableDB): void {
function getAllProtectedAttachmentPaths(db: ReadableDB): Array<string> {
return db
.prepare('SELECT path FROM attachments_protected_from_deletion', {
.prepare('SELECT DISTINCT path FROM attachments_protected_from_deletion', {
pluck: true,
})
.all<string>();

View File

@ -0,0 +1,24 @@
// Copyright 2026 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import type { WritableDB } from '../Interface.std.ts';
export default function updateToSchemaVersion1730(db: WritableDB): void {
// Each row is an outstanding reuse claim on a path, keyed by an ephemeral token
// returned to the caller. saveMessageAttachments releases the claim in the same
// transaction that writes the message_attachments row referencing the path.
db.exec(`
DROP TABLE attachments_protected_from_deletion;
CREATE TABLE attachments_protected_from_deletion (
path TEXT NOT NULL,
reuseToken TEXT NOT NULL,
PRIMARY KEY (path, reuseToken)
) STRICT;
CREATE INDEX attachments_protected_from_deletion_reuseToken
ON attachments_protected_from_deletion (reuseToken);
DROP TRIGGER stop_protecting_attachments_after_update;
DROP TRIGGER stop_protecting_attachments_after_insert;
`);
}

View File

@ -149,6 +149,7 @@ import updateToSchemaVersion1690 from './1690-poll-terminate-notification-timest
import updateToSchemaVersion1700 from './1700-trim-profile-names.std.ts';
import updateToSchemaVersion1710 from './1710-emoji-skin-tone-default.std.ts';
import updateToSchemaVersion1720 from './1720-update-recent-emoji.std.ts';
import updateToSchemaVersion1730 from './1730-protected-attachments-dedupe-token.std.ts';
import { DataWriter } from '../Server.node.ts';
import { strictAssert } from '../../util/assert.std.ts';
@ -1660,6 +1661,7 @@ export const SCHEMA_VERSIONS: ReadonlyArray<SchemaUpdateType> = [
{ version: 1700, update: updateToSchemaVersion1700 },
{ version: 1710, update: updateToSchemaVersion1710 },
{ version: 1720, update: updateToSchemaVersion1720 },
{ version: 1730, update: updateToSchemaVersion1730 },
];
class DBVersionFromFutureError extends Error {

View File

@ -277,7 +277,6 @@ describe('cleanupOrphanedAttachments', () => {
await DataWriter._protectAttachmentPathFromDeletion({
path: `attachment${attachmentIndex + 1}`,
messageId: 'messageId',
});
await DataWriter.cleanupOrphanedAttachments({ _block: true });

View File

@ -304,7 +304,6 @@ describe('deleteMessageAttachments', () => {
it('is not safe to delete if the file is protected, even if no references', async () => {
await DataWriter._protectAttachmentPathFromDeletion({
path: 'attachment0',
messageId: 'messageId',
});
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment0'));
@ -333,9 +332,6 @@ describe('deleteMessageAttachments', () => {
conversationId: 'convoId',
attachments: [attachment1],
};
const message2 = { ...message1, id: generateUuid() };
const message3 = { ...message1, id: generateUuid() };
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment1'));
await DataWriter.saveMessage(message1, {
@ -345,20 +341,43 @@ describe('deleteMessageAttachments', () => {
});
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
// Protect it twice
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment1.plaintextHash,
version: 2,
contentType: IMAGE_JPEG,
messageId: message2.id,
});
// Protect it twice, once for each message that will reuse the file
const existingDataForMessage2 =
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment1.plaintextHash,
version: 2,
contentType: IMAGE_JPEG,
});
strictAssert(existingDataForMessage2, 'existing attachment data exists');
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment1.plaintextHash,
version: 2,
contentType: IMAGE_JPEG,
messageId: message3.id,
});
const existingDataForMessage3 =
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment1.plaintextHash,
version: 2,
contentType: IMAGE_JPEG,
});
strictAssert(existingDataForMessage3, 'existing attachment data exists');
const message2: MessageAttributesType = {
...message1,
id: generateUuid(),
attachments: [
{
...attachment1,
...existingDataForMessage2,
},
],
};
const message3: MessageAttributesType = {
...message1,
id: generateUuid(),
attachments: [
{
...attachment1,
...existingDataForMessage3,
},
],
};
// Delete the original message
await DataWriter.removeMessageById(message1.id, {
@ -366,7 +385,7 @@ describe('deleteMessageAttachments', () => {
});
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
// Save message2
// Save message2; this releases message2's dedupe protection
await DataWriter.saveMessage(message2, {
forceSave: true,
ourAci: generateAci(),
@ -380,7 +399,7 @@ describe('deleteMessageAttachments', () => {
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
// Save message3
// Save message3; this releases message3's dedupe protection
await DataWriter.saveMessage(message3, {
forceSave: true,
ourAci: generateAci(),
@ -494,7 +513,6 @@ describe('deleteMessageAttachments', () => {
plaintextHash: attachment1.plaintextHash,
version: attachment1.version,
contentType: attachment1.contentType,
messageId: 'newmessage',
});
assert.strictEqual(existingAttachment?.path, attachment1.path);
@ -604,7 +622,6 @@ describe('deleteMessageAttachments', () => {
await DataWriter._protectAttachmentPathFromDeletion({
path: 'attachment0',
messageId: 'messageId',
});
await cleanupAttachmentFiles(attachment);
assert.sameDeepMembers(listFiles('attachment'), [

View File

@ -453,16 +453,12 @@ describe('Attachment', () => {
return FAKE_LOCAL_ATTACHMENT;
};
const actual = await migrateDataToFileSystem(
input,
{
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async () => null,
getPlaintextHashForInMemoryAttachment: () => 'fakeplaintextHash',
logger,
},
{ id: 'messageId' }
);
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async () => null,
getPlaintextHashForInMemoryAttachment: () => 'fakeplaintextHash',
logger,
});
assert.deepEqual(actual, expected);
});
@ -481,16 +477,12 @@ describe('Attachment', () => {
const writeNewAttachmentData = async () => FAKE_LOCAL_ATTACHMENT;
const actual = await migrateDataToFileSystem(
input,
{
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async () => null,
getPlaintextHashForInMemoryAttachment: () => 'fakeplaintextHash',
logger,
},
{ id: 'messageId' }
);
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async () => null,
getPlaintextHashForInMemoryAttachment: () => 'fakeplaintextHash',
logger,
});
assert.deepEqual(actual, expected);
});
@ -505,16 +497,12 @@ describe('Attachment', () => {
const writeNewAttachmentData = async () => FAKE_LOCAL_ATTACHMENT;
const actual = await migrateDataToFileSystem(
input,
{
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async () => null,
getPlaintextHashForInMemoryAttachment: () => 'fakeplaintextHash',
logger,
},
{ id: 'messageId' }
);
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async () => null,
getPlaintextHashForInMemoryAttachment: () => 'fakeplaintextHash',
logger,
});
assert.isUndefined(actual.data);
});
@ -528,27 +516,24 @@ describe('Attachment', () => {
const writeNewAttachmentData = sandbox.stub();
const actual = await migrateDataToFileSystem(
input,
{
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async ({
plaintextHash,
contentType,
}) => {
assert.strictEqual(plaintextHash, 'somePlaintextHash');
assert.strictEqual(contentType, MIME.IMAGE_JPEG);
return {
path: 'new-path',
version: 2,
localKey: 'new-local-key',
};
},
getPlaintextHashForInMemoryAttachment: () => 'somePlaintextHash',
logger,
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
getExistingAttachmentDataForReuse: async ({
plaintextHash,
contentType,
}) => {
assert.strictEqual(plaintextHash, 'somePlaintextHash');
assert.strictEqual(contentType, MIME.IMAGE_JPEG);
return {
path: 'new-path',
version: 2,
localKey: 'new-local-key',
reuseToken: 'reuse-token',
};
},
{ id: 'messageId' }
);
getPlaintextHashForInMemoryAttachment: () => 'somePlaintextHash',
logger,
});
assert.strictEqual(writeNewAttachmentData.callCount, 0);
assert.deepEqual(actual, {
version: 2,
@ -558,6 +543,7 @@ describe('Attachment', () => {
path: 'new-path',
localKey: 'new-local-key',
fileName: 'foo.jpg',
reuseToken: 'reuse-token',
});
});
});

View File

@ -27,6 +27,12 @@ export type BackupThumbnailType = WithOptionalProperties<ThumbnailType, 'size'>;
export type EphemeralAttachmentFields = {
totalDownloaded?: number;
data?: Uint8Array<ArrayBuffer>;
/**
* Identifies this attachment's claim on reused paths in
* attachments_protected_from_deletion; saveMessageAttachments releases the claim when
* the attachment is saved.
*/
reuseToken?: string;
/** Not included in protobuf, needs to be pulled from flags */
isVoiceMessage?: boolean;
/** For messages not already on disk, this will be a data url */

View File

@ -249,7 +249,7 @@ export function parseAndWriteAvatar(
...contact,
avatar: {
...avatar,
avatar: await upgradeAttachment(avatar.avatar, context, message),
avatar: await upgradeAttachment(avatar.avatar, context),
},
}
: omit(contact, ['avatar']);

View File

@ -529,11 +529,7 @@ const toVersion10 = _withSchemaVersion({
...stickerMessage,
sticker: {
...sticker,
data: await migrateDataToFileSystem(
sticker.data,
stickerContext,
stickerMessage
),
data: await migrateDataToFileSystem(sticker.data, stickerContext),
},
};
};

View File

@ -986,8 +986,7 @@ async function resolveReferences(packId: string): Promise<void> {
try {
attachments = await pMap(
messageIds,
messageId =>
copyStickerToAttachments({ packId, stickerId, messageId }),
() => copyStickerToAttachments({ packId, stickerId }),
{ concurrency: 3 }
);
} catch (error) {
@ -1076,11 +1075,9 @@ export function getSticker(
export async function copyStickerToAttachments({
packId,
stickerId,
messageId,
}: {
packId: string;
stickerId: number;
messageId: string;
}): Promise<AttachmentType> {
const sticker = getSticker(packId, stickerId);
if (!sticker) {
@ -1116,7 +1113,6 @@ export async function copyStickerToAttachments({
const existingAttachmentData = await getExistingAttachmentDataForReuse({
plaintextHash,
contentType,
messageId,
logId: 'copyStickerToAttachments',
});

View File

@ -23,19 +23,18 @@ type AttachmentDataToBeReused = WithRequiredProperties<
| 'screenshot'
| 'width'
| 'height'
| 'reuseToken'
>,
'path' | 'localKey' | 'version'
'path' | 'localKey' | 'version' | 'reuseToken'
>;
export async function getExistingAttachmentDataForReuse({
plaintextHash,
contentType,
messageId,
logId,
}: {
plaintextHash: string;
contentType: MIMEType;
messageId: string;
logId?: string;
}): Promise<AttachmentDataToBeReused | null> {
const existingAttachmentData =
@ -43,7 +42,6 @@ export async function getExistingAttachmentDataForReuse({
plaintextHash,
version: CURRENT_ATTACHMENT_VERSION,
contentType,
messageId,
});
if (!existingAttachmentData) {
@ -72,6 +70,7 @@ export async function getExistingAttachmentDataForReuse({
version: existingAttachmentData.version,
width: existingAttachmentData.width ?? undefined,
height: existingAttachmentData.height ?? undefined,
reuseToken: existingAttachmentData.reuseToken,
};
const { thumbnailPath, thumbnailSize, thumbnailContentType } =
existingAttachmentData;

View File

@ -4,7 +4,6 @@
import lodash from 'lodash';
import type { AttachmentType } from '../../types/Attachment.std.ts';
import type { ContextType } from '../../types/Message2.preload.ts';
import type { MessageAttributesType } from '../../model-types.d.ts';
const { isFunction, isTypedArray, isUndefined, omit } = lodash;
@ -21,8 +20,7 @@ export async function migrateDataToFileSystem(
| 'getExistingAttachmentDataForReuse'
| 'getPlaintextHashForInMemoryAttachment'
| 'logger'
>,
message: Pick<MessageAttributesType, 'id'>
>
): Promise<AttachmentType> {
if (!isFunction(writeNewAttachmentData)) {
throw new TypeError("'writeNewAttachmentData' must be a function");
@ -48,7 +46,6 @@ export async function migrateDataToFileSystem(
const existingData = await getExistingAttachmentDataForReuse({
plaintextHash,
messageId: message.id,
contentType: attachment.contentType,
logId: 'migrateDataToFileSystem',
});

View File

@ -127,6 +127,7 @@ export async function maybeForwardMessages(
data: {
...stickerWithData.data,
path: undefined,
reuseToken: undefined,
},
}
: undefined;
@ -152,6 +153,7 @@ export async function maybeForwardMessages(
thumbnail: undefined,
thumbnailFromBackup: undefined,
screenshot: undefined,
reuseToken: undefined,
}))
);
const attachmentsToSend = attachmentsWithData.filter(

View File

@ -353,7 +353,6 @@ export async function queueAttachmentDownloads(
const data = await copyStickerToAttachments({
packId,
stickerId,
messageId,
});
// Refresh sticker attachment since we had to await above
const freshSticker = message.get('sticker');