fix: harden URL balloon dedupe in watch stream (#64) (thanks @lesaai)

This commit is contained in:
Peter Steinberger 2026-03-02 02:26:43 +00:00
parent a2c16b3cac
commit c9fa1c2003
4 changed files with 158 additions and 17 deletions

View File

@ -1,6 +1,7 @@
# Changelog
## Unreleased
- fix: dedupe URL balloon preview duplicates in watch stream without cross-chat/schema regressions (#64, thanks @lesaai)
- fix: remove non-functional `typing` command and related RPC methods
- fix: remove unsupported standalone IMCore typing path and stale error branch
- test: drop typing-specific unit/integration tests with command/RPC surface removal

View File

@ -165,6 +165,7 @@ extension MessageStore {
let associatedTypeColumn = hasReactionColumns ? "m.associated_message_type" : "NULL"
let destinationCallerColumn = hasDestinationCallerID ? "m.destination_caller_id" : "NULL"
let audioMessageColumn = hasAudioMessageColumn ? "m.is_audio_message" : "0"
let balloonBundleIDColumn = hasBalloonBundleIDColumn ? "m.balloon_bundle_id" : "NULL"
let threadOriginatorColumn =
hasThreadOriginatorGUIDColumn ? "m.thread_originator_guid" : "NULL"
// Only filter out reactions if includeReactions is false
@ -186,7 +187,7 @@ extension MessageStore {
(SELECT COUNT(*) FROM message_attachment_join maj WHERE maj.message_id = m.ROWID) AS attachments,
\(bodyColumn) AS body,
\(threadOriginatorColumn) AS thread_originator_guid,
m.balloon_bundle_id
\(balloonBundleIDColumn) AS balloon_bundle_id
FROM message m
LEFT JOIN chat_message_join cmj ON m.ROWID = cmj.message_id
LEFT JOIN handle h ON m.handle_id = h.ROWID
@ -222,25 +223,24 @@ extension MessageStore {
return try withConnection { db in
var messages: [Message] = []
// Track URL balloon messages to deduplicate link preview re-deliveries.
// iMessage can write multiple rows for the same URL when the link preview resolves,
// producing duplicate messages with balloon_bundle_id = 'com.apple.messages.URLBalloonProvider'.
var seenURLBalloons: Set<String> = []
let urlBalloonProvider = "com.apple.messages.URLBalloonProvider"
for row in try db.prepare(sql, bindings) {
// Deduplicate URL balloon messages with the same sender + text
let decoded = try decodeMessageRow(row, columns: columns, fallbackChatID: chatID)
let balloonBundleID = stringValue(row[balloonBundleIDIndex])
if balloonBundleID == "com.apple.messages.URLBalloonProvider" {
let sender = stringValue(row[columns.sender])
let text = stringValue(row[columns.text])
let dedupeKey = "\(sender)|\(text)"
if seenURLBalloons.contains(dedupeKey) {
continue
}
seenURLBalloons.insert(dedupeKey)
if balloonBundleID == urlBalloonProvider,
shouldSkipURLBalloonDuplicate(
chatID: decoded.chatID,
sender: decoded.sender,
text: decoded.text,
isFromMe: decoded.isFromMe,
date: decoded.date,
rowID: decoded.rowID
)
{
continue
}
let decoded = try decodeMessageRow(row, columns: columns, fallbackChatID: chatID)
let replyToGUID = replyToGUID(
associatedGuid: decoded.associatedGUID,
associatedType: decoded.associatedType

View File

@ -20,6 +20,17 @@ public final class MessageStore: @unchecked Sendable {
let hasDestinationCallerID: Bool
let hasAudioMessageColumn: Bool
let hasAttachmentUserInfo: Bool
let hasBalloonBundleIDColumn: Bool
/// The most recently recorded URL balloon row for a single dedupe key.
private struct URLBalloonDedupeEntry: Sendable {
/// ROWID of the recorded message row.
let rowID: Int64
/// Message date of the recorded row.
let date: Date
}
/// Maximum gap, in seconds, between a recorded row and a newer row with the same key
/// for the newer row to be treated as a duplicate.
private static let urlBalloonDedupeWindow: TimeInterval = 90
/// How long, in seconds (10 minutes), an entry is kept relative to the date of the row
/// currently being checked before it is pruned.
private static let urlBalloonDedupeRetention: TimeInterval = 10 * 60
/// Dedupe state keyed by a "chatID|isFromMe|sender|text" string.
// NOTE(review): mutable state on an `@unchecked Sendable` class — presumably only touched
// while the store's connection queue serializes access; confirm against callers.
private var urlBalloonDedupe: [String: URLBalloonDedupeEntry] = [:]
public init(path: String = MessageStore.defaultPath) throws {
let normalized = NSString(string: path).expandingTildeInPath
@ -42,6 +53,7 @@ public final class MessageStore: @unchecked Sendable {
self.hasDestinationCallerID = messageColumns.contains("destination_caller_id")
self.hasAudioMessageColumn = messageColumns.contains("is_audio_message")
self.hasAttachmentUserInfo = attachmentColumns.contains("user_info")
self.hasBalloonBundleIDColumn = messageColumns.contains("balloon_bundle_id")
} catch {
throw MessageStore.enhance(error: error, path: normalized)
}
@ -55,7 +67,8 @@ public final class MessageStore: @unchecked Sendable {
hasThreadOriginatorGUIDColumn: Bool? = nil,
hasDestinationCallerID: Bool? = nil,
hasAudioMessageColumn: Bool? = nil,
hasAttachmentUserInfo: Bool? = nil
hasAttachmentUserInfo: Bool? = nil,
hasBalloonBundleIDColumn: Bool? = nil
) throws {
self.path = path
self.queue = DispatchQueue(label: "imsg.db.test", qos: .userInitiated)
@ -94,6 +107,11 @@ public final class MessageStore: @unchecked Sendable {
} else {
self.hasAttachmentUserInfo = attachmentColumns.contains("user_info")
}
if let hasBalloonBundleIDColumn {
self.hasBalloonBundleIDColumn = hasBalloonBundleIDColumn
} else {
self.hasBalloonBundleIDColumn = messageColumns.contains("balloon_bundle_id")
}
}
public func listChats(limit: Int) throws -> [Chat] {
@ -180,6 +198,38 @@ public final class MessageStore: @unchecked Sendable {
try block(connection)
}
}
/// Decides whether a URL balloon row repeats one that was already recorded recently.
///
/// Rows are matched on chat, direction, sender and text. The first row for a key is
/// recorded and let through. A later row is skipped when it is a replay (its row ID is
/// not greater than the recorded one) or when its date lies within
/// `urlBalloonDedupeWindow` seconds of the recorded row.
///
/// - Parameters:
///   - chatID: Chat the row belongs to; part of the key so matches never cross chats.
///   - sender: Sender identifier of the row.
///   - text: Message text; rows with empty text are never skipped.
///   - isFromMe: Direction flag; part of the key.
///   - date: Message date of the row.
///   - rowID: ROWID of the row.
/// - Returns: `true` when the caller should drop the row as a duplicate.
// NOTE(review): mutates `urlBalloonDedupe`; presumably callers invoke this while access
// to the store is serialized — confirm.
func shouldSkipURLBalloonDuplicate(
  chatID: Int64,
  sender: String,
  text: String,
  isFromMe: Bool,
  date: Date,
  rowID: Int64
) -> Bool {
  // Without text there is nothing reliable to match on, so never suppress.
  guard !text.isEmpty else { return false }
  pruneURLBalloonDedupe(referenceDate: date)

  let key = "\(chatID)|\(isFromMe ? 1 : 0)|\(sender)|\(text)"
  guard let previous = urlBalloonDedupe[key] else {
    urlBalloonDedupe[key] = URLBalloonDedupeEntry(rowID: rowID, date: date)
    return false
  }

  // A row at or below the recorded row ID is a replay of something already handled.
  // Leave the entry untouched so a replay cannot move the recorded row ID and date
  // backwards and make later rows compare against stale data.
  guard rowID > previous.rowID else { return true }

  urlBalloonDedupe[key] = URLBalloonDedupeEntry(rowID: rowID, date: date)

  // Compare the absolute gap: a signed interval would treat every row dated earlier
  // than the recorded one as "within the window", no matter how far apart they are.
  let gap = abs(date.timeIntervalSince(previous.date))
  return gap <= MessageStore.urlBalloonDedupeWindow
}
/// Drops dedupe entries whose date is older than `urlBalloonDedupeRetention` seconds
/// before `referenceDate`.
///
/// - Parameter referenceDate: Date the retention cutoff is measured back from.
private func pruneURLBalloonDedupe(referenceDate: Date) {
  if urlBalloonDedupe.isEmpty { return }

  let oldestKept = referenceDate.addingTimeInterval(-MessageStore.urlBalloonDedupeRetention)
  var retained: [String: URLBalloonDedupeEntry] = [:]
  // Iterating copies the dictionary value, so rebuilding into `retained` is safe.
  for (key, entry) in urlBalloonDedupe where entry.date >= oldestKept {
    retained[key] = entry
  }
  urlBalloonDedupe = retained
}
}
extension MessageStore {

View File

@ -4,9 +4,13 @@ import Testing
@testable import IMsgCore
private func makeInMemoryMessageDB(includeThreadOriginatorGUID: Bool = false) throws -> Connection {
private func makeInMemoryMessageDB(
includeThreadOriginatorGUID: Bool = false,
includeBalloonBundleID: Bool = false
) throws -> Connection {
let db = try Connection(.inMemory)
let threadOriginatorColumn = includeThreadOriginatorGUID ? "thread_originator_guid TEXT," : ""
let balloonColumn = includeBalloonBundleID ? "balloon_bundle_id TEXT," : ""
try db.execute(
"""
CREATE TABLE message (
@ -17,6 +21,7 @@ private func makeInMemoryMessageDB(includeThreadOriginatorGUID: Bool = false) th
associated_message_guid TEXT,
associated_message_type INTEGER,
\(threadOriginatorColumn)
\(balloonColumn)
date INTEGER,
is_from_me INTEGER,
service TEXT
@ -137,6 +142,91 @@ func messagesAfterReturnsMessages() throws {
#expect(messages.first?.rowID == 2)
}
/// Checks that a URL balloon row repeated by the same sender with the same text is
/// suppressed when it shows up in a later `messagesAfter` poll shortly after the first,
/// and that the same text is delivered again once enough time has passed.
@Test
func messagesAfterDeduplicatesURLBalloonsAcrossPolls() throws {
let db = try makeInMemoryMessageDB(includeBalloonBundleID: true)
let now = Date()
try db.run("INSERT INTO handle(ROWID, id) VALUES (1, '+123')")
// Row 1: the original URL balloon message, dated `now`.
try db.run(
"""
INSERT INTO message(
ROWID, handle_id, text, guid, associated_message_guid, associated_message_type,
balloon_bundle_id, date, is_from_me, service
)
VALUES (1, 1, 'https://example.com', 'msg-guid-1', NULL, 0, 'com.apple.messages.URLBalloonProvider', ?, 0, 'iMessage')
""",
TestDatabase.appleEpoch(now)
)
try db.run("INSERT INTO chat_message_join(chat_id, message_id) VALUES (1, 1)")
// The same store instance is reused for every poll, so dedupe state carries over.
let store = try MessageStore(connection: db, path: ":memory:")
let firstPoll = try store.messagesAfter(afterRowID: 0, chatID: 1, limit: 10)
#expect(firstPoll.map(\.rowID) == [1])
// Row 2: same sender and text, dated 30 seconds after row 1.
try db.run(
"""
INSERT INTO message(
ROWID, handle_id, text, guid, associated_message_guid, associated_message_type,
balloon_bundle_id, date, is_from_me, service
)
VALUES (2, 1, 'https://example.com', 'msg-guid-2', NULL, 0, 'com.apple.messages.URLBalloonProvider', ?, 0, 'iMessage')
""",
TestDatabase.appleEpoch(now.addingTimeInterval(30))
)
try db.run("INSERT INTO chat_message_join(chat_id, message_id) VALUES (1, 2)")
// Row 2 is expected to be dropped as a duplicate of row 1.
let secondPoll = try store.messagesAfter(afterRowID: 1, chatID: 1, limit: 10)
#expect(secondPoll.isEmpty)
// Row 3: same sender and text again, dated 5 minutes after row 1.
try db.run(
"""
INSERT INTO message(
ROWID, handle_id, text, guid, associated_message_guid, associated_message_type,
balloon_bundle_id, date, is_from_me, service
)
VALUES (3, 1, 'https://example.com', 'msg-guid-3', NULL, 0, 'com.apple.messages.URLBalloonProvider', ?, 0, 'iMessage')
""",
TestDatabase.appleEpoch(now.addingTimeInterval(5 * 60))
)
try db.run("INSERT INTO chat_message_join(chat_id, message_id) VALUES (1, 3)")
// This poll starts after row 1 again, so row 2 is re-read as well; only row 3 is
// expected back.
let thirdPoll = try store.messagesAfter(afterRowID: 1, chatID: 1, limit: 10)
#expect(thirdPoll.map(\.rowID) == [3])
}
/// Checks that two URL balloon rows with the same sender and text, 15 seconds apart,
/// are both returned when they are joined to different chats.
@Test
func messagesAfterURLBalloonDedupingDoesNotCrossChats() throws {
let db = try makeInMemoryMessageDB(includeBalloonBundleID: true)
let now = Date()
try db.run("INSERT INTO handle(ROWID, id) VALUES (1, '+123')")
// Row 1, dated `now`.
try db.run(
"""
INSERT INTO message(
ROWID, handle_id, text, guid, associated_message_guid, associated_message_type,
balloon_bundle_id, date, is_from_me, service
)
VALUES (1, 1, 'https://example.com', 'msg-guid-1', NULL, 0, 'com.apple.messages.URLBalloonProvider', ?, 0, 'iMessage')
""",
TestDatabase.appleEpoch(now)
)
// Row 2: identical sender and text, dated 15 seconds later.
try db.run(
"""
INSERT INTO message(
ROWID, handle_id, text, guid, associated_message_guid, associated_message_type,
balloon_bundle_id, date, is_from_me, service
)
VALUES (2, 1, 'https://example.com', 'msg-guid-2', NULL, 0, 'com.apple.messages.URLBalloonProvider', ?, 0, 'iMessage')
""",
TestDatabase.appleEpoch(now.addingTimeInterval(15))
)
// The rows are attached to different chats (1 and 2).
try db.run("INSERT INTO chat_message_join(chat_id, message_id) VALUES (1, 1)")
try db.run("INSERT INTO chat_message_join(chat_id, message_id) VALUES (2, 2)")
let store = try MessageStore(connection: db, path: ":memory:")
// No chat filter is passed, so one poll covers both chats; neither row may be dropped.
let messages = try store.messagesAfter(afterRowID: 0, chatID: nil, limit: 10)
#expect(messages.map(\.rowID) == [1, 2])
}
@Test
func messagesAfterExcludesReactionRows() throws {
let db = try makeInMemoryMessageDB()