fix: decode UTF-16LE attributed bodies

This commit is contained in:
Peter Steinberger 2026-05-04 05:55:51 +01:00
parent f2fff0bdcf
commit fb7b847531
No known key found for this signature in database
3 changed files with 16 additions and 0 deletions

View File

@ -1,6 +1,7 @@
# Changelog
## Unreleased
- fix: decode attributed message bodies that start with a UTF-16LE BOM in plain-text history output (#91, thanks @clawbunny)
- fix: confirm standard tapback reaction selection in Messages automation (#53, thanks @PeterRosdahl)
- fix: gate RPC watch reaction metadata on `include_reactions`, not `attachments` (#82)
- fix: dedupe URL balloon preview duplicates in watch stream without cross-chat/schema regressions (#64, thanks @lesaai)

View File

@ -4,6 +4,12 @@ enum TypedStreamParser {
static func parseAttributedBody(_ data: Data) -> String {
guard !data.isEmpty else { return "" }
let bytes = [UInt8](data)
if bytes.count >= 2, bytes[0] == 0xff, bytes[1] == 0xfe {
let payload = data.dropFirst(2)
if let text = String(data: payload, encoding: .utf16LittleEndian) {
return text.trimmingLeadingControlCharacters()
}
}
let start = [UInt8(0x01), UInt8(0x2b)]
let end = [UInt8(0x86), UInt8(0x84)]
var best = ""

View File

@ -102,6 +102,15 @@ func typedStreamParserTrimsControlCharacters() {
#expect(TypedStreamParser.parseAttributedBody(data) == "hello")
}
@Test
func typedStreamParserDecodesUTF16LittleEndianBOM() throws {
    // Expected plain text; includes an emoji so the payload contains a surrogate pair.
    let expected = "hello 🌤️"

    // Input is the two BOM bytes 0xFF 0xFE followed by the UTF-16LE encoding of the text.
    let byteOrderMark = Data([0xff, 0xfe])
    let utf16Payload = try #require(expected.data(using: .utf16LittleEndian))
    let input = byteOrderMark + utf16Payload

    // The parser should hand back exactly the original text.
    #expect(TypedStreamParser.parseAttributedBody(input) == expected)
}
@Test
func phoneNumberNormalizerFormatsValidNumber() {
let normalizer = PhoneNumberNormalizer()