From 78436e61dda80595ddb0038582d8a55d9dd21a89 Mon Sep 17 00:00:00 2001 From: Max Radermacher Date: Wed, 11 Mar 2026 18:05:08 -0500 Subject: [PATCH] Consolidate code for parsing varints Co-authored-by: Sasha Weiss --- .../Archiving/BackupNonceMetadataStore.swift | 35 ++++---- .../ConversationSync/ChunkedInputStream.swift | 24 +---- SignalServiceKit/Util/Data+SSK.swift | 29 +++++++ .../Input/ChunkedInputStreamTransform.swift | 87 ++++++------------- .../NonceHeaderInputStreamTransform.swift | 29 +++---- .../tests/Util/Data+SSKTest.swift | 31 +++++++ 6 files changed, 115 insertions(+), 120 deletions(-) diff --git a/SignalServiceKit/Backups/Archiving/BackupNonceMetadataStore.swift b/SignalServiceKit/Backups/Archiving/BackupNonceMetadataStore.swift index 319038f63d..f577efc963 100644 --- a/SignalServiceKit/Backups/Archiving/BackupNonceMetadataStore.swift +++ b/SignalServiceKit/Backups/Archiving/BackupNonceMetadataStore.swift @@ -70,10 +70,11 @@ extension BackupNonce.MetadataHeader { /// If the provided bytes do not cover the complete header, throws a ``ParsingError/moreDataNeeded(_:)`` error /// with the minimum necessary bytes to try parsing again. public static func from(prefixBytes: Data?) throws(ParsingError) -> BackupNonce.MetadataHeader { - guard let rawData = prefixBytes?.nilIfEmpty else { + guard var rawData = prefixBytes?.nilIfEmpty else { owsFailDebug("Missing prefix data") throw .dataMissingOrEmpty } + let oldRawDataCount = rawData.count // Check the file signature let fileSignatureLength = BackupNonce.magicFileSignature.count @@ -83,31 +84,25 @@ extension BackupNonce.MetadataHeader { guard rawData.prefix(fileSignatureLength) == BackupNonce.magicFileSignature else { throw .unrecognizedFileSignature } + rawData.removeFirst(fileSignatureLength) // Read the varint length of the header metadata. - let (headerLength, varintLength) = ChunkedInputStreamTransform.decodeVariableLengthInteger( - buffer: rawData, - start: fileSignatureLength, - ) - guard - headerLength > 0, - varintLength >= 0, - let varintLength = UInt64(exactly: varintLength) - else { + let headerLength = try? rawData.removeFirstVarint() + guard let headerLength, headerLength > 0 else { throw .moreDataNeeded(BackupNonce.metadataHeaderByteLengthUpperBound) } - guard let minLength = UInt16(exactly: UInt64(fileSignatureLength) + headerLength + varintLength) else { - // We enforce via swift compiler that the header fits in 2^16 bytes, which is - // way overkill and should always be enough. This should never happen. - owsFailDebug("Header larger than 2^16 bytes!") - throw .headerTooLarge - } - if minLength > rawData.count { + guard rawData.count >= headerLength else { + let signatureAndVarintLength = UInt64(oldRawDataCount - rawData.count) + let (minLength, overflow) = signatureAndVarintLength.addingReportingOverflow(headerLength) + guard !overflow, let minLength = UInt16(exactly: minLength) else { + // We enforce via swift compiler that the header fits in 2^16 bytes, which is + // way overkill and should always be enough. This should never happen. + owsFailDebug("Header larger than 2^16 bytes!") + throw .headerTooLarge + } throw .moreDataNeeded(minLength) } - let rawHeader = rawData - .suffix(from: fileSignatureLength + Int(varintLength)) - .prefix(Int(headerLength)) + let rawHeader = rawData.prefix(Int(headerLength)) return BackupNonce.MetadataHeader(data: rawHeader) } diff --git a/SignalServiceKit/Devices/ConversationSync/ChunkedInputStream.swift b/SignalServiceKit/Devices/ConversationSync/ChunkedInputStream.swift index f86ed25ef2..d74677e28c 100644 --- a/SignalServiceKit/Devices/ConversationSync/ChunkedInputStream.swift +++ b/SignalServiceKit/Devices/ConversationSync/ChunkedInputStream.swift @@ -28,7 +28,7 @@ struct ChunkedInputStream { /// Based on SwiftProtobuf.BinaryDecoder.decodeSingularUInt32Field mutating func decodeSingularUInt32Field() throws -> UInt32 { - guard let result = UInt32(exactly: try decodeVarint()) else { + guard let result = UInt32(exactly: try remainingData.removeFirstVarint()) else { throw ChunkedInputStreamError.malformed } return result @@ -45,26 +45,4 @@ struct ChunkedInputStream { } self.remainingData = self.remainingData.dropFirst(length) } - - /// Private: Parse the next raw varint from the input. - /// - /// Based on SwiftProtobuf.BinaryDecoder.decodeVarint() - private mutating func decodeVarint() throws -> UInt64 { - var value = 0 as UInt64 - var shift = 0 as UInt64 - while true { - if shift > 63 { - throw ChunkedInputStreamError.malformed - } - if self.remainingData.isEmpty { - throw ChunkedInputStreamError.truncated - } - let c = self.remainingData.removeFirst() - value |= UInt64(c & 0x7f) << shift - if c & 0x80 == 0 { - return value - } - shift += 7 - } - } } diff --git a/SignalServiceKit/Util/Data+SSK.swift b/SignalServiceKit/Util/Data+SSK.swift index 064ed2d870..5de477680d 100644 --- a/SignalServiceKit/Util/Data+SSK.swift +++ b/SignalServiceKit/Util/Data+SSK.swift @@ -59,6 +59,35 @@ extension Data { } } == 0 } + + public enum VarintDecodingError: Error { + case malformed + case truncated + } + + /// Consume the next varint. + /// + /// Based on SwiftProtobuf.BinaryDecoder.decodeVarint() + public mutating func removeFirstVarint() throws(VarintDecodingError) -> UInt64 { + var temporarySelf = self + var value: UInt64 = 0 + var shift: UInt64 = 0 + while true { + if shift > 63 { + throw .malformed + } + if temporarySelf.isEmpty { + throw .truncated + } + let c = temporarySelf.removeFirst() + value |= UInt64(c & 0x7f) << shift + if c & 0x80 == 0 { + self = temporarySelf + return value + } + shift += 7 + } + } } public extension Data { diff --git a/SignalServiceKit/Util/StreamTransform/Input/ChunkedInputStreamTransform.swift b/SignalServiceKit/Util/StreamTransform/Input/ChunkedInputStreamTransform.swift index f6c4872dff..ca30d8118c 100644 --- a/SignalServiceKit/Util/StreamTransform/Input/ChunkedInputStreamTransform.swift +++ b/SignalServiceKit/Util/StreamTransform/Input/ChunkedInputStreamTransform.swift @@ -20,17 +20,24 @@ public class ChunkedInputStreamTransform: StreamTransform, BufferedStreamTransfo } public func transform(data: Data) throws -> Data { - // ChunkedInputStreamTransform, buy it's nature, will usually take in a large buffer of data - // and then return smaller chunks of data as it reads through the stream. To avoid unecessary - // copying of buffers, the class keeps an internal buffer of data that it appends new data to, - // and maintains a pointer that moves as it consumes chunks. To avoid this buffer growing - // unbounded, periodically check to see if the buffer can be reset to (or near) the `initilalBufferSize` + // ChunkedInputStreamTransform, by its nature, will usually take in a large + // buffer of data and then return smaller chunks of data as it reads + // through the stream. To avoid unecessary copying of buffers, the class + // keeps an internal buffer of data that it appends new data to and + // maintains a pointer that moves as it consumes chunks. To avoid this + // buffer growing unbounded, periodically check to see if the buffer can be + // reset to (or near) the `initialBufferSize` + // // These checks are roughly: - // 1) Whenever the caller passes in more data to transform, check if the buffer has been fully consumed. - // If so, reset the buffer and reset `consumedBytes` to zero. - // 2) If the buffer has grown beyond the initial buffer size, check if enough data has been - // consumed to reset the buffer to a smaller size. This prevents the buffer from growing - // unbounded over long running operations. + // + // 1) Whenever the caller passes in more data to transform, check if the + // buffer has been fully consumed. If so, reset the buffer and reset + // `consumedBytes` to zero. + // + // 2) If the buffer has grown beyond the initial buffer size, check if + // enough data has been consumed to reset the buffer to a smaller size. + // This prevents the buffer from growing unbounded over long running + // operations. // If the entire buffer has been consumed, reset to a new buffer if consumedBytes > initialBufferSize { @@ -59,76 +66,34 @@ public class ChunkedInputStreamTransform: StreamTransform, BufferedStreamTransfo /// Decode the next chunk of data, if enough data is present in the buffer. private func getNextChunk() throws -> Data { // decode the next variable length int - let (dataSize, intLength) = Self.decodeVariableLengthInteger(buffer: buffer, start: consumedBytes) + var buffer = self.buffer.dropFirst(consumedBytes) + let dataSize = try? buffer.removeFirstVarint() - guard dataSize > 0 else { + guard let dataSize else { needMoreData = true // Don't have enough data to decode an int, so return for now return Data() } - guard let intDataSize = Int(exactly: dataSize) else { - // The decoded integer is to large to fit into an Int - // The Data operations all require Int Ranges, so - owsFailDebug("Decoded data size too large") + guard dataSize > 0 else { + needMoreData = true + // The chunk is empty, so return for now? return Data() } // Only advance if there is enough data present to both // decode the variable length integer and read the specified // number of bytes. - let endOfBuffer = consumedBytes + intDataSize + intLength - guard buffer.count >= endOfBuffer else { + guard buffer.count >= dataSize else { needMoreData = true return Data() } // Return a chunk of data from the buffer and advance the buffer. - let returnBuffer = buffer.dropFirst(consumedBytes + intLength).prefix(intDataSize) + let returnBuffer = buffer.prefix(Int(dataSize)) - consumedBytes = endOfBuffer + consumedBytes = self.buffer.count - buffer.count + Int(dataSize) return returnBuffer } - - /// Inspect the incoming data and return the next variable length integer. - /// Because it's not guaranteed that there's enought data to either (a) decode the integer or - /// (b) read the amount of data specified by the returned integer, dont' remove the decoded - /// integer bytes from the buffer until we're certain there's enough data to fulfill reading - /// the specified amount of data. - public static func decodeVariableLengthInteger(buffer: Data, start: Int) -> (result: UInt64, length: Int) { - guard buffer.count > 0 else { return (result: 0, length: 0) } - - return buffer.withUnsafeBytes { - guard let baseAddress = $0.bindMemory(to: UInt8.self).baseAddress else { - return (result: 0, length: 0) - } - var start = baseAddress + start - var length = buffer.count - var count = 1 - var c = start[0] - start += 1 - length -= 1 - - if c & 0x80 == 0 { - return (result: UInt64(c), length: count) - } - var value = UInt64(c & 0x7f) - var shift = UInt64(7) - while true { - if length < 1 || shift > 63 { - return (result: 0, length: 0) - } - c = start[0] - start += 1 - length -= 1 - count += 1 - value |= UInt64(c & 0x7f) << shift - if c & 0x80 == 0 { - return (result: value, length: count) - } - shift += 7 - } - } - } } diff --git a/SignalServiceKit/Util/StreamTransform/Input/NonceHeaderInputStreamTransform.swift b/SignalServiceKit/Util/StreamTransform/Input/NonceHeaderInputStreamTransform.swift index 23a523cde4..00f5e86bd4 100644 --- a/SignalServiceKit/Util/StreamTransform/Input/NonceHeaderInputStreamTransform.swift +++ b/SignalServiceKit/Util/StreamTransform/Input/NonceHeaderInputStreamTransform.swift @@ -45,47 +45,44 @@ public class NonceHeaderInputStreamTransform: StreamTransform, BufferedStreamTra /// Decode the next chunk of data, if enough data is present in the buffer. private func skipPastMetadataHeader() throws -> Data { - guard buffer.count > BackupNonce.magicFileSignature.count else { + var buffer = self.buffer + guard buffer.starts(with: BackupNonce.magicFileSignature) else { // We dont have enough data to decode the signature, return for now. needMoreData = true return Data() } + buffer.removeFirst(BackupNonce.magicFileSignature.count) // decode the next variable length int - let (dataSize, intLength) = ChunkedInputStreamTransform.decodeVariableLengthInteger( - buffer: buffer, - start: BackupNonce.magicFileSignature.count, - ) + let dataSize = try? buffer.removeFirstVarint() - guard dataSize > 0 else { - needMoreData = true + guard let dataSize else { // Don't have enough data to decode an int, so return for now return Data() } - guard let intDataSize = Int(exactly: dataSize) else { - // The decoded integer is to large to fit into an Int - // The Data operations all require Int Ranges, so - owsFailDebug("Decoded data size too large") + guard dataSize > 0 else { + needMoreData = true + // The varint is zero, so return for now? return Data() } // Only advance if there is enough data present to both // decode the variable length integer and skip past the specified // number of bytes. - let endOfHeader = BackupNonce.magicFileSignature.count + intDataSize + intLength - guard buffer.count >= endOfHeader else { + guard buffer.count >= dataSize else { needMoreData = true return Data() } + buffer.removeFirst(Int(dataSize)) // Return any data past the header, skipping the header portion. - let returnBuffer = buffer.dropFirst(endOfHeader) + let returnBuffer = buffer - headerLength = endOfHeader + headerLength = self.buffer.count - buffer.count hasFinishedReadingHeader = true needMoreData = false - buffer = Data() + self.buffer = Data() return returnBuffer } diff --git a/SignalServiceKit/tests/Util/Data+SSKTest.swift b/SignalServiceKit/tests/Util/Data+SSKTest.swift index c07cc92fb1..5642f8dd32 100644 --- a/SignalServiceKit/tests/Util/Data+SSKTest.swift +++ b/SignalServiceKit/tests/Util/Data+SSKTest.swift @@ -4,6 +4,7 @@ // import Foundation +import Testing import XCTest @testable import SignalServiceKit @@ -59,3 +60,33 @@ class DataSSKTests: XCTestCase { XCTAssertTrue(Data().ows_constantTimeIsEqual(to: Data())) } } + +struct DataSSKTest { + @Test(arguments: [ + ([0], 0, []), + ([1], 1, []), + ([127], 127, []), + ([128, 1], 128, []), + ([0, 0], 0, [0]), + ([128, 1, 2], 128, [2]), + ([255, 255, 255, 255, 255, 255, 255, 255, 127], UInt64(Int64.max), []), + ] as [([UInt8], UInt64, [UInt8])]) + func testDecodeVarint(testCase: (inputData: [UInt8], expectedValue: UInt64, remainingData: [UInt8])) throws { + var inputData = Data(testCase.inputData) + let actualValue = try inputData.removeFirstVarint() + #expect(actualValue == testCase.expectedValue) + #expect(inputData == Data(testCase.remainingData)) + } + + @Test(arguments: [ + ([], .truncated), + ([128], .truncated), + ([128, 128, 128, 128, 128, 128, 128, 128, 128, 128], .malformed), + ] as [([UInt8], Data.VarintDecodingError)]) + func testDecodeVarintError(testCase: (inputData: [UInt8], expectedError: Data.VarintDecodingError)) throws { + let inputData = Data(testCase.inputData) + var mutableData = inputData + #expect(throws: testCase.expectedError, performing: { try mutableData.removeFirstVarint() }) + #expect(mutableData == inputData) + } +}