Consolidate code for parsing varints

Co-authored-by: Sasha Weiss <sasha@signal.org>
This commit is contained in:
Max Radermacher 2026-03-11 18:05:08 -05:00 committed by GitHub
parent f2eaa08ced
commit 78436e61dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 115 additions and 120 deletions

View File

@ -70,10 +70,11 @@ extension BackupNonce.MetadataHeader {
/// If the provided bytes do not cover the complete header, throws a ``ParsingError/moreDataNeeded(_:)`` error
/// with the minimum necessary bytes to try parsing again.
public static func from(prefixBytes: Data?) throws(ParsingError) -> BackupNonce.MetadataHeader {
guard let rawData = prefixBytes?.nilIfEmpty else {
guard var rawData = prefixBytes?.nilIfEmpty else {
owsFailDebug("Missing prefix data")
throw .dataMissingOrEmpty
}
let oldRawDataCount = rawData.count
// Check the file signature
let fileSignatureLength = BackupNonce.magicFileSignature.count
@ -83,31 +84,25 @@ extension BackupNonce.MetadataHeader {
guard rawData.prefix(fileSignatureLength) == BackupNonce.magicFileSignature else {
throw .unrecognizedFileSignature
}
rawData.removeFirst(fileSignatureLength)
// Read the varint length of the header metadata.
let (headerLength, varintLength) = ChunkedInputStreamTransform.decodeVariableLengthInteger(
buffer: rawData,
start: fileSignatureLength,
)
guard
headerLength > 0,
varintLength >= 0,
let varintLength = UInt64(exactly: varintLength)
else {
let headerLength = try? rawData.removeFirstVarint()
guard let headerLength, headerLength > 0 else {
throw .moreDataNeeded(BackupNonce.metadataHeaderByteLengthUpperBound)
}
guard let minLength = UInt16(exactly: UInt64(fileSignatureLength) + headerLength + varintLength) else {
// We enforce via swift compiler that the header fits in 2^16 bytes, which is
// way overkill and should always be enough. This should never happen.
owsFailDebug("Header larger than 2^16 bytes!")
throw .headerTooLarge
}
if minLength > rawData.count {
guard rawData.count >= headerLength else {
let signatureAndVarintLength = UInt64(oldRawDataCount - rawData.count)
let (minLength, overflow) = signatureAndVarintLength.addingReportingOverflow(headerLength)
guard !overflow, let minLength = UInt16(exactly: minLength) else {
// We enforce via swift compiler that the header fits in 2^16 bytes, which is
// way overkill and should always be enough. This should never happen.
owsFailDebug("Header larger than 2^16 bytes!")
throw .headerTooLarge
}
throw .moreDataNeeded(minLength)
}
let rawHeader = rawData
.suffix(from: fileSignatureLength + Int(varintLength))
.prefix(Int(headerLength))
let rawHeader = rawData.prefix(Int(headerLength))
return BackupNonce.MetadataHeader(data: rawHeader)
}

View File

@ -28,7 +28,7 @@ struct ChunkedInputStream {
/// Based on SwiftProtobuf.BinaryDecoder.decodeSingularUInt32Field
mutating func decodeSingularUInt32Field() throws -> UInt32 {
guard let result = UInt32(exactly: try decodeVarint()) else {
guard let result = UInt32(exactly: try remainingData.removeFirstVarint()) else {
throw ChunkedInputStreamError.malformed
}
return result
@ -45,26 +45,4 @@ struct ChunkedInputStream {
}
self.remainingData = self.remainingData.dropFirst(length)
}
/// Private: Parse the next raw varint from the input.
///
/// Based on SwiftProtobuf.BinaryDecoder.decodeVarint()
private mutating func decodeVarint() throws -> UInt64 {
var value = 0 as UInt64
var shift = 0 as UInt64
while true {
if shift > 63 {
throw ChunkedInputStreamError.malformed
}
if self.remainingData.isEmpty {
throw ChunkedInputStreamError.truncated
}
let c = self.remainingData.removeFirst()
value |= UInt64(c & 0x7f) << shift
if c & 0x80 == 0 {
return value
}
shift += 7
}
}
}

View File

@ -59,6 +59,35 @@ extension Data {
}
} == 0
}
public enum VarintDecodingError: Error {
case malformed
case truncated
}
/// Consume the next varint.
///
/// Based on SwiftProtobuf.BinaryDecoder.decodeVarint()
public mutating func removeFirstVarint() throws(VarintDecodingError) -> UInt64 {
var temporarySelf = self
var value: UInt64 = 0
var shift: UInt64 = 0
while true {
if shift > 63 {
throw .malformed
}
if temporarySelf.isEmpty {
throw .truncated
}
let c = temporarySelf.removeFirst()
value |= UInt64(c & 0x7f) << shift
if c & 0x80 == 0 {
self = temporarySelf
return value
}
shift += 7
}
}
}
public extension Data {

View File

@ -20,17 +20,24 @@ public class ChunkedInputStreamTransform: StreamTransform, BufferedStreamTransfo
}
public func transform(data: Data) throws -> Data {
// ChunkedInputStreamTransform, buy it's nature, will usually take in a large buffer of data
// and then return smaller chunks of data as it reads through the stream. To avoid unecessary
// copying of buffers, the class keeps an internal buffer of data that it appends new data to,
// and maintains a pointer that moves as it consumes chunks. To avoid this buffer growing
// unbounded, periodically check to see if the buffer can be reset to (or near) the `initilalBufferSize`
// ChunkedInputStreamTransform, by its nature, will usually take in a large
// buffer of data and then return smaller chunks of data as it reads
// through the stream. To avoid unecessary copying of buffers, the class
// keeps an internal buffer of data that it appends new data to and
// maintains a pointer that moves as it consumes chunks. To avoid this
// buffer growing unbounded, periodically check to see if the buffer can be
// reset to (or near) the `initialBufferSize`
//
// These checks are roughly:
// 1) Whenever the caller passes in more data to transform, check if the buffer has been fully consumed.
// If so, reset the buffer and reset `consumedBytes` to zero.
// 2) If the buffer has grown beyond the initial buffer size, check if enough data has been
// consumed to reset the buffer to a smaller size. This prevents the buffer from growing
// unbounded over long running operations.
//
// 1) Whenever the caller passes in more data to transform, check if the
// buffer has been fully consumed. If so, reset the buffer and reset
// `consumedBytes` to zero.
//
// 2) If the buffer has grown beyond the initial buffer size, check if
// enough data has been consumed to reset the buffer to a smaller size.
// This prevents the buffer from growing unbounded over long running
// operations.
// If the entire buffer has been consumed, reset to a new buffer
if consumedBytes > initialBufferSize {
@ -59,76 +66,34 @@ public class ChunkedInputStreamTransform: StreamTransform, BufferedStreamTransfo
/// Decode the next chunk of data, if enough data is present in the buffer.
private func getNextChunk() throws -> Data {
// decode the next variable length int
let (dataSize, intLength) = Self.decodeVariableLengthInteger(buffer: buffer, start: consumedBytes)
var buffer = self.buffer.dropFirst(consumedBytes)
let dataSize = try? buffer.removeFirstVarint()
guard dataSize > 0 else {
guard let dataSize else {
needMoreData = true
// Don't have enough data to decode an int, so return for now
return Data()
}
guard let intDataSize = Int(exactly: dataSize) else {
// The decoded integer is to large to fit into an Int
// The Data operations all require Int Ranges, so
owsFailDebug("Decoded data size too large")
guard dataSize > 0 else {
needMoreData = true
// The chunk is empty, so return for now?
return Data()
}
// Only advance if there is enough data present to both
// decode the variable length integer and read the specified
// number of bytes.
let endOfBuffer = consumedBytes + intDataSize + intLength
guard buffer.count >= endOfBuffer else {
guard buffer.count >= dataSize else {
needMoreData = true
return Data()
}
// Return a chunk of data from the buffer and advance the buffer.
let returnBuffer = buffer.dropFirst(consumedBytes + intLength).prefix(intDataSize)
let returnBuffer = buffer.prefix(Int(dataSize))
consumedBytes = endOfBuffer
consumedBytes = self.buffer.count - buffer.count + Int(dataSize)
return returnBuffer
}
/// Inspect the incoming data and return the next variable length integer.
/// Because it's not guaranteed that there's enought data to either (a) decode the integer or
/// (b) read the amount of data specified by the returned integer, dont' remove the decoded
/// integer bytes from the buffer until we're certain there's enough data to fulfill reading
/// the specified amount of data.
public static func decodeVariableLengthInteger(buffer: Data, start: Int) -> (result: UInt64, length: Int) {
guard buffer.count > 0 else { return (result: 0, length: 0) }
return buffer.withUnsafeBytes {
guard let baseAddress = $0.bindMemory(to: UInt8.self).baseAddress else {
return (result: 0, length: 0)
}
var start = baseAddress + start
var length = buffer.count
var count = 1
var c = start[0]
start += 1
length -= 1
if c & 0x80 == 0 {
return (result: UInt64(c), length: count)
}
var value = UInt64(c & 0x7f)
var shift = UInt64(7)
while true {
if length < 1 || shift > 63 {
return (result: 0, length: 0)
}
c = start[0]
start += 1
length -= 1
count += 1
value |= UInt64(c & 0x7f) << shift
if c & 0x80 == 0 {
return (result: value, length: count)
}
shift += 7
}
}
}
}

View File

@ -45,47 +45,44 @@ public class NonceHeaderInputStreamTransform: StreamTransform, BufferedStreamTra
/// Decode the next chunk of data, if enough data is present in the buffer.
private func skipPastMetadataHeader() throws -> Data {
guard buffer.count > BackupNonce.magicFileSignature.count else {
var buffer = self.buffer
guard buffer.starts(with: BackupNonce.magicFileSignature) else {
// We dont have enough data to decode the signature, return for now.
needMoreData = true
return Data()
}
buffer.removeFirst(BackupNonce.magicFileSignature.count)
// decode the next variable length int
let (dataSize, intLength) = ChunkedInputStreamTransform.decodeVariableLengthInteger(
buffer: buffer,
start: BackupNonce.magicFileSignature.count,
)
let dataSize = try? buffer.removeFirstVarint()
guard dataSize > 0 else {
needMoreData = true
guard let dataSize else {
// Don't have enough data to decode an int, so return for now
return Data()
}
guard let intDataSize = Int(exactly: dataSize) else {
// The decoded integer is to large to fit into an Int
// The Data operations all require Int Ranges, so
owsFailDebug("Decoded data size too large")
guard dataSize > 0 else {
needMoreData = true
// The varint is zero, so return for now?
return Data()
}
// Only advance if there is enough data present to both
// decode the variable length integer and skip past the specified
// number of bytes.
let endOfHeader = BackupNonce.magicFileSignature.count + intDataSize + intLength
guard buffer.count >= endOfHeader else {
guard buffer.count >= dataSize else {
needMoreData = true
return Data()
}
buffer.removeFirst(Int(dataSize))
// Return any data past the header, skipping the header portion.
let returnBuffer = buffer.dropFirst(endOfHeader)
let returnBuffer = buffer
headerLength = endOfHeader
headerLength = self.buffer.count - buffer.count
hasFinishedReadingHeader = true
needMoreData = false
buffer = Data()
self.buffer = Data()
return returnBuffer
}

View File

@ -4,6 +4,7 @@
//
import Foundation
import Testing
import XCTest
@testable import SignalServiceKit
@ -59,3 +60,33 @@ class DataSSKTests: XCTestCase {
XCTAssertTrue(Data().ows_constantTimeIsEqual(to: Data()))
}
}
struct DataSSKTest {
@Test(arguments: [
([0], 0, []),
([1], 1, []),
([127], 127, []),
([128, 1], 128, []),
([0, 0], 0, [0]),
([128, 1, 2], 128, [2]),
([255, 255, 255, 255, 255, 255, 255, 255, 127], UInt64(Int64.max), []),
] as [([UInt8], UInt64, [UInt8])])
func testDecodeVarint(testCase: (inputData: [UInt8], expectedValue: UInt64, remainingData: [UInt8])) throws {
var inputData = Data(testCase.inputData)
let actualValue = try inputData.removeFirstVarint()
#expect(actualValue == testCase.expectedValue)
#expect(inputData == Data(testCase.remainingData))
}
@Test(arguments: [
([], .truncated),
([128], .truncated),
([128, 128, 128, 128, 128, 128, 128, 128, 128, 128], .malformed),
] as [([UInt8], Data.VarintDecodingError)])
func testDecodeVarintError(testCase: (inputData: [UInt8], expectedError: Data.VarintDecodingError)) throws {
let inputData = Data(testCase.inputData)
var mutableData = inputData
#expect(throws: testCase.expectedError, performing: { try mutableData.removeFirstVarint() })
#expect(mutableData == inputData)
}
}