//
// Copyright 2024 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//

import Foundation
import zlib

/// A streaming gzip compressor/decompressor backed by zlib.
///
/// Feed input through `transform(data:)` and call `finalize()` exactly once when
/// the input is exhausted. When compressing, `finalize()` also appends zero
/// padding after the gzip trailer (sized by `Cryptography.paddedSize`).
///
/// zlib's internal state is released by `finalize()`, or by `deinit` if the
/// transform is discarded before it was (successfully) finalized.
public class GzipStreamTransform: StreamTransform, FinalizableStreamTransform {

    public enum Operation {
        case compress
        case decompress
    }

    public enum GzipError: Swift.Error {
        case initializeFailed
        case streamError
        case dataError
        case outOfMemoryError
        case transformFailed
        case finalizeFailed
    }

    private enum Constants {
        /// Initial size of the reusable output buffer.
        static let BufferSize: Int = 65_536

        // Use the maximum memory window (32K) for compressing the data
        static let MaxWindowBits = MAX_WBITS

        // Adding 16 to the window bits signals that the gzip header should be written
        static let GzipDeflateHeaderWindowBits: Int32 = 16

        // Adding 32 to the window bits signals that the gzip header/footer should be read
        static let GzipInflateHeaderWindowBits: Int32 = 32
    }

    /// `true` once `finalize()` has been entered, whether or not it succeeded.
    public private(set) var hasFinalized = false

    /// Running total of bytes returned by `process(data:finalize:)`.
    /// Used to size the padding appended when a compression stream is finalized.
    private var outputCount: Int = 0

    private var stream: z_stream
    private let operation: Operation

    /// `true` between a successful zlib init and the matching `deflateEnd`/`inflateEnd`.
    private var isStreamOpen = false

    /// Reusable output buffer. It is doubled whenever zlib fills it and never shrinks.
    private var buffer = Data(count: Constants.BufferSize)

    /// Creates a transform that compresses to, or decompresses from, the gzip format.
    ///
    /// - Throws: `GzipError.initializeFailed` if zlib could not set up the stream.
    init(_ operation: Operation) throws {
        self.operation = operation
        self.stream = z_stream()

        // zlib checks the struct size to detect header/library mismatches.
        let streamSize = Int32(MemoryLayout<z_stream>.size)

        let status: Int32
        switch operation {
        case .compress:
            status = deflateInit2_(
                &stream,
                Z_DEFAULT_COMPRESSION,
                Z_DEFLATED,
                Constants.MaxWindowBits + Constants.GzipDeflateHeaderWindowBits,
                MAX_MEM_LEVEL,
                Z_DEFAULT_STRATEGY,
                ZLIB_VERSION,
                streamSize
            )
        case .decompress:
            status = inflateInit2_(
                &stream,
                Constants.MaxWindowBits + Constants.GzipInflateHeaderWindowBits,
                ZLIB_VERSION,
                streamSize
            )
        }

        guard status == Z_OK else {
            throw GzipError.initializeFailed
        }
        isStreamOpen = true
    }

    deinit {
        // Release zlib's internal allocations if finalize() was never called or
        // threw before it reached the end of the stream. The status is ignored:
        // there is nobody left to report it to.
        endStream()
    }

    /// Pass the supplied `data` to zlib for processing and return any data that results.
    /// Note that there is no guarantee that data will be returned from the transform, since
    /// compression/decompression buffers internally.
    ///
    /// - Throws: A `GzipError` describing the zlib failure.
    public func transform(data: Data) throws -> Data {
        try process(data: data, finalize: false)
    }

    /// Runs `data` through deflate/inflate and returns whatever output zlib produced.
    ///
    /// - Parameters:
    ///   - data: The next chunk of input. May be empty.
    ///   - finalize: When `true`, the last call into zlib uses `Z_FINISH` so that all
    ///     pending output (and, when compressing, the gzip trailer) is flushed.
    /// - Returns: The bytes produced by this call; possibly empty.
    /// - Throws: `GzipError.streamError` if the stream has already been closed or zlib
    ///   reports `Z_STREAM_ERROR`; `dataError`, `outOfMemoryError` or `transformFailed`
    ///   for the other zlib failures.
    private func process(data: Data, finalize: Bool) throws -> Data {
        guard isStreamOpen else {
            throw GzipError.streamError
        }

        var status: Int32 = Z_OK
        var bytesProduced = 0

        data.withUnsafeBytes { (input: UnsafeRawBufferPointer) in
            // Point zlib at the start of the input. zlib advances `next_in` itself as it
            // consumes bytes, so later chunks only need `avail_in` topped up.
            // A nil base address is only possible for empty input, which zlib accepts
            // as long as `avail_in` is 0.
            let inputBase = input.bindMemory(to: Bytef.self).baseAddress
            stream.next_in = inputBase.map { UnsafeMutablePointer(mutating: $0) }
            stream.avail_in = 0

            // Bytes of `data` that have not yet been handed to zlib. `avail_in` is only
            // 32 bits wide, so larger inputs are fed in several chunks instead of being
            // silently truncated.
            var inputRemaining = input.count

            repeat {
                // From zlib docs:
                // "If inflate (or deflate) returns Z_OK and with zero avail_out, it must be
                // called again after making room in the output buffer because there might be
                // more output pending."
                // Grow only when another pass is actually going to run.
                if bytesProduced == buffer.count {
                    buffer.count *= 2
                }

                if stream.avail_in == 0, inputRemaining > 0 {
                    let chunk = UInt32(clamping: inputRemaining)
                    stream.avail_in = chunk
                    inputRemaining -= Int(chunk)
                }

                // Z_FINISH may only accompany the very last bytes of input.
                let flush: Int32 = (finalize && inputRemaining == 0) ? Z_FINISH : Z_NO_FLUSH

                buffer.withUnsafeMutableBytes { (output: UnsafeMutableRawBufferPointer) in
                    guard let outputBase = output.bindMemory(to: Bytef.self).baseAddress else {
                        // The buffer is never empty, so this is not expected to happen.
                        status = Z_MEM_ERROR
                        return
                    }

                    // Append after whatever earlier passes of this call already wrote.
                    let offered = UInt32(clamping: output.count - bytesProduced)
                    stream.next_out = outputBase.advanced(by: bytesProduced)
                    stream.avail_out = offered

                    switch operation {
                    case .compress:
                        status = deflate(&stream, flush)
                    case .decompress:
                        status = inflate(&stream, flush)
                    }

                    bytesProduced += Int(offered - stream.avail_out)

                    // Don't keep a pointer into `buffer` beyond this closure.
                    stream.next_out = nil
                }

                // Keep calling deflate/inflate as long as the status remains Z_OK and one of
                // the following is true:
                // a) The output space offered was used up (avail_out == 0). There may be
                //    additional output pending that did not fit.
                // b) There is still input that zlib has not consumed (avail_in > 0), or that
                //    has not been handed to it yet (inputRemaining > 0).
                //
                // From the zlib docs:
                // "If not all input can be processed (because there is not enough room in the
                // output buffer), then next_in and avail_in are updated accordingly, and
                // processing will resume at this point for the next call of inflate (or
                // deflate)."
            } while status == Z_OK
                && (stream.avail_out == 0 || stream.avail_in > 0 || inputRemaining > 0)

            // The input pointer is only valid inside this closure.
            stream.next_in = nil
            stream.avail_in = 0
        }

        switch status {
        case Z_OK, Z_STREAM_END:
            break
        case Z_BUF_ERROR:
            // This usually indicates that deflate has no more output, but the final chunk
            // of output _exactly_ filled the space offered, leaving avail_out at 0. The loop
            // above then calls deflate again, and since there is nothing further to do,
            // deflate reports Z_BUF_ERROR: it could neither consume input nor produce output.
            // It is not fatal.
            // NOTE(review): when decompressing with `finalize == true`, a truncated stream
            // also appears to surface as Z_BUF_ERROR and is accepted here — confirm that
            // this is intended.
            break
        case Z_STREAM_ERROR:
            throw GzipError.streamError
        case Z_DATA_ERROR:
            throw GzipError.dataError
        case Z_MEM_ERROR:
            throw GzipError.outOfMemoryError
        default:
            throw GzipError.transformFailed
        }

        let produced = buffer.prefix(bytesProduced)
        outputCount += produced.count
        return produced
    }

    /// Flushes any output still pending inside zlib, pads compressed output, and closes
    /// the zlib stream.
    ///
    /// - Returns: The final bytes of the stream, including padding when compressing.
    /// - Throws: A `GzipError` from the final flush, or `GzipError.finalizeFailed` if the
    ///   padded size cannot be determined or zlib fails to close the stream.
    public func finalize() throws -> Data {
        hasFinalized = true

        // Finalize the gzip and return any remaining data.
        // `process` has already added these bytes to `outputCount`; adding them again
        // here would count the final chunk twice and skew the padding.
        var finalData = try process(data: Data(), finalize: true)

        switch operation {
        case .compress:
            // Pad the gzip similar to how attachments are padded. Gzip will ignore
            // this trailing data during decompression.
            let unpaddedSize = UInt64(outputCount)
            guard
                let paddedSize = Cryptography.paddedSize(unpaddedSize: unpaddedSize),
                paddedSize >= unpaddedSize
            else {
                throw GzipError.finalizeFailed
            }
            // TODO: This may produce a 50MiB buffer for a 1GiB attachment (padding is up to 5%).
            finalData.count += Int(paddedSize - unpaddedSize)
        case .decompress:
            break
        }

        // Close the zlib stream
        guard endStream() == Z_OK else {
            throw GzipError.finalizeFailed
        }

        return finalData
    }

    /// Releases zlib's internal state for this stream; safe to call more than once.
    ///
    /// - Returns: The status from `deflateEnd`/`inflateEnd`, or `Z_STREAM_ERROR` if the
    ///   stream was not open.
    @discardableResult
    private func endStream() -> Int32 {
        guard isStreamOpen else {
            return Z_STREAM_ERROR
        }
        isStreamOpen = false

        switch operation {
        case .compress:
            return deflateEnd(&stream)
        case .decompress:
            return inflateEnd(&stream)
        }
    }
}