From 28087b861d5dbb53f9450a67be76f4bc6c8e7431 Mon Sep 17 00:00:00 2001 From: Milan Stevanovic Date: Tue, 28 Jan 2025 17:43:28 -0800 Subject: [PATCH] Milan/sanitizing cumulative box sized files (#26) --- cli/src/main.rs | 38 ++++++++++++++++------------- mp4san/src/lib.rs | 49 +++++++++++++++++++++++++++++++++++++- mp4san/src/parse/header.rs | 5 ++++ test-data | 2 +- 4 files changed, 76 insertions(+), 18 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index f65876a..10ce6cb 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; use anyhow::Context; use clap::{Parser as _, ValueEnum}; -use mp4san::SanitizedMetadata; +use mp4san::{Config, SanitizedMetadata}; #[derive(clap::Parser)] struct Args { @@ -20,6 +20,9 @@ struct Args { #[clap(long, short = 'o')] output: Option, + #[clap(long, short = 'c')] + cumulative_mdat_box_size: Option, + /// Path to the file to test sanitization on. file: PathBuf, } @@ -51,23 +54,26 @@ fn main() -> Result<(), anyhow::Error> { let mut infile = File::open(&args.file).context("Error opening file")?; match format { - Format::Mp4 => match mp4san::sanitize(&mut infile).context("Error parsing mp4 file")? { - SanitizedMetadata { metadata: Some(metadata), data } => { - if let Some(output_path) = args.output { - let mut outfile = File::create(output_path).context("Error opening output file")?; - outfile.write(&metadata).context("Error writing output")?; - infile - .seek(io::SeekFrom::Start(data.offset)) - .context("Error seeking input")?; - io::copy(&mut infile.take(data.len), &mut outfile).context("Error copying input to output")?; + Format::Mp4 => { + let config = Config { cumulative_mdat_box_size: args.cumulative_mdat_box_size, ..Default::default() }; + match mp4san::sanitize_with_config(&mut infile, config).context("Error parsing mp4 file")? 
{ + SanitizedMetadata { metadata: Some(metadata), data } => { + if let Some(output_path) = args.output { + let mut outfile = File::create(output_path).context("Error opening output file")?; + outfile.write(&metadata).context("Error writing output")?; + infile + .seek(io::SeekFrom::Start(data.offset)) + .context("Error seeking input")?; + io::copy(&mut infile.take(data.len), &mut outfile).context("Error copying input to output")?; + } + } + SanitizedMetadata { metadata: None, .. } => { + if let Some(output_path) = args.output { + fs::copy(&args.file, output_path).context("Error writing output")?; + } } } - SanitizedMetadata { metadata: None, .. } => { - if let Some(output_path) = args.output { - fs::copy(&args.file, output_path).context("Error writing output")?; - } - } - }, + } Format::Webp => { webpsan::sanitize(infile).context("Error parsing webp file")?; if let Some(output_path) = args.output { diff --git a/mp4san/src/lib.rs b/mp4san/src/lib.rs index d903803..c338972 100644 --- a/mp4san/src/lib.rs +++ b/mp4san/src/lib.rs @@ -90,6 +90,29 @@ pub struct Config { /// The default is 1 GiB. #[builder(default = "1024 * 1024 * 1024")] pub max_metadata_size: u64, + /// The cumulative MDAT box size + /// + /// The value is tightly associated with a specific + /// use case scenario in which the transcoder internally + /// generates a sequence of MDAT boxes, but delivers + /// them compounded as a single monolithic MDAT box. 
+ /// + /// In order to avoid constantly updating the single + /// MDAT box size (which may be impossible when the + /// payload is continually encrypted and written to + /// the file), the transcoder instead does the following: + /// a) writes the MDAT box size to be equal to the + /// fixed zero value + /// b) keeps accumulating the box size and passes + /// it as config argument to mp4sanitizer + /// + /// IMPORTANT: given the special circumstances of + /// the use case scenario of transcoding on mobile + /// devices, the MDAT box size is expected to not + /// exceed the 32-bit max bytes limit. Hence the + /// cumulative_mdat_box_size is a 32-bit value + #[builder(default = None)] + pub cumulative_mdat_box_size: Option, } /// Sanitized metadata returned by the sanitizer. @@ -260,7 +283,7 @@ pub async fn sanitize_async_with_config( while !reader.as_mut().fill_buf().await?.is_empty() { let start_pos = reader.as_mut().stream_position().await?; - let header = BoxHeader::read(&mut reader) + let mut header = BoxHeader::read(&mut reader) .await .map_eof(|_| Error::Parse(report_attach!(ParseError::TruncatedBox, "while parsing box header")))?; @@ -306,6 +329,12 @@ pub async fn sanitize_async_with_config( } BoxType::MDAT => { + if let Ok(None) = header.box_data_size() { + if let Some(t) = config.cumulative_mdat_box_size { + header.overwrite_size(t); + } + } + let box_size = skip_box(reader.as_mut(), &header).await? 
+ header.encoded_len(); log::info!("mdat @ 0x{start_pos:08x}: {box_size} bytes"); @@ -803,4 +832,22 @@ mod test { assert_matches!(err.into_inner(), ParseError::InvalidBoxLayout); }); } + + #[test] + fn cumulative_mdat_box_size() { + let test_spec = test_mp4().mdat_data_until_eof().build_spec().unwrap(); + let test_1 = test_spec.build(); + let mdat_box_length = test_1.mdat.len as u32; + + let config_bad = Config::builder().build(); + assert_matches!(sanitize_with_config(test_1, config_bad).unwrap_err(), Error::Parse(err) => { + assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(_)); + }); + + let test_2 = test_spec.build(); + let config_good = Config::builder() + .cumulative_mdat_box_size(Some(mdat_box_length)) + .build(); + test_2.sanitize_ok_with_config(config_good); + } } diff --git a/mp4san/src/parse/header.rs b/mp4san/src/parse/header.rs index 763322d..7903900 100644 --- a/mp4san/src/parse/header.rs +++ b/mp4san/src/parse/header.rs @@ -131,6 +131,11 @@ impl BoxHeader { Ok(Self { box_type: name, box_size: size }) } + pub fn overwrite_size(&mut self, actual_box_size: u32) { + assert_eq!(self.box_size, BoxSize::UntilEof); + self.box_size = BoxSize::Size(actual_box_size); + } + pub const fn encoded_len(&self) -> u64 { let mut size = FourCC::size() + size_of::() as u64; if let BoxSize::Ext(_) = self.box_size { diff --git a/test-data b/test-data index b9f4d3f..b2121e3 160000 --- a/test-data +++ b/test-data @@ -1 +1 @@ -Subproject commit b9f4d3f3273819ce50170357098a56113eb088b2 +Subproject commit b2121e38ec04025fdb62306b4863566fa993c8cb