Milan/sanitizing cumulative box sized files (#26)

This commit is contained in:
Milan Stevanovic 2025-01-28 17:43:28 -08:00 committed by GitHub
parent d51a90c262
commit 28087b861d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 76 additions and 18 deletions

View File

@ -6,7 +6,7 @@ use std::path::PathBuf;
use anyhow::Context;
use clap::{Parser as _, ValueEnum};
use mp4san::SanitizedMetadata;
use mp4san::{Config, SanitizedMetadata};
#[derive(clap::Parser)]
struct Args {
@ -20,6 +20,9 @@ struct Args {
#[clap(long, short = 'o')]
output: Option<PathBuf>,
#[clap(long, short = 'c')]
cumulative_mdat_box_size: Option<u32>,
/// Path to the file to test sanitization on.
file: PathBuf,
}
@ -51,23 +54,26 @@ fn main() -> Result<(), anyhow::Error> {
let mut infile = File::open(&args.file).context("Error opening file")?;
match format {
Format::Mp4 => match mp4san::sanitize(&mut infile).context("Error parsing mp4 file")? {
SanitizedMetadata { metadata: Some(metadata), data } => {
if let Some(output_path) = args.output {
let mut outfile = File::create(output_path).context("Error opening output file")?;
outfile.write(&metadata).context("Error writing output")?;
infile
.seek(io::SeekFrom::Start(data.offset))
.context("Error seeking input")?;
io::copy(&mut infile.take(data.len), &mut outfile).context("Error copying input to output")?;
Format::Mp4 => {
let config = Config { cumulative_mdat_box_size: args.cumulative_mdat_box_size, ..Default::default() };
// Sanitize the input with the CLI-provided configuration. When the sanitizer returns
// replacement metadata, the output is that metadata followed by the `data` region copied
// from the input; when it returns none, the input file is copied to the output unchanged.
match mp4san::sanitize_with_config(&mut infile, config).context("Error parsing mp4 file")? {
    SanitizedMetadata { metadata: Some(metadata), data } => {
        if let Some(output_path) = args.output {
            let mut outfile = File::create(output_path).context("Error opening output file")?;
            // `write` may stop after a partial write and silently truncate the metadata;
            // `write_all` keeps writing until every byte is out or an error occurs.
            outfile.write_all(&metadata).context("Error writing output")?;
            // Position the input at the start of the media data before copying it over.
            infile
                .seek(io::SeekFrom::Start(data.offset))
                .context("Error seeking input")?;
            io::copy(&mut infile.take(data.len), &mut outfile).context("Error copying input to output")?;
        }
    }
    SanitizedMetadata { metadata: None, .. } => {
        if let Some(output_path) = args.output {
            fs::copy(&args.file, output_path).context("Error writing output")?;
        }
    }
}
SanitizedMetadata { metadata: None, .. } => {
if let Some(output_path) = args.output {
fs::copy(&args.file, output_path).context("Error writing output")?;
}
}
},
}
Format::Webp => {
webpsan::sanitize(infile).context("Error parsing webp file")?;
if let Some(output_path) = args.output {

View File

@ -90,6 +90,29 @@ pub struct Config {
/// The default is 1 GiB.
#[builder(default = "1024 * 1024 * 1024")]
pub max_metadata_size: u64,
/// The cumulative MDAT box size.
///
/// This value is tightly associated with a specific
/// use case in which the transcoder internally
/// generates a sequence of MDAT boxes, but delivers
/// them combined as a single monolithic MDAT box.
///
/// In order to avoid constantly updating the single
/// MDAT box size (which may be impossible when the
/// payload is continually encrypted and written to
/// the file), the transcoder instead does the following:
/// a) writes a fixed value of zero as the MDAT box
/// size
/// b) keeps accumulating the box size and passes
/// it as a config argument to the sanitizer
///
/// IMPORTANT: given the special circumstances of
/// the use case of transcoding on mobile
/// devices, the MDAT box size is expected not to
/// exceed the 32-bit maximum. Hence
/// `cumulative_mdat_box_size` is a 32-bit value.
///
/// The default is `None`.
#[builder(default = None)]
pub cumulative_mdat_box_size: Option<u32>,
}
/// Sanitized metadata returned by the sanitizer.
@ -260,7 +283,7 @@ pub async fn sanitize_async_with_config<R: AsyncRead + AsyncSkip>(
while !reader.as_mut().fill_buf().await?.is_empty() {
let start_pos = reader.as_mut().stream_position().await?;
let header = BoxHeader::read(&mut reader)
let mut header = BoxHeader::read(&mut reader)
.await
.map_eof(|_| Error::Parse(report_attach!(ParseError::TruncatedBox, "while parsing box header")))?;
@ -306,6 +329,12 @@ pub async fn sanitize_async_with_config<R: AsyncRead + AsyncSkip>(
}
BoxType::MDAT => {
if let Ok(None) = header.box_data_size() {
if let Some(t) = config.cumulative_mdat_box_size {
header.overwrite_size(t);
}
}
let box_size = skip_box(reader.as_mut(), &header).await? + header.encoded_len();
log::info!("mdat @ 0x{start_pos:08x}: {box_size} bytes");
@ -803,4 +832,22 @@ mod test {
assert_matches!(err.into_inner(), ParseError::InvalidBoxLayout);
});
}
/// An MDAT box that runs until EOF fails to sanitize with the default config, and sanitizes
/// successfully once its size is supplied through `cumulative_mdat_box_size`.
#[test]
fn cumulative_mdat_box_size() {
    let spec = test_mp4().mdat_data_until_eof().build_spec().unwrap();

    // Default config: no size override is available, so sanitizing must fail.
    let rejected_input = spec.build();
    let mdat_box_length = rejected_input.mdat.len as u32;
    let default_config = Config::builder().build();
    let failure = sanitize_with_config(rejected_input, default_config).unwrap_err();
    assert_matches!(failure, Error::Parse(err) => {
        assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(_));
    });

    // Same input built again, this time with the MDAT size passed via the config.
    let accepted_input = spec.build();
    let override_config = Config::builder()
        .cumulative_mdat_box_size(Some(mdat_box_length))
        .build();
    accepted_input.sanitize_ok_with_config(override_config);
}
}

View File

@ -131,6 +131,11 @@ impl BoxHeader {
Ok(Self { box_type: name, box_size: size })
}
/// Replaces an until-EOF box size with an explicit 32-bit size.
///
/// After this call the header's size is `BoxSize::Size(actual_box_size)`.
///
/// # Panics
///
/// Panics if the header's current size is not [`BoxSize::UntilEof`].
pub fn overwrite_size(&mut self, actual_box_size: u32) {
    let explicit_size = BoxSize::Size(actual_box_size);
    // Only a header without a declared size may be given one after the fact.
    assert_eq!(self.box_size, BoxSize::UntilEof);
    self.box_size = explicit_size;
}
pub const fn encoded_len(&self) -> u64 {
let mut size = FourCC::size() + size_of::<u32>() as u64;
if let BoxSize::Ext(_) = self.box_size {

@ -1 +1 @@
Subproject commit b9f4d3f3273819ce50170357098a56113eb088b2
Subproject commit b2121e38ec04025fdb62306b4863566fa993c8cb