Skip to content

Commit

Permalink
Refactor: extract file type checking functions into file.rs.
Browse files Browse the repository at this point in the history
  • Loading branch information
mindeng committed Jul 12, 2024
1 parent 0316198 commit 111cc50
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 113 deletions.
11 changes: 10 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# Changelog

## nom-exif v1.2.4

[8c00f1b..HEAD](https://github.com/mindeng/nom-exif/compare/8c00f1b..HEAD)

### Improved

- **Compatibility** has been greatly improved: the compatible brands in the ftyp
box are now checked, so various compatible MP4/MOV files are supported.

## nom-exif v1.2.3

[2861cbc..HEAD](https://github.com/mindeng/nom-exif/compare/2861cbc..HEAD)
[2861cbc..8c00f1b](https://github.com/mindeng/nom-exif/compare/2861cbc..8c00f1b)

### Fixed

Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "nom-exif"
version = "1.2.3"
version = "1.2.4"
edition = "2021"
license-file = "LICENSE"
description = "Exif/metadata parsing library written in pure Rust, both JPEG/HEIF/HEIC images and MOV/MP4 videos are supported."
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ Exif/metadata parsing library written in pure Rust with [nom](https://github.com
- **Robustness and stability**: Hardened through long-term [fuzz
testing](https://github.com/rust-fuzz/afl.rs); tons of crash issues
discovered during testing have been fixed. Thanks to
[@sigaloid](https://github.com/sigaloid) for [raising this
question](https://github.com/mindeng/nom-exif/pull/5).
[@sigaloid](https://github.com/sigaloid) for [pointing this
out](https://github.com/mindeng/nom-exif/pull/5)!


## Usage
Expand Down
31 changes: 0 additions & 31 deletions src/bbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use nom::{
bytes::{complete, streaming},
combinator::{fail, map_res},
error::context,
multi::many0,
number, AsChar, IResult, Needed,
};

Expand Down Expand Up @@ -301,36 +300,6 @@ fn parse_cstr(input: &[u8]) -> IResult<&[u8], String> {
Ok((&remain[1..], s)) // Safe-slice
}

/// Parses the first box of `input` and, when it is an `ftyp` box, extracts
/// its major brand.
///
/// Returns the parsed box together with:
/// - `Some(brand)`: the first four bytes of the `ftyp` body;
/// - `None`: the first box is a `wide` box instead of `ftyp`.
///
/// # Errors
///
/// Fails when the first box cannot be parsed, when it is neither `ftyp` nor
/// `wide`, or when the `ftyp` body is shorter than four bytes.
pub fn get_ftyp_and_major_brand(input: &[u8]) -> crate::Result<(BoxHolder, Option<&[u8]>)> {
    let (_, bbox) = BoxHolder::parse(input).map_err(|_| "parse ftyp failed")?;

    if bbox.box_type() == "wide" {
        // MOV files extracted from HEIC start with `wide` & `mdat` atoms
        return Ok((bbox, None));
    }
    if bbox.box_type() != "ftyp" {
        return Err(format!("parse ftyp failed; first box type is: {}", bbox.box_type()).into());
    }

    // The major brand occupies the first four bytes, so four bytes is the
    // minimum acceptable body size.
    let body_len = bbox.body_data().len();
    if body_len < 4 {
        return Err(
            format!("parse ftyp failed; body size should be at least 4, got {body_len}").into(),
        );
    }

    let (_, major_brand) = complete::take(4_usize)(bbox.body_data())?;
    Ok((bbox, Some(major_brand)))
}

pub fn get_compatible_brands(body: &[u8]) -> crate::Result<Vec<&[u8]>> {
let Ok((_, brands)) = many0(complete::take::<usize, &[u8], nom::error::Error<&[u8]>>(
4_usize,
))(body) else {
return Err("get compatible brands failed".into());
};
Ok(brands)
}

#[cfg(test)]
mod tests {
use std::io::Read;
Expand Down
123 changes: 123 additions & 0 deletions src/file.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
use std::fmt::Display;

/// Four-byte brands that mark a file as HEIF/HEIC.
const HEIF_FTYPS: &[&[u8]] = &[
    // the usual HEIF images
    b"heic",
    // 10bit images, or anything that uses h265 with range extension
    b"heix",
    // 'hevx': brands for image sequences
    b"hevc",
    // multiview
    b"heim",
    // scalable
    b"heis",
    // multiview sequence
    b"hevm",
    // scalable sequence
    b"hevs",
    // QT file's compatible brands
    b"mif1",
    b"MiHE",
    b"miaf",
    b"MiHB",
];

/// Four-character brand names that mark a file as MP4.
const MP4_BRAND_NAMES: &[&str] = &[
    "3g2a",
    "3gp4",
    "3gp5",
    "3gp6",
    "mp41",
    "mp42",
    "iso2",
    "isom",
    "vfj1",
];

/// Four-character brand names that mark a file as QuickTime.
///
/// Entries are compared byte-for-byte against four-byte brand slices, so
/// every entry must be exactly four bytes long. The QuickTime brand is `qt`
/// padded with TWO trailing spaces; a shorter literal can never match.
const QT_BRAND_NAMES: &[&str] = &["qt  ", "CAEP"];

#[allow(unused)]
#[derive(Debug, PartialEq, Eq)]
pub enum FileType {
Expand All @@ -9,8 +26,26 @@ pub enum FileType {
MP4,
}

use nom::{bytes::complete, multi::many0};
use FileType::*;

use crate::bbox::BoxHolder;

// Detect the file type from the leading bytes of an input buffer.
impl TryFrom<&[u8]> for FileType {
    type Error = crate::Error;

    /// Tries the QuickTime/MP4 check first and falls back to the HEIF check
    /// when that fails; the error of the HEIF check is the one reported.
    fn try_from(input: &[u8]) -> Result<Self, Self::Error> {
        // QT & MP4 go first, because an embedded QT file may not have an
        // ftyp box
        check_qt_mp4(input).or_else(|_| check_heif(input))
    }
}

impl Display for FileType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand All @@ -21,3 +56,91 @@ impl Display for FileType {
}
}
}

/// Checks whether `input` starts with an `ftyp` box declaring a HEIF/HEIC
/// brand.
///
/// The major brand is tested against `HEIF_FTYPS` first; if it is not listed,
/// the brands returned by `get_compatible_brands` are tested as well.
///
/// # Errors
///
/// Fails when the leading box cannot be parsed, when no major brand is
/// available (the first box is not `ftyp`), or when neither the major brand
/// nor any compatible brand is a known HEIF brand.
pub fn check_heif(input: &[u8]) -> crate::Result<FileType> {
    let (ftyp, Some(major_brand)) = get_ftyp_and_major_brand(input)? else {
        return Err("invalid ISOBMFF file; ftyp not found".into());
    };

    if HEIF_FTYPS.contains(&major_brand) {
        return Ok(FileType::Heif);
    }

    // Major brand is unknown: fall back to the compatible brands
    let compatible_brands = get_compatible_brands(ftyp.body_data())?;
    if compatible_brands
        .iter()
        .any(|brand| HEIF_FTYPS.contains(brand))
    {
        return Ok(FileType::Heif);
    }

    // Render the brand as text (as `check_qt_mp4` does) rather than as a
    // list of byte values, so the message is readable.
    Err(format!(
        "unsupported HEIF/HEIC file; major brand: '{}'",
        major_brand
            .iter()
            .copied()
            .map(char::from)
            .collect::<String>()
    )
    .into())
}

/// Classifies `input` as a QuickTime or MP4 file from its leading box.
///
/// Decision order:
/// 1. no major brand available (first box is not `ftyp`): QuickTime;
/// 2. major brand listed in `QT_BRAND_NAMES`: QuickTime;
/// 3. major brand listed in `MP4_BRAND_NAMES`: MP4;
/// 4. any compatible brand listed in `QT_BRAND_NAMES`: QuickTime;
/// 5. any compatible brand listed in `MP4_BRAND_NAMES`: MP4.
///
/// # Errors
///
/// Fails when the leading box cannot be parsed, when the compatible brands
/// cannot be read, or when no brand matches either list.
pub fn check_qt_mp4(input: &[u8]) -> crate::Result<FileType> {
    /// Whether `brand` equals one of `names`, compared byte-for-byte.
    fn is_listed(names: &[&str], brand: &[u8]) -> bool {
        names.iter().any(|name| name.as_bytes() == brand)
    }

    let (ftyp, major_brand) = match get_ftyp_and_major_brand(input)? {
        (holder, Some(brand)) => (holder, brand),
        // ftyp is None, assume it's a MOV file extracted from HEIC
        (_, None) => return Ok(FileType::QuickTime),
    };

    // Major brand: QuickTime takes precedence over MP4
    if is_listed(QT_BRAND_NAMES, major_brand) {
        return Ok(FileType::QuickTime);
    }
    if is_listed(MP4_BRAND_NAMES, major_brand) {
        return Ok(FileType::MP4);
    }

    // Compatible brands, with the same precedence
    let compatible_brands = get_compatible_brands(ftyp.body_data())?;
    let candidates = [
        (QT_BRAND_NAMES, FileType::QuickTime),
        (MP4_BRAND_NAMES, FileType::MP4),
    ];
    for (names, file_type) in candidates {
        if compatible_brands
            .iter()
            .any(|&brand| is_listed(names, brand))
        {
            return Ok(file_type);
        }
    }

    let brand_text: String = major_brand.iter().copied().map(char::from).collect();
    Err(format!("unsupported video file; major brand: '{}'", brand_text).into())
}

/// Parses the first box of `input` and, when it is an `ftyp` box, extracts
/// its major brand.
///
/// Returns the parsed box together with:
/// - `Some(brand)`: the first four bytes of the `ftyp` body;
/// - `None`: the first box is a `wide` box instead of `ftyp`.
///
/// # Errors
///
/// Fails when the first box cannot be parsed, when it is neither `ftyp` nor
/// `wide`, or when the `ftyp` body is shorter than four bytes.
pub fn get_ftyp_and_major_brand(input: &[u8]) -> crate::Result<(BoxHolder, Option<&[u8]>)> {
    let (_, bbox) = BoxHolder::parse(input).map_err(|_| "parse ftyp failed")?;

    if bbox.box_type() == "wide" {
        // MOV files extracted from HEIC start with `wide` & `mdat` atoms
        return Ok((bbox, None));
    }
    if bbox.box_type() != "ftyp" {
        return Err(format!("parse ftyp failed; first box type is: {}", bbox.box_type()).into());
    }

    // The major brand occupies the first four bytes, so four bytes is the
    // minimum acceptable body size.
    let body_len = bbox.body_data().len();
    if body_len < 4 {
        return Err(
            format!("parse ftyp failed; body size should be at least 4, got {body_len}").into(),
        );
    }

    let (_, major_brand) = complete::take(4_usize)(bbox.body_data())?;
    Ok((bbox, Some(major_brand)))
}

pub fn get_compatible_brands(body: &[u8]) -> crate::Result<Vec<&[u8]>> {
let Ok((_, brands)) = many0(complete::take::<usize, &[u8], nom::error::Error<&[u8]>>(
4_usize,
))(body) else {
return Err("get compatible brands failed".into());
};
Ok(brands)
}
40 changes: 15 additions & 25 deletions src/heif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use nom::combinator::fail;
use nom::Needed;
use nom::{number::complete::be_u32, IResult};

use crate::bbox::{get_compatible_brands, get_ftyp_and_major_brand};
use crate::exif::{parse_exif, Exif};
use crate::file::check_heif;
use crate::{
bbox::{travel_while, BoxHolder, MetaBox, ParseBox},
exif::check_exif_header,
Expand Down Expand Up @@ -62,19 +62,7 @@ pub fn parse_heif_exif<R: Read + Seek>(mut reader: R) -> crate::Result<Option<Ex
Err("file is empty")?;
}

let (ftyp, Some(major_brand)) =
get_ftyp_and_major_brand(&buf).map_err(|e| format!("unsupported HEIF/HEIC file; {}", e))?
else {
return Err("unsupported HEIF/HEIC file; ftyp not found".into());
};

if !HEIF_FTYPS.contains(&major_brand) {
// Check compatible brands
let compatible_brands = get_compatible_brands(ftyp.body_data())?;
if !HEIF_FTYPS.iter().any(|x| compatible_brands.contains(x)) {
return Err(format!("unsupported HEIF/HEIC file; major brand: {major_brand:?}").into());
}
}
check_heif(&buf)?;

let (_, exif_data) = loop {
to_read = match extract_exif_data(&buf) {
Expand Down Expand Up @@ -104,16 +92,6 @@ pub fn parse_heif_exif<R: Read + Seek>(mut reader: R) -> crate::Result<Option<Ex
exif_data.map(parse_exif).transpose()
}

/// Four-byte major brands accepted as HEIF/HEIC.
const HEIF_FTYPS: [&[u8]; 7] = [
    // the usual HEIF images
    b"heic",
    // 10bit images, or anything that uses h265 with range extension
    b"heix",
    // 'hevx': brands for image sequences
    b"hevc",
    // multiview
    b"heim",
    // scalable
    b"heis",
    // multiview sequence
    b"hevm",
    // scalable sequence
    b"hevs",
];

/// Extract Exif TIFF data from the bytes of a HEIF/HEIC file.
fn extract_exif_data(input: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
let remain = input;
Expand Down Expand Up @@ -141,7 +119,7 @@ fn extract_exif_data(input: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
#[cfg(test)]
mod tests {
use super::*;
use crate::testkit::*;
use crate::{file::FileType, testkit::*};
use test_case::test_case;

#[test_case("exif.heic")]
Expand Down Expand Up @@ -209,6 +187,18 @@ mod tests {
parse_heif_exif(reader).expect_err("should be ParseFailed error");
}

// Detection through compatible brands: a sample that should be recognized as
// HEIF, and one that should be rejected.
#[test_case("compatible-brands.heic", Some(FileType::Heif))]
#[test_case("compatible-brands-fail.heic", None)]
fn heic_compatible_brands(path: &str, ft: Option<FileType>) {
    let buf = read_sample(path).unwrap();
    let got = check_heif(&buf);
    match ft {
        Some(expected) => assert_eq!(expected, got.unwrap()),
        None => {
            got.unwrap_err();
        }
    }
}

#[test_case("no-exif.heic", 0x24-10)]
#[test_case("exif.heic", 0xa3a-10)]
fn heic_exif_data(path: &str, exif_size: usize) {
Expand Down
Loading

0 comments on commit 111cc50

Please sign in to comment.