Skip to content

Commit

Permalink
Simplify core parser routines
Browse files Browse the repository at this point in the history
Removal and consolidation of functions in favor of composition
  • Loading branch information
nickbabcock committed Aug 31, 2024
1 parent df476bc commit 89bbadd
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 80 deletions.
70 changes: 28 additions & 42 deletions src/core_parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::errors::ParseError;
use crate::parsing_utils::{decode_str, decode_utf16, decode_windows1252, le_i32};
use crate::parsing_utils::{decode_str, decode_utf16, decode_windows1252};

#[derive(Debug, Clone, PartialEq)]
pub struct CoreParser<'a> {
Expand All @@ -18,63 +18,49 @@ impl<'a> CoreParser<'a> {
self.col
}

/// Used for skipping some amount of data
pub fn advance(&mut self, ind: usize) {
self.col += ind as i32;
self.data = &self.data[ind..];
}

/// Returns the first `size` bytes of the remaining replay data without consuming them, or an
/// `InsufficientData` error if fewer than `size` bytes remain
pub fn view_data(&self, size: usize) -> Result<&'a [u8], ParseError> {
if size > self.data.len() {
Err(ParseError::InsufficientData(
size as i32,
self.data.len() as i32,
))
} else {
Ok(&self.data[..size])
}
self.data
.get(..size)
.ok_or_else(|| ParseError::InsufficientData(size as i32, self.data.len() as i32))
}

pub fn sub_parser(&mut self, size: usize) -> Result<CoreParser<'a>, ParseError> {
pub fn scope(&mut self, size: usize) -> Result<CoreParser<'a>, ParseError> {
let col = self.col;
let subbed = self.take_data(size)?;
Ok(CoreParser { data: subbed, col })
}

pub fn take_bytes<const N: usize>(&mut self, size: usize) -> Result<[u8; N], ParseError> {
let head = self.take_data(size)?;
let result = head
.first_chunk::<N>()
.ok_or_else(|| ParseError::InsufficientData(size as i32, self.data.len() as i32))?;
Ok(*result)
}

pub fn take_data(&mut self, size: usize) -> Result<&'a [u8], ParseError> {
let res = self.view_data(size)?;
self.advance(size);
Ok(res)
let (head, tail) = self
.data
.split_at_checked(size)
.ok_or_else(|| ParseError::InsufficientData(size as i32, self.data.len() as i32))?;
self.col += size as i32;
self.data = tail;
Ok(head)
}

/// Take the next `N` bytes as a fixed-size array, advancing past them
#[inline]
pub fn take<F, T>(&mut self, size: usize, mut f: F) -> Result<T, ParseError>
where
F: FnMut(&'a [u8]) -> T,
{
let res = f(self.view_data(size)?);
self.advance(size);
Ok(res)
pub fn take<const N: usize>(&mut self) -> Result<[u8; N], ParseError> {
let (head, tail) = self
.data
.split_first_chunk::<N>()
.ok_or_else(|| ParseError::InsufficientData(N as i32, self.data.len() as i32))?;
self.col += N as i32;
self.data = tail;
Ok(*head)
}

pub fn take_i32(&mut self, section: &'static str) -> Result<i32, ParseError> {
self.take(4, le_i32)
self.take::<4>()
.map(i32::from_le_bytes)
.map_err(|e| ParseError::ParseError(section, self.bytes_read(), Box::new(e)))
}

pub fn take_u32(&mut self, section: &'static str) -> Result<u32, ParseError> {
self.take(4, le_i32)
.map(|x| x as u32)
self.take::<4>()
.map(u32::from_le_bytes)
.map_err(|e| ParseError::ParseError(section, self.bytes_read(), Box::new(e)))
}

Expand All @@ -98,7 +84,7 @@ impl<'a> CoreParser<'a> {
where
F: FnMut(&mut Self) -> Result<T, ParseError>,
{
let size = self.take(4, le_i32)?;
let size = self.take::<4>().map(i32::from_le_bytes)?;
CoreParser::repeat(size as usize, || f(self))
}

Expand All @@ -108,15 +94,15 @@ impl<'a> CoreParser<'a> {

/// Parses UTF-8 string from replay
pub fn parse_str(&mut self) -> Result<&'a str, ParseError> {
let size = self.take(4, le_i32)? as usize;
self.take_data(size).and_then(decode_str)
let size = self.take::<4>().map(i32::from_le_bytes)?;
self.take_data(size as usize).and_then(decode_str)
}

/// Parses either UTF-16 or Windows-1252 encoded strings
pub fn parse_text(&mut self) -> Result<String, ParseError> {
// The number of bytes that the string is composed of. If negative, the string is UTF-16,
// else the string is windows 1252 encoded.
let characters = self.take(4, le_i32)?;
let characters = self.take::<4>().map(i32::from_le_bytes)?;

// characters.abs() overflows at i32::MIN (panicking in debug builds), so we eschew it for manual checking
if characters == 0 {
Expand Down
20 changes: 11 additions & 9 deletions src/header.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::core_parser::CoreParser;
use crate::errors::ParseError;
use crate::models::HeaderProp;
use crate::parsing_utils::le_u64;

/// Intermediate parsing structure for the header
#[derive(Debug, PartialEq)]
Expand Down Expand Up @@ -93,7 +92,7 @@ fn parse_rdict(
}

let kind = rlp.parse_str()?;
let size = rlp.take(8, le_u64)? as usize;
let size = u64::from_le_bytes(rlp.take::<8>()?) as usize;
let val = match kind {
"BoolProperty" => match mode {
// The size SHOULD be zero, but we're ignoring it.
Expand All @@ -112,31 +111,34 @@ fn parse_rdict(
})
}
ParserMode::Quirks => rlp
.sub_parser(size)
.scope(size)
.and_then(|mut x| x.parse_text())
.map(|kind| HeaderProp::Byte { kind, value: None }),
},
"ArrayProperty" => rlp
.sub_parser(size)
.scope(size)
.and_then(|mut x| array_property(&mut x, mode)),
"FloatProperty" => rlp
.take_bytes::<4>(size)
.scope(size)
.and_then(|mut x| x.take::<4>())
.map(f32::from_le_bytes)
.map(HeaderProp::Float),
"IntProperty" => rlp
.take_bytes::<4>(size)
.scope(size)
.and_then(|mut x| x.take::<4>())
.map(i32::from_le_bytes)
.map(HeaderProp::Int),
"QWordProperty" => rlp
.take_bytes::<8>(size)
.scope(size)
.and_then(|mut x| x.take::<8>())
.map(u64::from_le_bytes)
.map(HeaderProp::QWord),
"NameProperty" => rlp
.sub_parser(size)
.scope(size)
.and_then(|mut x| x.parse_text())
.map(HeaderProp::Name),
"StrProperty" => rlp
.sub_parser(size)
.scope(size)
.and_then(|mut x| x.parse_text())
.map(HeaderProp::Str),
x => Err(ParseError::UnexpectedProperty(String::from(x))),
Expand Down
25 changes: 12 additions & 13 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ use crate::errors::{NetworkError, ParseError};
use crate::header::{self, Header};
use crate::models::*;
use crate::network;
use crate::parsing_utils::{le_f32, le_i32};

/// Determines under what circumstances the parser should perform the crc check for replay
/// corruption. Since the crc check is the most time consuming part when parsing the header,
Expand Down Expand Up @@ -471,7 +470,7 @@ impl<'a> Parser<'a> {

let network_size = self.core.take_i32("network size")?;

let network_data = self.core.take(network_size as usize, |d| d).map_err(|e| {
let network_data = self.core.take_data(network_size as usize).map_err(|e| {
ParseError::ParseError("network data", self.core.bytes_read(), Box::new(e))
})?;

Expand Down Expand Up @@ -522,25 +521,25 @@ impl<'a> Parser<'a> {
self.core.list_of(|s| {
Ok(TickMark {
description: s.parse_text()?,
frame: s.take(4, le_i32)?,
frame: s.take::<4>().map(i32::from_le_bytes)?,
})
})
}

fn parse_keyframe(&mut self) -> Result<Vec<KeyFrame>, ParseError> {
self.core.list_of(|s| {
Ok(KeyFrame {
time: s.take(4, le_f32)?,
frame: s.take(4, le_i32)?,
position: s.take(4, le_i32)?,
time: s.take::<4>().map(f32::from_le_bytes)?,
frame: s.take::<4>().map(i32::from_le_bytes)?,
position: s.take::<4>().map(i32::from_le_bytes)?,
})
})
}

fn parse_debuginfo(&mut self) -> Result<Vec<DebugInfo>, ParseError> {
self.core.list_of(|s| {
Ok(DebugInfo {
frame: s.take(4, le_i32)?,
frame: s.take::<4>().map(i32::from_le_bytes)?,
user: s.parse_text()?,
text: s.parse_text()?,
})
Expand All @@ -551,21 +550,21 @@ impl<'a> Parser<'a> {
self.core.list_of(|s| {
Ok(ClassIndex {
class: s.parse_str().map(String::from)?,
index: s.take(4, le_i32)?,
index: s.take::<4>().map(i32::from_le_bytes)?,
})
})
}

fn parse_classcache(&mut self) -> Result<Vec<ClassNetCache>, ParseError> {
self.core.list_of(|x| {
Ok(ClassNetCache {
object_ind: x.take(4, le_i32)?,
parent_id: x.take(4, le_i32)?,
cache_id: x.take(4, le_i32)?,
object_ind: x.take::<4>().map(i32::from_le_bytes)?,
parent_id: x.take::<4>().map(i32::from_le_bytes)?,
cache_id: x.take::<4>().map(i32::from_le_bytes)?,
properties: x.list_of(|s| {
Ok(CacheProp {
object_ind: s.take(4, le_i32)?,
stream_id: s.take(4, le_i32)?,
object_ind: s.take::<4>().map(i32::from_le_bytes)?,
stream_id: s.take::<4>().map(i32::from_le_bytes)?,
})
})?,
})
Expand Down
16 changes: 0 additions & 16 deletions src/parsing_utils.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,5 @@
use crate::errors::ParseError;
use encoding_rs::{UTF_16LE, WINDOWS_1252};
use std::convert::TryInto;

#[inline]
pub fn le_i32(d: &[u8]) -> i32 {
i32::from_le_bytes(d[..4].try_into().unwrap())
}

#[inline]
pub fn le_f32(d: &[u8]) -> f32 {
f32::from_le_bytes(d[..4].try_into().unwrap())
}

#[inline]
pub fn le_u64(d: &[u8]) -> u64 {
u64::from_le_bytes(d[..8].try_into().unwrap())
}

/// Reads a string of a given size from the data. The size includes a null
/// character as the last character, so we drop it in the returned string
Expand Down

0 comments on commit 89bbadd

Please sign in to comment.