From 89bbadd1310ead01881f84af488fe827aeab2e7f Mon Sep 17 00:00:00 2001 From: Nick Babcock Date: Sat, 31 Aug 2024 07:52:24 -0500 Subject: [PATCH] Simplify core parser routines Removal and consolidation of functions in preference to composition --- src/core_parser.rs | 70 ++++++++++++++++++-------------------------- src/header.rs | 20 +++++++------ src/parser.rs | 25 ++++++++-------- src/parsing_utils.rs | 16 ---------- 4 files changed, 51 insertions(+), 80 deletions(-) diff --git a/src/core_parser.rs b/src/core_parser.rs index a40530d..83e863c 100644 --- a/src/core_parser.rs +++ b/src/core_parser.rs @@ -1,5 +1,5 @@ use crate::errors::ParseError; -use crate::parsing_utils::{decode_str, decode_utf16, decode_windows1252, le_i32}; +use crate::parsing_utils::{decode_str, decode_utf16, decode_windows1252}; #[derive(Debug, Clone, PartialEq)] pub struct CoreParser<'a> { @@ -18,63 +18,49 @@ impl<'a> CoreParser<'a> { self.col } - /// Used for skipping some amount of data - pub fn advance(&mut self, ind: usize) { - self.col += ind as i32; - self.data = &self.data[ind..]; - } - /// Returns a slice of the replay after ensuring there is enough space for the requested slice pub fn view_data(&self, size: usize) -> Result<&'a [u8], ParseError> { - if size > self.data.len() { - Err(ParseError::InsufficientData( - size as i32, - self.data.len() as i32, - )) - } else { - Ok(&self.data[..size]) - } + self.data + .get(..size) + .ok_or_else(|| ParseError::InsufficientData(size as i32, self.data.len() as i32)) } - pub fn sub_parser(&mut self, size: usize) -> Result, ParseError> { + pub fn scope(&mut self, size: usize) -> Result, ParseError> { let col = self.col; let subbed = self.take_data(size)?; Ok(CoreParser { data: subbed, col }) } - pub fn take_bytes(&mut self, size: usize) -> Result<[u8; N], ParseError> { - let head = self.take_data(size)?; - let result = head - .first_chunk::() - .ok_or_else(|| ParseError::InsufficientData(size as i32, self.data.len() as i32))?; - Ok(*result) - } - pub fn take_data(&mut self, size: usize) -> Result<&'a [u8], ParseError> { - let res = self.view_data(size)?; - self.advance(size); - Ok(res) + let (head, tail) = self + .data + .split_at_checked(size) + .ok_or_else(|| ParseError::InsufficientData(size as i32, self.data.len() as i32))?; + self.col += size as i32; + self.data = tail; + Ok(head) } - /// Take the next `size` of bytes and interpret them in an infallible fashion #[inline] - pub fn take(&mut self, size: usize, mut f: F) -> Result - where - F: FnMut(&'a [u8]) -> T, - { - let res = f(self.view_data(size)?); - self.advance(size); - Ok(res) + pub fn take(&mut self) -> Result<[u8; N], ParseError> { + let (head, tail) = self + .data + .split_first_chunk::() + .ok_or_else(|| ParseError::InsufficientData(N as i32, self.data.len() as i32))?; + self.col += N as i32; + self.data = tail; + Ok(*head) } pub fn take_i32(&mut self, section: &'static str) -> Result { - self.take(4, le_i32) + self.take::<4>() + .map(i32::from_le_bytes) .map_err(|e| ParseError::ParseError(section, self.bytes_read(), Box::new(e))) } pub fn take_u32(&mut self, section: &'static str) -> Result { - self.take(4, le_i32) - .map(|x| x as u32) + self.take::<4>() + .map(u32::from_le_bytes) .map_err(|e| ParseError::ParseError(section, self.bytes_read(), Box::new(e))) } @@ -98,7 +84,7 @@ impl<'a> CoreParser<'a> { where F: FnMut(&mut Self) -> Result, { - let size = self.take(4, le_i32)?; + let size = self.take::<4>().map(i32::from_le_bytes)?; CoreParser::repeat(size as usize, || f(self)) } @@ -108,15 +94,15 @@ impl<'a> CoreParser<'a> { /// Parses UTF-8 string from replay pub fn parse_str(&mut self) -> Result<&'a str, ParseError> { - let size = self.take(4, le_i32)? as usize; - self.take_data(size).and_then(decode_str) + let size = self.take::<4>().map(i32::from_le_bytes)?; + self.take_data(size as usize).and_then(decode_str) } /// Parses either UTF-16 or Windows-1252 encoded strings pub fn parse_text(&mut self) -> Result { // The number of bytes that the string is composed of. If negative, the string is UTF-16, // else the string is windows 1252 encoded. - let characters = self.take(4, le_i32)?; + let characters = self.take::<4>().map(i32::from_le_bytes)?; // size.abs() will panic at min_value, so we eschew it for manual checking if characters == 0 { diff --git a/src/header.rs b/src/header.rs index 806b0b2..e85dcbc 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,7 +1,6 @@ use crate::core_parser::CoreParser; use crate::errors::ParseError; use crate::models::HeaderProp; -use crate::parsing_utils::le_u64; /// Intermediate parsing structure for the header #[derive(Debug, PartialEq)] @@ -93,7 +92,7 @@ fn parse_rdict( } let kind = rlp.parse_str()?; - let size = rlp.take(8, le_u64)? as usize; + let size = u64::from_le_bytes(rlp.take::<8>()?) as usize; let val = match kind { "BoolProperty" => match mode { // The size SHOULD be zero, but we're ignoring it. @@ -112,31 +111,34 @@ fn parse_rdict( }) } ParserMode::Quirks => rlp - .sub_parser(size) + .scope(size) .and_then(|mut x| x.parse_text()) .map(|kind| HeaderProp::Byte { kind, value: None }), }, "ArrayProperty" => rlp - .sub_parser(size) + .scope(size) .and_then(|mut x| array_property(&mut x, mode)), "FloatProperty" => rlp - .take_bytes::<4>(size) + .scope(size) + .and_then(|mut x| x.take::<4>()) .map(f32::from_le_bytes) .map(HeaderProp::Float), "IntProperty" => rlp - .take_bytes::<4>(size) + .scope(size) + .and_then(|mut x| x.take::<4>()) .map(i32::from_le_bytes) .map(HeaderProp::Int), "QWordProperty" => rlp - .take_bytes::<8>(size) + .scope(size) + .and_then(|mut x| x.take::<8>()) .map(u64::from_le_bytes) .map(HeaderProp::QWord), "NameProperty" => rlp - .sub_parser(size) + .scope(size) .and_then(|mut x| x.parse_text()) .map(HeaderProp::Name), "StrProperty" => rlp - .sub_parser(size) + .scope(size) .and_then(|mut x| x.parse_text()) .map(HeaderProp::Str), x => Err(ParseError::UnexpectedProperty(String::from(x))), diff --git a/src/parser.rs b/src/parser.rs index 3fd98c6..37e73af 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -221,7 +221,6 @@ use crate::errors::{NetworkError, ParseError}; use crate::header::{self, Header}; use crate::models::*; use crate::network; -use crate::parsing_utils::{le_f32, le_i32}; /// Determines under what circumstances the parser should perform the crc check for replay /// corruption. Since the crc check is the most time consuming part when parsing the header, @@ -471,7 +470,7 @@ impl<'a> Parser<'a> { let network_size = self.core.take_i32("network size")?; - let network_data = self.core.take(network_size as usize, |d| d).map_err(|e| { + let network_data = self.core.take_data(network_size as usize).map_err(|e| { ParseError::ParseError("network data", self.core.bytes_read(), Box::new(e)) })?; @@ -522,7 +521,7 @@ impl<'a> Parser<'a> { self.core.list_of(|s| { Ok(TickMark { description: s.parse_text()?, - frame: s.take(4, le_i32)?, + frame: s.take::<4>().map(i32::from_le_bytes)?, }) }) } @@ -530,9 +529,9 @@ impl<'a> Parser<'a> { fn parse_keyframe(&mut self) -> Result, ParseError> { self.core.list_of(|s| { Ok(KeyFrame { - time: s.take(4, le_f32)?, - frame: s.take(4, le_i32)?, - position: s.take(4, le_i32)?, + time: s.take::<4>().map(f32::from_le_bytes)?, + frame: s.take::<4>().map(i32::from_le_bytes)?, + position: s.take::<4>().map(i32::from_le_bytes)?, }) }) } @@ -540,7 +539,7 @@ impl<'a> Parser<'a> { fn parse_debuginfo(&mut self) -> Result, ParseError> { self.core.list_of(|s| { Ok(DebugInfo { - frame: s.take(4, le_i32)?, + frame: s.take::<4>().map(i32::from_le_bytes)?, user: s.parse_text()?, text: s.parse_text()?, }) @@ -551,7 +550,7 @@ impl<'a> Parser<'a> { self.core.list_of(|s| { Ok(ClassIndex { class: s.parse_str().map(String::from)?, - index: s.take(4, le_i32)?, + index: s.take::<4>().map(i32::from_le_bytes)?, }) }) } @@ -559,13 +558,13 @@ impl<'a> Parser<'a> { fn parse_classcache(&mut self) -> Result, ParseError> { self.core.list_of(|x| { Ok(ClassNetCache { - object_ind: x.take(4, le_i32)?, - parent_id: x.take(4, le_i32)?, - cache_id: x.take(4, le_i32)?, + object_ind: x.take::<4>().map(i32::from_le_bytes)?, + parent_id: x.take::<4>().map(i32::from_le_bytes)?, + cache_id: x.take::<4>().map(i32::from_le_bytes)?, properties: x.list_of(|s| { Ok(CacheProp { - object_ind: s.take(4, le_i32)?, - stream_id: s.take(4, le_i32)?, + object_ind: s.take::<4>().map(i32::from_le_bytes)?, + stream_id: s.take::<4>().map(i32::from_le_bytes)?, }) })?, }) diff --git a/src/parsing_utils.rs b/src/parsing_utils.rs index c70a169..312cf9f 100644 --- a/src/parsing_utils.rs +++ b/src/parsing_utils.rs @@ -1,21 +1,5 @@ use crate::errors::ParseError; use encoding_rs::{UTF_16LE, WINDOWS_1252}; -use std::convert::TryInto; - -#[inline] -pub fn le_i32(d: &[u8]) -> i32 { - i32::from_le_bytes(d[..4].try_into().unwrap()) -} - -#[inline] -pub fn le_f32(d: &[u8]) -> f32 { - f32::from_le_bytes(d[..4].try_into().unwrap()) -} - -#[inline] -pub fn le_u64(d: &[u8]) -> u64 { - u64::from_le_bytes(d[..8].try_into().unwrap()) -} /// Reads a string of a given size from the data. The size includes a null /// character as the last character, so we drop it in the returned string