diff --git a/lexical-parse-integer/docs/Algorithm.md b/lexical-parse-integer/docs/Algorithm.md index d8c83dc2..94067bd4 100644 --- a/lexical-parse-integer/docs/Algorithm.md +++ b/lexical-parse-integer/docs/Algorithm.md @@ -17,185 +17,3 @@ Therefore, only 1 optimization for parsing multiple digits was used for each typ Finally, for 32-bit and 64-bit signed integers, we use no multiple-digit optimizations, since they provide **no** benefit for 32-bit integers in any cases, and only ~23% benefit for large 64-bit integers. However, for simple integers, due to the increased branching, they induce a performance penalty of ~50%. In addition, rather than have separate branches for positive and negative numbers, both are parsed as unsigned integers, and then converted to the signed variant, after overflowing checking. - -**Overflow Checking** - -Rather than do checked multiplication and additions in each loop, which increases the amount of branching, we can check after if numeric overflow has occurred by checking the number of parsed digits, and the resulting value. - -Given the following unoptimized code: - -```rust,ignore -pub fn parse(bytes: &[u8]) -> Result { - let mut value: u64 = 0; - let mut iter = bytes.iter(); - while let Some(&c) = iter.next() { - let digit = match (c as char).to_digit(10) { - Some(v) => v, - None => return Err(()), - }; - value = match value.checked_mul(10) { - Some(v) => v, - None => return Err(()), - }; - value = match value.checked_add(digit as u64) { - Some(v) => v, - None => return Err(()), - }; - } - Ok(value) -} -``` - -This translates to the following assembly: - -```asm -example::parse: - xor r11d, r11d - mov r10d, 10 - xor eax, eax - mov r8d, 1 -.LBB0_1: - mov r9, rax - cmp rsi, r11 - je .LBB0_2 - movzx ecx, byte ptr [rdi + r11] - add ecx, -48 - cmp ecx, 10 - jae .LBB0_6 - mov rax, r9 - mul r10 - jo .LBB0_6 - mov ecx, ecx - add r11, 1 - add rax, rcx - jae .LBB0_1 -.LBB0_6: - mov rax, r8 - mov rdx, r9 - ret -.LBB0_2: - xor r8d, r8d - mov rax, r8 - mov rdx, r9 - ret -``` - -We optimize it to the following code: - -```rust,ignore -pub fn parse(bytes: &[u8]) -> Result { - let mut value: u64 = 0; - let mut iter = bytes.iter(); - while let Some(&c) = iter.next() { - let digit = match (c as char).to_digit(10) { - Some(v) => v, - None => return Err(()), - }; - value = value.wrapping_mul(10); - value = value.wrapping_mul(digit as u64); - } - Ok(value) -} -``` - -Which produces the following assembly: - -```asm -example::parse: - xor eax, eax - xor ecx, ecx -.LBB0_1: - cmp rsi, rcx - je .LBB0_4 - movzx edx, byte ptr [rdi + rcx] - add edx, -48 - add rcx, 1 - cmp edx, 10 - jb .LBB0_1 - mov eax, 1 -.LBB0_4: - xor edx, edx - ret -``` - -This is much more efficient, however, there is one major limitation: we cannot know if numerical overflow has occurred, and must do it after the fact. We have numerical overflow on two cases: we parsed more digits than we theoretically could, or we parsed the same number as the maximum, but the number wrapped. Since the number wrapping will always produce a smaller value than the minimum value for that number of digits, this is a simple comparison. - -For unsigned integers, this is quite easy: we merely need to know the maximum number of digits that can be parsed without guaranteeing numerical overflow, and the number of digits that were parsed. - -```rust,ignore -// For example, max could be 20 for u64. -let count = ...; // Actual number of digits parsed. -let max = ...; // Maximum number of digits that could be parsed. -// Calculate the minimum value from processing `max` digits. -let min_value = 10u64.pow(max as u32 - 1); -// If we've processed more than the max digits, or if the value wrapped, -// we have overflow. -let is_overflow = count > max || (count == max && value < min_value); -``` - -For signed integers, it's slightly more complicated, but still quite easy: - -```rust,ignore -// For example, max could be 18 for i64. -let count = ...; // Actual number of digits parsed. -let max = ...; // Maximum number of digits that could be parsed. -let is_negative = ...; // If the value is less than 0. -// Calculate the minimum value from processing `max` digits. -let min_value = 10u64.pow(max as u32 - 1); -let max_value = i64::MAX as u64 + 1; -let is_overflow = count > max - || (count == max && ( - value < min_value - || value > max_value - || (!is_negative && value == max_value) - )); -``` - -All of the powers and constant generation is resolved at compile-time, producing efficient routines. For example, for `u64`, the following rust code: - -```rust,ignore -pub fn is_overflow(value: u64, count: usize) -> bool { - let max: usize = 20; - let min_value = 10u64.pow(max as u32 - 1); - count > max || (count == max && value < min_value) -} -``` - -... produces the following assembly: - -```asm -example::is_overflow: - cmp rsi, 20 - seta cl - sete dl - movabs rax, -8446744073709551616 - cmp rdi, rax - setb al - and al, dl - or al, cl - ret -``` - -Not bad at all. - -**Compact** - -For our compact implementation, prioritizing code size at the cost of performance, we use a naive algorithm that parses 1 digit at a time, without any additional optimizations. This algorithm is trivial to verify, and is effectively analogous to the following code: - -```rust,ignore -let mut value = 0; -while let Some(&c) = iter.next() { - let digit = match (c as char).to_digit(radix) { - Some(v) => v, - None => return Err(...), - }; - value = match value.checked_mul(radix) { - Some(v) => v, - None => return Err(...), - }; - value = match value.checked_add(digit) { - Some(v) => v, - None => return Err(...), - }; -} -``` diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index a32f2836..ab6340b8 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -11,11 +11,10 @@ #![cfg(not(feature = "compact"))] #![doc(hidden)] -use crate::shared::is_overflow; use lexical_util::digit::char_to_digit_const; use lexical_util::format::NumberFormat; use lexical_util::iterator::{AsBytes, BytesIter}; -use lexical_util::num::{as_cast, Integer, UnsignedInteger}; +use lexical_util::num::{as_cast, Integer}; use lexical_util::result::Result; use lexical_util::step::min_step; @@ -39,7 +38,9 @@ macro_rules! parse_8digits { $value:ident, $iter:ident, $format:ident, - $t:ident + $t:ident, + $overflow:ident, + $op:ident ) => {{ let radix: $t = as_cast(NumberFormat::<{ $format }>::MANTISSA_RADIX); let radix2: $t = radix.wrapping_mul(radix); @@ -48,8 +49,13 @@ macro_rules! parse_8digits { // Try our fast, 8-digit at a time optimizations. while let Some(val8) = try_parse_8digits::<$t, _, $format>(&mut $iter) { - $value = $value.wrapping_mul(radix8); - $value = $value.wrapping_add(val8); + let optvalue = $value.checked_mul(radix8).and_then(|x| x.$op(val8)); + if let Some(unwrapped) = optvalue { + $value = unwrapped; + } else { + $overflow = true; + break; + } } }}; } @@ -65,7 +71,9 @@ macro_rules! parse_4digits { $value:ident, $iter:ident, $format:ident, - $t:ident + $t:ident, + $overflow:ident, + $op:ident ) => {{ let radix: $t = as_cast(NumberFormat::<{ $format }>::MANTISSA_RADIX); let radix2: $t = radix.wrapping_mul(radix); @@ -73,8 +81,13 @@ macro_rules! parse_4digits { // Try our fast, 4-digit at a time optimizations. while let Some(val4) = try_parse_4digits::<$t, _, $format>(&mut $iter) { - $value = $value.wrapping_mul(radix4); - $value = $value.wrapping_add(val4); + let optvalue = $value.checked_mul(radix4).and_then(|x| x.$op(val4)); + if let Some(unwrapped) = optvalue { + $value = unwrapped; + } else { + $overflow = true; + break; + } } }}; } @@ -90,8 +103,9 @@ macro_rules! parse_digits { $is_negative:ident, $start_index:ident, $t:ident, - $u:ident, - $invalid_digit:ident + $invalid_digit:ident, + $overflow:ident, + $op:ident ) => {{ // WARNING: // Performance is heavily dependent on the amount of branching. @@ -120,42 +134,37 @@ macro_rules! parse_digits { // Optimizations for reading 8-digits at a time. // Makes no sense to do 8 digits at a time for 32-bit values, // since it can only hold 8 digits for base 10. - if <$t>::BITS == 128 && can_try_parse_multidigits!($iter, radix) { - parse_8digits!($value, $iter, $format, $u); - } - if <$t>::BITS == 64 && can_try_parse_multidigits!($iter, radix) && !<$t>::IS_SIGNED { - parse_8digits!($value, $iter, $format, $u); + // NOTE: These values were determined as optimization + if (<$t>::BITS == 128 || <$t>::BITS == 64) && can_try_parse_multidigits!($iter, radix) && $iter.length() >= 8 { + parse_8digits!($value, $iter, $format, $t, $overflow, $op); } // Optimizations for reading 4-digits at a time. // 36^4 is larger than a 16-bit integer. Likewise, 10^4 is almost // the limit of u16, so it's not worth it. - if <$t>::BITS == 32 && can_try_parse_multidigits!($iter, radix) && !<$t>::IS_SIGNED { - parse_4digits!($value, $iter, $format, $u); + if <$t>::BITS == 32 && can_try_parse_multidigits!($iter, radix) && $iter.length() >= 4 && !$overflow { + parse_4digits!($value, $iter, $format, $t, $overflow, $op); } - - parse_1digit!($value, $iter, $format, $is_negative, $start_index, $t, $u, $invalid_digit) + parse_1digit!($value, $iter, $format, $is_negative, $start_index, $t, $invalid_digit, $overflow, $op); }}; } /// Algorithm for the complete parser. #[inline(always)] -pub fn algorithm_complete(bytes: &[u8]) -> Result +pub fn algorithm_complete(bytes: &[u8]) -> Result where T: Integer, - Unsigned: UnsignedInteger, { - algorithm!(bytes, FORMAT, T, Unsigned, parse_digits, invalid_digit_complete, into_ok_complete) + algorithm!(bytes, FORMAT, T, parse_digits, invalid_digit_complete, into_ok_complete) } /// Algorithm for the partial parser. #[inline(always)] -pub fn algorithm_partial(bytes: &[u8]) -> Result<(T, usize)> +pub fn algorithm_partial(bytes: &[u8]) -> Result<(T, usize)> where T: Integer, - Unsigned: UnsignedInteger, { - algorithm!(bytes, FORMAT, T, Unsigned, parse_digits, invalid_digit_partial, into_ok_partial) + algorithm!(bytes, FORMAT, T, parse_digits, invalid_digit_partial, into_ok_partial) } // DIGIT OPTIMIZATIONS diff --git a/lexical-parse-integer/src/api.rs b/lexical-parse-integer/src/api.rs index 69c608ee..776177e6 100644 --- a/lexical-parse-integer/src/api.rs +++ b/lexical-parse-integer/src/api.rs @@ -20,7 +20,7 @@ macro_rules! integer_from_lexical { #[cfg_attr(not(feature = "compact"), inline)] fn from_lexical(bytes: &[u8]) -> lexical_util::result::Result { - Self::parse_complete::<$unsigned, STANDARD>(bytes) + Self::parse_complete::(bytes) } $(#[$meta:meta])? @@ -29,7 +29,7 @@ macro_rules! integer_from_lexical { bytes: &[u8], ) -> lexical_util::result::Result<(Self, usize)> { - Self::parse_partial::<$unsigned, STANDARD>(bytes) + Self::parse_partial::(bytes) } } @@ -47,7 +47,7 @@ macro_rules! integer_from_lexical { if !format.is_valid() { return Err(format.error()); } - Self::parse_complete::<$unsigned, FORMAT>(bytes) + Self::parse_complete::(bytes) } $(#[$meta:meta])? @@ -61,7 +61,7 @@ macro_rules! integer_from_lexical { if !format.is_valid() { return Err(format.error()); } - Self::parse_partial::<$unsigned, FORMAT>(bytes) + Self::parse_partial::(bytes) } } )*) diff --git a/lexical-parse-integer/src/compact.rs b/lexical-parse-integer/src/compact.rs index 28b72596..da18d086 100644 --- a/lexical-parse-integer/src/compact.rs +++ b/lexical-parse-integer/src/compact.rs @@ -5,28 +5,25 @@ #![cfg(feature = "compact")] #![doc(hidden)] -use crate::shared::is_overflow; use lexical_util::digit::char_to_digit_const; use lexical_util::format::NumberFormat; use lexical_util::iterator::{AsBytes, BytesIter}; -use lexical_util::num::{as_cast, Integer, UnsignedInteger}; +use lexical_util::num::{as_cast, Integer}; use lexical_util::result::Result; use lexical_util::step::min_step; /// Algorithm for the complete parser. -pub fn algorithm_complete(bytes: &[u8]) -> Result +pub fn algorithm_complete(bytes: &[u8]) -> Result where T: Integer, - Unsigned: UnsignedInteger, { - algorithm!(bytes, FORMAT, T, Unsigned, parse_1digit, invalid_digit_complete, into_ok_complete) + algorithm!(bytes, FORMAT, T, parse_1digit, invalid_digit_complete, into_ok_complete) } /// Algorithm for the partial parser. -pub fn algorithm_partial(bytes: &[u8]) -> Result<(T, usize)> +pub fn algorithm_partial(bytes: &[u8]) -> Result<(T, usize)> where T: Integer, - Unsigned: UnsignedInteger, { - algorithm!(bytes, FORMAT, T, Unsigned, parse_1digit, invalid_digit_partial, into_ok_partial) + algorithm!(bytes, FORMAT, T, parse_1digit, invalid_digit_partial, into_ok_partial) } diff --git a/lexical-parse-integer/src/parse.rs b/lexical-parse-integer/src/parse.rs index af131f9a..e50205c0 100644 --- a/lexical-parse-integer/src/parse.rs +++ b/lexical-parse-integer/src/parse.rs @@ -8,23 +8,21 @@ use crate::algorithm::{algorithm_complete, algorithm_partial}; #[cfg(feature = "compact")] use crate::compact::{algorithm_complete, algorithm_partial}; -use lexical_util::num::{Integer, UnsignedInteger}; +use lexical_util::num::Integer; use lexical_util::result::Result; /// Parse integer trait, implemented in terms of the optimized back-end. pub trait ParseInteger: Integer { /// Forward complete parser parameters to the backend. #[cfg_attr(not(feature = "compact"), inline(always))] - fn parse_complete(bytes: &[u8]) -> Result { - algorithm_complete::<_, Unsigned, { FORMAT }>(bytes) + fn parse_complete(bytes: &[u8]) -> Result { + algorithm_complete::<_, { FORMAT }>(bytes) } /// Forward partial parser parameters to the backend. #[cfg_attr(not(feature = "compact"), inline(always))] - fn parse_partial( - bytes: &[u8], - ) -> Result<(Self, usize)> { - algorithm_partial::<_, Unsigned, { FORMAT }>(bytes) + fn parse_partial(bytes: &[u8]) -> Result<(Self, usize)> { + algorithm_partial::<_, { FORMAT }>(bytes) } } diff --git a/lexical-parse-integer/src/shared.rs b/lexical-parse-integer/src/shared.rs index 5e5c2906..744bb3ec 100644 --- a/lexical-parse-integer/src/shared.rs +++ b/lexical-parse-integer/src/shared.rs @@ -24,10 +24,6 @@ #![doc(hidden)] -use lexical_util::format::NumberFormat; -use lexical_util::num::{as_cast, Integer, UnsignedInteger}; -use lexical_util::step::max_step; - /// Return an error, returning the index and the error. macro_rules! into_error { ($code:ident, $index:expr) => { @@ -51,15 +47,7 @@ macro_rules! into_ok_partial { /// Return an error for a complete parser upon an invalid digit. macro_rules! invalid_digit_complete { - ( - $value:ident, - $iter:ident, - $format:ident, - $is_negative:ident, - $start_index:ident, - $t:ident, - $u:ident - ) => {{ + ($value:ident, $iter:ident) => {{ // Don't do any overflow checking here: we don't need it. into_error!(InvalidDigit, $iter.cursor() - 1) }}; @@ -68,29 +56,10 @@ macro_rules! invalid_digit_complete { /// Return a value for a partial parser upon an invalid digit. /// This checks for numeric overflow, and returns the appropriate error. macro_rules! invalid_digit_partial { - ( - $value:ident, - $iter:ident, - $format:ident, - $is_negative:ident, - $start_index:ident, - $t:ident, - $u:ident - ) => {{ - let radix = NumberFormat::<{ $format }>::MANTISSA_RADIX; - let count = $iter.current_count() - $start_index - 1; - if is_overflow::<$t, $u, $format>($value, count, $is_negative) { - let min = min_step(radix, <$t as Integer>::BITS, <$t>::IS_SIGNED); - if <$t>::IS_SIGNED && $is_negative { - into_error!(Underflow, (count - 1).min(min + 1)) - } else { - into_error!(Overflow, (count - 1).min(min + 1)) - } - } else if <$t>::IS_SIGNED && $is_negative { - into_ok_partial!($value.wrapping_neg(), $iter.cursor() - 1) - } else { - into_ok_partial!($value, $iter.cursor() - 1) - } + ($value:ident, $iter:ident) => {{ + let cursor = $iter.cursor(); + // NOTE: The value is already positive/negative + into_ok_partial!($value, cursor - 1) }}; } @@ -140,43 +109,6 @@ macro_rules! parse_sign { }; } -/// Determine if the value has overflowed. -#[cfg_attr(not(feature = "compact"), inline(always))] -pub(super) fn is_overflow( - value: U, - count: usize, - is_negative: bool, -) -> bool -where - T: Integer, - U: UnsignedInteger, -{ - let format = NumberFormat::<{ FORMAT }> {}; - - let max = max_step(format.radix(), T::BITS, T::IS_SIGNED); - let radix: U = as_cast(format.radix()); - let min_value: U = radix.pow(max as u32 - 1); - if T::IS_SIGNED { - // Signed type: have to deal with 2's complement. - let max_value: U = as_cast::(T::MAX) + U::ONE; - if count > max - || (count == max - && (value < min_value || value > max_value || (!is_negative && value == max_value))) - { - // Must have overflowed, or wrapped. - // 1. Guaranteed overflow due to too many digits. - // 2. Guaranteed overflow due to wrap. - // 3. Guaranteed overflow since it's too large for the signed type. - // 4. Guaranteed overflow due to 2's complement. - return true; - } - } else if count > max || (count == max && value < min_value) { - // Must have overflowed: too many digits or wrapped. - return true; - } - false -} - /// Parse the value for the given type. macro_rules! parse_value { ( @@ -185,40 +117,47 @@ macro_rules! parse_value { $format:ident, $start_index:ident, $t:ident, - $u:ident, $parser:ident, $invalid_digit:ident, - $into_ok:ident + $into_ok:ident, + $op:ident ) => {{ // Use a simple optimization: parse as an unsigned integer, using // unsigned arithmetic , avoiding any branching in the initial stage. // We can then validate the input based on the signed integer limits, // and cast the value over, which is fast. Leads to substantial // improvements due to decreased branching for all but `i8`. - let mut value = <$u>::ZERO; + let mut value = <$t>::ZERO; + let mut overflow = false; let format = NumberFormat::<{ $format }> {}; - $parser!(value, $iter, $format, $is_negative, $start_index, $t, $u, $invalid_digit); + $parser!( + value, + $iter, + $format, + $is_negative, + $start_index, + $t, + $invalid_digit, + overflow, + $op + ); let count = $iter.current_count() - $start_index; + let length = $iter.length(); - if is_overflow::<$t, $u, $format>(value, count, $is_negative) { + if overflow { + _ = length; let min = min_step(format.radix(), <$t as Integer>::BITS, <$t>::IS_SIGNED); if <$t>::IS_SIGNED && $is_negative { into_error!(Underflow, (count - 1).min(min + 1)) } else { into_error!(Overflow, (count - 1).min(min + 1)) } - } else if <$t>::IS_SIGNED && $is_negative { - // Need to cast it to the signed type first, so we don't - // get an invalid representation for i128 if it's widened. - $into_ok!(as_cast::<$t, _>(value.wrapping_neg()), $iter.length()) } else { - $into_ok!(value, $iter.length()) + $into_ok!(value, length) } }}; } -/// Parse a single digit at a time. -/// This has no multiple-digit optimizations. #[rustfmt::skip] macro_rules! parse_1digit { ( @@ -228,8 +167,10 @@ macro_rules! parse_1digit { $is_negative:ident, $start_index:ident, $t:ident, - $u:ident, - $invalid_digit:ident + $invalid_digit:ident, + $overflow:ident, + // This is the checked_add or checked_sub + $op:ident ) => {{ let format = NumberFormat::<{ $format }>; let radix = NumberFormat::<{ $format }>::MANTISSA_RADIX; @@ -262,19 +203,17 @@ macro_rules! parse_1digit { } } // Might have handled our base-prefix here. - return $invalid_digit!( - $value, - $iter, - $format, - $is_negative, - $start_index, - $t, - $u - ); + return $invalid_digit!($value, $iter); }, }; - $value = $value.wrapping_mul(as_cast(radix)); - $value = $value.wrapping_add(as_cast(digit)); + let optvalue = $value.checked_mul(as_cast(radix)) + .and_then(|x| x.$op(as_cast(digit))); + if let Some(unwrapped) = optvalue { + $value = unwrapped; + } else { + $overflow = true; + break; + } } }}; } @@ -289,7 +228,6 @@ macro_rules! algorithm { $bytes:ident, $format:ident, $t:ident, - $u:ident, $parser:ident, $invalid_digit:ident, $into_ok:ident @@ -380,16 +318,30 @@ macro_rules! algorithm { // and even if parsing a 64-bit integer is marginally faster, it // culminates in **way** slower performance overall for simple // integers, and no improvement for large integers. - parse_value!( - iter, - is_negative, - $format, - start_index, - $t, - $u, - $parser, - $invalid_digit, - $into_ok - ) + if is_negative { + parse_value!( + iter, + is_negative, + $format, + start_index, + $t, + $parser, + $invalid_digit, + $into_ok, + checked_sub + ) + } else { + parse_value!( + iter, + is_negative, + $format, + start_index, + $t, + $parser, + $invalid_digit, + $into_ok, + checked_add + ) + } }}; } diff --git a/lexical-parse-integer/tests/algorithm_tests.rs b/lexical-parse-integer/tests/algorithm_tests.rs index e8b7d9d3..4d2ab310 100644 --- a/lexical-parse-integer/tests/algorithm_tests.rs +++ b/lexical-parse-integer/tests/algorithm_tests.rs @@ -121,14 +121,14 @@ fn test_try_parse_8digits() { #[cfg(feature = "power-of-two")] macro_rules! parse_radix { ($i:literal) => { - |bytes: &[u8]| algorithm::algorithm_partial::(bytes) + |bytes: &[u8]| algorithm::algorithm_partial::(bytes) }; } #[test] fn algorithm_test() { - let parse_u32 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); - let parse_i32 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); + let parse_u32 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); + let parse_i32 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); assert_eq!(parse_u32(b"12345"), Ok((12345, 5))); assert_eq!(parse_u32(b"+12345"), Ok((12345, 6))); @@ -160,8 +160,8 @@ fn algorithm_test() { #[test] fn algorithm_128_test() { - let parse_u128 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); - let parse_i128 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); + let parse_u128 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); + let parse_i128 = |bytes: &[u8]| algorithm::algorithm_partial::(bytes); assert_eq!(parse_u128(b"12345"), Ok((12345, 5))); assert_eq!(parse_u128(b"+12345"), Ok((12345, 6))); diff --git a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index 329b244f..295af355 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -96,7 +96,7 @@ fn i64_decimal_test() { // Add tests discovered via fuzzing. This won't necessarily be the // proper index, since we use multi-digit parsing. - assert!(i64::from_lexical(b"406260572150672006000066000000060060007667760000000000000000000+00000006766767766666767665670000000000000000000000666").err().unwrap().is_invalid_digit()); + assert!(i64::from_lexical(b"406260572150672006000066000000060060007667760000000000000000000+00000006766767766666767665670000000000000000000000666").err().unwrap().is_overflow()); assert!(i64::from_lexical(b"406260572150672006000066000000060060007667760000000000000000000") .err() .unwrap() diff --git a/lexical-parse-integer/tests/compact_tests.rs b/lexical-parse-integer/tests/compact_tests.rs index 84aa7871..b199c92e 100644 --- a/lexical-parse-integer/tests/compact_tests.rs +++ b/lexical-parse-integer/tests/compact_tests.rs @@ -5,8 +5,8 @@ use lexical_util::format::STANDARD; #[test] fn algorithm_test() { - let parse_u32 = |digits: &[u8]| compact::algorithm_partial::(digits); - let parse_i32 = |digits: &[u8]| compact::algorithm_partial::(digits); + let parse_u32 = |digits: &[u8]| compact::algorithm_partial::(digits); + let parse_i32 = |digits: &[u8]| compact::algorithm_partial::(digits); assert_eq!(parse_u32(b"12345"), Ok((12345, 5))); assert_eq!(parse_u32(b"+12345"), Ok((12345, 6))); diff --git a/lexical-parse-integer/tests/issue_91_tests.rs b/lexical-parse-integer/tests/issue_91_tests.rs new file mode 100644 index 00000000..f31b1e8b --- /dev/null +++ b/lexical-parse-integer/tests/issue_91_tests.rs @@ -0,0 +1,27 @@ +use lexical_parse_integer::FromLexical; + +#[test] +fn issue_91_test() { + // Derived from: + // https://github.com/Alexhuszagh/rust-lexical/issues/91 + assert!(u8::from_lexical(b"354").is_err()); + assert!(u8::from_lexical(b"355").is_err()); + assert!(u8::from_lexical(b"356").is_err()); + assert!(u8::from_lexical(b"357").is_err()); + assert!(u8::from_lexical(b"358").is_err()); + assert!(u8::from_lexical(b"510").is_err()); + assert!(u8::from_lexical(b"511").is_err()); + assert!(u8::from_lexical(b"512").is_err()); + assert!(u8::from_lexical(b"513").is_err()); + assert!(u8::from_lexical(b"514").is_err()); + assert!(u8::from_lexical(b"612").is_err()); + assert!(u8::from_lexical(b"999").is_err()); + assert!(u8::from_lexical(b"1000").is_err()); + + let n = u32::MAX as u64 + 1_000_000_000; + assert!(u32::from_lexical((n - 1).to_string().as_bytes()).is_err()); + assert!(u32::from_lexical(n.to_string().as_bytes()).is_err()); + assert!(u32::from_lexical((n + 1).to_string().as_bytes()).is_err()); + + assert!(u8::from_lexical(b"357").is_err()); +} diff --git a/lexical-util/src/num.rs b/lexical-util/src/num.rs index 2e94588d..91b4a140 100644 --- a/lexical-util/src/num.rs +++ b/lexical-util/src/num.rs @@ -383,10 +383,13 @@ pub trait Integer: fn leading_zeros(self) -> u32; fn trailing_zeros(self) -> u32; fn pow(self, exp: u32) -> Self; + fn checked_pow(self, exp: u32) -> Option; + fn overflowing_pow(self, exp: u32) -> (Self, bool); fn checked_add(self, i: Self) -> Option; fn checked_sub(self, i: Self) -> Option; fn checked_mul(self, i: Self) -> Option; fn overflowing_add(self, i: Self) -> (Self, bool); + fn overflowing_sub(self, i: Self) -> (Self, bool); fn overflowing_mul(self, i: Self) -> (Self, bool); fn wrapping_add(self, i: Self) -> Self; fn wrapping_sub(self, i: Self) -> Self; @@ -484,6 +487,11 @@ macro_rules! integer_impl { $t::overflowing_add(self, i) } + #[inline] + fn overflowing_sub(self, i: Self) -> (Self, bool) { + $t::overflowing_sub(self, i) + } + #[inline] fn overflowing_mul(self, i: Self) -> (Self, bool) { $t::overflowing_mul(self, i) @@ -514,6 +522,16 @@ macro_rules! integer_impl { Self::pow(self, exp) } + #[inline] + fn checked_pow(self, exp: u32) -> Option { + Self::checked_pow(self, exp) + } + + #[inline] + fn overflowing_pow(self, exp: u32) -> (Self, bool) { + Self::overflowing_pow(self, exp) + } + #[inline] fn saturating_add(self, i: Self) -> Self { $t::saturating_add(self, i) @@ -532,7 +550,7 @@ macro_rules! integer_impl { )*) } -integer_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize } +integer_impl! { u8 u16 u32 u64 u128 i8 i16 i32 i64 i128 usize isize } // SIGNED INTEGER // -------------- diff --git a/scripts/asm.sh b/scripts/asm.sh index 50870693..2bbdf886 100755 --- a/scripts/asm.sh +++ b/scripts/asm.sh @@ -4,7 +4,7 @@ set -e # Change to our project home. -script_dir=`dirname "${BASH_SOURCE[0]}"` +script_dir=$(dirname "${BASH_SOURCE[0]}") cd "$script_dir"/../lexical-asm export RUSTFLAGS="--emit asm -C llvm-args=-x86-asm-syntax=intel"