diff --git a/CHANGELOG b/CHANGELOG index 0201481a..8c1c71f9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Inlining inconsistency between public API methods (credit to @zheland) +- Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled. +- Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`. ## [1.0.1] 2024-09-16 diff --git a/README.md b/README.md index d96bf721..7603e81d 100644 --- a/README.md +++ b/README.md @@ -346,7 +346,6 @@ A benchmarks for values with a large integers. ![Simple Int64](https://github.com/Alexhuszagh/lexical-benchmarks/raw/main/results/da4728e/plot/random_simple_int64%20-%20write%20float%20-%20dtoa,fmt,lexical,ryu.png) - **Random** ![Random](https://github.com/Alexhuszagh/lexical-benchmarks/raw/main/results/da4728e/plot/json%20-%20write%20float%20-%20dtoa,fmt,lexical,ryu.png) diff --git a/lexical-core/tests/issue_97_tests.rs b/lexical-core/tests/issue_97_tests.rs new file mode 100644 index 00000000..d5efbe67 --- /dev/null +++ b/lexical-core/tests/issue_97_tests.rs @@ -0,0 +1,62 @@ +#![cfg(all(feature = "parse", feature = "format"))] + +use core::num; + +use lexical_core::{Error, FromLexical, FromLexicalWithOptions, NumberFormatBuilder}; + +#[test] +fn issue_97_test() { + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .build(); + + let fopts = lexical_core::ParseFloatOptions::new(); + let iopts = lexical_core::ParseIntegerOptions::new(); + + assert_eq!( + i64::from_lexical_with_options::<FMT>(b"_1234", &iopts), + Err(Error::InvalidDigit(0)) + ); + assert_eq!( + i64::from_lexical_with_options::<FMT>(b"1234_", &iopts), + Err(Error::InvalidDigit(4)) + ); + + assert_eq!( + f64::from_lexical_with_options::<FMT>(b"_1234", &fopts), + Err(Error::InvalidDigit(0)) + ); + assert_eq!( +
f64::from_lexical_with_options::<FMT>(b"1234_", &fopts), + Err(Error::InvalidDigit(4)) + ); + + assert_eq!( + f64::from_lexical_with_options::<FMT>(b"_12.34", &fopts), + Err(Error::InvalidDigit(0)) + ); + assert_eq!( + f64::from_lexical_with_options::<FMT>(b"12.34_", &fopts), + Err(Error::InvalidDigit(5)) + ); + + assert_eq!(f64::from_lexical_with_options::<FMT>(b"1_2.34", &fopts), Ok(12.34)); +} + +#[test] +fn issue_97_nofmt_test() { + assert_eq!(i64::from_lexical(b"_1234"), Err(Error::InvalidDigit(0))); + assert_eq!(i64::from_lexical(b"1234_"), Err(Error::InvalidDigit(4))); + + assert_eq!(f64::from_lexical(b"_1234"), Err(Error::InvalidDigit(0))); + assert_eq!(f64::from_lexical(b"1234_"), Err(Error::InvalidDigit(4))); + + assert_eq!(f64::from_lexical(b"_12.34"), Err(Error::InvalidDigit(0))); + assert_eq!(f64::from_lexical(b"12.34_"), Err(Error::InvalidDigit(5))); + + assert_eq!(f64::from_lexical(b"_.34"), Err(Error::InvalidDigit(0))); + assert_eq!(f64::from_lexical(b"0_0.34"), Err(Error::InvalidDigit(1))); + + assert_eq!(f64::from_lexical(b".34"), Ok(0.34)); +} diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index e4acbb0d..e667cc0d 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -566,7 +566,8 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( let mut implicit_exponent: i64; let int_end = n_digits as i64; let mut fraction_digits = None; - if byte.first_is_cased(decimal_point) { + let has_decimal = byte.first_is_cased(decimal_point); + if has_decimal { // SAFETY: byte cannot be empty due to first_is unsafe { byte.step_unchecked() }; let before = byte.clone(); @@ -599,19 +600,33 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( } } + // NOTE: Check if we have our exponent **BEFORE** checking if the + // mantissa is empty, so we can ensure the correct error is returned when no mantissa digits were parsed. + let has_exponent = byte + .first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format")); + + // check to see if we have any invalid 
leading zeros n_digits += n_after_dot; if format.required_mantissa_digits() && n_digits == 0 { - return Err(Error::EmptyMantissa(byte.cursor())); + let any_digits = start.clone().integer_iter().peek().is_some(); + // NOTE: This is because numbers like `_12.34` have significant digits, + // they just don't have a valid digit (#97). + if has_decimal || has_exponent || !any_digits { + return Err(Error::EmptyMantissa(byte.cursor())); + } else { + return Err(Error::InvalidDigit(start.cursor())); + } } // EXPONENT // Handle scientific notation. let mut explicit_exponent = 0_i64; - let is_exponent = byte - .first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format")); - if is_exponent { - // SAFETY: byte cannot be empty due to `first_is` from `is_exponent`.` + if has_exponent { + // NOTE: See above for the safety invariant above `required_mantissa_digits`. + // This is separated for correctness concerns, and therefore the two cannot + // be on the same line. + // SAFETY: byte cannot be empty due to `first_is` from `has_exponent`. unsafe { byte.step_unchecked() }; // Check float format syntax checks.