Ensure leading invalid digits for floats correctly error.

This corrects the behavior from reporting an EmptyMantissa to correctly report an InvalidDigit. Closes #97
Alexhuszagh · Sep 20, 2024 · 7317a23 · 7317a23
1 parent f07e3db
commit 7317a23
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 7 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - Inlining inconsistency between public API methods (credit to @zheland)
+- Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled.
+- Inconsistent errors when an invalid leading digit is found for floating point numbers: these now always report `Error::InvalidDigit`.
 
 ## [1.0.1] 2024-09-16
 

diff --git a/README.md b/README.md
@@ -346,7 +346,6 @@ A benchmarks for values with a large integers.
 
 ![Simple Int64](https://github.com/Alexhuszagh/lexical-benchmarks/raw/main/results/da4728e/plot/random_simple_int64%20-%20write%20float%20-%20dtoa,fmt,lexical,ryu.png)
 
-
 **Random**
 
 ![Random](https://github.com/Alexhuszagh/lexical-benchmarks/raw/main/results/da4728e/plot/json%20-%20write%20float%20-%20dtoa,fmt,lexical,ryu.png)

diff --git a/lexical-core/tests/issue_97_tests.rs b/lexical-core/tests/issue_97_tests.rs
@@ -0,0 +1,62 @@
+#![cfg(all(feature = "parse", feature = "format"))]
+
+use core::num;
+
+use lexical_core::{Error, FromLexical, FromLexicalWithOptions, NumberFormatBuilder};
+
+#[test]
+fn issue_97_test() {
+    const FMT: u128 = NumberFormatBuilder::new()
+        .digit_separator(num::NonZeroU8::new(b'_'))
+        .internal_digit_separator(true)
+        .build();
+
+    let fopts = lexical_core::ParseFloatOptions::new();
+    let iopts = lexical_core::ParseIntegerOptions::new();
+
+    assert_eq!(
+        i64::from_lexical_with_options::<FMT>(b"_1234", &iopts),
+        Err(Error::InvalidDigit(0))
+    );
+    assert_eq!(
+        i64::from_lexical_with_options::<FMT>(b"1234_", &iopts),
+        Err(Error::InvalidDigit(4))
+    );
+
+    assert_eq!(
+        f64::from_lexical_with_options::<FMT>(b"_1234", &fopts),
+        Err(Error::InvalidDigit(0))
+    );
+    assert_eq!(
+        f64::from_lexical_with_options::<FMT>(b"1234_", &fopts),
+        Err(Error::InvalidDigit(4))
+    );
+
+    assert_eq!(
+        f64::from_lexical_with_options::<FMT>(b"_12.34", &fopts),
+        Err(Error::InvalidDigit(0))
+    );
+    assert_eq!(
+        f64::from_lexical_with_options::<FMT>(b"12.34_", &fopts),
+        Err(Error::InvalidDigit(5))
+    );
+
+    assert_eq!(f64::from_lexical_with_options::<FMT>(b"1_2.34", &fopts), Ok(12.34));
+}
+
+#[test]
+fn issue_97_nofmt_test() {
+    assert_eq!(i64::from_lexical(b"_1234"), Err(Error::InvalidDigit(0)));
+    assert_eq!(i64::from_lexical(b"1234_"), Err(Error::InvalidDigit(4)));
+
+    assert_eq!(f64::from_lexical(b"_1234"), Err(Error::InvalidDigit(0)));
+    assert_eq!(f64::from_lexical(b"1234_"), Err(Error::InvalidDigit(4)));
+
+    assert_eq!(f64::from_lexical(b"_12.34"), Err(Error::InvalidDigit(0)));
+    assert_eq!(f64::from_lexical(b"12.34_"), Err(Error::InvalidDigit(5)));
+
+    assert_eq!(f64::from_lexical(b"_.34"), Err(Error::InvalidDigit(0)));
+    assert_eq!(f64::from_lexical(b"0_0.34"), Err(Error::InvalidDigit(1)));
+
+    assert_eq!(f64::from_lexical(b".34"), Ok(0.34));
+}
diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs
@@ -566,7 +566,8 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
     let mut implicit_exponent: i64;
     let int_end = n_digits as i64;
     let mut fraction_digits = None;
-    if byte.first_is_cased(decimal_point) {
+    let has_decimal = byte.first_is_cased(decimal_point);
+    if has_decimal {
         // SAFETY: byte cannot be empty due to first_is
         unsafe { byte.step_unchecked() };
         let before = byte.clone();
@@ -599,19 +600,33 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
         }
     }
 
+    // NOTE: Check if we have our exponent **BEFORE** checking if the
+    // mantissa is empty, so we can ensure the correct error is reported
+    // (`EmptyMantissa` vs. `InvalidDigit`).
+    let has_exponent = byte
+        .first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format"));
+
+    // Check to see if we have an empty mantissa or an invalid leading digit.
     n_digits += n_after_dot;
     if format.required_mantissa_digits() && n_digits == 0 {
-        return Err(Error::EmptyMantissa(byte.cursor()));
+        let any_digits = start.clone().integer_iter().peek().is_some();
+        // NOTE: This is because numbers like `_12.34` have significant digits,
+        // they just don't have a valid digit (#97).
+        if has_decimal || has_exponent || !any_digits {
+            return Err(Error::EmptyMantissa(byte.cursor()));
+        } else {
+            return Err(Error::InvalidDigit(start.cursor()));
+        }
     }
 
     // EXPONENT
 
     // Handle scientific notation.
     let mut explicit_exponent = 0_i64;
-    let is_exponent = byte
-        .first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format"));
-    if is_exponent {
-        // SAFETY: byte cannot be empty due to `first_is` from `is_exponent`.`
+    if has_exponent {
+        // NOTE: `has_exponent` is computed earlier, before the
+        // `required_mantissa_digits` check. Detecting the exponent is kept separate
+        // from stepping past it for correctness concerns, and therefore the two
+        // cannot be on the same line.
+        // SAFETY: byte cannot be empty due to `first_is` from `has_exponent`.
         unsafe { byte.step_unchecked() };
 
         // Check float format syntax checks.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -346,7 +346,6 @@ A benchmarks for values with a large integers.

		![Simple Int64](https://github.com/Alexhuszagh/lexical-benchmarks/raw/main/results/da4728e/plot/random_simple_int64%20-%20write%20float%20-%20dtoa,fmt,lexical,ryu.png)


		Random

		![Random](https://github.com/Alexhuszagh/lexical-benchmarks/raw/main/results/da4728e/plot/json%20-%20write%20float%20-%20dtoa,fmt,lexical,ryu.png)
Expand Down