Skip to content

Commit

Permalink
Make multi-digit optimizations optional.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexhuszagh committed Sep 12, 2024
1 parent a46edd5 commit 866a420
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 36 deletions.
30 changes: 17 additions & 13 deletions lexical-parse-integer/src/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#![doc(hidden)]

use crate::Options;
use lexical_util::buffer::Buffer;
use lexical_util::digit::char_to_digit_const;
use lexical_util::error::Error;
Expand Down Expand Up @@ -366,11 +367,9 @@ macro_rules! parse_1digit_checked {
/// optimizations. Otherwise, if the type size is large and we're not manually
/// skipping manual optimizations, then we do this here.
macro_rules! parse_digits_unchecked {
($value:ident, $iter:ident, $add_op:ident, $start_index:ident, $invalid_digit:ident, $is_end:expr) => {{
// TODO: Disable multi-digit optimizations by default, make configurable
const DISABLE_MULTIDIGIT: bool = false;
($value:ident, $iter:ident, $add_op:ident, $start_index:ident, $invalid_digit:ident, $no_multi_digit:expr, $is_end:expr) => {{
let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter);
let use_multi = can_multi && !DISABLE_MULTIDIGIT;
let use_multi = can_multi && !$no_multi_digit;

// these cannot overflow. also, we use at most 3 for a 128-bit float and 1 for a 64-bit float
// NOTE: Miri will complain about this if we use radices >= 16 but since they won't go
Expand Down Expand Up @@ -408,6 +407,7 @@ macro_rules! parse_digits_checked {
$start_index:ident,
$invalid_digit:ident,
$overflow:ident,
$no_multi_digit:expr,
$overflow_digits:expr
) => {{
// Can use the unchecked for the max_digits here
Expand All @@ -419,6 +419,7 @@ macro_rules! parse_digits_checked {
$add_op_uc,
$start_index,
$invalid_digit,
$no_multi_digit,
false
);
}
Expand All @@ -436,7 +437,7 @@ macro_rules! parse_digits_checked {
/// * `into_ok` - Behavior when returning a valid value.
#[rustfmt::skip]
macro_rules! algorithm {
($bytes:ident, $into_ok:ident, $invalid_digit:ident) => {{
($bytes:ident, $into_ok:ident, $invalid_digit:ident, $no_multi_digit:expr) => {{
// WARNING:
// --------
// None of this code can be changed for optimization reasons.
Expand Down Expand Up @@ -532,13 +533,13 @@ macro_rules! algorithm {
// integers, and no improvement for large integers.
let mut value = T::ZERO;
if cannot_overflow && is_negative {
parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, true);
parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, $no_multi_digit, true);
} if cannot_overflow {
parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, true);
parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, $no_multi_digit, true);
} else if is_negative {
parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, overflow_digits);
parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, $no_multi_digit, overflow_digits);
} else {
parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, overflow_digits);
parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits);
}

$into_ok!(value, iter.length())
Expand All @@ -547,18 +548,21 @@ macro_rules! algorithm {

/// Algorithm for the complete parser.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn algorithm_complete<T, const FORMAT: u128>(bytes: &[u8]) -> Result<T>
pub fn algorithm_complete<T, const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<T>
where
T: Integer,
{
algorithm!(bytes, into_ok_complete, invalid_digit_complete)
algorithm!(bytes, into_ok_complete, invalid_digit_complete, options.get_no_multi_digit())
}

/// Algorithm for the partial parser.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn algorithm_partial<T, const FORMAT: u128>(bytes: &[u8]) -> Result<(T, usize)>
pub fn algorithm_partial<T, const FORMAT: u128>(
bytes: &[u8],
options: &Options,
) -> Result<(T, usize)>
where
T: Integer,
{
algorithm!(bytes, into_ok_partial, invalid_digit_partial)
algorithm!(bytes, into_ok_partial, invalid_digit_partial, options.get_no_multi_digit())
}
14 changes: 7 additions & 7 deletions lexical-parse-integer/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#![doc(hidden)]

use crate::options::Options;
use crate::options::{Options, STANDARD as DEFAULT_OPTIONS};
use crate::parse::ParseInteger;
use lexical_util::format::{NumberFormat, STANDARD};
use lexical_util::{from_lexical, from_lexical_with_options};
Expand All @@ -20,7 +20,7 @@ macro_rules! integer_from_lexical {
#[cfg_attr(not(feature = "compact"), inline)]
fn from_lexical(bytes: &[u8]) -> lexical_util::result::Result<Self>
{
Self::parse_complete::<STANDARD>(bytes)
Self::parse_complete::<STANDARD>(bytes, &DEFAULT_OPTIONS)
}

$(#[$meta:meta])?
Expand All @@ -29,7 +29,7 @@ macro_rules! integer_from_lexical {
bytes: &[u8],
) -> lexical_util::result::Result<(Self, usize)>
{
Self::parse_partial::<STANDARD>(bytes)
Self::parse_partial::<STANDARD>(bytes, &DEFAULT_OPTIONS)
}
}

Expand All @@ -40,28 +40,28 @@ macro_rules! integer_from_lexical {
#[cfg_attr(not(feature = "compact"), inline)]
fn from_lexical_with_options<const FORMAT: u128>(
bytes: &[u8],
_: &Self::Options,
options: &Self::Options,
) -> lexical_util::result::Result<Self>
{
let format = NumberFormat::<{ FORMAT }> {};
if !format.is_valid() {
return Err(format.error());
}
Self::parse_complete::<FORMAT>(bytes)
Self::parse_complete::<FORMAT>(bytes, options)
}

$(#[$meta:meta])?
#[cfg_attr(not(feature = "compact"), inline)]
fn from_lexical_partial_with_options<const FORMAT: u128>(
bytes: &[u8],
_: &Self::Options,
options: &Self::Options,
) -> lexical_util::result::Result<(Self, usize)>
{
let format = NumberFormat::<{ FORMAT }> {};
if !format.is_valid() {
return Err(format.error());
}
Self::parse_partial::<FORMAT>(bytes)
Self::parse_partial::<FORMAT>(bytes, options)
}
}
)*)
Expand Down
94 changes: 87 additions & 7 deletions lexical-parse-integer/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,41 @@ use static_assertions::const_assert;

/// Builder for `Options`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct OptionsBuilder {}
pub struct OptionsBuilder {
/// Disable multi-digit optimizations.
///
/// Using multi-digit optimizations allows parsing many digits
/// from longer input strings at once which can dramatically
/// improve performance (>70%) for long strings, but the
/// increased branching can decrease performance for simple
/// strings by 5-20%. Choose based on your inputs.
no_multi_digit: bool,
}

impl OptionsBuilder {
/// Create new options builder with default options.
#[inline(always)]
pub const fn new() -> Self {
Self {}
Self {
no_multi_digit: true,
}
}

// GETTERS

/// Get whether multi-digit optimizations are disabled.
///
/// Returns the builder's current `no_multi_digit` flag: `true` means the
/// multi-digit optimizations are turned off.
#[inline(always)]
pub const fn get_no_multi_digit(&self) -> bool {
self.no_multi_digit
}

// SETTERS

/// Set whether multi-digit optimizations are disabled.
///
/// Takes the builder by value, stores `no_multi_digit` in it and returns
/// the updated builder so that calls can be chained. Passing `true` turns
/// the multi-digit optimizations off.
#[inline(always)]
pub const fn no_multi_digit(mut self, no_multi_digit: bool) -> Self {
self.no_multi_digit = no_multi_digit;
self
}

// BUILDERS
Expand All @@ -30,7 +58,9 @@ impl OptionsBuilder {
/// Safe as long as `is_valid` is true.
#[inline(always)]
pub const unsafe fn build_unchecked(&self) -> Options {
Options {}
Options {
no_multi_digit: self.no_multi_digit,
}
}

/// Build the Options struct.
Expand Down Expand Up @@ -62,19 +92,49 @@ impl Default for OptionsBuilder {
/// # }
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Options {}
pub struct Options {
/// Disable multi-digit optimizations.
///
/// Using multi-digit optimizations allows parsing many digits
/// from longer input strings at once which can dramatically
/// improve performance (>70%) for long strings, but the
/// increased branching can decrease performance for simple
/// strings by 5-20%. Choose based on your inputs.
no_multi_digit: bool,
}

impl Options {
/// Create options with default values.
#[inline(always)]
pub const fn new() -> Self {
Self {}
// SAFETY: always safe since it uses the default arguments.
unsafe { Self::builder().build_unchecked() }
}

// GETTERS

/// Check if the options state is valid.
#[inline(always)]
pub const fn is_valid(&self) -> bool {
true
self.rebuild().is_valid()
}

/// Get whether multi-digit optimizations are disabled.
///
/// Returns the stored `no_multi_digit` flag: `true` means the multi-digit
/// optimizations are turned off.
#[inline(always)]
pub const fn get_no_multi_digit(&self) -> bool {
self.no_multi_digit
}

// SETTERS

/// Set whether multi-digit optimizations are disabled.
///
/// Overwrites the stored `no_multi_digit` flag in place; passing `true`
/// turns the multi-digit optimizations off.
///
/// # Safety
///
/// Always safe, just marked as unsafe for API compatibility.
#[inline(always)]
pub unsafe fn no_multi_digit(&mut self, no_multi_digit: bool) {
self.no_multi_digit = no_multi_digit;
}

// BUILDERS
Expand All @@ -88,7 +148,9 @@ impl Options {
/// Create OptionsBuilder using existing values.
#[inline(always)]
pub const fn rebuild(&self) -> OptionsBuilder {
OptionsBuilder {}
OptionsBuilder {
no_multi_digit: self.no_multi_digit,
}
}
}

Expand All @@ -113,3 +175,21 @@ impl ParseOptions for Options {
#[rustfmt::skip]
pub const STANDARD: Options = Options::new();
const_assert!(STANDARD.is_valid());

/// Options optimized for small numbers.
///
/// Built with `no_multi_digit(true)`, i.e. with the multi-digit
/// optimizations disabled.
#[rustfmt::skip]
pub const SMALL_NUMBERS: Options = unsafe {
// SAFETY: `build_unchecked` is documented as safe whenever `is_valid` is
// true, and the `const_assert!` following this constant checks that at
// compile time.
Options::builder()
.no_multi_digit(true)
.build_unchecked()
};
const_assert!(SMALL_NUMBERS.is_valid());

/// Options optimized for large numbers and long strings.
///
/// Built with `no_multi_digit(false)`, i.e. with the multi-digit
/// optimizations enabled.
#[rustfmt::skip]
pub const LARGE_NUMBERS: Options = unsafe {
// SAFETY: `build_unchecked` is documented as safe whenever `is_valid` is
// true, and the `const_assert!` following this constant checks that at
// compile time.
Options::builder()
.no_multi_digit(false)
.build_unchecked()
};
const_assert!(LARGE_NUMBERS.is_valid());
9 changes: 5 additions & 4 deletions lexical-parse-integer/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

// Select the correct back-end.
use crate::algorithm::{algorithm_complete, algorithm_partial};
use crate::Options;

use lexical_util::num::Integer;
use lexical_util::result::Result;
Expand All @@ -12,14 +13,14 @@ use lexical_util::result::Result;
pub trait ParseInteger: Integer {
/// Forward complete parser parameters to the backend.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn parse_complete<const FORMAT: u128>(bytes: &[u8]) -> Result<Self> {
algorithm_complete::<_, { FORMAT }>(bytes)
fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
algorithm_complete::<_, { FORMAT }>(bytes, options)
}

/// Forward partial parser parameters to the backend.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn parse_partial<const FORMAT: u128>(bytes: &[u8]) -> Result<(Self, usize)> {
algorithm_partial::<_, { FORMAT }>(bytes)
fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<(Self, usize)> {
algorithm_partial::<_, { FORMAT }>(bytes, options)
}
}

Expand Down
15 changes: 10 additions & 5 deletions lexical-parse-integer/tests/algorithm_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
mod util;

use lexical_parse_integer::algorithm;
use lexical_parse_integer::options::SMALL_NUMBERS;
use lexical_util::format::STANDARD;
use lexical_util::iterator::AsBytes;
use proptest::prelude::*;
Expand Down Expand Up @@ -121,14 +122,16 @@ fn test_try_parse_8digits() {
#[cfg(feature = "power-of-two")]
macro_rules! parse_radix {
($i:literal) => {
|bytes: &[u8]| algorithm::algorithm_partial::<u32, { from_radix($i) }>(bytes)
|bytes: &[u8]| algorithm::algorithm_partial::<u32, { from_radix($i) }>(bytes, &SMALL_NUMBERS)
};
}

#[test]
fn algorithm_test() {
let parse_u32 = |bytes: &[u8]| algorithm::algorithm_partial::<u32, STANDARD>(bytes);
let parse_i32 = |bytes: &[u8]| algorithm::algorithm_partial::<i32, STANDARD>(bytes);
let parse_u32 =
|bytes: &[u8]| algorithm::algorithm_partial::<u32, STANDARD>(bytes, &SMALL_NUMBERS);
let parse_i32 =
|bytes: &[u8]| algorithm::algorithm_partial::<i32, STANDARD>(bytes, &SMALL_NUMBERS);

assert_eq!(parse_u32(b"12345"), Ok((12345, 5)));
assert_eq!(parse_u32(b"+12345"), Ok((12345, 6)));
Expand Down Expand Up @@ -160,8 +163,10 @@ fn algorithm_test() {

#[test]
fn algorithm_128_test() {
let parse_u128 = |bytes: &[u8]| algorithm::algorithm_partial::<u128, STANDARD>(bytes);
let parse_i128 = |bytes: &[u8]| algorithm::algorithm_partial::<i128, STANDARD>(bytes);
let parse_u128 =
|bytes: &[u8]| algorithm::algorithm_partial::<u128, STANDARD>(bytes, &SMALL_NUMBERS);
let parse_i128 =
|bytes: &[u8]| algorithm::algorithm_partial::<i128, STANDARD>(bytes, &SMALL_NUMBERS);

assert_eq!(parse_u128(b"12345"), Ok((12345, 5)));
assert_eq!(parse_u128(b"+12345"), Ok((12345, 6)));
Expand Down

0 comments on commit 866a420

Please sign in to comment.