From 46270c3d3cde25452b06383283f1130c144aa555 Mon Sep 17 00:00:00 2001 From: Ryann Graham Date: Thu, 29 Jun 2023 13:38:43 -0700 Subject: [PATCH] rust: add basic skipping The switch from enumerate() to a regular loop is actually slower for the single-stepping case, but the performance gains from being able to skip large chunks of data with minimal comparisons more than make up for it. This is the most basic optimization for this type of algorithm and this is the simplified version of it. If we're not in a match, check 20 characters ahead and if that also isn't a match, skip 20. If it is a match, then just continue on with checking each byte. --- README.md | 1 + main.rs | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dcf9ca2..a7bdd6e 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ implementations compare to each other. | ripgrep | 0m1.709s | 0m1.541s | 0m0.147s | | simple (Go) | 0m1.737s | 0m1.594s | 0m0.142s | | simple (Rust) | 0m1.461s | 0m1.325s | 0m0.131s | +| skip (Rust) | 0m0.231s | 0m0.105s | 0m0.124s | | simple (Node) | 0m6.458s | 0m6.043s | 0m0.627s | | custom (C) | **0m0.222s** | **0m0.079s** | **0m0.141s** | diff --git a/main.rs b/main.rs index 748b96f..2c8d4ec 100644 --- a/main.rs +++ b/main.rs @@ -25,15 +25,26 @@ fn hit(needle: &[u8]) { fn scan_slice(inb: &[u8]) -> usize { let mut count = 0; let len = inb.len(); - for (i, &b) in inb.into_iter().enumerate() { + let mut i = 0usize; + while i < len { + let b = inb[i]; + if count == 0 && i+20 < len { + let bs = inb[i+20]; + if !bs.is_ascii_digit() && !(b'a'..=b'f').contains(&bs) { + i += 20; + continue; + } + } if b.is_ascii_digit() || (b'a'..=b'f').contains(&b) { count += 1; + i += 1; continue } if count == 40 { hit(&inb[i-40..i]); } - count = 0 + count = 0; + i += 1; } if count == 40 { hit(&inb[len-40..]);