From 46270c3d3cde25452b06383283f1130c144aa555 Mon Sep 17 00:00:00 2001
From: Ryann Graham <r.m.graham@gmail.com>
Date: Thu, 29 Jun 2023 13:38:43 -0700
Subject: [PATCH] rust: add basic skipping

The switch from enumerate() to a regular loop is actually slower for
the single-stepping case, but the performance gains from being able to
skip large chunks of data with minimal comparisons more than make up
for it.

This is the most basic optimization for this type of algorithm, in its
simplest form. If we're not in a match, check the byte 20 positions
ahead: if that byte isn't a hex character either, skip ahead 20, since
any 40-character match starting in that span would have to include it.
If it is a hex character, just continue on with checking each byte.
---
 README.md |  1 +
 main.rs   | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index dcf9ca2..a7bdd6e 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,7 @@ implementations compare to each other.
 | ripgrep | 0m1.709s | 0m1.541s | 0m0.147s |
 | simple (Go) | 0m1.737s | 0m1.594s | 0m0.142s |
 | simple (Rust) | 0m1.461s | 0m1.325s | 0m0.131s |
+| skip (Rust) | 0m0.231s | 0m0.105s | 0m0.124s |
 | simple (Node) | 0m6.458s | 0m6.043s | 0m0.627s |
 | custom (C) | **0m0.222s** | **0m0.079s** | **0m0.141s** |
 
diff --git a/main.rs b/main.rs
index 748b96f..2c8d4ec 100644
--- a/main.rs
+++ b/main.rs
@@ -25,15 +25,26 @@ fn hit(needle: &[u8]) {
 fn scan_slice(inb: &[u8]) -> usize {
 	let mut count = 0;
 	let len = inb.len();
-	for (i, &b) in inb.into_iter().enumerate() {
+	let mut i = 0usize;
+	while i < len {
+		let b = inb[i];
+		if count == 0 && i+20 < len {
+			let bs = inb[i+20];
+			if !bs.is_ascii_digit() && !(b'a'..=b'f').contains(&bs) {
+				i += 20;
+				continue;
+			}
+		}
 		if b.is_ascii_digit() || (b'a'..=b'f').contains(&b) {
 			count += 1;
+			i += 1;
 			continue
 		}
 		if count == 40 {
 			hit(&inb[i-40..i]);
 		}
-		count = 0
+		count = 0;
+		i += 1;
 	}
 	if count == 40 {
 		hit(&inb[len-40..]);