diff --git a/src/parser.rs b/src/parser.rs index 810cdec..5f5ca60 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -10,8 +10,8 @@ pub fn parse_regex(input: &str) -> Result { } fn parse_alternation(input: &str) -> Result<(Node, &str)> { - parse_concat(input).and_then(|(lhs, rest)| match rest.get(..1) { - Some("|") => { + parse_concat(input).and_then(|(lhs, rest)| match rest.chars().next() { + Some('|') => { parse_alternation(&rest[1..]).map(|(rhs, rest)| (Node::alternation(lhs, rhs), rest)) } _ => Ok((lhs, rest)), @@ -19,18 +19,18 @@ fn parse_alternation(input: &str) -> Result<(Node, &str)> { } fn parse_concat(input: &str) -> Result<(Node, &str)> { - parse_quantifier(input).and_then(|(lhs, rest)| match rest.get(..1) { - Some("|") | Some(")") | None => Ok((lhs, rest)), + parse_quantifier(input).and_then(|(lhs, rest)| match rest.chars().next() { + Some('|') | Some(')') | None => Ok((lhs, rest)), Some(_) => parse_concat(rest).map(|(rhs, rest)| (Node::concatenation(lhs, rhs), rest)), }) } fn parse_quantifier(input: &str) -> Result<(Node, &str)> { - parser_atom(input).and_then(|(result, rest)| match rest.get(..1) { - Some("+") => Ok((Node::plus(result), &rest[1..])), - Some("*") => Ok((Node::star(result), &rest[1..])), - Some("?") => Ok((Node::optional(result), &rest[1..])), - Some("{") => { + parser_atom(input).and_then(|(result, rest)| match rest.chars().next() { + Some('+') => Ok((Node::plus(result), &rest[1..])), + Some('*') => Ok((Node::star(result), &rest[1..])), + Some('?') => Ok((Node::optional(result), &rest[1..])), + Some('{') => { parse_range(&rest[1..]).map(|(range, rest)| (Node::range(result, range), rest)) } _ => Ok((result, rest)), @@ -38,22 +38,24 @@ fn parse_quantifier(input: &str) -> Result<(Node, &str)> { } fn parse_range(input: &str) -> Result<(Range, &str)> { - take_number(input).and_then(|(lower, rest)| match (lower, rest.get(..1)) { - (Some(lower), Some(",")) => { + take_number(input).and_then(|(lower, rest)| match (lower, rest.chars().next()) { + (Some(lower), Some(',')) => { parse_range_upper(&rest[1..]).map(|(upper, rest)| (Range::new(lower, upper), rest)) } - (Some(lower), Some("}")) => Ok((Range::new(lower, Some(lower)), &rest[1..])), + (Some(lower), Some('}')) => Ok((Range::new(lower, Some(lower)), &rest[1..])), _ => Err(ParsingError::InvalidRangeQuantifier), }) } fn parse_range_upper(input: &str) -> Result<(Option, &str)> { - match input.get(..1) { - Some("}") => Ok((None, &input[1..])), - Some(_) => take_number(input).and_then(|(number, rest)| match (number, rest.get(..1)) { - (Some(number), Some("}")) => Ok((Some(number), &rest[1..])), - _ => Err(ParsingError::InvalidRangeQuantifier), - }), + match input.chars().next() { + Some('}') => Ok((None, &input[1..])), + Some(_) => { + take_number(input).and_then(|(number, rest)| match (number, rest.chars().next()) { + (Some(number), Some('}')) => Ok((Some(number), &rest[1..])), + _ => Err(ParsingError::InvalidRangeQuantifier), + }) + } None => Err(ParsingError::InvalidRangeQuantifier), } } @@ -66,7 +68,10 @@ fn parser_atom(input: &str) -> Result<(Node, &str)> { '\\' => parse_metachar(&input[1..]), '.' => Ok((Node::Wildcard, &input[1..])), ')' => Ok((Node::Empty, input)), - _ => Ok((Node::Character(c), &input[c.len_utf8()..])), + _ => { + let rest = &input[c.len_utf8()..]; + Ok((Node::Character(c), rest)) + } }, None => Ok((Node::Empty, input)), } diff --git a/src/regex.rs b/src/regex.rs index 02d58ab..c096869 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -54,23 +54,29 @@ impl<'a> Regex { } pub fn captures(&self, input: &'a str) -> Option> { + let input_len = input.len(); + let mut char_count = 0; let mut captures = HashMap::new(); let mut named_captures = HashMap::new(); let mut states = HashSet::new(); let mut end = None; + let mut char_index_map = HashMap::new(); states.insert(INITAL_STATE); - for (i, ch) in input.char_indices() { + for (idx, ch) in input.char_indices() { + char_index_map.insert(idx, char_count); + char_count += 1; + states = states .iter() .flat_map(|&s| self.nfa.epsilon_closure(s)) .collect(); - self.update_captures(&mut captures, &mut named_captures, &states, i); + self.update_captures(&mut captures, &mut named_captures, &states, idx); if self.has_accepting_state(&states) { - end = Some(i) + end = Some(idx) } states = states @@ -83,15 +89,16 @@ impl<'a> Regex { } } + char_index_map.insert(input_len, char_count); states = states .iter() .flat_map(|&s| self.nfa.epsilon_closure(s)) .collect(); - self.update_captures(&mut captures, &mut named_captures, &states, input.len()); + self.update_captures(&mut captures, &mut named_captures, &states, input_len); if self.has_accepting_state(&states) { - end = Some(input.len()); + end = Some(input_len); } if end.is_none() { @@ -149,7 +156,7 @@ impl<'a> Regex { .collect(); if self.has_accepting_state(&states) { - end = Some(i + j) + end = Some(i + j + ch.len_utf8()); } if states.is_empty() { @@ -158,7 +165,7 @@ impl<'a> Regex { } if let Some(end) = end { - let m = Match::new(i, end, &input[i..=end]); + let m = Match::new(i, end, &input[i..end]); if !all { return vec![m]; @@ -424,4 +431,19 @@ mod test { assert_eq!(matches.get_name("hour"), Some(&Match::new(0, 2, "19"))); assert_eq!(matches.get_name("minute"), Some(&Match::new(3, 5, "30"))); } + + #[test] + fn test_find() { + let regex = Regex::new(r#"wh(at|o|y)"#).unwrap(); + let matches = regex.find_all("what? who? why?"); + + assert_eq!( + matches, + vec![ + Match::new(0, 4, "what"), + Match::new(6, 9, "who"), + Match::new(11, 14, "why") + ] + ); + } } diff --git a/src/wasm.rs b/src/wasm.rs index dc9d2ad..80ea9a1 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -1,6 +1,7 @@ use crate::{ nfa::{StateId, TransitionKind}, - regex::{Capture, Match, Regex}, + regex::{Capture, Regex}, + Match, }; use std::collections::{BTreeMap, HashMap}; use wasm_bindgen::prelude::*; @@ -20,19 +21,29 @@ impl RegexEngine { } pub fn captures(&self, input: &str) -> Option { - self.engine.captures(input).map(OwnedCapture::from) + let index_map = get_char_index(input); + + self.engine + .captures(input) + .map(|c| OwnedCapture::from_capture(c, &index_map)) } pub fn find(&self, input: &str) -> Option { - self.engine.find(input).map(OwnedMatch::from) + let index_map = get_char_index(input); + + self.engine + .find(input) + .map(|m| OwnedMatch::from_match(m, &index_map)) } #[wasm_bindgen(js_name = "findAll")] pub fn find_all(&self, input: &str) -> Vec { + let index_map = get_char_index(input); + self.engine .find_all(input) .into_iter() - .map(OwnedMatch::from) + .map(|m| OwnedMatch::from_match(m, &index_map)) .collect() } @@ -60,38 +71,39 @@ impl RegexEngine { } #[wasm_bindgen] -#[derive(Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct OwnedMatch { pub start: usize, pub end: usize, } -impl From> for OwnedMatch { - fn from(value: Match<'_>) -> Self { +impl OwnedMatch { + fn from_match(value: Match<'_>, index_map: &HashMap) -> Self { Self { - start: value.start, - end: value.end, + start: index_map[&value.start], + end: index_map[&value.end], } } } #[wasm_bindgen] +#[derive(Debug, Clone)] pub struct OwnedCapture { captures: BTreeMap, named_captures: HashMap, } -impl From> for OwnedCapture { - fn from(value: Capture) -> Self { +impl OwnedCapture { + fn from_capture(value: Capture, index_map: &HashMap) -> Self { let captures = value .captures .into_iter() - .map(|(i, v)| (i, OwnedMatch::from(v))) + .map(|(i, v)| (i, OwnedMatch::from_match(v, index_map))) .collect(); let named_captures = value .named_captures .into_iter() - .map(|(i, v)| (i, OwnedMatch::from(v))) + .map(|(i, v)| (i, OwnedMatch::from_match(v, index_map))) .collect(); Self { @@ -136,3 +148,31 @@ impl Transition { self.kind.to_string() } } + +fn get_char_index(input: &str) -> HashMap { + input + .char_indices() + .enumerate() + .map(|(char_idx, (slice_idex, _))| (slice_idex, char_idx)) + .chain([(input.len(), input.chars().count())]) + .collect() +} + +#[cfg(test)] +mod tests { + use super::{OwnedMatch, RegexEngine}; + + #[test] + fn test_unicode_range() { + let regex = RegexEngine::new(r#"こ"#); + let matches = regex.find_all("ここで"); + + assert_eq!( + matches, + vec![ + OwnedMatch { start: 0, end: 1 }, + OwnedMatch { start: 1, end: 2 }, + ] + ); + } +} diff --git a/web/package-lock.json b/web/package-lock.json index 1602559..3d6a5ce 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,13 @@ { - "name": "web", - "version": "0.0.0", + "name": "regex-potata-web", + "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "web", - "version": "0.0.0", + "name": "regex-potata-web", + "version": "0.1.0", + "license": "MIT", "dependencies": { "@headlessui/react": "^1.7.18", "@headlessui/tailwindcss": "^0.2.0", diff --git a/web/src/App.tsx b/web/src/App.tsx index 654d463..1a431aa 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -2,11 +2,12 @@ import { useEffect, useRef, useState } from "react"; import { Viz, instance } from "@viz-js/viz"; import Navbar from "./components/Navbar"; import ExpressionsPopup from "./components/ExpressionsPopup"; -import { RiCloseCircleFill, RiQuestionFill } from "react-icons/ri"; +import { RiQuestionFill } from "react-icons/ri"; import { OwnedMatch, RegexEngine } from "regex-potata"; import { dotFromRegex } from "./utils/viz"; import TestInput from "./components/TestInput"; import Footer from "./components/Footer"; +import RegexInput from "./components/RegexInput"; const App = () => { const [regexInput, setRegexInput] = useState(""); @@ -20,8 +21,9 @@ const App = () => { useEffect(() => { (async () => { const i = await instance(); + const engine = new RegexEngine(""); vizInstance.current = i; - setRegexInstance(new RegexEngine("")); + setRegexInstance(engine); })(); }, []); @@ -60,8 +62,7 @@ const App = () => {
@@ -70,21 +71,11 @@ const App = () => {
- setRegexInput(e.target.value)} - className={`py-3 px-5 w-full - rounded-md border-[1px] border-slate-800 - bg-transparent focus:outline-none focus:border-cyan-300 - ${!regexInstance && "!border-red-400"}`} + error={!regexInstance} + onInput={(v) => setRegexInput(v)} /> - {!regexInstance && ( -
- - Invalid Regular expression -
- )}
Test input
@@ -97,11 +88,13 @@ const App = () => {
NFA Visualizer
- + {svg && ( + + )}
diff --git a/web/src/components/ExpressionsPopup.tsx b/web/src/components/ExpressionsPopup.tsx index b767349..74b63a6 100644 --- a/web/src/components/ExpressionsPopup.tsx +++ b/web/src/components/ExpressionsPopup.tsx @@ -1,11 +1,6 @@ import { Dialog } from "@headlessui/react"; import { useRef } from "react"; - -const Snippet = ({ children }: { children: React.ReactNode }) => ( - - {children} - -); +import Snippet from "./Snippet"; const expressions = [ { desc: "Basic regex", pat: ["foo", "(bar)", "foo|bar", "fo."] }, diff --git a/web/src/components/RegexInput.tsx b/web/src/components/RegexInput.tsx index e69de29..7fc892d 100644 --- a/web/src/components/RegexInput.tsx +++ b/web/src/components/RegexInput.tsx @@ -0,0 +1,29 @@ +import { RiCloseCircleFill } from "react-icons/ri"; + +type InputProps = { + value: string; + error: boolean; + onInput: (value: string) => void; +}; + +const RegexInput = ({ value, error, onInput }: InputProps) => ( + <> + onInput(e.target.value)} + className={`py-3 px-5 w-full + rounded-md border-[1px] border-slate-800 + bg-transparent focus:outline-none focus:border-cyan-300 + ${error && "!border-red-400"}`} + /> + {error && ( +
+ + Invalid Regular expression +
+ )} + +); + +export default RegexInput; diff --git a/web/src/components/Snippet.tsx b/web/src/components/Snippet.tsx new file mode 100644 index 0000000..93464b6 --- /dev/null +++ b/web/src/components/Snippet.tsx @@ -0,0 +1,7 @@ +const Snippet = ({ children }: { children: React.ReactNode }) => ( + + {children} + +); + +export default Snippet; diff --git a/web/src/components/TestInput.tsx b/web/src/components/TestInput.tsx index d09b537..7bee174 100644 --- a/web/src/components/TestInput.tsx +++ b/web/src/components/TestInput.tsx @@ -13,32 +13,34 @@ type InputProps = { onInput: (value: string) => void; }; +const decoration = Decoration.mark({ + class: "highlight-chunk", + inclusiveStart: true, + inclusiveEnd: false, +}); + const TestInput = ({ input, matches, onInput }: InputProps) => { const [highlightExtension, setHighlightExtension] = useState(); useEffect(() => { - try { - const decoration = Decoration.mark({ - class: "highlight-chunk", - inclusiveStart: true, - inclusiveEnd: false, - }); - - const decorationBuilder = new RangeSetBuilder(); - - for (const match of matches) { - decorationBuilder.add(match.start, match.end, decoration); - } - - const plugin = ViewPlugin.define( - () => ({ - decorations: decorationBuilder.finish(), - }), - { decorations: (plugin) => plugin.decorations } - ); - - setHighlightExtension(plugin.extension); - } catch {} + if (!matches.length) { + return setHighlightExtension(undefined); + } + + const decorationBuilder = new RangeSetBuilder(); + + for (const match of matches) { + decorationBuilder.add(match.start, match.end, decoration); + } + + const plugin = ViewPlugin.define( + () => ({ + decorations: decorationBuilder.finish(), + }), + { decorations: (plugin) => plugin.decorations } + ); + + setHighlightExtension(plugin.extension); }, [matches]); return ( diff --git a/web/src/index.css b/web/src/index.css index 6923967..c8db3f2 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -3,7 +3,11 @@ @tailwind utilities; .cm-editor { - @apply border-[1px] border-slate-800 rounded-md p-4 !bg-transparent; + @apply border-[1px] border-slate-800 rounded-md p-3 !bg-transparent; +} + +.cm-content { + @apply !font-sans !leading-6; } .cm-focused { @@ -20,6 +24,6 @@ @layer utilities { .highlight-chunk { - @apply bg-cyan-200 text-slate-900; + @apply bg-cyan-100 text-slate-900; } }