Skip to content

Commit

Permalink
fix: highlight & unicode support
Browse files Browse the repository at this point in the history
  • Loading branch information
luckasRanarison committed Jan 12, 2024
1 parent 97b7787 commit c4fe65d
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 95 deletions.
43 changes: 24 additions & 19 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,52 @@ pub fn parse_regex(input: &str) -> Result<Node> {
}

/// Parses an alternation: one concatenation, optionally followed by `|` and
/// another alternation.
///
/// On success returns the parsed node together with the unconsumed remainder
/// of `input`. When the remainder after the left-hand side does not start with
/// `|`, the left-hand side is returned unchanged.
// NOTE(review): this span shows the opening `match` in two forms
// (`rest.get(..1)` and `rest.chars().next()`); only one of them can be kept.
fn parse_alternation(input: &str) -> Result<(Node, &str)> {
parse_concat(input).and_then(|(lhs, rest)| match rest.get(..1) {
Some("|") => {
parse_concat(input).and_then(|(lhs, rest)| match rest.chars().next() {
Some('|') => {
// Skip the one-byte `|` and parse the right-hand side recursively.
parse_alternation(&rest[1..]).map(|(rhs, rest)| (Node::alternation(lhs, rhs), rest))
}
_ => Ok((lhs, rest)),
})
}

/// Parses a sequence of quantified atoms into a concatenation.
///
/// The sequence stops when the remainder is empty or starts with `|` or `)`;
/// in that case the single parsed node is returned as is. Otherwise the rest
/// is parsed recursively and joined with `Node::concatenation`.
// NOTE(review): this span shows the `match` in two forms
// (`rest.get(..1)` and `rest.chars().next()`); only one of them can be kept.
fn parse_concat(input: &str) -> Result<(Node, &str)> {
parse_quantifier(input).and_then(|(lhs, rest)| match rest.get(..1) {
Some("|") | Some(")") | None => Ok((lhs, rest)),
parse_quantifier(input).and_then(|(lhs, rest)| match rest.chars().next() {
Some('|') | Some(')') | None => Ok((lhs, rest)),
Some(_) => parse_concat(rest).map(|(rhs, rest)| (Node::concatenation(lhs, rhs), rest)),
})
}

/// Parses one atom and an optional quantifier that follows it.
///
/// Recognised quantifiers are `+`, `*`, `?` and a `{...}` range (delegated to
/// `parse_range`). With no quantifier the atom is returned unchanged together
/// with the untouched remainder.
// NOTE(review): this span shows the `match` and its arms in two forms
// (string-slice patterns and `char` patterns); only one set can be kept.
fn parse_quantifier(input: &str) -> Result<(Node, &str)> {
parser_atom(input).and_then(|(result, rest)| match rest.get(..1) {
Some("+") => Ok((Node::plus(result), &rest[1..])),
Some("*") => Ok((Node::star(result), &rest[1..])),
Some("?") => Ok((Node::optional(result), &rest[1..])),
Some("{") => {
parser_atom(input).and_then(|(result, rest)| match rest.chars().next() {
Some('+') => Ok((Node::plus(result), &rest[1..])),
Some('*') => Ok((Node::star(result), &rest[1..])),
Some('?') => Ok((Node::optional(result), &rest[1..])),
Some('{') => {
// The opening brace is skipped; `parse_range` consumes up to and including `}`.
parse_range(&rest[1..]).map(|(range, rest)| (Node::range(result, range), rest))
}
_ => Ok((result, rest)),
})
}

/// Parses the inside of a `{...}` range quantifier, starting just after `{`.
///
/// A leading number is required. If it is followed by `,` the upper bound is
/// parsed by `parse_range_upper`; if it is followed by `}` the range has the
/// same lower and upper bound.
///
/// # Errors
///
/// Returns `ParsingError::InvalidRangeQuantifier` when the number is missing
/// or is followed by anything other than `,` or `}`.
// NOTE(review): this span shows the `match` and its arms in two forms
// (string-slice patterns and `char` patterns); only one set can be kept.
fn parse_range(input: &str) -> Result<(Range, &str)> {
take_number(input).and_then(|(lower, rest)| match (lower, rest.get(..1)) {
(Some(lower), Some(",")) => {
take_number(input).and_then(|(lower, rest)| match (lower, rest.chars().next()) {
(Some(lower), Some(',')) => {
parse_range_upper(&rest[1..]).map(|(upper, rest)| (Range::new(lower, upper), rest))
}
(Some(lower), Some("}")) => Ok((Range::new(lower, Some(lower)), &rest[1..])),
(Some(lower), Some('}')) => Ok((Range::new(lower, Some(lower)), &rest[1..])),
_ => Err(ParsingError::InvalidRangeQuantifier),
})
}

/// Parses the upper bound of a range quantifier, starting just after the `,`.
///
/// An immediate `}` yields `None` as the upper bound. Otherwise a number
/// followed by `}` yields `Some(number)`. The returned remainder starts after
/// the closing brace.
///
/// # Errors
///
/// Returns `ParsingError::InvalidRangeQuantifier` when `input` is empty, when
/// no number can be read, or when the number is not followed by `}`.
// NOTE(review): this span shows the whole `match` twice (string-slice patterns,
// then `char` patterns); only one of the two can be kept.
fn parse_range_upper(input: &str) -> Result<(Option<usize>, &str)> {
match input.get(..1) {
Some("}") => Ok((None, &input[1..])),
Some(_) => take_number(input).and_then(|(number, rest)| match (number, rest.get(..1)) {
(Some(number), Some("}")) => Ok((Some(number), &rest[1..])),
_ => Err(ParsingError::InvalidRangeQuantifier),
}),
match input.chars().next() {
Some('}') => Ok((None, &input[1..])),
Some(_) => {
take_number(input).and_then(|(number, rest)| match (number, rest.chars().next()) {
(Some(number), Some('}')) => Ok((Some(number), &rest[1..])),
_ => Err(ParsingError::InvalidRangeQuantifier),
})
}
None => Err(ParsingError::InvalidRangeQuantifier),
}
}
Expand All @@ -66,7 +68,10 @@ fn parser_atom(input: &str) -> Result<(Node, &str)> {
'\\' => parse_metachar(&input[1..]),
'.' => Ok((Node::Wildcard, &input[1..])),
')' => Ok((Node::Empty, input)),
_ => Ok((Node::Character(c), &input[c.len_utf8()..])),
_ => {
let rest = &input[c.len_utf8()..];
Ok((Node::Character(c), rest))
}
},
None => Ok((Node::Empty, input)),
}
Expand Down
36 changes: 29 additions & 7 deletions src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,29 @@ impl<'a> Regex {
}

pub fn captures(&self, input: &'a str) -> Option<Capture<'a>> {
let input_len = input.len();
let mut char_count = 0;
let mut captures = HashMap::new();
let mut named_captures = HashMap::new();
let mut states = HashSet::new();
let mut end = None;
let mut char_index_map = HashMap::new();

states.insert(INITAL_STATE);

for (i, ch) in input.char_indices() {
for (idx, ch) in input.char_indices() {
char_index_map.insert(idx, char_count);
char_count += 1;

states = states
.iter()
.flat_map(|&s| self.nfa.epsilon_closure(s))
.collect();

self.update_captures(&mut captures, &mut named_captures, &states, i);
self.update_captures(&mut captures, &mut named_captures, &states, idx);

if self.has_accepting_state(&states) {
end = Some(i)
end = Some(idx)
}

states = states
Expand All @@ -83,15 +89,16 @@ impl<'a> Regex {
}
}

char_index_map.insert(input_len, char_count);
states = states
.iter()
.flat_map(|&s| self.nfa.epsilon_closure(s))
.collect();

self.update_captures(&mut captures, &mut named_captures, &states, input.len());
self.update_captures(&mut captures, &mut named_captures, &states, input_len);

if self.has_accepting_state(&states) {
end = Some(input.len());
end = Some(input_len);
}

if end.is_none() {
Expand Down Expand Up @@ -149,7 +156,7 @@ impl<'a> Regex {
.collect();

if self.has_accepting_state(&states) {
end = Some(i + j)
end = Some(i + j + ch.len_utf8());
}

if states.is_empty() {
Expand All @@ -158,7 +165,7 @@ impl<'a> Regex {
}

if let Some(end) = end {
let m = Match::new(i, end, &input[i..=end]);
let m = Match::new(i, end, &input[i..end]);

if !all {
return vec![m];
Expand Down Expand Up @@ -424,4 +431,19 @@ mod test {
assert_eq!(matches.get_name("hour"), Some(&Match::new(0, 2, "19")));
assert_eq!(matches.get_name("minute"), Some(&Match::new(3, 5, "30")));
}

/// `find_all` reports every occurrence of the alternation, with the start
/// offset, end offset and matched text of each one.
#[test]
fn test_find() {
    let pattern = Regex::new(r#"wh(at|o|y)"#).unwrap();
    let expected = vec![
        Match::new(0, 4, "what"),
        Match::new(6, 9, "who"),
        Match::new(11, 14, "why"),
    ];

    assert_eq!(pattern.find_all("what? who? why?"), expected);
}
}
66 changes: 53 additions & 13 deletions src/wasm.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
nfa::{StateId, TransitionKind},
regex::{Capture, Match, Regex},
regex::{Capture, Regex},
Match,
};
use std::collections::{BTreeMap, HashMap};
use wasm_bindgen::prelude::*;
Expand All @@ -20,19 +21,29 @@ impl RegexEngine {
}

/// Runs the wrapped engine's `captures` on `input` and converts the result
/// into an `OwnedCapture`.
///
/// Returns `None` when the engine reports no capture.
// NOTE(review): this span shows two bodies: a direct `OwnedCapture::from`
// conversion, and a conversion through the `get_char_index` map. Only one
// of them can be kept.
pub fn captures(&self, input: &str) -> Option<OwnedCapture> {
self.engine.captures(input).map(OwnedCapture::from)
let index_map = get_char_index(input);

self.engine
.captures(input)
.map(|c| OwnedCapture::from_capture(c, &index_map))
}

/// Runs the wrapped engine's `find` on `input` and converts the result into
/// an `OwnedMatch`.
///
/// Returns `None` when the engine reports no match.
// NOTE(review): this span shows two bodies: a direct `OwnedMatch::from`
// conversion, and a conversion through the `get_char_index` map. Only one
// of them can be kept.
pub fn find(&self, input: &str) -> Option<OwnedMatch> {
self.engine.find(input).map(OwnedMatch::from)
let index_map = get_char_index(input);

self.engine
.find(input)
.map(|m| OwnedMatch::from_match(m, &index_map))
}

/// Runs the wrapped engine's `find_all` on `input` and converts every match
/// into an `OwnedMatch`. Exported to JavaScript under the name `findAll`.
// NOTE(review): this span shows two `.map(...)` steps: `OwnedMatch::from`, and
// `OwnedMatch::from_match` with the `get_char_index` map. Only one can be kept.
#[wasm_bindgen(js_name = "findAll")]
pub fn find_all(&self, input: &str) -> Vec<OwnedMatch> {
let index_map = get_char_index(input);

self.engine
.find_all(input)
.into_iter()
.map(OwnedMatch::from)
.map(|m| OwnedMatch::from_match(m, &index_map))
.collect()
}

Expand Down Expand Up @@ -60,38 +71,39 @@ impl RegexEngine {
}

/// Start and end positions of a match, without the matched text.
// NOTE(review): two `derive` lines are shown (`Clone` alone, and
// `Debug, Clone, PartialEq`); keeping both would derive `Clone` twice.
#[wasm_bindgen]
#[derive(Clone)]
#[derive(Debug, Clone, PartialEq)]
pub struct OwnedMatch {
pub start: usize,
pub end: usize,
}

// Conversion from a borrowed `Match` into an `OwnedMatch`.
//
// Two forms are shown in this span: a `From` impl that copies `start` and `end`
// as they are, and an inherent `from_match` that looks both positions up in
// `index_map`. Indexing the map with `[]` panics if a position is not a key.
// NOTE(review): only one impl header, one fn signature and one pair of field
// initialisers can be kept for this to compile.
impl From<Match<'_>> for OwnedMatch {
fn from(value: Match<'_>) -> Self {
impl OwnedMatch {
fn from_match(value: Match<'_>, index_map: &HashMap<usize, usize>) -> Self {
Self {
start: value.start,
end: value.end,
start: index_map[&value.start],
end: index_map[&value.end],
}
}
}

/// Capture groups of a match, stored as owned `OwnedMatch` values.
#[wasm_bindgen]
#[derive(Debug, Clone)]
pub struct OwnedCapture {
// Groups keyed by a numeric index; `BTreeMap` keeps them ordered by that key.
captures: BTreeMap<usize, OwnedMatch>,
// Groups keyed by name.
named_captures: HashMap<String, OwnedMatch>,
}

impl From<Capture<'_>> for OwnedCapture {
fn from(value: Capture) -> Self {
impl OwnedCapture {
fn from_capture(value: Capture, index_map: &HashMap<usize, usize>) -> Self {
let captures = value
.captures
.into_iter()
.map(|(i, v)| (i, OwnedMatch::from(v)))
.map(|(i, v)| (i, OwnedMatch::from_match(v, index_map)))
.collect();
let named_captures = value
.named_captures
.into_iter()
.map(|(i, v)| (i, OwnedMatch::from(v)))
.map(|(i, v)| (i, OwnedMatch::from_match(v, index_map)))
.collect();

Self {
Expand Down Expand Up @@ -136,3 +148,31 @@ impl Transition {
self.kind.to_string()
}
}

/// Builds a lookup table from byte offsets in `input` to character positions.
///
/// Every offset at which a character starts maps to that character's
/// zero-based position. One extra entry maps `input.len()` to the total
/// number of characters, so an offset just past the last character can be
/// looked up as well.
fn get_char_index(input: &str) -> HashMap<usize, usize> {
    let mut positions = HashMap::new();
    let mut char_pos = 0;

    for (byte_pos, _) in input.char_indices() {
        positions.insert(byte_pos, char_pos);
        char_pos += 1;
    }

    // After the loop `char_pos` equals the number of characters in `input`.
    positions.insert(input.len(), char_pos);
    positions
}

#[cfg(test)]
mod tests {
    use super::{OwnedMatch, RegexEngine};

    /// Each `こ` takes several bytes in UTF-8, yet the expected bounds advance
    /// by one per character.
    #[test]
    fn test_unicode_range() {
        let engine = RegexEngine::new(r#"こ"#);
        let expected = vec![
            OwnedMatch { start: 0, end: 1 },
            OwnedMatch { start: 1, end: 2 },
        ];

        assert_eq!(engine.find_all("ここで"), expected);
    }
}
9 changes: 5 additions & 4 deletions web/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 15 additions & 22 deletions web/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ import { useEffect, useRef, useState } from "react";
import { Viz, instance } from "@viz-js/viz";
import Navbar from "./components/Navbar";
import ExpressionsPopup from "./components/ExpressionsPopup";
import { RiCloseCircleFill, RiQuestionFill } from "react-icons/ri";
import { RiQuestionFill } from "react-icons/ri";
import { OwnedMatch, RegexEngine } from "regex-potata";
import { dotFromRegex } from "./utils/viz";
import TestInput from "./components/TestInput";
import Footer from "./components/Footer";
import RegexInput from "./components/RegexInput";

const App = () => {
const [regexInput, setRegexInput] = useState("");
Expand All @@ -20,8 +21,9 @@ const App = () => {
useEffect(() => {
(async () => {
const i = await instance();
const engine = new RegexEngine("");
vizInstance.current = i;
setRegexInstance(new RegexEngine(""));
setRegexInstance(engine);
})();
}, []);

Expand Down Expand Up @@ -60,8 +62,7 @@ const App = () => {
<div className="py-5 px-3 w-full flex justify-center">
<div
className="py-4 px-4 md:px-8 md:py-6 w-full max-w-2xl space-y-8
flex flex-col justify-center
rounded-md"
flex flex-col justify-center"
>
<div className="space-y-4">
<div className="space-x-3 flex items-center font-semibold">
Expand All @@ -70,21 +71,11 @@ const App = () => {
<RiQuestionFill />
</button>
</div>
<input
<RegexInput
value={regexInput}
placeholder="Insert a regular expression..."
onChange={(e) => setRegexInput(e.target.value)}
className={`py-3 px-5 w-full
rounded-md border-[1px] border-slate-800
bg-transparent focus:outline-none focus:border-cyan-300
${!regexInstance && "!border-red-400"}`}
error={!regexInstance}
onInput={(v) => setRegexInput(v)}
/>
{!regexInstance && (
<div className="flex items-center space-x-3 font-semibold text-red-400">
<RiCloseCircleFill />
<span>Invalid Regular expression</span>
</div>
)}
</div>
<div className="space-y-4">
<div className="font-semibold">Test input</div>
Expand All @@ -97,11 +88,13 @@ const App = () => {
<div className="space-y-10">
<div className="font-semibold">NFA Visualizer</div>
<div className="w-full overflow-scroll">
<svg
height={svg?.height.baseVal.value}
width={svg?.width.baseVal.value}
dangerouslySetInnerHTML={{ __html: svg?.innerHTML ?? "" }}
></svg>
{svg && (
<svg
height={svg?.height.baseVal.value}
width={svg?.width.baseVal.value}
dangerouslySetInnerHTML={{ __html: svg.innerHTML }}
></svg>
)}
</div>
</div>
</div>
Expand Down
Loading

0 comments on commit c4fe65d

Please sign in to comment.