Skip to content

Commit

Permalink
feat: capture groups & matches
Browse files Browse the repository at this point in the history
  • Loading branch information
luckasRanarison committed Jan 6, 2024
1 parent e3eafa9 commit 20cb6b4
Show file tree
Hide file tree
Showing 6 changed files with 520 additions and 62 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn main() {

- [x] Basic regex `foo` `(bar)` `|` `.`
- [x] Quantifiers `+` `?` `*` `{x}` `{x,y}` `{x,}`
- [x] Character classes `[a-z]` `[^x]`
- [ ] Captures
- [x] Character classes `[a-z]` `[^x]` `\d` `\D` `\w` `\W` `\s` `\S`
- [x] Captures `(foo)` `(?:bar)` `(?<named>foo)`
- [ ] Anchors `^` `$`
- [ ] NFA visualizer
23 changes: 20 additions & 3 deletions src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub enum Node {
Plus(Box<Node>),
Optional(Box<Node>),
Range { inner: Box<Node>, range: Range },
Group(Box<Node>),
Group(Group),
Wildcard,
Character(char),
CharacterClass(CharacterClass),
Expand Down Expand Up @@ -43,15 +43,32 @@ impl Node {
}
}

pub fn group(inner: Node) -> Self {
Self::Group(Box::new(inner))
pub fn group(inner: Node, is_capturing: bool, name: Option<&str>) -> Self {
Self::Group(Group::new(inner, is_capturing, name.map(str::to_string)))
}

pub fn class(negate: bool, members: Vec<ClassMember>) -> Self {
Self::CharacterClass(CharacterClass { negate, members })
}
}

/// A parenthesised sub-expression of the regex AST.
///
/// When converted to an NFA, a group with a `name` is recorded as a named
/// capture, an unnamed group with `is_capturing == true` is recorded as a
/// positional capture, and any other group contributes only its inner
/// expression.
#[derive(Debug, PartialEq)]
pub struct Group {
/// The expression wrapped by the group.
pub inner: Box<Node>,
/// The capture name, if the group was given one.
pub name: Option<String>,
/// Whether an unnamed group should be recorded as a capture.
pub is_capturing: bool,
}

impl Group {
fn new(inner: Node, is_capturing: bool, name: Option<String>) -> Self {
Self {
inner: Box::new(inner),
name,
is_capturing,
}
}
}

#[derive(Debug, PartialEq)]
pub struct Range {
pub min: usize,
Expand Down
2 changes: 2 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub enum ParsingError {
InvalidRangeQuantifier,
#[error("Invalid character class")]
InvalidCharacterClass,
#[error("Invalid capture name")]
InvalidCaptureName,
#[error("Range out of order")]
RangeOutOfOrder,
}
83 changes: 73 additions & 10 deletions src/nfa.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use crate::ast::{CharacterClass, ClassMember, Node, Range};
use crate::ast::{CharacterClass, ClassMember, Group, Node, Range};
use std::{
collections::{BTreeMap, HashSet, VecDeque},
collections::{BTreeMap, HashMap, HashSet, VecDeque},
fmt::{self, Debug},
};

pub const START: usize = 0;

type StateId = usize;
pub type StateId = usize;
type TransitionMap = BTreeMap<usize, Vec<Transition>>;

#[derive(Clone, PartialEq)]
Expand Down Expand Up @@ -60,10 +60,18 @@ impl Transition {
}
}

/// The pair of NFA states that delimit a capture group.
///
/// Both ids are relative to the automaton that owns the group; they are
/// shifted by the same offset as the transitions when one automaton is
/// embedded into another.
#[derive(Debug, Clone, PartialEq)]
pub struct CaptureGroup {
/// State at which the group begins.
pub start: StateId,
/// State at which the group ends.
pub end: StateId,
}

/// A non-deterministic finite automaton built from a regex AST, together
/// with the capture groups recorded while building it.
#[derive(Clone, PartialEq)]
pub struct Nfa {
// Number of states in the automaton.
state_count: usize,
// Outgoing transitions, keyed by the id of the state they leave from.
transitions: TransitionMap,
// Unnamed capturing groups.
capture_groups: Vec<CaptureGroup>,
// Named capturing groups, keyed by capture name.
named_capture_groups: HashMap<String, CaptureGroup>,
}

impl Nfa {
Expand Down Expand Up @@ -93,7 +101,7 @@ impl Nfa {
let offset = self.state_count;

NfaBuilder::from(self)
.extend(other.transitions, offset)
.extend(other, offset)
.transition(offset - 1, TransitionKind::Epsilon, offset)
.build()
}
Expand All @@ -105,8 +113,8 @@ impl Nfa {
NfaBuilder::default()
.transition(START, TransitionKind::Epsilon, 1)
.transition(START, TransitionKind::Epsilon, offset)
.extend(self.transitions, 1)
.extend(other.transitions, offset)
.extend(self, 1)
.extend(other, offset)
.transition(offset - 1, TransitionKind::Epsilon, new_end)
.transition(new_end - 1, TransitionKind::Epsilon, new_end)
.build()
Expand All @@ -117,7 +125,7 @@ impl Nfa {

NfaBuilder::default()
.transition(START, TransitionKind::Epsilon, 1)
.extend(self.transitions, 1)
.extend(self, 1)
.transition(offset, TransitionKind::Epsilon, 1)
.transition(offset, TransitionKind::Epsilon, offset + 1)
.build()
Expand Down Expand Up @@ -159,6 +167,17 @@ impl Nfa {
}
}

fn group(group: Group) -> Self {
let nfa = Nfa::from(*group.inner);
let end = nfa.end();

match group.name {
Some(name) => NfaBuilder::from(nfa).named_group(START, end, name).build(),
None if group.is_capturing => NfaBuilder::from(nfa).group(START, end).build(),
None => nfa,
}
}

fn class(class: CharacterClass) -> Self {
NfaBuilder::default()
.transition(START, TransitionKind::CharacterClass(class), 1)
Expand Down Expand Up @@ -201,6 +220,14 @@ impl Nfa {
pub fn is_accepting(&self, state: StateId) -> bool {
self.end() == state
}

/// Returns the unnamed capturing groups recorded for this automaton.
pub fn capture_groups(&self) -> &Vec<CaptureGroup> {
&self.capture_groups
}

/// Returns the named capturing groups recorded for this automaton, keyed by
/// capture name.
pub fn named_capture_groups(&self) -> &HashMap<String, CaptureGroup> {
&self.named_capture_groups
}
}

impl From<Node> for Nfa {
Expand All @@ -209,7 +236,7 @@ impl From<Node> for Nfa {
Node::Empty => Nfa::epsilon(),
Node::Character(ch) => Nfa::character(ch),
Node::Wildcard => Nfa::wildcard(),
Node::Group(node) => Nfa::from(*node),
Node::Group(group) => Nfa::group(group),
Node::Plus(node) => Nfa::from(*node).one_or_more(),
Node::Star(node) => Nfa::from(*node).zero_or_more(),
Node::Optional(node) => Nfa::from(*node).zero_or_one(),
Expand All @@ -224,6 +251,8 @@ impl From<Node> for Nfa {
impl Debug for Nfa {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "State count: {:?}", self.state_count)?;
writeln!(f, "Groups: {:?}", self.capture_groups)?;
writeln!(f, "Transitions:")?;

for (start, transitions) in &self.transitions {
for transition in transitions {
Expand All @@ -239,6 +268,8 @@ impl Debug for Nfa {
/// Accumulates the parts of an [`Nfa`]; `build` moves each field into the
/// finished automaton unchanged.
pub struct NfaBuilder {
// Number of states accumulated so far.
state_count: usize,
// Outgoing transitions, keyed by the id of the state they leave from.
transitions: TransitionMap,
// Unnamed capturing groups registered so far.
capture_groups: Vec<CaptureGroup>,
// Named capturing groups registered so far, keyed by capture name.
named_capture_groups: HashMap<String, CaptureGroup>,
}

impl NfaBuilder {
Expand All @@ -261,20 +292,50 @@ impl NfaBuilder {
self
}

fn extend(mut self, transitions: BTreeMap<usize, Vec<Transition>>, offset: usize) -> Self {
for (start, transitions) in transitions {
fn extend(mut self, other: Nfa, offset: usize) -> Self {
for (start, transitions) in other.transitions {
for transition in transitions {
self.add_transition(start + offset, transition.kind, transition.end + offset);
}
}

for group in other.capture_groups {
self.capture_groups.push(CaptureGroup {
start: group.start + offset,
end: group.end + offset,
});
}

for (name, group) in other.named_capture_groups {
self.named_capture_groups.insert(
name,
CaptureGroup {
start: group.start + offset,
end: group.end + offset,
},
);
}

self
}

/// Appends an unnamed capture group spanning `start..=end` to the builder
/// and returns the builder for chaining.
fn group(mut self, start: StateId, end: StateId) -> Self {
    let capture = CaptureGroup { start, end };
    self.capture_groups.push(capture);
    self
}

/// Registers a capture group spanning `start..=end` under `name` and
/// returns the builder for chaining.
///
/// A group already stored under the same name is replaced.
fn named_group(mut self, start: StateId, end: StateId, name: String) -> Self {
    let capture = CaptureGroup { start, end };
    self.named_capture_groups.insert(name, capture);
    self
}

fn build(self) -> Nfa {
Nfa {
state_count: self.state_count,
transitions: self.transitions,
capture_groups: self.capture_groups,
named_capture_groups: self.named_capture_groups,
}
}
}
Expand All @@ -284,6 +345,8 @@ impl From<Nfa> for NfaBuilder {
Self {
state_count: value.state_count,
transitions: value.transitions,
capture_groups: value.capture_groups,
named_capture_groups: value.named_capture_groups,
}
}
}
Expand Down
Loading

0 comments on commit 20cb6b4

Please sign in to comment.