From 32da297dae78588ffe72f9c6fd6e56f55ef247da Mon Sep 17 00:00:00 2001 From: Saphereye Date: Sun, 7 Apr 2024 15:30:48 +0530 Subject: [PATCH] docs: Added documentation for remaining stuff --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/lib.rs | 18 ++++++++++++++++++ src/nfa.rs | 4 ++++ src/translation/node.rs | 11 +++++++---- 5 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a98ae8..7982bb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,4 @@ version = 3 [[package]] name = "gregex" -version = "0.5.0" +version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index ce94411..5bd996a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gregex" -version = "0.5.0" +version = "0.5.1" edition = "2021" authors = ["Saphereye "] license = "MIT" diff --git a/src/lib.rs b/src/lib.rs index cb0e829..e5f1a3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ use translation::node::*; type Regex = NFA; +/// Translates a regular expression tree to a NFA. This NFA can then be called to simulate inputs. pub fn regex(regex_tree: &Node) -> Regex { let prefix_set = &prefix_set(regex_tree); let suffix_set = &suffix_set(regex_tree); @@ -37,8 +38,13 @@ pub fn regex(regex_tree: &Node) -> Regex { NFA::set_to_nfa(prefix_set, suffix_set, factors_set) } +/// Keeps count of the terminals created. This is used to create unique terminals. static TERMINAL_COUNT: AtomicU32 = AtomicU32::new(0); +/// Represents the `concatenation` action in regex. Can concatenate multiple nodes. +/// +/// Regex: ab +/// Gregex: concatenate!(terminal('a'), terminal('b')) #[macro_export] macro_rules! concatenate { ($($node:expr),+ $(,)?) => { @@ -51,11 +57,19 @@ macro_rules! concatenate { }; } +/// Represents a `terminal` in regex. This is a single character. 
+/// +/// Regex: a +/// Gregex: terminal('a') pub fn terminal(symbol: char) -> Node { let count = TERMINAL_COUNT.fetch_add(1, Ordering::SeqCst); Node::Terminal(symbol, count) } +/// Represents the `or` action in regex. Can 'or' multiple nodes. +/// +/// Regex: a|b +/// Gregex: or!(terminal('a'), terminal('b')) #[macro_export] macro_rules! or { ($($node:expr),+ $(,)?) => { @@ -68,6 +82,10 @@ macro_rules! or { }; } +/// Represents the `production` action in regex. This is a single node. +/// +/// Regex: a* +/// Gregex: production!(terminal('a')) #[macro_export] macro_rules! production { ($child:expr) => { diff --git a/src/nfa.rs b/src/nfa.rs index cdcfe68..4b01665 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -7,13 +7,16 @@ use std::collections::{HashMap, HashSet}; /// The `NFA` struct represents a non-deterministic finite automaton. #[derive(Debug, Default)] pub struct NFA { + /// Set of all possible states of the NFA. states: HashSet, + /// Set of all accepting states. If the NFA ends at any one of these, the simulation is successful. accept: HashSet, /// The transition function is a map from a pair of a state and a character to a set of states. transition_function: HashMap<(u32, char), HashSet>, } impl NFA { + /// Simulates the NFA with the given input. pub fn simulate(&self, input: &str) -> bool { let mut current_states = HashSet::new(); current_states.insert(0); @@ -29,6 +32,7 @@ impl NFA { !current_states.is_disjoint(&self.accept) } + /// Converts the prefix, suffix and factors sets to a NFA. pub fn set_to_nfa( prefix_set: &HashSet, suffix_set: &HashSet, diff --git a/src/translation/node.rs b/src/translation/node.rs index a290271..47fc22b 100644 --- a/src/translation/node.rs +++ b/src/translation/node.rs @@ -7,12 +7,13 @@ use std::collections::HashSet; /// The `Node` enum represents the different types of nodes that can be used in a regular expression tree. #[derive(Debug, PartialEq, Eq)] pub enum Node { + /// Represents an operation on one or two nodes. 
Operation(Operator, Box, Option>), /// `char` represents the character, `u32` represent the unique identifier of the node. Terminal(char, u32), } -/// The `nullability_set` function returns the set of nullability of a regular expression tree. +/// The `nullability_set` function returns the set of [SetTerminal] that are nullable in a regular expression tree. pub fn nullability_set(regex_tree: &Node) -> HashSet { let mut set = HashSet::new(); match regex_tree { @@ -38,7 +39,7 @@ pub fn nullability_set(regex_tree: &Node) -> HashSet { set } -/// The `prefix_set` function returns the set of prefixes of a regular expression tree. +/// The `prefix_set` function returns the set of [SetTerminal] that are prefixes of a regular expression tree. pub fn prefix_set(regex_tree: &Node) -> HashSet { let mut set = HashSet::new(); match regex_tree { @@ -73,7 +74,7 @@ pub fn prefix_set(regex_tree: &Node) -> HashSet { set } -/// The `suffix_set` function returns the set of suffixes of a regular expression tree. +/// The `suffix_set` function returns the set of [SetTerminal] that are suffixes of a regular expression tree. pub fn suffix_set(regex_tree: &Node) -> HashSet { let mut set = HashSet::new(); match regex_tree { @@ -108,7 +109,9 @@ pub fn suffix_set(regex_tree: &Node) -> HashSet { set } -/// The `factors_set` function returns the set of factors of a regular expression tree. +/// The `factors_set` function returns the set of [SetTerminal] that are factors of a regular expression tree. +/// +/// Factors in this scenario mean the set of terminals that can be produced by the regular expression. pub fn factors_set(regex_tree: &Node) -> HashSet { let mut set = HashSet::new(); match regex_tree {