From e491afe66c3f5341d9e018fdddd4667cabf8ccfc Mon Sep 17 00:00:00 2001 From: Maciej Hirsz <1096222+maciejhirsz@users.noreply.github.com> Date: Sun, 9 Dec 2018 19:23:21 +0100 Subject: [PATCH] Proper fix to #42 (#45) --- Cargo.lock | 26 ++++---------------------- logos-derive/Cargo.toml | 2 +- logos-derive/src/regex.rs | 37 ++++++++++++++++++++++--------------- logos-derive/src/tree.rs | 32 +++++++++++++++++++++++--------- tests/Cargo.toml | 2 +- tests/tests/advanced.rs | 2 +- 6 files changed, 52 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7465b99e..444b7318 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,14 +3,6 @@ name = "byteorder" version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "logos" version = "0.9.0" @@ -22,6 +14,7 @@ dependencies = [ [[package]] name = "logos-derive" version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", @@ -32,8 +25,7 @@ dependencies = [ [[package]] name = "logos-derive" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" +version = "0.9.1" dependencies = [ "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", @@ -89,16 +81,8 @@ name = "tests" version = "0.0.0" dependencies = [ "logos 0.9.0", - "logos-derive 0.9.0", - "toolshed 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "toolshed" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "logos-derive 0.9.1", + "toolshed 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -126,14 +110,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] "checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d" -"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" "checksum logos-derive 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "36411219f8203825c35c04b4ff04540ce58b7bffd458cfd3405df2ff57acc1b9" "checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" "checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" "checksum regex-syntax 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fbc557aac2b708fe84121caf261346cc2eed71978024337e42eb46b8a252ac6e" "checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8" "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" -"checksum toolshed 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "54a272adbf14cfbb486774d09ee3e00c38d488cd390084a528f70e10e3a184a8" "checksum toolshed 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8d2a2c31273526c0c85f79642dba51e95b25c5cb3addef94ca97e680d547dbbf" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" diff --git a/logos-derive/Cargo.toml b/logos-derive/Cargo.toml index 46ada8fa..ea1e2fbe 100644 --- a/logos-derive/Cargo.toml +++ b/logos-derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "logos-derive" -version = "0.9.0" +version = "0.9.1" authors = ["maciejhirsz "] license = "MIT/Apache-2.0" description = "Create ridiculously fast Lexers" diff --git a/logos-derive/src/regex.rs b/logos-derive/src/regex.rs index 9efa6323..b6a0c10a 100644 --- a/logos-derive/src/regex.rs +++ b/logos-derive/src/regex.rs @@ -459,8 +459,11 @@ impl Pattern { } } - // FIXME: this can be more robust pub fn intersect(&self, other: &Pattern) -> Option { + if self == other { + return None; + } + if self.contains(other) { Some(other.clone()) } else if other.contains(self) { @@ -471,20 +474,24 @@ impl Pattern { } pub fn contains(&self, other: &Pattern) -> bool { - use self::Pattern::*; - - if let Byte(x) = other { - match self { - Byte(_) => false, - Range(a, b) => { - *a <= *x && *x <= *b - }, - Class(class) => { - class.iter().any(|pat| pat.contains(other)) - }, - } - } else { - false + match other { + Pattern::Byte(x) => self.contains_range(*x, *x), + Pattern::Range(a, b) => self.contains_range(*a, *b), + Pattern::Class(class) => { + class.iter().all(|pat| self.contains(pat)) + }, + } + } + + fn contains_range(&self, xa: u8, xb: u8) -> bool { + match self { + Pattern::Byte(a) => *a == xa && *a == xb, + Pattern::Range(a, b) => { + (*a <= xa && xa <= *b) && (*a <= xb && xb <= *b) + }, + Pattern::Class(class) => { + class.iter().any(|pat| pat.contains_range(xa, xb)) + }, } } diff --git a/logos-derive/src/tree.rs b/logos-derive/src/tree.rs index 070293d0..213a4cf2 100644 --- a/logos-derive/src/tree.rs +++ b/logos-derive/src/tree.rs @@ -593,6 +593,8 @@ impl<'a> Node<'a> { } } + /// Checks if the fork contains one branch that is a generalization of all other branches, + /// and if so removes and returns that branch. pub fn fallback(&mut self) -> Option> { match self { Node::Fork(fork) => { @@ -603,23 +605,35 @@ impl<'a> Node<'a> { // This is a bit weird, but it basically checks if the fork // has one and only one branch that is heavy and if so, it // removes that branch and returns it. - // - // FIXME: This should check if all other branches in the tree - // are specializations of that regex let mut index = None; + let mut len = 1; for (idx, branch) in fork.arms.iter().enumerate() { - if branch.regex.first().weight() > 1 { - // Make sure we only get one - if index.is_some() { - return None; - } + let other_len = branch.regex.first().len(); + if other_len > len { index = Some(idx); + len = other_len; } } - index.map(|idx| fork.arms.remove(idx)) + let index = index?; + let selected = &fork.arms[index]; + + // FIXME: Specialization check should be deeper than first Pattern + let specialization = fork.arms.iter().enumerate().all(|(idx, branch)| { + if idx == index { + return true; + } + + selected.regex.first().contains(branch.regex.first()) + }); + + if specialization { + Some(fork.arms.remove(index)) + } else { + None + } } _ => None, } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 5b9fa143..ea91bd84 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -9,4 +9,4 @@ edition = "2018" [dependencies] logos = { path = "../logos", default-features = false, features = ["nul_term_source"] } logos-derive = { path = "../logos-derive" } -toolshed = "0.6" +toolshed = "0.8" diff --git a/tests/tests/advanced.rs b/tests/tests/advanced.rs index 0100b78b..b2cfb248 100644 --- a/tests/tests/advanced.rs +++ b/tests/tests/advanced.rs @@ -28,7 +28,7 @@ enum Token { #[regex="~[a-z][a-z]+"] LiteralUrbitAddress, - #[regex="~(m|h|s)[0-9]+"] + #[regex="~[mhs][0-9]+"] LiteralRelDate, #[regex = "🦀+"]