From dd18c72715de6b1c04812a3171944189a0b28555 Mon Sep 17 00:00:00 2001 From: Robert Marek Date: Wed, 20 Dec 2023 21:55:44 +0100 Subject: [PATCH] Add proper support for escape sequences. Co-authored-by: kasiaMarek --- .../net/marek/tyre/pattern/StringParser.scala | 16 +++++++++++----- .../marek/tyre/pattern/StringParserTest.scala | 1 + todo.md | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/main/scala/net/marek/tyre/pattern/StringParser.scala b/src/main/scala/net/marek/tyre/pattern/StringParser.scala index 51931c1..e220392 100644 --- a/src/main/scala/net/marek/tyre/pattern/StringParser.scala +++ b/src/main/scala/net/marek/tyre/pattern/StringParser.scala @@ -41,16 +41,21 @@ private object TyreParser extends Parsers: case vSpace extends CharClass('v', List('\n', '\r', '\f', '\u000B', '\u0085', '\u2028', '\u2029')) case word extends CharClass('w', List('_', Range('a', 'z'), Range('A', 'Z'), Range('0', '9'))) case digit extends CharClass('d', List(Range('0', '9'))) - case tab extends CharClass('t', List('\t')) - case nl extends CharClass('\n', List('\n')) - case cr extends CharClass('\r', List('\r')) - case ff extends CharClass('\f', List('\u000C')) private object CharClass: val vals = values.map(p => p.input -> p.output).toMap val negs = values.map(p => p.input.toUpper -> p.output).toMap def hasVal(c: Char): Boolean = vals.keySet(c) def hasNeg(c: Char): Boolean = negs.keySet(c) + private enum EscapedSeq(val input: Char, val output: Char): + case tab extends EscapedSeq('t', '\t') + case nl extends EscapedSeq('n', '\n') + case cr extends EscapedSeq('r', '\r') + case ff extends EscapedSeq('f', '\u000C') + private object EscapedSeq: + val vals = values.map(p => p.input -> p.output).toMap + def hasVal(c: Char): Boolean = vals.keySet(c) + import net.marek.tyre.utils.NumberHelper.* private val unicodeSymbol: Parser[Char] = accept("unicode symbol", { case 'u' => 'u' }) @@ -64,7 +69,8 @@ private object TyreParser extends Parsers: private val hole = 
accept("hole", { case Hole(idx) => idx }) private val literal: Parser[Char] = accept("literal", { case el: Char if !Reserved.chars(el) => el }) | escape ~> accept("escaped literal", { case el: Char if Reserved.chars(el) => el }) | - escape ~> unicodeSymbol ~> repN(4, unicodeValue) ^^ { case seq => hex(seq: _*).toChar } + escape ~> unicodeSymbol ~> repN(4, unicodeValue) ^^ { case seq => hex(seq: _*).toChar } | + escape ~> accept("predef class", { case el: Char if EscapedSeq.hasVal(el) => EscapedSeq.vals(el) }) private val charClassIn = escape ~> accept("predef class", { case el: Char if CharClass.hasVal(el) => CharClass.vals(el) }) private val charClassNotIn = diff --git a/src/test/scala/net/marek/tyre/pattern/StringParserTest.scala b/src/test/scala/net/marek/tyre/pattern/StringParserTest.scala index ac0566e..18ee9fd 100644 --- a/src/test/scala/net/marek/tyre/pattern/StringParserTest.scala +++ b/src/test/scala/net/marek/tyre/pattern/StringParserTest.scala @@ -54,6 +54,7 @@ class StringParserTest extends AnyFunSuite: assertParses("x.\\.", ReAnd(char('x'), ReAnd(ReAny, char('.')))) assertParses("\u0078", char('x')) assertParses("[\u0141-\u0142\u017B]", ReIn(List(Range('Ł', 'ł'), Range('Ż')))) + assertParses("[\\n-x]", ReIn(List(Range('\n', 'x')))) assertParses("(abc)!sx", ReAnd(ReCast(ReAnd(char('a'), ReAnd(char('b'), char('c'))), CastOp.Stringify), char('x'))) assertDoesNotParse("x)y") assertDoesNotParse("x|*") diff --git a/todo.md b/todo.md index 210dfb3..60a4417 100644 --- a/todo.md +++ b/todo.md @@ -17,7 +17,7 @@ TODO - [X] Check if TyRE matching is greedy - [X] Support for Unicode character values (\uhhhh) -- [ ] Allow escaped characters in ranges (eg. [\t-s]) and generally in brackets (eg. [^\s]) +- [X] Allow escaped characters in ranges (eg. [\t-s]) and generally in brackets (eg. [^\s]) - [ ] Support for singleton types - [ ] Helper functions for handling digits and numbers - [ ] Unicode mode