From 73885820b948dd624b01449b7e8a32f1eecd287c Mon Sep 17 00:00:00 2001 From: Sergey Igushkin Date: Sat, 2 Dec 2017 17:58:09 +0300 Subject: [PATCH] Add internally stored Regex to Token to preserve originally used Regex Make Parsed.remainder public Optimize TokenizerMatchesSequence to reduce number of objects. Push version to 0.3.2 --- build.gradle | 2 +- .../h0tk3y/betterParse/grammar/Grammar.kt | 12 ++++----- .../github/h0tk3y/betterParse/lexer/Token.kt | 25 ++++++++++++++----- .../h0tk3y/betterParse/lexer/Tokenizer.kt | 13 +++++----- .../h0tk3y/betterParse/parser/Parser.kt | 2 +- .../betterParse/utils/CachedSequence.kt | 6 ++--- 6 files changed, 37 insertions(+), 23 deletions(-) diff --git a/build.gradle b/build.gradle index 6b2cdc6..47341d1 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ group 'com.github.h0tk3y.betterParse' -version '0.3.1' +version '0.3.2' buildscript { ext.kotlin_version = '1.1.51' diff --git a/src/main/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt b/src/main/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt index 68f5330..ee03b2c 100644 --- a/src/main/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt +++ b/src/main/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt @@ -33,8 +33,8 @@ abstract class Grammar : Parser { open val declaredParsers get() = (_parsers + _tokens + rootParser).toSet() fun token(@Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(null, pattern, ignore) - fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toString(), ignore) - fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern.toString(), ignore) + fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toRegex(), ignore) + fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern, ignore) /** A [Lexer] that is built with the [Token]s defined within this [Grammar], in their order of declaration */ open val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) } @@ -59,12 +59,12 @@ abstract class Grammar : Parser { } fun token(name: String, @Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(name, pattern, ignore) -fun token(name: String, pattern: Pattern, ignore: Boolean = false) = Token(name, pattern.toString(), ignore) -fun token(name: String, pattern: Regex, ignore: Boolean = false) = Token(name, pattern.toString(), ignore) +fun token(name: String, pattern: Pattern, ignore: Boolean = false) = Token(name, pattern.toRegex(), ignore) +fun token(name: String, pattern: Regex, ignore: Boolean = false) = Token(name, pattern, ignore) fun token(@Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(null, pattern, ignore) -fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toString(), ignore) -fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern.toString(), ignore) +fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toRegex(), ignore) +fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern, ignore) /** A convenience function to use for referencing a parser that is not initialized up to this moment. */ fun parser(block: () -> Parser): Parser = ParserReference(block) diff --git a/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt b/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt index baa6eeb..f63c679 100644 --- a/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt +++ b/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt @@ -10,15 +10,28 @@ import org.intellij.lang.annotations.RegExp * Parses to [TokenMatch]. * The [name] only provides additional information. */ -class Token( - name: String?, - @RegExp @Language("RegExp") val pattern: String, - val ignored: Boolean = false -) : Parser { +class Token : Parser { + val pattern: String + val regex: Regex? + val ignored: Boolean - var name: String? = name + var name: String? = null internal set + constructor(name: String?, @RegExp @Language("RegExp") patternString: String, ignored: Boolean = false) { + this.name = name + this.ignored = ignored + pattern = patternString + regex = null + } + + constructor(name: String?, regex: Regex, ignored: Boolean = false) { + this.name = name + this.ignored = ignored + pattern = regex.pattern + this.regex = regex + } + override fun toString() = (if (name != null) "$name ($pattern)" else pattern) + if (ignored) " [ignorable]" else "" diff --git a/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Tokenizer.kt b/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Tokenizer.kt index 6069eac..b567b6f 100644 --- a/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Tokenizer.kt +++ b/src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Tokenizer.kt @@ -1,15 +1,16 @@ package com.github.h0tk3y.betterParse.lexer import com.github.h0tk3y.betterParse.utils.CachedSequence -import com.github.h0tk3y.betterParse.utils.cached import java.io.InputStream import java.util.* import kotlin.coroutines.experimental.buildSequence internal class TokenizerMatchesSequence( - val tokens: CachedSequence, - val tokenizer: Tokenizer -) : Sequence by tokens + iterator: Iterator, + val tokenizer: Tokenizer, + cache: ArrayList = arrayListOf(), + startAt: Int = 0 +) : CachedSequence(iterator, cache, startAt) interface Tokenizer { val tokens: List @@ -34,7 +35,7 @@ class DefaultTokenizer(override val tokens: List) : Tokenizer { require(tokens.isNotEmpty()) { "The tokens list should not be empty" } } - val patterns = tokens.map { it to it.pattern.toPattern() } + val patterns = tokens.map { it to (it.regex?.toPattern() ?: it.pattern.toPattern()) } /** Tokenizes the [input] from a [String] into a [TokenizerMatchesSequence]. */ override fun tokenize(input: String) = tokenize(Scanner(input)) @@ -81,5 +82,5 @@ class DefaultTokenizer(override val tokens: List) : Tokenizer { yield(result) } - }.constrainOnce().cached().let { TokenizerMatchesSequence(it as CachedSequence, this) } + }.constrainOnce().iterator().let { TokenizerMatchesSequence(it, this) } } \ No newline at end of file diff --git a/src/main/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt b/src/main/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt index 2b880ab..605ed58 100644 --- a/src/main/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt +++ b/src/main/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt @@ -32,7 +32,7 @@ sealed class ParseResult /** Represents a successful parsing result of a [Parser] that produced [value] and left a * possibly empty input sequence [remainder] unprocessed.*/ -data class Parsed(val value: T, internal val remainder: Sequence) : ParseResult() { +data class Parsed(val value: T, val remainder: Sequence) : ParseResult() { override fun toString(): String = "Parsed($value)" } diff --git a/src/main/kotlin/com/github/h0tk3y/betterParse/utils/CachedSequence.kt b/src/main/kotlin/com/github/h0tk3y/betterParse/utils/CachedSequence.kt index ced33b2..ba6236c 100644 --- a/src/main/kotlin/com/github/h0tk3y/betterParse/utils/CachedSequence.kt +++ b/src/main/kotlin/com/github/h0tk3y/betterParse/utils/CachedSequence.kt @@ -1,10 +1,10 @@ package com.github.h0tk3y.betterParse.utils -import com.github.h0tk3y.betterParse.lexer.TokenizerMatchesSequence import com.github.h0tk3y.betterParse.lexer.TokenMatch +import com.github.h0tk3y.betterParse.lexer.TokenizerMatchesSequence import java.util.* -internal class CachedSequence constructor( +internal open class CachedSequence constructor( val source: Iterator, val cache: ArrayList, val startAt: Int @@ -28,7 +28,7 @@ internal class CachedSequence constructor( } internal fun Sequence.skipOne(): Sequence = when (this) { - is TokenizerMatchesSequence -> TokenizerMatchesSequence(tokens.skipOne() as CachedSequence, tokenizer) + is TokenizerMatchesSequence -> TokenizerMatchesSequence(source, tokenizer, cache, startAt + 1) is CachedSequence -> CachedSequence(source, cache, startAt + 1) else -> drop(1) }