From 134e20af61b3dd5afef4eb61c193413926477960 Mon Sep 17 00:00:00 2001 From: Smuuf Date: Fri, 27 Mar 2020 22:40:40 +0100 Subject: [PATCH] Create "next stack" inside rule handlers only if necessary. + Some code reformatting. --- lib/hafriedlander/Peg/Compiler/PHPBuilder.php | 46 +++- lib/hafriedlander/Peg/Compiler/PHPWriter.php | 95 ++++---- lib/hafriedlander/Peg/Compiler/Rule.php | 214 ++++++++++-------- lib/hafriedlander/Peg/Compiler/Token.php | 2 +- .../Peg/Compiler/Token/Literal.php | 2 +- .../Peg/Compiler/Token/Recurse.php | 63 +++--- lib/hafriedlander/Peg/Parser/Basic.php | 179 +++++++++------ lib/hafriedlander/Peg/Parser/CachedRegexp.php | 26 +-- 8 files changed, 363 insertions(+), 264 deletions(-) diff --git a/lib/hafriedlander/Peg/Compiler/PHPBuilder.php b/lib/hafriedlander/Peg/Compiler/PHPBuilder.php index 743e195..0a9b792 100644 --- a/lib/hafriedlander/Peg/Compiler/PHPBuilder.php +++ b/lib/hafriedlander/Peg/Compiler/PHPBuilder.php @@ -4,6 +4,8 @@ class PHPBuilder { + public $needsStack = false; + static function build () { return new PHPBuilder(); } @@ -12,30 +14,50 @@ function __construct() { $this->lines = []; } - function l() { - foreach (\func_get_args() as $lines) { - if (!$lines) continue; + function l(...$args) { + + foreach ($args as $lines) { + + if (!$lines) { + continue; + } - if (is_string($lines)) $lines = preg_split('/\r\n|\r|\n/', $lines); - if (!$lines) continue; + if (is_string($lines)) { + $lines = preg_split('/\r\n|\r|\n/', $lines); + } - if ($lines instanceof PHPBuilder) $lines = $lines->lines; - else $lines = \array_map('ltrim', $lines); - if (!$lines) continue; + if ($lines instanceof PHPBuilder) { + if ($lines->needsStack) { + $this->needsStack = true; + } + $lines = $lines->lines; + } else { + $lines = \array_map('rtrim', $lines); + } + + if (!$lines) { + continue; + } $this->lines = \array_merge($this->lines, $lines); + } + return $this; } - function b() { - $args = \func_get_args(); + function b(...$args) { + $entry = \array_shift($args); $block = new PHPBuilder(); - call_user_func_array(array($block, 'l'), $args); + $block->l(...$args); + + if ($block->needsStack) { + $this->needsStack = true; + } - $this->lines[] = array($entry, $block->lines); + $this->lines[] = [$entry, $block->lines]; return $this; } diff --git a/lib/hafriedlander/Peg/Compiler/PHPWriter.php b/lib/hafriedlander/Peg/Compiler/PHPWriter.php index 56c0543..cf93c03 100644 --- a/lib/hafriedlander/Peg/Compiler/PHPWriter.php +++ b/lib/hafriedlander/Peg/Compiler/PHPWriter.php @@ -7,73 +7,76 @@ */ class PHPWriter { - static $varid = 0 ; + public static $varid = 0; - function varid() { - return '_' . (self::$varid++) ; + public function varid() { + return '_' . (self::$varid++); } - function function_name( $str ) { - $str = \preg_replace( '/-/', '_', $str ) ; - $str = \preg_replace( '/\$/', 'DLR', $str ) ; - $str = \preg_replace( '/\*/', 'STR', $str ) ; - $str = \preg_replace( '/[^\w]+/', '', $str ) ; - return $str ; + public function function_name($str) { + $str = \preg_replace('/-/', '_', $str); + $str = \preg_replace('/\$/', 'DLR', $str); + $str = \preg_replace('/\*/', 'STR', $str); + $str = \preg_replace('/[^\w]+/', '', $str); + return $str; } - function save($id) { + public function save($id) { return PHPBuilder::build() ->l( - '$res'.$id.' = $result;', - '$pos'.$id.' = $this->pos;' - ); + '$res' . $id . ' = $result;', + '$pos' . $id . ' = $this->pos;' + ); } - function restore( $id, $remove = \false ) { + public function restore($id, $remove = \false) { $code = PHPBuilder::build() ->l( - '$result = $res'.$id.';', - '$this->pos = $pos'.$id.';' - ); + '$result = $res' . $id . ';', + '$this->pos = $pos' . $id . ';' + ); - if ( $remove ) $code->l( - 'unset( $res'.$id.' );', - 'unset( $pos'.$id.' );' - ); + if ($remove) { + $code->l( + 'unset($res' . $id . ', $pos' . $id . ');' + ); + } - return $code ; + return $code; } - function match_fail_conditional( $on, $match = \null, $fail = \null ) { + public function match_fail_conditional($on, $match = \null, $fail = \null) { return PHPBuilder::build() - ->b( 'if (' . $on . ')', - $match, - 'MATCH' - ) - ->b( 'else', - $fail, - 'FAIL' - ); + ->b( + 'if (' . $on . ')', + $match, + 'MATCH' + ) + ->b( + 'else', + $fail, + 'FAIL' + ); } - function match_fail_block( $code ) { - $id = $this->varid() ; + public function match_fail_block($code) { + $id = $this->varid(); return PHPBuilder::build() ->l( - '$'.$id.' = \null;' - ) - ->b( 'do', - $code->replace([ - 'MBREAK' => '$'.$id.' = \true; break;', - 'FBREAK' => '$'.$id.' = \false; break;' - ]) - ) + '$' . $id . ' = \null;' + ) + ->b( + 'do', + $code->replace([ + 'MBREAK' => '$' . $id . ' = \true; break;', + 'FBREAK' => '$' . $id . ' = \false; break;' + ]) + ) ->l( - 'while(0);' - ) - ->b( 'if( $'.$id.' === \true )', 'MATCH' ) - ->b( 'if( $'.$id.' === \false)', 'FAIL' ) - ; + 'while(false);' + ) + ->b('if( $' . $id . ' === \true )', 'MATCH') + ->b('if( $' . $id . ' === \false)', 'FAIL'); } } diff --git a/lib/hafriedlander/Peg/Compiler/Rule.php b/lib/hafriedlander/Peg/Compiler/Rule.php index 00edeb9..8e26e36 100644 --- a/lib/hafriedlander/Peg/Compiler/Rule.php +++ b/lib/hafriedlander/Peg/Compiler/Rule.php @@ -11,7 +11,7 @@ */ class Rule extends PHPWriter { - static $rule_rx = '@ + public static $rule_rx = '@ (? [\w-]+) # The name of the rule ( \s+ extends \s+ (?[\w-]+) )? # The extends word ( \s* \( (?.*) \) )? # Any variable setters @@ -23,21 +23,21 @@ class Rule extends PHPWriter { (?[\s\S]*) @x'; - static $argument_rx = '@ + public static $argument_rx = '@ ( [^=]+ ) # Name = # Seperator ( [^=,]+ ) # Variable (,|$) @x'; - static $replacement_rx = '@ + public static $replacement_rx = '@ ( ([^=]|=[^>])+ ) # What to replace => # The replacement mark ( [^,]+ ) # What to replace it with (,|$) @x'; - static $function_rx = '@^\s+function\s+([^\s(]+)\s*(.*)@' ; + public static $function_rx = '@^\s+function\s+([^\s(]+)\s*(.*)@' ; protected $parser; protected $lines; @@ -47,13 +47,16 @@ class Rule extends PHPWriter { public $mode; public $rule; - function __construct($parser, $lines) { + public function __construct($parser, $lines) { $this->parser = $parser; $this->lines = $lines; // Find the first line (if any) that's an attached function definition. Can skip first line (unless this block is malformed) - for ($i = 1; $i < \count($lines); $i++) { - if (\preg_match(self::$function_rx, $lines[$i])) break; + $lineCount = \count($lines); + for ($i = 1; $i < $lineCount; $i++) { + if (\preg_match(self::$function_rx, $lines[$i])) { + break; + } } // Then split into the two parts @@ -62,13 +65,17 @@ function __construct($parser, $lines) { // Parse out the spec $spec = \implode("\n", $spec); - if (!\preg_match(self::$rule_rx, $spec, $specmatch)) \user_error('Malformed rule spec ' . $spec, E_USER_ERROR); + if (!\preg_match(self::$rule_rx, $spec, $specmatch)) { + \user_error('Malformed rule spec ' . $spec, E_USER_ERROR); + } $this->name = $specmatch['name']; if ($specmatch['extends']) { $this->extends = $this->parser->rules[$specmatch['extends']]; - if (!$this->extends) \user_error('Extended rule '.$specmatch['extends'].' is not defined before being extended', E_USER_ERROR); + if (!$this->extends) { + \user_error('Extended rule ' . $specmatch['extends'] . ' is not defined before being extended', E_USER_ERROR); + } } $this->arguments = []; @@ -76,7 +83,7 @@ function __construct($parser, $lines) { if ($specmatch['arguments']) { \preg_match_all(self::$argument_rx, $specmatch['arguments'], $arguments, \PREG_SET_ORDER); - foreach ($arguments as $argument){ + foreach ($arguments as $argument) { $this->arguments[\trim($argument[1])] = \trim($argument[2]); } } @@ -86,9 +93,10 @@ function __construct($parser, $lines) { if ($this->mode == 'rule') { $this->rule = $specmatch['rule']; $this->parse_rule() ; - } - else { - if (!$this->extends) user_error('Replace matcher, but not on an extends rule', E_USER_ERROR); + } else { + if (!$this->extends) { + user_error('Replace matcher, but not on an extends rule', E_USER_ERROR); + } $this->replacements = []; \preg_match_all(self::$replacement_rx, $specmatch['rule'], $replacements, \PREG_SET_ORDER); @@ -97,9 +105,12 @@ function __construct($parser, $lines) { foreach ($replacements as $replacement) { $search = \trim($replacement[1]); - $replace = \trim($replacement[3]); if ($replace == "''" || $replace == '""') $replace = ""; + $replace = \trim($replacement[3]); + if ($replace == "''" || $replace == '""') { + $replace = ''; + } - $rule = \str_replace($search, ' '.$replace.' ', $rule); + $rule = \str_replace($search, ' ' . $replace . ' ', $rule); } $this->rule = $rule; @@ -112,88 +123,88 @@ function __construct($parser, $lines) { $active_function = \null ; - foreach( $funcs as $line ) { + foreach ($funcs as $line) { /* Handle function definitions */ - if ( \preg_match( self::$function_rx, $line, $func_match, 0 ) ) { + if (\preg_match(self::$function_rx, $line, $func_match, 0)) { $active_function = $func_match[1]; $this->functions[$active_function] = $func_match[2] . \PHP_EOL; + } else { + $this->functions[$active_function] .= $line . \PHP_EOL ; } - else $this->functions[$active_function] .= $line . \PHP_EOL ; } } /* Manual parsing, because we can't bootstrap ourselves yet */ - function parse_rule() { - $rule = \trim( $this->rule ) ; + public function parse_rule() { + $rule = \trim($this->rule) ; $tokens = [] ; - $this->tokenize( $rule, $tokens ) ; - $this->parsed = ( \count( $tokens ) == 1 ? \array_pop( $tokens ) : new Token\Sequence( $tokens ) ) ; + $this->tokenize($rule, $tokens) ; + $this->parsed = (\count($tokens) == 1 ? \array_pop($tokens) : new Token\Sequence($tokens)) ; } - static $rx_rx = '@\G/( + public static $rx_rx = '@\G/( ((\\\\\\\\)*\\\\/) # Escaped \/, making sure to catch all the \\ first, so that we dont think \\/ is an escaped / | [^/] # Anything except / )*/[a-zA-Z]*@xu' ; - function tokenize( $str, &$tokens, $o = 0 ) { + public function tokenize($str, &$tokens, $o = 0) { $length = \strlen($str); $pending = new Rule\PendingState() ; - while ( $o < $length ) { - + while ($o < $length) { /* Absorb white-space */ - if ( \preg_match( '/\G\s+/', $str, $match, 0, $o ) ) { - $o += \strlen( $match[0] ) ; + if (\preg_match('/\G\s+/', $str, $match, 0, $o)) { + $o += \strlen($match[0]) ; } /* Handle expression labels */ - elseif ( \preg_match( '/\G(\w*):/', $str, $match, 0, $o ) ) { - $pending->set( 'tag', isset( $match[1] ) ? $match[1] : '' ) ; - $o += \strlen( $match[0] ) ; + elseif (\preg_match('/\G(\w*):/', $str, $match, 0, $o)) { + $pending->set('tag', isset($match[1]) ? $match[1] : '') ; + $o += \strlen($match[0]) ; } /* Handle descent token */ - elseif ( \preg_match( '/\G[\w-]+/', $str, $match, 0, $o ) ) { - $tokens[] = $t = new Token\Recurse( $match[0] ) ; - $pending->apply_if_present( $t ) ; - $o += \strlen( $match[0] ) ; + elseif (\preg_match('/\G[\w-]+/', $str, $match, 0, $o)) { + $tokens[] = $t = new Token\Recurse($match[0]) ; + $pending->apply_if_present($t) ; + $o += \strlen($match[0]) ; } /* Handle " quoted literals */ - elseif ( \preg_match( '/\G"[^"]*"/', $str, $match, 0, $o ) ) { - $tokens[] = $t = new Token\Literal( $match[0] ) ; - $pending->apply_if_present( $t ) ; - $o += \strlen( $match[0] ) ; + elseif (\preg_match('/\G"[^"]*"/', $str, $match, 0, $o)) { + $tokens[] = $t = new Token\Literal($match[0]) ; + $pending->apply_if_present($t) ; + $o += \strlen($match[0]) ; } /* Handle ' quoted literals */ - elseif ( \preg_match( "/\G'[^']*'/", $str, $match, 0, $o ) ) { - $tokens[] = $t = new Token\Literal( $match[0] ) ; - $pending->apply_if_present( $t ) ; - $o += \strlen( $match[0] ) ; + elseif (\preg_match("/\G'[^']*'/", $str, $match, 0, $o)) { + $tokens[] = $t = new Token\Literal($match[0]) ; + $pending->apply_if_present($t) ; + $o += \strlen($match[0]) ; } /* Handle regexs */ - elseif ( \preg_match( self::$rx_rx, $str, $match, 0, $o ) ) { - $tokens[] = $t = new Token\Regex( $match[0] ) ; - $pending->apply_if_present( $t ) ; - $o += \strlen( $match[0] ) ; + elseif (\preg_match(self::$rx_rx, $str, $match, 0, $o)) { + $tokens[] = $t = new Token\Regex($match[0]) ; + $pending->apply_if_present($t) ; + $o += \strlen($match[0]) ; } /* Handle $ call literals */ - elseif ( \preg_match( '/\G\$(\w+)/', $str, $match, 0, $o ) ) { - $tokens[] = $t = new Token\ExpressionedRecurse( $match[1] ) ; - $pending->apply_if_present( $t ) ; - $o += \strlen( $match[0] ) ; + elseif (\preg_match('/\G\$(\w+)/', $str, $match, 0, $o)) { + $tokens[] = $t = new Token\ExpressionedRecurse($match[1]) ; + $pending->apply_if_present($t) ; + $o += \strlen($match[0]) ; } /* Handle flags */ - elseif ( \preg_match( '/\G\@(\w+)/', $str, $match, 0, $o ) ) { - $l = \count( $tokens ) - 1 ; - $o += \strlen( $match[0] ) ; - \user_error( "TODO: Flags not currently supported", E_USER_WARNING ) ; + elseif (\preg_match('/\G\@(\w+)/', $str, $match, 0, $o)) { + $l = \count($tokens) - 1 ; + $o += \strlen($match[0]) ; + \user_error('TODO: Flags not currently supported', E_USER_WARNING) ; } /* Handle control tokens */ else { - $c = \substr( $str, $o, 1 ) ; - $l = \count( $tokens ) - 1 ; + $c = \substr($str, $o, 1) ; + $l = \count($tokens) - 1 ; $o += 1 ; - switch( $c ) { + switch ($c) { case '?': $tokens[$l]->quantifier = ['min' => 0, 'max' => 1]; break ; @@ -206,42 +217,42 @@ function tokenize( $str, &$tokens, $o = 0 ) { case '{': if (\preg_match('/\G\{([0-9]+)(,([0-9]*))?\}/', $str, $matches, 0, $o - 1)) { $min = $max = (int) $matches[1]; - if(isset($matches[2])) { + if (isset($matches[2])) { $max = $matches[3] ? (int) $matches[3] : \null; } $tokens[$l]->quantifier = ['min' => $min, 'max' => $max]; $o += \strlen($matches[0]) - 1; } else { throw new \Exception(sprintf( - "Unknown quantifier: %s", + 'Unknown quantifier: %s', substr($str, $o, 10) )); } break; case '&': - $pending->set( 'positive_lookahead' ) ; + $pending->set('positive_lookahead') ; break ; case '!': - $pending->set( 'negative_lookahead' ) ; + $pending->set('negative_lookahead') ; break ; case '.': - $pending->set( 'silent' ); + $pending->set('silent'); break; case '[': case ']': - $tokens[] = new Token\Whitespace( \false ) ; + $tokens[] = new Token\Whitespace(\false) ; break ; case '<': case '>': - $tokens[] = new Token\Whitespace( \true ) ; + $tokens[] = new Token\Whitespace(\true) ; break ; case '(': $subtokens = [] ; - $o = $this->tokenize( $str, $subtokens, $o ) ; - $tokens[] = $t = new Token\Sequence( $subtokens ) ; $pending->apply_if_present( $t ) ; + $o = $this->tokenize($str, $subtokens, $o) ; + $tokens[] = $t = new Token\Sequence($subtokens) ; $pending->apply_if_present($t) ; break ; case ')': return $o ; @@ -249,18 +260,18 @@ function tokenize( $str, &$tokens, $o = 0 ) { case '|': $option1 = $tokens ; $option2 = [] ; - $o = $this->tokenize( $str, $option2, $o ) ; + $o = $this->tokenize($str, $option2, $o) ; - $option1 = (\count($option1) == 1) ? $option1[0] : new Token\Sequence( $option1 ); - $option2 = (\count($option2) == 1) ? $option2[0] : new Token\Sequence( $option2 ); + $option1 = (\count($option1) == 1) ? $option1[0] : new Token\Sequence($option1); + $option2 = (\count($option2) == 1) ? $option2[0] : new Token\Sequence($option2); - $pending->apply_if_present( $option2 ) ; + $pending->apply_if_present($option2) ; - $tokens = [new Token\Option( $option1, $option2 )] ; + $tokens = [new Token\Option($option1, $option2)] ; return $o ; default: - \user_error( "Can't parse '$c' - attempting to skip", E_USER_WARNING ) ; + \user_error("Can't parse '$c' - attempting to skip", E_USER_WARNING) ; } } } @@ -271,49 +282,62 @@ function tokenize( $str, &$tokens, $o = 0 ) { /** * Generate the PHP code for a function to match against a string for this rule */ - function compile($indent) { - $function_name = $this->function_name( $this->name ) ; + public function compile($indent) { + $function_name = $this->function_name($this->name) ; // Build the typestack - $typestack = []; $class=$this; + $typestack = []; + $class = $this; do { $typestack[] = $this->function_name($class->name); - } - while($class = $class->extends); + } while ($class = $class->extends); $typestack = "['" . \implode("','", $typestack) . "']"; // Build an array of additional arguments to add to result node (if any) if (empty($this->arguments)) { - $arguments = '\null'; - } - else { - $arguments = "["; - foreach ($this->arguments as $k=>$v) { $arguments .= "'$k' => '$v', "; } - $arguments .= "]"; + $arguments = false; + } else { + $arguments = '['; + foreach ($this->arguments as $k => $v) { + $arguments .= "'$k' => '$v', "; + } + $arguments .= ']'; } $match = PHPBuilder::build() ; $match->l("protected \$match_{$function_name}_typestack = $typestack;"); - $match->b( "function match_{$function_name} (\$stack = [])", - '$matchrule = "'.$function_name.'"; $result = $this->construct($matchrule, $matchrule, '.$arguments.'); $newStack = \array_merge($stack, [$result]);', - $this->parsed->compile()->replace(array( - 'MATCH' => 'return $this->finalise($result);', - 'FAIL' => 'return \false;' - )) + $block = $this->parsed->compile()->replace([ + 'MATCH' => 'return $this->finalise($result);', + 'FAIL' => 'return \false;' + ]); + + // This is only needed if '$newStack' variable is actually used. + $newStack = $block->needsStack + ? '$newStack = \array_merge($stack, [$result]);' + : ''; + + $arguments = $arguments ? ", {$arguments}" : ''; + $match->b( + "function match_{$function_name} (\$stack = [])", + '$matchrule = "' . $function_name . '"; $result = $this->construct($matchrule, $matchrule' . $arguments . '); ' . $newStack , + $block ); $functions = [] ; - foreach( $this->functions as $name => $function ) { - $function_name = $this->function_name( \preg_match( '/^_/', $name ) ? $this->name.$name : $this->name.'_'.$name ) ; - $functions[] = \implode( \PHP_EOL, array( + foreach ($this->functions as $name => $function) { + $function_name = $this->function_name(\preg_match('/^_/', $name) ? $this->name . $name : $this->name . '_' . $name) ; + $functions[] = \implode(\PHP_EOL, [ 'public function ' . $function_name . ' ' . $function - )); + ]); } // print_r( $match ) ; return '' ; - return $match->render(\null, $indent) . \PHP_EOL . \PHP_EOL . \implode( \PHP_EOL, $functions ) ; + return $match->render(\null, $indent) + . \PHP_EOL + . \PHP_EOL + . \implode(\PHP_EOL, $functions) ; } } diff --git a/lib/hafriedlander/Peg/Compiler/Token.php b/lib/hafriedlander/Peg/Compiler/Token.php index f0d7202..e4189cf 100644 --- a/lib/hafriedlander/Peg/Compiler/Token.php +++ b/lib/hafriedlander/Peg/Compiler/Token.php @@ -153,7 +153,7 @@ protected function n_or_more($code, $id, $n) protected function n_to_x($code, $id, $min, $max) { if(1 === $min && 1 === $max) return $code; - + $counterName = '$count' . $id; return PHPBuilder::build()->l( $counterName . ' = 0;' diff --git a/lib/hafriedlander/Peg/Compiler/Token/Literal.php b/lib/hafriedlander/Peg/Compiler/Token/Literal.php index d5811b5..cb897af 100644 --- a/lib/hafriedlander/Peg/Compiler/Token/Literal.php +++ b/lib/hafriedlander/Peg/Compiler/Token/Literal.php @@ -12,7 +12,7 @@ function __construct( $value ) { function match_code( $value ) { // We inline single-character matches for speed if ( !$this->contains_expression($value) && \strlen( eval( 'return '. $value . ';' ) ) === 1 ) { - return $this->match_fail_conditional( '\substr($this->string,$this->pos,1) === '.$value, + return $this->match_fail_conditional( '\substr($this->string, $this->pos, 1) === '.$value, PHPBuilder::build()->l( '$this->pos += 1;', $this->set_text($value) diff --git a/lib/hafriedlander/Peg/Compiler/Token/Recurse.php b/lib/hafriedlander/Peg/Compiler/Token/Recurse.php index e65aaf5..d55c908 100644 --- a/lib/hafriedlander/Peg/Compiler/Token/Recurse.php +++ b/lib/hafriedlander/Peg/Compiler/Token/Recurse.php @@ -6,58 +6,67 @@ use hafriedlander\Peg\Compiler\PHPBuilder; class Recurse extends Token { - function __construct( $value ) { - parent::__construct( 'recurse', $value ) ; + + public function __construct($value) { + parent::__construct('recurse', $value) ; } - function match_function( $value ) { - return "'".$this->function_name($value)."'"; + public function match_function($value) { + return $this->function_name($value); } - function match_code( $value ) { + public function match_code($value) { $function = $this->match_function($value) ; - $storetag = $this->function_name( $this->tag ? $this->tag : $this->match_function($value) ) ; + $storetag = $this->function_name($this->tag ? $this->tag : $this->match_function($value)) ; - if ( \hafriedlander\Peg\Compiler::$debug ) { + if (\hafriedlander\Peg\Compiler::$debug) { $debug_header = PHPBuilder::build() ->l( - '$indent = str_repeat("\e[90m| \e[0m", $this->depth / 2);', - '$this->depth += 2;', - '$sub = (strlen( $this->string ) - $this->pos > 40) ? substr($this->string, $this->pos, 40) . "...") : substr($this->string, $this->pos);', - '$sub = preg_replace(\'/(\r|\n)+/\', " {NL} ", $sub);', - sprintf('print $indent . "Matching \e[32m%s\e[0m \"\e[36m".$sub."\e[0m\" \n";', trim($function, "'")) - ); + '$indent = str_repeat("\e[90m| \e[0m", $this->depth / 2);', + '$this->depth += 2;', + '$sub = (strlen( $this->string ) - $this->pos > 40) ? substr($this->string, $this->pos, 40) . "...") : substr($this->string, $this->pos);', + '$sub = preg_replace(\'/(\r|\n)+/\', " {NL} ", $sub);', + sprintf('print $indent . "Matching \e[32m%s\e[0m \"\e[36m".$sub."\e[0m\" \n";', $function) + ); $debug_match = PHPBuilder::build() ->l( - 'print $indent . "\e[1m\e[42mOK\n\e[0m";', - '$this->depth -= 2;' - ); + 'print $indent . "\e[1m\e[42mOK\n\e[0m";', + '$this->depth -= 2;' + ); $debug_fail = PHPBuilder::build() ->l( - 'print $indent . "-\n";', - '$this->depth -= 2;' - ); - } - else { + 'print $indent . "-\n";', + '$this->depth -= 2;' + ); + } else { $debug_header = $debug_match = $debug_fail = \null ; } - return PHPBuilder::build()->l( - '$matcher = \'match_\'.'.$function.'; $key = $matcher; $pos = $this->pos;', + $builder = PHPBuilder::build()->l( + '$key = \'match_' . $function . '\'; $pos = $this->pos;', $debug_header, - '$subres = $this->packhas($key, $pos) ? $this->packread($key, $pos) : $this->packwrite($key, $pos, $this->$matcher($newStack));', - $this->match_fail_conditional( '$subres !== \false', + '$subres = $this->packhas($key, $pos)' . "\n\t" + . '? $this->packread($key, $pos)' . "\n\t" + . ': $this->packwrite($key, $pos, $this->match_' . $function . '($newStack));', + $this->match_fail_conditional( + '$subres !== \false', PHPBuilder::build()->l( $debug_match, $this->tag === \false ? '$this->store($result, $subres);' : - '$this->store($result, $subres, "'.$storetag.'");' + '$this->store($result, $subres, "' . $storetag . '");' ), PHPBuilder::build()->l( $debug_fail ) - )); + ) + ); + + $builder->needsStack = true; + return $builder; + } + } diff --git a/lib/hafriedlander/Peg/Parser/Basic.php b/lib/hafriedlander/Peg/Parser/Basic.php index c6b9608..7b85703 100644 --- a/lib/hafriedlander/Peg/Parser/Basic.php +++ b/lib/hafriedlander/Peg/Parser/Basic.php @@ -10,133 +10,178 @@ * for result construction and building */ class Basic { - function __construct( $string ) { - $this->string = $string ; - $this->pos = 0 ; - $this->depth = 0 ; + private $isCallableCache = []; - $this->regexps = [] ; + public function __construct($string) { + $this->string = $string; + $this->pos = 0; + $this->depth = 0; + $this->regexps = []; } - function whitespace() { - $matched = \preg_match( '/[ \t]+/', $this->string, $matches, \PREG_OFFSET_CAPTURE, $this->pos ) ; - if ( $matched && $matches[0][1] == $this->pos ) { - $this->pos += \strlen( $matches[0][0] ); - return ' ' ; + protected function isCallable($name) { + return $this->isCallableCache[$name] + ?? ($this->isCallableCache[$name] = \is_callable($this, $name)); + } + + public function whitespace() { + + $matched = \preg_match( + '/[ \t]+/', + $this->string, + $matches, + \PREG_OFFSET_CAPTURE, + $this->pos + ); + + if ($matched && $matches[0][1] === $this->pos) { + $this->pos += \strlen($matches[0][0]); + return ' '; } - return \false ; + + return \false; + } - function literal( $token ) { + public function literal($token) { /* Debugging: * / print( "Looking for token '$token' @ '" . substr( $this->string, $this->pos ) . "'\n" ) ; /* */ - $toklen = \strlen( $token ) ; - $substr = \substr( $this->string, $this->pos, $toklen ) ; - if ( $substr == $token ) { - $this->pos += $toklen ; - return $token ; + $toklen = \strlen($token); + $substr = \substr($this->string, $this->pos, $toklen); + if ($substr === $token) { + $this->pos += $toklen; + return $token; } - return \false ; + + return \false; } - function rx( $rx ) { - if ( !isset( $this->regexps[$rx] ) ) $this->regexps[$rx] = new CachedRegexp( $this, $rx ) ; - return $this->regexps[$rx]->match() ; + public function rx($rx) { + + if (!isset($this->regexps[$rx])) { + $this->regexps[$rx] = new CachedRegexp($this, $rx); + } + + return $this->regexps[$rx]->match(); } - function expression( $result, $stack, $value ) { - $stack[] = $result; $rv = \false; + public function expression($result, $stack, $value) { + $stack[] = $result; + $rv = \false; /* Search backwards through the sub-expression stacks */ - for ( $i = \count($stack) - 1 ; $i >= 0 ; $i-- ) { + for ($i = \count($stack) - 1; $i >= 0; $i--) { $node = $stack[$i]; - if ( isset($node[$value]) ) { $rv = $node[$value]; break; } + if (isset($node[$value])) { + $rv = $node[$value]; + break; + } foreach ($this->typestack($node['_matchrule']) as $type) { - $callback = array($this, "{$type}_DLR{$value}"); - if ( is_callable( $callback ) ) { $rv = \call_user_func( $callback ) ; if ($rv !== \false) break; } + $callback = [$this, "{$type}_DLR{$value}"]; + if (is_callable($callback)) { + $rv = \call_user_func($callback); + if ($rv !== \false) { + break; + } + } } } - if ($rv === \false) $rv = @$this->$value; - if ($rv === \false) $rv = @$this->$value(); + if ($rv === \false) { + $rv = @$this->$value; + } + + if ($rv === \false) { + $rv = @$this->$value(); + } return \is_array($rv) ? $rv['text'] : ($rv ? $rv : ''); } - function packhas( $key, $pos ) { - return \false ; + public function packhas($key, $pos) { + return \false; } - function packread( $key, $pos ) { - throw new \Exception('PackRead after PackHas=>\false in Parser.php') ; + public function packread($key, $pos) { + throw new \Exception('PackRead after PackHas=>\false in Parser.php'); } - function packwrite( $key, $pos, $res ) { - return $res ; + public function packwrite($key, $pos, $res) { + return $res; } - function typestack( $name ) { - $prop = "match_{$name}_typestack"; - return $this->$prop; + public function typestack($name) { + return $this->{"match_{$name}_typestack"}; } - function construct( $matchrule, $name, $arguments = \null ) { + public function construct($matchrule, $name, $arguments = []) { - $result = array( '_matchrule' => $matchrule, 'name' => $name, 'text' => '' ); - $result['offset'] = $this->pos; + $result = [ + '_matchrule' => $matchrule, + 'name' => $name, + 'text' => '', + 'offset' => $this->pos, + ]; - if ($arguments) $result = array_merge($result, $arguments) ; + if ($arguments) { + $result = array_merge($result, $arguments); + } foreach ($this->typestack($matchrule) as $type) { - $callback = array( $this, "{$type}__construct" ) ; - if ( \is_callable( $callback ) ) { - \call_user_func_array( $callback, array( &$result ) ) ; + if ($method = $this->isCallable("{$type}__construct")) { + $this->{$method}(...[&$result]); break; } } - return $result ; - } + return $result; - function finalise( &$result ) { + } + public function finalise(&$result) { foreach ($this->typestack($result['_matchrule']) as $type) { - $callback = array( $this, "{$type}__finalise" ) ; - if ( \is_callable( $callback ) ) { - \call_user_func_array( $callback, array( &$result ) ) ; + if ($method = $this->isCallable("{$type}__finalise")) { + $this->{$method}(...[&$result]); break; } } - return $result ; + return $result; } - function store ( &$result, $subres, $storetag = \null ) { - + public function store(&$result, $subres, $storetag = \null) { $result['text'] .= $subres['text']; $storecalled = \false; foreach ($this->typestack($result['_matchrule']) as $type) { - $callback = array( $this, $storetag ? "{$type}_{$storetag}" : "{$type}_{$subres['name']}" ) ; - if ( \is_callable( $callback ) ) { - \call_user_func_array( $callback, array( &$result, $subres ) ) ; - $storecalled = \true; break; + if ($method = $this->isCallable($storetag ? "{$type}_{$storetag}" : "{$type}_{$subres['name']}")) { + $this->{$method}(...[&$result, $subres]); + $storecalled = \true; + break; } - $globalcb = array( $this, "{$type}_STR" ) ; - if ( \is_callable( $globalcb ) ) { - \call_user_func_array( $globalcb, array( &$result, $subres ) ) ; - $storecalled = \true; break; + if ($method = $this->isCallable("{$type}_STR")) { + $this->{$method}(...[&$result, $subres]); + $storecalled = \true; + break; } } - if ( $storetag && !$storecalled ) { - if ( !isset( $result[$storetag] ) ) $result[$storetag] = $subres ; - else { - if ( isset( $result[$storetag]['text'] ) ) $result[$storetag] = array( $result[$storetag] ) ; - $result[$storetag][] = $subres ; + + if ($storetag && !$storecalled) { + if (!isset($result[$storetag])) { + $result[$storetag] = $subres; + } else { + if (isset($result[$storetag]['text'])) { + $result[$storetag] = [$result[$storetag]]; + } + + $result[$storetag][] = $subres; + } } + } + } diff --git a/lib/hafriedlander/Peg/Parser/CachedRegexp.php b/lib/hafriedlander/Peg/Parser/CachedRegexp.php index f63e871..8561c58 100644 --- a/lib/hafriedlander/Peg/Parser/CachedRegexp.php +++ b/lib/hafriedlander/Peg/Parser/CachedRegexp.php @@ -12,20 +12,16 @@ */ class CachedRegexp { - const DEFAULT_MODIFIERS = [ - "S", // Extra analysis is performed. - "x", // Ignore extra whitespace. - ]; + // S: Extra analysis is performed. + // x: Ignore extra whitespace. + const DEFAULT_MODIFIERS = 'Sx'; - public $modifiers = []; - - function __construct($parser, $rx) { + public function __construct($parser, $rx) { $this->parser = $parser; - - $modifiers = \str_split(\substr($rx, \strrpos($rx, '/') + 1)); - $this->modifiers = \array_unique(\array_merge(self::DEFAULT_MODIFIERS, $modifiers)); - $this->rx = $rx . \implode('', $this->modifiers); + // Modifiers can be specified multiple times, so no need to check for + // uniqueness. + $this->rx = $rx . self::DEFAULT_MODIFIERS; $this->matches = \null; $this->match_pos = \null; // \null is no-match-to-end-of-string, unless check_pos also == \null, in which case means undefined. @@ -33,12 +29,13 @@ function __construct($parser, $rx) { } - function match() { + public function match() { $current_pos = $this->parser->pos; - $dirty = $this->check_pos === \null || $this->check_pos > $current_pos || ($this->match_pos !== \null && $this->match_pos < $current_pos); + $dirty = $this->check_pos === \null + || $this->check_pos > $current_pos + || ($this->match_pos !== \null && $this->match_pos < $current_pos); if ($dirty) { - $this->check_pos = $current_pos; $matched = \preg_match($this->rx, $this->parser->string, $this->matches, \PREG_OFFSET_CAPTURE, $this->check_pos); @@ -47,7 +44,6 @@ function match() { } else { $this->match_pos = \null; } - } if ($this->match_pos === $current_pos) {