From 235b150e0921292b4b98df3059bb5ffc3220631c Mon Sep 17 00:00:00 2001 From: Bogdan Condorachi Date: Wed, 15 May 2024 22:38:27 +0300 Subject: [PATCH 1/5] Add python support --- src/Highlighter.php | 2 + .../Python/Patterns/PyArgumentPattern.php | 24 ++++++ .../Python/Patterns/PyBooleanPattern.php | 24 ++++++ .../Python/Patterns/PyBuiltinPattern.php | 38 ++++++++++ .../Python/Patterns/PyClassNamePattern.php | 27 +++++++ .../Python/Patterns/PyCommentPattern.php | 24 ++++++ .../Python/Patterns/PyDecoratorPattern.php | 27 +++++++ .../Python/Patterns/PyFunctionPattern.php | 26 +++++++ .../Python/Patterns/PyKeywordPattern.php | 33 +++++++++ .../Python/Patterns/PyOperatorPattern.php | 24 ++++++ .../PyTripleDoubleQuoteStringPattern.php | 24 ++++++ .../PyTripleSingleQuoteStringPattern.php | 24 ++++++ src/Languages/Python/PythonLanguage.php | 74 +++++++++++++++++++ 13 files changed, 371 insertions(+) create mode 100644 src/Languages/Python/Patterns/PyArgumentPattern.php create mode 100644 src/Languages/Python/Patterns/PyBooleanPattern.php create mode 100644 src/Languages/Python/Patterns/PyBuiltinPattern.php create mode 100644 src/Languages/Python/Patterns/PyClassNamePattern.php create mode 100644 src/Languages/Python/Patterns/PyCommentPattern.php create mode 100644 src/Languages/Python/Patterns/PyDecoratorPattern.php create mode 100644 src/Languages/Python/Patterns/PyFunctionPattern.php create mode 100644 src/Languages/Python/Patterns/PyKeywordPattern.php create mode 100644 src/Languages/Python/Patterns/PyOperatorPattern.php create mode 100644 src/Languages/Python/Patterns/PyTripleDoubleQuoteStringPattern.php create mode 100644 src/Languages/Python/Patterns/PyTripleSingleQuoteStringPattern.php create mode 100644 src/Languages/Python/PythonLanguage.php diff --git a/src/Highlighter.php b/src/Highlighter.php index 1c76e06..1e24796 100644 --- a/src/Highlighter.php +++ b/src/Highlighter.php @@ -17,6 +17,7 @@ use Tempest\Highlight\Languages\JavaScript\JavaScriptLanguage; use Tempest\Highlight\Languages\Json\JsonLanguage; use Tempest\Highlight\Languages\Php\PhpLanguage; +use Tempest\Highlight\Languages\Python\PythonLanguage; use Tempest\Highlight\Languages\Sql\SqlLanguage; use Tempest\Highlight\Languages\Text\TextLanguage; use Tempest\Highlight\Languages\Twig\TwigLanguage; @@ -49,6 +50,7 @@ public function __construct( ->addLanguage(new JavaScriptLanguage()) ->addLanguage(new JsonLanguage()) ->addLanguage(new PhpLanguage()) + ->addLanguage(new PythonLanguage()) ->addLanguage(new SqlLanguage()) ->addLanguage(new XmlLanguage()) ->addLanguage(new YamlLanguage()) diff --git a/src/Languages/Python/Patterns/PyArgumentPattern.php b/src/Languages/Python/Patterns/PyArgumentPattern.php new file mode 100644 index 0000000..fd5e1e4 --- /dev/null +++ b/src/Languages/Python/Patterns/PyArgumentPattern.php @@ -0,0 +1,24 @@ +\w+)(s*=)'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::VARIABLE; + } +} diff --git a/src/Languages/Python/Patterns/PyBooleanPattern.php b/src/Languages/Python/Patterns/PyBooleanPattern.php new file mode 100644 index 0000000..dd7f83d --- /dev/null +++ b/src/Languages/Python/Patterns/PyBooleanPattern.php @@ -0,0 +1,24 @@ +(?:False|None|True))\b'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::TYPE; + } +} diff --git a/src/Languages/Python/Patterns/PyBuiltinPattern.php b/src/Languages/Python/Patterns/PyBuiltinPattern.php new file mode 100644 index 0000000..60275bb --- /dev/null +++ b/src/Languages/Python/Patterns/PyBuiltinPattern.php @@ -0,0 +1,38 @@ +builtinFunctions); + + return "\b(?(?:{$builtinFunctions}))\b"; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::TYPE; + } +} diff --git a/src/Languages/Python/Patterns/PyClassNamePattern.php b/src/Languages/Python/Patterns/PyClassNamePattern.php new file mode 100644 index 0000000..b667051 --- /dev/null +++ b/src/Languages/Python/Patterns/PyClassNamePattern.php @@ -0,0 +1,27 @@ +\w*)(?=[\s*\:(])'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::PROPERTY; + } +} diff --git a/src/Languages/Python/Patterns/PyCommentPattern.php b/src/Languages/Python/Patterns/PyCommentPattern.php new file mode 100644 index 0000000..bb96a8d --- /dev/null +++ b/src/Languages/Python/Patterns/PyCommentPattern.php @@ -0,0 +1,24 @@ +#.*)'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::COMMENT; + } +} diff --git a/src/Languages/Python/Patterns/PyDecoratorPattern.php b/src/Languages/Python/Patterns/PyDecoratorPattern.php new file mode 100644 index 0000000..7989504 --- /dev/null +++ b/src/Languages/Python/Patterns/PyDecoratorPattern.php @@ -0,0 +1,27 @@ +@\s*\w*(?:\.\w+)*)'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::PROPERTY; + } +} diff --git a/src/Languages/Python/Patterns/PyFunctionPattern.php b/src/Languages/Python/Patterns/PyFunctionPattern.php new file mode 100644 index 0000000..cc92274 --- /dev/null +++ b/src/Languages/Python/Patterns/PyFunctionPattern.php @@ -0,0 +1,26 @@ +\w*)(?=\s*\()'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::PROPERTY; + } +} diff --git a/src/Languages/Python/Patterns/PyKeywordPattern.php b/src/Languages/Python/Patterns/PyKeywordPattern.php new file mode 100644 index 0000000..c5a1554 --- /dev/null +++ b/src/Languages/Python/Patterns/PyKeywordPattern.php @@ -0,0 +1,33 @@ +keywords); + + return "\b(?(?:_(?=\s*:){$keywords}))\b"; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::KEYWORD; + } +} diff --git a/src/Languages/Python/Patterns/PyOperatorPattern.php b/src/Languages/Python/Patterns/PyOperatorPattern.php new file mode 100644 index 0000000..9920a9d --- /dev/null +++ b/src/Languages/Python/Patterns/PyOperatorPattern.php @@ -0,0 +1,24 @@ +([-+&%=]=?|!=|:=|>>=|<<=|\|=|\^=|\*\*?=?|\/\/?=?|<[<=]?|>[=>]?|[\|^~]))"; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::OPERATOR; + } +} diff --git a/src/Languages/Python/Patterns/PyTripleDoubleQuoteStringPattern.php b/src/Languages/Python/Patterns/PyTripleDoubleQuoteStringPattern.php new file mode 100644 index 0000000..7f249b7 --- /dev/null +++ b/src/Languages/Python/Patterns/PyTripleDoubleQuoteStringPattern.php @@ -0,0 +1,24 @@ +"""(.|\n)*?""")/m'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::VALUE; + } +} diff --git a/src/Languages/Python/Patterns/PyTripleSingleQuoteStringPattern.php b/src/Languages/Python/Patterns/PyTripleSingleQuoteStringPattern.php new file mode 100644 index 0000000..7f12024 --- /dev/null +++ b/src/Languages/Python/Patterns/PyTripleSingleQuoteStringPattern.php @@ -0,0 +1,24 @@ +\'\'\'(.|\n)*?\'\'\')/m'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::VALUE; + } +} diff --git a/src/Languages/Python/PythonLanguage.php b/src/Languages/Python/PythonLanguage.php new file mode 100644 index 0000000..6d13f1b --- /dev/null +++ b/src/Languages/Python/PythonLanguage.php @@ -0,0 +1,74 @@ + Date: Thu, 16 May 2024 00:09:29 +0300 Subject: [PATCH 2/5] Fix styling --- .../Python/Patterns/PyBuiltinPattern.php | 20 ++++++++++--------- .../Python/Patterns/PyKeywordPattern.php | 4 +++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Languages/Python/Patterns/PyBuiltinPattern.php b/src/Languages/Python/Patterns/PyBuiltinPattern.php index 60275bb..061aa2c 100644 --- a/src/Languages/Python/Patterns/PyBuiltinPattern.php +++ b/src/Languages/Python/Patterns/PyBuiltinPattern.php @@ -13,16 +13,18 @@ use IsPattern; public function __construct(private array $builtinFunctions = [ - '__import__', 'abs', 'aiter', 'all', 'any', 'anext', 'ascii', 'bin', 'bool', - 'breakpoint', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'compile', - 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'exec', - 'filter', 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', - 'hash', 'help', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', - 'len', 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object', - 'oct', 'open', 'ord', 'pow', 'print', 'property', 'range', 'repr', 'reversed', - 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', + '__import__', 'abs', 'aiter', 'all', 'any', 'anext', 'ascii', 'bin', 'bool', + 'breakpoint', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'compile', + 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'exec', + 'filter', 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', + 'hash', 'help', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', + 'len', 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object', + 'oct', 'open', 'ord', 'pow', 'print', 'property', 'range', 'repr', 'reversed', + 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'vars', 'zip', - ]) {} + ]) + { + } public function getPattern(): string { diff --git a/src/Languages/Python/Patterns/PyKeywordPattern.php b/src/Languages/Python/Patterns/PyKeywordPattern.php index c5a1554..eb13e4c 100644 --- a/src/Languages/Python/Patterns/PyKeywordPattern.php +++ b/src/Languages/Python/Patterns/PyKeywordPattern.php @@ -17,7 +17,9 @@ public function __construct(private array $keywords = [ 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield', - ]) {} + ]) + { + } public function getPattern(): string { From 6de46b156c078302978ad4f35d2761843345b5fd Mon Sep 17 00:00:00 2001 From: Bogdan Condorachi Date: Fri, 17 May 2024 01:04:19 +0300 Subject: [PATCH 3/5] Improve python argument match --- src/Languages/Python/Patterns/PyArgumentPattern.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Languages/Python/Patterns/PyArgumentPattern.php b/src/Languages/Python/Patterns/PyArgumentPattern.php index fd5e1e4..05f74ac 100644 --- a/src/Languages/Python/Patterns/PyArgumentPattern.php +++ b/src/Languages/Python/Patterns/PyArgumentPattern.php @@ -14,7 +14,7 @@ public function getPattern(): string { - return '(?\w+)(s*=)'; + return '(?<=,|\()\s*(?\w+)s*='; } public function getTokenType(): TokenTypeEnum From 116da0e9a99568f55c23d4ba9a2b5b8214e2d0cd Mon Sep 17 00:00:00 2001 From: Bogdan Condorachi Date: Fri, 17 May 2024 01:05:27 +0300 Subject: [PATCH 4/5] Add python unit test --- tests/Languages/Python/PythonLanguageTest.php | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/Languages/Python/PythonLanguageTest.php diff --git a/tests/Languages/Python/PythonLanguageTest.php b/tests/Languages/Python/PythonLanguageTest.php new file mode 100644 index 0000000..61ceda5 --- /dev/null +++ b/tests/Languages/Python/PythonLanguageTest.php @@ -0,0 +1,58 @@ +assertSame( + $expected, + $highlighter->parse($content, 'python'), + ); + + $this->assertSame( + $expected, + $highlighter->parse($content, 'py'), + ); + } + + public static function data(): array + { + return [ + [<<def fib(n): # write Fibonacci series up to n + """Print a Fibonacci series up to n.""" + a, b = 0, 1 + while a < n: + print(a, end=' ') + a, b = b, a+b + print() + +# Now call the function we just defined: +fib(2000) +TXT], + ]; + } +} From dbde1c2ed6e9b63e43fe2837684ba59241d13fce Mon Sep 17 00:00:00 2001 From: Bogdan Condorachi Date: Thu, 23 May 2024 14:07:35 +0300 Subject: [PATCH 5/5] Add python #123 support --- .../Python/Patterns/PyBooleanPattern.php | 2 +- .../Python/Patterns/PyNumberPattern.php | 24 +++++++++++++++++++ src/Languages/Python/PythonLanguage.php | 8 ++++++- tests/Languages/Python/PythonLanguageTest.php | 4 ++-- 4 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 src/Languages/Python/Patterns/PyNumberPattern.php diff --git a/src/Languages/Python/Patterns/PyBooleanPattern.php b/src/Languages/Python/Patterns/PyBooleanPattern.php index dd7f83d..e42d3af 100644 --- a/src/Languages/Python/Patterns/PyBooleanPattern.php +++ b/src/Languages/Python/Patterns/PyBooleanPattern.php @@ -19,6 +19,6 @@ public function getPattern(): string public function getTokenType(): TokenTypeEnum { - return TokenTypeEnum::TYPE; + return TokenTypeEnum::BOOLEAN; } } diff --git a/src/Languages/Python/Patterns/PyNumberPattern.php b/src/Languages/Python/Patterns/PyNumberPattern.php new file mode 100644 index 0000000..b8e14aa --- /dev/null +++ b/src/Languages/Python/Patterns/PyNumberPattern.php @@ -0,0 +1,24 @@ +\b0(?:[bB](?:_?[01])+|[oO](?:_?[0-7])+|[xX](?:_?[a-fA-F0-9])+)\b|(?:\b\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\B\.\d+(?:_\d+)*)(?:[eE][+-]?\d+(?:_\d+)*)?j?(?!\w))'; + } + + public function getTokenType(): TokenTypeEnum + { + return TokenTypeEnum::NUMBER; + } +} diff --git a/src/Languages/Python/PythonLanguage.php b/src/Languages/Python/PythonLanguage.php index 6d13f1b..9b0ced0 100644 --- a/src/Languages/Python/PythonLanguage.php +++ b/src/Languages/Python/PythonLanguage.php @@ -15,6 +15,7 @@ use Tempest\Highlight\Languages\Python\Patterns\PyDecoratorPattern; use Tempest\Highlight\Languages\Python\Patterns\PyFunctionPattern; use Tempest\Highlight\Languages\Python\Patterns\PyKeywordPattern; +use Tempest\Highlight\Languages\Python\Patterns\PyNumberPattern; use Tempest\Highlight\Languages\Python\Patterns\PyOperatorPattern; use Tempest\Highlight\Languages\Python\Patterns\PyTripleDoubleQuoteStringPattern; use Tempest\Highlight\Languages\Python\Patterns\PyTripleSingleQuoteStringPattern; @@ -52,12 +53,17 @@ public function getPatterns(): array new PyFunctionPattern(), // TYPES - new PyBooleanPattern(), new PyBuiltinPattern(), // COMMENTS new PyCommentPattern(), + // NUMBERS + new PyNumberPattern(), + + // BOOLEANS + new PyBooleanPattern(), + // OPERATORS new PyOperatorPattern(), diff --git a/tests/Languages/Python/PythonLanguageTest.php b/tests/Languages/Python/PythonLanguageTest.php index 61ceda5..72e861a 100644 --- a/tests/Languages/Python/PythonLanguageTest.php +++ b/tests/Languages/Python/PythonLanguageTest.php @@ -44,14 +44,14 @@ public static function data(): array <<def fib(n): # write Fibonacci series up to n """Print a Fibonacci series up to n.""" - a, b = 0, 1 + a, b = 0, 1 while a < n: print(a, end=' ') a, b = b, a+b print() # Now call the function we just defined: -fib(2000) +fib(2000) TXT], ]; }