Skip to content

Commit

Permalink
Merge pull request #121 from bogdancondorachi/python-language
Browse files Browse the repository at this point in the history
Add python support
  • Loading branch information
brendt authored May 23, 2024
2 parents 4c05008 + dbde1c2 commit a38bed6
Show file tree
Hide file tree
Showing 15 changed files with 463 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/Highlighter.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
use Tempest\Highlight\Languages\JavaScript\JavaScriptLanguage;
use Tempest\Highlight\Languages\Json\JsonLanguage;
use Tempest\Highlight\Languages\Php\PhpLanguage;
use Tempest\Highlight\Languages\Python\PythonLanguage;
use Tempest\Highlight\Languages\Sql\SqlLanguage;
use Tempest\Highlight\Languages\Text\TextLanguage;
use Tempest\Highlight\Languages\Twig\TwigLanguage;
Expand Down Expand Up @@ -49,6 +50,7 @@ public function __construct(
->addLanguage(new JavaScriptLanguage())
->addLanguage(new JsonLanguage())
->addLanguage(new PhpLanguage())
->addLanguage(new PythonLanguage())
->addLanguage(new SqlLanguage())
->addLanguage(new XmlLanguage())
->addLanguage(new YamlLanguage())
Expand Down
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyArgumentPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches an identifier that sits directly after `(` or `,` and is followed
 * by a single `=`, e.g. `end` in `print(value, end = "")`.
 *
 * The identifier is exposed through the named group `match`.
 */
final readonly class PyArgumentPattern implements Pattern
{
    use IsPattern;

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // (?<=,|\()  the name must be preceded by a comma or an opening parenthesis
        // \s*        optional whitespace before the name
        // \w+        the name itself (captured as `match`)
        // \s*        optional whitespace before the equals sign; this used to be a
        //            bare `s*`, which only skipped literal "s" characters and made
        //            `f(x = 1)` fail to match
        // =(?!=)     a single `=`; the lookahead rejects the `==` comparison operator
        return '(?<=,|\()\s*(?<match>\w+)\s*=(?!=)';
    }

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::VARIABLE}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::VARIABLE;
    }
}
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyBooleanPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches the Python constants `False`, `None` and `True` when they appear
 * as whole words.
 */
final readonly class PyBooleanPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::BOOLEAN}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::BOOLEAN;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // Word boundaries on both sides keep identifiers such as `Truehearted`
        // from matching; the constant itself is captured as `match`.
        $regex = '\b(?<match>(?:False|None|True))\b';

        return $regex;
    }
}
40 changes: 40 additions & 0 deletions src/Languages/Python/Patterns/PyBuiltinPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches the names of Python built-in functions as whole words.
 *
 * The list of names can be replaced through the constructor; each entry is
 * inserted into the regular expression as-is.
 */
final readonly class PyBuiltinPattern implements Pattern
{
    use IsPattern;

    /**
     * @param array $builtinFunctions Names joined into one regex alternation.
     */
    public function __construct(
        private array $builtinFunctions = [
            '__import__', 'abs', 'aiter', 'all', 'any', 'anext', 'ascii', 'bin', 'bool',
            'breakpoint', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'compile',
            'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'exec',
            'filter', 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr',
            'hash', 'help', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter',
            'len', 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object',
            'oct', 'open', 'ord', 'pow', 'print', 'property', 'range', 'repr', 'reversed',
            'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum',
            'super', 'tuple', 'type', 'vars', 'zip',
        ],
    ) {
    }

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::TYPE}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::TYPE;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        $alternation = implode('|', $this->builtinFunctions);

        // The matched name is captured as `match`; the surrounding `\b`
        // anchors restrict hits to whole words.
        return '\b(?<match>(?:' . $alternation . '))\b';
    }
}
27 changes: 27 additions & 0 deletions src/Languages/Python/Patterns/PyClassNamePattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\PatternTest;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches the name that follows the `class` keyword in a Python class
 * statement.
 */
#[PatternTest(input: 'class MyClass:', output: 'MyClass')]
#[PatternTest(input: 'class HisClass(MyClass):', output: 'HisClass')]
final readonly class PyClassNamePattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::PROPERTY}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::PROPERTY;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // `class`, at least one whitespace character, then the word characters
        // captured as `match`. The lookahead requires the next character to be
        // whitespace, `*`, `:` or `(` without consuming it.
        $regex = '\bclass\s+(?<match>\w*)(?=[\s*\:(])';

        return $regex;
    }
}
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyCommentPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches a Python `#` comment: the `#` and everything after it up to the
 * end of the line.
 */
final readonly class PyCommentPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::COMMENT}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::COMMENT;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // The leading group accepts either the start of the input or any single
        // character other than a backslash (the four backslashes in this
        // single-quoted literal reach the regex engine as `\\`, i.e. one literal
        // backslash). That character is consumed but is not part of `match`.
        $regex = '(^|[^\\\\])(?<match>#.*)';

        return $regex;
    }
}
27 changes: 27 additions & 0 deletions src/Languages/Python/Patterns/PyDecoratorPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\PatternTest;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches a Python decorator such as `@decorator` or `@decorator.chained`,
 * including the leading `@`.
 */
#[PatternTest(input: '@decorator', output: '@decorator')]
#[PatternTest(input: '@decorator.chained', output: '@decorator.chained')]
final readonly class PyDecoratorPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::PROPERTY}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::PROPERTY;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // The decorator must be the first non-whitespace text after the start
        // of the input or after a newline. `match` covers the `@`, optional
        // whitespace, a name and any number of `.segment` parts.
        $regex = '(^|\n)\s*(?<match>@\s*\w*(?:\.\w+)*)';

        return $regex;
    }
}
26 changes: 26 additions & 0 deletions src/Languages/Python/Patterns/PyFunctionPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\PatternTest;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches the name that follows the `def` keyword in a Python function
 * definition.
 */
#[PatternTest(input: 'def fibonacci(n)', output: 'fibonacci')]
final readonly class PyFunctionPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::PROPERTY}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::PROPERTY;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // `def`, at least one whitespace character, then the word characters
        // captured as `match`. The lookahead insists on an opening parenthesis
        // (optionally preceded by whitespace) without consuming it.
        $regex = '\bdef\s+(?<match>\w*)(?=\s*\()';

        return $regex;
    }
}
35 changes: 35 additions & 0 deletions src/Languages/Python/Patterns/PyKeywordPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches Python keywords as whole words, plus a lone `_` that is followed
 * by a colon (presumably the `case _:` wildcard; confirm against the
 * intended grammar).
 *
 * The keyword list can be replaced through the constructor; each entry is
 * inserted into the regular expression as-is.
 */
final readonly class PyKeywordPattern implements Pattern
{
    use IsPattern;

    /**
     * @param array $keywords Keywords joined into one regex alternation.
     */
    public function __construct(private array $keywords = [
        'and', 'as', 'assert', 'async', 'await', 'break', 'class', 'continue',
        'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global',
        'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass',
        'raise', 'return', 'try', 'while', 'with', 'yield',
    ])
    {
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // The underscore rule and the keywords must be separate alternatives.
        // Previously the `|` between them was missing, which glued the
        // underscore rule onto the first keyword (`_(?=\s*:)and`), an
        // alternative that can never match, so `and` was never highlighted.
        // Building one list and joining it also avoids a dangling empty
        // alternative when the keyword list is empty.
        $alternatives = implode('|', ['_(?=\s*:)', ...$this->keywords]);

        return "\b(?<match>(?:{$alternatives}))\b";
    }

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::KEYWORD}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::KEYWORD;
    }
}
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyNumberPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches Python numeric literals.
 */
final readonly class PyNumberPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::NUMBER}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::NUMBER;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // Two top-level alternatives, both captured as `match`:
        //   1. `0b`, `0o` or `0x` prefixed integers (either letter case), whose
        //      digits may each be preceded by a single underscore.
        //   2. Decimal forms: digits with optional `_`-separated groups and an
        //      optional fractional part, or a fraction starting with `.`;
        //      then an optional exponent and an optional `j` suffix. The
        //      literal must not be directly followed by a word character.
        $regex = '(?<match>\b0(?:[bB](?:_?[01])+|[oO](?:_?[0-7])+|[xX](?:_?[a-fA-F0-9])+)\b|(?:\b\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\B\.\d+(?:_\d+)*)(?:[eE][+-]?\d+(?:_\d+)*)?j?(?!\w))';

        return $regex;
    }
}
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyOperatorPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches Python operators made up of punctuation characters.
 */
final readonly class PyOperatorPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::OPERATOR}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::OPERATOR;
    }

    /**
     * @return string Regular expression source, written without delimiters.
     */
    public function getPattern(): string
    {
        // Alternatives, tried left to right and captured as `match`:
        //   [-+&%=]=?   one of - + & % = with an optional trailing =
        //   != := >>= <<= |= ^=
        //   \*\*?=?     one or two asterisks with an optional trailing =
        //   \/\/?=?     one or two slashes with an optional trailing =
        //   <[<=]?      < optionally followed by < or =
        //   >[=>]?      > optionally followed by = or >
        //   [\|^~]      a single | ^ or ~
        // None of the backslash sequences used here are PHP double-quote
        // escapes, so the backslashes reach the regex engine unchanged.
        $regex = "(?<match>([-+&%=]=?|!=|:=|>>=|<<=|\|=|\^=|\*\*?=?|\/\/?=?|<[<=]?|>[=>]?|[\|^~]))";

        return $regex;
    }
}
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyTripleDoubleQuoteStringPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches a Python string delimited by three double quotes, including the
 * delimiters themselves.
 */
final readonly class PyTripleDoubleQuoteStringPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::VALUE}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::VALUE;
    }

    /**
     * @return string Regular expression including its own `/` delimiters and
     *                the `m` modifier.
     */
    public function getPattern(): string
    {
        // `(.|\n)*?` lazily consumes any character, newlines included, so the
        // match ends at the first closing `"""` and may span several lines.
        // NOTE(review): this pattern carries delimiters while most sibling
        // patterns do not - presumably IsPattern accepts both; confirm.
        $regex = '/(?<match>"""(.|\n)*?""")/m';

        return $regex;
    }
}
24 changes: 24 additions & 0 deletions src/Languages/Python/Patterns/PyTripleSingleQuoteStringPattern.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Tempest\Highlight\Languages\Python\Patterns;

use Tempest\Highlight\IsPattern;
use Tempest\Highlight\Pattern;
use Tempest\Highlight\Tokens\TokenTypeEnum;

/**
 * Matches a Python string delimited by three single quotes, including the
 * delimiters themselves.
 */
final readonly class PyTripleSingleQuoteStringPattern implements Pattern
{
    use IsPattern;

    /**
     * @return TokenTypeEnum Always {@see TokenTypeEnum::VALUE}.
     */
    public function getTokenType(): TokenTypeEnum
    {
        return TokenTypeEnum::VALUE;
    }

    /**
     * @return string Regular expression including its own `/` delimiters and
     *                the `m` modifier.
     */
    public function getPattern(): string
    {
        // `(.|\n)*?` lazily consumes any character, newlines included, so the
        // match ends at the first closing triple quote and may span several
        // lines. The quotes are backslash-escaped only because the PHP literal
        // itself is single-quoted.
        // NOTE(review): this pattern carries delimiters while most sibling
        // patterns do not - presumably IsPattern accepts both; confirm.
        $regex = '/(?<match>\'\'\'(.|\n)*?\'\'\')/m';

        return $regex;
    }
}
Loading

0 comments on commit a38bed6

Please sign in to comment.