diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..b22a26b --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,97 @@ +name: Publish Python Package to PyPI + +on: + push: + branches: + - main + paths: + - "flowquery-py/**" + pull_request: + branches: + - main + paths: + - "flowquery-py/**" + release: + types: [published] + workflow_dispatch: + inputs: + target: + description: "Publish target" + required: true + default: "testpypi" + type: choice + options: + - testpypi + - pypi + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + defaults: + run: + working-directory: flowquery-py + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install build dependencies + run: pip install build + + - name: Build package + run: python -m build + + - name: Upload distribution artifacts + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: flowquery-py/dist/ + + publish-testpypi: + name: Publish to TestPyPI + if: github.event_name == 'workflow_dispatch' && github.event.inputs.target == 'testpypi' + needs: build + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/flowquery + permissions: + id-token: write + + steps: + - name: Download distribution artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + publish-pypi: + name: Publish to PyPI + if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.target == 'pypi') + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/flowquery + permissions: + id-token: write + + steps: + - name: Download 
distribution artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/flowquery-py/.gitignore b/flowquery-py/.gitignore new file mode 100644 index 0000000..479bdd5 --- /dev/null +++ b/flowquery-py/.gitignore @@ -0,0 +1,84 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ +.project +.pydevproject +.settings/ + +# Jupyter Notebook +.ipynb_checkpoints + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# OS +.DS_Store +Thumbs.db diff --git a/flowquery-py/README.md b/flowquery-py/README.md new file mode 100644 index 0000000..4f7acba --- /dev/null +++ b/flowquery-py/README.md @@ -0,0 +1,166 @@ +# FlowQuery Python Implementation + +This is the Python implementation of FlowQuery, a declarative query language for data processing pipelines. + +## Installation + +### From Source + +```bash +git clone https://github.com/microsoft/FlowQuery.git +cd FlowQuery/flowquery-py +pip install -e . 
+``` + +### With Development Dependencies + +```bash +pip install -e ".[dev]" +``` + +## Quick Start + +### Command Line Interface + +After installation, you can start the interactive REPL: + +```bash +flowquery +``` + +### Using Conda (Alternative) + +**Windows (PowerShell):** + +```powershell +cd flowquery-py +.\setup_env.ps1 +conda activate flowquery +``` + +**Linux/macOS:** + +```bash +cd flowquery-py +chmod +x setup_env.sh +./setup_env.sh +conda activate flowquery +``` + +The setup scripts automatically: + +1. Read the Python version from `pyproject.toml` +2. Create a conda environment named `flowquery` +3. Install the package with all dev dependencies + +## Requirements + +- Python 3.10+ (defined in `pyproject.toml`) +- pytest (for running tests) +- pytest-asyncio (for async test support) +- aiohttp (for HTTP requests) + +All dependencies are managed in `pyproject.toml`. + +## Programmatic Usage + +```python +import asyncio +from flowquery import Runner + +runner = Runner("WITH 1 as x RETURN x + 1 as result") +asyncio.run(runner.run()) +print(runner.results) # [{'result': 2}] +``` + +## Running Tests + +```bash +pytest tests/ +``` + +## Project Structure + +``` +flowquery-py/ +├── pyproject.toml # Dependencies & project config (single source of truth) +├── setup_env.ps1 # Windows conda setup script +├── setup_env.sh # Linux/macOS conda setup script +├── README.md +├── src/ +│ ├── __init__.py # Main package entry point +│ ├── extensibility.py # Public API for custom functions +│ ├── compute/ +│ │ └── runner.py # Query execution engine +│ ├── graph/ +│ │ ├── node.py # Graph node representation +│ │ ├── relationship.py # Graph relationship representation +│ │ ├── pattern.py # Pattern matching +│ │ └── database.py # In-memory graph database +│ ├── io/ +│ │ └── command_line.py # Interactive REPL +│ ├── parsing/ +│ │ ├── parser.py # Main parser +│ │ ├── ast_node.py # AST node base class +│ │ ├── expressions/ # Expression types (numbers, strings, operators) +│ │ ├── 
functions/ # Built-in and custom functions +│ │ ├── operations/ # Query operations (WITH, RETURN, UNWIND, etc.) +│ │ ├── components/ # LOAD clause components +│ │ ├── data_structures/ # Arrays, objects, lookups +│ │ └── logic/ # CASE/WHEN/THEN/ELSE +│ ├── tokenization/ +│ │ ├── tokenizer.py # Lexer +│ │ ├── token.py # Token class +│ │ └── ... # Token types and mappers +│ └── utils/ +│ ├── string_utils.py # String manipulation utilities +│ └── object_utils.py # Object utilities +└── tests/ + ├── test_extensibility.py + ├── compute/ + │ └── test_runner.py + ├── graph/ + │ ├── test_create.py + │ ├── test_data.py + │ └── test_match.py + ├── parsing/ + │ ├── test_parser.py + │ ├── test_context.py + │ └── test_expression.py + └── tokenization/ + ├── test_tokenizer.py + ├── test_token_mapper.py + └── test_trie.py +``` + +## Creating Custom Functions + +```python +from flowquery.extensibility import Function, FunctionDef + +@FunctionDef({ + "description": "Converts a string to uppercase", + "category": "string", + "parameters": [ + {"name": "text", "description": "String to convert", "type": "string"} + ], + "output": {"description": "Uppercase string", "type": "string"} +}) +class UpperCase(Function): + def __init__(self): + super().__init__("uppercase") + self._expected_parameter_count = 1 + + def value(self) -> str: + return str(self.get_children()[0].value()).upper() +``` + +## License + +MIT License - see [LICENSE](LICENSE) for details. 
+ +## Links + +- [Homepage](https://github.com/microsoft/FlowQuery/flowquery-py) +- [Repository](https://github.com/microsoft/FlowQuery/flowquery-py) +- [Issues](https://github.com/microsoft/FlowQuery/issues) diff --git a/flowquery-py/pyproject.toml b/flowquery-py/pyproject.toml new file mode 100644 index 0000000..acbd499 --- /dev/null +++ b/flowquery-py/pyproject.toml @@ -0,0 +1,75 @@ +[project] +name = "flowquery" +version = "1.0.0" +description = "A declarative query language for data processing pipelines" +readme = "README.md" +requires-python = ">=3.10" +license = {text = "MIT"} +authors = [ + {name = "FlowQuery Contributors"} +] +keywords = ["query", "data-processing", "pipeline", "declarative"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Database :: Front-Ends", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "aiohttp>=3.8.0", +] + +[project.scripts] +flowquery = "flowquery.io.command_line:main" + +[project.urls] +Homepage = "https://github.com/microsoft/FlowQuery/flowquery-py" +Repository = "https://github.com/microsoft/FlowQuery/flowquery-py" +Documentation = "https://github.com/microsoft/FlowQuery/flowquery-py#readme" +Issues = "https://github.com/microsoft/FlowQuery/issues" + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", +] + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.package-dir] +"flowquery" = "src" +"flowquery.compute" = "src/compute" +"flowquery.graph" = "src/graph" +"flowquery.io" = "src/io" +"flowquery.parsing" = "src/parsing" +"flowquery.parsing.components" = "src/parsing/components" 
+"flowquery.parsing.data_structures" = "src/parsing/data_structures" +"flowquery.parsing.expressions" = "src/parsing/expressions" +"flowquery.parsing.functions" = "src/parsing/functions" +"flowquery.parsing.logic" = "src/parsing/logic" +"flowquery.parsing.operations" = "src/parsing/operations" +"flowquery.tokenization" = "src/tokenization" +"flowquery.utils" = "src/utils" + +[tool.setuptools] +packages = ["flowquery", "flowquery.compute", "flowquery.graph", "flowquery.io", "flowquery.parsing", "flowquery.parsing.components", "flowquery.parsing.data_structures", "flowquery.parsing.expressions", "flowquery.parsing.functions", "flowquery.parsing.logic", "flowquery.parsing.operations", "flowquery.tokenization", "flowquery.utils"] + +[tool.pytest.ini_options] +minversion = "7.0" +asyncio_mode = "auto" +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = "-v --tb=short" + +[tool.pytest-asyncio] +mode = "auto" \ No newline at end of file diff --git a/flowquery-py/setup_env.ps1 b/flowquery-py/setup_env.ps1 new file mode 100644 index 0000000..a03c054 --- /dev/null +++ b/flowquery-py/setup_env.ps1 @@ -0,0 +1,92 @@ +# FlowQuery Python Environment Setup Script for Windows (PowerShell) +# This script creates a conda environment for FlowQuery development + +Write-Host "========================================" +Write-Host "FlowQuery Python Environment Setup" +Write-Host "========================================" +Write-Host "" + +# Check if conda is available +$condaPath = Get-Command conda -ErrorAction SilentlyContinue +if (-not $condaPath) { + Write-Host "ERROR: Conda is not installed or not in PATH." -ForegroundColor Red + Write-Host "Please install Anaconda or Miniconda first." 
+ exit 1 +} + +# Set environment name +$envName = "flowquery" + +# Get Python version from pyproject.toml +$pyprojectContent = Get-Content "pyproject.toml" -Raw +if ($pyprojectContent -match 'requires-python\s*=\s*">=([0-9.]+)"') { + $pythonVersion = $matches[1] +} else { + $pythonVersion = "3.10" # fallback +} +Write-Host "Using Python version: $pythonVersion" + +# Check if environment already exists +$envList = conda env list +if ($envList -match "^$envName\s") { + Write-Host "Environment '$envName' already exists." + $recreate = Read-Host "Do you want to recreate it? (y/n)" + if ($recreate -eq "y" -or $recreate -eq "Y") { + Write-Host "Removing existing environment..." + conda env remove -n $envName -y + } else { + Write-Host "Keeping existing environment..." + Write-Host "" + Write-Host "========================================" + Write-Host "Environment ready!" + Write-Host "========================================" + Write-Host "" + Write-Host "To activate the environment, run:" + Write-Host " conda activate $envName" -ForegroundColor Cyan + Write-Host "" + Write-Host "To run tests:" + Write-Host " pytest tests/" -ForegroundColor Cyan + Write-Host "" + Write-Host "To deactivate when done:" + Write-Host " conda deactivate" -ForegroundColor Cyan + exit 0 + } +} + +# Create the environment +Write-Host "" +Write-Host "Creating conda environment '$envName' with Python $pythonVersion..." +conda create -n $envName python=$pythonVersion pip -y + +if ($LASTEXITCODE -ne 0) { + Write-Host "" + Write-Host "ERROR: Failed to create conda environment." -ForegroundColor Red + exit 1 +} + +# Activate and install package in dev mode +Write-Host "" +Write-Host "Installing package in development mode..." +conda activate $envName +pip install -e ".[dev]" + +if ($LASTEXITCODE -ne 0) { + Write-Host "" + Write-Host "ERROR: Failed to install package." 
-ForegroundColor Red + exit 1 +} + +Write-Host "" +Write-Host "========================================" +Write-Host "Environment created successfully!" -ForegroundColor Green +Write-Host "========================================" +Write-Host "" +Write-Host "To activate the environment, run:" +Write-Host " conda activate $envName" -ForegroundColor Cyan +Write-Host "" +Write-Host "To run tests:" +Write-Host " pytest tests/" -ForegroundColor Cyan +Write-Host "" +Write-Host "To deactivate when done:" +Write-Host " conda deactivate" -ForegroundColor Cyan +Write-Host "" diff --git a/flowquery-py/setup_env.sh b/flowquery-py/setup_env.sh new file mode 100644 index 0000000..7b584f9 --- /dev/null +++ b/flowquery-py/setup_env.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# FlowQuery Python Environment Setup Script for Linux/macOS +# This script creates a conda environment for FlowQuery development + +echo "========================================" +echo "FlowQuery Python Environment Setup" +echo "========================================" +echo + +# Check if conda is available +if ! command -v conda &> /dev/null; then + echo "ERROR: Conda is not installed or not in PATH." + echo "Please install Anaconda or Miniconda first." + exit 1 +fi + +# Set environment name +ENV_NAME="flowquery" + +# Get Python version from pyproject.toml +PYTHON_VERSION=$(grep -oP 'requires-python\s*=\s*">=\K[0-9.]+' pyproject.toml 2>/dev/null || echo "3.10") +echo "Using Python version: $PYTHON_VERSION" + +# Check if environment already exists +if conda env list | grep -q "^${ENV_NAME} "; then + echo "Environment '${ENV_NAME}' already exists." + read -p "Do you want to recreate it? (y/n): " RECREATE + if [[ "$RECREATE" =~ ^[Yy]$ ]]; then + echo "Removing existing environment..." + conda env remove -n "$ENV_NAME" -y + else + echo "Keeping existing environment..." + echo + echo "========================================" + echo "Environment ready!" 
+ echo "========================================" + echo + echo "To activate the environment, run:" + echo " conda activate $ENV_NAME" + echo + echo "To run tests:" + echo " pytest tests/" + echo + echo "To deactivate when done:" + echo " conda deactivate" + exit 0 + fi +fi + +# Create the environment +echo +echo "Creating conda environment '${ENV_NAME}' with Python ${PYTHON_VERSION}..." +conda create -n "$ENV_NAME" python="$PYTHON_VERSION" pip -y + +if [ $? -ne 0 ]; then + echo + echo "ERROR: Failed to create conda environment." + exit 1 +fi + +# Activate and install package in dev mode +echo +echo "Installing package in development mode..." +source "$(conda info --base)/etc/profile.d/conda.sh" +conda activate "$ENV_NAME" +pip install -e ".[dev]" + +if [ $? -ne 0 ]; then + echo + echo "ERROR: Failed to install package." + exit 1 +fi + +echo +echo "========================================" +echo "Environment created successfully!" +echo "========================================" +echo +echo "To activate the environment, run:" +echo " conda activate $ENV_NAME" +echo +echo "To run tests:" +echo " pytest tests/" +echo +echo "To deactivate when done:" +echo " conda deactivate" +echo diff --git a/flowquery-py/src/__init__.py b/flowquery-py/src/__init__.py new file mode 100644 index 0000000..b2c1fd5 --- /dev/null +++ b/flowquery-py/src/__init__.py @@ -0,0 +1,34 @@ +""" +FlowQuery - A declarative query language for data processing pipelines. + +This is the Python implementation of FlowQuery. 
+""" + +from .compute.runner import Runner +from .io.command_line import CommandLine +from .parsing.parser import Parser +from .parsing.functions.function import Function +from .parsing.functions.aggregate_function import AggregateFunction +from .parsing.functions.async_function import AsyncFunction +from .parsing.functions.predicate_function import PredicateFunction +from .parsing.functions.reducer_element import ReducerElement +from .parsing.functions.function_metadata import ( + FunctionDef, + FunctionMetadata, + FunctionCategory, +) + +__all__ = [ + "Runner", + "CommandLine", + "Parser", + "Function", + "AggregateFunction", + "AsyncFunction", + "PredicateFunction", + "ReducerElement", + "FunctionDef", + "FunctionMetadata", + "FunctionCategory", +] + diff --git a/flowquery-py/src/__main__.py b/flowquery-py/src/__main__.py new file mode 100644 index 0000000..1557b61 --- /dev/null +++ b/flowquery-py/src/__main__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +"""FlowQuery command-line entry point. + +Run with: python -m src +""" + +from . import CommandLine + +if __name__ == "__main__": + CommandLine().loop() diff --git a/flowquery-py/src/compute/__init__.py b/flowquery-py/src/compute/__init__.py new file mode 100644 index 0000000..18c9da9 --- /dev/null +++ b/flowquery-py/src/compute/__init__.py @@ -0,0 +1,5 @@ +"""Compute module for FlowQuery.""" + +from .runner import Runner + +__all__ = ["Runner"] diff --git a/flowquery-py/src/compute/runner.py b/flowquery-py/src/compute/runner.py new file mode 100644 index 0000000..5a03eba --- /dev/null +++ b/flowquery-py/src/compute/runner.py @@ -0,0 +1,60 @@ +"""Executes a FlowQuery statement and retrieves the results.""" + +from typing import Any, Dict, List, Optional + +from ..parsing.ast_node import ASTNode +from ..parsing.operations.operation import Operation +from ..parsing.parser import Parser + + +class Runner: + """Executes a FlowQuery statement and retrieves the results. 
+ + The Runner class parses a FlowQuery statement into an AST and executes it, + managing the execution flow from the first operation to the final return statement. + + Example: + runner = Runner("WITH 1 as x RETURN x") + await runner.run() + print(runner.results) # [{ x: 1 }] + """ + + def __init__( + self, + statement: Optional[str] = None, + ast: Optional[ASTNode] = None + ): + """Creates a new Runner instance and parses the FlowQuery statement. + + Args: + statement: The FlowQuery statement to execute + ast: An already-parsed AST (optional) + + Raises: + ValueError: If neither statement nor AST is provided + """ + if (statement is None or statement == "") and ast is None: + raise ValueError("Either statement or AST must be provided") + + _ast = ast if ast is not None else Parser().parse(statement) + self._first: Operation = _ast.first_child() + self._last: Operation = _ast.last_child() + + async def run(self) -> None: + """Executes the parsed FlowQuery statement. + + Raises: + Exception: If an error occurs during execution + """ + await self._first.initialize() + await self._first.run() + await self._first.finish() + + @property + def results(self) -> List[Dict[str, Any]]: + """Gets the results from the executed statement. + + Returns: + The results from the last operation (typically a RETURN statement) + """ + return self._last.results diff --git a/flowquery-py/src/extensibility.py b/flowquery-py/src/extensibility.py new file mode 100644 index 0000000..aa8225c --- /dev/null +++ b/flowquery-py/src/extensibility.py @@ -0,0 +1,52 @@ +"""FlowQuery Extensibility API + +This module provides all the exports needed to create custom FlowQuery functions. 
+ +Example: + from flowquery.extensibility import Function, FunctionDef + + @FunctionDef({ + 'description': "Converts a string to uppercase", + 'category': "string", + 'parameters': [{'name': "text", 'description': "String to convert", 'type': "string"}], + 'output': {'description': "Uppercase string", 'type': "string"} + }) + class UpperCase(Function): + def __init__(self): + super().__init__("uppercase") + self._expected_parameter_count = 1 + + def value(self) -> str: + return str(self.get_children()[0].value()).upper() +""" + +# Base function classes for creating custom functions +from .parsing.functions.function import Function +from .parsing.functions.aggregate_function import AggregateFunction +from .parsing.functions.async_function import AsyncFunction +from .parsing.functions.predicate_function import PredicateFunction +from .parsing.functions.reducer_element import ReducerElement + +# Decorator and metadata types for function registration +from .parsing.functions.function_metadata import ( + FunctionDef, + FunctionMetadata, + FunctionDefOptions, + ParameterSchema, + OutputSchema, + FunctionCategory, +) + +__all__ = [ + "Function", + "AggregateFunction", + "AsyncFunction", + "PredicateFunction", + "ReducerElement", + "FunctionDef", + "FunctionMetadata", + "FunctionDefOptions", + "ParameterSchema", + "OutputSchema", + "FunctionCategory", +] diff --git a/flowquery-py/src/graph/__init__.py b/flowquery-py/src/graph/__init__.py new file mode 100644 index 0000000..cb3a637 --- /dev/null +++ b/flowquery-py/src/graph/__init__.py @@ -0,0 +1,31 @@ +"""Graph module for FlowQuery.""" + +from .node import Node +from .relationship import Relationship +from .pattern import Pattern +from .patterns import Patterns +from .pattern_expression import PatternExpression +from .database import Database +from .hops import Hops +from .node_data import NodeData +from .node_reference import NodeReference +from .relationship_data import RelationshipData +from .relationship_reference 
import RelationshipReference +from .physical_node import PhysicalNode +from .physical_relationship import PhysicalRelationship + +__all__ = [ + "Node", + "Relationship", + "Pattern", + "Patterns", + "PatternExpression", + "Database", + "Hops", + "NodeData", + "NodeReference", + "RelationshipData", + "RelationshipReference", + "PhysicalNode", + "PhysicalRelationship", +] diff --git a/flowquery-py/src/graph/data.py b/flowquery-py/src/graph/data.py new file mode 100644 index 0000000..9415557 --- /dev/null +++ b/flowquery-py/src/graph/data.py @@ -0,0 +1,118 @@ +"""Data class for graph record iteration and indexing.""" + +from typing import Any, Dict, List, Optional + + +class IndexEntry: + """Index entry for tracking positions of records with a specific key value.""" + + def __init__(self, positions: Optional[List[int]] = None): + self._positions: List[int] = positions if positions is not None else [] + self._index: int = -1 + + def add(self, position: int) -> None: + """Add a position to the index entry.""" + self._positions.append(position) + + @property + def position(self) -> int: + """Get the current position.""" + return self._positions[self._index] + + def reset(self) -> None: + """Reset the index to the beginning.""" + self._index = -1 + + def next(self) -> bool: + """Move to the next position. 
Returns True if successful.""" + if self._index < len(self._positions) - 1: + self._index += 1 + return True + return False + + def clone(self) -> "IndexEntry": + """Create a copy of this index entry.""" + return IndexEntry(list(self._positions)) + + +class Layer: + """Layer for managing index state at a specific level.""" + + def __init__(self, index: Dict[str, IndexEntry]): + self._index: Dict[str, IndexEntry] = index + self._current: int = -1 + + @property + def index(self) -> Dict[str, IndexEntry]: + """Get the index dictionary.""" + return self._index + + @property + def current(self) -> int: + """Get the current position.""" + return self._current + + @current.setter + def current(self, value: int) -> None: + """Set the current position.""" + self._current = value + + +class Data: + """Base class for graph data with record iteration and indexing.""" + + def __init__(self, records: Optional[List[Dict[str, Any]]] = None): + self._records: List[Dict[str, Any]] = records if records is not None else [] + self._layers: Dict[int, Layer] = {0: Layer({})} + + def _build_index(self, key: str, level: int = 0) -> None: + """Build an index for the given key at the specified level.""" + self.layer(level).index.clear() + for idx, record in enumerate(self._records): + if key in record: + if record[key] not in self.layer(level).index: + self.layer(level).index[record[key]] = IndexEntry() + self.layer(level).index[record[key]].add(idx) + + def layer(self, level: int = 0) -> Layer: + """Get or create a layer at the specified level.""" + if level not in self._layers: + first = self._layers[0] + cloned = {} + for key, entry in first.index.items(): + cloned[key] = entry.clone() + self._layers[level] = Layer(cloned) + return self._layers[level] + + def _find(self, key: str, level: int = 0) -> bool: + """Find the next record with the given key value.""" + if key not in self.layer(level).index: + self.layer(level).current = len(self._records) # Move to end + return False + else: + 
entry = self.layer(level).index[key] + more = entry.next() + if not more: + self.layer(level).current = len(self._records) # Move to end + return False + self.layer(level).current = entry.position + return True + + def reset(self) -> None: + """Reset iteration to the beginning.""" + self.layer(0).current = -1 + for entry in self.layer(0).index.values(): + entry.reset() + + def next(self, level: int = 0) -> bool: + """Move to the next record. Returns True if successful.""" + if self.layer(level).current < len(self._records) - 1: + self.layer(level).current += 1 + return True + return False + + def current(self, level: int = 0) -> Optional[Dict[str, Any]]: + """Get the current record.""" + if self.layer(level).current < len(self._records): + return self._records[self.layer(level).current] + return None diff --git a/flowquery-py/src/graph/database.py b/flowquery-py/src/graph/database.py new file mode 100644 index 0000000..d0f35c2 --- /dev/null +++ b/flowquery-py/src/graph/database.py @@ -0,0 +1,82 @@ +"""Graph database for FlowQuery.""" + +from typing import Any, Dict, Optional, Union, TYPE_CHECKING + +from ..parsing.ast_node import ASTNode + +if TYPE_CHECKING: + from .node import Node + from .relationship import Relationship + from .node_data import NodeData + from .relationship_data import RelationshipData + + +class Database: + """Singleton database for storing graph data.""" + + _instance: Optional['Database'] = None + _nodes: Dict[str, 'PhysicalNode'] = {} + _relationships: Dict[str, 'PhysicalRelationship'] = {} + + def __init__(self): + pass + + @classmethod + def get_instance(cls) -> 'Database': + if cls._instance is None: + cls._instance = Database() + return cls._instance + + def add_node(self, node: 'Node', statement: ASTNode) -> None: + """Adds a node to the database.""" + from .physical_node import PhysicalNode + if node.label is None: + raise ValueError("Node label is null") + physical = PhysicalNode(None, node.label) + physical.statement = statement + 
Database._nodes[node.label] = physical + + def get_node(self, node: 'Node') -> Optional['PhysicalNode']: + """Gets a node from the database.""" + return Database._nodes.get(node.label) if node.label else None + + def add_relationship(self, relationship: 'Relationship', statement: ASTNode) -> None: + """Adds a relationship to the database.""" + from .physical_relationship import PhysicalRelationship + if relationship.type is None: + raise ValueError("Relationship type is null") + physical = PhysicalRelationship() + physical.type = relationship.type + physical.statement = statement + Database._relationships[relationship.type] = physical + + def get_relationship(self, relationship: 'Relationship') -> Optional['PhysicalRelationship']: + """Gets a relationship from the database.""" + return Database._relationships.get(relationship.type) if relationship.type else None + + async def get_data(self, element: Union['Node', 'Relationship']) -> Union['NodeData', 'RelationshipData']: + """Gets data for a node or relationship.""" + from .node import Node + from .relationship import Relationship + from .node_data import NodeData + from .relationship_data import RelationshipData + + if isinstance(element, Node): + node = self.get_node(element) + if node is None: + raise ValueError(f"Physical node not found for label {element.label}") + data = await node.data() + return NodeData(data) + elif isinstance(element, Relationship): + relationship = self.get_relationship(element) + if relationship is None: + raise ValueError(f"Physical relationship not found for type {element.type}") + data = await relationship.data() + return RelationshipData(data) + else: + raise ValueError("Element is neither Node nor Relationship") + + +# Import for type hints +from .physical_node import PhysicalNode +from .physical_relationship import PhysicalRelationship diff --git a/flowquery-py/src/graph/hops.py b/flowquery-py/src/graph/hops.py new file mode 100644 index 0000000..33dee3a --- /dev/null +++ 
# ---- flowquery-py/src/graph/hops.py ----
"""Hops specification for variable-length relationships."""

import sys
from typing import Optional


class Hops:
    """Bounds on the number of hops for a relationship pattern (e.g. ``*1..3``)."""

    def __init__(self, min_hops: Optional[int] = None, max_hops: Optional[int] = None):
        # Defaults mirror the TypeScript implementation: min=0, max=1.
        self._min: int = 0 if min_hops is None else min_hops
        self._max: int = 1 if max_hops is None else max_hops

    @property
    def min(self) -> int:
        return self._min

    @min.setter
    def min(self, value: int) -> None:
        self._min = value

    @property
    def max(self) -> int:
        return self._max

    @max.setter
    def max(self, value: int) -> None:
        self._max = value

    def multi(self) -> bool:
        """True when this represents a variable-length relationship."""
        # Both -1 and sys.maxsize act as "unbounded" sentinels.
        return self._max > 1 or self._max in (-1, sys.maxsize)

    def unbounded(self) -> bool:
        """True when the maximum hop count is unbounded."""
        return self._max == sys.maxsize


# ---- flowquery-py/src/graph/node.py ----
"""Graph node representation for FlowQuery."""

from typing import Any, Callable, Dict, Optional, TYPE_CHECKING

from ..parsing.ast_node import ASTNode
from ..parsing.expressions.expression import Expression

if TYPE_CHECKING:
    from .relationship import Relationship
    from .node_data import NodeData, NodeRecord


class Node(ASTNode):
    """A node element of a graph pattern.

    Holds the pattern's identifier/label/properties plus traversal state:
    the current record (``_value``), the adjacent relationships, the backing
    data set and a continuation callback (``todo_next``) fired per match.
    """

    def __init__(
        self,
        identifier: Optional[str] = None,
        label: Optional[str] = None
    ):
        super().__init__()
        self._identifier = identifier
        self._label = label
        self._properties: Dict[str, Expression] = {}
        self._value: Optional['NodeRecord'] = None
        self._incoming: Optional['Relationship'] = None
        self._outgoing: Optional['Relationship'] = None
        self._data: Optional['NodeData'] = None
        self._todo_next: Optional[Callable[[], None]] = None

    @property
    def identifier(self) -> Optional[str]:
        return self._identifier

    @identifier.setter
    def identifier(self, value: str) -> None:
        self._identifier = value

    @property
    def label(self) -> Optional[str]:
        return self._label

    @label.setter
    def label(self, value: str) -> None:
        self._label = value

    @property
    def properties(self) -> Dict[str, Expression]:
        return self._properties

    def set_property(self, key: str, value: Expression) -> None:
        self._properties[key] = value

    def get_property(self, key: str) -> Optional[Expression]:
        return self._properties.get(key)

    def set_value(self, value: 'NodeRecord') -> None:
        self._value = value

    def value(self) -> Optional['NodeRecord']:
        return self._value

    @property
    def outgoing(self) -> Optional['Relationship']:
        return self._outgoing

    @outgoing.setter
    def outgoing(self, relationship: Optional['Relationship']) -> None:
        self._outgoing = relationship

    @property
    def incoming(self) -> Optional['Relationship']:
        return self._incoming

    @incoming.setter
    def incoming(self, relationship: Optional['Relationship']) -> None:
        self._incoming = relationship

    def set_data(self, data: Optional['NodeData']) -> None:
        self._data = data

    async def next(self) -> None:
        """Iterate every record of this node, recursing into the outgoing
        relationship (when present) and firing the continuation per record."""
        if self._data:
            self._data.reset()
            while self._data.next():
                self.set_value(self._data.current())
                if self._outgoing:
                    await self._outgoing.find(self._value['id'])
                await self.run_todo_next()

    async def find(self, id_: str, hop: int = 0) -> None:
        """Match this node against a specific id at the given hop depth."""
        if self._data:
            self._data.reset()
            while self._data.find(id_, hop):
                self.set_value(self._data.current(hop))
                if self._incoming:
                    # Record this node as the end of the incoming match.
                    self._incoming.set_end_node(self)
                if self._outgoing:
                    await self._outgoing.find(self._value['id'], hop)
                await self.run_todo_next()

    @property
    def todo_next(self) -> Optional[Callable[[], None]]:
        return self._todo_next

    @todo_next.setter
    def todo_next(self, func: Optional[Callable[[], None]]) -> None:
        self._todo_next = func

    async def run_todo_next(self) -> None:
        # Continuation is an async callable; only invoked when set.
        if self._todo_next:
            await self._todo_next()


# ---- flowquery-py/src/graph/node_data.py ----
"""Node data class for FlowQuery."""

from typing import Any, Dict, List, Optional, TypedDict

from .data import Data


class NodeRecord(TypedDict, total=False):
    """Represents a node record from the database."""
    id: str


class NodeData(Data):
    """Node data extending Data with id-based indexing."""

    def __init__(self, records: Optional[List[Dict[str, Any]]] = None):
        super().__init__(records)
        self._build_index("id")

    def find(self, id_: str, hop: int = 0) -> bool:
        """Find a record by ID."""
        return self._find(id_, hop)

    def current(self, hop: int = 0) -> Optional[Dict[str, Any]]:
        """Get the current record."""
        return super().current(hop)


# ---- flowquery-py/src/graph/node_reference.py ----
"""Node reference for FlowQuery."""

from typing import Optional, TYPE_CHECKING

from .node import Node

if TYPE_CHECKING:
    from ..parsing.ast_node import ASTNode


class NodeReference(Node):
    """A reference to an already-bound node variable."""

    def __init__(self, base: Node, reference: Node):
        super().__init__(base.identifier, base.label)
        self._reference: Node = reference
        # Properties and adjacent relationships are shared with the base
        # node (aliased, not copied).
        self._properties = base._properties
        self._outgoing = base.outgoing
        self._incoming = base.incoming

    @property
    def reference(self) -> Node:
        return self._reference

    # `referred` kept as an alias for backward compatibility.
    @property
    def referred(self) -> Node:
        return self._reference

    def value(self):
        return self._reference.value() if self._reference else None

    async def next(self) -> None:
        """Process next using the referenced node's current value."""
        self.set_value(self._reference.value())
        if self._outgoing and self._value:
            await self._outgoing.find(self._value['id'])
        await self.run_todo_next()

    async def find(self, id_: str, hop: int = 0) -> None:
        """Match only when `id_` equals the referenced node's id."""
        referenced = self._reference.value()
        if referenced is None or id_ != referenced.get('id'):
            return
        self.set_value(referenced)
        if self._outgoing and self._value:
            await self._outgoing.find(self._value['id'], hop)
        await self.run_todo_next()


# ---- flowquery-py/src/graph/pattern.py ----
"""Graph pattern representation for FlowQuery."""

from typing import Any, Generator, List, Optional, TYPE_CHECKING, Union

from ..parsing.ast_node import ASTNode

if TYPE_CHECKING:
    from .node import Node
    from .relationship import Relationship


class Pattern(ASTNode):
    """A graph pattern: an alternating chain of nodes and relationships."""

    def __init__(self):
        super().__init__()
        self._identifier: Optional[str] = None
        self._chain: List[Union['Node', 'Relationship']] = []

    @property
    def identifier(self) -> Optional[str]:
        return self._identifier

    @identifier.setter
    def identifier(self, value: str) -> None:
        self._identifier = value

    @property
    def chain(self) -> List[Union['Node', 'Relationship']]:
        return self._chain

    @property
    def elements(self) -> List[ASTNode]:
        return self._chain

    def add_element(self, element: Union['Node', 'Relationship']) -> None:
        """Append a node/relationship, wiring adjacency links as it goes.

        Raises:
            ValueError: when two elements of the same kind would be adjacent.
        """
        from .node import Node
        from .relationship import Relationship

        if self._chain and type(self._chain[-1]) is type(element):
            raise ValueError("Cannot add two consecutive elements of the same type to the graph pattern")

        if self._chain:
            tail = self._chain[-1]
            if isinstance(tail, Node) and isinstance(element, Relationship):
                tail.outgoing = element
                element.source = tail
            if isinstance(tail, Relationship) and isinstance(element, Node):
                tail.target = element
                element.incoming = tail

        self._chain.append(element)
        self.add_child(element)

    @property
    def start_node(self) -> 'Node':
        from .node import Node
        if not self._chain:
            raise ValueError("Pattern is empty")
        head = self._chain[0]
        if isinstance(head, Node):
            return head
        raise ValueError("Pattern does not start with a node")

    @property
    def end_node(self) -> 'Node':
        from .node import Node
        if not self._chain:
            raise ValueError("Pattern is empty")
        tail = self._chain[-1]
        if isinstance(tail, Node):
            return tail
        raise ValueError("Pattern does not end with a node")

    def first_node(self) -> Optional['Node']:
        # Returns the first chain element; assumed to be a Node by
        # construction — TODO confirm against callers.
        if self._chain:
            return self._chain[0]
        return None

    def value(self) -> List[Any]:
        return list(self.values())

    def values(self) -> Generator[Any, None, None]:
        """Yield node values and relationship matches along the chain,
        emitting intermediate end-nodes between consecutive matches."""
        from .node import Node
        from .relationship import Relationship

        for element in self._chain:
            if isinstance(element, Node):
                yield element.value()
            elif isinstance(element, Relationship):
                matches = element.matches
                for i, match in enumerate(matches):
                    yield match
                    if i < len(matches) - 1:
                        yield match["endNode"]

    async def fetch_data(self) -> None:
        """Loads data from the database for all elements."""
        from .database import Database
        from .node import Node
        from .relationship import Relationship
        from .node_reference import NodeReference
        from .relationship_reference import RelationshipReference
        from .node_data import NodeData
        from .relationship_data import RelationshipData

        db = Database.get_instance()
        for element in self._chain:
            # References share the data of the element they refer to.
            if isinstance(element, (NodeReference, RelationshipReference)):
                continue
            data = await db.get_data(element)
            if isinstance(element, Node):
                element.set_data(data)
            elif isinstance(element, Relationship):
                element.set_data(data)

    async def initialize(self) -> None:
        await self.fetch_data()

    async def traverse(self) -> None:
        head = self.first_node()
        if head:
            await head.next()


# ---- flowquery-py/src/graph/pattern_expression.py ----
"""Pattern expression for FlowQuery."""

from typing import Any

from ..parsing.ast_node import ASTNode
from .node_reference import NodeReference
from .pattern import Pattern


class PatternExpression(Pattern):
    """A pattern usable as a boolean expression (e.g. in WHERE clauses).

    Evaluates to True when the pattern is matched at least once.
    """

    def __init__(self):
        super().__init__()
        self._fetched: bool = False
        self._evaluation: bool = False

    def add_element(self, element) -> None:
        """Add an element; the first one must be a NodeReference."""
        if not self._chain and not isinstance(element, NodeReference):
            raise ValueError("PatternExpression must start with a NodeReference")
        super().add_element(element)

    @property
    def identifier(self):
        # Pattern expressions are anonymous.
        return None

    @identifier.setter
    def identifier(self, value):
        raise ValueError("Cannot set identifier on PatternExpression")

    async def fetch_data(self) -> None:
        """Fetch data once; subsequent calls are no-ops (cached)."""
        if self._fetched:
            return
        await super().fetch_data()
        self._fetched = True

    async def evaluate(self) -> None:
        """Traverse the pattern; any completed match flips the result True."""
        self._evaluation = False

        async def set_evaluation_true():
            self._evaluation = True

        self.end_node.todo_next = set_evaluation_true
        await self.start_node.next()

    def value(self) -> bool:
        """Result of the most recent evaluation."""
        return self._evaluation

    def is_operand(self) -> bool:
        """PatternExpression participates in expressions as an operand."""
        return True
+ + Sets _evaluation to True if the pattern is matched, False otherwise. + """ + self._evaluation = False + + async def set_evaluation_true(): + self._evaluation = True + + self.end_node.todo_next = set_evaluation_true + await self.start_node.next() + + def value(self) -> bool: + """Returns the result of the pattern evaluation.""" + return self._evaluation + + def is_operand(self) -> bool: + """PatternExpression is an operand in expressions.""" + return True diff --git a/flowquery-py/src/graph/patterns.py b/flowquery-py/src/graph/patterns.py new file mode 100644 index 0000000..29c10b8 --- /dev/null +++ b/flowquery-py/src/graph/patterns.py @@ -0,0 +1,42 @@ +"""Collection of graph patterns for FlowQuery.""" + +from typing import Awaitable, Callable, List, Optional + +from .pattern import Pattern + + +class Patterns: + """Manages a collection of graph patterns.""" + + def __init__(self, patterns: Optional[List[Pattern]] = None): + self._patterns = patterns or [] + self._to_do_next: Optional[Callable[[], Awaitable[None]]] = None + + @property + def patterns(self) -> List[Pattern]: + return self._patterns + + @property + def to_do_next(self) -> Optional[Callable[[], Awaitable[None]]]: + return self._to_do_next + + @to_do_next.setter + def to_do_next(self, func: Optional[Callable[[], Awaitable[None]]]) -> None: + self._to_do_next = func + if self._patterns: + self._patterns[-1].end_node.todo_next = func + + async def initialize(self) -> None: + previous: Optional[Pattern] = None + for pattern in self._patterns: + await pattern.fetch_data() # Ensure data is loaded + if previous is not None: + # Chain the patterns together + async def next_pattern_start(p=pattern): + await p.start_node.next() + previous.end_node.todo_next = next_pattern_start + previous = pattern + + async def traverse(self) -> None: + if self._patterns: + await self._patterns[0].start_node.next() diff --git a/flowquery-py/src/graph/physical_node.py b/flowquery-py/src/graph/physical_node.py new file mode 
100644 index 0000000..de1952a --- /dev/null +++ b/flowquery-py/src/graph/physical_node.py @@ -0,0 +1,40 @@ +"""Physical node representation for FlowQuery.""" + +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from ..parsing.ast_node import ASTNode + +from .node import Node + + +class PhysicalNode(Node): + """Represents a physical node in the graph database.""" + + def __init__(self, id_: Optional[str], label: str, properties: Optional[Dict[str, Any]] = None): + super().__init__(id_, label) + # Store additional physical properties in a separate dict + # (Node.properties is for Expression-based pattern properties) + self._physical_properties = properties or {} + self._statement: Optional["ASTNode"] = None + + @property + def physical_properties(self) -> Dict[str, Any]: + """Get the physical properties (values, not expressions).""" + return self._physical_properties + + @property + def statement(self) -> Optional["ASTNode"]: + return self._statement + + @statement.setter + def statement(self, value: Optional["ASTNode"]) -> None: + self._statement = value + + async def data(self) -> List[Dict[str, Any]]: + if self._statement is None: + raise ValueError("Statement is null") + from ..compute.runner import Runner + runner = Runner(ast=self._statement) + await runner.run() + return runner.results diff --git a/flowquery-py/src/graph/physical_relationship.py b/flowquery-py/src/graph/physical_relationship.py new file mode 100644 index 0000000..e36da42 --- /dev/null +++ b/flowquery-py/src/graph/physical_relationship.py @@ -0,0 +1,36 @@ +"""Physical relationship representation for FlowQuery.""" + +from __future__ import annotations +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +from .relationship import Relationship + +if TYPE_CHECKING: + from ..parsing.ast_node import ASTNode + + +class PhysicalRelationship(Relationship): + """Represents a physical relationship in the graph database.""" + + def __init__(self): + 
super().__init__() + self._statement: Optional[ASTNode] = None + + @property + def statement(self) -> Optional[ASTNode]: + """Get the statement for this relationship.""" + return self._statement + + @statement.setter + def statement(self, value: Optional[ASTNode]) -> None: + """Set the statement for this relationship.""" + self._statement = value + + async def data(self) -> List[Dict[str, Any]]: + """Execute the statement and return results.""" + if self._statement is None: + raise ValueError("Statement is null") + from ..compute.runner import Runner + runner = Runner(None, self._statement) + await runner.run() + return runner.results diff --git a/flowquery-py/src/graph/relationship.py b/flowquery-py/src/graph/relationship.py new file mode 100644 index 0000000..40968b3 --- /dev/null +++ b/flowquery-py/src/graph/relationship.py @@ -0,0 +1,135 @@ +"""Graph relationship representation for FlowQuery.""" + +from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union + +from ..parsing.ast_node import ASTNode +from .hops import Hops +from .relationship_match_collector import RelationshipMatchCollector, RelationshipMatchRecord + +if TYPE_CHECKING: + from .node import Node + from .relationship_data import RelationshipData, RelationshipRecord + + +class Relationship(ASTNode): + """Represents a relationship in a graph pattern.""" + + def __init__(self): + super().__init__() + self._identifier: Optional[str] = None + self._type: Optional[str] = None + self._hops: Hops = Hops() + self._source: Optional['Node'] = None + self._target: Optional['Node'] = None + self._data: Optional['RelationshipData'] = None + self._value: Optional[Union[RelationshipMatchRecord, List[RelationshipMatchRecord]]] = None + self._matches: RelationshipMatchCollector = RelationshipMatchCollector() + self._properties: Dict[str, Any] = {} + + @property + def identifier(self) -> Optional[str]: + return self._identifier + + @identifier.setter + def identifier(self, value: str) -> None: + 
self._identifier = value + + @property + def type(self) -> Optional[str]: + return self._type + + @type.setter + def type(self, value: str) -> None: + self._type = value + + @property + def hops(self) -> Hops: + return self._hops + + @hops.setter + def hops(self, value: Hops) -> None: + self._hops = value + + @property + def properties(self) -> Dict[str, Any]: + """Get properties from relationship data.""" + if self._data: + return self._data.properties() or {} + return {} + + @property + def source(self) -> Optional['Node']: + return self._source + + @source.setter + def source(self, value: 'Node') -> None: + self._source = value + + @property + def target(self) -> Optional['Node']: + return self._target + + @target.setter + def target(self, value: 'Node') -> None: + self._target = value + + # Keep start/end aliases for backward compatibility + @property + def start(self) -> Optional['Node']: + return self._source + + @start.setter + def start(self, value: 'Node') -> None: + self._source = value + + @property + def end(self) -> Optional['Node']: + return self._target + + @end.setter + def end(self, value: 'Node') -> None: + self._target = value + + def set_data(self, data: Optional['RelationshipData']) -> None: + self._data = data + + def set_value(self, relationship: 'Relationship') -> None: + """Set value by pushing match to collector.""" + self._matches.push(relationship) + self._value = self._matches.value() + + def value(self) -> Optional[Union[RelationshipMatchRecord, List[RelationshipMatchRecord]]]: + return self._value + + @property + def matches(self) -> List[RelationshipMatchRecord]: + return self._matches.matches + + def set_end_node(self, node: 'Node') -> None: + """Set the end node for the current match.""" + self._matches.end_node = node + + async def find(self, left_id: str, hop: int = 0) -> None: + """Find relationships starting from the given node ID.""" + # Save original source node + original = self._source + if hop > 0: + # For hops greater 
than 0, the source becomes the target of the previous hop + self._source = self._target + if hop == 0: + self._data.reset() if self._data else None + + while self._data and self._data.find(left_id, hop): + data = self._data.current(hop) + if data and self._hops and hop >= self._hops.min: + self.set_value(self) + if self._target and 'right_id' in data: + await self._target.find(data['right_id'], hop) + if self._matches.is_circular(): + raise ValueError("Circular relationship detected") + if self._hops and hop + 1 < self._hops.max: + await self.find(data['right_id'], hop + 1) + self._matches.pop() + + # Restore original source node + self._source = original diff --git a/flowquery-py/src/graph/relationship_data.py b/flowquery-py/src/graph/relationship_data.py new file mode 100644 index 0000000..cb6d193 --- /dev/null +++ b/flowquery-py/src/graph/relationship_data.py @@ -0,0 +1,33 @@ +"""Relationship data class for FlowQuery.""" + +from typing import Any, Dict, List, Optional, TypedDict + +from .data import Data + + +class RelationshipRecord(TypedDict, total=False): + """Represents a relationship record from the database.""" + left_id: str + right_id: str + + +class RelationshipData(Data): + """Relationship data class extending Data with left_id-based indexing.""" + + def __init__(self, records: Optional[List[Dict[str, Any]]] = None): + super().__init__(records) + self._build_index("left_id") + + def find(self, left_id: str, hop: int = 0) -> bool: + """Find a relationship by start node ID.""" + return self._find(left_id, hop) + + def properties(self) -> Optional[Dict[str, Any]]: + """Get properties of current relationship, excluding left_id and right_id.""" + current = self.current() + if current: + props = dict(current) + props.pop("left_id", None) + props.pop("right_id", None) + return props + return None diff --git a/flowquery-py/src/graph/relationship_match_collector.py b/flowquery-py/src/graph/relationship_match_collector.py new file mode 100644 index 
0000000..9c94c66 --- /dev/null +++ b/flowquery-py/src/graph/relationship_match_collector.py @@ -0,0 +1,77 @@ +"""Collector for relationship match records.""" + +from typing import Any, Dict, List, Optional, TYPE_CHECKING, TypedDict, Union + +if TYPE_CHECKING: + from .relationship import Relationship + from .node import Node + + +class RelationshipMatchRecord(TypedDict, total=False): + """Represents a matched relationship record.""" + type: str + startNode: Dict[str, Any] + endNode: Optional[Dict[str, Any]] + properties: Dict[str, Any] + + +class RelationshipMatchCollector: + """Collects relationship matches during graph traversal.""" + + def __init__(self): + self._matches: List[RelationshipMatchRecord] = [] + self._node_ids: List[str] = [] + + def push(self, relationship: 'Relationship') -> RelationshipMatchRecord: + """Push a new match onto the collector.""" + match: RelationshipMatchRecord = { + "type": relationship.type or "", + "startNode": relationship.source.value() if relationship.source else {}, + "endNode": None, + "properties": relationship.properties, + } + self._matches.append(match) + start_node_value = match.get("startNode", {}) + if isinstance(start_node_value, dict): + self._node_ids.append(start_node_value.get("id", "")) + return match + + @property + def end_node(self) -> Optional[Dict[str, Any]]: + """Get the end node of the last match.""" + if self._matches: + return self._matches[-1].get("endNode") + return None + + @end_node.setter + def end_node(self, node: 'Node') -> None: + """Set the end node of the last match.""" + if self._matches: + self._matches[-1]["endNode"] = node.value() + + def pop(self) -> Optional[RelationshipMatchRecord]: + """Pop the last match from the collector.""" + if self._node_ids: + self._node_ids.pop() + if self._matches: + return self._matches.pop() + return None + + def value(self) -> Optional[Union[RelationshipMatchRecord, List[RelationshipMatchRecord]]]: + """Get the current value(s).""" + if len(self._matches) == 
0: + return None + elif len(self._matches) == 1: + return self._matches[0] + else: + return self._matches + + @property + def matches(self) -> List[RelationshipMatchRecord]: + """Get all matches.""" + return self._matches + + def is_circular(self) -> bool: + """Check if the collected relationships form a circular pattern.""" + seen = set(self._node_ids) + return len(seen) < len(self._node_ids) diff --git a/flowquery-py/src/graph/relationship_reference.py b/flowquery-py/src/graph/relationship_reference.py new file mode 100644 index 0000000..9419c4f --- /dev/null +++ b/flowquery-py/src/graph/relationship_reference.py @@ -0,0 +1,21 @@ +"""Relationship reference for FlowQuery.""" + +from .relationship import Relationship +from ..parsing.ast_node import ASTNode + + +class RelationshipReference(Relationship): + """Represents a reference to an existing relationship variable.""" + + def __init__(self, relationship: Relationship, referred: ASTNode): + super().__init__() + self._referred = referred + if relationship.type: + self.type = relationship.type + + @property + def referred(self) -> ASTNode: + return self._referred + + def value(self): + return self._referred.value() if self._referred else None diff --git a/flowquery-py/src/io/__init__.py b/flowquery-py/src/io/__init__.py new file mode 100644 index 0000000..a32ae42 --- /dev/null +++ b/flowquery-py/src/io/__init__.py @@ -0,0 +1,5 @@ +"""IO module for FlowQuery.""" + +from .command_line import CommandLine + +__all__ = ["CommandLine"] diff --git a/flowquery-py/src/io/command_line.py b/flowquery-py/src/io/command_line.py new file mode 100644 index 0000000..f17c666 --- /dev/null +++ b/flowquery-py/src/io/command_line.py @@ -0,0 +1,67 @@ +"""Interactive command-line interface for FlowQuery.""" + +import asyncio +from typing import Optional + +from ..compute.runner import Runner + + +class CommandLine: + """Interactive command-line interface for FlowQuery. 
+ + Provides a REPL (Read-Eval-Print Loop) for executing FlowQuery statements + and displaying results. + + Example: + cli = CommandLine() + cli.loop() # Starts interactive mode + """ + + def loop(self) -> None: + """Starts the interactive command loop. + + Prompts the user for FlowQuery statements, executes them, and displays results. + Type "exit" to quit the loop. End multi-line queries with ";". + """ + print('Welcome to FlowQuery! Type "exit" to quit.') + print('End queries with ";" to execute. Multi-line input supported.') + + while True: + try: + lines = [] + prompt = "> " + while True: + line = input(prompt) + if line.strip() == "exit": + print("Exiting FlowQuery.") + return + lines.append(line) + user_input = "\n".join(lines) + if user_input.strip().endswith(";"): + break + prompt = "... " + except EOFError: + break + + if user_input.strip() == "": + continue + + # Remove the termination semicolon before sending to the engine + user_input = user_input.strip().rstrip(";") + + try: + runner = Runner(user_input) + asyncio.run(self._execute(runner)) + except Exception as e: + print(f"Error: {e}") + + print("Exiting FlowQuery.") + + async def _execute(self, runner: Runner) -> None: + await runner.run() + print(runner.results) + + +def main() -> None: + """Entry point for the flowquery CLI command.""" + CommandLine().loop() diff --git a/flowquery-py/src/parsing/__init__.py b/flowquery-py/src/parsing/__init__.py new file mode 100644 index 0000000..8e2be71 --- /dev/null +++ b/flowquery-py/src/parsing/__init__.py @@ -0,0 +1,17 @@ +"""Parsing module for FlowQuery.""" + +from .ast_node import ASTNode +from .context import Context +from .alias import Alias +from .alias_option import AliasOption +from .base_parser import BaseParser +from .parser import Parser + +__all__ = [ + "ASTNode", + "Context", + "Alias", + "AliasOption", + "BaseParser", + "Parser", +] diff --git a/flowquery-py/src/parsing/alias.py b/flowquery-py/src/parsing/alias.py new file mode 100644 index 
0000000..8c7c2be --- /dev/null +++ b/flowquery-py/src/parsing/alias.py @@ -0,0 +1,20 @@ +"""Alias node for FlowQuery AST.""" + +from .ast_node import ASTNode + + +class Alias(ASTNode): + """Represents an alias in the FlowQuery AST.""" + + def __init__(self, alias: str): + super().__init__() + self._alias = alias + + def __str__(self) -> str: + return f"Alias ({self._alias})" + + def get_alias(self) -> str: + return self._alias + + def value(self) -> str: + return self._alias diff --git a/flowquery-py/src/parsing/alias_option.py b/flowquery-py/src/parsing/alias_option.py new file mode 100644 index 0000000..b4b7e95 --- /dev/null +++ b/flowquery-py/src/parsing/alias_option.py @@ -0,0 +1,11 @@ +"""Alias option enumeration for FlowQuery parsing.""" + +from enum import Enum + + +class AliasOption(Enum): + """Enumeration of alias options for parsing.""" + + NOT_ALLOWED = 0 + OPTIONAL = 1 + REQUIRED = 2 diff --git a/flowquery-py/src/parsing/ast_node.py b/flowquery-py/src/parsing/ast_node.py new file mode 100644 index 0000000..c431ee2 --- /dev/null +++ b/flowquery-py/src/parsing/ast_node.py @@ -0,0 +1,146 @@ +"""Represents a node in the Abstract Syntax Tree (AST).""" + +from __future__ import annotations +from typing import List, Any, Generator, Optional + + +class ASTNode: + """Represents a node in the Abstract Syntax Tree (AST). + + The AST is a tree representation of the parsed FlowQuery statement structure. + Each node can have children and maintains a reference to its parent. + + Example: + root = ASTNode() + child = ASTNode() + root.add_child(child) + """ + + def __init__(self): + self._parent: Optional[ASTNode] = None + self.children: List[ASTNode] = [] + + def add_child(self, child: ASTNode) -> None: + """Adds a child node to this node and sets the child's parent reference. + + Args: + child: The child node to add + """ + child._parent = self + self.children.append(child) + + def first_child(self) -> ASTNode: + """Returns the first child node. 
+ + Returns: + The first child node + + Raises: + ValueError: If the node has no children + """ + if len(self.children) == 0: + raise ValueError('Expected child') + return self.children[0] + + def last_child(self) -> ASTNode: + """Returns the last child node. + + Returns: + The last child node + + Raises: + ValueError: If the node has no children + """ + if len(self.children) == 0: + raise ValueError('Expected child') + return self.children[-1] + + def get_children(self) -> List[ASTNode]: + """Returns all child nodes. + + Returns: + Array of child nodes + """ + return self.children + + def child_count(self) -> int: + """Returns the number of child nodes. + + Returns: + The count of children + """ + return len(self.children) + + def value(self) -> Any: + """Returns the value of this node. Override in subclasses to provide specific values. + + Returns: + The node's value, or None if not applicable + """ + return None + + def is_operator(self) -> bool: + """Checks if this node represents an operator. + + Returns: + True if this is an operator node, False otherwise + """ + return False + + def is_operand(self) -> bool: + """Checks if this node represents an operand (the opposite of an operator). + + Returns: + True if this is an operand node, False otherwise + """ + return not self.is_operator() + + @property + def precedence(self) -> int: + """Gets the operator precedence for this node. Higher values indicate higher precedence. + + Returns: + The precedence value (0 for non-operators) + """ + return 0 + + @property + def left_associative(self) -> bool: + """Indicates whether this operator is left-associative. + + Returns: + True if left-associative, False otherwise + """ + return False + + def print(self) -> str: + """Prints a string representation of the AST tree starting from this node. 
+ + Returns: + A formatted string showing the tree structure + """ + return '\n'.join(self._print(0)) + + def _print(self, indent: int) -> Generator[str, None, None]: + """Generator function for recursively printing the tree structure. + + Args: + indent: The current indentation level + + Yields: + Lines representing each node in the tree + """ + if indent == 0: + yield self.__class__.__name__ + elif indent > 0: + yield '-' * indent + f' {self}' + for child in self.children: + yield from child._print(indent + 1) + + def __str__(self) -> str: + """Returns a string representation of this node. Override in subclasses for custom formatting. + + Returns: + The string representation + """ + return self.__class__.__name__ diff --git a/flowquery-py/src/parsing/base_parser.py b/flowquery-py/src/parsing/base_parser.py new file mode 100644 index 0000000..1a6bde9 --- /dev/null +++ b/flowquery-py/src/parsing/base_parser.py @@ -0,0 +1,84 @@ +"""Base class for parsers providing common token manipulation functionality.""" + +from typing import List, Optional + +from ..tokenization.token import Token +from ..tokenization.tokenizer import Tokenizer + + +class BaseParser: + """Base class for parsers providing common token manipulation functionality. + + This class handles tokenization and provides utility methods for navigating + through tokens, peeking ahead, and checking token sequences. + """ + + def __init__(self, tokens: Optional[List[Token]] = None): + self._tokens: List[Token] = tokens or [] + self._token_index: int = 0 + + def tokenize(self, statement: str) -> None: + """Tokenizes a statement and initializes the token array. + + Args: + statement: The input statement to tokenize + """ + self._tokens = Tokenizer(statement).tokenize() + self._token_index = 0 + + def set_next_token(self) -> None: + """Advances to the next token in the sequence.""" + self._token_index += 1 + + def peek(self) -> Optional[Token]: + """Peeks at the next token without advancing the current position. 
+ + Returns: + The next token, or None if at the end of the token stream + """ + if self._token_index + 1 >= len(self._tokens): + return None + return self._tokens[self._token_index + 1] + + def ahead(self, tokens: List[Token], skip_whitespace_and_comments: bool = True) -> bool: + """Checks if a sequence of tokens appears ahead in the token stream. + + Args: + tokens: The sequence of tokens to look for + skip_whitespace_and_comments: Whether to skip whitespace and comments when matching + + Returns: + True if the token sequence is found ahead, False otherwise + """ + j = 0 + for i in range(self._token_index, len(self._tokens)): + if skip_whitespace_and_comments and self._tokens[i].is_whitespace_or_comment(): + continue + if not self._tokens[i].equals(tokens[j]): + return False + j += 1 + if j == len(tokens): + break + return j == len(tokens) + + @property + def token(self) -> Token: + """Gets the current token. + + Returns: + The current token, or EOF if at the end + """ + if self._token_index >= len(self._tokens): + return Token.EOF + return self._tokens[self._token_index] + + @property + def previous_token(self) -> Token: + """Gets the previous token. 
+ + Returns: + The previous token, or EOF if at the beginning + """ + if self._token_index - 1 < 0: + return Token.EOF + return self._tokens[self._token_index - 1] diff --git a/flowquery-py/src/parsing/components/__init__.py b/flowquery-py/src/parsing/components/__init__.py new file mode 100644 index 0000000..d9a4396 --- /dev/null +++ b/flowquery-py/src/parsing/components/__init__.py @@ -0,0 +1,19 @@ +"""Components module for FlowQuery parsing.""" + +from .csv import CSV +from .json import JSON +from .text import Text +from .from_ import From +from .headers import Headers +from .post import Post +from .null import Null + +__all__ = [ + "CSV", + "JSON", + "Text", + "From", + "Headers", + "Post", + "Null", +] diff --git a/flowquery-py/src/parsing/components/csv.py b/flowquery-py/src/parsing/components/csv.py new file mode 100644 index 0000000..d43aa30 --- /dev/null +++ b/flowquery-py/src/parsing/components/csv.py @@ -0,0 +1,8 @@ +"""CSV component node.""" + +from ..ast_node import ASTNode + + +class CSV(ASTNode): + """Represents a CSV data type marker in LOAD operations.""" + pass diff --git a/flowquery-py/src/parsing/components/from_.py b/flowquery-py/src/parsing/components/from_.py new file mode 100644 index 0000000..e6c956c --- /dev/null +++ b/flowquery-py/src/parsing/components/from_.py @@ -0,0 +1,10 @@ +"""From component node.""" + +from ..ast_node import ASTNode + + +class From(ASTNode): + """Represents a FROM clause in LOAD operations.""" + + def value(self) -> str: + return self.children[0].value() diff --git a/flowquery-py/src/parsing/components/headers.py b/flowquery-py/src/parsing/components/headers.py new file mode 100644 index 0000000..98f0a23 --- /dev/null +++ b/flowquery-py/src/parsing/components/headers.py @@ -0,0 +1,12 @@ +"""Headers component node.""" + +from typing import Dict + +from ..ast_node import ASTNode + + +class Headers(ASTNode): + """Represents a HEADERS clause in LOAD operations.""" + + def value(self) -> Dict: + return 
self.first_child().value() or {} diff --git a/flowquery-py/src/parsing/components/json.py b/flowquery-py/src/parsing/components/json.py new file mode 100644 index 0000000..9f6fef1 --- /dev/null +++ b/flowquery-py/src/parsing/components/json.py @@ -0,0 +1,8 @@ +"""JSON component node.""" + +from ..ast_node import ASTNode + + +class JSON(ASTNode): + """Represents a JSON data type marker in LOAD operations.""" + pass diff --git a/flowquery-py/src/parsing/components/null.py b/flowquery-py/src/parsing/components/null.py new file mode 100644 index 0000000..89ab287 --- /dev/null +++ b/flowquery-py/src/parsing/components/null.py @@ -0,0 +1,10 @@ +"""Null component node.""" + +from ..ast_node import ASTNode + + +class Null(ASTNode): + """Represents a NULL value in the AST.""" + + def value(self): + return None diff --git a/flowquery-py/src/parsing/components/post.py b/flowquery-py/src/parsing/components/post.py new file mode 100644 index 0000000..0d31c90 --- /dev/null +++ b/flowquery-py/src/parsing/components/post.py @@ -0,0 +1,8 @@ +"""Post component node.""" + +from ..ast_node import ASTNode + + +class Post(ASTNode): + """Represents a POST clause in LOAD operations.""" + pass diff --git a/flowquery-py/src/parsing/components/text.py b/flowquery-py/src/parsing/components/text.py new file mode 100644 index 0000000..5a15462 --- /dev/null +++ b/flowquery-py/src/parsing/components/text.py @@ -0,0 +1,8 @@ +"""Text component node.""" + +from ..ast_node import ASTNode + + +class Text(ASTNode): + """Represents a Text data type marker in LOAD operations.""" + pass diff --git a/flowquery-py/src/parsing/context.py b/flowquery-py/src/parsing/context.py new file mode 100644 index 0000000..a557e59 --- /dev/null +++ b/flowquery-py/src/parsing/context.py @@ -0,0 +1,50 @@ +"""Maintains a stack of AST nodes to track parsing context.""" + +from typing import List, Optional, Type + +from .ast_node import ASTNode + + +class Context: + """Maintains a stack of AST nodes to track parsing context. 
+ + Used during parsing to maintain the current context and check for specific node types + in the parsing hierarchy, which helps with context-sensitive parsing decisions. + + Example: + context = Context() + context.push(node) + has_return = context.contains_type(Return) + """ + + def __init__(self): + self._nodes: List[ASTNode] = [] + + def push(self, node: ASTNode) -> None: + """Pushes a node onto the context stack. + + Args: + node: The AST node to push + """ + self._nodes.append(node) + + def pop(self) -> Optional[ASTNode]: + """Pops the top node from the context stack. + + Returns: + The popped node, or None if the stack is empty + """ + if len(self._nodes) == 0: + return None + return self._nodes.pop() + + def contains_type(self, type_: Type[ASTNode]) -> bool: + """Checks if the nodes stack contains a node of the specified type. + + Args: + type_: The class of the node type to search for + + Returns: + True if a node of the specified type is found in the stack, False otherwise + """ + return any(isinstance(v, type_) for v in self._nodes) diff --git a/flowquery-py/src/parsing/data_structures/__init__.py b/flowquery-py/src/parsing/data_structures/__init__.py new file mode 100644 index 0000000..804fb08 --- /dev/null +++ b/flowquery-py/src/parsing/data_structures/__init__.py @@ -0,0 +1,15 @@ +"""Data structures module for FlowQuery parsing.""" + +from .associative_array import AssociativeArray +from .json_array import JSONArray +from .key_value_pair import KeyValuePair +from .lookup import Lookup +from .range_lookup import RangeLookup + +__all__ = [ + "AssociativeArray", + "JSONArray", + "KeyValuePair", + "Lookup", + "RangeLookup", +] diff --git a/flowquery-py/src/parsing/data_structures/associative_array.py b/flowquery-py/src/parsing/data_structures/associative_array.py new file mode 100644 index 0000000..57a09d4 --- /dev/null +++ b/flowquery-py/src/parsing/data_structures/associative_array.py @@ -0,0 +1,41 @@ +"""Represents an associative array 
(object/dictionary) in the AST.""" + +from typing import Any, Dict + +from ..ast_node import ASTNode +from .key_value_pair import KeyValuePair + + +class AssociativeArray(ASTNode): + """Represents an associative array (object/dictionary) in the AST. + + Associative arrays map string keys to values, similar to JSON objects. + + Example: + # For { name: "Alice", age: 30 } + obj = AssociativeArray() + obj.add_key_value(KeyValuePair("name", name_expr)) + obj.add_key_value(KeyValuePair("age", age_expr)) + """ + + def add_key_value(self, key_value_pair: KeyValuePair) -> None: + """Adds a key-value pair to the associative array. + + Args: + key_value_pair: The key-value pair to add + """ + self.add_child(key_value_pair) + + def __str__(self) -> str: + return 'AssociativeArray' + + def _value(self): + for child in self.children: + key_value = child + yield {key_value.key: key_value._value} + + def value(self) -> Dict[str, Any]: + result = {} + for item in self._value(): + result.update(item) + return result diff --git a/flowquery-py/src/parsing/data_structures/json_array.py b/flowquery-py/src/parsing/data_structures/json_array.py new file mode 100644 index 0000000..ade7ff4 --- /dev/null +++ b/flowquery-py/src/parsing/data_structures/json_array.py @@ -0,0 +1,30 @@ +"""Represents a JSON array in the AST.""" + +from typing import Any, List + +from ..ast_node import ASTNode + + +class JSONArray(ASTNode): + """Represents a JSON array in the AST. + + JSON arrays are ordered collections of values. + + Example: + # For [1, 2, 3] + arr = JSONArray() + arr.add_value(Number("1")) + arr.add_value(Number("2")) + arr.add_value(Number("3")) + """ + + def add_value(self, value: ASTNode) -> None: + """Adds a value to the array. 
+ + Args: + value: The AST node representing the value to add + """ + self.add_child(value) + + def value(self) -> List[Any]: + return [child.value() for child in self.children] diff --git a/flowquery-py/src/parsing/data_structures/key_value_pair.py b/flowquery-py/src/parsing/data_structures/key_value_pair.py new file mode 100644 index 0000000..3ca9831 --- /dev/null +++ b/flowquery-py/src/parsing/data_structures/key_value_pair.py @@ -0,0 +1,38 @@ +"""Represents a key-value pair in an associative array.""" + +from typing import Any + +from ..ast_node import ASTNode +from ..expressions.string import String + + +class KeyValuePair(ASTNode): + """Represents a key-value pair in an associative array. + + Used to build object literals in FlowQuery. + + Example: + kvp = KeyValuePair("name", String("Alice")) + """ + + def __init__(self, key: str, value: ASTNode): + """Creates a new key-value pair. + + Args: + key: The key string + value: The AST node representing the value + """ + super().__init__() + self.add_child(String(key)) + self.add_child(value) + + @property + def key(self) -> str: + return self.children[0].value() + + @property + def _value(self) -> Any: + return self.children[1].value() + + def __str__(self) -> str: + return "KeyValuePair" diff --git a/flowquery-py/src/parsing/data_structures/lookup.py b/flowquery-py/src/parsing/data_structures/lookup.py new file mode 100644 index 0000000..374b24d --- /dev/null +++ b/flowquery-py/src/parsing/data_structures/lookup.py @@ -0,0 +1,49 @@ +"""Represents a lookup operation (array/object indexing) in the AST.""" + +from typing import Any + +from ..ast_node import ASTNode + + +class Lookup(ASTNode): + """Represents a lookup operation (array/object indexing) in the AST. + + Lookups access elements from arrays or properties from objects using an index or key. 
+ + Example: + # For array[0] or obj.property or obj["key"] + lookup = Lookup() + lookup.variable = array_or_obj_node + lookup.index = index_node + """ + + @property + def index(self) -> ASTNode: + return self.children[0] + + @index.setter + def index(self, index: ASTNode) -> None: + self.add_child(index) + + @property + def variable(self) -> ASTNode: + return self.children[1] + + @variable.setter + def variable(self, variable: ASTNode) -> None: + self.add_child(variable) + + def is_operand(self) -> bool: + return True + + def value(self) -> Any: + obj = self.variable.value() + key = self.index.value() + # Try dict-like access first, then fall back to attribute access for objects + try: + return obj[key] + except (TypeError, KeyError): + # For objects with attributes (like dataclasses), use getattr + if hasattr(obj, key): + return getattr(obj, key) + raise diff --git a/flowquery-py/src/parsing/data_structures/range_lookup.py b/flowquery-py/src/parsing/data_structures/range_lookup.py new file mode 100644 index 0000000..d2882cd --- /dev/null +++ b/flowquery-py/src/parsing/data_structures/range_lookup.py @@ -0,0 +1,42 @@ +"""Represents a range lookup operation in the AST.""" + +from typing import Any, List + +from ..ast_node import ASTNode + + +class RangeLookup(ASTNode): + """Represents a range lookup (array slicing) operation in the AST.""" + + @property + def from_(self) -> ASTNode: + return self.children[0] + + @from_.setter + def from_(self, from_: ASTNode) -> None: + self.add_child(from_) + + @property + def to(self) -> ASTNode: + return self.children[1] + + @to.setter + def to(self, to: ASTNode) -> None: + self.add_child(to) + + @property + def variable(self) -> ASTNode: + return self.children[2] + + @variable.setter + def variable(self, variable: ASTNode) -> None: + self.add_child(variable) + + def is_operand(self) -> bool: + return True + + def value(self) -> List[Any]: + array = self.variable.value() + from_val = self.from_.value() or 0 + to_val = 
self.to.value() or len(array) + return array[from_val:to_val] diff --git a/flowquery-py/src/parsing/expressions/__init__.py b/flowquery-py/src/parsing/expressions/__init__.py new file mode 100644 index 0000000..44392b8 --- /dev/null +++ b/flowquery-py/src/parsing/expressions/__init__.py @@ -0,0 +1,57 @@ +"""Expressions module for FlowQuery parsing.""" + +from .expression import Expression +from .boolean import Boolean +from .number import Number +from .string import String +from .identifier import Identifier +from .reference import Reference +from .f_string import FString +from .expression_map import ExpressionMap +from .operator import ( + Operator, + Add, + Subtract, + Multiply, + Divide, + Modulo, + Power, + Equals, + NotEquals, + GreaterThan, + LessThan, + GreaterThanOrEqual, + LessThanOrEqual, + And, + Or, + Not, + Is, +) + +__all__ = [ + "Expression", + "Boolean", + "Number", + "String", + "Identifier", + "Reference", + "FString", + "ExpressionMap", + "Operator", + "Add", + "Subtract", + "Multiply", + "Divide", + "Modulo", + "Power", + "Equals", + "NotEquals", + "GreaterThan", + "LessThan", + "GreaterThanOrEqual", + "LessThanOrEqual", + "And", + "Or", + "Not", + "Is", +] diff --git a/flowquery-py/src/parsing/expressions/boolean.py b/flowquery-py/src/parsing/expressions/boolean.py new file mode 100644 index 0000000..e0089e6 --- /dev/null +++ b/flowquery-py/src/parsing/expressions/boolean.py @@ -0,0 +1,20 @@ +"""Represents a boolean literal in the AST.""" + +from ..ast_node import ASTNode + + +class Boolean(ASTNode): + """Represents a boolean literal in the AST.""" + + def __init__(self, value: str): + super().__init__() + _value = value.upper() + if _value == "TRUE": + self._value = True + elif _value == "FALSE": + self._value = False + else: + raise ValueError(f"Invalid boolean value: {value}") + + def value(self) -> bool: + return self._value diff --git a/flowquery-py/src/parsing/expressions/expression.py b/flowquery-py/src/parsing/expressions/expression.py 
# --- file: flowquery-py/src/parsing/expressions/expression.py ---
"""Represents an expression in the FlowQuery AST."""

from typing import Any, List, Optional, Generator, TYPE_CHECKING

from ..ast_node import ASTNode

if TYPE_CHECKING:
    from ..functions.aggregate_function import AggregateFunction
    from ...graph.pattern_expression import PatternExpression


class Expression(ASTNode):
    """Represents an expression in the FlowQuery AST.

    Expressions are built using the Shunting Yard algorithm to handle operator
    precedence and associativity. They can contain operands (numbers, strings,
    identifiers) and operators (arithmetic, logical, comparison).

    Example:
        expr = Expression()
        expr.add_node(number_node)
        expr.add_node(plus_operator)
        expr.add_node(another_number_node)
        expr.finish()
    """

    def __init__(self):
        super().__init__()
        self._operators: List[ASTNode] = []   # pending operator stack
        self._output: List[ASTNode] = []      # output queue (RPN order)
        self._alias: Optional[str] = None
        self._overridden: Any = None          # None doubles as "not overridden"
        self._reducers: Optional[List['AggregateFunction']] = None
        self._patterns: Optional[List['PatternExpression']] = None

    def add_node(self, node: ASTNode) -> None:
        """Adds a node (operand or operator) to the expression.

        Uses the Shunting Yard algorithm to maintain correct operator precedence.

        Args:
            node: The AST node to add (operand or operator)
        """
        if node.is_operand():
            self._output.append(node)
        elif node.is_operator():
            operator1 = node
            # Pop operators that bind at least as tightly (respecting
            # left-associativity on ties) before pushing the new one.
            while self._operators:
                operator2 = self._operators[-1]
                if (operator2.precedence > operator1.precedence or
                        (operator2.precedence == operator1.precedence and operator1.left_associative)):
                    self._output.append(self._operators.pop())
                else:
                    break
            self._operators.append(operator1)

    def finish(self) -> None:
        """Finalizes the expression by converting it to a tree structure.

        Should be called after all nodes have been added.
        """
        while self._operators:
            self._output.append(self._operators.pop())
        self.add_child(self._to_tree())

    def _to_tree(self) -> ASTNode:
        # Consumes the RPN queue from the tail, building the tree bottom-up;
        # right operand is rebuilt before the left one.
        if not self._output:
            return ASTNode()
        node = self._output.pop()
        if node.is_operator():
            rhs = self._to_tree()
            lhs = self._to_tree()
            node.add_child(lhs)
            node.add_child(rhs)
        return node

    def nodes_added(self) -> bool:
        """True once any operand or operator has been added."""
        return len(self._operators) > 0 or len(self._output) > 0

    def value(self) -> Any:
        """Evaluates the expression, honoring an externally set override.

        NOTE(review): a falsy override (0, "", False) works, but overriding
        with None is indistinguishable from "no override" — confirm callers
        never need a literal None override.
        """
        if self._overridden is not None:
            return self._overridden
        if self.child_count() != 1:
            raise ValueError("Expected one child")
        return self.children[0].value()

    def set_alias(self, alias: str) -> None:
        self._alias = alias

    @property
    def alias(self) -> Optional[str]:
        # A bare reference expression defaults its alias to the referenced name.
        from .reference import Reference
        if isinstance(self.first_child(), Reference) and self._alias is None:
            return self.first_child().identifier
        return self._alias

    @alias.setter
    def alias(self, value: str) -> None:
        self._alias = value

    def __str__(self) -> str:
        if self._alias is not None:
            return f"Expression ({self._alias})"
        return "Expression"

    def reducers(self) -> List['AggregateFunction']:
        """Collects (and caches) all aggregate-function nodes in the tree."""
        if self._reducers is None:
            from ..functions.aggregate_function import AggregateFunction
            self._reducers = list(self._extract(self, AggregateFunction))
        return self._reducers

    def patterns(self) -> List['PatternExpression']:
        """Collects (and caches) all pattern-expression nodes in the tree."""
        if self._patterns is None:
            from ...graph.pattern_expression import PatternExpression
            self._patterns = list(self._extract(self, PatternExpression))
        return self._patterns

    def _extract(self, node: ASTNode, of_type: type) -> Generator[Any, None, None]:
        # Depth-first search for nodes of the given type, including the root.
        if isinstance(node, of_type):
            yield node
        for child in node.get_children():
            yield from self._extract(child, of_type)

    def mappable(self) -> bool:
        """True when the expression can be evaluated per-row (no aggregates)."""
        return len(self.reducers()) == 0

    def has_reducers(self) -> bool:
        return len(self.reducers()) > 0

    @property
    def overridden(self) -> Any:
        return self._overridden

    @overridden.setter
    def overridden(self, value: Any) -> None:
        self._overridden = value

# --- file: flowquery-py/src/parsing/expressions/expression_map.py ---
"""Expression map for managing named expressions."""

from typing import Optional, List, TYPE_CHECKING

if TYPE_CHECKING:
    from .expression import Expression


class ExpressionMap:
    """Maps expression aliases to their corresponding Expression objects."""

    def __init__(self):
        self._map: dict[str, 'Expression'] = {}

    def get(self, alias: str) -> Optional['Expression']:
        return self._map.get(alias)

    def has(self, alias: str) -> bool:
        return alias in self._map

    def set_map(self, expressions: List['Expression']) -> None:
        """Rebuilds the map from scratch; unaliased expressions are skipped."""
        self._map.clear()
        for expr in expressions:
            if expr.alias is None:
                continue
            self._map[expr.alias] = expr

# --- file: flowquery-py/src/parsing/expressions/f_string.py ---
"""Represents a formatted string (f-string) in the AST."""

from typing import TYPE_CHECKING

from ..ast_node import ASTNode

if TYPE_CHECKING:
    from .expression import Expression


class FString(ASTNode):
    """Represents a formatted string (f-string) in the AST.

    F-strings allow embedding expressions within string literals.
    Child nodes represent the parts of the f-string (literal strings and
    expressions).

    Example:
        # For f"Hello {name}!"
        fstr = FString()
        fstr.add_child(String("Hello "))
        fstr.add_child(name_expression)
        fstr.add_child(String("!"))
    """

    def value(self) -> str:
        parts = self.get_children()
        return "".join(str(part.value()) for part in parts)

# --- file: flowquery-py/src/parsing/expressions/identifier.py ---
"""Represents an identifier in the AST."""

from .string import String
from typing import Any


class Identifier(String):
    """Represents an identifier in the AST.

    Identifiers are used for variable names, property names, and similar
    constructs.

    Example:
        id = Identifier("myVariable")
    """

    def __str__(self) -> str:
        return f"Identifier ({self._value})"

    def value(self) -> Any:
        return super().value()

# --- file: flowquery-py/src/parsing/expressions/number.py ---
"""Represents a numeric literal in the AST."""

from ..ast_node import ASTNode


class Number(ASTNode):
    """Represents a numeric literal in the AST.

    Parses string representations of numbers into integer or float values.

    Example:
        num = Number("42")
        print(num.value())  # 42
    """

    def __init__(self, value: str):
        """Creates a new Number node by parsing the string value.

        Integer syntax parses to int; anything else (decimals like "1.5",
        scientific notation like "1e5") falls back to float. The previous
        "'.' in value" check rejected exponent notation entirely.

        Args:
            value: The string representation of the number

        Raises:
            ValueError: If the string is not a valid number.
        """
        super().__init__()
        try:
            self._value = int(value)
        except ValueError:
            self._value = float(value)

    def value(self) -> float | int:
        return self._value

    def __str__(self) -> str:
        return f"{self.__class__.__name__} ({self._value})"

# --- file: flowquery-py/src/parsing/expressions/operator.py ---
"""Operator classes for FlowQuery expressions."""

from abc import ABC, abstractmethod
from typing import Any

from ..ast_node import ASTNode


class Operator(ASTNode, ABC):
    """Base class for all operators in FlowQuery.

    Precedence and associativity drive the Shunting Yard construction in
    Expression; higher precedence binds tighter.
    """

    def __init__(self, precedence: int, left_associative: bool):
        super().__init__()
        self._precedence = precedence
        self._left_associative = left_associative

    def is_operator(self) -> bool:
        return True

    @property
    def precedence(self) -> int:
        return self._precedence

    @property
    def left_associative(self) -> bool:
        return self._left_associative

    @abstractmethod
    def value(self) -> Any:
        pass

    @property
    def lhs(self) -> ASTNode:
        return self.get_children()[0]

    @property
    def rhs(self) -> ASTNode:
        return self.get_children()[1]


class Add(Operator):
    """Binary addition (precedence 1, left-associative)."""

    def __init__(self):
        super().__init__(1, True)

    def value(self) -> Any:
        return self.lhs.value() + self.rhs.value()


class Subtract(Operator):
    """Binary subtraction (precedence 1, left-associative)."""

    def __init__(self):
        super().__init__(1, True)

    def value(self) -> Any:
        return self.lhs.value() - self.rhs.value()


class Multiply(Operator):
    """Binary multiplication (precedence 2, left-associative)."""

    def __init__(self):
        super().__init__(2, True)

    def value(self) -> Any:
        return self.lhs.value() * self.rhs.value()


class Divide(Operator):
    """True division (precedence 2, left-associative); int/int yields float."""

    def __init__(self):
        super().__init__(2, True)

    def value(self) -> Any:
        return self.lhs.value() / self.rhs.value()


class Modulo(Operator):
    """Modulo (precedence 2, left-associative)."""

    def __init__(self):
        super().__init__(2, True)

    def value(self) -> Any:
        return self.lhs.value() % self.rhs.value()


class Power(Operator):
    """Exponentiation (precedence 3, right-associative)."""

    def __init__(self):
        super().__init__(3, False)

    def value(self) -> Any:
        return self.lhs.value() ** self.rhs.value()


class Equals(Operator):
    """Equality comparison; returns 1/0 rather than bool."""

    def __init__(self):
        super().__init__(0, True)

    def value(self) -> int:
        return 1 if self.lhs.value() == self.rhs.value() else 0


class NotEquals(Operator):
    """Inequality comparison; returns 1/0."""

    def __init__(self):
        super().__init__(0, True)

    def value(self) -> int:
        return 1 if self.lhs.value() != self.rhs.value() else 0


class GreaterThan(Operator):
    """Strict greater-than comparison; returns 1/0."""

    def __init__(self):
        super().__init__(0, True)

    def value(self) -> int:
        return 1 if self.lhs.value() > self.rhs.value() else 0


class LessThan(Operator):
    """Strict less-than comparison; returns 1/0."""

    def __init__(self):
        super().__init__(0, True)

    def value(self) -> int:
        return 1 if self.lhs.value() < self.rhs.value() else 0


class GreaterThanOrEqual(Operator):
    """Greater-or-equal comparison; returns 1/0."""

    def __init__(self):
        super().__init__(0, True)

    def value(self) -> int:
        return 1 if self.lhs.value() >= self.rhs.value() else 0


class LessThanOrEqual(Operator):
    """Less-or-equal comparison; returns 1/0."""

    def __init__(self):
        super().__init__(0, True)

    def value(self) -> int:
        return 1 if self.lhs.value() <= self.rhs.value() else 0


class And(Operator):
    """Logical AND (precedence -1, below comparisons); returns 1/0.

    NOTE(review): both operands are always evaluated — no short-circuit.
    """

    def __init__(self):
        super().__init__(-1, True)

    def value(self) -> int:
        return 1 if (self.lhs.value() and self.rhs.value()) else 0


class Or(Operator):
    """Logical OR (precedence -1); returns 1/0. Not short-circuiting."""

    def __init__(self):
        super().__init__(-1, True)

    def value(self) -> int:
        return 1 if (self.lhs.value() or self.rhs.value()) else 0


class Not(Operator):
    """Logical NOT over its single (lhs) child; returns 1/0.

    NOTE(review): is_operator() deliberately returns False here, presumably so
    the Shunting Yard path treats NOT specially as a unary — confirm with the
    parser before changing.
    """

    def __init__(self):
        super().__init__(0, True)

    def is_operator(self) -> bool:
        return False

    def value(self) -> int:
        return 1 if not self.lhs.value() else 0


class Is(Operator):
    """IS comparison; implemented as equality, returns 1/0."""

    def __init__(self):
        super().__init__(-1, True)

    def value(self) -> int:
        return 1 if self.lhs.value() == self.rhs.value() else 0
b/flowquery-py/src/parsing/expressions/reference.py new file mode 100644 index 0000000..9797228 --- /dev/null +++ b/flowquery-py/src/parsing/expressions/reference.py @@ -0,0 +1,47 @@ +"""Represents a reference to a previously defined variable or expression.""" + +from typing import Any, Optional + +from ..ast_node import ASTNode +from .identifier import Identifier + + +class Reference(Identifier): + """Represents a reference to a previously defined variable or expression. + + References point to values defined earlier in the query (e.g., in WITH or LOAD statements). + + Example: + ref = Reference("myVar", previous_node) + print(ref.value()) # Gets value from referred node + """ + + def __init__(self, value: str, referred: Optional[ASTNode] = None): + """Creates a new Reference to a variable. + + Args: + value: The identifier name + referred: The node this reference points to (optional) + """ + super().__init__(value) + self._referred = referred + + @property + def referred(self) -> Optional[ASTNode]: + return self._referred + + @referred.setter + def referred(self, node: ASTNode) -> None: + self._referred = node + + def __str__(self) -> str: + return f"Reference ({self._value})" + + def value(self) -> Any: + if self._referred is not None: + return self._referred.value() + return None + + @property + def identifier(self) -> str: + return self._value diff --git a/flowquery-py/src/parsing/expressions/string.py b/flowquery-py/src/parsing/expressions/string.py new file mode 100644 index 0000000..73fbc9a --- /dev/null +++ b/flowquery-py/src/parsing/expressions/string.py @@ -0,0 +1,27 @@ +"""Represents a string literal in the AST.""" + +from ..ast_node import ASTNode + + +class String(ASTNode): + """Represents a string literal in the AST. + + Example: + s = String("hello") + print(s.value()) # "hello" + """ + + def __init__(self, value: str): + """Creates a new String node with the given value. 
+ + Args: + value: The string value + """ + super().__init__() + self._value = value + + def value(self) -> str: + return self._value + + def __str__(self) -> str: + return f"String ({self._value})" diff --git a/flowquery-py/src/parsing/functions/__init__.py b/flowquery-py/src/parsing/functions/__init__.py new file mode 100644 index 0000000..4452ecb --- /dev/null +++ b/flowquery-py/src/parsing/functions/__init__.py @@ -0,0 +1,75 @@ +"""Functions module for FlowQuery parsing.""" + +from .function import Function +from .aggregate_function import AggregateFunction +from .async_function import AsyncFunction +from .predicate_function import PredicateFunction +from .reducer_element import ReducerElement +from .value_holder import ValueHolder +from .function_metadata import ( + FunctionCategory, + ParameterSchema, + OutputSchema, + FunctionMetadata, + FunctionDef, + FunctionDefOptions, + get_registered_function_metadata, + get_registered_function_factory, + get_function_metadata, +) +from .function_factory import FunctionFactory + +# Built-in functions +from .sum import Sum +from .avg import Avg +from .collect import Collect +from .join import Join +from .keys import Keys +from .rand import Rand +from .range_ import Range +from .replace import Replace +from .round_ import Round +from .size import Size +from .split import Split +from .stringify import Stringify +from .to_json import ToJson +from .type_ import Type +from .functions import Functions +from .predicate_sum import PredicateSum + +__all__ = [ + # Base classes + "Function", + "AggregateFunction", + "AsyncFunction", + "PredicateFunction", + "ReducerElement", + "ValueHolder", + "FunctionCategory", + "ParameterSchema", + "OutputSchema", + "FunctionMetadata", + "FunctionDef", + "FunctionDefOptions", + "get_registered_function_metadata", + "get_registered_function_factory", + "get_function_metadata", + "FunctionFactory", + # Built-in functions + "Sum", + "Avg", + "Collect", + "Join", + "Keys", + "Rand", + "Range", + 
"Replace", + "Round", + "Size", + "Split", + "Stringify", + "ToJson", + "Type", + "Functions", + "PredicateSum", +] diff --git a/flowquery-py/src/parsing/functions/aggregate_function.py b/flowquery-py/src/parsing/functions/aggregate_function.py new file mode 100644 index 0000000..136aef9 --- /dev/null +++ b/flowquery-py/src/parsing/functions/aggregate_function.py @@ -0,0 +1,60 @@ +"""Base class for aggregate functions that reduce multiple values to a single value.""" + +from typing import Any, Optional + +from .function import Function +from .reducer_element import ReducerElement + + +class AggregateFunction(Function): + """Base class for aggregate functions that reduce multiple values to a single value. + + Aggregate functions like SUM, AVG, and COLLECT process multiple input values + and produce a single output. They cannot be nested within other aggregate functions. + + Example: + sum_func = Sum() + # Used in: RETURN SUM(values) + """ + + def __init__(self, name: Optional[str] = None): + """Creates a new AggregateFunction with the given name. + + Args: + name: The function name + """ + super().__init__(name) + self._overridden: Any = None + + def reduce(self, value: ReducerElement) -> None: + """Processes a value during the aggregation phase. + + Args: + value: The element to aggregate + + Raises: + NotImplementedError: If not implemented by subclass + """ + raise NotImplementedError("Method not implemented.") + + def element(self) -> ReducerElement: + """Creates a reducer element for this aggregate function. 
+ + Returns: + A ReducerElement instance + + Raises: + NotImplementedError: If not implemented by subclass + """ + raise NotImplementedError("Method not implemented.") + + @property + def overridden(self) -> Any: + return self._overridden + + @overridden.setter + def overridden(self, value: Any) -> None: + self._overridden = value + + def value(self) -> Any: + return self._overridden diff --git a/flowquery-py/src/parsing/functions/async_function.py b/flowquery-py/src/parsing/functions/async_function.py new file mode 100644 index 0000000..3d6bdef --- /dev/null +++ b/flowquery-py/src/parsing/functions/async_function.py @@ -0,0 +1,62 @@ +"""Represents an async data provider function call for use in LOAD operations.""" + +from typing import Any, AsyncGenerator, List + +from ..ast_node import ASTNode +from .function import Function + + +class AsyncFunction(Function): + """Represents an async data provider function call for use in LOAD operations. + + This class holds the function name and arguments, and provides async iteration + over the results from a registered async data provider. + + Example: + # Used in: LOAD JSON FROM myDataSource('arg1', 'arg2') AS data + async_func = AsyncFunction("myDataSource") + async_func.parameters = [arg1_node, arg2_node] + async for item in async_func.execute(): + print(item) + """ + + @property + def parameters(self) -> List[ASTNode]: + return self.children + + @parameters.setter + def parameters(self, nodes: List[ASTNode]) -> None: + """Sets the function parameters. + + Args: + nodes: Array of AST nodes representing the function arguments + """ + self.children = nodes + + def get_arguments(self) -> List[Any]: + """Evaluates all parameters and returns their values. + Used by the framework to pass arguments to generate(). 
+ + Returns: + Array of parameter values + """ + return [child.value() for child in self.children] + + async def generate(self, *args: Any) -> AsyncGenerator[Any, None]: + """Generates the async data provider function results. + + Subclasses override this method with their own typed parameters. + The framework automatically evaluates the AST children and spreads + them as arguments when calling this method. + + Args: + args: Arguments passed from the query (e.g., myFunc(arg1, arg2)) + + Yields: + Data items from the async provider + + Raises: + NotImplementedError: If the function is not registered as an async provider + """ + raise NotImplementedError("generate method must be overridden in subclasses.") + yield # Make this a generator diff --git a/flowquery-py/src/parsing/functions/avg.py b/flowquery-py/src/parsing/functions/avg.py new file mode 100644 index 0000000..404534a --- /dev/null +++ b/flowquery-py/src/parsing/functions/avg.py @@ -0,0 +1,55 @@ +"""Avg aggregate function.""" + +from typing import Optional + +from .aggregate_function import AggregateFunction +from .reducer_element import ReducerElement +from .function_metadata import FunctionDef + + +class AvgReducerElement(ReducerElement): + """Reducer element for Avg aggregate function.""" + + def __init__(self): + self._count: int = 0 + self._sum: Optional[float] = None + + @property + def value(self) -> Optional[float]: + if self._sum is None: + return None + return self._sum / self._count + + @value.setter + def value(self, val: float) -> None: + self._count += 1 + if self._sum is not None: + self._sum += val + else: + self._sum = val + + +@FunctionDef({ + "description": "Calculates the average of numeric values across grouped rows", + "category": "aggregate", + "parameters": [ + {"name": "value", "description": "Numeric value to average", "type": "number"} + ], + "output": {"description": "Average of all values", "type": "number", "example": 50}, + "examples": ["WITH [10, 20, 30] AS nums UNWIND nums 
AS n RETURN avg(n)"] +}) +class Avg(AggregateFunction): + """Avg aggregate function. + + Calculates the average of numeric values across grouped rows. + """ + + def __init__(self): + super().__init__("avg") + self._expected_parameter_count = 1 + + def reduce(self, element: AvgReducerElement) -> None: + element.value = self.first_child().value() + + def element(self) -> AvgReducerElement: + return AvgReducerElement() diff --git a/flowquery-py/src/parsing/functions/collect.py b/flowquery-py/src/parsing/functions/collect.py new file mode 100644 index 0000000..7597faf --- /dev/null +++ b/flowquery-py/src/parsing/functions/collect.py @@ -0,0 +1,75 @@ +"""Collect aggregate function.""" + +from typing import Any, Dict, List, Union +import json + +from .aggregate_function import AggregateFunction +from .reducer_element import ReducerElement +from .function_metadata import FunctionDef + + +class CollectReducerElement(ReducerElement): + """Reducer element for Collect aggregate function.""" + + def __init__(self): + self._value: List[Any] = [] + + @property + def value(self) -> Any: + return self._value + + @value.setter + def value(self, val: Any) -> None: + self._value.append(val) + + +class DistinctCollectReducerElement(ReducerElement): + """Reducer element for Collect aggregate function with DISTINCT.""" + + def __init__(self): + self._value: Dict[str, Any] = {} + + @property + def value(self) -> Any: + return list(self._value.values()) + + @value.setter + def value(self, val: Any) -> None: + key: str = json.dumps(val, sort_keys=True, default=str) + if key not in self._value: + self._value[key] = val + + +@FunctionDef({ + "description": "Collects values into an array across grouped rows", + "category": "aggregate", + "parameters": [ + {"name": "value", "description": "Value to collect", "type": "any"} + ], + "output": {"description": "Array of collected values", "type": "array", "example": [1, 2, 3]}, + "examples": ["WITH [1, 2, 3] AS nums UNWIND nums AS n RETURN 
collect(n)"] +}) +class Collect(AggregateFunction): + """Collect aggregate function. + + Collects values into an array across grouped rows. + """ + + def __init__(self): + super().__init__("collect") + self._expected_parameter_count = 1 + self._distinct: bool = False + + def reduce(self, element: CollectReducerElement) -> None: + element.value = self.first_child().value() + + def element(self) -> Union[CollectReducerElement, DistinctCollectReducerElement]: + return DistinctCollectReducerElement() if self._distinct else CollectReducerElement() + + @property + def distinct(self) -> bool: + return self._distinct + + @distinct.setter + def distinct(self, val: bool) -> None: + self._distinct = val diff --git a/flowquery-py/src/parsing/functions/function.py b/flowquery-py/src/parsing/functions/function.py new file mode 100644 index 0000000..a1890a7 --- /dev/null +++ b/flowquery-py/src/parsing/functions/function.py @@ -0,0 +1,68 @@ +"""Base class for all functions in FlowQuery.""" + +from typing import List, Optional, Any + +from ..ast_node import ASTNode + + +class Function(ASTNode): + """Base class for all functions in FlowQuery. + + Functions can have parameters and may support the DISTINCT modifier. + Subclasses implement specific function logic. + + Example: + func = FunctionFactory.create("sum") + func.parameters = [expression1, expression2] + """ + + def __init__(self, name: Optional[str] = None): + """Creates a new Function with the given name. + + Args: + name: The function name + """ + super().__init__() + self._name = name or self.__class__.__name__ + self._expected_parameter_count: Optional[int] = None + self._supports_distinct: bool = False + + @property + def parameters(self) -> List[ASTNode]: + """Gets the function parameters.""" + return self.children + + @parameters.setter + def parameters(self, nodes: List[ASTNode]) -> None: + """Sets the function parameters. 
+ + Args: + nodes: Array of AST nodes representing the function arguments + + Raises: + ValueError: If the number of parameters doesn't match expected count + """ + if self._expected_parameter_count is not None and self._expected_parameter_count != len(nodes): + raise ValueError( + f"Function {self._name} expected {self._expected_parameter_count} parameters, " + f"but got {len(nodes)}" + ) + self.children = nodes + + @property + def name(self) -> str: + return self._name + + def __str__(self) -> str: + return f"Function ({self._name})" + + @property + def distinct(self) -> bool: + return self._supports_distinct + + @distinct.setter + def distinct(self, value: bool) -> None: + if self._supports_distinct: + self._supports_distinct = value + else: + raise ValueError(f"Function {self._name} does not support distinct") diff --git a/flowquery-py/src/parsing/functions/function_factory.py b/flowquery-py/src/parsing/functions/function_factory.py new file mode 100644 index 0000000..7c9def1 --- /dev/null +++ b/flowquery-py/src/parsing/functions/function_factory.py @@ -0,0 +1,173 @@ +"""Factory for creating function instances by name.""" + +from typing import Any, Callable, Dict, List, Optional + +from .function import Function +from .async_function import AsyncFunction +from .predicate_function import PredicateFunction +from .function_metadata import ( + FunctionMetadata, + get_function_metadata, + get_registered_function_factory, + get_registered_function_metadata, +) + + +class FunctionFactory: + """Factory for creating function instances by name. + + All functions are registered via the @FunctionDef decorator. + Maps function names (case-insensitive) to their corresponding implementation classes. + Supports built-in functions like sum, avg, collect, range, split, join, etc. 
+ + Example: + sum_func = FunctionFactory.create("sum") + avg_func = FunctionFactory.create("AVG") + """ + + @staticmethod + def get_async_provider(name: str) -> Optional[Callable]: + """Gets an async data provider by name. + + Args: + name: The function name (case-insensitive) + + Returns: + The async data provider, or None if not found + """ + return get_registered_function_factory(name.lower()) + + @staticmethod + def is_async_provider(name: str) -> bool: + """Checks if a function name is registered as an async data provider. + + Args: + name: The function name (case-insensitive) + + Returns: + True if the function is an async data provider + """ + return get_registered_function_factory(name.lower(), "async") is not None + + @staticmethod + def get_metadata(name: str) -> Optional[FunctionMetadata]: + """Gets metadata for a specific function. + + Args: + name: The function name (case-insensitive) + + Returns: + The function metadata, or None if not found + """ + return get_function_metadata(name.lower()) + + @staticmethod + def list_functions( + category: Optional[str] = None, + async_only: bool = False, + sync_only: bool = False + ) -> List[FunctionMetadata]: + """Lists all registered functions with their metadata. + + Args: + category: Optional category filter + async_only: If True, only return async functions + sync_only: If True, only return sync functions + + Returns: + Array of function metadata + """ + result: List[FunctionMetadata] = [] + + for meta in get_registered_function_metadata(): + if category and meta.category != category: + continue + if async_only and meta.category != "async": + continue + if sync_only and meta.category == "async": + continue + result.append(meta) + + return result + + @staticmethod + def list_function_names() -> List[str]: + """Lists all registered function names. 
+
+        Returns:
+            Array of function names
+        """
+        return [m.name for m in get_registered_function_metadata()]
+
+    @staticmethod
+    def to_json() -> Dict[str, Any]:
+        """Gets all function metadata for LLM consumption (FunctionMetadata dataclasses; apply dataclasses.asdict before json.dumps).
+
+        Returns:
+            Dict with 'functions' (list of all metadata) and 'categories' (unique category names)
+        """
+        functions = FunctionFactory.list_functions()
+        categories = list(set(f.category for f in functions if f.category))
+        return {"functions": functions, "categories": categories}
+
+    @staticmethod
+    def create(name: str) -> Function:
+        """Creates a function instance by name.
+
+        Args:
+            name: The function name (case-insensitive)
+
+        Returns:
+            A Function instance of the appropriate type
+
+        Raises:
+            ValueError: If the function name is not registered
+        """
+        lower_name = name.lower()
+
+        # Check decorator-registered functions
+        decorator_factory = get_registered_function_factory(lower_name)
+        if decorator_factory:
+            return decorator_factory()
+
+        raise ValueError(f"Unknown function: {name}")
+
+    @staticmethod
+    def create_predicate(name: str) -> PredicateFunction:
+        """Creates a predicate function instance by name.
+
+        Args:
+            name: The function name (case-insensitive)
+
+        Returns:
+            A PredicateFunction instance of the appropriate type
+
+        Raises:
+            ValueError: If the predicate function name is not registered
+        """
+        lower_name = name.lower()
+
+        decorator_factory = get_registered_function_factory(lower_name, "predicate")
+        if decorator_factory:
+            return decorator_factory()
+
+        raise ValueError(f"Unknown predicate function: {name}")
+
+    @staticmethod
+    def create_async(name: str) -> AsyncFunction:
+        """Creates an async function instance by name.
+ + Args: + name: The function name (case-insensitive) + + Returns: + An AsyncFunction instance of the appropriate type + + Raises: + ValueError: If the async function name is not registered + """ + lower_name = name.lower() + decorator_factory = get_registered_function_factory(lower_name, "async") + if decorator_factory: + return decorator_factory() + raise ValueError(f"Unknown async function: {name}") diff --git a/flowquery-py/src/parsing/functions/function_metadata.py b/flowquery-py/src/parsing/functions/function_metadata.py new file mode 100644 index 0000000..83b418d --- /dev/null +++ b/flowquery-py/src/parsing/functions/function_metadata.py @@ -0,0 +1,149 @@ +"""Function metadata and decorator for FlowQuery functions.""" + +from typing import Any, Callable, Dict, List, Optional, TypedDict, Union +from dataclasses import dataclass + + +# Type definitions +FunctionCategory = str # "scalar" | "aggregate" | "predicate" | "async" | string + + +class ParameterSchema(TypedDict, total=False): + """Schema definition for function arguments.""" + name: str + description: str + type: str # "string" | "number" | "boolean" | "object" | "array" | "null" + required: bool + default: Any + items: Dict[str, Any] + properties: Dict[str, Any] + enum: List[Any] + example: Any + + +class OutputSchema(TypedDict, total=False): + """Schema definition for function output.""" + description: str + type: str + items: Dict[str, Any] + properties: Dict[str, Any] + example: Any + + +@dataclass +class FunctionMetadata: + """Metadata for a registered function, designed for LLM consumption.""" + name: str + description: str + category: FunctionCategory + parameters: List[ParameterSchema] + output: OutputSchema + examples: Optional[List[str]] = None + notes: Optional[str] = None + + +class FunctionDefOptions(TypedDict, total=False): + """Decorator options - metadata without the name (derived from class).""" + description: str + category: FunctionCategory + parameters: List[ParameterSchema] + 
output: OutputSchema + examples: List[str] + notes: str + + +class FunctionRegistry: + """Centralized registry for function metadata, factories, and async providers.""" + + _metadata: Dict[str, FunctionMetadata] = {} + _factories: Dict[str, Callable[[], Any]] = {} + + @classmethod + def register(cls, constructor: type, options: FunctionDefOptions) -> None: + """Registers a regular function class.""" + instance = constructor() + display_name = getattr(instance, 'name', constructor.__name__).lower() + category = options.get('category', '') + registry_key = f"{display_name}:{category}" if category else display_name + + metadata = FunctionMetadata( + name=display_name, + description=options.get('description', ''), + category=options.get('category', 'scalar'), + parameters=options.get('parameters', []), + output=options.get('output', {'description': '', 'type': 'any'}), + examples=options.get('examples'), + notes=options.get('notes'), + ) + cls._metadata[registry_key] = metadata + + if category != 'predicate': + cls._factories[display_name] = lambda c=constructor: c() + cls._factories[registry_key] = lambda c=constructor: c() + + @classmethod + def get_all_metadata(cls) -> List[FunctionMetadata]: + return list(cls._metadata.values()) + + @classmethod + def get_metadata(cls, name: str, category: Optional[str] = None) -> Optional[FunctionMetadata]: + lower_name = name.lower() + if category: + return cls._metadata.get(f"{lower_name}:{category}") + for meta in cls._metadata.values(): + if meta.name.lower() == lower_name: + return meta + return None + + @classmethod + def get_factory(cls, name: str, category: Optional[str] = None) -> Optional[Callable[[], Any]]: + lower_name = name.lower() + if category: + return cls._factories.get(f"{lower_name}:{category}") + return cls._factories.get(lower_name) + + +def FunctionDef(options: FunctionDefOptions): + """Class decorator that registers function metadata. + + The function name is derived from the class's constructor. 
+ + Args: + options: Function metadata (excluding name) + + Returns: + Class decorator + + Example: + @FunctionDef({ + 'description': "Adds two numbers", + 'category': "scalar", + 'parameters': [ + {'name': "a", 'description': "First number", 'type': "number"}, + {'name': "b", 'description': "Second number", 'type': "number"} + ], + 'output': {'description': "Sum of a and b", 'type': "number"}, + }) + class AddFunction(Function): + def __init__(self): + super().__init__("add") + """ + def decorator(cls: type) -> type: + FunctionRegistry.register(cls, options) + return cls + return decorator + + +def get_registered_function_metadata() -> List[FunctionMetadata]: + """Gets all registered function metadata from decorators.""" + return FunctionRegistry.get_all_metadata() + + +def get_registered_function_factory(name: str, category: Optional[str] = None) -> Optional[Callable[[], Any]]: + """Gets a registered function factory by name.""" + return FunctionRegistry.get_factory(name, category) + + +def get_function_metadata(name: str, category: Optional[str] = None) -> Optional[FunctionMetadata]: + """Gets metadata for a specific function by name.""" + return FunctionRegistry.get_metadata(name, category) diff --git a/flowquery-py/src/parsing/functions/functions.py b/flowquery-py/src/parsing/functions/functions.py new file mode 100644 index 0000000..62a3eee --- /dev/null +++ b/flowquery-py/src/parsing/functions/functions.py @@ -0,0 +1,59 @@ +"""Functions introspection function.""" + +from typing import Any, Dict, List, Optional + +from .function import Function +from .function_factory import FunctionFactory +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Lists all registered functions with their metadata. 
Useful for discovering available functions and their documentation.",
+    "category": "scalar",
+    "parameters": [
+        {"name": "category", "description": "Optional category to filter by (e.g., 'scalar', 'aggregate', 'predicate', 'async')", "type": "string", "required": False}
+    ],
+    "output": {
+        "description": "Array of function metadata objects",
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "name": {"description": "Function name", "type": "string"},
+                "description": {"description": "What the function does", "type": "string"},
+                "category": {"description": "Function category", "type": "string"},
+                "parameters": {"description": "Array of parameter definitions", "type": "array"},
+                "output": {"description": "Output schema", "type": "object"},
+                "examples": {"description": "Usage examples", "type": "array"}
+            }
+        }
+    },
+    "examples": [
+        "WITH functions() AS funcs RETURN funcs",
+        "WITH functions('aggregate') AS funcs UNWIND funcs AS f RETURN f.name, f.description"
+    ]
+})
+class Functions(Function):
+    """Functions introspection function.
+
+    Lists all registered functions with their metadata.
+ """ + + def __init__(self): + super().__init__("functions") + self._expected_parameter_count = None # 0 or 1 parameter + + def value(self) -> Any: + children = self.get_children() + + if len(children) == 0: + # Return all functions + return FunctionFactory.list_functions() + elif len(children) == 1: + # Filter by category + category = children[0].value() + if isinstance(category, str): + return FunctionFactory.list_functions(category=category) + raise ValueError("functions() category parameter must be a string") + else: + raise ValueError("functions() takes 0 or 1 parameters") diff --git a/flowquery-py/src/parsing/functions/join.py b/flowquery-py/src/parsing/functions/join.py new file mode 100644 index 0000000..7f4fefc --- /dev/null +++ b/flowquery-py/src/parsing/functions/join.py @@ -0,0 +1,47 @@ +"""Join function.""" + +from typing import Any, List + +from .function import Function +from ..ast_node import ASTNode +from ..expressions.string import String +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Joins an array of strings with a delimiter", + "category": "scalar", + "parameters": [ + {"name": "array", "description": "Array of values to join", "type": "array"}, + {"name": "delimiter", "description": "Delimiter to join with", "type": "string"} + ], + "output": {"description": "Joined string", "type": "string", "example": "a,b,c"}, + "examples": ["WITH ['a', 'b', 'c'] AS arr RETURN join(arr, ',')"] +}) +class Join(Function): + """Join function. + + Joins an array of strings with a delimiter. 
+ """ + + def __init__(self): + super().__init__("join") + self._expected_parameter_count = 2 + + @property + def parameters(self) -> List[ASTNode]: + return self.get_children() + + @parameters.setter + def parameters(self, nodes: List[ASTNode]) -> None: + if len(nodes) == 1: + nodes.append(String("")) + for node in nodes: + self.add_child(node) + + def value(self) -> Any: + array = self.get_children()[0].value() + delimiter = self.get_children()[1].value() + if not isinstance(array, list) or not isinstance(delimiter, str): + raise ValueError("Invalid arguments for join function") + return delimiter.join(str(item) for item in array) diff --git a/flowquery-py/src/parsing/functions/keys.py b/flowquery-py/src/parsing/functions/keys.py new file mode 100644 index 0000000..bfecc47 --- /dev/null +++ b/flowquery-py/src/parsing/functions/keys.py @@ -0,0 +1,34 @@ +"""Keys function.""" + +from typing import Any, List + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Returns the keys of an object (associative array) as an array", + "category": "scalar", + "parameters": [ + {"name": "object", "description": "Object to extract keys from", "type": "object"} + ], + "output": {"description": "Array of keys", "type": "array", "example": ["name", "age"]}, + "examples": ["WITH { name: 'Alice', age: 30 } AS obj RETURN keys(obj)"] +}) +class Keys(Function): + """Keys function. + + Returns the keys of an object (associative array) as an array. 
+ """ + + def __init__(self): + super().__init__("keys") + self._expected_parameter_count = 1 + + def value(self) -> Any: + obj = self.get_children()[0].value() + if obj is None: + return [] + if not isinstance(obj, dict): + raise ValueError("keys() expects an object, not an array or primitive") + return list(obj.keys()) diff --git a/flowquery-py/src/parsing/functions/predicate_function.py b/flowquery-py/src/parsing/functions/predicate_function.py new file mode 100644 index 0000000..ca2050f --- /dev/null +++ b/flowquery-py/src/parsing/functions/predicate_function.py @@ -0,0 +1,46 @@ +"""Base class for predicate functions in FlowQuery.""" + +from typing import Any, Optional + +from ..ast_node import ASTNode +from ..expressions.expression import Expression +from ..expressions.reference import Reference +from .value_holder import ValueHolder + + +class PredicateFunction(ASTNode): + """Base class for predicate functions.""" + + def __init__(self, name: Optional[str] = None): + super().__init__() + self._name = name or self.__class__.__name__ + self._value_holder = ValueHolder() + + @property + def name(self) -> str: + return self._name + + @property + def reference(self) -> Reference: + return self.first_child() + + @property + def array(self) -> ASTNode: + return self.get_children()[1].first_child() + + @property + def _return(self) -> Expression: + return self.get_children()[2] + + @property + def where(self) -> Optional['Where']: + from ..operations.where import Where + if len(self.get_children()) == 4: + return self.get_children()[3] + return None + + def value(self) -> Any: + raise NotImplementedError("Method not implemented.") + + def __str__(self) -> str: + return f"PredicateFunction ({self._name})" diff --git a/flowquery-py/src/parsing/functions/predicate_sum.py b/flowquery-py/src/parsing/functions/predicate_sum.py new file mode 100644 index 0000000..05510e2 --- /dev/null +++ b/flowquery-py/src/parsing/functions/predicate_sum.py @@ -0,0 +1,47 @@ 
+"""PredicateSum function.""" + +from typing import Any, List, Optional + +from .predicate_function import PredicateFunction +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Calculates the sum of values in an array with optional filtering. Uses list comprehension syntax: sum(variable IN array [WHERE condition] | expression)", + "category": "predicate", + "parameters": [ + {"name": "variable", "description": "Variable name to bind each element", "type": "string"}, + {"name": "array", "description": "Array to iterate over", "type": "array"}, + {"name": "expression", "description": "Expression to sum for each element", "type": "any"}, + {"name": "where", "description": "Optional filter condition", "type": "boolean", "required": False} + ], + "output": {"description": "Sum of the evaluated expressions", "type": "number", "example": 6}, + "examples": [ + "WITH [1, 2, 3] AS nums RETURN sum(n IN nums | n)", + "WITH [1, 2, 3, 4] AS nums RETURN sum(n IN nums WHERE n > 1 | n * 2)" + ] +}) +class PredicateSum(PredicateFunction): + """PredicateSum function. + + Calculates the sum of values in an array with optional filtering. 
+ """ + + def __init__(self): + super().__init__("sum") + + def value(self) -> Any: + self.reference.referred = self._value_holder + array = self.array.value() + if array is None or not isinstance(array, list): + raise ValueError("Invalid array for sum function") + + _sum: Optional[Any] = None + for item in array: + self._value_holder.holder = item + if self.where is None or self.where.value(): + if _sum is None: + _sum = self._return.value() + else: + _sum += self._return.value() + return _sum diff --git a/flowquery-py/src/parsing/functions/rand.py b/flowquery-py/src/parsing/functions/rand.py new file mode 100644 index 0000000..b3178c5 --- /dev/null +++ b/flowquery-py/src/parsing/functions/rand.py @@ -0,0 +1,28 @@ +"""Rand function.""" + +import random +from typing import Any + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Generates a random number between 0 and 1", + "category": "scalar", + "parameters": [], + "output": {"description": "Random number between 0 and 1", "type": "number", "example": 0.7234}, + "examples": ["WITH rand() AS r RETURN r"] +}) +class Rand(Function): + """Rand function. + + Generates a random number between 0 and 1. 
+ """ + + def __init__(self): + super().__init__("rand") + self._expected_parameter_count = 0 + + def value(self) -> Any: + return random.random() diff --git a/flowquery-py/src/parsing/functions/range_.py b/flowquery-py/src/parsing/functions/range_.py new file mode 100644 index 0000000..9239107 --- /dev/null +++ b/flowquery-py/src/parsing/functions/range_.py @@ -0,0 +1,34 @@ +"""Range function.""" + +from typing import Any, List + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Generates an array of sequential integers", + "category": "scalar", + "parameters": [ + {"name": "start", "description": "Starting number (inclusive)", "type": "number"}, + {"name": "end", "description": "Ending number (inclusive)", "type": "number"} + ], + "output": {"description": "Array of integers from start to end", "type": "array", "items": {"type": "number"}, "example": [1, 2, 3, 4, 5]}, + "examples": ["WITH range(1, 5) AS nums RETURN nums"] +}) +class Range(Function): + """Range function. + + Generates an array of sequential integers. 
+ """ + + def __init__(self): + super().__init__("range") + self._expected_parameter_count = 2 + + def value(self) -> Any: + start = self.get_children()[0].value() + end = self.get_children()[1].value() + if not isinstance(start, (int, float)) or not isinstance(end, (int, float)): + raise ValueError("Invalid arguments for range function") + return list(range(int(start), int(end) + 1)) diff --git a/flowquery-py/src/parsing/functions/reducer_element.py b/flowquery-py/src/parsing/functions/reducer_element.py new file mode 100644 index 0000000..49eb971 --- /dev/null +++ b/flowquery-py/src/parsing/functions/reducer_element.py @@ -0,0 +1,15 @@ +"""Reducer element for aggregate functions.""" + +from typing import Any + + +class ReducerElement: + """Base class for reducer elements used in aggregate functions.""" + + @property + def value(self) -> Any: + raise NotImplementedError("Method not implemented.") + + @value.setter + def value(self, val: Any) -> None: + raise NotImplementedError("Method not implemented.") diff --git a/flowquery-py/src/parsing/functions/replace.py b/flowquery-py/src/parsing/functions/replace.py new file mode 100644 index 0000000..68b345a --- /dev/null +++ b/flowquery-py/src/parsing/functions/replace.py @@ -0,0 +1,37 @@ +"""Replace function.""" + +import re +from typing import Any + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Replaces occurrences of a pattern in a string", + "category": "scalar", + "parameters": [ + {"name": "text", "description": "Source string", "type": "string"}, + {"name": "pattern", "description": "Pattern to find", "type": "string"}, + {"name": "replacement", "description": "Replacement string", "type": "string"} + ], + "output": {"description": "String with replacements", "type": "string", "example": "hello world"}, + "examples": ["WITH 'hello there' AS s RETURN replace(s, 'there', 'world')"] +}) +class Replace(Function): + """Replace function. 
+ + Replaces occurrences of a pattern in a string. + """ + + def __init__(self): + super().__init__("replace") + self._expected_parameter_count = 3 + + def value(self) -> Any: + text = self.get_children()[0].value() + pattern = self.get_children()[1].value() + replacement = self.get_children()[2].value() + if not isinstance(text, str) or not isinstance(pattern, str) or not isinstance(replacement, str): + raise ValueError("Invalid arguments for replace function") + return re.sub(re.escape(pattern), replacement, text) diff --git a/flowquery-py/src/parsing/functions/round_.py b/flowquery-py/src/parsing/functions/round_.py new file mode 100644 index 0000000..9c51f8e --- /dev/null +++ b/flowquery-py/src/parsing/functions/round_.py @@ -0,0 +1,32 @@ +"""Round function.""" + +from typing import Any + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Rounds a number to the nearest integer", + "category": "scalar", + "parameters": [ + {"name": "value", "description": "Number to round", "type": "number"} + ], + "output": {"description": "Rounded integer", "type": "number", "example": 4}, + "examples": ["WITH 3.7 AS n RETURN round(n)"] +}) +class Round(Function): + """Round function. + + Rounds a number to the nearest integer. 
+ """ + + def __init__(self): + super().__init__("round") + self._expected_parameter_count = 1 + + def value(self) -> Any: + val = self.get_children()[0].value() + if not isinstance(val, (int, float)): + raise ValueError("Invalid argument for round function") + return round(val) diff --git a/flowquery-py/src/parsing/functions/size.py b/flowquery-py/src/parsing/functions/size.py new file mode 100644 index 0000000..d985c10 --- /dev/null +++ b/flowquery-py/src/parsing/functions/size.py @@ -0,0 +1,32 @@ +"""Size function.""" + +from typing import Any + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Returns the length of an array or string", + "category": "scalar", + "parameters": [ + {"name": "value", "description": "Array or string to measure", "type": "array"} + ], + "output": {"description": "Length of the input", "type": "number", "example": 3}, + "examples": ["WITH [1, 2, 3] AS arr RETURN size(arr)"] +}) +class Size(Function): + """Size function. + + Returns the length of an array or string. 
+ """ + + def __init__(self): + super().__init__("size") + self._expected_parameter_count = 1 + + def value(self) -> Any: + val = self.get_children()[0].value() + if not isinstance(val, (list, str)): + raise ValueError("Invalid argument for size function") + return len(val) diff --git a/flowquery-py/src/parsing/functions/split.py b/flowquery-py/src/parsing/functions/split.py new file mode 100644 index 0000000..0d9c28d --- /dev/null +++ b/flowquery-py/src/parsing/functions/split.py @@ -0,0 +1,47 @@ +"""Split function.""" + +from typing import Any, List + +from .function import Function +from ..ast_node import ASTNode +from ..expressions.string import String +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Splits a string into an array by a delimiter", + "category": "scalar", + "parameters": [ + {"name": "text", "description": "String to split", "type": "string"}, + {"name": "delimiter", "description": "Delimiter to split by", "type": "string"} + ], + "output": {"description": "Array of string parts", "type": "array", "items": {"type": "string"}, "example": ["a", "b", "c"]}, + "examples": ["WITH 'a,b,c' AS s RETURN split(s, ',')"] +}) +class Split(Function): + """Split function. + + Splits a string into an array by a delimiter. 
+ """ + + def __init__(self): + super().__init__("split") + self._expected_parameter_count = 2 + + @property + def parameters(self) -> List[ASTNode]: + return self.get_children() + + @parameters.setter + def parameters(self, nodes: List[ASTNode]) -> None: + if len(nodes) == 1: + nodes.append(String("")) + for node in nodes: + self.add_child(node) + + def value(self) -> Any: + text = self.get_children()[0].value() + delimiter = self.get_children()[1].value() + if not isinstance(text, str) or not isinstance(delimiter, str): + raise ValueError("Invalid arguments for split function") + return text.split(delimiter) diff --git a/flowquery-py/src/parsing/functions/stringify.py b/flowquery-py/src/parsing/functions/stringify.py new file mode 100644 index 0000000..3342093 --- /dev/null +++ b/flowquery-py/src/parsing/functions/stringify.py @@ -0,0 +1,47 @@ +"""Stringify function.""" + +import json +from typing import Any, List + +from .function import Function +from ..ast_node import ASTNode +from ..expressions.number import Number +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Converts a value to its JSON string representation", + "category": "scalar", + "parameters": [ + {"name": "value", "description": "Value to stringify", "type": "any"} + ], + "output": {"description": "JSON string", "type": "string", "example": '{"a":1}'}, + "examples": ["WITH {a: 1} AS obj RETURN stringify(obj)"] +}) +class Stringify(Function): + """Stringify function. + + Converts a value to its JSON string representation. 
+ """ + + def __init__(self): + super().__init__("stringify") + self._expected_parameter_count = 2 + + @property + def parameters(self) -> List[ASTNode]: + return self.get_children() + + @parameters.setter + def parameters(self, nodes: List[ASTNode]) -> None: + if len(nodes) == 1: + nodes.append(Number("3")) # Default indent of 3 + for node in nodes: + self.add_child(node) + + def value(self) -> Any: + val = self.get_children()[0].value() + indent = int(self.get_children()[1].value()) + if not isinstance(val, (dict, list)): + raise ValueError("Invalid argument for stringify function") + return json.dumps(val, indent=indent, default=str) diff --git a/flowquery-py/src/parsing/functions/sum.py b/flowquery-py/src/parsing/functions/sum.py new file mode 100644 index 0000000..6419824 --- /dev/null +++ b/flowquery-py/src/parsing/functions/sum.py @@ -0,0 +1,51 @@ +"""Sum aggregate function.""" + +from typing import Any + +from .aggregate_function import AggregateFunction +from .reducer_element import ReducerElement +from .function_metadata import FunctionDef + + +class SumReducerElement(ReducerElement): + """Reducer element for Sum aggregate function.""" + + def __init__(self): + self._value: Any = None + + @property + def value(self) -> Any: + return self._value + + @value.setter + def value(self, val: Any) -> None: + if self._value is not None: + self._value += val + else: + self._value = val + + +@FunctionDef({ + "description": "Calculates the sum of numeric values across grouped rows", + "category": "aggregate", + "parameters": [ + {"name": "value", "description": "Numeric value to sum", "type": "number"} + ], + "output": {"description": "Sum of all values", "type": "number", "example": 150}, + "examples": ["WITH [1, 2, 3] AS nums UNWIND nums AS n RETURN sum(n)"] +}) +class Sum(AggregateFunction): + """Sum aggregate function. + + Calculates the sum of numeric values across grouped rows. 
+ """ + + def __init__(self): + super().__init__("sum") + self._expected_parameter_count = 1 + + def reduce(self, element: SumReducerElement) -> None: + element.value = self.first_child().value() + + def element(self) -> SumReducerElement: + return SumReducerElement() diff --git a/flowquery-py/src/parsing/functions/to_json.py b/flowquery-py/src/parsing/functions/to_json.py new file mode 100644 index 0000000..257825c --- /dev/null +++ b/flowquery-py/src/parsing/functions/to_json.py @@ -0,0 +1,33 @@ +"""ToJson function.""" + +import json +from typing import Any + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Parses a JSON string into an object", + "category": "scalar", + "parameters": [ + {"name": "text", "description": "JSON string to parse", "type": "string"} + ], + "output": {"description": "Parsed object or array", "type": "object", "example": {"a": 1}}, + "examples": ["WITH '{\"a\": 1}' AS s RETURN tojson(s)"] +}) +class ToJson(Function): + """ToJson function. + + Parses a JSON string into an object. 
+ """ + + def __init__(self): + super().__init__("tojson") + self._expected_parameter_count = 1 + + def value(self) -> Any: + text = self.get_children()[0].value() + if not isinstance(text, str): + raise ValueError("Invalid arguments for tojson function") + return json.loads(text) diff --git a/flowquery-py/src/parsing/functions/type_.py b/flowquery-py/src/parsing/functions/type_.py new file mode 100644 index 0000000..20daa51 --- /dev/null +++ b/flowquery-py/src/parsing/functions/type_.py @@ -0,0 +1,47 @@ +"""Type function.""" + +from typing import Any + +from .function import Function +from .function_metadata import FunctionDef + + +@FunctionDef({ + "description": "Returns the type of a value as a string", + "category": "scalar", + "parameters": [ + {"name": "value", "description": "Value to check the type of", "type": "any"} + ], + "output": {"description": "Type of the input value", "type": "string", "example": "string"}, + "examples": [ + "WITH 'hello' AS val RETURN type(val)", + "WITH 42 AS val RETURN type(val)", + "WITH [1, 2, 3] AS val RETURN type(val)" + ] +}) +class Type(Function): + """Type function. + + Returns the type of a value as a string. 
+ """ + + def __init__(self): + super().__init__("type") + self._expected_parameter_count = 1 + + def value(self) -> Any: + val = self.get_children()[0].value() + + if val is None: + return "null" + if isinstance(val, list): + return "array" + if isinstance(val, dict): + return "object" + if isinstance(val, bool): + return "boolean" + if isinstance(val, (int, float)): + return "number" + if isinstance(val, str): + return "string" + return type(val).__name__ diff --git a/flowquery-py/src/parsing/functions/value_holder.py b/flowquery-py/src/parsing/functions/value_holder.py new file mode 100644 index 0000000..07cc90a --- /dev/null +++ b/flowquery-py/src/parsing/functions/value_holder.py @@ -0,0 +1,24 @@ +"""Value holder node for FlowQuery AST.""" + +from typing import Any + +from ..ast_node import ASTNode + + +class ValueHolder(ASTNode): + """Holds a value that can be set and retrieved.""" + + def __init__(self): + super().__init__() + self._holder: Any = None + + @property + def holder(self) -> Any: + return self._holder + + @holder.setter + def holder(self, value: Any) -> None: + self._holder = value + + def value(self) -> Any: + return self._holder diff --git a/flowquery-py/src/parsing/logic/__init__.py b/flowquery-py/src/parsing/logic/__init__.py new file mode 100644 index 0000000..6dcde2e --- /dev/null +++ b/flowquery-py/src/parsing/logic/__init__.py @@ -0,0 +1,15 @@ +"""Logic module for FlowQuery parsing.""" + +from .case import Case +from .when import When +from .then import Then +from .else_ import Else +from .end import End + +__all__ = [ + "Case", + "When", + "Then", + "Else", + "End", +] diff --git a/flowquery-py/src/parsing/logic/case.py b/flowquery-py/src/parsing/logic/case.py new file mode 100644 index 0000000..92ba8a1 --- /dev/null +++ b/flowquery-py/src/parsing/logic/case.py @@ -0,0 +1,29 @@ +"""Represents a CASE expression in the AST.""" + +from typing import Any + +from ..ast_node import ASTNode +from .when import When +from .then import Then + + 
class Case(ASTNode):
    """Represents a CASE expression in the AST.

    Children are expected to be WHEN/THEN pairs, optionally followed by a
    trailing clause (typically ELSE) that supplies the fallback value.
    """

    def value(self) -> Any:
        nodes = self.get_children()
        index = 0
        node = nodes[index]
        # Walk the WHEN/THEN pairs in order; the first truthy WHEN wins.
        while isinstance(node, When):
            # Fetch the consequent up front (mirrors the pairwise layout).
            consequent = nodes[index + 1]
            if node.value():
                return consequent.value()
            index += 2
            if index >= len(nodes):
                break
            node = nodes[index]
        # Whatever follows the pairs (e.g. an ELSE clause) is the fallback.
        if index < len(nodes):
            return nodes[index].value()
        return None
+"""Represents a WHEN clause in a CASE expression.""" + +from ..ast_node import ASTNode + + +class When(ASTNode): + """Represents a WHEN clause in a CASE expression.""" + + def value(self) -> bool: + return self.get_children()[0].value() diff --git a/flowquery-py/src/parsing/operations/__init__.py b/flowquery-py/src/parsing/operations/__init__.py new file mode 100644 index 0000000..ea03767 --- /dev/null +++ b/flowquery-py/src/parsing/operations/__init__.py @@ -0,0 +1,35 @@ +"""Operations module for FlowQuery parsing.""" + +from .operation import Operation +from .projection import Projection +from .return_op import Return +from .with_op import With +from .unwind import Unwind +from .load import Load +from .where import Where +from .limit import Limit +from .aggregated_return import AggregatedReturn +from .aggregated_with import AggregatedWith +from .call import Call +from .group_by import GroupBy +from .match import Match +from .create_node import CreateNode +from .create_relationship import CreateRelationship + +__all__ = [ + "Operation", + "Projection", + "Return", + "With", + "Unwind", + "Load", + "Where", + "Limit", + "AggregatedReturn", + "AggregatedWith", + "Call", + "GroupBy", + "Match", + "CreateNode", + "CreateRelationship", +] diff --git a/flowquery-py/src/parsing/operations/aggregated_return.py b/flowquery-py/src/parsing/operations/aggregated_return.py new file mode 100644 index 0000000..3b37daa --- /dev/null +++ b/flowquery-py/src/parsing/operations/aggregated_return.py @@ -0,0 +1,24 @@ +"""Represents an aggregated RETURN operation.""" + +from typing import Any, Dict, List + +from .return_op import Return +from .group_by import GroupBy +from ..expressions.expression import Expression + + +class AggregatedReturn(Return): + """Represents an aggregated RETURN operation that groups and reduces values.""" + + def __init__(self, expressions): + super().__init__(expressions) + self._group_by = GroupBy(self.children) + + async def run(self) -> None: + await 
self._group_by.run() + + @property + def results(self) -> List[Dict[str, Any]]: + if self._where is not None: + self._group_by.where = self._where + return list(self._group_by.generate_results()) diff --git a/flowquery-py/src/parsing/operations/aggregated_with.py b/flowquery-py/src/parsing/operations/aggregated_with.py new file mode 100644 index 0000000..11fdd7d --- /dev/null +++ b/flowquery-py/src/parsing/operations/aggregated_with.py @@ -0,0 +1,22 @@ +"""Represents an aggregated WITH operation.""" + +from .return_op import Return +from .group_by import GroupBy +from ..expressions.expression import Expression + + +class AggregatedWith(Return): + """Represents an aggregated WITH operation that groups and reduces values.""" + + def __init__(self, expressions): + super().__init__(expressions) + self._group_by = GroupBy(self.children) + + async def run(self) -> None: + await self._group_by.run() + + async def finish(self) -> None: + for _ in self._group_by.generate_results(): + if self.next: + await self.next.run() + await super().finish() diff --git a/flowquery-py/src/parsing/operations/call.py b/flowquery-py/src/parsing/operations/call.py new file mode 100644 index 0000000..26a415a --- /dev/null +++ b/flowquery-py/src/parsing/operations/call.py @@ -0,0 +1,74 @@ +"""Represents a CALL operation for invoking async functions.""" + +from typing import Any, Dict, List, Optional + +from ..expressions.expression import Expression +from ..expressions.expression_map import ExpressionMap +from ..functions.async_function import AsyncFunction +from .projection import Projection + + +DEFAULT_VARIABLE_NAME = "value" + + +class Call(Projection): + """Represents a CALL operation for invoking async functions.""" + + def __init__(self): + super().__init__([]) + self._function: Optional[AsyncFunction] = None + self._map = ExpressionMap() + self._results: List[Dict[str, Any]] = [] + + @property + def function(self) -> Optional[AsyncFunction]: + return self._function + + @function.setter 
+ def function(self, async_function: AsyncFunction) -> None: + self._function = async_function + + @property + def yielded(self) -> List[Expression]: + return self.children + + @yielded.setter + def yielded(self, expressions: List[Expression]) -> None: + self.children = expressions + self._map.set_map(expressions) + + @property + def has_yield(self) -> bool: + return len(self.children) > 0 + + async def run(self) -> None: + if self._function is None: + raise ValueError("No function set for Call operation.") + + args = self._function.get_arguments() + async for item in self._function.generate(*args): + if not self.is_last: + if isinstance(item, dict): + for key, value in item.items(): + expression = self._map.get(key) + if expression: + expression.overridden = value + else: + expression = self._map.get(DEFAULT_VARIABLE_NAME) + if expression: + expression.overridden = item + if self.next: + await self.next.run() + else: + record: Dict[str, Any] = {} + if isinstance(item, dict): + for key, value in item.items(): + if self._map.has(key) or not self.has_yield: + record[key] = value + else: + record[DEFAULT_VARIABLE_NAME] = item + self._results.append(record) + + @property + def results(self) -> List[Dict[str, Any]]: + return self._results diff --git a/flowquery-py/src/parsing/operations/create_node.py b/flowquery-py/src/parsing/operations/create_node.py new file mode 100644 index 0000000..242b607 --- /dev/null +++ b/flowquery-py/src/parsing/operations/create_node.py @@ -0,0 +1,34 @@ +"""Represents a CREATE operation for creating virtual nodes.""" + +from typing import Any, Dict, List + +from .operation import Operation +from ..ast_node import ASTNode + + +class CreateNode(Operation): + """Represents a CREATE operation for creating virtual nodes.""" + + def __init__(self, node, statement: ASTNode): + super().__init__() + self._node = node + self._statement = statement + + @property + def node(self): + return self._node + + @property + def statement(self) -> ASTNode: + 
class CreateRelationship(Operation):
    """Represents a CREATE operation for creating virtual relationships."""

    def __init__(self, relationship, statement: ASTNode):
        super().__init__()
        self._relationship = relationship
        self._statement = statement

    @property
    def relationship(self):
        """The relationship being created."""
        return self._relationship

    @property
    def statement(self) -> ASTNode:
        """The originating statement node."""
        return self._statement

    async def run(self) -> None:
        """Registers the relationship with the singleton graph database."""
        if self._relationship is None:
            raise ValueError("Relationship is null")
        # Imported lazily, presumably to avoid a circular import with the
        # graph package.
        from ...graph.database import Database
        Database.get_instance().add_relationship(self._relationship, self._statement)

    @property
    def results(self) -> List[Dict[str, Any]]:
        # CREATE produces no result rows.
        return []
class GroupByNode:
    """One node of the group-by tree.

    A path from the root encodes one combination of mapper values; the leaf
    carries the reducer elements accumulating that group's aggregates.
    """

    def __init__(self, value: Any = None):
        self._value = value
        self._children: Dict[Any, 'GroupByNode'] = {}
        self._elements: Optional[List[ReducerElement]] = None

    @property
    def value(self) -> Any:
        return self._value

    @property
    def children(self) -> Dict[Any, 'GroupByNode']:
        return self._children

    @property
    def elements(self) -> Optional[List[ReducerElement]]:
        return self._elements

    @elements.setter
    def elements(self, elements: List[ReducerElement]) -> None:
        self._elements = elements


class GroupBy(Projection):
    """Implements grouping and aggregation for FlowQuery operations.

    `run` is called once per incoming row: it descends (extending as needed)
    the tree along the current mapper values and folds the row into that
    leaf's reducers. `generate_results` then walks the tree to emit records.
    """

    def __init__(self, expressions: List[Expression]):
        super().__init__(expressions)
        self._root = GroupByNode()
        self._current = self._root
        # Mapper/reducer lists are discovered lazily from the expressions.
        self._mappers: Optional[List[Expression]] = None
        self._reducers: Optional[List[AggregateFunction]] = None
        self._where = None

    async def run(self) -> None:
        self._reset_tree()
        self._map()
        self._reduce()

    @property
    def _root_node(self) -> GroupByNode:
        return self._root

    def _reset_tree(self) -> None:
        # Each row starts its descent from the root.
        self._current = self._root

    def _map(self) -> None:
        cursor = self._current
        for mapper in self.mappers:
            key = mapper.value()
            branch = cursor.children.get(key)
            if branch is None:
                branch = GroupByNode(key)
                cursor.children[key] = branch
            cursor = branch
        self._current = cursor

    def _reduce(self) -> None:
        leaf = self._current
        if leaf.elements is None:
            # First row for this group: create one element per reducer.
            leaf.elements = [reducer.element() for reducer in self.reducers]
        for reducer, element in zip(self.reducers, leaf.elements):
            reducer.reduce(element)

    @property
    def mappers(self) -> List[Expression]:
        if self._mappers is None:
            self._mappers = list(self._generate_mappers())
        return self._mappers

    def _generate_mappers(self) -> Generator[Expression, None, None]:
        for expression, _ in self.expressions():
            if expression.mappable():
                yield expression

    @property
    def reducers(self) -> List[AggregateFunction]:
        if self._reducers is None:
            collected: List[AggregateFunction] = []
            for child in self.children:
                collected.extend(child.reducers())
            self._reducers = collected
        return self._reducers

    def generate_results(
        self,
        mapper_index: int = 0,
        node: Optional[GroupByNode] = None
    ) -> Generator[Dict[str, Any], None, None]:
        """Depth-first walk of the tree, yielding one record per leaf group."""
        node = node if node is not None else self._root

        if node.children:
            for child in node.children.values():
                # Override the mapper so record expressions see this
                # branch's group key while descending.
                self.mappers[mapper_index].overridden = child.value
                yield from self.generate_results(mapper_index + 1, child)
            return

        if node.elements:
            for reducer, element in zip(self.reducers, node.elements):
                reducer.overridden = element.value
            record: Dict[str, Any] = {}
            for expression, alias in self.expressions():
                record[alias] = expression.value()
            # Apply the (optional) WHERE clause per emitted group.
            if self.where_condition:
                yield record

    @property
    def where(self):
        return self._where

    @where.setter
    def where(self, where) -> None:
        self._where = where

    @property
    def where_condition(self) -> bool:
        # Without a WHERE clause every group passes.
        return True if self._where is None else self._where.value()
class Load(Operation):
    """Represents a LOAD operation that fetches data from external sources.

    The source is either an async function or a URL (GET, or POST when a
    payload component is present). Each fetched item becomes the operation's
    current value and the next operation in the chain is run once per item.
    """

    def __init__(self):
        super().__init__()
        self._value: Any = None

    @property
    def type(self):
        """Gets the data type (JSON, CSV, or Text)."""
        return self.children[0]

    @property
    def from_component(self):
        """Gets the From component which contains either a URL expression or an AsyncFunction."""
        return self.children[1]

    @property
    def is_async_function(self) -> bool:
        """Checks if the data source is an async function."""
        return isinstance(self.from_component.first_child(), AsyncFunction)

    @property
    def async_function(self) -> Optional[AsyncFunction]:
        """Gets the async function if the source is a function, otherwise None."""
        child = self.from_component.first_child()
        return child if isinstance(child, AsyncFunction) else None

    @property
    def from_(self) -> str:
        """The source URL, evaluated from the From component."""
        return self.children[1].value()

    @property
    def headers(self) -> Dict[str, str]:
        """Request headers declared on the statement, or an empty dict."""
        from ..components.headers import Headers
        if self.child_count() > 2 and isinstance(self.children[2], Headers):
            return self.children[2].value() or {}
        return {}

    @property
    def payload(self):
        """The POST payload node if declared (its position shifts when Headers are present)."""
        from ..components.post import Post
        post = None
        if self.child_count() > 2 and isinstance(self.children[2], Post):
            post = self.children[2]
        elif self.child_count() > 3 and isinstance(self.children[3], Post):
            post = self.children[3]
        return post.first_child() if post else None

    def _method(self) -> str:
        """HTTP method: POST when a payload is present, otherwise GET."""
        return "GET" if self.payload is None else "POST"

    def _options(self) -> Dict[str, Any]:
        """Builds the request options (method, headers, optional JSON body).

        Fix: the payload expression is evaluated exactly once and the result
        reused for the body; the original evaluated `payload.value()` twice,
        which is wasteful and wrong if evaluation has side effects.
        """
        headers = dict(self.headers)
        payload = self.payload
        data = payload.value() if payload else None
        if data is not None and isinstance(data, dict) and "Content-Type" not in headers:
            headers["Content-Type"] = "application/json"
        options: Dict[str, Any] = {
            "method": self._method(),
            "headers": headers,
        }
        if payload is not None:
            options["body"] = json.dumps(data)
        return options

    async def _emit(self, item: Any) -> None:
        """Sets the current value and runs the downstream chain once."""
        self._value = item
        if self.next:
            await self.next.run()

    async def _load_from_function(self) -> None:
        """Loads data from an async function source."""
        async_func = self.async_function
        if async_func is None:
            return
        args = async_func.get_arguments()
        async for item in async_func.generate(*args):
            await self._emit(item)

    async def _load_from_url(self) -> None:
        """Loads data from a URL source."""
        import aiohttp
        from ..components.json import JSON as JSONComponent
        from ..components.text import Text

        async with aiohttp.ClientSession() as session:
            options = self._options()
            method = options.pop("method")
            headers = options.pop("headers", {})
            body = options.pop("body", None)

            async with session.request(
                method,
                self.from_,
                headers=headers,
                data=body
            ) as response:
                if isinstance(self.type, JSONComponent):
                    data = await response.json()
                elif isinstance(self.type, Text):
                    data = await response.text()
                else:
                    # No dedicated branch for other types yet; fall back to text.
                    data = await response.text()

                # Lists are streamed item by item; a dict or string is a
                # single item. Other payload types produce nothing.
                if isinstance(data, list):
                    for item in data:
                        await self._emit(item)
                elif isinstance(data, (dict, str)):
                    await self._emit(data)

    async def load(self) -> None:
        """Dispatches to the function- or URL-based loader."""
        if self.is_async_function:
            await self._load_from_function()
        else:
            await self._load_from_url()

    async def run(self) -> None:
        try:
            await self.load()
        except Exception as e:
            source = self.async_function.name if self.is_async_function else self.from_
            # Fix: chain the original exception (`from e`) so the root cause
            # and its traceback stay visible to callers.
            raise RuntimeError(f"Failed to load data from {source}. Error: {e}") from e

    def value(self) -> Any:
        return self._value
+ """ + + def __init__(self): + super().__init__() + self._previous: Optional[Operation] = None + self._next: Optional[Operation] = None + + @property + def previous(self) -> Optional['Operation']: + return self._previous + + @previous.setter + def previous(self, value: Optional['Operation']) -> None: + self._previous = value + + @property + def next(self) -> Optional['Operation']: + return self._next + + @next.setter + def next(self, value: Optional['Operation']) -> None: + self._next = value + + def add_sibling(self, operation: 'Operation') -> None: + if self._parent: + self._parent.add_child(operation) + operation.previous = self + self.next = operation + + @property + def is_last(self) -> bool: + return self._next is None + + async def run(self) -> None: + """Executes this operation. Must be implemented by subclasses. + + Raises: + NotImplementedError: If not implemented by subclass + """ + raise NotImplementedError("Not implemented") + + async def finish(self) -> None: + """Finishes execution by calling finish on the next operation in the chain.""" + if self.next: + await self.next.finish() + + async def initialize(self) -> None: + if self.next: + await self.next.initialize() + + def reset(self) -> None: + pass + + @property + def results(self) -> List[Dict[str, Any]]: + raise NotImplementedError("Not implemented") diff --git a/flowquery-py/src/parsing/operations/projection.py b/flowquery-py/src/parsing/operations/projection.py new file mode 100644 index 0000000..e14c10d --- /dev/null +++ b/flowquery-py/src/parsing/operations/projection.py @@ -0,0 +1,21 @@ +"""Base class for projection operations.""" + +from typing import Generator, List, Tuple, Optional + +from ..expressions.expression import Expression +from .operation import Operation + + +class Projection(Operation): + """Base class for operations that project expressions.""" + + def __init__(self, expressions: List[Expression]): + super().__init__() + self.children = expressions + + def expressions(self) 
class Return(Projection):
    """Represents a RETURN operation that produces the final query results.

    Each run evaluates the projected expressions into one record and appends
    it to the collected results; an optional WHERE clause gates which runs
    produce a record.

    Example:
        # RETURN x, y WHERE x > 0
    """

    def __init__(self, expressions):
        super().__init__(expressions)
        self._where: Optional['Where'] = None
        self._results: List[Dict[str, Any]] = []

    @property
    def where(self) -> bool:
        """True when no WHERE clause is set or the clause evaluates truthy."""
        return True if self._where is None else self._where.value()

    @where.setter
    def where(self, where: 'Where') -> None:
        self._where = where

    async def run(self) -> None:
        if not self.where:
            return
        record: Dict[str, Any] = {}
        for expression, alias in self.expressions():
            evaluated = expression.value()
            # Snapshot mutable values so later pipeline mutations cannot
            # retroactively change already-collected records.
            if isinstance(evaluated, (dict, list)):
                evaluated = copy.deepcopy(evaluated)
            record[alias] = evaluated
        self._results.append(record)

    async def initialize(self) -> None:
        # A fresh execution starts with an empty result set.
        # NOTE(review): unlike the base Operation, this does not chain to
        # super().initialize(), so downstream operations are not
        # re-initialized here — confirm this is intended.
        self._results = []

    @property
    def results(self) -> List[Dict[str, Any]]:
        return self._results
class Unwind(Operation):
    """Represents an UNWIND operation that iterates over an array expression.

    For each element of the array, the element becomes the operation's
    current value and the rest of the chain is run once.
    """

    def __init__(self, expression: Expression):
        super().__init__()
        self._value: Any = None
        self.add_child(expression)

    @property
    def expression(self) -> Expression:
        """The array-valued expression being unwound."""
        return self.children[0]

    @property
    def as_(self) -> str:
        """The alias each element is bound to (second child; presumably added by the parser)."""
        return self.children[1].value()

    async def run(self) -> None:
        items = self.expression.value()
        if not isinstance(items, list):
            raise ValueError("Expected array")
        for element in items:
            self._value = element
            if self.next:
                await self.next.run()
        # Give downstream operations (e.g. LIMIT) a chance to reset their
        # per-iteration state once the array is exhausted.
        if self.next:
            self.next.reset()

    def value(self) -> Any:
        return self._value
+ + Args: + expression: The boolean expression to evaluate + """ + super().__init__() + self.add_child(expression) + + @property + def expression(self) -> Expression: + return self.children[0] + + async def run(self) -> None: + for pattern in self.expression.patterns(): + await pattern.fetch_data() + await pattern.evaluate() + if self.expression.value(): + if self.next: + await self.next.run() + + def value(self) -> Any: + return self.expression.value() diff --git a/flowquery-py/src/parsing/operations/with_op.py b/flowquery-py/src/parsing/operations/with_op.py new file mode 100644 index 0000000..e796111 --- /dev/null +++ b/flowquery-py/src/parsing/operations/with_op.py @@ -0,0 +1,18 @@ +"""Represents a WITH operation that defines variables or intermediate results.""" + +from .projection import Projection + + +class With(Projection): + """Represents a WITH operation that defines variables or intermediate results. + + The WITH operation creates named expressions that can be referenced later in the query. + It passes control to the next operation in the chain. 
+ + Example: + # WITH x = 1, y = 2 RETURN x + y + """ + + async def run(self) -> None: + if self.next: + await self.next.run() diff --git a/flowquery-py/src/parsing/parser.py b/flowquery-py/src/parsing/parser.py new file mode 100644 index 0000000..fc6cb0c --- /dev/null +++ b/flowquery-py/src/parsing/parser.py @@ -0,0 +1,1011 @@ +"""Main parser for FlowQuery statements.""" + +from typing import Dict, Iterator, List, Optional + +from ..tokenization.token import Token +from ..utils.object_utils import ObjectUtils +from .alias import Alias +from .alias_option import AliasOption +from .ast_node import ASTNode +from .base_parser import BaseParser +from .context import Context +from .components.from_ import From +from .components.headers import Headers +from .components.null import Null +from .components.post import Post +from .data_structures.associative_array import AssociativeArray +from .data_structures.json_array import JSONArray +from .data_structures.key_value_pair import KeyValuePair +from .data_structures.lookup import Lookup +from .data_structures.range_lookup import RangeLookup +from .expressions.expression import Expression +from .expressions.f_string import FString +from .expressions.identifier import Identifier +from .expressions.operator import Not +from .expressions.reference import Reference +from .expressions.string import String +from .functions.aggregate_function import AggregateFunction +from .functions.async_function import AsyncFunction +from .functions.function import Function +from .functions.function_factory import FunctionFactory +from .functions.predicate_function import PredicateFunction +from .logic.case import Case +from .logic.when import When +from .logic.then import Then +from .logic.else_ import Else +from .operations.aggregated_return import AggregatedReturn +from .operations.aggregated_with import AggregatedWith +from .operations.call import Call +from .operations.limit import Limit +from .operations.load import Load +from 
.operations.match import Match +from .operations.operation import Operation +from .operations.return_op import Return +from .operations.unwind import Unwind +from .operations.where import Where +from .operations.with_op import With +from ..graph.node import Node +from ..graph.node_reference import NodeReference +from ..graph.pattern import Pattern +from ..graph.pattern_expression import PatternExpression +from ..graph.relationship import Relationship +from .operations.create_node import CreateNode +from .operations.create_relationship import CreateRelationship + + +class Parser(BaseParser): + """Main parser for FlowQuery statements. + + Parses FlowQuery declarative query language statements into an Abstract Syntax Tree (AST). + Supports operations like WITH, UNWIND, RETURN, LOAD, WHERE, and LIMIT, along with + expressions, functions, data structures, and logical constructs. + + Example: + parser = Parser() + ast = parser.parse("unwind [1, 2, 3, 4, 5] as num return num") + """ + + def __init__(self, tokens: Optional[List[Token]] = None): + super().__init__(tokens) + self._variables: Dict[str, ASTNode] = {} + self._context = Context() + self._returns = 0 + + def parse(self, statement: str) -> ASTNode: + """Parses a FlowQuery statement into an Abstract Syntax Tree. 
+ + Args: + statement: The FlowQuery statement to parse + + Returns: + The root AST node containing the parsed structure + + Raises: + ValueError: If the statement is malformed or contains syntax errors + """ + self.tokenize(statement) + return self._parse_tokenized() + + def _parse_tokenized(self, is_sub_query: bool = False) -> ASTNode: + root = ASTNode() + previous: Optional[Operation] = None + operation: Optional[Operation] = None + + while not self.token.is_eof(): + if root.child_count() > 0: + self._expect_and_skip_whitespace_and_comments() + else: + self._skip_whitespace_and_comments() + + operation = self._parse_operation() + if operation is None and not is_sub_query: + raise ValueError("Expected one of WITH, UNWIND, RETURN, LOAD, OR CALL") + elif operation is None and is_sub_query: + return root + + if self._returns > 1: + raise ValueError("Only one RETURN statement is allowed") + + if isinstance(previous, Call) and not previous.has_yield: + raise ValueError( + "CALL operations must have a YIELD clause unless they are the last operation" + ) + + if previous is not None: + previous.add_sibling(operation) + else: + root.add_child(operation) + + where = self._parse_where() + if where is not None: + if isinstance(operation, Return): + operation.where = where + else: + operation.add_sibling(where) + operation = where + + limit = self._parse_limit() + if limit is not None: + operation.add_sibling(limit) + operation = limit + + previous = operation + + if not isinstance(operation, (Return, Call, CreateNode, CreateRelationship)): + raise ValueError("Last statement must be a RETURN, WHERE, CALL, or CREATE statement") + + return root + + def _parse_operation(self) -> Optional[Operation]: + return ( + self._parse_with() or + self._parse_unwind() or + self._parse_return() or + self._parse_load() or + self._parse_call() or + self._parse_match() or + self._parse_create() + ) + + def _parse_with(self) -> Optional[With]: + if not self.token.is_with(): + return None + 
self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + expressions = list(self._parse_expressions(AliasOption.REQUIRED)) + if len(expressions) == 0: + raise ValueError("Expected expression") + if any(expr.has_reducers() for expr in expressions): + return AggregatedWith(expressions) + return With(expressions) + + def _parse_unwind(self) -> Optional[Unwind]: + if not self.token.is_unwind(): + return None + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + expression = self._parse_expression() + if expression is None: + raise ValueError("Expected expression") + if not ObjectUtils.is_instance_of_any( + expression.first_child(), + [JSONArray, Function, Reference, Lookup, RangeLookup] + ): + raise ValueError("Expected array, function, reference, or lookup.") + self._expect_and_skip_whitespace_and_comments() + alias = self._parse_alias() + if alias is not None: + expression.set_alias(alias.get_alias()) + else: + raise ValueError("Expected alias") + unwind = Unwind(expression) + self._variables[alias.get_alias()] = unwind + return unwind + + def _parse_return(self) -> Optional[Return]: + if not self.token.is_return(): + return None + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + expressions = list(self._parse_expressions(AliasOption.OPTIONAL)) + if len(expressions) == 0: + raise ValueError("Expected expression") + if any(expr.has_reducers() for expr in expressions): + return AggregatedReturn(expressions) + self._returns += 1 + return Return(expressions) + + def _parse_where(self) -> Optional[Where]: + if not self.token.is_where(): + return None + self._expect_previous_token_to_be_whitespace_or_comment() + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + expression = self._parse_expression() + if expression is None: + raise ValueError("Expected expression") + if ObjectUtils.is_instance_of_any( + expression.first_child(), + [JSONArray, AssociativeArray] + ): + raise 
    def _parse_load(self) -> Optional[Load]:
        """Parse a LOAD operation: ``LOAD JSON|CSV|TEXT FROM <source> [HEADERS <expr>] [POST <expr>] AS <alias>``.

        Returns:
            The Load node, or None if the current token is not LOAD.

        Raises:
            ValueError: If the format keyword, FROM clause, source expression,
                HEADERS/POST payloads, or the mandatory AS alias is missing.
        """
        if not self.token.is_load():
            return None
        load = Load()
        self.set_next_token()
        self._expect_and_skip_whitespace_and_comments()
        # The format keyword (JSON/CSV/TEXT) becomes the Load node's first child.
        if not (self.token.is_json() or self.token.is_csv() or self.token.is_text()):
            raise ValueError("Expected JSON, CSV, or TEXT")
        load.add_child(self.token.node)
        self.set_next_token()
        self._expect_and_skip_whitespace_and_comments()
        if not self.token.is_from():
            raise ValueError("Expected FROM")
        self.set_next_token()
        self._expect_and_skip_whitespace_and_comments()
        from_node = From()
        load.add_child(from_node)

        # Check if source is async function
        # (tried first; falls back to a plain expression source, e.g. a URL string)
        async_func = self._parse_async_function()
        if async_func is not None:
            from_node.add_child(async_func)
        else:
            expression = self._parse_expression()
            if expression is None:
                raise ValueError("Expected expression or async function")
            from_node.add_child(expression)

        self._expect_and_skip_whitespace_and_comments()
        # Optional HEADERS clause — its expression is wrapped in a Headers node.
        if self.token.is_headers():
            headers = Headers()
            self.set_next_token()
            self._expect_and_skip_whitespace_and_comments()
            header = self._parse_expression()
            if header is None:
                raise ValueError("Expected expression")
            headers.add_child(header)
            load.add_child(headers)
            self._expect_and_skip_whitespace_and_comments()

        # Optional POST clause carrying a request payload expression.
        if self.token.is_post():
            post = Post()
            self.set_next_token()
            self._expect_and_skip_whitespace_and_comments()
            payload = self._parse_expression()
            if payload is None:
                raise ValueError("Expected expression")
            post.add_child(payload)
            load.add_child(post)
            self._expect_and_skip_whitespace_and_comments()

        # The alias is mandatory; it registers the load result as a variable.
        alias = self._parse_alias()
        if alias is not None:
            load.add_child(alias)
            self._variables[alias.get_alias()] = load
        else:
            raise ValueError("Expected alias")
        return load
self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + async_function = self._parse_async_function() + if async_function is None: + raise ValueError("Expected async function") + call = Call() + call.function = async_function + self._skip_whitespace_and_comments() + if self.token.is_yield(): + self._expect_previous_token_to_be_whitespace_or_comment() + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + expressions = list(self._parse_expressions(AliasOption.OPTIONAL)) + if len(expressions) == 0: + raise ValueError("Expected at least one expression") + call.yielded = expressions + return call + + def _parse_match(self) -> Optional[Match]: + if not self.token.is_match(): + return None + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + patterns = list(self._parse_patterns()) + if len(patterns) == 0: + raise ValueError("Expected graph pattern") + return Match(patterns) + + def _parse_create(self) -> Optional[Operation]: + """Parse CREATE VIRTUAL statement for nodes and relationships.""" + if not self.token.is_create(): + return None + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + if not self.token.is_virtual(): + raise ValueError("Expected VIRTUAL") + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + + node = self._parse_node() + if node is None: + raise ValueError("Expected node definition") + + relationship: Optional[Relationship] = None + if self.token.is_subtract() and self.peek() and self.peek().is_opening_bracket(): + self.set_next_token() # skip - + self.set_next_token() # skip [ + if not self.token.is_colon(): + raise ValueError("Expected ':' for relationship type") + self.set_next_token() + if not self.token.is_identifier(): + raise ValueError("Expected relationship type identifier") + rel_type = self.token.value or "" + self.set_next_token() + if not self.token.is_closing_bracket(): + raise ValueError("Expected closing bracket for relationship 
definition") + self.set_next_token() + if not self.token.is_subtract(): + raise ValueError("Expected '-' for relationship definition") + self.set_next_token() + # Skip optional direction indicator '>' + if self.token.is_greater_than(): + self.set_next_token() + target = self._parse_node() + if target is None: + raise ValueError("Expected target node definition") + relationship = Relationship() + relationship.type = rel_type + + self._expect_and_skip_whitespace_and_comments() + if not self.token.is_as(): + raise ValueError("Expected AS") + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + + query = self._parse_sub_query() + if query is None: + raise ValueError("Expected sub-query") + + if relationship is not None: + return CreateRelationship(relationship, query) + else: + return CreateNode(node, query) + + def _parse_sub_query(self) -> Optional[ASTNode]: + """Parse a sub-query enclosed in braces.""" + if not self.token.is_opening_brace(): + return None + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + query = self._parse_tokenized(is_sub_query=True) + self._skip_whitespace_and_comments() + if not self.token.is_closing_brace(): + raise ValueError("Expected closing brace for sub-query") + self.set_next_token() + return query + + def _parse_patterns(self) -> Iterator[Pattern]: + while True: + identifier: Optional[str] = None + if self.token.is_identifier(): + identifier = self.token.value + self.set_next_token() + self._skip_whitespace_and_comments() + if not self.token.is_equals(): + raise ValueError("Expected '=' for pattern assignment") + self.set_next_token() + self._skip_whitespace_and_comments() + pattern = self._parse_pattern() + if pattern is not None: + if identifier is not None: + pattern.identifier = identifier + self._variables[identifier] = pattern + yield pattern + else: + break + self._skip_whitespace_and_comments() + if not self.token.is_comma(): + break + self.set_next_token() + 
self._skip_whitespace_and_comments() + + def _parse_pattern(self) -> Optional[Pattern]: + if not self.token.is_left_parenthesis(): + return None + pattern = Pattern() + node = self._parse_node() + if node is None: + raise ValueError("Expected node definition") + pattern.add_element(node) + while True: + relationship = self._parse_relationship() + if relationship is None: + break + pattern.add_element(relationship) + node = self._parse_node() + if node is None: + raise ValueError("Expected target node definition") + pattern.add_element(node) + return pattern + + def _parse_pattern_expression(self) -> Optional[PatternExpression]: + """Parse a pattern expression for WHERE clauses. + + PatternExpression is used to test if a graph pattern exists. + It must start with a NodeReference (referencing an existing variable). + """ + if not self.token.is_left_parenthesis(): + return None + pattern = PatternExpression() + node = self._parse_node() + if node is None: + raise ValueError("Expected node definition") + if not isinstance(node, NodeReference): + raise ValueError("PatternExpression must start with a NodeReference") + pattern.add_element(node) + while True: + relationship = self._parse_relationship() + if relationship is None: + break + if relationship.hops and relationship.hops.multi(): + raise ValueError("PatternExpression does not support variable-length relationships") + pattern.add_element(relationship) + node = self._parse_node() + if node is None: + raise ValueError("Expected target node definition") + pattern.add_element(node) + return pattern + + def _parse_node(self) -> Optional[Node]: + if not self.token.is_left_parenthesis(): + return None + self.set_next_token() + self._skip_whitespace_and_comments() + identifier: Optional[str] = None + if self.token.is_identifier(): + identifier = self.token.value + self.set_next_token() + self._skip_whitespace_and_comments() + label: Optional[str] = None + peek = self.peek() + if not self.token.is_colon() and peek is not 
None and peek.is_identifier(): + raise ValueError("Expected ':' for node label") + if self.token.is_colon() and (peek is None or not peek.is_identifier()): + raise ValueError("Expected node label identifier") + if self.token.is_colon() and peek is not None and peek.is_identifier(): + self.set_next_token() + label = self.token.value + self.set_next_token() + self._skip_whitespace_and_comments() + node = Node() + node.label = label + if label is not None and identifier is not None: + node.identifier = identifier + self._variables[identifier] = node + elif identifier is not None: + reference = self._variables.get(identifier) + from ..graph.node_reference import NodeReference + if reference is None or not isinstance(reference, Node): + raise ValueError(f"Undefined node reference: {identifier}") + node = NodeReference(node, reference) + if not self.token.is_right_parenthesis(): + raise ValueError("Expected closing parenthesis for node definition") + self.set_next_token() + return node + + def _parse_relationship(self) -> Optional[Relationship]: + if self.token.is_less_than() and self.peek() is not None and self.peek().is_subtract(): + self.set_next_token() + self.set_next_token() + elif self.token.is_subtract(): + self.set_next_token() + else: + return None + if not self.token.is_opening_bracket(): + return None + self.set_next_token() + variable: Optional[str] = None + if self.token.is_identifier(): + variable = self.token.value + self.set_next_token() + if not self.token.is_colon(): + raise ValueError("Expected ':' for relationship type") + self.set_next_token() + if not self.token.is_identifier(): + raise ValueError("Expected relationship type identifier") + rel_type: str = self.token.value or "" + self.set_next_token() + hops = self._parse_relationship_hops() + if not self.token.is_closing_bracket(): + raise ValueError("Expected closing bracket for relationship definition") + self.set_next_token() + if not self.token.is_subtract(): + raise ValueError("Expected '-' 
for relationship definition") + self.set_next_token() + if self.token.is_greater_than(): + self.set_next_token() + relationship = Relationship() + if rel_type is not None and variable is not None: + relationship.identifier = variable + self._variables[variable] = relationship + elif variable is not None: + reference = self._variables.get(variable) + from ..graph.relationship_reference import RelationshipReference + if reference is None or not isinstance(reference, Relationship): + raise ValueError(f"Undefined relationship reference: {variable}") + relationship = RelationshipReference(relationship, reference) + if hops is not None: + relationship.hops = hops + relationship.type = rel_type + return relationship + + def _parse_relationship_hops(self): + import sys + from ..graph.hops import Hops + if not self.token.is_multiply(): + return None + hops = Hops() + self.set_next_token() + if self.token.is_number(): + hops.min = int(self.token.value or "0") + self.set_next_token() + if self.token.is_dot(): + self.set_next_token() + if not self.token.is_dot(): + raise ValueError("Expected '..' 
for relationship hops") + self.set_next_token() + if not self.token.is_number(): + raise ValueError("Expected number for relationship hops") + hops.max = int(self.token.value or "0") + self.set_next_token() + else: + # Just * without numbers means unbounded + hops.min = 0 + hops.max = sys.maxsize + return hops + + def _parse_limit(self) -> Optional[Limit]: + self._skip_whitespace_and_comments() + if not self.token.is_limit(): + return None + self._expect_previous_token_to_be_whitespace_or_comment() + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + if not self.token.is_number(): + raise ValueError("Expected number") + limit = Limit(int(self.token.value or "0")) + self.set_next_token() + return limit + + def _parse_expressions( + self, alias_option: AliasOption = AliasOption.NOT_ALLOWED + ) -> Iterator[Expression]: + while True: + expression = self._parse_expression() + if expression is not None: + alias = self._parse_alias() + if isinstance(expression.first_child(), Reference) and alias is None: + reference = expression.first_child() + expression.set_alias(reference.identifier) + self._variables[reference.identifier] = expression + elif (alias_option == AliasOption.REQUIRED and + alias is None and + not isinstance(expression.first_child(), Reference)): + raise ValueError("Alias required") + elif alias_option == AliasOption.NOT_ALLOWED and alias is not None: + raise ValueError("Alias not allowed") + elif alias_option in (AliasOption.OPTIONAL, AliasOption.REQUIRED) and alias is not None: + expression.set_alias(alias.get_alias()) + self._variables[alias.get_alias()] = expression + yield expression + else: + break + self._skip_whitespace_and_comments() + if not self.token.is_comma(): + break + self.set_next_token() + + def _parse_expression(self) -> Optional[Expression]: + expression = Expression() + while True: + self._skip_whitespace_and_comments() + if self.token.is_identifier() and (self.peek() is None or not 
self.peek().is_left_parenthesis()): + identifier = self.token.value or "" + reference = Reference(identifier, self._variables.get(identifier)) + self.set_next_token() + lookup = self._parse_lookup(reference) + expression.add_node(lookup) + elif self.token.is_identifier() and self.peek() is not None and self.peek().is_left_parenthesis(): + func = self._parse_predicate_function() or self._parse_function() + if func is not None: + lookup = self._parse_lookup(func) + expression.add_node(lookup) + elif self.token.is_left_parenthesis() and self.peek() is not None and self.peek().is_identifier(): + # Possible graph pattern expression + pattern = self._parse_pattern_expression() + if pattern is not None: + expression.add_node(pattern) + elif self.token.is_operand(): + expression.add_node(self.token.node) + self.set_next_token() + elif self.token.is_f_string(): + f_string = self._parse_f_string() + if f_string is None: + raise ValueError("Expected f-string") + expression.add_node(f_string) + elif self.token.is_left_parenthesis(): + self.set_next_token() + sub = self._parse_expression() + if sub is None: + raise ValueError("Expected expression") + if not self.token.is_right_parenthesis(): + raise ValueError("Expected right parenthesis") + self.set_next_token() + lookup = self._parse_lookup(sub) + expression.add_node(lookup) + elif self.token.is_opening_brace() or self.token.is_opening_bracket(): + json = self._parse_json() + if json is None: + raise ValueError("Expected JSON object") + lookup = self._parse_lookup(json) + expression.add_node(lookup) + elif self.token.is_case(): + case = self._parse_case() + if case is None: + raise ValueError("Expected CASE statement") + expression.add_node(case) + elif self.token.is_not(): + not_node = Not() + self.set_next_token() + sub = self._parse_expression() + if sub is None: + raise ValueError("Expected expression") + not_node.add_child(sub) + expression.add_node(not_node) + else: + if expression.nodes_added(): + raise 
ValueError("Expected operand or left parenthesis") + else: + break + self._skip_whitespace_and_comments() + if self.token.is_operator(): + expression.add_node(self.token.node) + else: + break + self.set_next_token() + + if expression.nodes_added(): + expression.finish() + return expression + return None + + def _parse_lookup(self, node: ASTNode) -> ASTNode: + variable = node + lookup = None + while True: + if self.token.is_dot(): + self.set_next_token() + if not self.token.is_identifier() and not self.token.is_keyword(): + raise ValueError("Expected identifier") + lookup = Lookup() + lookup.index = Identifier(self.token.value or "") + lookup.variable = variable + self.set_next_token() + elif self.token.is_opening_bracket(): + self.set_next_token() + self._skip_whitespace_and_comments() + index = self._parse_expression() + to = None + self._skip_whitespace_and_comments() + if self.token.is_colon(): + self.set_next_token() + self._skip_whitespace_and_comments() + lookup = RangeLookup() + to = self._parse_expression() + else: + if index is None: + raise ValueError("Expected expression") + lookup = Lookup() + self._skip_whitespace_and_comments() + if not self.token.is_closing_bracket(): + raise ValueError("Expected closing bracket") + self.set_next_token() + if isinstance(lookup, RangeLookup): + lookup.from_ = index or Null() + lookup.to = to or Null() + elif isinstance(lookup, Lookup) and index is not None: + lookup.index = index + lookup.variable = variable + else: + break + variable = lookup or variable + return variable + + def _parse_case(self) -> Optional[Case]: + if not self.token.is_case(): + return None + self.set_next_token() + case = Case() + parts = 0 + self._expect_and_skip_whitespace_and_comments() + while True: + when = self._parse_when() + if when is None and parts == 0: + raise ValueError("Expected WHEN") + elif when is None and parts > 0: + break + elif when is not None: + case.add_child(when) + self._expect_and_skip_whitespace_and_comments() + then = 
self._parse_then() + if then is None: + raise ValueError("Expected THEN") + else: + case.add_child(then) + self._expect_and_skip_whitespace_and_comments() + parts += 1 + else_ = self._parse_else() + if else_ is None: + raise ValueError("Expected ELSE") + else: + case.add_child(else_) + self._expect_and_skip_whitespace_and_comments() + if not self.token.is_end(): + raise ValueError("Expected END") + self.set_next_token() + return case + + def _parse_when(self) -> Optional[When]: + if not self.token.is_when(): + return None + self.set_next_token() + when = When() + self._expect_and_skip_whitespace_and_comments() + expression = self._parse_expression() + if expression is None: + raise ValueError("Expected expression") + when.add_child(expression) + return when + + def _parse_then(self) -> Optional[Then]: + if not self.token.is_then(): + return None + self.set_next_token() + then = Then() + self._expect_and_skip_whitespace_and_comments() + expression = self._parse_expression() + if expression is None: + raise ValueError("Expected expression") + then.add_child(expression) + return then + + def _parse_else(self) -> Optional[Else]: + if not self.token.is_else(): + return None + self.set_next_token() + else_ = Else() + self._expect_and_skip_whitespace_and_comments() + expression = self._parse_expression() + if expression is None: + raise ValueError("Expected expression") + else_.add_child(expression) + return else_ + + def _parse_alias(self) -> Optional[Alias]: + self._skip_whitespace_and_comments() + if not self.token.is_as(): + return None + self._expect_previous_token_to_be_whitespace_or_comment() + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + if not self.token.is_identifier(): + raise ValueError("Expected identifier") + alias = Alias(self.token.value or "") + self.set_next_token() + return alias + + def _parse_predicate_function(self) -> Optional[PredicateFunction]: + """Parse a predicate function like sum(n in [...] 
| n where condition).""" + # Lookahead: identifier ( identifier in + if not self.ahead([ + Token.IDENTIFIER(""), + Token.LEFT_PARENTHESIS, + Token.IDENTIFIER(""), + Token.IN, + ]): + return None + if self.token.value is None: + raise ValueError("Expected identifier") + func = FunctionFactory.create_predicate(self.token.value) + self.set_next_token() + if not self.token.is_left_parenthesis(): + raise ValueError("Expected left parenthesis") + self.set_next_token() + self._skip_whitespace_and_comments() + if not self.token.is_identifier(): + raise ValueError("Expected identifier") + reference = Reference(self.token.value) + self._variables[reference.identifier] = reference + func.add_child(reference) + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + if not self.token.is_in(): + raise ValueError("Expected IN") + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + expression = self._parse_expression() + if expression is None: + raise ValueError("Expected expression") + if not ObjectUtils.is_instance_of_any(expression.first_child(), [ + JSONArray, + Reference, + Lookup, + Function, + ]): + raise ValueError("Expected array or reference") + func.add_child(expression) + self._skip_whitespace_and_comments() + if not self.token.is_pipe(): + raise ValueError("Expected pipe") + self.set_next_token() + return_expr = self._parse_expression() + if return_expr is None: + raise ValueError("Expected expression") + func.add_child(return_expr) + where = self._parse_where() + if where is not None: + func.add_child(where) + self._skip_whitespace_and_comments() + if not self.token.is_right_parenthesis(): + raise ValueError("Expected right parenthesis") + self.set_next_token() + del self._variables[reference.identifier] + return func + + def _parse_function(self) -> Optional[Function]: + if not self.token.is_identifier(): + return None + name = self.token.value or "" + if not self.peek() or not self.peek().is_left_parenthesis(): + return None 
+ + try: + func = FunctionFactory.create(name) + except ValueError: + raise ValueError(f"Unknown function: {name}") + + # Check for nested aggregate functions + if isinstance(func, AggregateFunction) and self._context.contains_type(AggregateFunction): + raise ValueError("Aggregate functions cannot be nested") + + self._context.push(func) + self.set_next_token() # skip function name + self.set_next_token() # skip left parenthesis + self._skip_whitespace_and_comments() + + # Check for DISTINCT keyword + if self.token.is_distinct(): + func.distinct = True + self.set_next_token() + self._expect_and_skip_whitespace_and_comments() + + params = list(self._parse_function_parameters()) + func.parameters = params + + if not self.token.is_right_parenthesis(): + raise ValueError("Expected right parenthesis") + self.set_next_token() + self._context.pop() + return func + + def _parse_async_function(self) -> Optional[AsyncFunction]: + if not self.token.is_identifier(): + return None + name = self.token.value or "" + if not FunctionFactory.is_async_provider(name): + return None + self.set_next_token() + if not self.token.is_left_parenthesis(): + raise ValueError("Expected left parenthesis") + self.set_next_token() + + func = FunctionFactory.create_async(name) + params = list(self._parse_function_parameters()) + func.parameters = params + + if not self.token.is_right_parenthesis(): + raise ValueError("Expected right parenthesis") + self.set_next_token() + return func + + def _parse_function_parameters(self) -> Iterator[ASTNode]: + while True: + self._skip_whitespace_and_comments() + if self.token.is_right_parenthesis(): + break + expr = self._parse_expression() + if expr is not None: + yield expr + self._skip_whitespace_and_comments() + if not self.token.is_comma(): + break + self.set_next_token() + + def _parse_json(self) -> Optional[ASTNode]: + if self.token.is_opening_brace(): + return self._parse_associative_array() + elif self.token.is_opening_bracket(): + return 
self._parse_json_array() + return None + + def _parse_associative_array(self) -> AssociativeArray: + if not self.token.is_opening_brace(): + raise ValueError("Expected opening brace") + self.set_next_token() + array = AssociativeArray() + while True: + self._skip_whitespace_and_comments() + if self.token.is_closing_brace(): + break + if not self.token.is_identifier() and not self.token.is_string() and not self.token.is_keyword(): + raise ValueError("Expected key identifier or string") + key = self.token.value or "" + self.set_next_token() + self._skip_whitespace_and_comments() + if not self.token.is_colon(): + raise ValueError("Expected colon") + self.set_next_token() + self._skip_whitespace_and_comments() + value = self._parse_expression() + if value is None: + raise ValueError("Expected value") + array.add_key_value(KeyValuePair(key, value)) + self._skip_whitespace_and_comments() + if not self.token.is_comma(): + break + self.set_next_token() + if not self.token.is_closing_brace(): + raise ValueError("Expected closing brace") + self.set_next_token() + return array + + def _parse_json_array(self) -> JSONArray: + if not self.token.is_opening_bracket(): + raise ValueError("Expected opening bracket") + self.set_next_token() + array = JSONArray() + while True: + self._skip_whitespace_and_comments() + if self.token.is_closing_bracket(): + break + value = self._parse_expression() + if value is None: + break + array.add_value(value) + self._skip_whitespace_and_comments() + if not self.token.is_comma(): + break + self.set_next_token() + if not self.token.is_closing_bracket(): + raise ValueError("Expected closing bracket") + self.set_next_token() + return array + + def _parse_f_string(self) -> Optional[FString]: + if not self.token.is_f_string(): + return None + f_string = FString() + while self.token.is_f_string() or self.token.is_opening_brace(): + if self.token.is_f_string(): + f_string.add_child(String(self.token.value or "")) + self.set_next_token() + elif 
self.token.is_opening_brace(): + self.set_next_token() + expr = self._parse_expression() + if expr is not None: + f_string.add_child(expr) + if self.token.is_closing_brace(): + self.set_next_token() + return f_string + + def _skip_whitespace_and_comments(self) -> bool: + skipped: bool = self.previous_token.is_whitespace_or_comment() if self.previous_token else False + while self.token.is_whitespace_or_comment(): + self.set_next_token() + skipped = True + return skipped + + def _expect_and_skip_whitespace_and_comments(self) -> None: + skipped = self._skip_whitespace_and_comments() + if not skipped: + raise ValueError("Expected whitespace") + + def _expect_previous_token_to_be_whitespace_or_comment(self) -> None: + if not self.previous_token.is_whitespace_or_comment(): + raise ValueError("Expected previous token to be whitespace or comment") diff --git a/flowquery-py/src/parsing/token_to_node.py b/flowquery-py/src/parsing/token_to_node.py new file mode 100644 index 0000000..ff6f526 --- /dev/null +++ b/flowquery-py/src/parsing/token_to_node.py @@ -0,0 +1,109 @@ +"""Converts tokens to AST nodes.""" + +from ..tokenization.token import Token +from .ast_node import ASTNode +from .components.csv import CSV +from .components.json import JSON +from .components.null import Null +from .components.text import Text +from .expressions.boolean import Boolean +from .expressions.identifier import Identifier +from .expressions.number import Number +from .expressions.string import String +from .expressions.operator import ( + Add, + And, + Divide, + Equals, + GreaterThan, + GreaterThanOrEqual, + Is, + LessThan, + LessThanOrEqual, + Modulo, + Multiply, + Not, + NotEquals, + Or, + Power, + Subtract, +) +from .logic.else_ import Else +from .logic.end import End +from .logic.then import Then +from .logic.when import When + + +class TokenToNode: + """Converts tokens to their corresponding AST nodes.""" + + @staticmethod + def convert(token: Token) -> ASTNode: + if token.is_number(): + if 
token.value is None: + raise ValueError("Number token has no value") + return Number(token.value) + elif token.is_string(): + if token.value is None: + raise ValueError("String token has no value") + return String(token.value) + elif token.is_identifier(): + if token.value is None: + raise ValueError("Identifier token has no value") + return Identifier(token.value) + elif token.is_operator(): + if token.is_add(): + return Add() + elif token.is_subtract(): + return Subtract() + elif token.is_multiply(): + return Multiply() + elif token.is_divide(): + return Divide() + elif token.is_modulo(): + return Modulo() + elif token.is_exponent(): + return Power() + elif token.is_equals(): + return Equals() + elif token.is_not_equals(): + return NotEquals() + elif token.is_less_than(): + return LessThan() + elif token.is_greater_than(): + return GreaterThan() + elif token.is_greater_than_or_equal(): + return GreaterThanOrEqual() + elif token.is_less_than_or_equal(): + return LessThanOrEqual() + elif token.is_and(): + return And() + elif token.is_or(): + return Or() + elif token.is_is(): + return Is() + elif token.is_unary_operator(): + if token.is_not(): + return Not() + elif token.is_keyword(): + if token.is_json(): + return JSON() + elif token.is_csv(): + return CSV() + elif token.is_text(): + return Text() + elif token.is_when(): + return When() + elif token.is_then(): + return Then() + elif token.is_else(): + return Else() + elif token.is_end(): + return End() + elif token.is_null(): + return Null() + elif token.is_boolean(): + return Boolean(token.value) + else: + raise ValueError("Unknown token") + return ASTNode() diff --git a/flowquery-py/src/tokenization/__init__.py b/flowquery-py/src/tokenization/__init__.py new file mode 100644 index 0000000..a488b8b --- /dev/null +++ b/flowquery-py/src/tokenization/__init__.py @@ -0,0 +1,23 @@ +"""Tokenization module for FlowQuery.""" + +from .tokenizer import Tokenizer +from .token import Token +from .token_type import TokenType 
+from .keyword import Keyword +from .operator import Operator +from .symbol import Symbol +from .token_mapper import TokenMapper +from .string_walker import StringWalker +from .trie import Trie + +__all__ = [ + "Tokenizer", + "Token", + "TokenType", + "Keyword", + "Operator", + "Symbol", + "TokenMapper", + "StringWalker", + "Trie", +] diff --git a/flowquery-py/src/tokenization/keyword.py b/flowquery-py/src/tokenization/keyword.py new file mode 100644 index 0000000..0fa955b --- /dev/null +++ b/flowquery-py/src/tokenization/keyword.py @@ -0,0 +1,48 @@ +"""Keyword enumeration for FlowQuery tokenization.""" + +from enum import Enum + + +class Keyword(Enum): + """Enumeration of all keywords in FlowQuery.""" + + RETURN = "RETURN" + MATCH = "MATCH" + WHERE = "WHERE" + CREATE = "CREATE" + VIRTUAL = "VIRTUAL" + MERGE = "MERGE" + DELETE = "DELETE" + DETACH = "DETACH" + SET = "SET" + REMOVE = "REMOVE" + FOREACH = "FOREACH" + WITH = "WITH" + CALL = "CALL" + YIELD = "YIELD" + LOAD = "LOAD" + HEADERS = "HEADERS" + POST = "POST" + FROM = "FROM" + CSV = "CSV" + JSON = "JSON" + TEXT = "TEXT" + AS = "AS" + UNWIND = "UNWIND" + SUM = "SUM" + COLLECT = "COLLECT" + DISTINCT = "DISTINCT" + ORDER = "ORDER" + BY = "BY" + ASC = "ASC" + DESC = "DESC" + SKIP = "SKIP" + LIMIT = "LIMIT" + EOF = "EOF" + CASE = "CASE" + WHEN = "WHEN" + THEN = "THEN" + ELSE = "ELSE" + END = "END" + NULL = "NULL" + IN = "IN" diff --git a/flowquery-py/src/tokenization/operator.py b/flowquery-py/src/tokenization/operator.py new file mode 100644 index 0000000..713b403 --- /dev/null +++ b/flowquery-py/src/tokenization/operator.py @@ -0,0 +1,29 @@ +"""Operator enumeration for FlowQuery tokenization.""" + +from enum import Enum + + +class Operator(Enum): + """Enumeration of all operators in FlowQuery.""" + + # Arithmetic + ADD = "+" + SUBTRACT = "-" + MULTIPLY = "*" + DIVIDE = "/" + MODULO = "%" + EXPONENT = "^" + # Comparison + EQUALS = "=" + NOT_EQUALS = "<>" + LESS_THAN = "<" + LESS_THAN_OR_EQUAL = "<=" + GREATER_THAN 
= ">" + GREATER_THAN_OR_EQUAL = ">=" + IS = "IS" + # Logical + AND = "AND" + OR = "OR" + NOT = "NOT" + IN = "IN" + PIPE = "|" diff --git a/flowquery-py/src/tokenization/string_walker.py b/flowquery-py/src/tokenization/string_walker.py new file mode 100644 index 0000000..582b10b --- /dev/null +++ b/flowquery-py/src/tokenization/string_walker.py @@ -0,0 +1,158 @@ +"""Utility class for walking through a string character by character during tokenization.""" + +from ..utils.string_utils import StringUtils + + +class StringWalker: + """Utility class for walking through a string character by character during tokenization. + + Provides methods to check for specific character patterns, move through the string, + and extract substrings. Used by the Tokenizer to process input text. + + Example: + walker = StringWalker("WITH x as variable") + while not walker.is_at_end: + # Process characters + """ + + def __init__(self, text: str): + """Creates a new StringWalker for the given text. + + Args: + text: The input text to walk through + """ + self._text = text + self._position = 0 + + @property + def position(self) -> int: + return self._position + + @property + def current_char(self) -> str: + if self._position >= len(self._text): + return '' + return self._text[self._position] + + @property + def next_char(self) -> str: + if self._position + 1 >= len(self._text): + return '' + return self._text[self._position + 1] + + @property + def previous_char(self) -> str: + if self._position - 1 < 0: + return '' + return self._text[self._position - 1] + + @property + def is_at_end(self) -> bool: + return self._position >= len(self._text) + + def get_string(self, start_position: int) -> str: + return self._text[start_position:self._position] + + def get_remaining_string(self) -> str: + return self._text[self._position:] + + def check_for_single_comment(self) -> bool: + if self.single_line_comment_start(): + while not self.is_at_end and not self.new_line(): + self._position += 1 + return 
True + return False + + def check_for_multi_line_comment(self) -> bool: + if self.multi_line_comment_start(): + while not self.is_at_end: + if self.multi_line_comment_end(): + self._position += 2 + return True + self._position += 1 + raise ValueError(f"Unterminated multi-line comment at position {self._position}") + return False + + def single_line_comment_start(self) -> bool: + return self.current_char == '/' and self.next_char == '/' + + def multi_line_comment_start(self) -> bool: + return self.current_char == '/' and self.next_char == '*' + + def multi_line_comment_end(self) -> bool: + return self.current_char == '*' and self.next_char == '/' + + def new_line(self) -> bool: + return self.current_char == '\n' + + def escaped(self, char: str) -> bool: + return self.current_char == '\\' and self.next_char == char + + def escaped_brace(self) -> bool: + return ((self.current_char == '{' and self.next_char == '{') or + (self.current_char == '}' and self.next_char == '}')) + + def opening_brace(self) -> bool: + return self.current_char == '{' + + def closing_brace(self) -> bool: + return self.current_char == '}' + + def check_for_under_score(self) -> bool: + found_under_score = self.current_char == '_' + if found_under_score: + self._position += 1 + return found_under_score + + def check_for_letter(self) -> bool: + found_letter = self.current_char.lower() in StringUtils.letters + if found_letter: + self._position += 1 + return found_letter + + def check_for_digit(self) -> bool: + found_digit = self.current_char in StringUtils.digits + if found_digit: + self._position += 1 + return found_digit + + def check_for_quote(self) -> str | None: + quote_char = self.current_char + if quote_char in ('"', "'", '`'): + self._position += 1 + return quote_char + return None + + def check_for_string(self, value: str) -> bool: + _string = self._text[self._position:self._position + len(value)] + found_string = _string.lower() == value.lower() + if found_string: + self._position += 
len(value) + return found_string + + def check_for_whitespace(self) -> bool: + return self.current_char in StringUtils.whitespace + + def check_for_f_string_start(self) -> bool: + return self.current_char.lower() == 'f' and self.next_char in ("'", '"', '`') + + def move_next(self) -> None: + self._position += 1 + + def move_by(self, steps: int) -> None: + self._position += steps + + def move_previous(self) -> None: + self._position -= 1 + + def is_word(self, word: str | None) -> bool: + if word is None: + return False + return self._text[self._position:self._position + len(word)] == word + + def word_continuation(self, word: str) -> bool: + next_pos = self._position + len(word) + if next_pos >= len(self._text): + return False + next_char = self._text[next_pos] + return next_char in StringUtils.word_valid_chars diff --git a/flowquery-py/src/tokenization/symbol.py b/flowquery-py/src/tokenization/symbol.py new file mode 100644 index 0000000..a8ecc7d --- /dev/null +++ b/flowquery-py/src/tokenization/symbol.py @@ -0,0 +1,19 @@ +"""Symbol enumeration for FlowQuery tokenization.""" + +from enum import Enum + + +class Symbol(Enum): + """Enumeration of all symbols in FlowQuery.""" + + LEFT_PARENTHESIS = "(" + RIGHT_PARENTHESIS = ")" + COMMA = "," + DOT = "." 
+ COLON = ":" + WHITESPACE = "" + OPENING_BRACE = "{" + CLOSING_BRACE = "}" + OPENING_BRACKET = "[" + CLOSING_BRACKET = "]" + BACKTICK = "`" diff --git a/flowquery-py/src/tokenization/token.py b/flowquery-py/src/tokenization/token.py new file mode 100644 index 0000000..e44a8b1 --- /dev/null +++ b/flowquery-py/src/tokenization/token.py @@ -0,0 +1,659 @@ +"""Represents a single token in the FlowQuery language.""" + +from __future__ import annotations +from typing import TYPE_CHECKING, Optional, Any + +from .token_type import TokenType +from .keyword import Keyword +from .operator import Operator +from .symbol import Symbol +from ..utils.string_utils import StringUtils + +if TYPE_CHECKING: + from ..parsing.ast_node import ASTNode + + +class Token: + """Represents a single token in the FlowQuery language. + + Tokens are the atomic units of lexical analysis, produced by the tokenizer + and consumed by the parser. Each token has a type (keyword, operator, identifier, etc.) + and an optional value. + + Example: + with_token = Token.WITH + ident_token = Token.IDENTIFIER("myVar") + num_token = Token.NUMBER("42") + """ + + def __init__(self, type_: TokenType, value: Optional[str] = None): + """Creates a new Token instance. + + Args: + type_: The type of the token + value: The optional value associated with the token + """ + self._position: int = -1 + self._type = type_ + self._value = value + self._case_sensitive_value: Optional[str] = None + self._can_be_identifier = StringUtils.can_be_identifier(value or "") + + def equals(self, other: Token) -> bool: + """Checks if this token equals another token. 
+ + Args: + other: The token to compare against + + Returns: + True if tokens are equal, False otherwise + """ + if self._type == TokenType.IDENTIFIER and other.type == TokenType.IDENTIFIER: + return True # Identifier values are not compared + return self._type == other.type and self._value == other.value + + @property + def position(self) -> int: + return self._position + + @position.setter + def position(self, value: int) -> None: + self._position = value + + @property + def type(self) -> TokenType: + return self._type + + @property + def value(self) -> Optional[str]: + return self._case_sensitive_value or self._value + + @property + def case_sensitive_value(self) -> Optional[str]: + return self._case_sensitive_value + + @case_sensitive_value.setter + def case_sensitive_value(self, value: str) -> None: + self._case_sensitive_value = value + + @property + def can_be_identifier(self) -> bool: + return self._can_be_identifier + + @property + def node(self) -> ASTNode: + from ..parsing.token_to_node import TokenToNode + return TokenToNode.convert(self) + + def __str__(self) -> str: + return f"{self._type.value} {self._value}" + + # Comment tokens + + @staticmethod + def COMMENT(comment: str) -> Token: + return Token(TokenType.COMMENT, comment) + + def is_comment(self) -> bool: + return self._type == TokenType.COMMENT + + # Identifier token + + @staticmethod + def IDENTIFIER(value: str) -> Token: + return Token(TokenType.IDENTIFIER, value) + + def is_identifier(self) -> bool: + return self._type == TokenType.IDENTIFIER or self._type == TokenType.BACKTICK_STRING + + # String token + + @staticmethod + def STRING(value: str, quote_char: str = '"') -> Token: + unquoted = StringUtils.unquote(value) + unescaped = StringUtils.remove_escaped_quotes(unquoted, quote_char) + return Token(TokenType.STRING, unescaped) + + def is_string(self) -> bool: + return self._type == TokenType.STRING or self._type == TokenType.BACKTICK_STRING + + @staticmethod + def BACKTICK_STRING(value: 
str, quote_char: str = '"') -> Token: + unquoted = StringUtils.unquote(value) + unescaped = StringUtils.remove_escaped_quotes(unquoted, quote_char) + return Token(TokenType.BACKTICK_STRING, unescaped) + + @staticmethod + def F_STRING(value: str, quote_char: str = '"') -> Token: + unquoted = StringUtils.unquote(value) + unescaped = StringUtils.remove_escaped_quotes(unquoted, quote_char) + fstring = StringUtils.remove_escaped_braces(unescaped) + return Token(TokenType.F_STRING, fstring) + + def is_f_string(self) -> bool: + return self._type == TokenType.F_STRING + + # Number token + + @staticmethod + def NUMBER(value: str) -> Token: + return Token(TokenType.NUMBER, value) + + def is_number(self) -> bool: + return self._type == TokenType.NUMBER + + # Boolean token + + @staticmethod + def BOOLEAN(value: str) -> Token: + return Token(TokenType.BOOLEAN, value) + + def is_boolean(self) -> bool: + return self._type == TokenType.BOOLEAN and self._value in ("TRUE", "FALSE") + + # Symbol tokens + + @classmethod + @property + def LEFT_PARENTHESIS(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.LEFT_PARENTHESIS.value) + + def is_left_parenthesis(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.LEFT_PARENTHESIS.value + + @classmethod + @property + def RIGHT_PARENTHESIS(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.RIGHT_PARENTHESIS.value) + + def is_right_parenthesis(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.RIGHT_PARENTHESIS.value + + @classmethod + @property + def COMMA(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.COMMA.value) + + def is_comma(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.COMMA.value + + @classmethod + @property + def DOT(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.DOT.value) + + def is_dot(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.DOT.value + + @classmethod + @property + def 
COLON(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.COLON.value) + + def is_colon(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.COLON.value + + @classmethod + @property + def OPENING_BRACE(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.OPENING_BRACE.value) + + def is_opening_brace(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.OPENING_BRACE.value + + @classmethod + @property + def CLOSING_BRACE(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.CLOSING_BRACE.value) + + def is_closing_brace(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.CLOSING_BRACE.value + + @classmethod + @property + def OPENING_BRACKET(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.OPENING_BRACKET.value) + + def is_opening_bracket(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.OPENING_BRACKET.value + + @classmethod + @property + def CLOSING_BRACKET(cls) -> Token: + return Token(TokenType.SYMBOL, Symbol.CLOSING_BRACKET.value) + + def is_closing_bracket(self) -> bool: + return self._type == TokenType.SYMBOL and self._value == Symbol.CLOSING_BRACKET.value + + # Whitespace token + + @classmethod + @property + def WHITESPACE(cls) -> Token: + return Token(TokenType.WHITESPACE) + + def is_whitespace(self) -> bool: + return self._type == TokenType.WHITESPACE + + # Operator tokens + + def is_operator(self) -> bool: + return self._type == TokenType.OPERATOR + + def is_unary_operator(self) -> bool: + return self._type == TokenType.UNARY_OPERATOR + + @classmethod + @property + def ADD(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.ADD.value) + + def is_add(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.ADD.value + + @classmethod + @property + def SUBTRACT(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.SUBTRACT.value) + + def is_subtract(self) -> bool: + return self._type == 
TokenType.OPERATOR and self._value == Operator.SUBTRACT.value + + def is_negation(self) -> bool: + return self.is_subtract() + + @classmethod + @property + def MULTIPLY(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.MULTIPLY.value) + + def is_multiply(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.MULTIPLY.value + + @classmethod + @property + def DIVIDE(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.DIVIDE.value) + + def is_divide(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.DIVIDE.value + + @classmethod + @property + def EXPONENT(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.EXPONENT.value) + + def is_exponent(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.EXPONENT.value + + @classmethod + @property + def MODULO(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.MODULO.value) + + def is_modulo(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.MODULO.value + + @classmethod + @property + def EQUALS(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.EQUALS.value) + + def is_equals(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.EQUALS.value + + @classmethod + @property + def NOT_EQUALS(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.NOT_EQUALS.value) + + def is_not_equals(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.NOT_EQUALS.value + + @classmethod + @property + def LESS_THAN(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.LESS_THAN.value) + + def is_less_than(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.LESS_THAN.value + + @classmethod + @property + def LESS_THAN_OR_EQUAL(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.LESS_THAN_OR_EQUAL.value) + + def is_less_than_or_equal(self) -> bool: + return self._type == 
TokenType.OPERATOR and self._value == Operator.LESS_THAN_OR_EQUAL.value + + @classmethod + @property + def GREATER_THAN(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.GREATER_THAN.value) + + def is_greater_than(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.GREATER_THAN.value + + @classmethod + @property + def GREATER_THAN_OR_EQUAL(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.GREATER_THAN_OR_EQUAL.value) + + def is_greater_than_or_equal(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.GREATER_THAN_OR_EQUAL.value + + @classmethod + @property + def AND(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.AND.value) + + def is_and(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.AND.value + + @classmethod + @property + def OR(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.OR.value) + + def is_or(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.OR.value + + @classmethod + @property + def NOT(cls) -> Token: + return Token(TokenType.UNARY_OPERATOR, Operator.NOT.value) + + def is_not(self) -> bool: + return self._type == TokenType.UNARY_OPERATOR and self._value == Operator.NOT.value + + @classmethod + @property + def IS(cls) -> Token: + return Token(TokenType.OPERATOR, Operator.IS.value) + + def is_is(self) -> bool: + return self._type == TokenType.OPERATOR and self._value == Operator.IS.value + + # Keyword tokens + + def is_keyword(self) -> bool: + return self._type == TokenType.KEYWORD + + @classmethod + @property + def WITH(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.WITH.value) + + def is_with(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.WITH.value + + @classmethod + @property + def RETURN(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.RETURN.value) + + def is_return(self) -> bool: + return self._type == TokenType.KEYWORD 
and self._value == Keyword.RETURN.value + + @classmethod + @property + def LOAD(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.LOAD.value) + + def is_load(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.LOAD.value + + @classmethod + @property + def CALL(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.CALL.value) + + def is_call(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.CALL.value + + @classmethod + @property + def YIELD(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.YIELD.value) + + def is_yield(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.YIELD.value + + @classmethod + @property + def JSON(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.JSON.value) + + def is_json(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.JSON.value + + @classmethod + @property + def CSV(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.CSV.value) + + def is_csv(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.CSV.value + + @classmethod + @property + def TEXT(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.TEXT.value) + + def is_text(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.TEXT.value + + @classmethod + @property + def FROM(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.FROM.value) + + def is_from(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.FROM.value + + @classmethod + @property + def HEADERS(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.HEADERS.value) + + def is_headers(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.HEADERS.value + + @classmethod + @property + def POST(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.POST.value) + + def is_post(self) -> bool: + return self._type == TokenType.KEYWORD and 
self._value == Keyword.POST.value + + @classmethod + @property + def UNWIND(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.UNWIND.value) + + def is_unwind(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.UNWIND.value + + @classmethod + @property + def MATCH(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.MATCH.value) + + def is_match(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.MATCH.value + + @classmethod + @property + def AS(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.AS.value) + + def is_as(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.AS.value + + @classmethod + @property + def WHERE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.WHERE.value) + + def is_where(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.WHERE.value + + @classmethod + @property + def MERGE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.MERGE.value) + + def is_merge(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.MERGE.value + + @classmethod + @property + def CREATE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.CREATE.value) + + def is_create(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.CREATE.value + + @classmethod + @property + def VIRTUAL(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.VIRTUAL.value) + + def is_virtual(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.VIRTUAL.value + + @classmethod + @property + def DELETE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.DELETE.value) + + def is_delete(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.DELETE.value + + @classmethod + @property + def SET(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.SET.value) + + def is_set(self) -> bool: + return self._type == 
TokenType.KEYWORD and self._value == Keyword.SET.value + + @classmethod + @property + def REMOVE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.REMOVE.value) + + def is_remove(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.REMOVE.value + + @classmethod + @property + def CASE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.CASE.value) + + def is_case(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.CASE.value + + @classmethod + @property + def WHEN(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.WHEN.value) + + def is_when(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.WHEN.value + + @classmethod + @property + def THEN(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.THEN.value) + + def is_then(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.THEN.value + + @classmethod + @property + def ELSE(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.ELSE.value) + + def is_else(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.ELSE.value + + @classmethod + @property + def END(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.END.value) + + def is_end(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.END.value + + @classmethod + @property + def NULL(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.NULL.value) + + def is_null(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.NULL.value + + @classmethod + @property + def IN(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.IN.value) + + def is_in(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.IN.value + + @classmethod + @property + def PIPE(cls) -> Token: + return Token(TokenType.KEYWORD, Operator.PIPE.value) + + def is_pipe(self) -> bool: + return self._type == TokenType.KEYWORD and 
self._value == Operator.PIPE.value + + @classmethod + @property + def DISTINCT(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.DISTINCT.value) + + def is_distinct(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.DISTINCT.value + + @classmethod + @property + def LIMIT(cls) -> Token: + return Token(TokenType.KEYWORD, Keyword.LIMIT.value) + + def is_limit(self) -> bool: + return self._type == TokenType.KEYWORD and self._value == Keyword.LIMIT.value + + # End of file token + + @classmethod + @property + def EOF(cls) -> Token: + return Token(TokenType.EOF) + + def is_eof(self) -> bool: + return self._type == TokenType.EOF + + # Other utility methods + + def is_operand(self) -> bool: + return self.is_number() or self.is_boolean() or self.is_string() or self.is_null() + + def is_whitespace_or_comment(self) -> bool: + return self.is_whitespace() or self.is_comment() + + def is_symbol(self) -> bool: + return self._type == TokenType.SYMBOL + + # Static class method lookup via string + @staticmethod + def method(name: str) -> Optional[Token]: + name_upper = name.upper() + if hasattr(Token, name_upper): + attr = getattr(Token, name_upper) + if isinstance(attr, Token): + return attr + return None diff --git a/flowquery-py/src/tokenization/token_mapper.py b/flowquery-py/src/tokenization/token_mapper.py new file mode 100644 index 0000000..0c0143a --- /dev/null +++ b/flowquery-py/src/tokenization/token_mapper.py @@ -0,0 +1,52 @@ +"""Maps string values to tokens using a Trie for efficient lookup.""" + +from typing import Optional + +from .token import Token +from .trie import Trie + + +class TokenMapper: + """Maps string values to tokens using a Trie for efficient lookup. + + Takes an enum of keywords, operators, or symbols and builds a trie + for fast token matching during tokenization. 
+ + Example: + mapper = TokenMapper(Keyword) + token = mapper.map("WITH") + """ + + def __init__(self, enum_class): + """Creates a TokenMapper from an enum of token values. + + Args: + enum_class: An enum class containing token values + """ + self._trie = Trie() + self._enum = enum_class + + for member in enum_class: + token = Token.method(member.name) + if token is not None and token.value is not None: + self._trie.insert(token) + + def map(self, value: str) -> Optional[Token]: + """Maps a string value to its corresponding token. + + Args: + value: The string value to map + + Returns: + The matched token, or None if no match found + """ + return self._trie.find(value) + + @property + def last_found(self) -> Optional[str]: + """Gets the last matched string from the most recent map operation. + + Returns: + The last found string, or None if no match + """ + return self._trie.last_found diff --git a/flowquery-py/src/tokenization/token_type.py b/flowquery-py/src/tokenization/token_type.py new file mode 100644 index 0000000..40e90a9 --- /dev/null +++ b/flowquery-py/src/tokenization/token_type.py @@ -0,0 +1,21 @@ +"""Token type enumeration for FlowQuery tokenization.""" + +from enum import Enum + + +class TokenType(Enum): + """Enumeration of all token types in FlowQuery.""" + + KEYWORD = "KEYWORD" + BOOLEAN = "BOOLEAN" + OPERATOR = "OPERATOR" + UNARY_OPERATOR = "UNARY_OPERATOR" + IDENTIFIER = "IDENTIFIER" + STRING = "STRING" + F_STRING = "F-STRING" + BACKTICK_STRING = "BACKTICK_STRING" + NUMBER = "NUMBER" + SYMBOL = "SYMBOL" + WHITESPACE = "WHITESPACE" + COMMENT = "COMMENT" + EOF = "EOF" diff --git a/flowquery-py/src/tokenization/tokenizer.py b/flowquery-py/src/tokenization/tokenizer.py new file mode 100644 index 0000000..b680649 --- /dev/null +++ b/flowquery-py/src/tokenization/tokenizer.py @@ -0,0 +1,214 @@ +"""Tokenizes FlowQuery input strings into a sequence of tokens.""" + +from typing import List, Optional, Iterator, Callable + +from ..utils.string_utils import 
StringUtils +from .keyword import Keyword +from .operator import Operator +from .string_walker import StringWalker +from .symbol import Symbol +from .token import Token +from .token_mapper import TokenMapper + + +class Tokenizer: + """Tokenizes FlowQuery input strings into a sequence of tokens. + + The tokenizer performs lexical analysis, breaking down the input text into + meaningful tokens such as keywords, identifiers, operators, strings, numbers, + and symbols. It handles comments, whitespace, and f-strings. + + Example: + tokenizer = Tokenizer("WITH x = 1 RETURN x") + tokens = tokenizer.tokenize() + """ + + def __init__(self, input_: str): + """Creates a new Tokenizer instance for the given input. + + Args: + input_: The FlowQuery input string to tokenize + """ + self._walker = StringWalker(input_) + self._keywords = TokenMapper(Keyword) + self._symbols = TokenMapper(Symbol) + self._operators = TokenMapper(Operator) + + def tokenize(self) -> List[Token]: + """Tokenizes the input string into an array of tokens. 
+ + Returns: + An array of Token objects representing the tokenized input + + Raises: + ValueError: If an unrecognized token is encountered + """ + tokens: List[Token] = [] + last: Optional[Token] = None + + while not self._walker.is_at_end: + tokens.extend(self._f_string()) + last = self._get_last_non_whitespace_or_non_comment_token(tokens) or last + token = self._get_next_token(last) + if token is None: + raise ValueError(f"Unrecognized token at position {self._walker.position}") + token.position = self._walker.position + tokens.append(token) + + return tokens + + def _get_last_non_whitespace_or_non_comment_token(self, tokens: List[Token]) -> Optional[Token]: + if len(tokens) == 0: + return None + if not tokens[-1].is_whitespace_or_comment(): + return tokens[-1] + return None + + def _get_next_token(self, last: Optional[Token] = None) -> Optional[Token]: + if self._walker.is_at_end: + return Token.EOF + return ( + self._comment() or + self._whitespace() or + self._lookup(self._keywords) or + self._lookup(self._operators, last, self._skip_minus) or + self._boolean() or + self._identifier() or + self._string() or + self._number() or + self._lookup(self._symbols) + ) + + def _comment(self) -> Optional[Token]: + start_position = self._walker.position + if self._walker.check_for_single_comment() or self._walker.check_for_multi_line_comment(): + uncommented = StringUtils.uncomment(self._walker.get_string(start_position)) + return Token.COMMENT(uncommented) + return None + + def _boolean(self) -> Optional[Token]: + start_position = self._walker.position + if self._walker.check_for_string("TRUE"): + return Token.BOOLEAN(self._walker.get_string(start_position).upper()) + if self._walker.check_for_string("FALSE"): + return Token.BOOLEAN(self._walker.get_string(start_position).upper()) + return None + + def _identifier(self) -> Optional[Token]: + start_position = self._walker.position + if self._walker.check_for_under_score() or self._walker.check_for_letter(): + while (not 
self._walker.is_at_end and + (self._walker.check_for_letter() or + self._walker.check_for_digit() or + self._walker.check_for_under_score())): + pass + return Token.IDENTIFIER(self._walker.get_string(start_position)) + return None + + def _string(self) -> Optional[Token]: + start_position = self._walker.position + quote_char = self._walker.check_for_quote() + if quote_char is None: + return None + + while not self._walker.is_at_end: + if self._walker.escaped(quote_char): + self._walker.move_next() + self._walker.move_next() + continue + if self._walker.check_for_string(quote_char): + value = self._walker.get_string(start_position) + if quote_char == Symbol.BACKTICK.value: + return Token.BACKTICK_STRING(value, quote_char) + return Token.STRING(value, quote_char) + self._walker.move_next() + + raise ValueError(f"Unterminated string at position {start_position}") + + def _f_string(self) -> Iterator[Token]: + if not self._walker.check_for_f_string_start(): + return + + self._walker.move_next() # skip the f + position = self._walker.position + quote_char = self._walker.check_for_quote() + if quote_char is None: + return + + while not self._walker.is_at_end: + if self._walker.escaped(quote_char) or self._walker.escaped_brace(): + self._walker.move_next() + self._walker.move_next() + continue + + if self._walker.opening_brace(): + yield Token.F_STRING(self._walker.get_string(position), quote_char) + position = self._walker.position + yield Token.OPENING_BRACE + self._walker.move_next() # skip the opening brace + position = self._walker.position + + while not self._walker.is_at_end and not self._walker.closing_brace(): + token = self._get_next_token() + if token is not None: + yield token + else: + break + if self._walker.closing_brace(): + yield Token.CLOSING_BRACE + self._walker.move_next() # skip the closing brace + position = self._walker.position + break + + if self._walker.check_for_string(quote_char): + yield Token.F_STRING(self._walker.get_string(position), 
quote_char) + return + + self._walker.move_next() + + def _whitespace(self) -> Optional[Token]: + found_whitespace = False + while not self._walker.is_at_end and self._walker.check_for_whitespace(): + self._walker.move_next() + found_whitespace = True + return Token.WHITESPACE if found_whitespace else None + + def _number(self) -> Optional[Token]: + start_position = self._walker.position + if self._walker.check_for_string("-") or self._walker.check_for_digit(): + while not self._walker.is_at_end and self._walker.check_for_digit(): + pass + if self._walker.check_for_string(Symbol.DOT.value): + decimal_digits = 0 + while not self._walker.is_at_end and self._walker.check_for_digit(): + decimal_digits += 1 + if decimal_digits == 0: + self._walker.move_previous() + number_str = self._walker.get_string(start_position) + return Token.NUMBER(number_str) + return None + + def _lookup( + self, + mapper: TokenMapper, + last: Optional[Token] = None, + skip: Optional[Callable[[Optional[Token], Token], bool]] = None + ) -> Optional[Token]: + token = mapper.map(self._walker.get_remaining_string()) + if token is not None and token.value is not None: + if token.can_be_identifier and self._walker.word_continuation(token.value): + return None + if skip and last and skip(last, token): + return None + self._walker.move_by(len(token.value)) + if mapper.last_found is not None: + token.case_sensitive_value = mapper.last_found + return token + return None + + def _skip_minus(self, last: Optional[Token], current: Token) -> bool: + if last is None: + return False + if (last.is_keyword() or last.is_comma() or last.is_colon()) and current.is_negation(): + return True + return False diff --git a/flowquery-py/src/tokenization/trie.py b/flowquery-py/src/tokenization/trie.py new file mode 100644 index 0000000..616fe22 --- /dev/null +++ b/flowquery-py/src/tokenization/trie.py @@ -0,0 +1,124 @@ +"""Trie (prefix tree) data structure for efficient keyword and operator lookup.""" + +from __future__ 
class TrieNode:
    """A single node of a Trie.

    A node keeps at most one child per character and optionally carries the
    token whose spelling terminates at this node.
    """

    def __init__(self):
        # Child nodes keyed by a single character.
        self._children: dict[str, TrieNode] = {}
        # Token whose word ends here, if any.
        self._token: Optional[Token] = None

    def map(self, char: str) -> TrieNode:
        """Return the child for ``char``, creating it on first use."""
        child = self._children.get(char)
        if child is None:
            child = TrieNode()
            self._children[char] = child
        return child

    def retrieve(self, char: str) -> Optional[TrieNode]:
        """Return the child for ``char``, or None when absent."""
        return self._children.get(char)

    @property
    def token(self) -> Optional[Token]:
        """Token ending at this node (None for intermediate nodes)."""
        return self._token

    @token.setter
    def token(self, token: Token) -> None:
        self._token = token

    def is_end_of_word(self) -> bool:
        """True when a complete word terminates at this node."""
        return self._token is not None

    def no_children(self) -> bool:
        """True when this node is a leaf."""
        return len(self._children) == 0
+ + Args: + token: The token to insert + + Raises: + ValueError: If the token value is None or empty + """ + if token.value is None or len(token.value) == 0: + raise ValueError("Token value cannot be null or empty") + + current_node = self._root + for char in token.value: + current_node = current_node.map(char.lower()) + + if len(token.value) > self._max_length: + self._max_length = len(token.value) + + current_node.token = token + + def find(self, value: str) -> Optional[Token]: + """Finds a token by searching for the longest matching prefix in the trie. + + Args: + value: The string value to search for + + Returns: + The token if found, None otherwise + """ + if len(value) == 0: + return None + + index = 0 + current: Optional[TrieNode] = None + found: Optional[Token] = None + self._last_found = None + + while True: + next_node = (current or self._root).retrieve(value[index].lower()) + if next_node is None: + break + current = next_node + if current.is_end_of_word(): + found = current.token + self._last_found = value[:index + 1] + index += 1 + if index >= len(value) or index > self._max_length: + break + + if current is not None and current.is_end_of_word(): + found = current.token + self._last_found = value[:index] + + return found + + @property + def last_found(self) -> Optional[str]: + """Gets the last matched string from the most recent find operation. 
class ObjectUtils:
    """Utility class for object-related operations."""

    @staticmethod
    def is_instance_of_any(obj: Any, classes: List[Type]) -> bool:
        """Checks if an object is an instance of any of the provided classes.

        Args:
            obj: The object to check
            classes: The classes to test against

        Returns:
            True if the object is an instance of any class, False otherwise
        """
        # isinstance accepts a tuple of types natively; an empty tuple
        # yields False, matching any() over an empty generator.
        return isinstance(obj, tuple(classes))
+ + Args: + s: The string to unquote + + Returns: + The unquoted string + """ + if len(s) == 0: + return s + if len(s) == 1 and s in StringUtils.quotes: + return '' + first = s[0] + last = s[-1] + if first in StringUtils.quotes and first == last: + return s[1:-1] + if last in StringUtils.quotes and first != last: + return s[:-1] + if first in StringUtils.quotes and first != last: + return s[1:] + return s + + @staticmethod + def uncomment(s: str) -> str: + """Removes comment markers from a string. + + Args: + s: The comment string + + Returns: + The string without comment markers + """ + if len(s) < 2: + return s + if s[0] == '/' and s[1] == '/': + return s[2:] + if s[0] == '/' and s[1] == '*' and s[-2] == '*' and s[-1] == '/': + return s[2:-2] + return s + + @staticmethod + def remove_escaped_quotes(s: str, quote_char: str) -> str: + """Removes escape sequences before quotes in a string. + + Args: + s: The string to process + quote_char: The quote character that was escaped + + Returns: + The string with escape sequences removed + """ + unescaped = '' + i = 0 + while i < len(s): + if i < len(s) - 1 and s[i] == '\\' and s[i + 1] == quote_char: + i += 1 + unescaped += s[i] + i += 1 + return unescaped + + @staticmethod + def remove_escaped_braces(s: str) -> str: + """Removes escaped braces ({{ and }}) from f-strings. + + Args: + s: The string to process + + Returns: + The string with escaped braces resolved + """ + unescaped = '' + i = 0 + while i < len(s): + if i < len(s) - 1 and ((s[i] == '{' and s[i + 1] == '{') or (s[i] == '}' and s[i + 1] == '}')): + i += 1 + unescaped += s[i] + i += 1 + return unescaped + + @staticmethod + def can_be_identifier(s: str) -> bool: + """Checks if a string is a valid identifier. 
+ + Args: + s: The string to validate + + Returns: + True if the string can be used as an identifier, false otherwise + """ + lower = s.lower() + if len(lower) == 0: + return False + if lower[0] not in StringUtils.letters and lower[0] != '_': + return False + return all(char in StringUtils.word_valid_chars for char in lower) diff --git a/flowquery-py/tests/__init__.py b/flowquery-py/tests/__init__.py new file mode 100644 index 0000000..e9073bf --- /dev/null +++ b/flowquery-py/tests/__init__.py @@ -0,0 +1 @@ +"""Tests package for FlowQuery.""" diff --git a/flowquery-py/tests/compute/__init__.py b/flowquery-py/tests/compute/__init__.py new file mode 100644 index 0000000..7fd42c5 --- /dev/null +++ b/flowquery-py/tests/compute/__init__.py @@ -0,0 +1 @@ +"""Compute tests package.""" diff --git a/flowquery-py/tests/compute/test_runner.py b/flowquery-py/tests/compute/test_runner.py new file mode 100644 index 0000000..a939e79 --- /dev/null +++ b/flowquery-py/tests/compute/test_runner.py @@ -0,0 +1,1335 @@ +"""Tests for the FlowQuery Runner.""" + +import pytest +from typing import AsyncIterator +from flowquery.compute.runner import Runner +from flowquery.parsing.functions.async_function import AsyncFunction +from flowquery.parsing.functions.function_metadata import FunctionDef + + +# Test classes for CALL operation tests +@FunctionDef({ + "description": "Asynchronous function for testing CALL operation", + "category": "async", + "parameters": [], + "output": {"description": "Yields test values", "type": "any"}, +}) +class _CallTestFunction(AsyncFunction): + """Test async function for CALL operation.""" + + def __init__(self): + super().__init__("calltestfunction") + self._expected_parameter_count = 0 + + async def generate(self) -> AsyncIterator: + yield {"result": 1, "dummy": "a"} + yield {"result": 2, "dummy": "b"} + yield {"result": 3, "dummy": "c"} + + +@FunctionDef({ + "description": "Asynchronous function for testing CALL operation with no yielded expressions", + 
"category": "async", + "parameters": [], + "output": {"description": "Yields test values", "type": "any"}, +}) +class _CallTestFunctionNoObject(AsyncFunction): + """Test async function for CALL operation without object output.""" + + def __init__(self): + super().__init__("calltestfunctionnoobject") + self._expected_parameter_count = 0 + + async def generate(self) -> AsyncIterator: + yield 1 + yield 2 + yield 3 + + +class TestRunner: + """Test cases for the Runner class.""" + + @pytest.mark.asyncio + async def test_return(self): + """Test return operation.""" + runner = Runner("return 1 + 2 as sum") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 3} + + @pytest.mark.asyncio + async def test_return_with_multiple_expressions(self): + """Test return with multiple expressions.""" + runner = Runner("return 1 + 2 as sum, 3 + 4 as sum2") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 3, "sum2": 7} + + @pytest.mark.asyncio + async def test_unwind_and_return(self): + """Test unwind and return.""" + runner = Runner("unwind [1, 2, 3] as num return num") + await runner.run() + results = runner.results + assert len(results) == 3 + assert results[0] == {"num": 1} + assert results[1] == {"num": 2} + assert results[2] == {"num": 3} + + @pytest.mark.asyncio + async def test_load_and_return(self): + """Test load and return.""" + runner = Runner( + 'load json from "https://jsonplaceholder.typicode.com/todos" as todo return todo' + ) + await runner.run() + results = runner.results + assert len(results) > 0 + + @pytest.mark.asyncio + async def test_load_with_post_and_return(self): + """Test load with post and return.""" + runner = Runner( + 'load json from "https://jsonplaceholder.typicode.com/posts" post {userId: 1} as data return data' + ) + await runner.run() + results = runner.results + assert len(results) == 1 + + @pytest.mark.asyncio + async def 
test_load_which_should_throw_error(self): + """Test load which should throw error.""" + runner = Runner('load json from "http://non_existing" as data return data') + with pytest.raises(Exception) as exc_info: + await runner.run() + assert "non_existing" in str(exc_info.value).lower() or "failed" in str(exc_info.value).lower() + + @pytest.mark.asyncio + async def test_aggregated_return(self): + """Test aggregated return.""" + runner = Runner( + "unwind [1, 1, 2, 2] as i unwind [1, 2, 3, 4] as j return i, sum(j) as sum" + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "sum": 20} + assert results[1] == {"i": 2, "sum": 20} + + @pytest.mark.asyncio + async def test_aggregated_return_with_string(self): + """Test aggregated return with string.""" + runner = Runner( + 'unwind [1, 1, 2, 2] as i unwind ["a", "b", "c", "d"] as j return i, sum(j) as sum' + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "sum": "abcdabcd"} + assert results[1] == {"i": 2, "sum": "abcdabcd"} + + @pytest.mark.asyncio + async def test_aggregated_return_with_object(self): + """Test aggregated return with object.""" + runner = Runner( + "unwind [1, 1, 2, 2] as i unwind [1, 2, 3, 4] as j return i, {sum: sum(j)} as sum" + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "sum": {"sum": 20}} + assert results[1] == {"i": 2, "sum": {"sum": 20}} + + @pytest.mark.asyncio + async def test_aggregated_return_with_array(self): + """Test aggregated return with array.""" + runner = Runner( + "unwind [1, 1, 2, 2] as i unwind [1, 2, 3, 4] as j return i, [sum(j)] as sum" + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "sum": [20]} + assert results[1] == {"i": 2, "sum": [20]} + + @pytest.mark.asyncio + async def test_aggregated_return_with_multiple_aggregates(self): + """Test 
aggregated return with multiple aggregates.""" + runner = Runner( + "unwind [1, 1, 2, 2] as i unwind [1, 2, 3, 4] as j return i, sum(j) as sum, avg(j) as avg" + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "sum": 20, "avg": 2.5} + assert results[1] == {"i": 2, "sum": 20, "avg": 2.5} + + @pytest.mark.asyncio + async def test_avg_with_null(self): + """Test avg with null.""" + runner = Runner("return avg(null) as avg") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"avg": None} + + @pytest.mark.asyncio + async def test_sum_with_null(self): + """Test sum with null.""" + runner = Runner("return sum(null) as sum") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": None} + + @pytest.mark.asyncio + async def test_avg_with_one_value(self): + """Test avg with one value.""" + runner = Runner("return avg(1) as avg") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"avg": 1} + + @pytest.mark.asyncio + async def test_with_and_return(self): + """Test with and return.""" + runner = Runner("with 1 as a return a") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"a": 1} + + def test_nested_aggregate_functions(self): + """Test nested aggregate functions throw error.""" + with pytest.raises(Exception, match="Aggregate functions cannot be nested"): + Runner("unwind [1, 2, 3, 4] as i return sum(sum(i)) as sum") + + @pytest.mark.asyncio + async def test_with_and_return_with_unwind(self): + """Test with and return with unwind.""" + runner = Runner("with [1, 2, 3] as a unwind a as b return b as renamed") + await runner.run() + results = runner.results + assert len(results) == 3 + assert results[0] == {"renamed": 1} + assert results[1] == {"renamed": 2} + assert results[2] == {"renamed": 3} + + @pytest.mark.asyncio + async 
def test_predicate_function(self): + """Test predicate function.""" + runner = Runner("RETURN sum(n in [1, 2, 3] | n where n > 1) as sum") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 5} + + @pytest.mark.asyncio + async def test_predicate_without_where(self): + """Test predicate without where.""" + runner = Runner("RETURN sum(n in [1, 2, 3] | n) as sum") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 6} + + @pytest.mark.asyncio + async def test_predicate_with_return_expression(self): + """Test predicate with return expression.""" + runner = Runner("RETURN sum(n in [1+2+3, 2, 3] | n^2) as sum") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 49} + + @pytest.mark.asyncio + async def test_range_function(self): + """Test range function.""" + runner = Runner("RETURN range(1, 3) as range") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"range": [1, 2, 3]} + + @pytest.mark.asyncio + async def test_range_function_with_unwind_and_case(self): + """Test range function with unwind and case.""" + runner = Runner( + "unwind range(1, 3) as num return case when num > 1 then num else null end as ret" + ) + await runner.run() + results = runner.results + assert len(results) == 3 + assert results[0] == {"ret": None} + assert results[1] == {"ret": 2} + assert results[2] == {"ret": 3} + + @pytest.mark.asyncio + async def test_size_function(self): + """Test size function.""" + runner = Runner("RETURN size([1, 2, 3]) as size") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"size": 3} + + @pytest.mark.asyncio + async def test_rand_and_round_functions(self): + """Test rand and round functions.""" + runner = Runner("RETURN round(rand() * 10) as rand") + await runner.run() + results = runner.results + assert 
len(results) == 1 + assert results[0]["rand"] <= 10 + + @pytest.mark.asyncio + async def test_split_function(self): + """Test split function.""" + runner = Runner('RETURN split("a,b,c", ",") as split') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"split": ["a", "b", "c"]} + + @pytest.mark.asyncio + async def test_f_string(self): + """Test f-string.""" + runner = Runner( + 'with range(1,3) as numbers RETURN f"hello {sum(n in numbers | n)}" as f' + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"f": "hello 6"} + + @pytest.mark.asyncio + async def test_aggregated_with_and_return(self): + """Test aggregated with and return.""" + runner = Runner( + """ + unwind [1, 1, 2, 2] as i + unwind range(1, 3) as j + with i, sum(j) as sum + return i, sum + """ + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "sum": 12} + assert results[1] == {"i": 2, "sum": 12} + + @pytest.mark.asyncio + async def test_aggregated_with_using_collect_and_return(self): + """Test aggregated with using collect and return.""" + runner = Runner( + """ + unwind [1, 1, 2, 2] as i + unwind range(1, 3) as j + with i, collect(j) as collected + return i, collected + """ + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "collected": [1, 2, 3, 1, 2, 3]} + assert results[1] == {"i": 2, "collected": [1, 2, 3, 1, 2, 3]} + + @pytest.mark.asyncio + async def test_collect_distinct(self): + """Test collect distinct.""" + runner = Runner( + """ + unwind [1, 1, 2, 2] as i + unwind range(1, 3) as j + with i, collect(distinct j) as collected + return i, collected + """ + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "collected": [1, 2, 3]} + assert results[1] == {"i": 2, "collected": [1, 2, 3]} + + @pytest.mark.asyncio + async def 
test_collect_distinct_with_associative_array(self): + """Test collect distinct with associative array.""" + runner = Runner( + """ + unwind [1, 1, 2, 2] as i + unwind range(1, 3) as j + with i, collect(distinct {j: j}) as collected + return i, collected + """ + ) + await runner.run() + results = runner.results + assert len(results) == 2 + assert results[0] == {"i": 1, "collected": [{"j": 1}, {"j": 2}, {"j": 3}]} + assert results[1] == {"i": 2, "collected": [{"j": 1}, {"j": 2}, {"j": 3}]} + + @pytest.mark.asyncio + async def test_join_function(self): + """Test join function.""" + runner = Runner('RETURN join(["a", "b", "c"], ",") as join') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"join": "a,b,c"} + + @pytest.mark.asyncio + async def test_join_function_with_empty_array(self): + """Test join function with empty array.""" + runner = Runner('RETURN join([], ",") as join') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"join": ""} + + @pytest.mark.asyncio + async def test_tojson_function(self): + """Test tojson function.""" + runner = Runner("RETURN tojson('{\"a\": 1, \"b\": 2}') as tojson") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"tojson": {"a": 1, "b": 2}} + + @pytest.mark.asyncio + async def test_tojson_function_with_lookup(self): + """Test tojson function with lookup.""" + runner = Runner("RETURN tojson('{\"a\": 1, \"b\": 2}').a as tojson") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"tojson": 1} + + @pytest.mark.asyncio + async def test_replace_function(self): + """Test replace function.""" + runner = Runner('RETURN replace("hello", "l", "x") as replace') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"replace": "hexxo"} + + @pytest.mark.asyncio + async def 
test_f_string_with_escaped_braces(self): + """Test f-string with escaped braces.""" + runner = Runner( + 'with range(1,3) as numbers RETURN f"hello {{sum(n in numbers | n)}}" as f' + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"f": "hello {sum(n in numbers | n)}"} + + @pytest.mark.asyncio + async def test_predicate_function_with_collection_from_lookup(self): + """Test predicate function with collection from lookup.""" + runner = Runner("RETURN sum(n in tojson('{\"a\": [1, 2, 3]}').a | n) as sum") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 6} + + @pytest.mark.asyncio + async def test_stringify_function(self): + """Test stringify function.""" + runner = Runner("RETURN stringify({a: 1, b: 2}) as stringify") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"stringify": '{\n "a": 1,\n "b": 2\n}'} + + @pytest.mark.asyncio + async def test_associative_array_with_key_which_is_keyword(self): + """Test associative array with key which is keyword.""" + runner = Runner("RETURN {return: 1} as aa") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"aa": {"return": 1}} + + @pytest.mark.asyncio + async def test_lookup_which_is_keyword(self): + """Test lookup which is keyword.""" + runner = Runner("RETURN {return: 1}.return as aa") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"aa": 1} + + @pytest.mark.asyncio + async def test_lookup_which_is_keyword_bracket(self): + """Test lookup which is keyword with bracket notation.""" + runner = Runner('RETURN {return: 1}["return"] as aa') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"aa": 1} + + @pytest.mark.asyncio + async def test_return_with_expression_alias_which_starts_with_keyword(self): + """Test return with 
expression alias which starts with keyword.""" + runner = Runner('RETURN 1 as return1, ["hello", "world"] as notes') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"return1": 1, "notes": ["hello", "world"]} + + @pytest.mark.asyncio + async def test_return_with_where_clause(self): + """Test return with where clause.""" + runner = Runner("unwind range(1,100) as n with n return n where n >= 20 and n <= 30") + await runner.run() + results = runner.results + assert len(results) == 11 + assert results[0] == {"n": 20} + assert results[10] == {"n": 30} + + @pytest.mark.asyncio + async def test_return_with_where_clause_and_expression_alias(self): + """Test return with where clause and expression alias.""" + runner = Runner( + "unwind range(1,100) as n with n return n as number where n >= 20 and n <= 30" + ) + await runner.run() + results = runner.results + assert len(results) == 11 + assert results[0] == {"number": 20} + assert results[10] == {"number": 30} + + @pytest.mark.asyncio + async def test_aggregated_return_with_where_clause(self): + """Test aggregated return with where clause.""" + runner = Runner( + "unwind range(1,100) as n with n where n >= 20 and n <= 30 return sum(n) as sum" + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"sum": 275} + + @pytest.mark.asyncio + async def test_chained_aggregated_return_with_where_clause(self): + """Test chained aggregated return with where clause.""" + runner = Runner( + """ + unwind [1, 1, 2, 2] as i + unwind range(1, 4) as j + return i, sum(j) as sum + where i = 1 + """ + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"i": 1, "sum": 20} + + @pytest.mark.asyncio + async def test_predicate_function_with_collection_from_function(self): + """Test predicate function with collection from function.""" + runner = Runner( + """ + unwind range(1, 10) as i + unwind range(1, 10) 
as j + return i, sum(j), avg(j), sum(n in collect(j) | n) as sum + """ + ) + await runner.run() + results = runner.results + assert len(results) == 10 + assert results[0] == {"i": 1, "expr1": 55, "expr2": 5.5, "sum": 55} + + @pytest.mark.asyncio + async def test_limit(self): + """Test limit.""" + runner = Runner( + """ + unwind range(1, 10) as i + unwind range(1, 10) as j + limit 5 + return j + """ + ) + await runner.run() + results = runner.results + assert len(results) == 50 + + @pytest.mark.asyncio + async def test_range_lookup(self): + """Test range lookup.""" + runner = Runner( + """ + with range(1, 10) as numbers + return + numbers[:] as subset1, + numbers[0:3] as subset2, + numbers[:-2] as subset3 + """ + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == { + "subset1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "subset2": [1, 2, 3], + "subset3": [1, 2, 3, 4, 5, 6, 7, 8], + } + + @pytest.mark.asyncio + async def test_return_negative_number(self): + """Test return -1.""" + runner = Runner("return -1 as num") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"num": -1} + + @pytest.mark.asyncio + async def test_unwind_range_lookup(self): + """Test unwind range lookup.""" + runner = Runner( + """ + with range(1,10) as arr + unwind arr[2:-2] as a + return a + """ + ) + await runner.run() + results = runner.results + assert len(results) == 6 + assert results[0] == {"a": 3} + assert results[5] == {"a": 8} + + @pytest.mark.asyncio + async def test_range_with_size(self): + """Test range with size.""" + runner = Runner( + """ + with range(1,10) as data + return range(0, size(data)-1) as indices + """ + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"indices": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]} + + @pytest.mark.asyncio + async def test_keys_function(self): + """Test keys function.""" + runner = Runner('RETURN keys({name: "Alice", 
age: 30}) as keys') + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"keys": ["name", "age"]} + + @pytest.mark.asyncio + async def test_type_function(self): + """Test type function.""" + runner = Runner( + """ + RETURN type(123) as type1, + type("hello") as type2, + type([1, 2, 3]) as type3, + type({a: 1, b: 2}) as type4, + type(null) as type5 + """ + ) + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == { + "type1": "number", + "type2": "string", + "type3": "array", + "type4": "object", + "type5": "null", + } + + @pytest.mark.asyncio + async def test_equality_comparison(self): + """Test equality comparison.""" + runner = Runner( + """ + unwind range(1,10) as i + return i=5 as `isEqual`, i<>5 as `isNotEqual` + """ + ) + await runner.run() + results = runner.results + assert len(results) == 10 + for index, result in enumerate(results): + if index + 1 == 5: + assert result == {"isEqual": 1, "isNotEqual": 0} + else: + assert result == {"isEqual": 0, "isNotEqual": 1} + + @pytest.mark.asyncio + async def test_create_node_operation(self): + """Test create node operation.""" + runner = Runner( + """ + CREATE VIRTUAL (:TestPerson) AS { + with 1 as x + RETURN x + } + """ + ) + await runner.run() + results = runner.results + assert len(results) == 0 + + @pytest.mark.asyncio + async def test_create_node_and_match_operations(self): + """Test create node and match operations.""" + create = Runner( + """ + CREATE VIRTUAL (:MatchPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ) + await create.run() + match = Runner("MATCH (n:MatchPerson) RETURN n") + await match.run() + results = match.results + assert len(results) == 2 + assert results[0]["n"] is not None + assert results[0]["n"]["id"] == 1 + assert results[0]["n"]["name"] == "Person 1" + assert results[1]["n"] is not None + assert 
results[1]["n"]["id"] == 2 + assert results[1]["n"]["name"] == "Person 2" + + @pytest.mark.asyncio + async def test_complex_match_operation(self): + """Test complex match operation.""" + await Runner( + """ + CREATE VIRTUAL (:AgePerson) AS { + unwind [ + {id: 1, name: 'Person 1', age: 30}, + {id: 2, name: 'Person 2', age: 25}, + {id: 3, name: 'Person 3', age: 35} + ] as record + RETURN record.id as id, record.name as name, record.age as age + } + """ + ).run() + match = Runner( + """ + MATCH (n:AgePerson) + WHERE n.age > 29 + RETURN n.name AS name, n.age AS age + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + assert results[0] == {"name": "Person 1", "age": 30} + assert results[1] == {"name": "Person 3", "age": 35} + + @pytest.mark.asyncio + async def test_match(self): + """Test match operation.""" + await Runner( + """ + CREATE VIRTUAL (:SimplePerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + match = Runner( + """ + MATCH (n:SimplePerson) + RETURN n.name AS name + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + assert results[0] == {"name": "Person 1"} + assert results[1] == {"name": "Person 2"} + + @pytest.mark.asyncio + async def test_match_with_nested_join(self): + """Test match with nested join.""" + await Runner( + """ + CREATE VIRTUAL (:JoinPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + match = Runner( + """ + MATCH (a:JoinPerson), (b:JoinPerson) + WHERE a.id <> b.id + RETURN a.name AS name1, b.name AS name2 + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + assert results[0] == {"name1": "Person 1", "name2": "Person 2"} + assert results[1] == {"name1": "Person 2", "name2": "Person 1"} + + @pytest.mark.asyncio + async def 
test_match_with_graph_pattern(self): + """Test match with graph pattern.""" + await Runner( + """ + CREATE VIRTUAL (:User) AS { + UNWIND [ + {id: 1, name: 'User 1', manager_id: null}, + {id: 2, name: 'User 2', manager_id: 1}, + {id: 3, name: 'User 3', manager_id: 1}, + {id: 4, name: 'User 4', manager_id: 2} + ] AS record + RETURN record.id AS id, record.name AS name, record.manager_id AS manager_id + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:User)-[:MANAGED_BY]-(:User) AS { + UNWIND [ + {id: 1, manager_id: null}, + {id: 2, manager_id: 1}, + {id: 3, manager_id: 1}, + {id: 4, manager_id: 2} + ] AS record + RETURN record.id AS left_id, record.manager_id AS right_id + } + """ + ).run() + match = Runner( + """ + MATCH (user:User)-[r:MANAGED_BY]-(manager:User) + RETURN user.name AS user, manager.name AS manager + """ + ) + await match.run() + results = match.results + assert len(results) == 3 + assert results[0] == {"user": "User 2", "manager": "User 1"} + assert results[1] == {"user": "User 3", "manager": "User 1"} + assert results[2] == {"user": "User 4", "manager": "User 2"} + + @pytest.mark.asyncio + async def test_match_with_multiple_hop_graph_pattern(self): + """Test match with multiple hop graph pattern.""" + await Runner( + """ + CREATE VIRTUAL (:HopPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:HopPerson)-[:KNOWS]-(:HopPerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH (a:HopPerson)-[:KNOWS*]-(c:HopPerson) + RETURN a.name AS name1, c.name AS name2 + """ + ) + await match.run() + results = match.results + assert len(results) == 3 + assert results[0] == {"name1": "Person 1", 
"name2": "Person 2"} + assert results[1] == {"name1": "Person 1", "name2": "Person 3"} + assert results[2] == {"name1": "Person 2", "name2": "Person 3"} + + @pytest.mark.asyncio + async def test_match_with_double_graph_pattern(self): + """Test match with double graph pattern.""" + await Runner( + """ + CREATE VIRTUAL (:DoublePerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:DoublePerson)-[:KNOWS]-(:DoublePerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH (a:DoublePerson)-[:KNOWS]-(b:DoublePerson)-[:KNOWS]-(c:DoublePerson) + RETURN a.name AS name1, b.name AS name2, c.name AS name3 + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + assert results[0] == {"name1": "Person 1", "name2": "Person 2", "name3": "Person 3"} + assert results[1] == {"name1": "Person 2", "name2": "Person 3", "name3": "Person 4"} + + @pytest.mark.asyncio + async def test_match_with_referenced_to_previous_variable(self): + """Test match with referenced to previous variable.""" + await Runner( + """ + CREATE VIRTUAL (:RefPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:RefPerson)-[:KNOWS]-(:RefPerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH (a:RefPerson)-[:KNOWS]-(b:RefPerson) + 
MATCH (b)-[:KNOWS]-(c:RefPerson) + RETURN a.name AS name1, b.name AS name2, c.name AS name3 + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + assert results[0] == {"name1": "Person 1", "name2": "Person 2", "name3": "Person 3"} + assert results[1] == {"name1": "Person 2", "name2": "Person 3", "name3": "Person 4"} + + @pytest.mark.asyncio + async def test_match_and_return_full_node(self): + """Test match and return full node.""" + await Runner( + """ + CREATE VIRTUAL (:FullPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + match = Runner( + """ + MATCH (n:FullPerson) + RETURN n + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + assert results[0]["n"] is not None + assert results[0]["n"]["id"] == 1 + assert results[0]["n"]["name"] == "Person 1" + assert results[1]["n"] is not None + assert results[1]["n"]["id"] == 2 + assert results[1]["n"]["name"] == "Person 2" + + @pytest.mark.asyncio + async def test_call_operation_with_async_function(self): + """Test call operation with async function.""" + runner = Runner("CALL calltestfunction() YIELD result RETURN result") + await runner.run() + results = runner.results + assert len(results) == 3 + assert results[0] == {"result": 1} + assert results[1] == {"result": 2} + assert results[2] == {"result": 3} + + @pytest.mark.asyncio + async def test_call_operation_with_aggregation(self): + """Test call operation with aggregation.""" + runner = Runner("CALL calltestfunction() YIELD result RETURN sum(result) as total") + await runner.run() + results = runner.results + assert len(results) == 1 + assert results[0] == {"total": 6} + + @pytest.mark.asyncio + async def test_call_operation_as_last_operation(self): + """Test call operation as last operation.""" + runner = Runner("CALL calltestfunction()") + await runner.run() + results = runner.results + assert 
len(results) == 3 + assert results[0] == {"result": 1, "dummy": "a"} + assert results[1] == {"result": 2, "dummy": "b"} + assert results[2] == {"result": 3, "dummy": "c"} + + @pytest.mark.asyncio + async def test_call_operation_as_last_operation_with_yield(self): + """Test call operation as last operation with yield.""" + runner = Runner("CALL calltestfunction() YIELD result") + await runner.run() + results = runner.results + assert len(results) == 3 + assert results[0] == {"result": 1} + assert results[1] == {"result": 2} + assert results[2] == {"result": 3} + + def test_call_operation_with_no_yielded_expressions(self): + """Test call operation with no yielded expressions throws error.""" + with pytest.raises(ValueError, match="CALL operations must have a YIELD clause"): + Runner("CALL calltestfunctionnoobject() RETURN 1") + + @pytest.mark.asyncio + async def test_return_graph_pattern(self): + """Test return graph pattern.""" + await Runner( + """ + CREATE VIRTUAL (:PatternPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:PatternPerson)-[:KNOWS]-(:PatternPerson) AS { + unwind [ + {left_id: 1, since: '2020-01-01', right_id: 2} + ] as record + RETURN record.left_id as left_id, record.since as since, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH p=(:PatternPerson)-[:KNOWS]-(:PatternPerson) + RETURN p AS pattern + """ + ) + await match.run() + results = match.results + assert len(results) == 1 + assert results[0]["pattern"] is not None + assert len(results[0]["pattern"]) == 3 + + @pytest.mark.asyncio + async def test_circular_graph_pattern(self): + """Test circular graph pattern.""" + await Runner( + """ + CREATE VIRTUAL (:CircularPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + 
).run() + await Runner( + """ + CREATE VIRTUAL (:CircularPerson)-[:KNOWS]-(:CircularPerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 1} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH p=(:CircularPerson)-[:KNOWS]-(:CircularPerson)-[:KNOWS]-(:CircularPerson) + RETURN p AS pattern + """ + ) + await match.run() + results = match.results + assert len(results) == 2 + + @pytest.mark.asyncio + async def test_circular_graph_pattern_with_variable_length_should_throw_error(self): + """Test circular graph pattern with variable length should throw error.""" + await Runner( + """ + CREATE VIRTUAL (:CircularVarPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:CircularVarPerson)-[:KNOWS]-(:CircularVarPerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 1} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH p=(:CircularVarPerson)-[:KNOWS*]-(:CircularVarPerson) + RETURN p AS pattern + """ + ) + with pytest.raises(ValueError, match="Circular relationship detected"): + await match.run() + + @pytest.mark.asyncio + async def test_multi_hop_match_with_variable_length_relationships(self): + """Test multi-hop match with variable length relationships.""" + await Runner( + """ + CREATE VIRTUAL (:MultiHopPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:MultiHopPerson)-[:KNOWS]-(:MultiHopPerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN 
record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH (a:MultiHopPerson)-[r:KNOWS*0..3]->(b:MultiHopPerson) + RETURN a, r, b + """ + ) + await match.run() + results = match.results + assert len(results) == 6 + + @pytest.mark.asyncio + async def test_return_match_pattern_with_variable_length_relationships(self): + """Test return match pattern with variable length relationships.""" + await Runner( + """ + CREATE VIRTUAL (:VarLenPerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:VarLenPerson)-[:KNOWS]-(:VarLenPerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH p=(a:VarLenPerson)-[:KNOWS*0..3]->(b:VarLenPerson) + RETURN p AS pattern + """ + ) + await match.run() + results = match.results + assert len(results) == 6 + + @pytest.mark.asyncio + async def test_statement_with_graph_pattern_in_where_clause(self): + """Test statement with graph pattern in where clause.""" + await Runner( + """ + CREATE VIRTUAL (:WherePerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:WherePerson)-[:KNOWS]-(:WherePerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH (a:WherePerson), (b:WherePerson) + WHERE (a)-[:KNOWS]->(b) + RETURN a.name AS name1, b.name AS name2 
+ """ + ) + await match.run() + results = match.results + assert len(results) == 3 + assert results[0] == {"name1": "Person 1", "name2": "Person 2"} + assert results[1] == {"name1": "Person 2", "name2": "Person 3"} + assert results[2] == {"name1": "Person 3", "name2": "Person 4"} + + @pytest.mark.asyncio + async def test_person_who_does_not_know_anyone(self): + """Test person who does not know anyone.""" + await Runner( + """ + CREATE VIRTUAL (:LonePerson) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:LonePerson)-[:KNOWS]-(:LonePerson) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 1} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH (a:LonePerson) + WHERE NOT (a)-[:KNOWS]->(:LonePerson) + RETURN a.name AS name + """ + ) + await match.run() + results = match.results + assert len(results) == 1 + assert results[0] == {"name": "Person 3"} + + @pytest.mark.asyncio + async def test_manager_chain(self): + """Test manager chain.""" + await Runner( + """ + CREATE VIRTUAL (:ChainEmployee) AS { + unwind [ + {id: 1, name: 'Employee 1'}, + {id: 2, name: 'Employee 2'}, + {id: 3, name: 'Employee 3'}, + {id: 4, name: 'Employee 4'} + ] as record + RETURN record.id as id, record.name as name + } + """ + ).run() + await Runner( + """ + CREATE VIRTUAL (:ChainEmployee)-[:MANAGED_BY]-(:ChainEmployee) AS { + unwind [ + {left_id: 2, right_id: 1}, + {left_id: 3, right_id: 2}, + {left_id: 4, right_id: 2} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + """ + ).run() + match = Runner( + """ + MATCH p=(e:ChainEmployee)-[:MANAGED_BY*]->(m:ChainEmployee) + WHERE NOT (m)-[:MANAGED_BY]->(:ChainEmployee) + RETURN p + """ + ) + await match.run() + results = match.results + assert 
len(results) == 2 \ No newline at end of file diff --git a/flowquery-py/tests/graph/__init__.py b/flowquery-py/tests/graph/__init__.py new file mode 100644 index 0000000..82ed69f --- /dev/null +++ b/flowquery-py/tests/graph/__init__.py @@ -0,0 +1 @@ +"""Graph tests package.""" diff --git a/flowquery-py/tests/graph/test_create.py b/flowquery-py/tests/graph/test_create.py new file mode 100644 index 0000000..34a0e4e --- /dev/null +++ b/flowquery-py/tests/graph/test_create.py @@ -0,0 +1,56 @@ +"""Tests for graph node and relationship creation.""" + +import pytest +from flowquery.graph.database import Database +from flowquery.graph.physical_node import PhysicalNode +from flowquery.graph.physical_relationship import PhysicalRelationship +from flowquery.parsing.operations.create_node import CreateNode +from flowquery.parsing.operations.create_relationship import CreateRelationship +from flowquery.parsing.parser import Parser + + +class TestCreateNode: + """Test cases for CreateNode operation.""" + + @pytest.mark.asyncio + async def test_create_node_operation(self): + """Test CreateNode operation.""" + node = PhysicalNode(None, "Person") + assert node.label == "Person" + assert node.statement is None + + parser = Parser() + statement = parser.parse("WITH 1 as x RETURN x") + op = CreateNode(node, statement) + await op.run() + + db = Database.get_instance() + found = db.get_node(node) + assert found is not None + assert found.label == node.label + + data = await found.data() + assert data == [{"x": 1}] + + +class TestCreateRelationship: + """Test cases for CreateRelationship operation.""" + + @pytest.mark.asyncio + async def test_create_relationship_operation(self): + """Test CreateRelationship operation.""" + relationship = PhysicalRelationship() + relationship.type = "KNOWS" + assert relationship.type == "KNOWS" + assert relationship.statement is None + + parser = Parser() + statement = parser.parse("WITH 1 as x RETURN x") + op = CreateRelationship(relationship, statement) 
+ await op.run() + + db = Database.get_instance() + found = db.get_relationship(relationship) + + data = await found.data() + assert data == [{"x": 1}] diff --git a/flowquery-py/tests/graph/test_data.py b/flowquery-py/tests/graph/test_data.py new file mode 100644 index 0000000..3392eb8 --- /dev/null +++ b/flowquery-py/tests/graph/test_data.py @@ -0,0 +1,73 @@ +"""Tests for graph data iteration.""" + +import pytest +from flowquery.graph.data import Data +from flowquery.graph.node_data import NodeData +from flowquery.graph.relationship_data import RelationshipData + + +class TestDataIteration: + """Test cases for Data class iteration.""" + + def test_data_iteration(self): + """Test data iteration.""" + records = [ + {"id": "1", "name": "Alice"}, + {"id": "2", "name": "Bob"}, + {"id": "3", "name": "Charlie"}, + ] + data = Data(records) + assert data.next() is True + assert data.next() is True + assert data.next() is True + assert data.next() is False + + +class TestNodeDataFind: + """Test cases for NodeData find operations.""" + + def test_data_find(self): + """Test data find.""" + records = [ + {"id": "1", "name": "Alice"}, + {"id": "2", "name": "Bob"}, + {"id": "3", "name": "Charlie"}, + {"id": "2", "name": "Bob Duplicate"}, + ] + data = NodeData(records) + data.find("2") + assert data.current() == {"id": "2", "name": "Bob"} + assert data.find("2") is True + assert data.current() == {"id": "2", "name": "Bob Duplicate"} + assert data.find("2") is False + + def test_data_find_non_existing(self): + """Test data find non-existing.""" + records = [ + {"id": "1", "name": "Alice"}, + {"id": "2", "name": "Bob"}, + ] + data = NodeData(records) + assert data.find("3") is False + + +class TestRelationshipDataFind: + """Test cases for RelationshipData find operations.""" + + def test_relationship_data_find(self): + """Test RelationshipData find.""" + records = [ + {"left_id": "1", "right_id": "2", "type": "FRIEND", "id": "r1"}, + {"left_id": "2", "right_id": "3", "type": 
"COLLEAGUE", "id": "r2"}, + {"left_id": "1", "right_id": "3", "type": "FRIEND", "id": "r3"}, + ] + data = RelationshipData(records) + data.find("1") + assert data.current() == {"left_id": "1", "right_id": "2", "type": "FRIEND", "id": "r1"} + assert data.find("1") is True + assert data.current() == {"left_id": "1", "right_id": "3", "type": "FRIEND", "id": "r3"} + assert data.find("1") is False + assert data.find("2") is True + assert data.current() == {"left_id": "2", "right_id": "3", "type": "COLLEAGUE", "id": "r2"} + assert data.find("2") is False + assert data.find("4") is False diff --git a/flowquery-py/tests/graph/test_match.py b/flowquery-py/tests/graph/test_match.py new file mode 100644 index 0000000..2dbbf1e --- /dev/null +++ b/flowquery-py/tests/graph/test_match.py @@ -0,0 +1,40 @@ +"""Tests for graph pattern matching.""" + +import pytest +from flowquery.compute.runner import Runner +from flowquery.graph.physical_node import PhysicalNode +from flowquery.parsing.operations.create_node import CreateNode +from flowquery.parsing.parser import Parser + + +class TestMatch: + """Test cases for Match operation.""" + + @pytest.mark.asyncio + async def test_create_node_and_match_operations(self): + """Test CreateNode and match operations.""" + node = PhysicalNode(None, "Person") + assert node.label == "Person" + assert node.statement is None + + parser = Parser() + statement = parser.parse(""" + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + """) + op = CreateNode(node, statement) + await op.run() + + runner = Runner("match (n:Person) RETURN n") + await runner.run() + + assert len(runner.results) == 2 + assert runner.results[0]["n"] is not None + assert runner.results[0]["n"]["id"] == 1 + assert runner.results[0]["n"]["name"] == "Person 1" + assert runner.results[1]["n"] is not None + assert runner.results[1]["n"]["id"] == 2 + assert runner.results[1]["n"]["name"] == "Person 2" diff 
--git a/flowquery-py/tests/parsing/__init__.py b/flowquery-py/tests/parsing/__init__.py new file mode 100644 index 0000000..568104a --- /dev/null +++ b/flowquery-py/tests/parsing/__init__.py @@ -0,0 +1 @@ +"""Parsing tests package.""" diff --git a/flowquery-py/tests/parsing/test_context.py b/flowquery-py/tests/parsing/test_context.py new file mode 100644 index 0000000..b5ac74e --- /dev/null +++ b/flowquery-py/tests/parsing/test_context.py @@ -0,0 +1,34 @@ +"""Tests for the parsing context.""" + +import pytest +from flowquery.parsing.context import Context +from flowquery.parsing.functions.sum import Sum +from flowquery.parsing.functions.aggregate_function import AggregateFunction + + +class TestContext: + """Test cases for the Context class.""" + + def test_context_contains_type(self): + """Test Context containsType.""" + context = Context() + sum_func = Sum() + context.push(sum_func) + assert context.contains_type(AggregateFunction) is True + + def test_context_contains_type_false(self): + """Test Context containsType false.""" + context = Context() + assert context.contains_type(AggregateFunction) is False + + def test_context_push_and_pop(self): + """Test Context push and pop.""" + context = Context() + sum_func = Sum() + context.push(sum_func) + assert context.pop() is sum_func + + def test_context_pop_none(self): + """Test Context pop returns None when empty.""" + context = Context() + assert context.pop() is None diff --git a/flowquery-py/tests/parsing/test_expression.py b/flowquery-py/tests/parsing/test_expression.py new file mode 100644 index 0000000..e39b2b9 --- /dev/null +++ b/flowquery-py/tests/parsing/test_expression.py @@ -0,0 +1,49 @@ +"""Tests for expression evaluation.""" + +import pytest +from flowquery.parsing.expressions.expression import Expression +from flowquery.parsing.expressions.operator import ( + Add, Subtract, Multiply, Power, GreaterThan, And +) +from flowquery.parsing.expressions.number import Number + + +class TestExpression: + 
"""Test cases for the Expression class.""" + + def test_expression_shunting_yard_algorithm(self): + """Test Expression Shunting Yard algorithm.""" + expression = Expression() + expression.add_node(Number("2")) + expression.add_node(Add()) + expression.add_node(Number("3")) + expression.add_node(Multiply()) + expression.add_node(Number("4")) + expression.add_node(Subtract()) + expression.add_node(Number("2")) + expression.add_node(Power()) + expression.add_node(Number("2")) + expression.finish() + assert expression.value() == 10 + + def test_expression_with_and_operator(self): + """Test Expression with and operator.""" + expression = Expression() + expression.add_node(Number("2")) + expression.add_node(And()) + expression.add_node(Number("3")) + expression.finish() + assert expression.value() == 1 + + def test_comparison_with_and(self): + """Test 1 > 0 and 2 > 1.""" + expression = Expression() + expression.add_node(Number("1")) + expression.add_node(GreaterThan()) + expression.add_node(Number("0")) + expression.add_node(And()) + expression.add_node(Number("2")) + expression.add_node(GreaterThan()) + expression.add_node(Number("1")) + expression.finish() + assert expression.value() == 1 diff --git a/flowquery-py/tests/parsing/test_parser.py b/flowquery-py/tests/parsing/test_parser.py new file mode 100644 index 0000000..fc0d0dc --- /dev/null +++ b/flowquery-py/tests/parsing/test_parser.py @@ -0,0 +1,674 @@ +"""Tests for the FlowQuery parser.""" + +import pytest +from typing import AsyncIterator +from flowquery.parsing.parser import Parser +from flowquery.parsing.functions.async_function import AsyncFunction +from flowquery.parsing.functions.function_metadata import FunctionDef + + +# Test async function for CALL operation parsing test +# Named with underscore prefix to prevent pytest from trying to collect it as a test class +@FunctionDef({ + "description": "Asynchronous function for testing CALL operation", + "category": "async", + "parameters": [], + "output": 
{"description": "Yields test values", "type": "any"}, +}) +class _Test(AsyncFunction): + """Async function for CALL operation testing, registered as 'test'.""" + + def __init__(self): + super().__init__("test") # Register as 'test' + self._expected_parameter_count = 0 + + async def generate(self) -> AsyncIterator: + yield 1 + yield 2 + yield 3 + + +class TestParser: + """Test cases for the Parser class.""" + + def test_parser_basic(self): + """Test basic parser functionality.""" + parser = Parser() + ast = parser.parse("RETURN 1, 2, 3") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Number (1)\n" + "-- Expression\n" + "--- Number (2)\n" + "-- Expression\n" + "--- Number (3)" + ) + assert ast.print() == expected + + def test_parser_with_function(self): + """Test parser with function.""" + parser = Parser() + ast = parser.parse("RETURN rand()") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Function (rand)" + ) + assert ast.print() == expected + + def test_parser_with_associative_array(self): + """Test parser with associative array.""" + parser = Parser() + ast = parser.parse("RETURN {a: 1, b: 2}") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- AssociativeArray\n" + "---- KeyValuePair\n" + "----- String (a)\n" + "----- Expression\n" + "------ Number (1)\n" + "---- KeyValuePair\n" + "----- String (b)\n" + "----- Expression\n" + "------ Number (2)" + ) + assert ast.print() == expected + + def test_parser_with_json_array(self): + """Test parser with JSON array.""" + parser = Parser() + ast = parser.parse("RETURN [1, 2]") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- JSONArray\n" + "---- Expression\n" + "----- Number (1)\n" + "---- Expression\n" + "----- Number (2)" + ) + assert ast.print() == expected + + def test_parser_with_nested_associative_array(self): + """Test parser with nested associative array.""" + parser = Parser() + ast = parser.parse("RETURN {a:{}}") + expected 
= ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- AssociativeArray\n" + "---- KeyValuePair\n" + "----- String (a)\n" + "----- Expression\n" + "------ AssociativeArray" + ) + assert ast.print() == expected + + def test_parser_with_multiple_operations(self): + """Test parser with multiple operations.""" + parser = Parser() + ast = parser.parse("WITH 1 AS n RETURN n") + expected = ( + "ASTNode\n" + "- With\n" + "-- Expression (n)\n" + "--- Number (1)\n" + "- Return\n" + "-- Expression (n)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def test_parser_with_comments(self): + """Test parser with comments.""" + parser = Parser() + ast = parser.parse("WITH 1 AS n /* comment */ RETURN n") + expected = ( + "ASTNode\n" + "- With\n" + "-- Expression (n)\n" + "--- Number (1)\n" + "- Return\n" + "-- Expression (n)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def test_parser_with_unwind(self): + """Test parser with UNWIND.""" + parser = Parser() + ast = parser.parse("UNWIND [1, 2, 3] AS n RETURN n") + expected = ( + "ASTNode\n" + "- Unwind\n" + "-- Expression (n)\n" + "--- JSONArray\n" + "---- Expression\n" + "----- Number (1)\n" + "---- Expression\n" + "----- Number (2)\n" + "---- Expression\n" + "----- Number (3)\n" + "- Return\n" + "-- Expression (n)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def test_unwind_with_invalid_expression(self): + """Test Unwind with invalid expression.""" + parser = Parser() + with pytest.raises(Exception, match="Expected array, function, reference, or lookup"): + parser.parse("UNWIND 1 AS n RETURN n") + + def test_unwind_with_invalid_alias(self): + """Test Unwind with invalid alias.""" + parser = Parser() + with pytest.raises(Exception, match="Expected identifier"): + parser.parse("UNWIND [1, 2, 3] AS 1 RETURN n") + + def test_unwind_with_missing_alias(self): + """Test Unwind with missing alias.""" + parser = Parser() + with pytest.raises(Exception, match="Expected alias"): + 
parser.parse("UNWIND [1, 2, 3] RETURN n") + + def test_statement_with_where_clause(self): + """Test statement with where clause.""" + parser = Parser() + ast = parser.parse("with 1 as n where n > 0 return n") + expected = ( + "ASTNode\n" + "- With\n" + "-- Expression (n)\n" + "--- Number (1)\n" + "- Where\n" + "-- Expression\n" + "--- GreaterThan\n" + "---- Reference (n)\n" + "---- Number (0)\n" + "- Return\n" + "-- Expression (n)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def test_lookup(self): + """Test lookup expression.""" + parser = Parser() + ast = parser.parse("return {a: 1}.a") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Lookup\n" + "---- Identifier (a)\n" + "---- AssociativeArray\n" + "----- KeyValuePair\n" + "------ String (a)\n" + "------ Expression\n" + "------- Number (1)" + ) + assert ast.print() == expected + + def test_lookup_as_part_of_expression(self): + """Test lookup as part of expression.""" + parser = Parser() + ast = parser.parse("return {a: 1}.a + 1") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Add\n" + "---- Lookup\n" + "----- Identifier (a)\n" + "----- AssociativeArray\n" + "------ KeyValuePair\n" + "------- String (a)\n" + "------- Expression\n" + "-------- Number (1)\n" + "---- Number (1)" + ) + assert ast.print() == expected + + def test_lookup_with_nested_associative_array(self): + """Test lookup with nested associative array.""" + parser = Parser() + ast = parser.parse("return {a: {b: 1}}.a.b") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Lookup\n" + "---- Identifier (b)\n" + "---- Lookup\n" + "----- Identifier (a)\n" + "----- AssociativeArray\n" + "------ KeyValuePair\n" + "------- String (a)\n" + "------- Expression\n" + "-------- AssociativeArray\n" + "--------- KeyValuePair\n" + "---------- String (b)\n" + "---------- Expression\n" + "----------- Number (1)" + ) + assert ast.print() == expected + _return = ast.first_child() + 
assert _return.first_child().value() == 1 + + def test_lookup_with_json_array(self): + """Test lookup with JSON array.""" + parser = Parser() + ast = parser.parse("return [1, 2][1]") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Lookup\n" + "---- Expression\n" + "----- Number (1)\n" + "---- JSONArray\n" + "----- Expression\n" + "------ Number (1)\n" + "----- Expression\n" + "------ Number (2)" + ) + assert ast.print() == expected + _return = ast.first_child() + assert _return.first_child().value() == 2 + + def test_load_with_post(self): + """Test load with post.""" + parser = Parser() + ast = parser.parse( + 'load json from "https://jsonplaceholder.typicode.com/posts" post {userId: 1} as data return data' + ) + expected = ( + "ASTNode\n" + "- Load\n" + "-- JSON\n" + "-- From\n" + "--- Expression\n" + "---- String (https://jsonplaceholder.typicode.com/posts)\n" + "-- Post\n" + "--- Expression\n" + "---- AssociativeArray\n" + "----- KeyValuePair\n" + "------ String (userId)\n" + "------ Expression\n" + "------- Number (1)\n" + "-- Alias (data)\n" + "- Return\n" + "-- Expression (data)\n" + "--- Reference (data)" + ) + assert ast.print() == expected + + def test_nested_aggregate_functions(self): + """Test nested aggregate functions.""" + parser = Parser() + with pytest.raises(Exception, match="Aggregate functions cannot be nested"): + parser.parse("RETURN sum(sum(1))") + + def test_with_and_return_with_renamed_variable(self): + """Test with and return with renamed variable.""" + parser = Parser() + ast = parser.parse("WITH 1 AS n RETURN n AS m") + expected = ( + "ASTNode\n" + "- With\n" + "-- Expression (n)\n" + "--- Number (1)\n" + "- Return\n" + "-- Expression (m)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def test_with_and_return_with_variable_lookup(self): + """Test with and return with variable lookup.""" + parser = Parser() + ast = parser.parse("WITH {a: n} AS obj RETURN obj.a") + expected = ( + "ASTNode\n" + "- 
With\n" + "-- Expression (obj)\n" + "--- AssociativeArray\n" + "---- KeyValuePair\n" + "----- String (a)\n" + "----- Expression\n" + "------ Reference (n)\n" + "- Return\n" + "-- Expression\n" + "--- Lookup\n" + "---- Identifier (a)\n" + "---- Reference (obj)" + ) + assert ast.print() == expected + + def test_unwind(self): + """Test unwind.""" + parser = Parser() + ast = parser.parse("WITH [1, 2, 4] as n unwind n as i return i") + expected = ( + "ASTNode\n" + "- With\n" + "-- Expression (n)\n" + "--- JSONArray\n" + "---- Expression\n" + "----- Number (1)\n" + "---- Expression\n" + "----- Number (2)\n" + "---- Expression\n" + "----- Number (4)\n" + "- Unwind\n" + "-- Expression (i)\n" + "--- Reference (n)\n" + "- Return\n" + "-- Expression (i)\n" + "--- Reference (i)" + ) + assert ast.print() == expected + + def test_predicate_function(self): + """Test predicate function.""" + parser = Parser() + ast = parser.parse("RETURN sum(n in [1, 2, 3] | n where n > 1)") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- PredicateFunction (sum)\n" + "---- Reference (n)\n" + "---- Expression\n" + "----- JSONArray\n" + "------ Expression\n" + "------- Number (1)\n" + "------ Expression\n" + "------- Number (2)\n" + "------ Expression\n" + "------- Number (3)\n" + "---- Expression\n" + "----- Reference (n)\n" + "---- Where\n" + "----- Expression\n" + "------ GreaterThan\n" + "------- Reference (n)\n" + "------- Number (1)" + ) + assert ast.print() == expected + + def test_case_statement(self): + """Test case statement.""" + parser = Parser() + ast = parser.parse("RETURN CASE WHEN 1 THEN 2 ELSE 3 END") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Case\n" + "---- When\n" + "----- Expression\n" + "------ Number (1)\n" + "---- Then\n" + "----- Expression\n" + "------ Number (2)\n" + "---- Else\n" + "----- Expression\n" + "------ Number (3)" + ) + assert ast.print() == expected + + def test_functions_with_wrong_number_of_arguments(self): + 
"""Test functions with wrong number of arguments.""" + parser = Parser() + with pytest.raises(Exception, match="Function range expected 2 parameters, but got 1"): + parser.parse("RETURN range(1)") + with pytest.raises(Exception, match="Function range expected 2 parameters, but got 3"): + parser.parse("RETURN range(1, 2, 3)") + with pytest.raises(Exception, match="Function avg expected 1 parameters, but got 3"): + parser.parse("RETURN avg(1, 2, 3)") + with pytest.raises(Exception, match="Function size expected 1 parameters, but got 2"): + parser.parse("RETURN size(1, 2)") + with pytest.raises(Exception, match="Function round expected 1 parameters, but got 2"): + parser.parse("RETURN round(1, 2)") + + def test_non_well_formed_statements(self): + """Test non-well formed statements.""" + parser = Parser() + with pytest.raises(Exception, match="Only one RETURN statement is allowed"): + parser.parse("return 1 return 1") + # Note: Python implementation throws "Only one RETURN" for this case too + with pytest.raises(Exception, match="Only one RETURN statement is allowed"): + parser.parse("return 1 with 1 as n") + + def test_associative_array_with_backtick_string(self): + """Test associative array with backtick string.""" + parser = Parser() + ast = parser.parse("RETURN {`key`: `value`}") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- AssociativeArray\n" + "---- KeyValuePair\n" + "----- String (key)\n" + "----- Expression\n" + "------ Reference (value)" + ) + assert ast.print() == expected + + def test_limit(self): + """Test limit.""" + parser = Parser() + ast = parser.parse("unwind range(1, 10) as n limit 5 return n") + expected = ( + "ASTNode\n" + "- Unwind\n" + "-- Expression (n)\n" + "--- Function (range)\n" + "---- Expression\n" + "----- Number (1)\n" + "---- Expression\n" + "----- Number (10)\n" + "- Limit\n" + "- Return\n" + "-- Expression (n)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def 
test_return_negative_number(self): + """Test return -2.""" + parser = Parser() + ast = parser.parse("return -2") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Number (-2)" + ) + assert ast.print() == expected + + def test_call_operation(self): + """Test call operation.""" + parser = Parser() + ast = parser.parse("CALL test() YIELD result RETURN result") + expected = ( + "ASTNode\n" + "- Call\n" + "-- Expression (result)\n" + "--- Reference (result)\n" + "- Return\n" + "-- Expression (result)\n" + "--- Reference (result)" + ) + assert ast.print() == expected + + def test_f_string(self): + """Test f-string.""" + parser = Parser() + ast = parser.parse("with 1 as value RETURN f'Value is: {value}.'") + expected = ( + "ASTNode\n" + "- With\n" + "-- Expression (value)\n" + "--- Number (1)\n" + "- Return\n" + "-- Expression\n" + "--- FString\n" + "---- String (Value is: )\n" + "---- Expression\n" + "----- Reference (value)\n" + "---- String (.)" + ) + assert ast.print() == expected + + def test_not_equal_operator(self): + """Test not equal operator.""" + parser = Parser() + ast = parser.parse("RETURN 1 <> 2") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- NotEquals\n" + "---- Number (1)\n" + "---- Number (2)" + ) + assert ast.print() == expected + + def test_equal_operator(self): + """Test equal operator.""" + parser = Parser() + ast = parser.parse("RETURN 1 = 2") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Equals\n" + "---- Number (1)\n" + "---- Number (2)" + ) + assert ast.print() == expected + + def test_not_operator(self): + """Test not operator.""" + parser = Parser() + ast = parser.parse("RETURN NOT true") + expected = ( + "ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Not\n" + "---- Expression\n" + "----- Boolean" + ) + assert ast.print() == expected + + def test_create_node_operation(self): + """Test create node operation.""" + parser = Parser() + ast = parser.parse( + """ + CREATE 
VIRTUAL (:Person) AS { + unwind range(1, 3) AS id + return id, f'Person {id}' AS name + } + """ + ) + expected = ( + "ASTNode\n" + "- CreateNode" + ) + assert ast.print() == expected + + def test_match_operation(self): + """Test match operation.""" + parser = Parser() + ast = parser.parse("MATCH (n:Person) RETURN n") + expected = ( + "ASTNode\n" + "- Match\n" + "- Return\n" + "-- Expression (n)\n" + "--- Reference (n)" + ) + assert ast.print() == expected + + def test_create_relationship_operation(self): + """Test create relationship operation.""" + parser = Parser() + ast = parser.parse( + """ + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {from_id: 1, to_id: 2, since: '2020-01-01'}, + {from_id: 2, to_id: 3, since: '2021-01-01'} + ] AS pair + return pair.from_id AS left_id, pair.to_id AS right_id + } + """ + ) + expected = ( + "ASTNode\n" + "- CreateRelationship" + ) + assert ast.print() == expected + + def test_match_with_graph_pattern_including_relationships(self): + """Test match with graph pattern including relationships.""" + parser = Parser() + ast = parser.parse("MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a, b") + expected = ( + "ASTNode\n" + "- Match\n" + "- Return\n" + "-- Expression (a)\n" + "--- Reference (a)\n" + "-- Expression (b)\n" + "--- Reference (b)" + ) + assert ast.print() == expected + + def test_parse_relationship_with_hops(self): + """Test parse relationship with hops.""" + parser = Parser() + ast = parser.parse("MATCH (a:Test)-[:KNOWS*1..3]->(b:Test) RETURN a, b") + expected = ( + "ASTNode\n" + "- Match\n" + "- Return\n" + "-- Expression (a)\n" + "--- Reference (a)\n" + "-- Expression (b)\n" + "--- Reference (b)" + ) + assert ast.print() == expected + + def test_parse_statement_with_graph_pattern_in_where_clause(self): + """Test parse statement with graph pattern in where clause.""" + parser = Parser() + ast = parser.parse("MATCH (a:Person) WHERE (a)-[:KNOWS]->(:Person) RETURN a") + expected = ( + "ASTNode\n" + "- 
Match\n" + "- Where\n" + "-- Expression\n" + "--- PatternExpression\n" + "---- NodeReference\n" + "---- Relationship\n" + "---- Node\n" + "- Return\n" + "-- Expression (a)\n" + "--- Reference (a)" + ) + assert ast.print() == expected diff --git a/flowquery-py/tests/test_extensibility.py b/flowquery-py/tests/test_extensibility.py new file mode 100644 index 0000000..bb454b2 --- /dev/null +++ b/flowquery-py/tests/test_extensibility.py @@ -0,0 +1,611 @@ +"""Tests for the FlowQuery extensibility API.""" + +import pytest +from typing import TypedDict, Optional, List, Any +from flowquery.parsing.functions.function import Function +from flowquery.parsing.functions.aggregate_function import AggregateFunction +from flowquery.parsing.functions.async_function import AsyncFunction +from flowquery.parsing.functions.predicate_function import PredicateFunction +from flowquery.parsing.functions.reducer_element import ReducerElement +from flowquery.parsing.functions.function_metadata import ( + FunctionDef, + FunctionMetadata, + FunctionCategory, + ParameterSchema, + OutputSchema, + FunctionDefOptions, + get_function_metadata, + get_registered_function_factory, +) + + +class TestExtensibilityExports: + """Test cases for the extensibility API.""" + + def test_function_class_can_be_extended(self): + """Function class is exported and can be extended.""" + class CustomFunction(Function): + def __init__(self): + super().__init__("customFunc") + self._expected_parameter_count = 1 + + def value(self) -> str: + return "custom value" + + func = CustomFunction() + assert func.name == "customFunc" + assert str(func) == "Function (customFunc)" + assert func.value() == "custom value" + + def test_function_validates_parameter_count(self): + """Function validates parameter count when set.""" + class TwoParamFunction(Function): + def __init__(self): + super().__init__("twoParam") + self._expected_parameter_count = 2 + + func = TwoParamFunction() + + # Should throw when wrong number of parameters + 
with pytest.raises(ValueError, match="Function twoParam expected 2 parameters, but got 0"): + func.parameters = [] + + def test_function_without_expected_count_accepts_any(self): + """Function without expected parameter count accepts any number.""" + class FlexibleFunction(Function): + def __init__(self): + super().__init__("flexible") + # _expected_parameter_count is None by default + + func = FlexibleFunction() + # Should not throw + func.parameters = [] + assert len(func.get_children()) == 0 + + +class TestAggregateFunctionExtension: + """Test cases for AggregateFunction extension.""" + + def test_aggregate_function_can_be_extended(self): + """AggregateFunction class is exported and can be extended.""" + class SumElement(ReducerElement): + def __init__(self): + self._value: float = 0 + + @property + def value(self) -> float: + return self._value + + @value.setter + def value(self, v: float) -> None: + self._value = v + + class CustomSum(AggregateFunction): + def __init__(self): + super().__init__("customSum") + self._total: float = 0 + + def reduce(self, element: ReducerElement) -> None: + self._total += element.value + + def element(self) -> ReducerElement: + el = SumElement() + el.value = self._total + return el + + def value(self) -> float: + return self._total + + func = CustomSum() + assert func.name == "customSum" + + +class TestFunctionDefDecorator: + """Test cases for the FunctionDef decorator.""" + + def test_function_def_decorator_registers_metadata(self): + """FunctionDef decorator registers function metadata.""" + @FunctionDef({ + "description": "Test function for unit testing", + "category": "scalar", + "parameters": [ + {"name": "value", "description": "Input value", "type": "any"} + ], + "output": {"description": "Result", "type": "any"}, + "examples": ["WITH test(1) AS x RETURN x"] + }) + class TestFunction(Function): + def __init__(self): + super().__init__("testFunc") + self._expected_parameter_count = 1 + + def value(self): + return 
self.get_children()[0].value() + + # Get the registered metadata using the function name (as registered by @FunctionDef) + metadata = get_function_metadata("testFunc", "scalar") + assert metadata is not None + assert metadata.description == "Test function for unit testing" + assert metadata.category == "scalar" + assert len(metadata.parameters) == 1 + assert metadata.parameters[0]["name"] == "value" + + def test_function_def_decorator_for_aggregate_function(self): + """FunctionDef decorator can be applied to an aggregate function.""" + @FunctionDef({ + "description": "Test aggregate function", + "category": "aggregate", + "parameters": [{"name": "value", "description": "Numeric value", "type": "number"}], + "output": {"description": "Aggregated result", "type": "number"}, + }) + class TestAggExt(AggregateFunction): + def __init__(self): + super().__init__("testAggExt") + self._sum = 0 + + def value(self): + return self._sum + + instance = TestAggExt() + assert instance.name == "testAggExt" + assert instance.value() == 0 + + def test_function_def_decorator_for_predicate_function(self): + """FunctionDef decorator can be applied to a predicate function.""" + @FunctionDef({ + "description": "Test predicate function", + "category": "predicate", + "parameters": [{"name": "list", "description": "List to check", "type": "array"}], + "output": {"description": "Boolean result", "type": "boolean"}, + }) + class TestPredExt(PredicateFunction): + def __init__(self): + super().__init__("testPredExt") + + def value(self): + return True + + instance = TestPredExt() + assert instance.name == "testPredExt" + assert instance.value() is True + + @pytest.mark.asyncio + async def test_function_def_decorator_for_async_provider(self): + """FunctionDef decorator can be applied to an async provider.""" + from flowquery.parsing.functions.function_metadata import ( + get_function_metadata, + get_registered_function_factory, + ) + + @FunctionDef({ + "description": "Test async provider for 
extensibility", + "category": "async", + "parameters": [ + { + "name": "count", + "description": "Number of items", + "type": "number", + "required": False, + "default": 1, + }, + ], + "output": {"description": "Data object", "type": "object"}, + }) + class Simple(AsyncFunction): + async def generate(self, count: int = 1): + for i in range(count): + yield {"id": i, "data": f"item{i}"} + + # Verify the decorated class still works correctly + loader = Simple("simple") + results = [] + async for item in loader.generate(2): + results.append(item) + assert len(results) == 2 + assert results[0] == {"id": 0, "data": "item0"} + assert results[1] == {"id": 1, "data": "item1"} + + # Verify the async provider was registered (using class name) + provider = get_registered_function_factory("simple", "async") + assert provider is not None + assert callable(provider) + + # Verify the metadata was registered + metadata = get_function_metadata("simple", "async") + assert metadata is not None + assert metadata.name == "simple" + assert metadata.category == "async" + assert metadata.description == "Test async provider for extensibility" + + +class TestPredicateFunctionExtension: + """Test cases for PredicateFunction extension.""" + + def test_predicate_function_can_be_extended(self): + """PredicateFunction class is exported and can be extended.""" + class CustomPredicate(PredicateFunction): + def __init__(self): + super().__init__("customPredicate") + + def value(self): + return True + + pred = CustomPredicate() + assert pred.name == "customPredicate" + assert str(pred) == "PredicateFunction (customPredicate)" + assert pred.value() is True + + +class TestAsyncFunctionExtension: + """Test cases for AsyncFunction extension.""" + + def test_async_function_can_be_instantiated(self): + """AsyncFunction class is exported and can be instantiated.""" + async_func = AsyncFunction("testAsync") + assert async_func.name == "testAsync" + + +class TestReducerElementExtension: + """Test cases for 
ReducerElement extension.""" + + def test_reducer_element_can_be_extended(self): + """ReducerElement class is exported and can be extended.""" + class NumberElement(ReducerElement): + def __init__(self): + self._num = 0 + + @property + def value(self): + return self._num + + @value.setter + def value(self, v): + self._num = v + + elem = NumberElement() + elem.value = 42 + assert elem.value == 42 + + +class TestTypeExports: + """Test cases for type exports.""" + + def test_function_metadata_type(self): + """FunctionMetadata type can be used.""" + meta = FunctionMetadata( + name="typeTest", + description="Testing type exports", + category="scalar", + parameters=[], + output={"description": "Output", "type": "string"}, + ) + assert meta.name == "typeTest" + assert meta.description == "Testing type exports" + + def test_function_category_accepts_standard_and_custom(self): + """FunctionCategory type accepts standard and custom categories.""" + scalar: FunctionCategory = "scalar" + aggregate: FunctionCategory = "aggregate" + predicate: FunctionCategory = "predicate" + async_cat: FunctionCategory = "async" + custom: FunctionCategory = "myCustomCategory" + + assert scalar == "scalar" + assert aggregate == "aggregate" + assert predicate == "predicate" + assert async_cat == "async" + assert custom == "myCustomCategory" + + +class TestPluginFunctionsIntegration: + """Test cases for plugin functions integration with FlowQuery.""" + + @pytest.mark.asyncio + async def test_custom_scalar_function_in_query(self): + """Custom scalar function can be used in a FlowQuery statement.""" + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "Doubles a number", + "category": "scalar", + "parameters": [{"name": "value", "description": "Number to double", "type": "number"}], + "output": {"description": "Doubled value", "type": "number"}, + }) + class Double(Function): + def __init__(self): + super().__init__("double") + self._expected_parameter_count = 1 + + def 
value(self): + return self.get_children()[0].value() * 2 + + runner = Runner("WITH 5 AS num RETURN double(num) AS result") + await runner.run() + + assert len(runner.results) == 1 + assert runner.results[0] == {"result": 10} + + @pytest.mark.asyncio + async def test_custom_string_function_in_query(self): + """Custom string function can be used in a FlowQuery statement.""" + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "Reverses a string", + "category": "scalar", + "parameters": [{"name": "text", "description": "String to reverse", "type": "string"}], + "output": {"description": "Reversed string", "type": "string"}, + }) + class StrReverse(Function): + def __init__(self): + super().__init__("strreverse") + self._expected_parameter_count = 1 + + def value(self): + input_str = str(self.get_children()[0].value()) + return input_str[::-1] + + runner = Runner("WITH 'hello' AS s RETURN strreverse(s) AS reversed") + await runner.run() + + assert len(runner.results) == 1 + assert runner.results[0] == {"reversed": "olleh"} + + @pytest.mark.asyncio + async def test_custom_function_with_expressions(self): + """Custom function works with expressions and other functions.""" + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "Adds 100 to a number", + "category": "scalar", + "parameters": [{"name": "value", "description": "Number", "type": "number"}], + "output": {"description": "Number plus 100", "type": "number"}, + }) + class AddHundred(Function): + def __init__(self): + super().__init__("addhundred") + self._expected_parameter_count = 1 + + def value(self): + return self.get_children()[0].value() + 100 + + runner = Runner("WITH 5 * 3 AS num RETURN addhundred(num) + 1 AS result") + await runner.run() + + assert len(runner.results) == 1 + assert runner.results[0] == {"result": 116} # (5*3) + 100 + 1 = 116 + + @pytest.mark.asyncio + async def test_multiple_custom_functions_together(self): + """Multiple custom 
functions can be used together.""" + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "Triples a number", + "category": "scalar", + "parameters": [{"name": "value", "description": "Number to triple", "type": "number"}], + "output": {"description": "Tripled value", "type": "number"}, + }) + class Triple(Function): + def __init__(self): + super().__init__("triple") + self._expected_parameter_count = 1 + + def value(self): + return self.get_children()[0].value() * 3 + + @FunctionDef({ + "description": "Squares a number", + "category": "scalar", + "parameters": [{"name": "value", "description": "Number to square", "type": "number"}], + "output": {"description": "Squared value", "type": "number"}, + }) + class Square(Function): + def __init__(self): + super().__init__("square") + self._expected_parameter_count = 1 + + def value(self): + v = self.get_children()[0].value() + return v * v + + runner = Runner("WITH 2 AS num RETURN triple(num) AS tripled, square(num) AS squared") + await runner.run() + + assert len(runner.results) == 1 + assert runner.results[0] == {"tripled": 6, "squared": 4} + + @pytest.mark.asyncio + async def test_custom_aggregate_function_in_query(self): + """Custom aggregate function can be used in a FlowQuery statement.""" + from flowquery.compute.runner import Runner + + # Custom reducer element for MinValue + class MinReducerElement(ReducerElement): + def __init__(self): + self._value = None + + @property + def value(self): + return self._value + + @value.setter + def value(self, val): + self._value = val + + @FunctionDef({ + "description": "Collects the minimum value", + "category": "aggregate", + "parameters": [{"name": "value", "description": "Value to compare", "type": "number"}], + "output": {"description": "Minimum value", "type": "number"}, + }) + class MinValue(AggregateFunction): + def __init__(self): + super().__init__("minvalue") + self._expected_parameter_count = 1 + + def reduce(self, element): + current = 
self.first_child().value() + if element.value is None or current < element.value: + element.value = current + + def element(self): + return MinReducerElement() + + runner = Runner("unwind [5, 2, 8, 1, 9] AS num RETURN minvalue(num) AS min") + await runner.run() + + assert len(runner.results) == 1 + assert runner.results[0] == {"min": 1} + + @pytest.mark.asyncio + async def test_custom_async_provider_in_load_json_from_statement(self): + """Custom async provider can be used in LOAD JSON FROM statement.""" + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "Provides example data for testing", + "category": "async", + "parameters": [], + "output": {"description": "Example data object", "type": "object"}, + }) + class _GetExampleData(AsyncFunction): + def __init__(self): + super().__init__("getexampledata") + self._expected_parameter_count = 0 + + async def generate(self): + yield {"id": 1, "name": "Alice"} + yield {"id": 2, "name": "Bob"} + + runner = Runner("LOAD JSON FROM getexampledata() AS data RETURN data.id AS id, data.name AS name") + await runner.run() + + assert len(runner.results) == 2 + assert runner.results[0] == {"id": 1, "name": "Alice"} + assert runner.results[1] == {"id": 2, "name": "Bob"} + + @pytest.mark.asyncio + async def test_function_names_are_case_insensitive(self): + """Function names are case-insensitive.""" + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "Test function for case insensitivity", + "category": "async", + "parameters": [], + "output": {"description": "Test data", "type": "object"}, + }) + class _MixedCaseFunc(AsyncFunction): + def __init__(self): + super().__init__("mixedcasefunc") + self._expected_parameter_count = 0 + + async def generate(self): + yield {"value": 42} + + # Test using different casings in FlowQuery statements + runner1 = Runner("LOAD JSON FROM mixedcasefunc() AS d RETURN d.value AS v") + await runner1.run() + assert runner1.results[0] == {"v": 42} + 
+ runner2 = Runner("LOAD JSON FROM MIXEDCASEFUNC() AS d RETURN d.value AS v") + await runner2.run() + assert runner2.results[0] == {"v": 42} + + def test_parameter_schema_type_can_be_used(self): + """ParameterSchema type can be used.""" + param: ParameterSchema = { + "name": "testParam", + "description": "A test parameter", + "type": "string", + "required": True, + "default": "default value", + "example": "example value", + } + + assert param["name"] == "testParam" + assert param["required"] is True + + def test_parameter_schema_with_nested_types(self): + """ParameterSchema with nested types.""" + array_param: ParameterSchema = { + "name": "items", + "description": "Array of items", + "type": "array", + } + + object_param: ParameterSchema = { + "name": "config", + "description": "Configuration object", + "type": "object", + } + + assert array_param["type"] == "array" + assert object_param["type"] == "object" + + def test_output_schema_type_can_be_used(self): + """OutputSchema type can be used.""" + output: OutputSchema = { + "description": "Result output", + "type": "object", + "example": {"success": True, "data": []}, + } + + assert output["type"] == "object" + assert output["example"]["success"] is True + + def test_function_def_options_type_can_be_used(self): + """FunctionDefOptions type can be used.""" + options: FunctionDefOptions = { + "description": "Function options test", + "category": "scalar", + "parameters": [], + "output": {"description": "Output", "type": "string"}, + "notes": "Some additional notes", + } + + assert options["description"] == "Function options test" + assert options["notes"] == "Some additional notes" + + @pytest.mark.asyncio + async def test_custom_function_retrieved_via_functions(self): + """Custom function can be retrieved via functions() in a FlowQuery statement.""" + from flowquery.extensibility import FunctionDef + from flowquery.parsing.functions.function import Function + from flowquery.parsing.functions.function_metadata 
import get_function_metadata + from flowquery.compute.runner import Runner + + @FunctionDef({ + "description": "A unique test function for introspection", + "category": "scalar", + "parameters": [{"name": "x", "description": "Input value", "type": "number"}], + "output": {"description": "Output value", "type": "number"}, + }) + class IntrospectTestFunc(Function): + def __init__(self): + super().__init__("introspectTestFunc") + self._expected_parameter_count = 1 + + def value(self): + return self.get_children()[0].value() + 42 + + # First verify the function is registered + metadata = get_function_metadata("introspectTestFunc") + assert metadata is not None + assert metadata.name == "introspecttestfunc" + + # Use functions() with UNWIND to find the registered function + runner = Runner(""" + WITH functions() AS funcs + UNWIND funcs AS f + WITH f WHERE f.name = 'introspecttestfunc' + RETURN f.name AS name, f.description AS description, f.category AS category + """) + await runner.run() + + assert len(runner.results) == 1 + assert runner.results[0]["name"] == "introspecttestfunc" + assert runner.results[0]["description"] == "A unique test function for introspection" + assert runner.results[0]["category"] == "scalar" diff --git a/flowquery-py/tests/tokenization/__init__.py b/flowquery-py/tests/tokenization/__init__.py new file mode 100644 index 0000000..9183ce9 --- /dev/null +++ b/flowquery-py/tests/tokenization/__init__.py @@ -0,0 +1 @@ +"""Tokenization tests package.""" diff --git a/flowquery-py/tests/tokenization/test_token_mapper.py b/flowquery-py/tests/tokenization/test_token_mapper.py new file mode 100644 index 0000000..5887e3b --- /dev/null +++ b/flowquery-py/tests/tokenization/test_token_mapper.py @@ -0,0 +1,60 @@ +"""Tests for the TokenMapper class.""" + +import pytest +from flowquery.tokenization.token_mapper import TokenMapper +from flowquery.tokenization.symbol import Symbol +from flowquery.tokenization.keyword import Keyword +from 
flowquery.tokenization.operator import Operator + + +class TestTokenMapper: + """Test cases for the TokenMapper class.""" + + def test_mapper_with_symbols(self): + """Test mapper with Symbol enum.""" + mapper = TokenMapper(Symbol) + + assert mapper.map(Symbol.LEFT_PARENTHESIS.value) is not None + assert mapper.map(Symbol.RIGHT_PARENTHESIS.value) is not None + assert mapper.map(Symbol.COMMA.value) is not None + assert mapper.map(Symbol.DOT.value) is not None + assert mapper.map(Symbol.COLON.value) is not None + + # Operator should not be found in symbol mapper + assert mapper.map(Operator.ADD.value) is None + + def test_mapper_with_keywords(self): + """Test mapper with Keyword enum.""" + mapper = TokenMapper(Keyword) + + assert mapper.map(Keyword.MATCH.value) is not None + assert mapper.map(Keyword.RETURN.value) is not None + assert mapper.map(Keyword.WHERE.value) is not None + + assert mapper.map("not_a_keyword") is None + + def test_mapper_with_operators(self): + """Test mapper with Operator enum.""" + mapper = TokenMapper(Operator) + + assert mapper.map(Operator.GREATER_THAN_OR_EQUAL.value) is not None + assert mapper.map(Operator.ADD.value) is not None + assert mapper.map(Operator.SUBTRACT.value) is not None + assert mapper.map(Operator.NOT.value) is not None + assert mapper.map(Operator.EQUALS.value) is not None + assert mapper.map(Operator.NOT_EQUALS.value) is not None + assert mapper.map(Operator.LESS_THAN.value) is not None + assert mapper.map(Operator.LESS_THAN_OR_EQUAL.value) is not None + + # Partial match should still work + assert mapper.map(Operator.GREATER_THAN_OR_EQUAL.value + "1") is not None + + assert mapper.map("i_s_n_o_t_an_operator") is None + + def test_mapper_with_mixed_types(self): + """Test mapper with mixed types.""" + mapper = TokenMapper(Symbol) + + assert mapper.map(Symbol.LEFT_PARENTHESIS.value) is not None + assert mapper.map(Symbol.RIGHT_PARENTHESIS.value) is not None + assert mapper.map(Symbol.COMMA.value) is not None diff --git 
a/flowquery-py/tests/tokenization/test_tokenizer.py b/flowquery-py/tests/tokenization/test_tokenizer.py new file mode 100644 index 0000000..e49b5ac --- /dev/null +++ b/flowquery-py/tests/tokenization/test_tokenizer.py @@ -0,0 +1,164 @@ +"""Tests for the FlowQuery tokenizer.""" + +import pytest +from flowquery.tokenization.tokenizer import Tokenizer + + +class TestTokenizer: + """Test cases for the Tokenizer class.""" + + def test_tokenize_returns_array_of_tokens(self): + """Tokenizer.tokenize() should return an array of tokens.""" + tokenizer = Tokenizer("MATCH (n:Person) RETURN n") + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_tokenize_handles_escaped_quotes(self): + """Tokenizer.tokenize() should handle escaped quotes.""" + tokenizer = Tokenizer('return "hello \\"world"') + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_predicate_function(self): + """Test predicate function tokenization.""" + tokenizer = Tokenizer("RETURN sum(n in [1, 2, 3] | n where n > 1)") + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_f_string(self): + """Test f-string tokenization.""" + tokenizer = Tokenizer('RETURN f"hello {world}"') + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_another_f_string(self): + """Test another f-string tokenization.""" + tokenizer = Tokenizer("RETURN f`Value is: {value}`") + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_basic(self): + """Test basic tokenization.""" + tokenizer = Tokenizer("WITH 1 AS n RETURN n") + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_associative_array_with_backtick_string(self): + """Test associative array with backtick string.""" + tokenizer = Tokenizer("RETURN {`key`: `value`}") + tokens = tokenizer.tokenize() + assert tokens is not None 
+ assert len(tokens) > 0 + + def test_limit(self): + """Test limit keyword.""" + tokenizer = Tokenizer("unwind range(1, 10) as n limit 5 return n") + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_return_negative_number(self): + """Test return with negative number.""" + tokenizer = Tokenizer("return [:-2], -2") + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_range_with_function(self): + """Test range with function.""" + tokenizer = Tokenizer(""" + with range(1,10) as data + return range(0, size(data)-1) as indices + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_create_virtual_node(self): + """Test CREATE VIRTUAL node tokenization.""" + tokenizer = Tokenizer(""" + CREATE VIRTUAL (:Person) AS { + call users() YIELD id, name + } + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_create_virtual_relationship(self): + """Test CREATE VIRTUAL relationship tokenization.""" + tokenizer = Tokenizer(""" + CREATE VIRTUAL (:Person)-[:KNOWS]->(:Person) AS { + call friendships() YIELD user1_id, user2_id + } + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_match_based_on_virtual_node(self): + """Test match based on virtual node.""" + tokenizer = Tokenizer(""" + MATCH (a:Person) + RETURN a.name + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_match_based_on_virtual_nodes_and_relationships(self): + """Test match based on virtual nodes and relationships.""" + tokenizer = Tokenizer(""" + MATCH (a:Person)-[r:KNOWS]->(b:Person) + RETURN a.name, b.name + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_not_equal_operator(self): + """Test not equal operator.""" + tokenizer = Tokenizer(""" + MATCH (n:Person) + WHERE n.age 
<> 30 + RETURN n.name AS name, n.age AS age + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_equal_operator(self): + """Test equal operator.""" + tokenizer = Tokenizer(""" + MATCH (n:Person) + WHERE n.age = 30 + RETURN n.name AS name, n.age AS age + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_boolean_operators(self): + """Test boolean operators.""" + tokenizer = Tokenizer(""" + return true AND false OR true NOT false + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 + + def test_relationship_with_hops(self): + """Test relationship with hops.""" + tokenizer = Tokenizer(""" + MATCH (a:Person)-[r:KNOWS*1..3]->(b:Person) + RETURN a.name, b.name + """) + tokens = tokenizer.tokenize() + assert tokens is not None + assert len(tokens) > 0 diff --git a/flowquery-py/tests/tokenization/test_trie.py b/flowquery-py/tests/tokenization/test_trie.py new file mode 100644 index 0000000..7677f3e --- /dev/null +++ b/flowquery-py/tests/tokenization/test_trie.py @@ -0,0 +1,30 @@ +"""Tests for the Trie data structure.""" + +import pytest +from flowquery.tokenization.trie import Trie +from flowquery.tokenization.token import Token +from flowquery.tokenization.keyword import Keyword + + +class TestTrie: + """Test cases for the Trie class.""" + + def test_trie_insert_and_find(self): + """Test Trie insert and find operations.""" + trie = Trie() + + # Insert all keywords + for keyword in Keyword: + token = Token.method(keyword.value) + if token is not None and token.value is not None: + trie.insert(token) + found = trie.find(keyword.value) + assert found is not None + + # Test for non-existent values + assert trie.find("not_a_keyword") is None + assert trie.find("not_an_operator") is None + assert trie.find("not_a_keyword_or_operator") is None + assert trie.find("") is None + assert trie.find(" ") is None + assert trie.find("a") is None 
diff --git a/src/compute/runner.ts b/src/compute/runner.ts index bfcb3ee..7b3f0d9 100644 --- a/src/compute/runner.ts +++ b/src/compute/runner.ts @@ -1,14 +1,15 @@ +import ASTNode from "../parsing/ast_node"; +import Function from "../parsing/functions/function"; +import { FunctionMetadata } from "../parsing/functions/function_metadata"; import Operation from "../parsing/operations/operation"; import Parser from "../parsing/parser"; -import { FunctionMetadata } from "../parsing/functions/function_metadata"; -import Function from "../parsing/functions/function"; /** * Executes a FlowQuery statement and retrieves the results. - * + * * The Runner class parses a FlowQuery statement into an AST and executes it, * managing the execution flow from the first operation to the final return statement. - * + * * @example * ```typescript * const runner = new Runner("WITH 1 as x RETURN x"); @@ -24,7 +25,11 @@ class Runner { * List all registered functions with their metadata. * Added dynamically in index.browser.ts / index.node.ts */ - static listFunctions: (options?: { category?: string; asyncOnly?: boolean; syncOnly?: boolean }) => FunctionMetadata[]; + static listFunctions: (options?: { + category?: string; + asyncOnly?: boolean; + syncOnly?: boolean; + }) => FunctionMetadata[]; /** * Get metadata for a specific function. @@ -37,32 +42,32 @@ class Runner { * Added dynamically in index.browser.ts / index.node.ts */ static Function: typeof Function; - + /** * Creates a new Runner instance and parses the FlowQuery statement. 
- * + * * @param statement - The FlowQuery statement to execute * @throws {Error} If the statement is null, empty, or contains syntax errors */ - constructor(statement: string | null = null) { - if(statement === null || statement === "") { - throw new Error("Statement cannot be null or empty"); + constructor(statement: string | null = null, ast: ASTNode | null = null) { + if ((statement === null || statement === "") && ast === null) { + throw new Error("Either statement or AST must be provided"); } - const parser = new Parser(); - const ast = parser.parse(statement); - this.first = ast.firstChild() as Operation; - this.last = ast.lastChild() as Operation; + const _ast = ast !== null ? ast : new Parser().parse(statement!); + this.first = _ast.firstChild() as Operation; + this.last = _ast.lastChild() as Operation; } - + /** * Executes the parsed FlowQuery statement. - * + * * @returns A promise that resolves when execution completes * @throws {Error} If an error occurs during execution */ public async run(): Promise { return new Promise(async (resolve, reject) => { try { + await this.first.initialize(); await this.first.run(); await this.first.finish(); resolve(); @@ -71,10 +76,10 @@ class Runner { } }); } - + /** * Gets the results from the executed statement. 
- * + * * @returns The results from the last operation (typically a RETURN statement) */ public get results(): any { @@ -82,4 +87,4 @@ class Runner { } } -export default Runner; \ No newline at end of file +export default Runner; diff --git a/src/graph/data.ts b/src/graph/data.ts new file mode 100644 index 0000000..1c4f47b --- /dev/null +++ b/src/graph/data.ts @@ -0,0 +1,112 @@ +class IndexEntry { + private _positions: number[]; + private _index: number = -1; + + constructor(positions: number[] = []) { + this._positions = positions; + } + public add(position: number): void { + this._positions.push(position); + } + public get position(): number { + return this._positions[this._index]; + } + public reset(): void { + this._index = -1; + } + public next(): boolean { + if (this._index < this._positions.length - 1) { + this._index++; + return true; + } + return false; + } + public clone(): IndexEntry { + return new IndexEntry([...this._positions]); + } +} + +class Layer { + private _index: Map = new Map(); + private _current: number = -1; + constructor(index: Map) { + this._index = index; + } + public get index(): Map { + return this._index; + } + public get current(): number { + return this._current; + } + public set current(value: number) { + this._current = value; + } +} + +class Data { + protected _records: Record[] = []; + private _layers: Map = new Map(); + + constructor(records: Record[] = []) { + this._records = records; + this._layers.set(0, new Layer(new Map())); + } + protected _buildIndex(key: string, level: number = 0): void { + this.layer(level).index.clear(); + this._records.forEach((record, idx) => { + if (record.hasOwnProperty(key)) { + if (!this.layer(level).index.has(record[key])) { + this.layer(level).index.set(record[key], new IndexEntry()); + } + this.layer(level).index.get(record[key])!.add(idx); + } + }); + } + public layer(level: number = 0): Layer { + if (!this._layers.has(level)) { + const first = this._layers.get(0)!; + const cloned = new 
Map(); + for (const [key, entry] of first.index) { + cloned.set(key, entry.clone()); + } + this._layers.set(level, new Layer(cloned)); + } + return this._layers.get(level)!; + } + protected _find(key: string, level: number = 0): boolean { + if (!this.layer(level).index.has(key)) { + this.layer(level).current = this._records.length; // Move to end + return false; + } else { + const entry = this.layer(level).index.get(key)!; + const more = entry.next(); + if (!more) { + this.layer(level).current = this._records.length; // Move to end + return false; + } + this.layer(level).current = entry.position; + return true; + } + } + public reset(): void { + this.layer(0).current = -1; + for (const entry of this.layer(0).index.values()) { + entry.reset(); + } + } + public next(level: number = 0): boolean { + if (this.layer(level).current < this._records.length - 1) { + this.layer(level).current++; + return true; + } + return false; + } + public current(level: number = 0): Record | null { + if (this.layer(level).current < this._records.length) { + return this._records[this.layer(level).current]; + } + return null; + } +} + +export default Data; diff --git a/src/graph/database.ts b/src/graph/database.ts new file mode 100644 index 0000000..16a36c5 --- /dev/null +++ b/src/graph/database.ts @@ -0,0 +1,63 @@ +import ASTNode from "../parsing/ast_node"; +import Node from "./node"; +import NodeData, { NodeRecord } from "./node_data"; +import PhysicalNode from "./physical_node"; +import PhysicalRelationship from "./physical_relationship"; +import Relationship from "./relationship"; +import RelationshipData, { RelationshipRecord } from "./relationship_data"; + +class Database { + private static instance: Database; + private static nodes: Map = new Map(); + private static relationships: Map = new Map(); + + public static getInstance(): Database { + if (!Database.instance) { + Database.instance = new Database(); + } + return Database.instance; + } + public addNode(node: Node, statement: 
ASTNode): void { + if (node.label === null) { + throw new Error("Node label is null"); + } + const physical = new PhysicalNode(null, node.label); + physical.statement = statement; + Database.nodes.set(node.label, physical); + } + public getNode(node: Node): PhysicalNode | null { + return Database.nodes.get(node.label!) || null; + } + public addRelationship(relationship: Relationship, statement: ASTNode): void { + if (relationship.type === null) { + throw new Error("Relationship type is null"); + } + const physical = new PhysicalRelationship(null, relationship.type); + physical.statement = statement; + Database.relationships.set(relationship.type, physical); + } + public getRelationship(relationship: Relationship): PhysicalRelationship | null { + return Database.relationships.get(relationship.type!) || null; + } + public async getData(element: Node | Relationship): Promise { + if (element instanceof Node) { + const node = this.getNode(element); + if (node === null) { + throw new Error(`Physical node not found for label ${element.label}`); + } + const data = await node.data(); + return new NodeData(data as NodeRecord[]); + } else if (element instanceof Relationship) { + const relationship = this.getRelationship(element); + if (relationship === null) { + throw new Error(`Physical relationship not found for type ${element.type}`); + } + const data = await relationship.data(); + return new RelationshipData(data as RelationshipRecord[]); + } else { + throw new Error("Element is neither Node nor Relationship"); + } + } +} + +export default Database; diff --git a/src/graph/hops.ts b/src/graph/hops.ts new file mode 100644 index 0000000..28501d0 --- /dev/null +++ b/src/graph/hops.ts @@ -0,0 +1,22 @@ +class Hops { + private _min: number = 0; + private _max: number = 1; + + public set min(min: number) { + this._min = min; + } + public get min(): number { + return this._min; + } + public set max(max: number) { + this._max = max; + } + public get max(): number { + return 
this._max; + } + public multi(): boolean { + return this._max > 1 || this._max === -1; + } +} + +export default Hops; diff --git a/src/graph/node.ts b/src/graph/node.ts new file mode 100644 index 0000000..6211bea --- /dev/null +++ b/src/graph/node.ts @@ -0,0 +1,99 @@ +import ASTNode from "../parsing/ast_node"; +import Expression from "../parsing/expressions/expression"; +import NodeData, { NodeRecord } from "./node_data"; +import Relationship from "./relationship"; + +class Node extends ASTNode { + protected _identifier: string | null = null; + protected _label: string | null = null; + protected _properties: Map = new Map(); + protected _value: NodeRecord | null = null; + + protected _incoming: Relationship | null = null; + protected _outgoing: Relationship | null = null; + + private _data: NodeData | null = null; + + // Function to be called after each 'next' and 'find' operation + // It is used to chain operations in a traversal + // For example, after matching on a graph pattern, we may want to + // continue to the next node or relationship in the pattern, or + // perform the next operation in a statement. 
+ private _todoNext: (() => Promise) | null = null; + + constructor(identifier: string | null = null, label: string | null = null) { + super(); + this._identifier = identifier; + this._label = label; + } + public set identifier(identifier: string) { + this._identifier = identifier; + } + public get identifier(): string | null { + return this._identifier; + } + public set label(label: string) { + this._label = label; + } + public get label(): string | null { + return this._label; + } + public get properties(): Map { + return this._properties; + } + public setProperty(key: string, value: Expression): void { + this._properties.set(key, value); + } + public getProperty(key: string): Expression | null { + return this._properties.get(key) || null; + } + public setValue(value: NodeRecord): void { + this._value = value; + } + public value(): NodeRecord | null { + return this._value; + } + public set outgoing(relationship: Relationship | null) { + this._outgoing = relationship; + } + public get outgoing(): Relationship | null { + return this._outgoing; + } + public set incoming(relationship: Relationship | null) { + this._incoming = relationship; + } + public get incoming(): Relationship | null { + return this._incoming; + } + public setData(data: NodeData | null): void { + this._data = data; + } + public async next(): Promise { + this._data?.reset(); + while (this._data?.next()) { + this.setValue(this._data?.current()!); + await this._outgoing?.find(this._value!.id); + await this.runTodoNext(); + } + } + public async find(id: string, hop: number = 0): Promise { + this._data?.reset(); + while (this._data?.find(id, hop)) { + this.setValue(this._data?.current(hop) as NodeRecord); + this._incoming?.setEndNode(this); + await this._outgoing?.find(this._value!.id, hop); + await this.runTodoNext(); + } + } + // For setting a function to be called after each 'next' and 'find' operation + public set todoNext(func: (() => Promise) | null) { + this._todoNext = func; + } + public async 
runTodoNext(): Promise { + if (this._todoNext) { + await this._todoNext(); + } + } +} + +export default Node; diff --git a/src/graph/node_data.ts b/src/graph/node_data.ts new file mode 100644 index 0000000..6b3ec9f --- /dev/null +++ b/src/graph/node_data.ts @@ -0,0 +1,18 @@ +import Data from "./data"; + +export type NodeRecord = { id: string } & Record; + +class NodeData extends Data { + constructor(records: NodeRecord[] = []) { + super(records); + super._buildIndex("id"); + } + public find(id: string, hop: number = 0): boolean { + return super._find(id, hop); + } + public current(hop: number = 0): NodeRecord | null { + return super.current(hop) as NodeRecord | null; + } +} + +export default NodeData; diff --git a/src/graph/node_reference.ts b/src/graph/node_reference.ts new file mode 100644 index 0000000..d390f26 --- /dev/null +++ b/src/graph/node_reference.ts @@ -0,0 +1,33 @@ +import Node from "./node"; + +class NodeReference extends Node { + private _reference: Node | null = null; + constructor(base: Node, reference: Node) { + super(); + this._identifier = base.identifier; + this._label = base.label; + this._properties = base.properties; + this._outgoing = base.outgoing; + this._incoming = base.incoming; + this._reference = reference; + } + public get reference(): Node | null { + return this._reference; + } + public async next(): Promise { + this.setValue(this._reference!.value()!); + await this._outgoing?.find(this._value!.id); + await this.runTodoNext(); + } + public async find(id: string, hop: number = 0): Promise { + const referenced = this._reference?.value(); + if (id !== referenced?.id) { + return; + } + this.setValue(referenced!); + await this._outgoing?.find(this._value!.id, hop); + await this.runTodoNext(); + } +} + +export default NodeReference; diff --git a/src/graph/pattern.ts b/src/graph/pattern.ts new file mode 100644 index 0000000..3948058 --- /dev/null +++ b/src/graph/pattern.ts @@ -0,0 +1,101 @@ +import ASTNode from "../parsing/ast_node"; 
+import Database from "./database"; +import Node from "./node"; +import NodeData from "./node_data"; +import NodeReference from "./node_reference"; +import Relationship from "./relationship"; +import RelationshipData from "./relationship_data"; +import RelationshipReference from "./relationship_reference"; + +class Pattern extends ASTNode { + private _identifier: string | null = null; + protected _chain: (Node | Relationship)[] = []; + public set identifier(id: string | null) { + this._identifier = id; + } + public get identifier(): string | null { + return this._identifier; + } + public addElement(element: Relationship | Node): void { + if ( + this._chain.length > 0 && + this._chain[this._chain.length - 1].constructor === element.constructor + ) { + throw new Error( + "Cannot add two consecutive elements of the same type to the graph pattern" + ); + } + if (this._chain.length > 0) { + const last = this._chain[this._chain.length - 1]; + if (last instanceof Node && element instanceof Relationship) { + last.outgoing = element as Relationship; + element.source = last as Node; + } + if (last instanceof Relationship && element instanceof Node) { + last.target = element as Node; + element.incoming = last as Relationship; + } + } + this._chain.push(element); + } + public get chain(): (Node | Relationship)[] { + return this._chain; + } + public get startNode(): Node { + if (this._chain.length === 0) { + throw new Error("Pattern is empty"); + } + const first = this._chain[0]; + if (first instanceof Node) { + return first; + } + throw new Error("Pattern does not start with a node"); + } + public get endNode(): Node { + if (this._chain.length === 0) { + throw new Error("Pattern is empty"); + } + const last = this._chain[this._chain.length - 1]; + if (last instanceof Node) { + return last; + } + throw new Error("Pattern does not end with a node"); + } + public value(): any { + return Array.from(this.values()); + } + public *values(): Generator { + for (const element of 
this._chain) { + if (element instanceof Node) { + yield element.value(); + } else if (element instanceof Relationship) { + let i = 0; + for (const match of element.matches) { + yield match; + if (i < element.matches.length - 1) { + yield match.endNode; + } + i++; + } + } + } + } + public async fetchData(): Promise { + const db: Database = Database.getInstance(); + for (const element of this._chain) { + if ( + element.constructor === NodeReference || + element.constructor === RelationshipReference + ) { + continue; + } + const data = await db.getData(element); + if (element.constructor === Node) { + element.setData(data as NodeData); + } else if (element.constructor === Relationship) { + element.setData(data as RelationshipData); + } + } + } +} +export default Pattern; diff --git a/src/graph/pattern_expression.ts b/src/graph/pattern_expression.ts new file mode 100644 index 0000000..65f414e --- /dev/null +++ b/src/graph/pattern_expression.ts @@ -0,0 +1,37 @@ +import Node from "./node"; +import NodeReference from "./node_reference"; +import Pattern from "./pattern"; +import Relationship from "./relationship"; + +class PatternExpression extends Pattern { + private _fetched: boolean = false; + private _evaluation: boolean = false; + public set identifier(id: string | null) { + throw new Error("Cannot set identifier on PatternExpression"); + } + public addElement(element: Relationship | Node): void { + if (this._chain.length == 0 && !(element instanceof NodeReference)) { + throw new Error("PatternExpression must start with a NodeReference"); + } + super.addElement(element); + } + public async evaluate(): Promise { + this._evaluation = false; + this.endNode.todoNext = async () => { + this._evaluation = true; + }; + await this.startNode.next(); + } + public value(): boolean { + return this._evaluation; + } + public async fetchData(): Promise { + if (this._fetched) { + return; + } + await super.fetchData(); + this._fetched = true; + } +} + +export default PatternExpression; 
diff --git a/src/graph/patterns.ts b/src/graph/patterns.ts new file mode 100644 index 0000000..a78a58d --- /dev/null +++ b/src/graph/patterns.ts @@ -0,0 +1,36 @@ +import Pattern from "./pattern"; + +class Patterns { + private _patterns: Pattern[] = []; + constructor(patterns: Pattern[] = []) { + this._patterns = patterns; + } + public get patterns(): Pattern[] { + return this._patterns; + } + public async initialize(): Promise { + let previous: Pattern | null = null; + for (const pattern of this._patterns) { + await pattern.fetchData(); // Ensure data is loaded + if (previous !== null) { + // Chain the patterns together + previous.endNode.todoNext = async () => { + await pattern.startNode.next(); + }; + } + previous = pattern; + } + } + public set toDoNext(func: () => Promise) { + if (this._patterns.length > 0) { + this._patterns[this._patterns.length - 1].endNode.todoNext = func; + } + } + public async traverse(): Promise { + if (this._patterns.length > 0) { + await this._patterns[0].startNode.next(); + } + } +} + +export default Patterns; diff --git a/src/graph/physical_node.ts b/src/graph/physical_node.ts new file mode 100644 index 0000000..f99bb35 --- /dev/null +++ b/src/graph/physical_node.ts @@ -0,0 +1,23 @@ +import Runner from "../compute/runner"; +import ASTNode from "../parsing/ast_node"; +import Node from "./node"; + +class PhysicalNode extends Node { + private _statement: ASTNode | null = null; + public set statement(statement: ASTNode | null) { + this._statement = statement; + } + public get statement(): ASTNode | null { + return this._statement; + } + public async data(): Promise[]> { + if (this._statement === null) { + throw new Error("Statement is null"); + } + const runner = new Runner(null, this._statement); + await runner.run(); + return runner.results; + } +} + +export default PhysicalNode; diff --git a/src/graph/physical_relationship.ts b/src/graph/physical_relationship.ts new file mode 100644 index 0000000..6ccc1cf --- /dev/null +++ 
b/src/graph/physical_relationship.ts @@ -0,0 +1,23 @@ +import Runner from "../compute/runner"; +import ASTNode from "../parsing/ast_node"; +import Relationship from "./relationship"; + +class PhysicalRelationship extends Relationship { + private _statement: ASTNode | null = null; + public set statement(statement: ASTNode | null) { + this._statement = statement; + } + public get statement(): ASTNode | null { + return this._statement; + } + public async data(): Promise[]> { + if (this._statement === null) { + throw new Error("Statement is null"); + } + const runner = new Runner(null, this._statement); + await runner.run(); + return runner.results; + } +} + +export default PhysicalRelationship; diff --git a/src/graph/relationship.ts b/src/graph/relationship.ts new file mode 100644 index 0000000..ac1810e --- /dev/null +++ b/src/graph/relationship.ts @@ -0,0 +1,116 @@ +import ASTNode from "../parsing/ast_node"; +import Expression from "../parsing/expressions/expression"; +import Hops from "./hops"; +import Node from "./node"; +import RelationshipData, { RelationshipRecord } from "./relationship_data"; +import RelationshipMatchCollector, { + RelationshipMatchRecord, +} from "./relationship_match_collector"; + +class Relationship extends ASTNode { + protected _identifier: string | null = null; + protected _type: string | null = null; + protected _properties: Map = new Map(); + protected _hops: Hops = new Hops(); + + protected _value: RelationshipMatchRecord | RelationshipMatchRecord[] | null = null; + protected _matches: RelationshipMatchCollector = new RelationshipMatchCollector(); + + protected _source: Node | null = null; + protected _target: Node | null = null; + + private _data: RelationshipData | null = null; + + constructor(identifier: string | null = null, type: string | null = null) { + super(); + this._identifier = identifier; + this._type = type; + } + public set identifier(identifier: string) { + this._identifier = identifier; + } + public get identifier(): 
string | null { + return this._identifier; + } + public set type(type: string) { + this._type = type; + } + public get type(): string | null { + return this._type; + } + public get properties(): Record { + return this._data?.properties() || {}; + } + public setProperty(key: string, value: Expression): void { + this._properties.set(key, value); + } + public getProperty(key: string): Expression | null { + return this._properties.get(key) || null; + } + public set hops(hops: Hops) { + this._hops = hops; + } + public get hops(): Hops | null { + return this._hops; + } + public setValue(relationship: Relationship): void { + const match: RelationshipMatchRecord = this._matches.push(relationship); + this._value = this._matches.value(); + } + public set source(node: Node | null) { + this._source = node; + } + public get source(): Node | null { + return this._source; + } + public set target(node: Node | null) { + this._target = node; + } + public get target(): Node | null { + return this._target; + } + public value(): RelationshipMatchRecord | RelationshipMatchRecord[] | null { + return this._value; + } + public get matches(): RelationshipMatchRecord[] { + return this._matches.matches; + } + public setData(data: RelationshipData | null): void { + this._data = data; + } + public getData(): RelationshipData | null { + return this._data; + } + public setEndNode(node: Node): void { + this._matches.endNode = node; + } + public async find(left_id: string, hop: number = 0): Promise { + // Save original source node + const original = this._source; + if (hop > 0) { + // For hops greater than 0, the source becomes the target of the previous hop + this._source = this._target; + } + if (hop === 0) { + this._data?.reset(); + } + while (this._data?.find(left_id, hop)) { + const data: RelationshipRecord = this._data?.current(hop) as RelationshipRecord; + if (hop >= this.hops!.min) { + this.setValue(this); + await this._target?.find(data.right_id, hop); + if (this._matches.isCircular()) { + 
throw new Error("Circular relationship detected"); + } + if (hop + 1 < this.hops!.max) { + await this.find(data.right_id, hop + 1); + } + this._matches.pop(); + } + } + // Restore original source node + this._source = original; + } +} + +export default Relationship; diff --git a/src/graph/relationship_data.ts b/src/graph/relationship_data.ts new file mode 100644 index 0000000..216e258 --- /dev/null +++ b/src/graph/relationship_data.ts @@ -0,0 +1,27 @@ +import Data from "./data"; + +export type RelationshipRecord = { left_id: string; right_id: string } & Record; + +class RelationshipData extends Data { + constructor(records: RelationshipRecord[] = []) { + super(records); + super._buildIndex("left_id"); + } + public find(left_id: string, hop: number = 0): boolean { + return super._find(left_id, hop); + } + /* + ** Get the properties of the current relationship record + ** excluding the left_id and right_id fields + */ + public properties(): Record | null { + const current = this.current(); + if (current) { + const { left_id, right_id, ...props } = current; + return props; + } + return null; + } +} + +export default RelationshipData; diff --git a/src/graph/relationship_match_collector.ts b/src/graph/relationship_match_collector.ts new file mode 100644 index 0000000..da98935 --- /dev/null +++ b/src/graph/relationship_match_collector.ts @@ -0,0 +1,58 @@ +import Relationship from "./relationship"; + +export type RelationshipMatchRecord = { + type: string; + startNode: Record; + endNode: Record | null; + properties: Record; +}; + +class RelationshipMatchCollector { + private _matches: RelationshipMatchRecord[] = []; + private _nodeIds: Array = []; + + public push(relationship: Relationship): RelationshipMatchRecord { + const match: RelationshipMatchRecord = { + type: relationship.type!, + startNode: relationship.source?.value() || {}, + endNode: null, + properties: relationship.properties, + }; + this._matches.push(match); + this._nodeIds.push(match.startNode.id); + 
return match; + } + public set endNode(node: any) { + if (this._matches.length > 0) { + this._matches[this._matches.length - 1].endNode = node.value(); + } + } + public pop(): RelationshipMatchRecord | undefined { + this._nodeIds.pop(); + return this._matches.pop(); + } + public value(): RelationshipMatchRecord | RelationshipMatchRecord[] | null { + if (this._matches.length === 0) { + return null; + } else if (this._matches.length === 1) { + const _match = this._matches[0]; + return _match; + } else { + const _matches = this._matches; + return _matches; + } + } + public get matches(): RelationshipMatchRecord[] { + return this._matches; + } + /* + ** Checks if the collected relationships form a circular pattern + ** meaning the same node id occurs more than once in the collected matches + */ + public isCircular(): boolean { + const seen = new Set(this._nodeIds); + return seen.size < this._nodeIds.length; + } +} + +export default RelationshipMatchCollector; diff --git a/src/graph/relationship_reference.ts b/src/graph/relationship_reference.ts new file mode 100644 index 0000000..c0c39e8 --- /dev/null +++ b/src/graph/relationship_reference.ts @@ -0,0 +1,24 @@ +import Relationship from "./relationship"; +import { RelationshipRecord } from "./relationship_data"; + +class RelationshipReference extends Relationship { + private _reference: Relationship | null = null; + constructor(base: Relationship, reference: Relationship) { + super(); + this._identifier = base.identifier; + this._type = base.type; + this._hops = base.hops!; + this._source = base.source; + this._target = base.target; + this._reference = reference; + } + public async find(left_id: string, hop: number = 0): Promise { + this.setValue(this._reference!); + const data: RelationshipRecord = this._reference!.getData()?.current( + hop + ) as RelationshipRecord; + await this._target?.find(data.right_id, hop); + } +} + +export default RelationshipReference; diff --git a/src/parsing/base_parser.ts 
b/src/parsing/base_parser.ts index 8cf62e8..0d6e8e2 100644 --- a/src/parsing/base_parser.ts +++ b/src/parsing/base_parser.ts @@ -3,7 +3,7 @@ import Tokenizer from "../tokenization/tokenizer"; /** * Base class for parsers providing common token manipulation functionality. - * + * * This class handles tokenization and provides utility methods for navigating * through tokens, peeking ahead, and checking token sequences. */ @@ -11,9 +11,15 @@ class BaseParser { private tokens: Token[] = []; private tokenIndex: number = 0; + constructor(tokens: Token[] | null = null) { + if (tokens !== null) { + this.tokens = tokens; + } + } + /** * Tokenizes a statement and initializes the token array. - * + * * @param statement - The input statement to tokenize */ protected tokenize(statement: string): void { @@ -30,11 +36,11 @@ class BaseParser { /** * Peeks at the next token without advancing the current position. - * + * * @returns The next token, or null if at the end of the token stream */ protected peek(): Token | null { - if(this.tokenIndex + 1 >= this.tokens.length) { + if (this.tokenIndex + 1 >= this.tokens.length) { return null; } return this.tokens[this.tokenIndex + 1]; @@ -42,22 +48,22 @@ class BaseParser { /** * Checks if a sequence of tokens appears ahead in the token stream. - * + * * @param tokens - The sequence of tokens to look for * @param skipWhitespaceAndComments - Whether to skip whitespace and comments when matching * @returns True if the token sequence is found ahead, false otherwise */ protected ahead(tokens: Token[], skipWhitespaceAndComments: boolean = true): boolean { let j = 0; - for(let i=this.tokenIndex; i= this.tokens.length) { + if (this.tokenIndex >= this.tokens.length) { return Token.EOF; } return this.tokens[this.tokenIndex]; @@ -78,15 +84,15 @@ class BaseParser { /** * Gets the previous token. 
- * + * * @returns The previous token, or EOF if at the beginning */ protected get previousToken(): Token { - if(this.tokenIndex - 1 < 0) { + if (this.tokenIndex - 1 < 0) { return Token.EOF; } return this.tokens[this.tokenIndex - 1]; } } -export default BaseParser; \ No newline at end of file +export default BaseParser; diff --git a/src/parsing/context.ts b/src/parsing/context.ts index b7c34f0..c5e36ae 100644 --- a/src/parsing/context.ts +++ b/src/parsing/context.ts @@ -2,10 +2,10 @@ import ASTNode from "./ast_node"; /** * Maintains a stack of AST nodes to track parsing context. - * + * * Used during parsing to maintain the current context and check for specific node types * in the parsing hierarchy, which helps with context-sensitive parsing decisions. - * + * * @example * ```typescript * const context = new Context(); @@ -14,35 +14,35 @@ import ASTNode from "./ast_node"; * ``` */ class Context { - private stack: ASTNode[] = []; - + private nodes: ASTNode[] = []; + /** * Pushes a node onto the context stack. - * + * * @param node - The AST node to push */ public push(node: ASTNode): void { - this.stack.push(node); + this.nodes.push(node); } - + /** * Pops the top node from the context stack. - * + * * @returns The popped node, or undefined if the stack is empty */ public pop(): ASTNode | undefined { - return this.stack.pop(); + return this.nodes.pop(); } - + /** - * Checks if the stack contains a node of the specified type. - * + * Checks if the nodes stack contains a node of the specified type. 
+ * * @param type - The constructor of the node type to search for * @returns True if a node of the specified type is found in the stack, false otherwise */ public containsType(type: new (...args: any[]) => ASTNode): boolean { - return this.stack.some((v) => v instanceof type); + return this.nodes.some((v) => v instanceof type); } } -export default Context; \ No newline at end of file +export default Context; diff --git a/src/parsing/expressions/boolean.ts b/src/parsing/expressions/boolean.ts new file mode 100644 index 0000000..f807069 --- /dev/null +++ b/src/parsing/expressions/boolean.ts @@ -0,0 +1,21 @@ +import ASTNode from "../ast_node"; + +class Boolean extends ASTNode { + private _value: boolean; + constructor(value: string) { + super(); + const _value = value.toUpperCase(); + if (_value === "TRUE") { + this._value = true; + } else if (_value === "FALSE") { + this._value = false; + } else { + throw new Error(`Invalid boolean value: ${value}`); + } + } + public value(): boolean { + return this._value; + } +} + +export default Boolean; diff --git a/src/parsing/expressions/expression.ts b/src/parsing/expressions/expression.ts index 167b52e..a7908cd 100644 --- a/src/parsing/expressions/expression.ts +++ b/src/parsing/expressions/expression.ts @@ -1,14 +1,15 @@ +import PatternExpression from "../../graph/pattern_expression"; import ASTNode from "../ast_node"; import AggregateFunction from "../functions/aggregate_function"; import Reference from "./reference"; /** * Represents an expression in the FlowQuery AST. - * + * * Expressions are built using the Shunting Yard algorithm to handle operator * precedence and associativity. They can contain operands (numbers, strings, identifiers) * and operators (arithmetic, logical, comparison). 
- * + * * @example * ```typescript * const expr = new Expression(); @@ -24,23 +25,24 @@ class Expression extends ASTNode { private _alias: string | null = null; private _overridden: any | null = null; private _reducers: AggregateFunction[] | null = null; + private _patterns: PatternExpression[] | null = null; /** * Adds a node (operand or operator) to the expression. - * + * * Uses the Shunting Yard algorithm to maintain correct operator precedence. - * + * * @param node - The AST node to add (operand or operator) */ public addNode(node: ASTNode): void { /* Implements the Shunting Yard algorithm */ - if(node.isOperand()) { + if (node.isOperand()) { this.output.push(node); - } else if(node.isOperator()) { + } else if (node.isOperator()) { const operator1: ASTNode = node; - while(this.operators.length > 0) { + while (this.operators.length > 0) { let operator2 = this.operators[this.operators.length - 1]; - if( + if ( operator2.precedence > operator1.precedence || (operator2.precedence === operator1.precedence && operator1.leftAssociative) ) { @@ -56,25 +58,25 @@ class Expression extends ASTNode { /** * Finalizes the expression by converting it to a tree structure. - * + * * Should be called after all nodes have been added. 
*/ public finish(): void { let last: ASTNode | undefined; - while(last = this.operators.pop()) { + while ((last = this.operators.pop())) { this.output.push(last); - }; + } this.addChild(this.toTree()); } private toTree(): ASTNode { const node = this.output.pop() || new ASTNode(); - if(node.isOperator()) { + if (node.isOperator()) { const rhs = this.toTree(); const lhs = this.toTree(); node.addChild(lhs); node.addChild(rhs); - } + } return node; } @@ -83,11 +85,11 @@ class Expression extends ASTNode { } public value(): any { - if(this._overridden !== null) { + if (this._overridden !== null) { return this._overridden; } - if(this.childCount() !== 1) { - throw new Error('Expected one child'); + if (this.childCount() !== 1) { + throw new Error("Expected one child"); } return this.children[0].value(); } @@ -101,31 +103,37 @@ class Expression extends ASTNode { } public get alias(): string | null { - if(this.firstChild() instanceof Reference && this._alias === null) { + if (this.firstChild() instanceof Reference && this._alias === null) { return (this.firstChild()).identifier; } return this._alias; } public toString(): string { - if(this._alias !== null) { + if (this._alias !== null) { return `Expression (${this._alias})`; } else { - return 'Expression'; + return "Expression"; } } public reducers(): AggregateFunction[] { - if(this._reducers === null) { - this._reducers = [...this._extract_reducers()]; + if (this._reducers === null) { + this._reducers = [...this._extract(this, AggregateFunction)]; } return this._reducers; } - private *_extract_reducers(node: ASTNode = this): Generator { - if(node instanceof AggregateFunction) { + public patterns(): PatternExpression[] { + if (this._patterns === null) { + this._patterns = [...this._extract(this, PatternExpression)]; + } + return this._patterns; + } + private *_extract(node: ASTNode = this, of_type: any): Generator { + if (node instanceof of_type) { yield node; } - for(const child of node.getChildren()) { - yield* 
this._extract_reducers(child); + for (const child of node.getChildren()) { + yield* this._extract(child, of_type); } } public mappable(): boolean { @@ -139,4 +147,4 @@ class Expression extends ASTNode { } } -export default Expression; \ No newline at end of file +export default Expression; diff --git a/src/parsing/expressions/operator.ts b/src/parsing/expressions/operator.ts index 2d5eefd..59cd5cd 100644 --- a/src/parsing/expressions/operator.ts +++ b/src/parsing/expressions/operator.ts @@ -176,4 +176,22 @@ class Is extends Operator { } } -export { Operator, Add, Subtract, Multiply, Divide, Modulo, Power, Equals, NotEquals, GreaterThan, LessThan, GreaterThanOrEqual, LessThanOrEqual, And, Or, Not, Is }; \ No newline at end of file +export { + Operator, + Add, + Subtract, + Multiply, + Divide, + Modulo, + Power, + Equals, + NotEquals, + GreaterThan, + LessThan, + GreaterThanOrEqual, + LessThanOrEqual, + And, + Or, + Not, + Is, +}; diff --git a/src/parsing/functions/function_factory.ts b/src/parsing/functions/function_factory.ts index e3d0bdc..1768a72 100644 --- a/src/parsing/functions/function_factory.ts +++ b/src/parsing/functions/function_factory.ts @@ -1,42 +1,41 @@ +import AsyncFunction from "./async_function"; +import "./avg"; +import "./collect"; import Function from "./function"; +import { + AsyncDataProvider, + FunctionMetadata, + getFunctionMetadata, + getRegisteredFunctionFactory, + getRegisteredFunctionMetadata, +} from "./function_metadata"; +import "./functions"; +import "./join"; +import "./keys"; import PredicateFunction from "./predicate_function"; -// Import built-in functions to ensure their @FunctionDef decorators run -import "./sum"; -import "./collect"; -import "./avg"; -import "./range"; +import "./predicate_sum"; import "./rand"; +import "./range"; +import "./replace"; import "./round"; +import "./size"; import "./split"; -import "./join"; -import "./keys"; -import "./to_json"; -import "./replace"; import "./stringify"; -import "./size"; 
-import "./functions"; -import "./predicate_sum"; +// Import built-in functions to ensure their @FunctionDef decorators run +import "./sum"; +import "./to_json"; import "./type"; -import { - FunctionMetadata, - getRegisteredFunctionMetadata, - getFunctionMetadata, - getRegisteredFunctionFactory, - AsyncDataProvider -} from "./function_metadata"; -import AsyncFunction from "./async_function"; -import { get } from "node:http"; // Re-export AsyncDataProvider for backwards compatibility export { AsyncDataProvider }; /** * Factory for creating function instances by name. - * + * * All functions are registered via the @FunctionDef decorator. * Maps function names (case-insensitive) to their corresponding implementation classes. * Supports built-in functions like sum, avg, collect, range, split, join, etc. - * + * * @example * ```typescript * const sumFunc = FunctionFactory.create("sum"); @@ -46,7 +45,7 @@ export { AsyncDataProvider }; class FunctionFactory { /** * Gets an async data provider by name. - * + * * @param name - The function name (case-insensitive) * @returns The async data provider, or undefined if not found */ @@ -56,7 +55,7 @@ class FunctionFactory { /** * Checks if a function name is registered as an async data provider. - * + * * @param name - The function name (case-insensitive) * @returns True if the function is an async data provider */ @@ -66,7 +65,7 @@ class FunctionFactory { /** * Gets metadata for a specific function. - * + * * @param name - The function name (case-insensitive) * @returns The function metadata, or undefined if not found */ @@ -76,56 +75,58 @@ class FunctionFactory { /** * Lists all registered functions with their metadata. 
- * + * * @param options - Optional filter options * @returns Array of function metadata */ - public static listFunctions(options?: { - category?: string; + public static listFunctions(options?: { + category?: string; asyncOnly?: boolean; syncOnly?: boolean; }): FunctionMetadata[] { const result: FunctionMetadata[] = []; - + for (const meta of getRegisteredFunctionMetadata()) { if (options?.category && meta.category !== options.category) continue; - if (options?.asyncOnly && meta.category !== 'async') continue; - if (options?.syncOnly && meta.category === 'async') continue; + if (options?.asyncOnly && meta.category !== "async") continue; + if (options?.syncOnly && meta.category === "async") continue; result.push(meta); } - + return result; } /** * Lists all registered function names. - * + * * @returns Array of function names */ public static listFunctionNames(): string[] { - return getRegisteredFunctionMetadata().map(m => m.name); + return getRegisteredFunctionMetadata().map((m) => m.name); } /** * Gets all function metadata as a JSON-serializable object for LLM consumption. - * + * * @returns Object with functions grouped by category */ public static toJSON(): { functions: FunctionMetadata[]; categories: string[] } { const functions = FunctionFactory.listFunctions(); - const categories = [...new Set(functions.map(f => f.category).filter(Boolean))] as string[]; + const categories = [ + ...new Set(functions.map((f) => f.category).filter(Boolean)), + ] as string[]; return { functions, categories }; } /** * Creates a function instance by name. 
- * + * * @param name - The function name (case-insensitive) * @returns A Function instance of the appropriate type */ public static create(name: string): Function { const lowerName: string = name.toLowerCase(); - + // Check decorator-registered functions (built-ins use @FunctionDef) const decoratorFactory = getRegisteredFunctionFactory(lowerName); if (decoratorFactory) { @@ -138,15 +139,15 @@ class FunctionFactory { /** * Creates a predicate function instance by name. * Predicate functions are used in WHERE clauses with quantifiers (e.g., ANY, ALL). - * + * * @param name - The function name (case-insensitive) * @returns A PredicateFunction instance of the appropriate type */ public static createPredicate(name: string): PredicateFunction { const lowerName: string = name.toLowerCase(); - + // Check decorator-registered predicate functions - const decoratorFactory = getRegisteredFunctionFactory(lowerName, 'predicate'); + const decoratorFactory = getRegisteredFunctionFactory(lowerName, "predicate"); if (decoratorFactory) { return decoratorFactory(); } @@ -156,13 +157,12 @@ class FunctionFactory { public static createAsync(name: string): AsyncFunction { const lowerName: string = name.toLowerCase(); - const decoratorFactory = getRegisteredFunctionFactory(lowerName, 'async'); + const decoratorFactory = getRegisteredFunctionFactory(lowerName, "async"); if (decoratorFactory) { return decoratorFactory() as AsyncFunction; } throw new Error(`Unknown async function: ${name}`); } - } -export default FunctionFactory; \ No newline at end of file +export default FunctionFactory; diff --git a/src/parsing/operations/create_node.ts b/src/parsing/operations/create_node.ts new file mode 100644 index 0000000..676f3dd --- /dev/null +++ b/src/parsing/operations/create_node.ts @@ -0,0 +1,39 @@ +import Database from "../../graph/database"; +import Node from "../../graph/node"; +import ASTNode from "../ast_node"; +import Operation from "./operation"; + +class CreateNode extends Operation { + 
private _node: Node | null = null; + private _statement: ASTNode | null = null; + constructor(node: Node, statement: ASTNode) { + super(); + this._node = node; + this._statement = statement; + } + public get node(): Node | null { + return this._node; + } + public get statement(): ASTNode | null { + return this._statement; + } + public run(): Promise { + return new Promise(async (resolve, reject) => { + try { + if (this._node === null) { + throw new Error("Node is null"); + } + const db: Database = Database.getInstance(); + db.addNode(this._node, this._statement!); + resolve(); + } catch (error) { + reject(error); + } + }); + } + public get results(): Record[] { + return []; + } +} + +export default CreateNode; diff --git a/src/parsing/operations/create_relationship.ts b/src/parsing/operations/create_relationship.ts new file mode 100644 index 0000000..4cfd1b1 --- /dev/null +++ b/src/parsing/operations/create_relationship.ts @@ -0,0 +1,38 @@ +import Database from "../../graph/database"; +import Relationship from "../../graph/relationship"; +import ASTNode from "../ast_node"; +import Operation from "./operation"; + +class CreateRelationship extends Operation { + private _relationship: Relationship | null = null; + private _statement: ASTNode | null = null; + constructor(relationship: Relationship, statement: ASTNode) { + super(); + this._relationship = relationship; + this._statement = statement; + } + public get relationship(): Relationship | null { + return this._relationship; + } + public get statement(): ASTNode | null { + return this._statement; + } + public run(): Promise { + return new Promise(async (resolve, reject) => { + try { + if (this._relationship === null) { + throw new Error("Relationship is null"); + } + const db = Database.getInstance(); + db.addRelationship(this._relationship, this._statement!); + resolve(); + } catch (error) { + reject(error); + } + }); + } + public get results(): Record[] { + return []; + } +} +export default CreateRelationship; 
diff --git a/src/parsing/operations/match.ts b/src/parsing/operations/match.ts new file mode 100644 index 0000000..280adb2 --- /dev/null +++ b/src/parsing/operations/match.ts @@ -0,0 +1,31 @@ +import Pattern from "../../graph/pattern"; +import Patterns from "../../graph/patterns"; +import Operation from "./operation"; + +class Match extends Operation { + private _patterns: Patterns | null = null; + + constructor(patterns: Pattern[] = []) { + super(); + this._patterns = new Patterns(patterns); + } + public get patterns(): Pattern[] { + return this._patterns ? this._patterns.patterns : []; + } + /** + * Executes the match operation by chaining the patterns together. + * After each pattern match, it continues to the next operation in the chain. + * @return Promise + */ + public async run(): Promise { + await this._patterns!.initialize(); + this._patterns!.toDoNext = async () => { + // Continue to the next operation after all patterns are matched + await this.next?.run(); + }; + // Kick off the graph pattern traversal + await this._patterns!.traverse(); + } +} + +export default Match; diff --git a/src/parsing/operations/operation.ts b/src/parsing/operations/operation.ts index b4e2b7c..04a132c 100644 --- a/src/parsing/operations/operation.ts +++ b/src/parsing/operations/operation.ts @@ -57,6 +57,9 @@ abstract class Operation extends ASTNode { public async finish(): Promise { await this.next?.finish(); } + public async initialize(): Promise { + await this.next?.initialize(); + } public reset(): void {} public get results(): Record[] { throw new Error("Not implemented"); diff --git a/src/parsing/operations/return.ts b/src/parsing/operations/return.ts index 3c1370b..9539260 100644 --- a/src/parsing/operations/return.ts +++ b/src/parsing/operations/return.ts @@ -3,10 +3,10 @@ import Where from "./where"; /** * Represents a RETURN operation that produces the final query results. - * + * * The RETURN operation evaluates expressions and collects them into result records. 
* It can optionally have a WHERE clause to filter results. - * + * * @example * ```typescript * // RETURN x, y WHERE x > 0 @@ -19,25 +19,29 @@ class Return extends Projection { this._where = where; } public get where(): boolean { - if(this._where === null) { + if (this._where === null) { return true; } return this._where.value(); } public async run(): Promise { - if(!this.where) { + if (!this.where) { return; } const record: Map = new Map(); - for(const [expression, alias] of this.expressions()) { - const value: any = expression.value(); + for (const [expression, alias] of this.expressions()) { + const raw = expression.value(); + const value: any = typeof raw === "object" && raw !== null ? structuredClone(raw) : raw; record.set(alias, value); } this._results.push(Object.fromEntries(record)); } + public async initialize(): Promise { + this._results = []; + } public get results(): Record[] { return this._results; } } -export default Return; \ No newline at end of file +export default Return; diff --git a/src/parsing/operations/where.ts b/src/parsing/operations/where.ts index 1b84f8b..8eb882b 100644 --- a/src/parsing/operations/where.ts +++ b/src/parsing/operations/where.ts @@ -1,12 +1,12 @@ -import Operation from "./operation"; import Expression from "../expressions/expression"; +import Operation from "./operation"; /** * Represents a WHERE operation that filters data based on a condition. - * + * * The WHERE operation evaluates a boolean expression and only continues * execution to the next operation if the condition is true. - * + * * @example * ```typescript * // RETURN x WHERE x > 0 @@ -15,7 +15,7 @@ import Expression from "../expressions/expression"; class Where extends Operation { /** * Creates a new WHERE operation with the given condition. 
- * + * * @param expression - The boolean expression to evaluate */ constructor(expression: Expression) { @@ -26,7 +26,11 @@ class Where extends Operation { return this.children[0] as Expression; } public async run(): Promise { - if(this.expression.value()) { + for (const pattern of this.expression.patterns()) { + await pattern.fetchData(); + await pattern.evaluate(); + } + if (this.expression.value()) { await this.next?.run(); } } @@ -35,4 +39,4 @@ class Where extends Operation { } } -export default Where; \ No newline at end of file +export default Where; diff --git a/src/parsing/parser.ts b/src/parsing/parser.ts index ff923cd..b8fdf3e 100644 --- a/src/parsing/parser.ts +++ b/src/parsing/parser.ts @@ -1,3 +1,10 @@ +import Hops from "../graph/hops"; +import Node from "../graph/node"; +import NodeReference from "../graph/node_reference"; +import Pattern from "../graph/pattern"; +import PatternExpression from "../graph/pattern_expression"; +import Relationship from "../graph/relationship"; +import RelationshipReference from "../graph/relationship_reference"; import Token from "../tokenization/token"; import ObjectUtils from "../utils/object_utils"; import Alias from "./alias"; @@ -32,8 +39,11 @@ import When from "./logic/when"; import AggregatedReturn from "./operations/aggregated_return"; import AggregatedWith from "./operations/aggregated_with"; import Call from "./operations/call"; +import CreateNode from "./operations/create_node"; +import CreateRelationship from "./operations/create_relationship"; import Limit from "./operations/limit"; import Load from "./operations/load"; +import Match from "./operations/match"; import Operation from "./operations/operation"; import Return from "./operations/return"; import Unwind from "./operations/unwind"; @@ -72,6 +82,10 @@ class Parser extends BaseParser { */ public parse(statement: string): ASTNode { this.tokenize(statement); + return this._parseTokenized(); + } + + private _parseTokenized(isSubQuery: boolean = false): 
ASTNode { const root: ASTNode = new ASTNode(); let previous: Operation | null = null; let operation: Operation | null = null; @@ -82,8 +96,10 @@ class Parser extends BaseParser { this.skipWhitespaceAndComments(); } operation = this.parseOperation(); - if (operation === null) { + if (operation === null && !isSubQuery) { throw new Error("Expected one of WITH, UNWIND, RETURN, LOAD, OR CALL"); + } else if (operation === null && isSubQuery) { + return root; } if (this._returns > 1) { throw new Error("Only one RETURN statement is allowed"); @@ -94,28 +110,33 @@ class Parser extends BaseParser { ); } if (previous !== null) { - previous.addSibling(operation); + previous.addSibling(operation!); } else { - root.addChild(operation); + root.addChild(operation!); } const where = this.parseWhere(); if (where !== null) { if (operation instanceof Return) { (operation as Return).where = where; } else { - operation.addSibling(where); + operation!.addSibling(where); operation = where; } } const limit = this.parseLimit(); if (limit !== null) { - operation.addSibling(limit); + operation!.addSibling(limit); operation = limit; } previous = operation; } - if (!(operation instanceof Return) && !(operation instanceof Call)) { - throw new Error("Last statement must be a RETURN, WHERE, or a CALL statement"); + if ( + !(operation instanceof Return) && + !(operation instanceof Call) && + !(operation instanceof CreateNode) && + !(operation instanceof CreateRelationship) + ) { + throw new Error("Last statement must be a RETURN, WHERE, CALL, or CREATE statement"); } return root; } @@ -126,7 +147,9 @@ class Parser extends BaseParser { this.parseUnwind() || this.parseReturn() || this.parseLoad() || - this.parseCall() + this.parseCall() || + this.parseCreate() || + this.parseMatch() ); } @@ -307,6 +330,315 @@ class Parser extends BaseParser { return call; } + private parseCreate(): CreateNode | CreateRelationship | null { + if (!this.token.isCreate()) { + return null; + } + this.setNextToken(); + 
this.expectAndSkipWhitespaceAndComments(); + if (!this.token.isVirtual()) { + throw new Error("Expected VIRTUAL"); + } + this.setNextToken(); + this.expectAndSkipWhitespaceAndComments(); + const node: Node | null = this.parseNode(); + if (node === null) { + throw new Error("Expected node definition"); + } + let relationship: Relationship | null = null; + if (this.token.isSubtract() && this.peek()?.isOpeningBracket()) { + this.setNextToken(); + this.setNextToken(); + if (!this.token.isColon()) { + throw new Error("Expected ':' for relationship type"); + } + this.setNextToken(); + if (!this.token.isIdentifier()) { + throw new Error("Expected relationship type identifier"); + } + const type: string = this.token.value || ""; + this.setNextToken(); + if (!this.token.isClosingBracket()) { + throw new Error("Expected closing bracket for relationship definition"); + } + this.setNextToken(); + if (!this.token.isSubtract()) { + throw new Error("Expected '-' for relationship definition"); + } + this.setNextToken(); + const target: Node | null = this.parseNode(); + if (target === null) { + throw new Error("Expected target node definition"); + } + relationship = new Relationship(); + relationship.type = type; + } + this.expectAndSkipWhitespaceAndComments(); + if (!this.token.isAs()) { + throw new Error("Expected AS"); + } + this.setNextToken(); + this.expectAndSkipWhitespaceAndComments(); + const query: ASTNode | null = this.parseSubQuery(); + if (query === null) { + throw new Error("Expected sub-query"); + } + let create: CreateNode | CreateRelationship; + if (relationship !== null) { + create = new CreateRelationship(relationship, query); + } else { + create = new CreateNode(node, query); + } + return create; + } + + private parseMatch(): Match | null { + if (!this.token.isMatch()) { + return null; + } + this.setNextToken(); + this.expectAndSkipWhitespaceAndComments(); + const patterns: Pattern[] = Array.from(this.parsePatterns()); + if (patterns.length === 0) { + throw new 
Error("Expected graph pattern"); + } + return new Match(patterns); + } + + private parseNode(): Node | null { + if (!this.token.isLeftParenthesis()) { + return null; + } + this.setNextToken(); + this.skipWhitespaceAndComments(); + let identifier: string | null = null; + if (this.token.isIdentifier()) { + identifier = this.token.value || ""; + this.setNextToken(); + } + this.skipWhitespaceAndComments(); + let label: string | null = null; + if (!this.token.isColon() && this.peek()?.isIdentifier()) { + throw new Error("Expected ':' for node label"); + } + if (this.token.isColon() && !this.peek()?.isIdentifier()) { + throw new Error("Expected node label identifier"); + } + if (this.token.isColon() && this.peek()?.isIdentifier()) { + this.setNextToken(); + label = this.token.value || ""; + this.setNextToken(); + } + this.skipWhitespaceAndComments(); + let node = new Node(); + node.label = label!; + if (label !== null && identifier !== null) { + node.identifier = identifier; + this.variables.set(identifier, node); + } else if (identifier !== null) { + const reference = this.variables.get(identifier); + if (reference === undefined || reference.constructor !== Node) { + throw new Error(`Undefined node reference: ${identifier}`); + } + node = new NodeReference(node, reference); + } + if (!this.token.isRightParenthesis()) { + throw new Error("Expected closing parenthesis for node definition"); + } + this.setNextToken(); + return node; + } + + private *parsePatterns(): IterableIterator { + while (true) { + let identifier: string | null = null; + if (this.token.isIdentifier()) { + identifier = this.token.value || ""; + this.setNextToken(); + this.skipWhitespaceAndComments(); + if (!this.token.isEquals()) { + throw new Error("Expected '=' for pattern assignment"); + } + this.setNextToken(); + this.skipWhitespaceAndComments(); + } + const pattern: Pattern | null = this.parsePattern(); + if (pattern !== null) { + if (identifier !== null) { + pattern.identifier = identifier; + 
this.variables.set(identifier, pattern); + } + yield pattern; + } else { + break; + } + this.skipWhitespaceAndComments(); + if (!this.token.isComma()) { + break; + } + this.setNextToken(); + this.skipWhitespaceAndComments(); + } + } + + private parsePattern(): Pattern | null { + if (!this.token.isLeftParenthesis()) { + return null; + } + const pattern = new Pattern(); + let node = this.parseNode(); + if (node === null) { + throw new Error("Expected node definition"); + } + pattern.addElement(node); + let relationship: Relationship | null = null; + while (true) { + relationship = this.parseRelationship(); + if (relationship === null) { + break; + } + pattern.addElement(relationship); + node = this.parseNode(); + if (node === null) { + throw new Error("Expected target node definition"); + } + pattern.addElement(node); + } + return pattern; + } + + private parsePatternExpression(): PatternExpression | null { + if (!this.token.isLeftParenthesis()) { + return null; + } + const pattern = new PatternExpression(); + let node = this.parseNode(); + if (node === null) { + throw new Error("Expected node definition"); + } + if (!(node instanceof NodeReference)) { + throw new Error("PatternExpression must start with a NodeReference"); + } + pattern.addElement(node); + let relationship: Relationship | null = null; + while (true) { + relationship = this.parseRelationship(); + if (relationship === null) { + break; + } + if (relationship.hops?.multi()) { + throw new Error("PatternExpression does not support variable-length relationships"); + } + pattern.addElement(relationship); + node = this.parseNode(); + if (node === null) { + throw new Error("Expected target node definition"); + } + pattern.addElement(node); + } + return pattern; + } + + private parseRelationship(): Relationship | null { + if (this.token.isLessThan() && this.peek()?.isSubtract()) { + this.setNextToken(); + this.setNextToken(); + } else if (this.token.isSubtract()) { + this.setNextToken(); + } else { + return 
null; + } + if (!this.token.isOpeningBracket()) { + return null; + } + this.setNextToken(); + let variable: string | null = null; + if (this.token.isIdentifier()) { + variable = this.token.value || ""; + this.setNextToken(); + } + if (!this.token.isColon()) { + throw new Error("Expected ':' for relationship type"); + } + this.setNextToken(); + if (!this.token.isIdentifier()) { + throw new Error("Expected relationship type identifier"); + } + const type: string = this.token.value || ""; + this.setNextToken(); + const hops: Hops | null = this.parseRelationshipHops(); + if (!this.token.isClosingBracket()) { + throw new Error("Expected closing bracket for relationship definition"); + } + this.setNextToken(); + if (!this.token.isSubtract()) { + throw new Error("Expected '-' for relationship definition"); + } + this.setNextToken(); + if (this.token.isGreaterThan()) { + this.setNextToken(); + } + let relationship = new Relationship(); + if (type !== null && variable !== null) { + relationship.identifier = variable; + this.variables.set(variable, relationship); + } else if (variable !== null) { + const reference = this.variables.get(variable); + if (reference === undefined || reference.constructor !== Relationship) { + throw new Error(`Undefined relationship reference: ${variable}`); + } + relationship = new RelationshipReference(relationship, reference); + } + if (hops !== null) { + relationship.hops = hops; + } + relationship.type = type; + return relationship; + } + + private parseRelationshipHops(): Hops | null { + if (!this.token.isMultiply()) { + return null; + } + const hops = new Hops(); + this.setNextToken(); + if (this.token.isNumber()) { + hops.min = parseInt(this.token.value || "0"); + this.setNextToken(); + if (this.token.isDot()) { + this.setNextToken(); + if (!this.token.isDot()) { + throw new Error("Expected '..' 
for relationship hops"); + } + this.setNextToken(); + if (!this.token.isNumber()) { + throw new Error("Expected number for relationship hops"); + } + hops.max = parseInt(this.token.value || "0"); + this.setNextToken(); + } + } else { + hops.min = 0; + hops.max = Number.MAX_SAFE_INTEGER; + } + return hops; + } + + private parseSubQuery(): ASTNode | null { + if (!this.token.isOpeningBrace()) { + return null; + } + this.setNextToken(); + this.expectAndSkipWhitespaceAndComments(); + const query: ASTNode = this._parseTokenized(true); + this.skipWhitespaceAndComments(); + if (!this.token.isClosingBrace()) { + throw new Error("Expected closing brace for sub-query"); + } + this.setNextToken(); + return query; + } + private parseLimit(): Limit | null { this.skipWhitespaceAndComments(); if (!this.token.isLimit()) { @@ -377,6 +709,12 @@ class Parser extends BaseParser { const lookup = this.parseLookup(func); expression.addNode(lookup); } + } else if (this.token.isLeftParenthesis() && this.peek()?.isIdentifier()) { + // Possible graph pattern expression + const pattern = this.parsePatternExpression(); + if (pattern !== null) { + expression.addNode(pattern); + } } else if (this.token.isOperand()) { expression.addNode(this.token.node); this.setNextToken(); diff --git a/src/parsing/token_to_node.ts b/src/parsing/token_to_node.ts index fd3b50b..196bd03 100644 --- a/src/parsing/token_to_node.ts +++ b/src/parsing/token_to_node.ts @@ -4,6 +4,7 @@ import CSV from "./components/csv"; import JSON from "./components/json"; import Null from "./components/null"; import Text from "./components/text"; +import Boolean from "./expressions/boolean"; import Identifier from "./expressions/identifier"; import Number from "./expressions/number"; import { @@ -19,6 +20,7 @@ import { Modulo, Multiply, Not, + NotEquals, Or, Power, Subtract, @@ -61,6 +63,8 @@ class TokenToNode { return new Power(); } else if (token.isEquals()) { return new Equals(); + } else if (token.isNotEquals()) { + return new 
NotEquals(); } else if (token.isLessThan()) { return new LessThan(); } else if (token.isGreaterThan()) { @@ -98,6 +102,8 @@ class TokenToNode { } else if (token.isNull()) { return new Null(); } + } else if (token.isBoolean()) { + return new Boolean(token.value!); } else { throw new Error("Unknown token"); } diff --git a/src/tokenization/keyword.ts b/src/tokenization/keyword.ts index 6d4415f..b78443b 100644 --- a/src/tokenization/keyword.ts +++ b/src/tokenization/keyword.ts @@ -1,43 +1,44 @@ enum Keyword { - RETURN = 'RETURN', - MATCH = 'MATCH', - WHERE = 'WHERE', - CREATE = 'CREATE', - MERGE = 'MERGE', - DELETE = 'DELETE', - DETACH = 'DETACH', - SET = 'SET', - REMOVE = 'REMOVE', - FOREACH = 'FOREACH', - WITH = 'WITH', - CALL = 'CALL', - YIELD = 'YIELD', - LOAD = 'LOAD', - HEADERS = 'HEADERS', - POST = 'POST', - FROM = 'FROM', - CSV = 'CSV', - JSON = 'JSON', - TEXT = 'TEXT', - AS = 'AS', - UNWIND = 'UNWIND', - SUM = 'SUM', - COLLECT = 'COLLECT', - DISTINCT = 'DISTINCT', - ORDER = 'ORDER', - BY = 'BY', - ASC = 'ASC', - DESC = 'DESC', - SKIP = 'SKIP', - LIMIT = 'LIMIT', - EOF = 'EOF', - CASE = 'CASE', - WHEN = 'WHEN', - THEN = 'THEN', - ELSE = 'ELSE', - END = 'END', - NULL = 'NULL', - IN = 'IN', + RETURN = "RETURN", + MATCH = "MATCH", + WHERE = "WHERE", + CREATE = "CREATE", + VIRTUAL = "VIRTUAL", + MERGE = "MERGE", + DELETE = "DELETE", + DETACH = "DETACH", + SET = "SET", + REMOVE = "REMOVE", + FOREACH = "FOREACH", + WITH = "WITH", + CALL = "CALL", + YIELD = "YIELD", + LOAD = "LOAD", + HEADERS = "HEADERS", + POST = "POST", + FROM = "FROM", + CSV = "CSV", + JSON = "JSON", + TEXT = "TEXT", + AS = "AS", + UNWIND = "UNWIND", + SUM = "SUM", + COLLECT = "COLLECT", + DISTINCT = "DISTINCT", + ORDER = "ORDER", + BY = "BY", + ASC = "ASC", + DESC = "DESC", + SKIP = "SKIP", + LIMIT = "LIMIT", + EOF = "EOF", + CASE = "CASE", + WHEN = "WHEN", + THEN = "THEN", + ELSE = "ELSE", + END = "END", + NULL = "NULL", + IN = "IN", } -export default Keyword; \ No newline at end of file +export 
default Keyword; diff --git a/src/tokenization/token.ts b/src/tokenization/token.ts index 49e0676..fcf038a 100644 --- a/src/tokenization/token.ts +++ b/src/tokenization/token.ts @@ -143,6 +143,18 @@ class Token { return this._type === TokenType.NUMBER; } + // Boolean token + + public static BOOLEAN(value: string): Token { + return new Token(TokenType.BOOLEAN, value); + } + + public isBoolean(): boolean { + return ( + this._type === TokenType.BOOLEAN && (this._value === "TRUE" || this._value === "FALSE") + ); + } + // Symbol tokens public static get LEFT_PARENTHESIS(): Token { @@ -511,6 +523,14 @@ class Token { return this._type === TokenType.KEYWORD && this._value === Keyword.CREATE; } + public static get VIRTUAL(): Token { + return new Token(TokenType.KEYWORD, Keyword.VIRTUAL); + } + + public isVirtual(): boolean { + return this._type === TokenType.KEYWORD && this._value === Keyword.VIRTUAL; + } + public static get DELETE(): Token { return new Token(TokenType.KEYWORD, Keyword.DELETE); } @@ -628,7 +648,7 @@ class Token { // Other utility methods public isOperand(): boolean { - return this.isNumber() || this.isString() || this.isNull(); + return this.isNumber() || this.isBoolean() || this.isString() || this.isNull(); } public isWhitespaceOrComment(): boolean { diff --git a/src/tokenization/token_type.ts b/src/tokenization/token_type.ts index a39369b..8c12ce0 100644 --- a/src/tokenization/token_type.ts +++ b/src/tokenization/token_type.ts @@ -1,5 +1,6 @@ enum TokenType { KEYWORD = "KEYWORD", + BOOLEAN = "BOOLEAN", OPERATOR = "OPERATOR", UNARY_OPERATOR = "UNARY_OPERATOR", IDENTIFIER = "IDENTIFIER", @@ -12,4 +13,4 @@ enum TokenType { COMMENT = "COMMENT", EOF = "EOF", } -export default TokenType; \ No newline at end of file +export default TokenType; diff --git a/src/tokenization/tokenizer.ts b/src/tokenization/tokenizer.ts index 131ff5c..98639db 100644 --- a/src/tokenization/tokenizer.ts +++ b/src/tokenization/tokenizer.ts @@ -1,18 +1,18 @@ -import Keyword from 
'./keyword'; -import Token from './token'; -import StringWalker from './string_walker'; -import StringUtils from '../utils/string_utils'; -import Symbol from './symbol'; -import Operator from './operator'; -import TokenMapper from './token_mapper'; +import StringUtils from "../utils/string_utils"; +import Keyword from "./keyword"; +import Operator from "./operator"; +import StringWalker from "./string_walker"; +import Symbol from "./symbol"; +import Token from "./token"; +import TokenMapper from "./token_mapper"; /** * Tokenizes FlowQuery input strings into a sequence of tokens. - * + * * The tokenizer performs lexical analysis, breaking down the input text into * meaningful tokens such as keywords, identifiers, operators, strings, numbers, * and symbols. It handles comments, whitespace, and f-strings. - * + * * @example * ```typescript * const tokenizer = new Tokenizer("WITH x = 1 RETURN x"); @@ -27,7 +27,7 @@ class Tokenizer { /** * Creates a new Tokenizer instance for the given input. - * + * * @param input - The FlowQuery input string to tokenize */ constructor(input: string) { @@ -36,7 +36,7 @@ class Tokenizer { /** * Tokenizes the input string into an array of tokens. 
- * + * * @returns An array of Token objects representing the tokenized input * @throws {Error} If an unrecognized token is encountered */ @@ -60,7 +60,7 @@ class Tokenizer { if (tokens.length === 0) { return null; } - if(!tokens[tokens.length - 1].isWhitespaceOrComment()) { + if (!tokens[tokens.length - 1].isWhitespaceOrComment()) { return tokens[tokens.length - 1]; } return null; @@ -75,6 +75,7 @@ class Tokenizer { this.whitespace() || this.lookup(this.keywords) || this.lookup(this.operators, last, this.skipMinus) || + this.boolean() || this.identifier() || this.string() || this.number() || @@ -91,12 +92,26 @@ class Tokenizer { return null; } + private boolean(): Token | null { + const startPosition = this.walker.position; + if (this.walker.checkForString("TRUE")) { + return Token.BOOLEAN(this.walker.getString(startPosition).toUpperCase()); + } + if (this.walker.checkForString("FALSE")) { + return Token.BOOLEAN(this.walker.getString(startPosition).toUpperCase()); + } + return null; + } + private identifier(): Token | null { const startPosition = this.walker.position; if (this.walker.checkForUnderScore() || this.walker.checkForLetter()) { - while (!this.walker.isAtEnd && (this.walker.checkForLetter() || this.walker.checkForDigit() || this.walker.checkForUnderScore())) { - ; - } + while ( + !this.walker.isAtEnd && + (this.walker.checkForLetter() || + this.walker.checkForDigit() || + this.walker.checkForUnderScore()) + ) {} return Token.IDENTIFIER(this.walker.getString(startPosition)); } return null; @@ -127,7 +142,7 @@ class Tokenizer { } private *f_string(): Iterable { - if(!this.walker.checkForFStringStart()) { + if (!this.walker.checkForFStringStart()) { return; } this.walker.moveNext(); // skip the f @@ -142,20 +157,20 @@ class Tokenizer { this.walker.moveNext(); continue; } - if(this.walker.openingBrace()) { + if (this.walker.openingBrace()) { yield Token.F_STRING(this.walker.getString(position), quoteChar); position = this.walker.position; yield 
Token.OPENING_BRACE; this.walker.moveNext(); // skip the opening brace position = this.walker.position; - while(!this.walker.isAtEnd && !this.walker.closingBrace()) { + while (!this.walker.isAtEnd && !this.walker.closingBrace()) { const token = this.getNextToken(); - if(token !== null) { + if (token !== null) { yield token; } else { break; } - if(this.walker.closingBrace()) { + if (this.walker.closingBrace()) { yield Token.CLOSING_BRACE; this.walker.moveNext(); // skip the closing brace position = this.walker.position; @@ -166,7 +181,7 @@ class Tokenizer { if (this.walker.checkForString(quoteChar)) { yield Token.F_STRING(this.walker.getString(position), quoteChar); return; - }; + } this.walker.moveNext(); } } @@ -182,13 +197,15 @@ class Tokenizer { private number(): Token | null { const startPosition = this.walker.position; - if (this.walker.checkForString('-') || this.walker.checkForDigit()) { - while (!this.walker.isAtEnd && this.walker.checkForDigit()) { - ; - } + if (this.walker.checkForString("-") || this.walker.checkForDigit()) { + while (!this.walker.isAtEnd && this.walker.checkForDigit()) {} if (this.walker.checkForString(Symbol.DOT)) { + let decimalDigits: number = 0; while (!this.walker.isAtEnd && this.walker.checkForDigit()) { - ; + decimalDigits++; + } + if (decimalDigits === 0) { + this.walker.movePrevious(); } } const _number = this.walker.getString(startPosition); @@ -197,17 +214,21 @@ class Tokenizer { return null; } - private lookup(mapper: TokenMapper, last: Token | null = null, skip?: (last: Token | null, current: Token) => boolean): Token | null { + private lookup( + mapper: TokenMapper, + last: Token | null = null, + skip?: (last: Token | null, current: Token) => boolean + ): Token | null { const token = mapper.map(this.walker.getRemainingString()); if (token !== undefined && token.value !== null) { - if(token.can_be_identifier && this.walker.word_continuation(token.value)) { + if (token.can_be_identifier && 
this.walker.word_continuation(token.value)) { return null; } if (skip && last && skip(last, token)) { return null; } this.walker.moveBy(token.value.length); - if(mapper.last_found !== null) { + if (mapper.last_found !== null) { token.case_sensitive_value = mapper.last_found; } return token; @@ -219,11 +240,11 @@ class Tokenizer { if (last === null) { return false; } - if((last.isKeyword() || last.isComma() || last.isColon()) && current.isNegation()) { + if ((last.isKeyword() || last.isComma() || last.isColon()) && current.isNegation()) { return true; } return false; } } -export default Tokenizer; \ No newline at end of file +export default Tokenizer; diff --git a/tests/compute/runner.test.ts b/tests/compute/runner.test.ts index 40b8ceb..6ce356d 100644 --- a/tests/compute/runner.test.ts +++ b/tests/compute/runner.test.ts @@ -1,4 +1,6 @@ import Runner from "../../src/compute/runner"; +import Database from "../../src/graph/database"; +import Node from "../../src/graph/node"; import AsyncFunction from "../../src/parsing/functions/async_function"; import { FunctionDef } from "../../src/parsing/functions/function_metadata"; @@ -636,3 +638,655 @@ test("Test call operation with no yielded expressions", async () => { const runner = new Runner("CALL calltestfunctionnoobject() RETURN 1"); }).toThrow("CALL operations must have a YIELD clause unless they are the last operation"); }); + +test("Test create node operation", async () => { + const db = Database.getInstance(); + const runner = new Runner(` + CREATE VIRTUAL (:Person) AS { + with 1 as x + RETURN x + } + `); + await runner.run(); + const results = runner.results; + expect(results.length).toBe(0); + expect(db.getNode(new Node(null, "Person"))).not.toBeNull(); +}); + +test("Test create node and match operations", async () => { + const create = new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + 
`); + await create.run(); + const match = new Runner("MATCH (n:Person) RETURN n"); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0].n).toBeDefined(); + expect(results[0].n.id).toBe(1); + expect(results[0].n.name).toBe("Person 1"); + expect(results[1].n).toBeDefined(); + expect(results[1].n.id).toBe(2); + expect(results[1].n.name).toBe("Person 2"); +}); + +test("Test complex match operation", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1', age: 30}, + {id: 2, name: 'Person 2', age: 25}, + {id: 3, name: 'Person 3', age: 35} + ] as record + RETURN record.id as id, record.name as name, record.age as age + } + `).run(); + const match = new Runner(` + MATCH (n:Person) + WHERE n.age > 29 + RETURN n.name AS name, n.age AS age + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0]).toEqual({ name: "Person 1", age: 30 }); + expect(results[1]).toEqual({ name: "Person 3", age: 35 }); +}); + +test("Test match", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + const match = new Runner(` + MATCH (n:Person) + RETURN n.name AS name + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0]).toEqual({ name: "Person 1" }); + expect(results[1]).toEqual({ name: "Person 2" }); +}); + +test("Test match with nested join", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + const match = new Runner(` + MATCH (a:Person), (b:Person) + WHERE a.id <> b.id + RETURN a.name AS name1, b.name AS name2 + `); + await match.run(); + const results = 
match.results; + expect(results.length).toBe(2); + expect(results[0]).toEqual({ name1: "Person 1", name2: "Person 2" }); + expect(results[1]).toEqual({ name1: "Person 2", name2: "Person 1" }); +}); + +test("Test match with graph pattern", async () => { + await new Runner(` + CREATE VIRTUAL (:User) AS { + UNWIND [ + {id: 1, name: 'User 1', manager_id: null}, + {id: 2, name: 'User 2', manager_id: 1}, + {id: 3, name: 'User 3', manager_id: 1}, + {id: 4, name: 'User 4', manager_id: 2} + ] AS record + RETURN record.id AS id, record.name AS name, record.manager_id AS manager_id + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:User)-[:MANAGED_BY]-(:User) AS { + UNWIND [ + {id: 1, manager_id: null}, + {id: 2, manager_id: 1}, + {id: 3, manager_id: 1}, + {id: 4, manager_id: 2} + ] AS record + RETURN record.id AS left_id, record.manager_id AS right_id + } + `).run(); + const match = new Runner(` + MATCH (user:User)-[r:MANAGED_BY]-(manager:User) + RETURN user.name AS user, manager.name AS manager + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(3); + expect(results[0]).toEqual({ user: "User 2", manager: "User 1" }); + expect(results[1]).toEqual({ user: "User 3", manager: "User 1" }); + expect(results[2]).toEqual({ user: "User 4", manager: "User 2" }); +}); + +test("Test match with multiple hop graph pattern", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH (a:Person)-[:KNOWS*]-(c:Person) + RETURN a.name AS name1, c.name AS name2 + `); + await 
match.run(); + const results = match.results; + expect(results.length).toBe(3); + expect(results[0]).toEqual({ name1: "Person 1", name2: "Person 2" }); + expect(results[1]).toEqual({ name1: "Person 1", name2: "Person 3" }); + expect(results[2]).toEqual({ name1: "Person 2", name2: "Person 3" }); +}); + +test("Test match with double graph pattern", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH (a:Person)-[:KNOWS]-(b:Person)-[:KNOWS]-(c:Person) + RETURN a.name AS name1, b.name AS name2, c.name AS name3 + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0]).toEqual({ name1: "Person 1", name2: "Person 2", name3: "Person 3" }); + expect(results[1]).toEqual({ name1: "Person 2", name2: "Person 3", name3: "Person 4" }); +}); + +test("Test match with referenced to previous variable", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH (a:Person)-[:KNOWS]-(b:Person) + MATCH (b)-[:KNOWS]-(c:Person) 
+ RETURN a.name AS name1, b.name AS name2, c.name AS name3 + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0]).toEqual({ name1: "Person 1", name2: "Person 2", name3: "Person 3" }); + expect(results[1]).toEqual({ name1: "Person 2", name2: "Person 3", name3: "Person 4" }); +}); + +test("Test match and return full node", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + const match = new Runner(` + MATCH (n:Person) + RETURN n + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0].n).toBeDefined(); + expect(results[0].n.id).toBe(1); + expect(results[0].n.name).toBe("Person 1"); + expect(results[1].n).toBeDefined(); + expect(results[1].n.id).toBe(2); + expect(results[1].n.name).toBe("Person 2"); +}); + +test("Test return graph pattern", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, since: "2020-01-01", right_id: 2} + ] as record + RETURN record.left_id as left_id, record.since as since, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH p=(:Person)-[:KNOWS]-(:Person) + RETURN p AS pattern + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(1); + expect(results[0].pattern).toBeDefined(); + expect(results[0].pattern.length).toBe(3); + expect(results[0].pattern[0].id).toBe(1); + expect(results[0].pattern[1].properties.since).toBe("2020-01-01"); + expect(results[0].pattern[2].id).toBe(2); +}); + +test("Test circular graph pattern", async () => { + await new Runner(` + 
CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 1} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH p=(:Person)-[:KNOWS]-(:Person)-[:KNOWS]-(:Person) + RETURN p AS pattern + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); + expect(results[0].pattern).toBeDefined(); + expect(results[0].pattern.length).toBe(5); + expect(results[0].pattern[0].id).toBe(1); + expect(results[0].pattern[1].id).toBeUndefined(); + expect(results[0].pattern[2].id).toBe(2); + expect(results[0].pattern[3].id).toBeUndefined(); + expect(results[0].pattern[4].id).toBe(1); +}); + +test("Test circular graph pattern with variable length should throw error", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 1} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH p=(:Person)-[:KNOWS*]-(:Person) + RETURN p AS pattern + `); + await expect(async () => { + await match.run(); + }).rejects.toThrow("Circular relationship detected"); +}); + +test("Test multi-hop match with variable length relationships", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + 
`).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH (a:Person)-[r:KNOWS*0..3]->(b:Person) + RETURN a, r, b + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(6); + + expect(results[0].a.id).toBe(1); + expect(results[0].b.id).toBe(2); + expect(results[0].r.length).toBe(undefined); + expect(results[0].r.startNode.id).toBe(1); + expect(results[0].r.endNode.id).toBe(2); + + expect(results[1].a.id).toBe(1); + expect(results[1].b.id).toBe(3); + expect(results[1].r.length).toBe(2); + expect(results[1].r[0].startNode.id).toBe(1); + expect(results[1].r[0].endNode.id).toBe(2); + expect(results[1].r[1].startNode.id).toBe(2); + expect(results[1].r[1].endNode.id).toBe(3); + + expect(results[2].a.id).toBe(1); + expect(results[2].b.id).toBe(4); + expect(results[2].r.length).toBe(3); + expect(results[2].r[0].startNode.id).toBe(1); + expect(results[2].r[0].endNode.id).toBe(2); + expect(results[2].r[1].startNode.id).toBe(2); + expect(results[2].r[1].endNode.id).toBe(3); + expect(results[2].r[2].startNode.id).toBe(3); + expect(results[2].r[2].endNode.id).toBe(4); + + expect(results[3].a.id).toBe(2); + expect(results[3].b.id).toBe(3); + expect(results[3].r.length).toBe(undefined); + expect(results[3].r.startNode.id).toBe(2); + expect(results[3].r.endNode.id).toBe(3); + + expect(results[4].a.id).toBe(2); + expect(results[4].b.id).toBe(4); + expect(results[4].r.length).toBe(2); + expect(results[4].r[0].startNode.id).toBe(2); + expect(results[4].r[0].endNode.id).toBe(3); + expect(results[4].r[1].startNode.id).toBe(3); + expect(results[4].r[1].endNode.id).toBe(4); + + expect(results[5].a.id).toBe(3); + expect(results[5].b.id).toBe(4); + expect(results[5].r.length).toBe(undefined); + 
expect(results[5].r.startNode.id).toBe(3); + expect(results[5].r.endNode.id).toBe(4); +}); + +test("Test return match pattern with variable length relationships", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH p=(a:Person)-[:KNOWS*0..3]->(b:Person) + RETURN p AS pattern + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(6); + + expect(results[0].pattern.length).toBe(3); + expect(results[0].pattern[0].id).toBe(1); + expect(results[0].pattern[1].startNode.id).toBe(1); + expect(results[0].pattern[1].endNode.id).toBe(2); + expect(results[0].pattern[2].id).toBe(2); + + expect(results[1].pattern.length).toBe(5); + expect(results[1].pattern[0].id).toBe(1); + expect(results[1].pattern[1].startNode.id).toBe(1); + expect(results[1].pattern[1].endNode.id).toBe(2); + expect(results[1].pattern[2].id).toBe(2); + expect(results[1].pattern[3].startNode.id).toBe(2); + expect(results[1].pattern[3].endNode.id).toBe(3); + expect(results[1].pattern[4].id).toBe(3); + + expect(results[2].pattern.length).toBe(7); + expect(results[2].pattern[0].id).toBe(1); + expect(results[2].pattern[1].startNode.id).toBe(1); + expect(results[2].pattern[1].endNode.id).toBe(2); + expect(results[2].pattern[2].id).toBe(2); + expect(results[2].pattern[3].startNode.id).toBe(2); + expect(results[2].pattern[3].endNode.id).toBe(3); + expect(results[2].pattern[4].id).toBe(3); + expect(results[2].pattern[5].startNode.id).toBe(3); + 
expect(results[2].pattern[5].endNode.id).toBe(4); + expect(results[2].pattern[6].id).toBe(4); + + expect(results[3].pattern.length).toBe(3); + expect(results[3].pattern[0].id).toBe(2); + expect(results[3].pattern[1].startNode.id).toBe(2); + expect(results[3].pattern[1].endNode.id).toBe(3); + expect(results[3].pattern[2].id).toBe(3); + + expect(results[4].pattern.length).toBe(5); + expect(results[4].pattern[0].id).toBe(2); + expect(results[4].pattern[1].startNode.id).toBe(2); + expect(results[4].pattern[1].endNode.id).toBe(3); + expect(results[4].pattern[2].id).toBe(3); + expect(results[4].pattern[3].startNode.id).toBe(3); + expect(results[4].pattern[3].endNode.id).toBe(4); + expect(results[4].pattern[4].id).toBe(4); + + expect(results[5].pattern.length).toBe(3); + expect(results[5].pattern[0].id).toBe(3); + expect(results[5].pattern[1].startNode.id).toBe(3); + expect(results[5].pattern[1].endNode.id).toBe(4); + expect(results[5].pattern[2].id).toBe(4); +}); + +test("Test statement with graph pattern in where clause", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'}, + {id: 4, name: 'Person 4'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 3}, + {left_id: 3, right_id: 4} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + // Test positive match + const match = new Runner(` + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) + RETURN a.name AS name1, b.name AS name2 + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(3); + expect(results[0]).toEqual({ name1: "Person 1", name2: "Person 2" }); + expect(results[1]).toEqual({ name1: "Person 2", name2: "Person 3" }); + expect(results[2]).toEqual({ name1: 
"Person 3", name2: "Person 4" }); + + // Test negative match + const nomatch = new Runner(` + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) <> true + RETURN a.name AS name1, b.name AS name2 + `); + await nomatch.run(); + const noresults = nomatch.results; + expect(noresults.length).toBe(13); + expect(noresults[0]).toEqual({ name1: "Person 1", name2: "Person 1" }); + expect(noresults[1]).toEqual({ name1: "Person 1", name2: "Person 3" }); + expect(noresults[2]).toEqual({ name1: "Person 1", name2: "Person 4" }); + expect(noresults[3]).toEqual({ name1: "Person 2", name2: "Person 1" }); + expect(noresults[4]).toEqual({ name1: "Person 2", name2: "Person 2" }); + expect(noresults[5]).toEqual({ name1: "Person 2", name2: "Person 4" }); + expect(noresults[6]).toEqual({ name1: "Person 3", name2: "Person 1" }); + expect(noresults[7]).toEqual({ name1: "Person 3", name2: "Person 2" }); + expect(noresults[8]).toEqual({ name1: "Person 3", name2: "Person 3" }); + expect(noresults[9]).toEqual({ name1: "Person 4", name2: "Person 1" }); + expect(noresults[10]).toEqual({ name1: "Person 4", name2: "Person 2" }); + expect(noresults[11]).toEqual({ name1: "Person 4", name2: "Person 3" }); + expect(noresults[12]).toEqual({ name1: "Person 4", name2: "Person 4" }); +}); + +test("Test person who does not know anyone", async () => { + await new Runner(` + CREATE VIRTUAL (:Person) AS { + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'}, + {id: 3, name: 'Person 3'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {left_id: 1, right_id: 2}, + {left_id: 2, right_id: 1} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH (a:Person) + WHERE NOT (a)-[:KNOWS]->(:Person) + RETURN a.name AS name + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(1); + 
expect(results[0]).toEqual({ name: "Person 3" }); +}); + +test("Test manager chain", async () => { + await new Runner(` + CREATE VIRTUAL (:Employee) AS { + unwind [ + {id: 1, name: 'Employee 1'}, + {id: 2, name: 'Employee 2'}, + {id: 3, name: 'Employee 3'}, + {id: 4, name: 'Employee 4'} + ] as record + RETURN record.id as id, record.name as name + } + `).run(); + await new Runner(` + CREATE VIRTUAL (:Employee)-[:MANAGED_BY]-(:Employee) AS { + unwind [ + {left_id: 2, right_id: 1}, + {left_id: 3, right_id: 2}, + {left_id: 4, right_id: 2} + ] as record + RETURN record.left_id as left_id, record.right_id as right_id + } + `).run(); + const match = new Runner(` + MATCH p=(e:Employee)-[:MANAGED_BY*]->(m:Employee) + WHERE NOT (m)-[:MANAGED_BY]->(:Employee) + RETURN p + `); + await match.run(); + const results = match.results; + expect(results.length).toBe(2); +}); + +test("Test equality comparison", async () => { + const runner = new Runner(` + unwind range(1,10) as i + return i=5 as \`isEqual\`, i<>5 as \`isNotEqual\` + `); + await runner.run(); + const results = runner.results; + expect(results.length).toBe(10); + for (let index = 0; index < results.length; index++) { + const result = results[index]; + if (index + 1 === 5) { + expect(result).toEqual({ isEqual: 1, isNotEqual: 0 }); + } else { + expect(result).toEqual({ isEqual: 0, isNotEqual: 1 }); + } + } +}); diff --git a/tests/extensibility.test.ts b/tests/extensibility.test.ts index f97b446..2154e9b 100644 --- a/tests/extensibility.test.ts +++ b/tests/extensibility.test.ts @@ -1,19 +1,19 @@ import { - Function, AggregateFunction, AsyncFunction, - PredicateFunction, - ReducerElement, + Function, + FunctionCategory, FunctionDef, - FunctionMetadata, FunctionDefOptions, - ParameterSchema, + FunctionMetadata, OutputSchema, - FunctionCategory + ParameterSchema, + PredicateFunction, + ReducerElement, } from "../src/extensibility"; import { getFunctionMetadata, - getRegisteredFunctionFactory + getRegisteredFunctionFactory, } 
from "../src/parsing/functions/function_metadata"; describe("Extensibility API Exports", () => { @@ -23,12 +23,12 @@ describe("Extensibility API Exports", () => { super("customFunc"); this._expectedParameterCount = 1; } - + public value(): string { return "custom value"; } } - + const func = new CustomFunction(); expect(func.name).toBe("customFunc"); expect(func.toString()).toBe("Function (customFunc)"); @@ -42,9 +42,9 @@ describe("Extensibility API Exports", () => { this._expectedParameterCount = 2; } } - + const func = new TwoParamFunction(); - + // Should throw when wrong number of parameters expect(() => { func.parameters = []; @@ -58,7 +58,7 @@ describe("Extensibility API Exports", () => { // _expectedParameterCount is null by default } } - + const func = new FlexibleFunction(); // Should not throw func.parameters = []; @@ -75,37 +75,37 @@ describe("Extensibility API Exports", () => { this._value = v; } } - + class CustomSum extends AggregateFunction { private _total: number = 0; - + constructor() { super("customSum"); } - + public reduce(element: ReducerElement): void { this._total += element.value; } - + public element(): ReducerElement { const el = new SumElement(); el.value = this._total; return el; } - + public value(): number { return this._total; } } - + const agg = new CustomSum(); expect(agg.name).toBe("customSum"); - + const elem = new SumElement(); elem.value = 5; agg.reduce(elem); expect(agg.value()).toBe(5); - + const elem2 = new SumElement(); elem2.value = 3; agg.reduce(elem2); @@ -117,12 +117,12 @@ describe("Extensibility API Exports", () => { constructor() { super("customPredicate"); } - + public value(): boolean { return true; } } - + const pred = new CustomPredicate(); expect(pred.name).toBe("customPredicate"); expect(pred.toString()).toBe("PredicateFunction (customPredicate)"); @@ -137,16 +137,16 @@ describe("Extensibility API Exports", () => { test("ReducerElement class is exported and can be extended", () => { class NumberElement extends 
ReducerElement { private _num: number = 0; - + public get value(): number { return this._num; } - + public set value(v: number) { this._num = v; } } - + const elem = new NumberElement(); elem.value = 42; expect(elem.value).toBe(42); @@ -158,23 +158,21 @@ describe("FunctionDef Decorator", () => { @FunctionDef({ description: "Test function for extensibility", category: "scalar", - parameters: [ - { name: "input", description: "Input value", type: "string" } - ], + parameters: [{ name: "input", description: "Input value", type: "string" }], output: { description: "Output value", type: "string" }, - examples: ["RETURN testExtFunc('hello')"] + examples: ["RETURN testExtFunc('hello')"], }) class TestExtFunc extends Function { constructor() { super("testExtFunc"); this._expectedParameterCount = 1; } - + public value(): string { return "test result"; } } - + // Verify the decorated class still works correctly const instance = new TestExtFunc(); expect(instance.name).toBe("testExtFunc"); @@ -185,23 +183,21 @@ describe("FunctionDef Decorator", () => { @FunctionDef({ description: "Test aggregate function", category: "aggregate", - parameters: [ - { name: "value", description: "Numeric value", type: "number" } - ], - output: { description: "Aggregated result", type: "number" } + parameters: [{ name: "value", description: "Numeric value", type: "number" }], + output: { description: "Aggregated result", type: "number" }, }) class TestAggExt extends AggregateFunction { private _sum: number = 0; - + constructor() { super("testAggExt"); } - + public value(): number { return this._sum; } } - + const instance = new TestAggExt(); expect(instance.name).toBe("testAggExt"); expect(instance.value()).toBe(0); @@ -212,9 +208,15 @@ describe("FunctionDef Decorator", () => { description: "Test async provider for extensibility", category: "async", parameters: [ - { name: "count", description: "Number of items", type: "number", required: false, default: 1 } + { + name: "count", + description: 
"Number of items", + type: "number", + required: false, + default: 1, + }, ], - output: { description: "Data object", type: "object" } + output: { description: "Data object", type: "object" }, }) class Simple extends AsyncFunction { public async *generate(count: number = 1): AsyncGenerator { @@ -223,7 +225,7 @@ describe("FunctionDef Decorator", () => { } } } - + // Verify the decorated class still works correctly const loader = new Simple("simple"); const results: any[] = []; @@ -233,12 +235,12 @@ describe("FunctionDef Decorator", () => { expect(results.length).toBe(2); expect(results[0]).toEqual({ id: 0, data: "item0" }); expect(results[1]).toEqual({ id: 1, data: "item1" }); - + // Verify the async provider was registered const provider = getRegisteredFunctionFactory("simple", "async"); expect(provider).toBeDefined(); expect(typeof provider).toBe("function"); - + // Verify the metadata was registered const metadata = getFunctionMetadata("simple", "async"); expect(metadata).toBeDefined(); @@ -251,21 +253,19 @@ describe("FunctionDef Decorator", () => { @FunctionDef({ description: "Test predicate function", category: "predicate", - parameters: [ - { name: "list", description: "List to check", type: "array" } - ], - output: { description: "Boolean result", type: "boolean" } + parameters: [{ name: "list", description: "List to check", type: "array" }], + output: { description: "Boolean result", type: "boolean" }, }) class TestPredExt extends PredicateFunction { constructor() { super("testPredExt"); } - + public value(): boolean { return true; } } - + const instance = new TestPredExt(); expect(instance.name).toBe("testPredExt"); expect(instance.value()).toBe(true); @@ -279,9 +279,9 @@ describe("Type Exports", () => { description: "Testing type exports", category: "scalar", parameters: [], - output: { description: "Output", type: "string" } + output: { description: "Output", type: "string" }, }; - + expect(meta.name).toBe("typeTest"); 
expect(meta.description).toBe("Testing type exports"); }); @@ -293,9 +293,9 @@ describe("Type Exports", () => { type: "string", required: true, default: "default value", - example: "example value" + example: "example value", }; - + expect(param.name).toBe("testParam"); expect(param.required).toBe(true); }); @@ -307,20 +307,20 @@ describe("Type Exports", () => { type: "array", items: { description: "Item in array", - type: "string" - } + type: "string", + }, }; - + const objectParam: ParameterSchema = { name: "config", description: "Configuration object", type: "object", properties: { enabled: { description: "Is enabled", type: "boolean" }, - value: { description: "Value", type: "number" } - } + value: { description: "Value", type: "number" }, + }, }; - + expect(arrayParam.items?.type).toBe("string"); expect(objectParam.properties?.enabled.type).toBe("boolean"); }); @@ -331,11 +331,11 @@ describe("Type Exports", () => { type: "object", properties: { success: { description: "Success flag", type: "boolean" }, - data: { description: "Result data", type: "array" } + data: { description: "Result data", type: "array" }, }, - example: { success: true, data: [] } + example: { success: true, data: [] }, }; - + expect(output.type).toBe("object"); expect(output.properties?.success.type).toBe("boolean"); }); @@ -346,7 +346,7 @@ describe("Type Exports", () => { const predicate: FunctionCategory = "predicate"; const async: FunctionCategory = "async"; const custom: FunctionCategory = "myCustomCategory"; - + expect(scalar).toBe("scalar"); expect(aggregate).toBe("aggregate"); expect(predicate).toBe("predicate"); @@ -360,9 +360,9 @@ describe("Type Exports", () => { category: "scalar", parameters: [], output: { description: "Output", type: "string" }, - notes: "Some additional notes" + notes: "Some additional notes", }; - + expect(options.description).toBe("Function options test"); expect(options.notes).toBe("Some additional notes"); }); @@ -378,14 +378,14 @@ describe("Plugin 
Functions Integration with FlowQuery", () => { description: "Doubles a number", category: "scalar", parameters: [{ name: "value", description: "Number to double", type: "number" }], - output: { description: "Doubled value", type: "number" } + output: { description: "Doubled value", type: "number" }, }) class Double extends Function { constructor() { super("double"); this._expectedParameterCount = 1; } - + public value(): number { return this.getChildren()[0].value() * 2; } @@ -394,7 +394,7 @@ describe("Plugin Functions Integration with FlowQuery", () => { // Execute a FlowQuery statement that uses the custom function const runner = new Runner("WITH 5 AS num RETURN double(num) AS result"); await runner.run(); - + expect(runner.results.length).toBe(1); expect(runner.results[0]).toEqual({ result: 10 }); }); @@ -404,25 +404,25 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Reverses a string", category: "scalar", parameters: [{ name: "text", description: "String to reverse", type: "string" }], - output: { description: "Reversed string", type: "string" } + output: { description: "Reversed string", type: "string" }, }) class StrReverse extends Function { constructor() { super("strreverse"); this._expectedParameterCount = 1; } - + public value(): string { const input = String(this.getChildren()[0].value()); - return input.split('').reverse().join(''); + return input.split("").reverse().join(""); } } const runner = new Runner("WITH 'hello' AS s RETURN strreverse(s) AS reversed"); await runner.run(); - + expect(runner.results.length).toBe(1); - expect(runner.results[0]).toEqual({ reversed: 'olleh' }); + expect(runner.results[0]).toEqual({ reversed: "olleh" }); }); test("Custom aggregate function can be used in a FlowQuery statement", async () => { @@ -441,18 +441,18 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Calculates the product of values", category: "aggregate", parameters: [{ name: "value", 
description: "Number to multiply", type: "number" }], - output: { description: "Product of all values", type: "number" } + output: { description: "Product of all values", type: "number" }, }) class Product extends AggregateFunction { constructor() { super("product"); this._expectedParameterCount = 1; } - + public reduce(element: ReducerElement): void { element.value = this.firstChild().value(); } - + public element(): ReducerElement { return new ProductElement(); } @@ -460,7 +460,7 @@ describe("Plugin Functions Integration with FlowQuery", () => { const runner = new Runner("UNWIND [2, 3, 4] AS num RETURN product(num) AS result"); await runner.run(); - + expect(runner.results.length).toBe(1); expect(runner.results[0]).toEqual({ result: 24 }); }); @@ -470,14 +470,14 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Adds 100 to a number", category: "scalar", parameters: [{ name: "value", description: "Number", type: "number" }], - output: { description: "Number plus 100", type: "number" } + output: { description: "Number plus 100", type: "number" }, }) class AddHundred extends Function { constructor() { super("addhundred"); this._expectedParameterCount = 1; } - + public value(): number { return this.getChildren()[0].value() + 100; } @@ -486,7 +486,7 @@ describe("Plugin Functions Integration with FlowQuery", () => { // Use the custom function with expressions const runner = new Runner("WITH 5 * 3 AS num RETURN addhundred(num) + 1 AS result"); await runner.run(); - + expect(runner.results.length).toBe(1); expect(runner.results[0]).toEqual({ result: 116 }); // (5*3) + 100 + 1 = 116 }); @@ -496,14 +496,14 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Triples a number", category: "scalar", parameters: [{ name: "value", description: "Number to triple", type: "number" }], - output: { description: "Tripled value", type: "number" } + output: { description: "Tripled value", type: "number" }, }) class Triple 
extends Function { constructor() { super("triple"); this._expectedParameterCount = 1; } - + public value(): number { return this.getChildren()[0].value() * 3; } @@ -513,14 +513,14 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Squares a number", category: "scalar", parameters: [{ name: "value", description: "Number to square", type: "number" }], - output: { description: "Squared value", type: "number" } + output: { description: "Squared value", type: "number" }, }) class Square extends Function { constructor() { super("square"); this._expectedParameterCount = 1; } - + public value(): number { const v = this.getChildren()[0].value(); return v * v; @@ -528,9 +528,11 @@ describe("Plugin Functions Integration with FlowQuery", () => { } // Use both custom functions in a query - const runner = new Runner("WITH 2 AS num RETURN triple(num) AS tripled, square(num) AS squared"); + const runner = new Runner( + "WITH 2 AS num RETURN triple(num) AS tripled, square(num) AS squared" + ); await runner.run(); - + expect(runner.results.length).toBe(1); expect(runner.results[0]).toEqual({ tripled: 6, squared: 4 }); }); @@ -540,7 +542,7 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Provides example data for testing", category: "async", parameters: [], - output: { description: "Example data o.bject", type: "object" } + output: { description: "Example data o.bject", type: "object" }, }) class GetExampleData extends AsyncFunction { public async *generate(): AsyncGenerator { @@ -553,9 +555,11 @@ describe("Plugin Functions Integration with FlowQuery", () => { expect(getRegisteredFunctionFactory("getExampleData", "async")).toBeDefined(); // Use the async provider in a FlowQuery statement - const runner = new Runner("LOAD JSON FROM getExampleData() AS data RETURN data.id AS id, data.name AS name"); + const runner = new Runner( + "LOAD JSON FROM getExampleData() AS data RETURN data.id AS id, data.name AS name" + ); await 
runner.run(); - + expect(runner.results.length).toBe(2); expect(runner.results[0]).toEqual({ id: 1, name: "Alice" }); expect(runner.results[1]).toEqual({ id: 2, name: "Bob" }); @@ -566,7 +570,7 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "Test function for case insensitivity", category: "async", parameters: [], - output: { description: "Test data", type: "object" } + output: { description: "Test data", type: "object" }, }) class MixedCaseFunc extends AsyncFunction { public async *generate(): AsyncGenerator { @@ -604,14 +608,14 @@ describe("Plugin Functions Integration with FlowQuery", () => { description: "A unique test function for introspection", category: "scalar", parameters: [{ name: "x", description: "Input value", type: "number" }], - output: { description: "Output value", type: "number" } + output: { description: "Output value", type: "number" }, }) class IntrospectTestFunc extends Function { constructor() { super("introspectTestFunc"); this._expectedParameterCount = 1; } - + public value(): number { return this.getChildren()[0].value() + 42; } @@ -630,7 +634,7 @@ describe("Plugin Functions Integration with FlowQuery", () => { RETURN f.name AS name, f.description AS description, f.category AS category `); await runner.run(); - + expect(runner.results.length).toBe(1); expect(runner.results[0].name).toBe("introspecttestfunc"); expect(runner.results[0].description).toBe("A unique test function for introspection"); diff --git a/tests/graph/create.test.ts b/tests/graph/create.test.ts new file mode 100644 index 0000000..14a235a --- /dev/null +++ b/tests/graph/create.test.ts @@ -0,0 +1,36 @@ +import Database from "../../src/graph/database"; +import PhysicalNode from "../../src/graph/physical_node"; +import PhysicalRelationship from "../../src/graph/physical_relationship"; +import CreateNode from "../../src/parsing/operations/create_node"; +import CreateRelationship from "../../src/parsing/operations/create_relationship"; +import 
Parser from "../../src/parsing/parser"; + +test("Test CreateNode operation", async () => { + const node = new PhysicalNode(null, "Person"); + expect(node.label).toBe("Person"); + expect(node.statement).toBeNull(); + const parser = new Parser(); + const statement = parser.parse("WITH 1 as x RETURN x"); + const op = new CreateNode(node, statement); + await op.run(); + const db = Database.getInstance(); + const found = db.getNode(node); + expect(found!.label).toBe(node.label); + const data = await found!.data(); + expect(data).toEqual([{ x: 1 }]); +}); + +test("Test CreateRelationship operation", async () => { + const relationship = new PhysicalRelationship(); + relationship.type = "KNOWS"; + expect(relationship.type).toBe("KNOWS"); + expect(relationship.statement).toBeNull(); + const parser = new Parser(); + const statement = parser.parse("WITH 1 as x RETURN x"); + const op = new CreateRelationship(relationship, statement); + await op.run(); + const db = Database.getInstance(); + const found = db.getRelationship(relationship); + const data = await found!.data(); + expect(data).toEqual([{ x: 1 }]); +}); diff --git a/tests/graph/data.test.ts b/tests/graph/data.test.ts new file mode 100644 index 0000000..c2dcb95 --- /dev/null +++ b/tests/graph/data.test.ts @@ -0,0 +1,58 @@ +import Data from "../../src/graph/data"; +import NodeData from "../../src/graph/node_data"; +import RelationshipData from "../../src/graph/relationship_data"; + +test("Data iteration", () => { + const records = [ + { id: "1", name: "Alice" }, + { id: "2", name: "Bob" }, + { id: "3", name: "Charlie" }, + ]; + const data = new Data(records); + expect(data.next()).toBe(true); + expect(data.next()).toBe(true); + expect(data.next()).toBe(true); + expect(data.next()).toBe(false); +}); + +test("Data find", () => { + const records = [ + { id: "1", name: "Alice" }, + { id: "2", name: "Bob" }, + { id: "3", name: "Charlie" }, + { id: "2", name: "Bob Duplicate" }, + ]; + const data: NodeData = new 
NodeData(records); + data.find("2"); + expect(data.current()).toEqual({ id: "2", name: "Bob" }); + expect(data.find("2")).toBe(true); + expect(data.current()).toEqual({ id: "2", name: "Bob Duplicate" }); + expect(data.find("2")).toBe(false); +}); + +test("Data find non-existing", () => { + const records = [ + { id: "1", name: "Alice" }, + { id: "2", name: "Bob" }, + ]; + const data: NodeData = new NodeData(records); + expect(data.find("3")).toBe(false); +}); + +test("RelationshipData find", () => { + const records = [ + { left_id: "1", right_id: "2", type: "FRIEND", id: "r1" }, + { left_id: "2", right_id: "3", type: "COLLEAGUE", id: "r2" }, + { left_id: "1", right_id: "3", type: "FRIEND", id: "r3" }, + ]; + const data: RelationshipData = new RelationshipData(records); + data.find("1"); + expect(data.current()).toEqual({ left_id: "1", right_id: "2", type: "FRIEND", id: "r1" }); + expect(data.find("1")).toBe(true); + expect(data.current()).toEqual({ left_id: "1", right_id: "3", type: "FRIEND", id: "r3" }); + expect(data.find("1")).toBe(false); + expect(data.find("2")).toBe(true); + expect(data.current()).toEqual({ left_id: "2", right_id: "3", type: "COLLEAGUE", id: "r2" }); + expect(data.find("2")).toBe(false); + expect(data.find("4")).toBe(false); +}); diff --git a/tests/graph/match.test.ts b/tests/graph/match.test.ts new file mode 100644 index 0000000..6e5fdb4 --- /dev/null +++ b/tests/graph/match.test.ts @@ -0,0 +1,29 @@ +import Runner from "../../src/compute/runner"; +import PhysicalNode from "../../src/graph/physical_node"; +import CreateNode from "../../src/parsing/operations/create_node"; +import Parser from "../../src/parsing/parser"; + +test("Test CreateNode and match operations", async () => { + const node = new PhysicalNode(null, "Person"); + expect(node.label).toBe("Person"); + expect(node.statement).toBeNull(); + const parser = new Parser(); + const statement = parser.parse(` + unwind [ + {id: 1, name: 'Person 1'}, + {id: 2, name: 'Person 2'} + ] as 
record + RETURN record.id as id, record.name as name + `); + const op = new CreateNode(node, statement); + await op.run(); + const runner = new Runner("match (n:Person) RETURN n"); + await runner.run(); + expect(runner.results.length).toBe(2); + expect(runner.results[0].n).toBeDefined(); + expect(runner.results[0].n.id).toBe(1); + expect(runner.results[0].n.name).toBe("Person 1"); + expect(runner.results[1].n).toBeDefined(); + expect(runner.results[1].n.id).toBe(2); + expect(runner.results[1].n.name).toBe("Person 2"); +}); diff --git a/tests/parsing/parser.test.ts b/tests/parsing/parser.test.ts index 1e87c53..6f5c1a0 100644 --- a/tests/parsing/parser.test.ts +++ b/tests/parsing/parser.test.ts @@ -1,5 +1,11 @@ +import Node from "../../src/graph/node"; +import NodeReference from "../../src/graph/node_reference"; +import Relationship from "../../src/graph/relationship"; import AsyncFunction from "../../src/parsing/functions/async_function"; import { FunctionDef } from "../../src/parsing/functions/function_metadata"; +import CreateNode from "../../src/parsing/operations/create_node"; +import CreateRelationship from "../../src/parsing/operations/create_relationship"; +import Match from "../../src/parsing/operations/match"; import Parser from "../../src/parsing/parser"; // Test class for CALL operation parsing test - defined at module level for Prettier compatibility @@ -424,7 +430,7 @@ test("Test non-well formed statements", () => { "Only one RETURN statement is allowed" ); expect(() => new Parser().parse("return 1 with 1 as n")).toThrow( - "Last statement must be a RETURN, WHERE, or a CALL statement" + "Last statement must be a RETURN, WHERE, CALL, or CREATE statement" ); }); @@ -465,7 +471,13 @@ test("Test limit", () => { test("Test return -2", () => { const parser = new Parser(); const ast = parser.parse("return -2"); - expect(ast.print()).toBe("ASTNode\n" + "- Return\n" + "-- Expression\n" + "--- Number (-2)"); + // prettier-ignore + expect(ast.print()).toBe( + 
"ASTNode\n" + + "- Return\n" + + "-- Expression\n" + + "--- Number (-2)" + ); }); test("Test call operation", () => { @@ -481,3 +493,262 @@ test("Test call operation", () => { "--- Reference (result)" ); }); + +test("Test f-string", () => { + const parser = new Parser(); + const ast = parser.parse("with 1 as value RETURN f'Value is: {value}.'"); + expect(ast.print()).toBe( + "ASTNode\n" + + "- With\n" + + "-- Expression (value)\n" + + "--- Number (1)\n" + + "- Return\n" + + "-- Expression\n" + + "--- FString\n" + + "---- String (Value is: )\n" + + "---- Expression\n" + + "----- Reference (value)\n" + + "---- String (.)" + ); +}); + +test("Test create node operation", () => { + const parser = new Parser(); + const ast = parser.parse(` + CREATE VIRTUAL (:Person) AS { + unwind range(1, 3) AS id + return id, f'Person {id}' AS name + } + `); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- CreateNode" + ); + const create: CreateNode = ast.firstChild() as CreateNode; + expect(create.node).not.toBeNull(); + expect(create.node!.label).toBe("Person"); + expect(create.statement!.print()).toBe( + "ASTNode\n" + + "- Unwind\n" + + "-- Expression (id)\n" + + "--- Function (range)\n" + + "---- Expression\n" + + "----- Number (1)\n" + + "---- Expression\n" + + "----- Number (3)\n" + + "- Return\n" + + "-- Expression (id)\n" + + "--- Reference (id)\n" + + "-- Expression (name)\n" + + "--- FString\n" + + "---- String (Person )\n" + + "---- Expression\n" + + "----- Reference (id)\n" + + "---- String ()" + ); +}); + +test("Test match operation", () => { + const parser = new Parser(); + const ast = parser.parse("MATCH (n:Person) RETURN n"); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- Match\n" + + "- Return\n" + + "-- Expression (n)\n" + + "--- Reference (n)" + ); + const match = ast.firstChild() as Match; + expect(match.patterns[0].startNode).not.toBeNull(); + expect(match.patterns[0].startNode!.label).toBe("Person"); + 
expect(match.patterns[0].startNode!.identifier).toBe("n"); +}); + +test("Test create relationship operation", () => { + const parser = new Parser(); + const ast = parser.parse(` + CREATE VIRTUAL (:Person)-[:KNOWS]-(:Person) AS { + unwind [ + {from_id: 1, to_id: 2, since: '2020-01-01'}, + {from_id: 2, to_id: 3, since: '2021-01-01'} + ] AS pair + return pair.from_id AS from, pair.to_id AS to, pair.since AS since + } + `); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- CreateRelationship" + ); + const create = ast.firstChild() as CreateRelationship; + expect(create.relationship).not.toBeNull(); + expect(create.relationship!.type).toBe("KNOWS"); + expect(create.statement!.print()).toBe( + "ASTNode\n" + + "- Unwind\n" + + "-- Expression (pair)\n" + + "--- JSONArray\n" + + "---- Expression\n" + + "----- AssociativeArray\n" + + "------ KeyValuePair\n" + + "------- String (from_id)\n" + + "------- Expression\n" + + "-------- Number (1)\n" + + "------ KeyValuePair\n" + + "------- String (to_id)\n" + + "------- Expression\n" + + "-------- Number (2)\n" + + "------ KeyValuePair\n" + + "------- String (since)\n" + + "------- Expression\n" + + "-------- String (2020-01-01)\n" + + "---- Expression\n" + + "----- AssociativeArray\n" + + "------ KeyValuePair\n" + + "------- String (from_id)\n" + + "------- Expression\n" + + "-------- Number (2)\n" + + "------ KeyValuePair\n" + + "------- String (to_id)\n" + + "------- Expression\n" + + "-------- Number (3)\n" + + "------ KeyValuePair\n" + + "------- String (since)\n" + + "------- Expression\n" + + "-------- String (2021-01-01)\n" + + "- Return\n" + + "-- Expression (from)\n" + + "--- Lookup\n" + + "---- Identifier (from_id)\n" + + "---- Reference (pair)\n" + + "-- Expression (to)\n" + + "--- Lookup\n" + + "---- Identifier (to_id)\n" + + "---- Reference (pair)\n" + + "-- Expression (since)\n" + + "--- Lookup\n" + + "---- Identifier (since)\n" + + "---- Reference (pair)" + ); +}); + +test("Match with graph 
pattern including relationships", () => { + const parser = new Parser(); + const ast = parser.parse("MATCH (a:Person)-[:KNOWS]-(b:Person) RETURN a, b"); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- Match\n" + + "- Return\n" + + "-- Expression (a)\n" + + "--- Reference (a)\n" + + "-- Expression (b)\n" + + "--- Reference (b)" + ); + const match = ast.firstChild() as Match; + expect(match.patterns[0].chain.length).toBe(3); + const source = match.patterns[0].chain[0] as Node; + const relationship = match.patterns[0].chain[1] as Relationship; + const target = match.patterns[0].chain[2] as Node; + expect(source.identifier).toBe("a"); + expect(source.label).toBe("Person"); + expect(relationship.type).toBe("KNOWS"); + expect(target.identifier).toBe("b"); + expect(target.label).toBe("Person"); +}); + +test("Test not equal operator", () => { + const parser = new Parser(); + const ast = parser.parse("RETURN 1 <> 2"); + expect(ast.print()).toBe( + "ASTNode\n" + + "- Return\n" + + "-- Expression\n" + + "--- NotEquals\n" + + "---- Number (1)\n" + + "---- Number (2)" + ); +}); + +test("Test equal operator", () => { + const parser = new Parser(); + const ast = parser.parse("RETURN 1 = 2"); + expect(ast.print()).toBe( + "ASTNode\n" + + "- Return\n" + + "-- Expression\n" + + "--- Equals\n" + + "---- Number (1)\n" + + "---- Number (2)" + ); +}); + +test("Test not operator", () => { + const parser = new Parser(); + const ast = parser.parse("RETURN NOT true"); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- Return\n" + + "-- Expression\n" + + "--- Not\n" + + "---- Expression\n" + + "----- Boolean" + ); +}); + +test("Parse relationship with hops", () => { + const parser = new Parser(); + const ast = parser.parse("MATCH (a:Test)-[:KNOWS*1..3]->(b:Test) RETURN a, b"); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- Match\n" + + "- Return\n" + + "-- Expression (a)\n" + + "--- Reference (a)\n" + + "-- Expression (b)\n" + + 
"--- Reference (b)" + ); + const match = ast.firstChild() as Match; + expect(match.patterns[0].chain.length).toBe(3); + const source = match.patterns[0].chain[0] as Node; + const relationship = match.patterns[0].chain[1] as Relationship; + const target = match.patterns[0].chain[2] as Node; + expect(source.identifier).toBe("a"); + expect(relationship.type).toBe("KNOWS"); + expect(relationship.hops).not.toBeNull(); + expect(relationship.hops!.min).toBe(1); + expect(relationship.hops!.max).toBe(3); + expect(target.identifier).toBe("b"); +}); + +test("Parse statement with graph pattern in where clause", () => { + const parser = new Parser(); + const ast = parser.parse("MATCH (a:Person) WHERE (a)-[:KNOWS]->(:Person) RETURN a"); + // prettier-ignore + expect(ast.print()).toBe( + "ASTNode\n" + + "- Match\n" + + "- Where\n" + + "-- Expression\n" + + "--- PatternExpression\n" + + "- Return\n" + + "-- Expression (a)\n" + + "--- Reference (a)" + ); + const match = ast.firstChild() as Match; + expect(match.patterns[0].startNode).not.toBeNull(); + expect(match.patterns[0].startNode!.identifier).toBe("a"); + const where = match.next as any; + const pattern = where.firstChild().firstChild() as any; + expect(pattern.chain.length).toBe(3); + const source = pattern.chain[0] as NodeReference; + const relationship = pattern.chain[1] as Relationship; + const target = pattern.chain[2] as Node; + expect(source.reference?.identifier).toBe("a"); + expect(relationship.type).toBe("KNOWS"); + expect(target.label).toBe("Person"); +}); diff --git a/tests/tokenization/tokenizer.test.ts b/tests/tokenization/tokenizer.test.ts index e0f4428..e22c7b0 100644 --- a/tests/tokenization/tokenizer.test.ts +++ b/tests/tokenization/tokenizer.test.ts @@ -28,6 +28,13 @@ test("Test f-string", () => { expect(tokens.length).toBeGreaterThan(0); }); +test("Test another f-string", () => { + const tokenizer = new Tokenizer("RETURN f`Value is: {value}`"); + const tokens = tokenizer.tokenize(); + 
expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + test("Test", () => { const tokenizer = new Tokenizer("WITH 1 AS n RETURN n"); const tokens = tokenizer.tokenize(); @@ -65,3 +72,86 @@ test("Test range with function", () => { expect(tokens).toBeDefined(); expect(tokens.length).toBeGreaterThan(0); }); + +test("Test create virtual node", () => { + const tokenizer = new Tokenizer(` + CREATE VIRTUAL (:Person) AS { + call users() YIELD id, name + } + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + +test("Test create virtual relationship", () => { + const tokenizer = new Tokenizer(` + CREATE VIRTUAL (:Person)-[:KNOWS]->(:Person) AS { + call friendships() YIELD user1_id, user2_id + } + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + +test("Match based on virtual node", () => { + const tokenizer = new Tokenizer(` + MATCH (a:Person) + RETURN a.name + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + +test("Match based on virtual nodes and relationships", () => { + const tokenizer = new Tokenizer(` + MATCH (a:Person)-[r:KNOWS]->(b:Person) + RETURN a.name, b.name + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + +test("Test not equal operator", () => { + const tokenizer = new Tokenizer(` + MATCH (n:Person) + WHERE n.age <> 30 + RETURN n.name AS name, n.age AS age + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + +test("Test equal operator", () => { + const tokenizer = new Tokenizer(` + MATCH (n:Person) + WHERE n.age = 30 + RETURN n.name AS name, n.age AS age + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + 
expect(tokens.length).toBeGreaterThan(0); +}); + +test("Test boolean operators", () => { + const tokenizer = new Tokenizer(` + return true AND false OR true NOT false + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +}); + +test("Test relationship with hops", () => { + const tokenizer = new Tokenizer(` + MATCH (a:Person)-[r:KNOWS*1..3]->(b:Person) + RETURN a.name, b.name + `); + const tokens = tokenizer.tokenize(); + expect(tokens).toBeDefined(); + expect(tokens.length).toBeGreaterThan(0); +});